#!/usr/bin/perl
# processarchives.pl, copyright Casey Muller, let me know if you find
# it useful at processarchives(at)null-terminated.com and maybe I can
# give you a more up to date version.
# fourth beta version: Mar 1st, 2005
# TODO for fifth beta: fix RSS metadata
#
# NOTE(review): the copy of this script that was reviewed had been flattened
# onto a few physical lines and had lost everything that looks like an
# HTML/XML tag: readline operators (<INPUT>), heredoc introducers
# (<<RSSHEADER;), regex patterns, and presumably the markup inside string
# literals and heredoc bodies.  Syntax has been restored only where a single
# reading is possible.  String/heredoc *contents* are reproduced as found and
# are marked NOTE(review) where markup appears to be missing -- confirm all of
# them against a pristine copy before deploying.

use strict;
use warnings;

use File::Find;
use List::Util qw(min max);
use POSIX;

# directory constants
# These stay package globals (our) so that feedconstants.pl, which is
# require'd below, can still see them exactly as it could before.
our $webdir = `pwd`;
chomp $webdir;
our $caseydir = $webdir."/root/casey0";
#$caseydir = $webdir."/staging/casey0";
our $nulltdir = $webdir."/root/null-terminated";
#$nulltdir = $webdir."/staging/null-terminated";
our $archivedir = $caseydir."/archive";
our $tagdir = $caseydir."/tags/";
# NOTE(review): the next three are not referenced by any surviving code;
# presumably the stripped image-rewriting substitutions used them.
our $imgprefix = "http://bigfiles.null-terminated.com:81/digitalcamera/";
our $thumbpath = "/visuals/photography/by-date/";
our $thumbprefix = "http://null-terminated.com".$thumbpath;

# structure: in each directory $archivedir/year/month there are
# ##.entry files which contain the entry for the ##th day.  In them,
# the first line is the title, the second line is a whitespace-separated
# list of tags, and the rest is the body, which is written to
# <month dir>/<title>.html (spaces in the path become underscores).
# Every directory gets an index.html listing its entries.  The 5 most
# recent entries go on $caseydir/index.html and into the feed.

# Template variables.  None of these are assigned in this file; they are
# expected to be set by feedconstants.pl.
our ($feedtitle, $feedURL, $feeddesc, $feedRSSURL);
our ($pretitle, $title2date, $date2content, $content2prev, $postprev);
our ($archivestart, $archiveentrystart, $archiveentryfinish, $archivefinish);
our ($mainstart, $mainprecontent, $mainpostcontent, $mainfinish);
our ($yearormontharchive);

#load the feed templates
# NOTE(review): a bare relative require depends on "." being in @INC, which
# perl 5.26+ no longer provides by default -- confirm how this is invoked.
require "feedconstants.pl";

#use XML::RSS;
#my $rss = new XML::RSS;
#$rss->channel(
#    title => $feedtitle,
#    link => $feedURL,
#    description => $feeddesc,
#    dc => {
#        rights => "Casey Muller under a Creative Commons license",
#        language => "en-us",
#    },
#    syn => {
#        updatePeriod => "daily",
#        updateFrequency => "2",
#        updateBase => "1980-06-24T08:00-08:00",
#    },
#    );

my $today = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime();

open(my $rss_fh, '>', "$caseydir/casey0.rss")
    or die "can't write $caseydir/casey0.rss: $!";
# NOTE(review): feed header body reproduced as found; the XML elements that
# wrapped these values appear to have been stripped.
print {$rss_fh} <<RSSHEADER;
 $feedtitle $feeddesc $today $feedRSSURL Casey Muller atomfeed\@nullterminated.com
RSSHEADER

my @months = qw(January February March April May June July
                August September October November December);
my %monthnums;
@monthnums{@months} = 1 .. @months;

# State accumulated across the whole traversal.  Everything is unshift'ed,
# so index 0 is always the most recently processed entry.
my (@allindex, @yearindex, @monthindex);
my (@entries, @rsstitle, @rssurl, @rssdate, @rssdesc, @rsssubjects);
my (%tagfiles, %tagcount);    # tag => open filehandle / number of entries

# How many directory levels below $archivedir a path is:
# 0 = archive root, 1 = year directory, 2 = month directory.
sub _archive_depth {
    my ($dir) = @_;
    my $relative = substr $dir, length $archivedir;
    return ($relative =~ tr{/}{});
}

# Calendar position (1..12) of a month directory name; 0 for anything else.
sub _month_rank {
    my ($name) = @_;
    return exists $monthnums{$name} ? $monthnums{$name} : 0;
}

# Leading integer of a file name ("03.entry" -> 3); 0 when there is none.
sub _leading_number {
    my ($name) = @_;
    return $name =~ /^(\d+)/ ? $1 : 0;
}

# open the archive directory.  .entry files get processed as entries
find {
    # Decide the order in which a directory's children are visited.
    preprocess => sub {
        # sort by month slice if it's a year dir
        if (_archive_depth($File::Find::dir) == 1) {
            return sort { _month_rank($a) <=> _month_rank($b) } @_;
        }
        # need 2 to appear before 10
        return sort { _leading_number($a) <=> _leading_number($b) } @_;
    },

    # Called for every file; only NN.entry (optionally NN.entryM) is handled.
    wanted => sub {
        return unless /(.*)\.entry(\d*)$/;
        my $day = $1;
        print "processing $_ in $File::Find::dir\n";

        # get just the archive part
        my $dir     = substr $File::Find::dir, length($archivedir);
        my $archdir = substr $File::Find::dir, length($caseydir);

        # get date from filename and path: $dir looks like /year/month
        my @tracedirs = split m%/%, $dir;
        shift @tracedirs;    # drop the empty field before the leading slash
        my $year     = shift @tracedirs;
        my $month    = shift @tracedirs;
        my $monthnum = sprintf "%02s", $monthnums{$month};
        my $daynum   = sprintf "%02s", $day;

        # open it
        open(my $in, '<', $File::Find::name)
            or die "can't read $File::Find::name: $!";
        my $title = <$in>;
        $title = '' unless defined $title;
        chomp $title;
        my $tagline = <$in>;
        $tagline = '' unless defined $tagline;
        chomp $tagline;
        my @tags = split ' ', $tagline;

        # what's the relative url?
        my $url = "$archdir/$title.html";
        $url =~ y/ /_/;

        # save info to add to rss and index
        # NOTE(review): reproduced as found; presumably this was a link to
        # $url whose markup was stripped.
        my $indexhtml = "$title \n";
        unshift @allindex,   $indexhtml;
        unshift @yearindex,  $indexhtml;
        unshift @monthindex, $indexhtml;
        unshift @rsstitle,   $title;
        unshift @rssurl,     "$feedURL$url";
        unshift @rssdate,    "$year-$monthnum-$daynum"."T12:00:00-08:00";

        # open the output file
        open(my $out, '>', "$caseydir$url")
            or die "can't write $caseydir$url: $!";

        # write out the headers
        print {$out} $pretitle;
        print {$out} $title;
        print {$out} $title2date;
        print {$out} "$month $day, $year";
        print {$out} $date2content;

        # dump the actual file:
        my $entry    = "";
        my $rssentry = "";
        my @rsstags;
        foreach my $tag (@tags) {
            $tag = lc $tag;
            print "TAG: $tag\n";

            # do we need a new file
            if (!exists $tagfiles{$tag}) {
                print "creating file for tag: $tag\n";
                open($tagfiles{$tag}, '>', $tagdir.$tag.".html")
                    or die "can't write $tagdir$tag.html: $!";
                # NOTE(review): tag page header reproduced as found.
                print { $tagfiles{$tag} } <<HTMLBLOCK;

tag: $tag

HTMLBLOCK
            }

            # add ourselves
            print { $tagfiles{$tag} } $indexhtml;
            # NOTE(review): both strings reproduced as found.
            $entry    .= "tag: $tag";
            $rssentry .= " tag: $tag";
            push @rsstags, $tag;

            # increment the counters
            $tagcount{$tag}++;
        }
        $entry    .= "\n";
        $rssentry .= "\n";

        while (<$in>) {
            # duplicated code:
            # FIXME(review): two substitutions stood here as `s##$3#g;`.
            # Their pattern (and probably part of the replacement) is lost.
            # An empty pattern makes Perl reuse the last successfully matched
            # regex, so they are disabled rather than run in that state.
            # Restore them from a pristine copy.
            #s##$3#g;
            #s##$3#g;
            if (/^$/) {
                if ($rssentry) {
                    # Blank source line -> paragraph separator.
                    # NOTE(review): separators reproduced as found (newlines
                    # only); the original comment ended "use" followed by
                    # something that was stripped, presumably a tag.
                    $rssentry .= "\n\n\n\n";
                    $entry    .= "\n\n\n\n\n\n";
                }
            }
            else {
                $entry    .= $_;
                $rssentry .= $_;
            }
        }
        close $in;

        # strip trailing newlines
        $entry =~ s/[\s\n]+$//;

        # write it to the actual page
        print {$out} $entry;
        if ($#allindex > 0) {
            # link back to the entry processed just before this one
            print {$out} $content2prev;
            print {$out} $archiveentrystart;
            print {$out} $allindex[1];
            print {$out} $archiveentryfinish;
        }
        print {$out} $postprev;
        close $out or warn "error closing $caseydir$url: $!";

        # save a copy for the main page and for rss
        unshift @entries,     $entry;
        unshift @rssdesc,     $rssentry;
        unshift @rsssubjects, [ @rsstags ];
        return;
    },

    # Called when leaving a directory: write that directory's index.html.
    # At the archive root this also writes the front page, the
    # null-terminated index and the feed items.
    postprocess => sub {
        print "creating index for $File::Find::dir\n";

        # open the output file
        open(my $out, '>', "$File::Find::dir/index.html")
            or die "can't write $File::Find::dir/index.html: $!";

        # are we a month or a year?
        my $dirs = _archive_depth($File::Find::dir);
        print "got $dirs directories\n";

        if ($dirs == 0) {
            # the main archive page has all entries
            print {$out} $archivestart;
            foreach my $item (@allindex) {
                print {$out} $archiveentrystart;
                print {$out} $item;
                print {$out} $archiveentryfinish;
            }
            print {$out} $archivefinish;

            # also create the frontpage
            open(my $main, '>', "$caseydir/index.html")
                or die "can't write $caseydir/index.html: $!";
            print {$main} $mainstart;

            # add the recent photos at the top (best effort: a missing file
            # was silently ignored before, now it is reported but not fatal)
            if (open(my $photos, '<', "$webdir/recentPhotos")) {
                print {$main} $_ while <$photos>;
                close $photos;
            }
            else {
                warn "can't read $webdir/recentPhotos: $!";
            }

            # newest entries are at the front, so keep the first 5 for the
            # main page/rss
            if ($#allindex >= 5) {
                for my $list (\@allindex, \@entries, \@rsstitle, \@rssurl,
                              \@rssdesc, \@rssdate, \@rsssubjects) {
                    splice @$list, 5;
                }
            }

            open(my $nullt, '>', $nulltdir."/index.html")
                or die "can't write $nulltdir/index.html: $!";
            # NOTE(review): text reproduced as found; links appear stripped.
            print {$nullt} "The latest entry from my project log is titled $rsstitle[0]. It was posted on $rssdate[0].\n\nSee more about all this or just start browsing the categories.\n\nRecent photographs and projects:\n";
            open(my $projs, '<', $webdir."/recent")
                or die "can't read $webdir/recent: $!";
            print {$nullt} $_ while <$projs>;
            close $projs;
            close $nullt or warn "error closing $nulltdir/index.html: $!";

            while (@allindex) {
                print {$main} $mainprecontent;
                print {$main} shift @allindex;
                print {$main} shift @entries;
                print {$main} $mainpostcontent;

                my $tmptitle = shift @rsstitle;
                my $tmplink  = shift @rssurl;
                my $tmpdesc  = shift @rssdesc;
                # XML-escape the description; '&' must be handled first.
                # (Restored: the source showed the entity-decoded,
                # tag-stripped remains of these three substitutions.)
                $tmpdesc =~ s/&/&amp;/g;
                $tmpdesc =~ s/</&lt;/g;
                $tmpdesc =~ s/>/&gt;/g;
                my $tmpdate = shift @rssdate;
                my $tmpcats = shift @rsssubjects;
                my $tmpcat  = "";
                foreach my $cat (@$tmpcats) {
                    # NOTE(review): reproduced as found -- the per-tag
                    # element built from $cat appears to have been stripped,
                    # so this currently appends nothing.
                    $tmpcat .= "";
                }
                # subject => join(' ', @{ shift @rsssubjects }),
                # NOTE(review): item body reproduced as found.
                print {$rss_fh} <<RSSBLOCK;
 $tmptitle $tmplink $tmpdate $tmpdate

$tmpdesc $tmpcat
RSSBLOCK
                # $rss->add_item(
                #     title => shift @rsstitle,
                #     link => shift @rssurl,
                #     description => shift @rssdesc,
                #     dc => {
                #         date => shift @rssdate,
                #         subject => join(' ', @{ shift @rsssubjects }),
                #     }
                # );
            }
            print {$main} $mainfinish;
            close $main or warn "error closing $caseydir/index.html: $!";
        }
        elsif ($dirs == 1) {
            # year index; reset the list for the next year
            print {$out} $yearormontharchive;
            foreach my $item (@yearindex) {
                print {$out} $archiveentrystart;
                print {$out} $item;
                print {$out} $archiveentryfinish;
            }
            @yearindex = ();
        }
        elsif ($dirs == 2) {
            # month index; reset the list for the next month
            print {$out} $yearormontharchive;
            foreach my $item (@monthindex) {
                print {$out} $archiveentrystart;
                print {$out} $item;
                print {$out} $archiveentryfinish;
            }
            @monthindex = ();
        }
        close $out or warn "error closing $File::Find::dir/index.html: $!";
    },
}, $archivedir;

# save rss file
#$rss->save("$caseydir/casey0.rss");
# NOTE(review): feed trailer reproduced as found (newlines only).
print {$rss_fh} "\n\n";
close $rss_fh or warn "error closing $caseydir/casey0.rss: $!";

# fix rss file, allow multiple subjects
#use Tie::File;
#tie @rsslines, 'Tie::File', "$caseydir/casey0.rss";
#foreach (@rsslines) {
#    if(/\(.+)\<\/dc:subject\>/) {
#        @tags = split / /, $1;
#        $_ = "";
#        foreach $tag (@tags) {
#            $_ .= "$tag"
#        }
#    }
#}

open(my $tags_fh, '>', $tagdir."index.html")
    or die "can't write ${tagdir}index.html: $!";
print {$tags_fh} "All tags:\n";

# Range of per-tag entry counts.  (Previously the minimum started out
# undefined, so "count < min" never fired and it was never tracked.)
my @counts = values %tagcount;
my $mintagcount = @counts ? min(@counts) : 0;
my $maxtagcount = @counts ? max(@counts) : 0;
my $span        = $maxtagcount - $mintagcount;

foreach my $tag (sort keys %tagfiles) {
    # interpolate between 12 and 36; all tags equally common -> 12
    my $fontsize = $span
        ? ($tagcount{$tag} - $mintagcount) / $span * 24 + 12
        : 12;
    # NOTE(review): line reproduced as found; $fontsize is not used by it,
    # presumably because the markup that carried it was stripped.
    print {$tags_fh} "$tag($tagcount{$tag}) \n";
    print "$tag = $tagcount{$tag}\n";
    close $tagfiles{$tag} or warn "error closing tag file for $tag: $!";
}
close $tags_fh or warn "error closing ${tagdir}index.html: $!";