#!/usr/bin/perl -w
# This script has two functions:
#
# Firstly, it reads and verifies the course database
# (/anfs/www/html/teaching/????/coursedb.txt) against the syllabus
# (in form of the diagnostic file CST.html-witter that LaTeX2HTML produces).
#
# Secondly, it then produces a list of HTTP redirect instructions
# (/anfs/www/html/teaching/????/CST/.htaccess) that map from URLs with
# human-readable course identifiers to URLs with the LaTeX2HTML
# node numbers where the actual syllabus pages are. This step decouples
# finalizing the generation of the course-material directories (which
# link to the syllabus pages) from finalizing the syllabus.
# For every redirect it also writes a restyled copy of the node page
# as <course-id>.html into the same directory.
#
# This script takes one optional command-line argument, the academic
# year identifier (the "????" above, e.g. 0809 = 2008/09).
# It has all pathnames hardwired in. If the year identifier is omitted,
# it guesses by looking at the current date: from 1 July onwards it
# assumes the academic year that starts in the current calendar year,
# before that the one that started in the previous calendar year.
# NOTE(review): an earlier version of this comment said the switch
# happens "after the end of May", but the code has always subtracted
# six months, i.e. it switches on 1 July -- confirm which is intended.
#
# Markus Kuhn -- 2006-08-30

use FindBin qw($RealBin); # find directory where this file is located ...
use lib '/anfs/www/tools/share/ucampas/perl-PlexTree', $RealBin;
use bytes;
use strict;
use PlexTree;
use CourseDB;

# Guess the academic year code ("0809" for 2008/09).  This is the year
# in which the date six months ago fell; computed with localtime instead
# of shelling out to GNU date(1), which is neither portable nor checked.
my @now = localtime;
my $year4 = $now[5] + 1900;
$year4-- if $now[4] < 6;                # January .. June (month is 0-based)
my $year2 = $year4 % 100;
my $year22 = sprintf("%02d%02d", $year2, ($year2 + 1) % 100);

if (@ARGV == 0) {
    # take the guessed year
} elsif (@ARGV == 1 && $ARGV[0] =~ /^[0-9]{4}\z/) {
    # \z rather than $ so that a trailing newline cannot slip into paths
    $year22 = $ARGV[0];
} else {
    die("Usage examples:\n".
        " $0\n".
        " $0 $year22\n");
}

print "$0: Verifying syllabus against coursedb.txt and\n";
print "generating HTTP redirects from course identifiers to LaTeX2HTML node numbers.\n\n";

# Print a diagnostic message on stdout, highlighted with the ANSI
# "bold" escape sequence.
sub warning($) {
    my ($t) = @_;
    print "\x{1b}[1m$t\x{1b}[0m\n";
}

# Turn the HTML of a LaTeX2HTML node page into the HTML of a stand-alone
# course page: promote <h2>/<h3> headings by one level and tidy up the
# preamble between the main heading and the first paragraph that follows
# the lecturer line.  Returns the new HTML, or nothing if the page does
# not have the expected "...</h1> preamble <p> main" structure.
#
# NOTE(review): the literal <p> and <br> tags in the patterns below had
# been stripped from the copy of this script under review; they were
# restored from the accompanying comments -- confirm against the
# deployed version.
sub restyle_syllabus_page($) {
    my ($html) = @_;
    return unless defined $html;

    $html =~ s/(<\/?h)2>/${1}1>/ig;
    $html =~ s/(<\/?h)3>/${1}2>/ig;

    return unless $html =~ /^(.*<\/h1>)(.*?)(<p>.*)$/si;
    my ($head, $preamble, $main) = ($1, $2, $3);

    # no italics in the preamble
    $preamble =~ s/<\/?(i|em)>//gi;
    # tag the lecturer line so that it can be styled separately
    $preamble =~ s/<(p)>\s*(Lecturers?:.*)/<$1 id=lecturer>$2<\/$1>/ig;
    $preamble =~ s/<p>/<br>/gi;             # replace all <p>
    $preamble =~ s/(<\/p>\s*)<br>/$1<p>/si; # except for the first one

    return "$head$preamble$main";
}

# Load the course database
my $dbfn = "/anfs/www/html/teaching/$year22/coursedb.txt";
print "Reading course titles and identifiers from $dbfn ...\n\n";
my $db = CourseDB::load($dbfn);
if ($year22 ne $db->year('22')) {
    die("$dbfn: myear does not match the current academic year '$year22'\n");
}

my %cid_from_name;  # course title         -> course identifier
my %node;           # course identifier    -> syllabus node number
my %redirect;       # course identifier    -> node number to redirect to
my %coursename;     # course identifier    -> course title
my %classes;        # not used in this file

foreach my $course ($db->courses) {
    next unless $course->takes_place;
    my $cid  = $course->code;
    my $name = $course->title;
    # NBSP -> SP.  NOTE(review): the non-breaking space in this pattern
    # was not recoverable from the copy under review (it showed a plain
    # space, which would make the substitution a no-op).  The alternatives
    # cover UTF-8 bytes, a Latin-1 byte and the HTML entity -- confirm
    # against the encoding actually used in coursedb.txt.  As before,
    # only the first occurrence is replaced.
    $name =~ s/(?:\xc2\xa0|\xa0|&nbsp;)/ /;
    # only courses whose syllabus comes from LaTeX2HTML are of interest
    my $syllabus = $course->param('syllabus');
    next unless defined $syllabus && $syllabus =~ /^latexhtml/;
    $coursename{$cid} = $name;
    $cid_from_name{$name} = $cid;
    # the title goes through %s: it may contain '%' characters
    printf(" %-10s = '%s'\n", $cid_from_name{$name}, $name);
}

# Load the latex2html mapping file
for my $course ('CST') {
    my $fin = "$course.html-witter";
    print "\nReading course titles from $course syllabus ($fin)\n";
    print "and mapping them to course identifier and syllabus node number ...\n\n";
    open(my $witter, '<', $fin) || die("Cannot read '$fin': $!\n");

    # skip introductory sections
    while (my $line = <$witter>) {
        last if $line =~ /^(\d+)\/\d+:section:\.*\"(Michaelmas Term|Lent Term|Easter Term|Long Vacation) /;
    }

    # scan subsection titles
    while (my $line = <$witter>) {
        next unless $line =~ /^(\d+)\/\d+:subsection:\.*\"(.*)\" for node(\d+)\.html$/;
        my $node_nr = $3;
        my $name    = $2;
        printf " %-50s", $name;
        # strip decorations that the database titles do not carry
        $name =~ s/^Paper\s+\d:\s+//;
        $name =~ s/\s+\(.*\)$//;
        if (exists $cid_from_name{$name}) {
            my $cid = $cid_from_name{$name};
            $node{$cid} = $node_nr;
            printf("%15s.html -> node$node_nr.html", $cid);
            if (exists $redirect{$cid}) {
                # the first node found for a course wins
                print " (skipped)";
            } else {
                $redirect{$cid} = $node_nr;
            }
            print "\n";
        } else {
            warning(" '$name' (node$node_nr.html) not found in coursedb.txt");
        }
    }
    close $witter;

    my $dir  = "/anfs/www/html/teaching/$year22/$course";
    my $fout = "$dir/.htaccess";
    print "\nWriting ".scalar(keys %redirect)." redirects to $fout ...\n";
    open(my $htaccess, '>', $fout) || die("Cannot write '$fout': $!\n");
    # The output of hostname(1) supplies the newline that ends the
    # first header line.
    print {$htaccess} '# DO NOT EDIT: file autogenerated by ' .
        (getlogin || getpwuid($<)) . '@' . `/bin/hostname` .
        "# with $0\n";

    # Set to 1 to merely symlink <cid>.html to the node file instead of
    # writing a modified copy.
    my $symlink_only = 0;

    foreach my $cid (sort { $a cmp $b } keys %redirect) {
        my $node_nr = $redirect{$cid};
        print {$htaccess} sprintf("Redirect /teaching/$year22/$course/%-15s http://www.cl.cam.ac.uk/teaching/$year22/$course/node$node_nr.html\n", "$cid.html");

        my $src = "$dir/node$node_nr-b.html";
        my $dst = "$dir/$cid.html";

        if ($symlink_only) {
            # either just provide a course-id link to the node file
            unlink $dst;
            symlink "node$node_nr-b.html", $dst;
            next;
        }

        # or make a copy while applying some modifications
        open(my $in, '<', $src) or die("$src: $!\n");
        my $html = do { local $/; <$in> };      # slurp mode
        close($in) or die("$src: $!\n");

        # Transform before touching the destination, so that a page with
        # an unexpected structure does not cost us the previous copy.
        my $page = restyle_syllabus_page($html);
        die("$dir/node$node_nr-b.html -> $cid.html: failed to find preamble\n")
            unless defined $page;

        # Remove any old file first: it may be a symlink to the node
        # file, which opening for writing would otherwise overwrite.
        unlink $dst;
        open(my $out, '>', $dst) or die("$dst: $!\n");
        print {$out} $page;
        close($out) or die("$dst: $!\n");
    }
    # buffered write errors only surface here
    close($htaccess) or die("Cannot write '$fout': $!\n");
}

# Check for syllabus coverage
my @unmapped = grep { !exists $node{$_} } sort keys %coursename;
if (@unmapped) {
    warning("\ncoursedb.txt entries that have no syllabus node number mapped to them:\n");
    for my $cid (@unmapped) {
        printf(" %-10s = '%s'\n", $cid, $coursename{$cid});
    }
}