#!/usr/bin/perl

use 5.016;  # implies use strict;
use Data::Dumper;
use LWP::Simple;
use Digest::MD5 qw(md5 md5_hex md5_base64);
use lib '../../../../../scripts', '/anfs/www/VH-cl/scripts';
use CLWeb qw(utf8_to_sgml utf8_to_sgmlatt);

# Usage: exactly one command-line argument (the talks.cam XML export) is
# expected. With any other argument count, print the help text and stop
# with exit status 1.
if (scalar(@ARGV) != 1)
{
  print <<End;
Generates HTML pages from talks.cam XML.

Usage: updatearchive.pl talks.xml

talks.xml is the exported list from talks.cam.ac.uk. The resulting
HTML will be output into files named YYYY-b.html where YYYY are the
years of the talks from the XML files. The entire list will be output
to all-b.html

End
  exit 1;
}

# List of all talks.
my $allFile = "all-b.html";

# Load the list of slides: every regular file in $slidesDir whose name starts
# with a YYYY-MM-DD date is indexed. The key is "YYYY-MM-DD-HHMM" when the
# name continues with a four-digit time, and "YYYY-MM-DD" otherwise; the
# value is the file's path relative to the current directory.
my $slidesDir = "slides";
my $slidesIndex = {};
if (opendir(my $slidesHandle, $slidesDir))
{
  for my $fileName (readdir($slidesHandle))
  {
    next unless -f "$slidesDir/$fileName";
    next unless $fileName =~ m/^(\d\d\d\d-\d\d-\d\d)(?:-(\d\d\d\d))?/;

    # $2 is only defined when the file name includes the talk's time.
    my $key = defined $2 ? "$1-$2" : $1;
    $slidesIndex->{$key} = "$slidesDir/$fileName";
  }
  closedir($slidesHandle);
}
else
{
  # Previously a failed opendir went unnoticed. The run still continues
  # without slides (as before), but the reason is now reported.
  warn "Cannot read slides directory $slidesDir: $!\n";
}

# Load the list of recordings: the HTML pages under video/, leaving out the
# ones whose name ends in "-b.html" (the dot is escaped so it only matches a
# literal dot).
my @recordings = grep { !/-b\.html$/ } glob('video/*.html');

# Load the talks.cam XML file and parse its talks.
my $inputFile = $ARGV[0];
my $separator = "<talk>";

# Records that could not be parsed, reported at the end of the run.
my @problems;
# Every parsed talk (hash references), in file order.
my @events;
# The same talks grouped by their 'codedDate' (YYYY-MM-DD) key.
my $eventsByDate = {};

my $months = {'Jan' => '01', 'Feb' => '02', 'Mar' => '03', 'Apr' => '04', 'May' => '05', 'Jun' => '06', 'Jul' => '07', 'Aug' => '08', 'Sep' => '09', 'Oct' => '10', 'Nov' => '11', 'Dec' => '12'};
my $monthNames = {'Jan' => 'January', 'Feb' => 'February', 'Mar' => 'March', 'Apr' => 'April', 'May' => 'May', 'Jun' => 'June', 'Jul' => 'July', 'Aug' => 'August', 'Sep' => 'September', 'Oct' => 'October', 'Nov' => 'November', 'Dec' => 'December'};

# Shape of one <talk> record. The thirteen captures are, in order: id, title,
# abstract, speaker, speaker_url, venue, special_message, organiser, url, and
# then day, month abbreviation, year and HH:MM taken from <start_time>.
# Under /x the literal spaces inside the start time are written as [ ].
my $talkPattern = qr{
  \s*<id>(\d*)</id>
  \s*<title>(.*?)</title>
  \s*<abstract>(.*?)</abstract>
  \s*<speaker>(.*?)</speaker>
  \s*<speaker_url>(.*?)</speaker_url>
  \s*<venue>(.*?)</venue>
  \s*<special_message>(.*?)</special_message>
  \s*<organiser>(.*?)</organiser>
  \s*<url>(.*?)</url>
  \s*<start_time>\w\w\w,[ ](\d\d)[ ](\w\w\w)[ ](\d\d\d\d)[ ](\d\d.\d\d):\d\d[ ]\+\d\d\d\d</start_time>
  \s*<end_time>.*?</end_time>
}xs;

# An unreadable input used to go unnoticed and produced archive pages with no
# talks at all; stop instead, before anything is overwritten.
open(my $source, '<', $inputFile) || die("Cannot read $inputFile: $!\n");
{
  # Read the file in chunks that each end at the next "<talk>" tag. The
  # change to $/ is confined to this block so that the rest of the script
  # sees the default record separator again.
  local $/ = $separator;

  # Ignore the preamble to the XML file (read and discarded).
  my $preamble = <$source>;

  # Iterate over the <talk> entries.
  while (defined(my $record = <$source>))
  {
    my ($id, $title, $abstract, $speaker, $speakerUrl, $venue, $message,
        $organiser, $url, $day, $month, $year, $time) = $record =~ $talkPattern;

    # A failed match returns the empty list, leaving $id undefined.
    if (!defined $id)
    {
      push @problems, "Non-matching string in source file: $record";
      next;
    }

    # The pattern accepts any three word characters as the month; without
    # this check an unexpected abbreviation would give a date with a
    # missing month.
    if (!exists $months->{$month})
    {
      push @problems, "Unknown month '$month' in source file: $record";
      next;
    }

    # "HH:MM" without its separator, as used in slide file names.
    my $plainTime = $time;
    substr($plainTime, 2, 1) = '';

    $title =~ s/&amp;quot;/\"/gs;

    # Sort out the date and time format.
    my $codedDate = "$year-$months->{$month}-$day";

    # Parse each event into the hash.
    my $currentEvent = {
      'id'           => $id,
      'title'        => $title,
      'abstract'     => prepareAbstract($abstract),
      'speaker'      => $speaker,
      'speaker_url'  => $speakerUrl,
      'location'     => $venue,
      'message'      => $message,
      'organizer'    => $organiser,
      'URL'          => $url,
      'time'         => $time,
      'plainTime'    => $plainTime,
      'codedDate'    => $codedDate,
      'codedTime'    => $time,
      'htmlDateTime' => "$day $monthNames->{$month} $time",
      'dateTime'     => "$codedDate-$plainTime",
      'year'         => $year,
    };
    push @events, $currentEvent;
    push @{$eventsByDate->{$codedDate}}, $currentEvent;
  }
}
close($source);

# Link the slides in by date or date-time (if ambiguous).
for my $talk (@events)
{
  # When several talks share a date, only a slide file whose name carries the
  # time is accepted; a lone talk is matched on its date alone.
  my $talksThatDay = scalar @{$eventsByDate->{$talk->{'codedDate'}}};
  my $slidesKey = ($talksThatDay > 1) ? $talk->{'dateTime'} : $talk->{'codedDate'};
  $talk->{'slides'} = $slidesIndex->{$slidesKey} if ($slidesIndex->{$slidesKey});

  # Also look for recordings that contain -t.... where .... is the talks.cam
  # integer. The id must not be followed by another digit, otherwise talk 123
  # would also claim the recording of talk 1234. An empty id (the parser
  # allows one) is skipped, because it would match every "-t" file.
  my $id = $talk->{'id'};
  next unless (defined $id && length($id) > 0);
  my @r = grep { /-t\Q$id\E(?!\d)/ } @recordings;
  $talk->{'recordings'} = \@r if @r;
}

# Use to write out XML instead of HTML (format in writeXml).
#writeXml($_) for (@events);

# For HTML output, put the events in the reverse of the compareEvents order
# and render them one by one. writeHtml appends its markup to $result and
# uses $oldYear to decide when a new year heading is due.
my @ascending = sort compareEvents @events;
@events = reverse @ascending;
my $oldYear = 'none';
my $result = '';
foreach my $event (@events)
{
  writeHtml($event);
}

# Output an error if the XML was not parsed completely.
if (scalar(@problems) > 0)
{
  print "\n\nProblems:\n\n";
  foreach my $problem (@problems)
  {
    print $problem . "\n\n";
  }
}

# Page skeleton used for all-b.html and for each per-year file. It contains
# three placeholders that are filled in with s/// before a page is written:
#   &year;     - page title and heading ("All" or a four-digit year)
#   &talks;    - the generated talk markup
#   &oldtalks; - the old archive markup (removed again on per-year pages)
# The heredoc is single-quoted, so nothing in it is interpolated.
my $template = <<'EOF';
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>&year;</title>
<script type="text/javascript" src="scripts.js"></script>
<link rel="stylesheet" href="styles.css" />
</head>
<body onload="initializePage();">
<h1>&year; seminars</h1>
<p id="expandControls"><a href="javascript:expandAll();">Expand all</a> <a href="javascript:collapseAll();">Collapse all</a></p>
<!-- talks.cam entries start. Do not edit this part of the file. -->&talks;<!-- talks.cam entries end -->
&oldtalks;
<p class="footer"><a href="http://famfamfam.com/lab/icons/silk/">Silk icons</a> CC-BY</p>
</body>
</html>
EOF

# Read in old talks (pre-2006): for each year from 2005 back to 1992, take
# the existing YYYY-b.html page and keep only the part that can be embedded
# in the combined page. A missing or unreadable file stops the script.
my @oldtalks;
for my $year (reverse(1992..2005)) {
  my $oldFile = "$year-b.html";
  open(my $old, '<', $oldFile) || die("Cannot read $oldFile: $!\n");
  # Slurp the whole file; the change to $/ is limited to this do-block.
  my $oldhtml = do { local $/; <$old> };
  close($old);

  # Drop everything up to and including the opening <body ...> tag, and
  # everything from the footer paragraph onwards.
  $oldhtml =~ s/.*<body.*?>\s*//s;
  $oldhtml =~ s/<p class="footer">.*//s;
  # Remove the second argument from the toggleShow(...) click handlers.
  # Captures are referenced as ${1} in the replacement text; \1 is meant
  # for use inside the pattern only.
  $oldhtml =~ s/\"toggleShow\(\'talk([^\']*)\',this\);\"/\"toggleShow(\'talk${1}\');\"/sg;
  # Replace the page heading and its expand/collapse controls with a plain
  # per-year <h2>.
  $oldhtml =~ s/<h1>(\d{4}) seminars<\/h1>\s*<p id=\"expandControls\">.*?<\/p>/<h2>${1}<\/h2>/sg;
  push @oldtalks, $oldhtml;
}

# All seminars list: fill the template with the title "All", every generated
# talk and the concatenated old archives, then write it to $allFile.
my $oldtalks = join('', @oldtalks);
my $html = $template;
$html =~ s/&year;/All/g;
$html =~ s/&talks;/$result/g;
$html =~ s/&oldtalks;/<!-- Start of old archives -->\n$oldtalks/g;

open(my $allHandle, '>', $allFile) or die("Cannot write $allFile: $!\n");
print {$allHandle} $html;
# Buffered write errors only surface at close, hence the second check.
close($allHandle) or die("Cannot write $allFile: $!\n");

# Split the result into years. Each element starts at the "YYYY</h2>" of a
# year heading and runs up to (not including) the next "<h2>" or the end of
# the generated markup.
my @yearSplitMatches = $result =~ /(\d\d\d\d<\/h2>.*?)(?:<h2>|$)/sg;
for my $yearSection (@yearSplitMatches)
{
  # Write out talks into individual files by year. The captures are only
  # used after a successful match, so a stale $1/$2 can never leak in.
  next unless $yearSection =~ /(\d\d\d\d)<\/h2>(.*)/s;
  my ($talksYear, $talksContent) = ($1, $2);
  my $yearFile = $talksYear . "-b.html";

  my $html = $template;
  $html =~ s/&year;/$talksYear/g;
  $html =~ s/&talks;/$talksContent/g;
  # Per-year pages carry no old archive section.
  $html =~ s/&oldtalks;\s*//g;

  # Three-argument open with a lexical handle, matching how $allFile is
  # written.
  open(my $yearHandle, '>', $yearFile) || die("Cannot write $yearFile: $!\n");
  print {$yearHandle} $html;
  close($yearHandle) || die("Cannot close $yearFile: $!\n");
}

# Build the recordings feed from at most the first nine events (in @events
# order) that have a non-empty 'recordings' list, and hand the pieces
# returned by writeRSS to CLWeb::write_file for recordings.xml.
my @recorded_events = grep {
  my $list = $_->{'recordings'};
  $list && @{$list}
} @events;
my @feedTalks = splice(@recorded_events, 0, 9);
CLWeb::write_file("recordings.xml", writeRSS(@feedTalks));

exit;

# Generates HTML for an event and appends it to the file-level $result.
#
# Argument: a hash reference describing one talk. Keys read here are id,
# year, title, speaker, htmlDateTime, abstract, location, URL, slides,
# recordings and the speaker's page. The parser stores the latter under
# 'speaker_url'; the old 'speakerURL' spelling is still honoured if present.
#
# Side effects: emits a <h2> year heading whenever the talk's year differs
# from the file-level $oldYear, and then updates $oldYear.
#
# Values are interpolated into the markup as they are.
# NOTE(review): presumably they arrive already escaped from the XML export -
# confirm before feeding in data from another source.
sub writeHtml
{
  my $talk = pop @_;

  # Output the year if it has changed. The comparison is a string one:
  # $oldYear starts out as the non-numeric 'none'.
  if ($talk->{'year'} ne $oldYear)
  {
    $oldYear = $talk->{'year'};
    $result .= "<h2>$talk->{'year'}</h2>";
  }

  # Icon link for the slides; the icon depends on the file extension.
  my $slides = "";
  if ($talk->{'slides'})
  {
    my ($icon, $alt) = ("slides.png", "View slides/notes");
    if ($talk->{'slides'} =~ m/\.pp[st]x?$/)
    {
      ($icon, $alt) = ("powerpoint.png", "View slides");
    }
    elsif ($talk->{'slides'} =~ m/\.pdf$/)
    {
      $icon = "acrobat.png";
    }
    $slides = "<a href=\"$talk->{'slides'}\"><img src=\"$icon\" alt=\"$alt\" /></a>";
  }

  # One play icon per recording. The "|| []" keeps the loop from creating an
  # empty 'recordings' entry on talks that have none.
  for my $r (@{$talk->{'recordings'} || []})
  {
    $slides .= qq{<a href="$r"><img src="control_play.png" alt="Recording" /></a>};
  }

  # The speaker link used to be looked up under 'speakerURL' only, a key the
  # parser never sets, so the link was never written.
  my $speakerURL = $talk->{'speakerURL'};
  $speakerURL = $talk->{'speaker_url'} unless (defined $speakerURL && length($speakerURL) > 0);
  my $hasSpeakerURL = (defined $speakerURL && length($speakerURL) > 0);
  my $hasLocation = (defined $talk->{'location'} && length($talk->{'location'}) > 0);

  my $abstractID = 'talk' . $talk->{'id'};
  my $heading = "<span class=\"date\">$talk->{'htmlDateTime'}</span>$talk->{'title'} / <i>$talk->{'speaker'}</i></h3>";

  $result .= "<div class=\"talk\">";
  if (defined $talk->{'URL'} || $hasSpeakerURL || $hasLocation)
  {
    # There is something to reveal, so the header toggles the details.
    $result .= "<p class=\"talkURL\"><a href=\"$talk->{'URL'}\"><img src=\"page.png\" alt=\"View original page\" /></a>$slides</p> " if (defined $talk->{'URL'});
    $result .= "<h3 class=\"talkHeader\" onclick=\"toggleShow('$abstractID',this);\">";
    $result .= $heading;
  }
  else
  {
    $result .= "<h3 class=\"talkHeaderEmpty\">" . $heading;
  }

  $result .= "<div class=\"talkContainer\" id=\"$abstractID\">";
  $result .= "<div class=\"information\">";
  $result .= "<a href=\"$speakerURL\">Speaker's page</a>" if ($hasSpeakerURL);
  $result .= "</div>";
  $result .= "<div class=\"location\"><p>$talk->{'location'}</p></div>";
  $result .= "<div class=\"abstract\">$talk->{'abstract'}</div>";
  $result .= "</div>";
  $result .= "</div>\n";

  return;
}

# http://web.resource.org/rss/1.0/spec
# http://blogs.law.harvard.edu/tech/rss
# http://www.feedvalidator.org/check.cgi?url=https%3A%2F%2Fwww.cl.cam.ac.uk%2Fresearch%2Fsecurity%2Fseminars%2Farchive%2Frecordings.xml
#
# Builds an RSS 1.0 (RDF) document for the given talks and returns it as a
# list of string fragments in output order.
#
# Arguments: talk hash references. Each is expected to have a 'recordings'
# array whose first element is the recording's path relative to the archive
# URL, plus 'title', 'codedDate' and optionally 'speaker'.
#
# NOTE(review): utf8_to_sgml / utf8_to_sgmlatt come from CLWeb and are not
# visible here; presumably they escape and concatenate their arguments -
# confirm against that module.
sub writeRSS {
    my @talks = @_;
    my $archive_url = "https://www.cl.cam.ac.uk/research/security/seminars/archive/";

    # Document prologue and channel description, up to the item index.
    my @rss = (<<'EOT');
<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns="http://purl.org/rss/1.0/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
>
  <channel rdf:about="https://www.cl.cam.ac.uk/research/security/seminars/archive/video/">
    <title>Security Seminar recordings</title>
    <link>https://www.cl.cam.ac.uk/research/security/seminars/archive/video/</link>
    <description>
      Recently recorded Security Seminar talks,
      Computer Laboratory, University of Cambridge.
    </description>
    <items>
      <rdf:Seq>
EOT

    # Item index: one <rdf:li> per talk, pointing at its first recording.
    foreach my $entry (@talks) {
        push @rss, '        <rdf:li resource="';
        push @rss, utf8_to_sgmlatt($archive_url, $entry->{'recordings'}[0]);
        push @rss, "\" />\n";
    }

    push @rss, <<'EOT';
      </rdf:Seq>
    </items>
  </channel>
EOT

    # The items themselves; the title is prefixed with the speaker's name
    # when one is set.
    foreach my $entry (@talks) {
        my $heading = $entry->{'title'};
        if ($entry->{'speaker'}) {
            $heading = $entry->{'speaker'} . ': ' . $heading;
        }
        push @rss, (
            "  <item rdf:about=\"",
            utf8_to_sgmlatt($archive_url, $entry->{'recordings'}[0]),
            "\">\n",
            '    <title>', utf8_to_sgml($heading), "</title>\n",
            '    <link>',
            utf8_to_sgml($archive_url, $entry->{'recordings'}[0]),
            "</link>\n",
            "    <dc:date>$entry->{'codedDate'}</dc:date>\n",
            "  </item>\n",
        );
    }

    push @rss, <<'EOT';
</rdf:RDF>
EOT
    return @rss;
}

# Prepare the abstract for output as HTML.
#
# Takes the abstract text (the last argument) and returns it wrapped in
# <p>...</p>: a newline, optional whitespace and a second newline become a
# paragraph break, and every newline left after that becomes <br />.
sub prepareAbstract
{
  my $text = $_[-1];

  # Paragraph breaks first, so that single newlines are all that remain for
  # the second pass.
  my $paragraphs = ($text =~ s/\n\s*?\n\s*?/<\/p><p>/gr);
  my $body = ($paragraphs =~ s/\n/<br \/>/sgr);

  return join('', "<p>", $body, "</p>");
}

# Compare two events by their date, earliest first.
#
# Meant to be used as a sort comparator: it reads the two events from the
# package variables $a and $b and returns a negative, zero or positive value.
# 'codedDate' is a zero-padded "YYYY-MM-DD" string, so it has to be compared
# as a string; a numeric comparison only sees the leading year. Talks on the
# same date are ordered by 'codedTime' ("HH:MM") when it is present.
sub compareEvents
{
  return ($a->{'codedDate'} cmp $b->{'codedDate'})
      || (($a->{'codedTime'} // '') cmp ($b->{'codedTime'} // ''));
}
