#!/usr/bin/perl
# ucampas -- University of Cambridge web page augmentation system
#
# Generate decorated HTML files with site-navigation menus from
# simple bare-bone HTML files
#
# https://www.cl.cam.ac.uk/local/sys/web/ucampas/
#
# Markus Kuhn -- https://www.cl.cam.ac.uk/~mgk25/
#
# This script converts a bare-bones file "filename-b.html" into
# one augmented with house-style decoration and navigation information,
# called "filename.html".
#
# Simply calling this script in the current working directory will
# read index-b.html and produce index.html. This is equivalent to
# calling "ucampas index". One or more other base filenames can also be
# specified.
#
# Starting from the directory of the source file, this script will
# move higher up in the directory tree as long as it finds a configuration
# file "uconfig.txt" in each directory. It will then parse the entire
# subdirectory tree for such config files in order to build up a
# tree of documents for navigation.
#
# The uconfig.txt files contain foremost a comma-separated list of
# filenames or subdirectories. This list defines the relative order in
# which these files and subdirectories will appear as children of the
# current directory in the navigation tree.

# this software requires Perl 5.16 or newer
# (we had one minor opcode/require related problem with 5.14.2)
use 5.016;  # implies use strict;

# API version that separately distributed template files can require
$ucampas::VERSION = "1.003";

my $libdir;
BEGIN {
    use FindBin qw($RealBin); # find directory where this file is located ...
    $libdir= $RealBin;        #              ... derive library directory ...
    $libdir =~ s|/bin\z|/share/ucampas|;
}
use lib $libdir, "$libdir/perl-PlexTree";            # ... and add it to @INC
use NavTree qw(is_relpath prefix_url rurl);
use PlexTree "0.002";
use PlexTree::SGML qw(sgmldec);
use POSIX qw(strftime);
use Time::Local ();
use Storable qw(dclone);
use Data::Dumper;   # for debugging
use Encode qw(decode encode);
use Encode::Byte;
use Cwd 'abs_path';
use HTTP::Tiny;
use IO::Socket::SSL; # required by HTTP::Tiny for HTTPS

# Prevent "eval" from compiling risky opcodes, for example to hinder
# Perl code embedded in templates writing to the file system.
use Opcode qw(opmask_add opset invert_opset);
opmask_add(invert_opset opset(':browse',':base_io',':load'));

my $embedded_perl = 0;
my $quiet = 0;
my $display = 0;
my @locked_template_dirs;

# Execute a shell script and abort if the return value signals a failure
sub execute {
    my ($cmd) = @_;
    my ($r, @r);

    if (wantarray) { @r = `$cmd`; } else { $r = `$cmd`; }
    if ($? != 0) {
	if ($cmd =~ /pngtopnm/ && $? != 1) {
	    # we are fine, no idea why that return value appears
	    # with this particular tool (@!$%)
	} else {
	    die("Command failed (return " . ($? >> 8) . ", signal " .
		($? & 127) . ($? & 128 ? ", core dumped" : '') . "): '$cmd'\n");
	}
    }
    if (wantarray) { return @r; } else { return $r; }
}

# Look for image file img-$in.* and if it exists, return <img ...> element
sub image($$) {
    my ($cur, $in) = @_;
    my $dir = $cur->fpath;  # where are we in filespace?
    $dir =~ s/[^\/]+\z//;    # strip off any filename
    $dir = '' if $dir eq './';
    my ($path) = glob("${dir}img-$in.*");
    my $alt;
    my @size;

    if ($path) {
	my $fn = substr($path, length($dir));
	# extract pixel size and comment from image file
	if ($path =~ /.jpg\z/i) {
	    my $t = execute("rdjpgcom -verbose '$path'");
	    if ($t =~ /.*JPEG image is (\d+)w \* (\d+)h.*/s) {
		@size = ($1, $2);
	    }
	    $alt = execute("rdjpgcom '$path'");
	    chomp($alt);
	} elsif ($fn =~ /.(gif|png)\z/i) {
	    my $t = execute("$1topnm < '$path' 2>/dev/null");
	    if ($t =~ /^P\d\n(?:\#[^\n]*\n)*(\d+)\s+(\d+)\s/s) {
		@size = ($1, $2);
	    }
	}
	# allow for comment override via img$in symlink
	$alt = readlink("${dir}img-$in") if -l "${dir}img-$in";
	$alt = '' unless defined $alt;
	#return "<img src=\"$fn\" width=\"$size[0]\" height=\"$size[1]\" " .
	#  "alt=\"$alt\">" if @size;
	#return "<img src=\"$fn\" alt=\"$alt\">";
	my $img = c('*img')->setatt('src', $fn, 'alt', $alt);
	if (@size) {
	    $img->setatt('width', $size[0], 'height', $size[1]);
	}
	return $img;
    }
    return;
}

# Convert an HTTP timestamp (e.g. found in RSS feeds) into machine time.
# Returns undef if the input is not defined. Raise and exception
# if the input format is not recognized or invalid.
#
# This is a simplified version of the function of the same name in
# HTTP::Date, which sadly is not part of the standard Perl distribution.
my @DoW = qw(Sun Mon Tue Wed Thu Fri Sat);
my @MoY = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
my %MoY; @MoY{@MoY} = (1..12);
sub str2time {
    local($_) = shift;
    return undef unless defined $_;

    # fast exit for strictly conforming string
    if (/^[SMTWF][a-z][a-z], (\d\d) ([JFMAJSOND][a-z][a-z]) (\d\d\d\d) (\d\d):(\d\d):(\d\d) (?:GMT|UTC|\+0000|-0000)$/) {
	my $t = Time::Local::timegm($6, $5, $4, $1, $MoY{$2}-1, $3);
	die("str2time: unexpected time '$_'") if $t < 0;
	return $t;
    }

    # More lax parsing below
    s/^\s+//;  # kill leading space
    s/^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*//i; # Useless weekday

    my ($day, $mon, $yr, $hr, $min, $sec, $tz, $ampm);

    # try HTTP-alike formats, ctime and asctime, and ISO-8601-alike
    (($day,$mon,$yr,$hr,$min,$sec,$tz) =
     /^(\d\d?)(?:\s+|[-\/])(\w+)(?:\s+|[-\/])(\d+)(?:(?:\s+|:)(\d\d?):(\d\d)(?::(\d\d))?)?\s*([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)?\s*(?:\(\w+\))?\s*$/)
    ||
    (($mon, $day, $hr, $min, $sec, $tz, $yr) =
     /^(\w{1,3})\s+(\d\d?)\s+(\d\d?):(\d\d)(?::(\d\d))?\s+(?:([A-Za-z]+)\s+)?(\d+)\s*$/)
    ||
    (($yr, $mon, $day, $hr, $min, $sec, $tz) =
     /^(\d{4})[-\/]?(\d\d?)[-\/]?(\d\d?)(?:(?:\s+|[-:Tt])(\d\d?):?(\d\d)(?::?(\d\d(?:\.\d*)?))?)?\s*([-+]?\d\d?:?(:?\d\d)?|Z|z)?\s*$/)
    || die("str2time: unrecognized time format '$_'\n");

    # Translate month name to number
    $mon = $MoY{$mon} ||
	$MoY{"\u\L$mon"} ||
	($mon =~ /^\d\d?$/ && $mon >= 1 && $mon <= 12 && int($mon)) ||
	die("str2time: unrecognized month name in '$_'\n");

    length($yr) == 4 or die("str2time: unrecognized year in '$_'\n");

    # Make sure clock elements are defined
    $hr  = 0 unless defined($hr);
    $min = 0 unless defined($min);
    $sec = 0 unless defined($sec);

    # Compensate for AM/PM
    if ($ampm) {
        $ampm = uc $ampm;
        $hr = 0 if $hr == 12 && $ampm eq 'AM';
        $hr += 12 if $ampm eq 'PM' && $hr != 12;
    }

    $mon--;

    my $offset = 0;
    if ($tz =~ /^([-+])?(\d\d?):?(\d\d)?$/) {
        $offset = 3600 * $2;
        $offset += 60 * $3 if $3;
        $offset *= -1 if $1 && $1 eq '-';
    } elsif ($tz !~ /^UTC|GMT|Z|$/i) {
	die("str2time: unrecognized timezone '$tz' in '$_'\n");
    }

    my $frac = $sec; $frac -= ($sec = int($frac));
    my $t = Time::Local::timegm($sec, $min, $hr, $day, $mon, $yr) + $frac;
    die("str2time: odd time '$_'\n") if ($t < 0);
    return $t - $offset;
}

# RFC 1738 requires that many characters in URLs are encoded as % +
# 2-hexdigits. This function encodes this way all potentially unsafe
# characters found in a string. It is intended for strings to be
# safely embedded into a URL, such as field values. It must not be
# applied to entire URLs, or to strings that contain URL separator
# characters (e.g. :/?;&=#) for their intended purpose.
sub url_encode {
    return unless defined wantarray;
    my @s = @_;
    for my $s (@s) {
	utf8::encode($s) if utf8::is_utf8($s);
        $s =~ s/([^0-9a-zA-Z\$_.+!*'(),-])/sprintf("%%%02X", ord($1))/ge;
    }
    return wantarray ? @s : $s[0];
}

# prepare a list of Google Analytics <script> elements, which can be
# appended to a <head> element
sub spyware_scripts
{
    my ($cur) = @_;

    # Tracking ID or Measurement ID, e.g. 'UA-1234567-1' or 'G-XXXXXXX'
    my $token = $cur->param('google_analytics');
    return () unless $token;

    my @scripts;
    # Google Analytics global site tag (gtag.js) -- 2022-04
    my $code = "window.dataLayer = window.dataLayer || [];" .
        "function gtag(){dataLayer.push(arguments);}gtag('js', new Date());gtag('config', '$token');";
    push @scripts, c('*script(async=)')
        ->addkey('src' => text "https://www.googletagmanager.com/gtag/js?id=$token");
    push @scripts, c('*script')->append(text $code);
    return @scripts;
}

# generate title for a page
sub title {
    my ($cur, $src_title) = @_;

    my $title = $cur->get('title');  # to allow override from uconfig.txt
    $title = $src_title unless defined $title;
    my $organization = $cur->param('organization');
    my $section = $cur->param('section');
    if (defined $organization && index(lc($title), lc($organization)) < 0) {
	my $orgsection = $organization;
	$orgsection .= ' – ' . $section
	    if defined $section && index(lc($title), lc($section)) < 0;
	$title = $orgsection . ': ' . $title;
    }
    return $title;
}

# generate rel=Next and rel=Up navigation <link> elements for a page header
#
# https://www.iana.org/assignments/link-relations/link-relations.xhtml
# https://www.w3.org/MarkUp/html3/dochead.html
# https://www.w3.org/TR/1999/REC-html401-19991224/types.html#type-links
# https://www.w3.org/TR/html5/links.html#sequential-link-types
# http://microformats.org/wiki/existing-rel-values#HTML5_link_type_extensions
# https://addons.mozilla.org/en-GB/firefox/addon/link-widgets/
sub navigation_links {
    my ($cur) = @_;
    my @links;

    my $n;
    if ($n = $cur->preorder_next_visible) {
	push @links, c('*link(rel=Next)')->setatt('href', $n->rurl($cur));
    }
    $n = $cur->parent;
    while ($n && $n->get('nopage')) {
	$n = $n->parent;
    }
    if ($n) {
	push @links, c('*link(rel=Up)')->setatt('href', $n->rurl($cur));
    }

    return @links;
}

# Merge the elements and attributes from two <head> elements
# (the template <head> and the source <head>), and move these
# into the first one). Also update the title element.
sub merge_heads {
    my ($cur, $src, $dest) = @_;

    my $src_head = $src->cd('.l(*html)', '.l(*head)');
    my $dest_head = $dest->cd('.l(*html)', '.l(*head)');

    my $title = eval { $src_head->cd('.l(*title)')->getl(0) };
    if ($title = title($cur, $title)) {
	my $dest_title = $dest_head->cd('.l(*title)');
	if ($dest_title) {
	    $dest_title-> splice(0);  # empty any existing <title>
	} else {
	    $dest_title = $dest_head->append0->move(c '*title'); # add new one
	}
	$dest_title->append(text $title);
    }

    # copy other head elements from source document
    if ($src_head) {
	foreach my $h ($src_head->list) {
	    if ($h->tag == META) {
		my $element = $h->str;
		next if $element eq 'title';
		if ($element eq 'meta') {
		    next if $h->get('http-equiv') =~ /^Content-Type\z/i;
		    next if $h->get('name') =~ /^ucampas-/;
		}
		next if $element eq 'style' && $cur->param('discard_style');
	    } elsif ($h->tag == TEXT) {
		next;
	    }
	    $dest_head->append($h);
	}
    }

    # other header elements to be added
    $dest_head->append(navigation_links($cur));
    $dest_head->append(spyware_scripts($cur));
    if (my $add_head = relocate_html($cur->paramc('add_head'), $cur)) {
	if ($add_head->tag == CTRL && $add_head->len == 0) {
	    $dest_head->append($add_head->list);
	} else {
	    $dest_head->append($add_head);
	}
    }
}

# Merge the attributes from two <body> elements
# (the template <body> and the source <body>), and copy these
# into the first one).
sub merge_body_attributes {
    my ($dst_body, $src_body) = @_;

    # merge classes
    my @bodyclasses = split(/\s+/a, $src_body->get('class') || '');
    add_class($dst_body, @bodyclasses);
    # transfer the remaining attributes, unless they collide
    for my $a ($src_body->keys) {
        next if $a eq 'class';
        my $s = $src_body->get($a);
        my $d = $dst_body->get($a);
        if (!defined($d)) {
            $dst_body->setatt($a => $s)
        } elsif ($d ne $s) {
            warn("<body $a='$s'> overrides <body $a='$d'> in template\n");
            $dst_body->setatt($a => $s)
        }
    }
}

# copyright line (for use in page-template footer)
sub page_copyright {
    my ($cur, $default_owner) = @_;

    my $copyright_year =
	$cur->param('copyright_year') ||
	substr($cur->get('svndate') || $cur->get('mtime_iso_local'), 0, 4);
    my @copyright_holder = $cur->param('copyright_holder');
    unless (defined $copyright_holder[0]) {
	@copyright_holder = ( $cur->param('organization'),
			      $default_owner,
			      $cur->param('address') );
    }
    @copyright_holder = grep { $_ } @copyright_holder;
    return unless @copyright_holder;
    while (index($copyright_holder[1], $copyright_holder[0]) >= 0) {
	shift @copyright_holder; # eliminate duplication
    }
    return text ("© $copyright_year " . join (', ', @copyright_holder));
}

my %editurl_subst = (
    '%' => sub { '%' },
    'f' => sub { url_encode($_[0]->fpath) },
    's' => sub { url_encode($_[0]->srcfile) },
    'u' => sub { url_encode($_[0]->url) },
    'v' => sub { url_encode($_[0]->fpath('svnurl')) },
    );

sub edit_link {
    my ($cur, $text) = @_;

    my $editurl = $cur->paramc('edit_url')->get_url;
    return unless $editurl;

    # substitute % parameters
    $editurl =~ s/\%(.)/$editurl_subst{$1} && $editurl_subst{$1}->($cur)/ge;

    $text //= 'edit page';
    my $l = c('*a')->append(text $text);
    $l->addkey('href', text $editurl);

    return $l
}

# contact-details line (for use in page-template footer)
sub page_contact {
    my ($cur, $prefix) = @_;
    my @line;

    if (my $webmaster = $cur->paramc('webmaster')) {
        my $webmaster_name  = $webmaster->get('name');
        my $webmaster_url   = $webmaster->get_url('url');
        my $webmaster_email = $webmaster->get('email');
        $webmaster_name = $webmaster_email
            if !defined $webmaster_name;
        if (!defined $webmaster_url and defined $webmaster_email) {
            $webmaster_url = 'mailto:' . url_encode($webmaster_email);
            my $subject = $cur->fpath('url') // $cur->srcfile;
            $webmaster_url .= "?subject=" . url_encode($subject)
                if defined $subject;
        }
        if ($webmaster_name) {
            my $s = text $webmaster_name;
            $s = c('*a')->addkey('href', text $webmaster_url)->append($s)
                if $webmaster_url;
            push @line, text $prefix if $prefix;
            push @line, $s;
        }
    }

    # append edit link if we have one
    if (my $edit_link = edit_link($cur)) {
        push @line, text ' – ', $edit_link;
    }

    # group parts together, so they remain a single node
    return PlexTree->new->append(@line) if @line > 1;

    return @line;
}

# access-restrictions line (for use in page-template footer)
sub page_access {
    my ($cur) = @_;
    my $r = $cur->access_restrictions;
    return unless $r;
    return text $r;
}

# last-modified line (for use in page-template footer)
sub page_lastmod {
    my ($cur) = @_;
    return unless $cur->param('lastmod');

    my $lastmod = $cur->get('svndate') || $cur->get('mtime_iso_local');
    my $ownername = $cur->get('svnauthor') || $cur->get('ownergcos');
    my $s = text "Last modified $lastmod by $ownername";
    if (my $editurl = $cur->paramc('edit_url')->get_url) {
	my @p;
	if (my $surl = $cur->fpath('svnurl')) {
	    push @p, 's=' . urlencode($surl);
	}
	$editurl .= '?' . join(';', @p);
	$s = wrap(c('*a(accesskey=m, style="color: black")')
		  ->setatt('href' => $editurl), $s);
    }
    return $s;
}

# return @l with $s inserted between all elements
#
# example: join(',', @a) equals join('', interleave(',', @a).
sub interleave {
    my ($s, @l) = @_;
    return @l if @l < 2;
    return (map( { $_, $s } @l[0..($#l-1)]), @l[$#l]);
}

# append non-empty compound list @l to compund $c,
# or return undef if @l lacks elements that evaluate to true
#
# example: $c->insert(wrap(c('*ul'), map { wrap(c('*li'), $_) } @_ ))
sub wrap {
    my ($c, @l) = @_;
    @l = grep { $_ } @l;
    return unless @l;
    return $c->append(@l);
}

# Recursively traverse navigation tree and output nested <ul> (or <table>)
# of titles and links.
#
# Arguments:
#
#  $cur                 current position in the tree
#  $start               what root do we start from (default: $cur->top)
#  parameters, ...
#
# Parameter list:
#
#  'topopen' => ...     how many layers downwards from the top do we show
#                       (undef -> show full site map)
#  'forwardopen' => ... how deep below the current position do we go
#  'stoplength' => ...  do not recurse from nodes that have more than
#                       this many children
#  'class' => ...       class attribute value of <ul>
#  'style' => ...       style attribute value of <ul>
#  'table' => 1         use <table> instead of <ul>, <tr> instead of <li>,
#                       and <td> instead of <div>
#  'div' => 0           do not wrap the <li> content into a <div>
#  'mark' => ...        subroutine called with pointers to <li> and <a>
#                       compounds for all entries, allowing callers to
#                       specify additional decoration
#  'markswitch' => ...  subroutine called with pointers to <li> and <a>
#                       compounds for all entries that lead to a page
#                       with a different navigation tree representation
#  'markcurrent' => ... similar subroutine called for entry that leads
#                       to $cur
#  'markonpath' => ...  similar subroutine called for ancestor entry that
#                       is on the path to $cur
#  'marklink' => ...    similar subroutine called if entry is a *link
#  'showinvisibles' => ...  if set, show in the navigation list even nodes
#                           who have an 'invisible' attribute larger than 0
#                           but not larger than the value of showinvisibles
#  'navtopinclude' => ...   overrides the node attribute of the same name
#  'maxdepth' => ...        maximum allowed nesting level of result,
#                           prune result if recursion attempts to go deeper
#  'maxdepthretry' => 1     issue a warning if the navigation tree had to
#                           be pruned due to maxdepth (suggesting
#                           that a design limit of the style template was
#                           reached) and retry to start from further down
#  'flat' => 1          prevent recursion beyond first level (e.g. for tabs)
#  'touchpad' => 1      each menu entry with subentries is not a link
#                       and instead is repeated as a link as the first
#                       item of the submenu (for touchpads without hover)
#  'touchpad' => 2      while 'touchpad' => 1 does not create an <a> element,
#                       option 2 merely does not create an href attribute
#                       (useful if the <a> is still needed for styling)
#  'touchtext'=>'text'  text for the repeated 'touchpad' link, e.g. 'Overview'
#
# Node attributes that affect the behavior of this function:
#
#  invisible=1          make navlinks to this page invisible (unless
#                       the this page is the current page and has visible
#                       children, in which case we need to keep the page
#                       visible such that the children also become visible)
#  navtopinclude=1      prefix the top-level list with $start
#  navtopinclude='text' prefix the top-level list with $start and use
#                       the provided string as the navtitle
#  navstop=1            do not recurse beyond this point, for example because
#                       another in-page navigation (tabs, subnode list)
#                       takes over from here
#
sub navbar {
    my ($cur, $start, %p) = @_;
    my $maxdepth_warned;

    $p{'domain'} = 'nav' unless defined $p{'domain'};

    unless ($start) {
	$start = $cur->paramn($p{'domain'} . 'top');
	if ($start) {
	    # navtop values > 1 place set $start to an ancestor
	    my $n = int($start->get($p{'domain'} . 'top'));
	    $start = $start->up($n-1) if $n > 1;
	}
    }
    $start = $cur->top unless $start;
    $p{$p{'domain'} . 'topinclude'} = $start->param($p{'domain'} . 'topinclude')
	unless defined $p{$p{'domain'} . 'topinclude'};
    while ($start) {
	my $ul = eval {
	    _navbar($cur, $start, $cur->depth, $start->depth, \%p);
	};
	return $ul if $@ eq '';
	if ($@ eq "maxdepth exceeded\n") {
	    # the navigation bar got too deep, try again from further down
	    warn("\nWarning: a navigation bar nested >" .
		 $p{'maxdepth'} .
		 " levels deep is not supported by the style.\n" .
		 "Please flatten it by adding $p{'domain'}top=1 attributes.\n")
		unless $maxdepth_warned++;
	    $start = $start->nextonpathto($cur);
	    warn("Retrying with $p{'domain'}top at " . $start->dpath . ".\n")
		if $start;
	} else {
	    die($@);
	}
    }
    warn("Generating a navigation bar failed!\n");
}

# Similar to navbar, but instead of returning a nested <ul> tree, it
# appends to $container a sequence of <ul> elements that represent the
# various nesting levels, which is more suitable for rendering as
# multiple rows of tabs.
sub append_navtabs {
    my ($container, $cur, $start, %p) = @_;
    # change the domain parameter to 'tab' if you want to use
    # tabtop/tabtopinclude instead of navtop/navtopinclude, e.g.
    # because there are two types of menues called 'nav' and 'tab'
    # on a page.
    $p{'domain'} = 'nav' unless defined $p{'domain'};

    unless ($start) {
	$start = $cur->paramn($p{'domain'} . 'top');
	if ($start) {
	    # navtop values > 1 place set $start to an ancestor
	    my $n = int($start->get($p{'domain'} . 'top'));
	    $start = $start->up($n-1) if $n > 1;
	}
    }
    $start = $cur->top unless $start;
    $p{'navtopinclude'} = $start->param($p{'domain'} . 'topinclude')
	unless defined $p{'navtopinclude'};
    $p{'flat'} = 1;
    for (my $c = $start; $c; $c = $c->nextonpathto($cur)) {
	last if $c->get($p{'domain'} . 'stop');
	$container->append(_navbar($cur, $c, $cur->depth, $start->depth, \%p));
	$p{navtopinclude} = 0;
    }
}

# Auxiliary function used in _navbar that retrieves only the list
# elements that will actually be visible in the navbar. A node is
# invisible if the numeric value of its 'invisible' attribute exceeds
# $showinvisibles. However, if such a node is on the path to the
# current node, it remains visible. An invisible node is only visible
# on its own navbar if it has visible subnodes. This may sound
# complicated, but this heuristic seems to do just what one usually
# wants to achieve on a website with where some pages are invisible on
# the navtree.
sub _visiblelist {
    my ($cur, $start, $showinvisibles) = @_;
    $showinvisibles ||= 0; # convert undef to 0
    return () unless $start;
    my @list = grep {
	($_->get('invisible') || 0) <= $showinvisibles ||
	    ($_->isonpathto($cur) &&
	     ($_->nid ne $cur->nid ||
	      _visiblelist($cur, $_, $showinvisibles)))
    } $start->list;
    return @list;
}

# check if an HTML element belongs to all the listed classes
sub has_class {
    my ($element, @wclasses) = @_;
    my @hclasses = split(/\s+/a, $element->get('class'));
  WANTED:
    for my $w (@wclasses) {
	for my $h (@hclasses) {
	    next WANTED if $h eq $w;
	}
	return 0; # we wanted one that we don't have
    }
    return 1;     # we have all that we wanted
}

# add a class to a HTML element, preserving any classes it
# might already have (in a space-separated list of classes)
sub add_class {
    my $element = shift;
    return unless $element;
    my @classes = split(/\s+/a, $element->get('class'));
    for my $c (@_) {
	next if grep {$_ eq $c} @classes;  # do not add duplicates
	push @classes, $c;
    }
    $element->setatt(class => join(' ', @classes)) if @classes;
}

# recursive auxiliary function for navbar that does the actual work
#  $cur                 current position in the tree, as provided to navbar()
#  $this                root of the subtree to be generated recursively
#  $curdepth            $cur->depth (cached)
#  $startdepth          depth of the $start argument to navbar()
sub _navbar {
    my ($cur, $this, $curdepth, $startdepth, $p) = @_;

    my $domain = $p->{'domain'};
    return undef if $this->get("${domain}stop"); # e.g. for switch to tabs
    my @list = _visiblelist($cur, $this, $p->{'showinvisibles'});
    return undef unless @list;
    return undef if defined $p->{'stoplength'} && @list > $p->{'stoplength'};
    my $navtopinclude = defined $p->{"${domain}topinclude"} ?
	$p->{"${domain}topinclude"} : $cur->param("${domain}topinclude");
    if ($this->depth == $startdepth && $navtopinclude) {
	# include link to home page at the start of the navbar
	unshift @list, $this;
    }
    if ($this->depth > $startdepth && ($p->{'touchpad'} || $p->{'touchtext'})) {
	# include link to parent at the start of each submenue
	unshift @list, $this;
    }
    my $ul = c('*ul');
    $ul->set(c('*table')) if $p->{'table'};
    $ul->setatt('class', $p->{'class'}) if defined $p->{'class'};
    $ul->setatt('style', $p->{'style'}) if defined $p->{'style'};
    foreach my $c (@list) {
	my $duplicate = $c->nid eq $this->nid; # repeat of parent node
	my $cdepth = $c->depth;
	my $rdepth = $cdepth - $curdepth;
	my $sdepth = $cdepth - $startdepth;
	my $islink = ($c->tag == META && $c->str == 'link');
	my $li = $ul->append0->set('*li');
	$li->set(c('*tr')) if $p->{'table'};
	my $div = $li->append0->set('*div');
	$div->set(c('*td')) if $p->{'table'};
	my $navtitle = $c->navtitle || '[no title found]';
	if ($duplicate) {
            if ($navtopinclude ne '1' && $sdepth == 0) {
                $navtitle = $navtopinclude;
            } elsif ($p->{'touchtext'} && $sdepth > 0) {
                $navtitle = $p->{'touchtext'};
            }
	}
	my $item = text($navtitle);
	$item = c('*a')->setatt('href', $c->rurl($cur))->append($item);
        if ($p->{div} // 1) {
            # append $item to $div such that $item
            # ends up knowing its new parent
            $item = $div->append0->move($item);
        } else {
            # remove the <div> wrapper
            $div->move($item);
            # now: $div-nid == $item->nid
        }
	my $isonpathto = $c->isonpathto($cur);
	if ($islink) {
	    # the page is (possibly) not a navtree child
	    &{$p->{'marklink'}}($li, $div, $item) if $p->{'marklink'};
            &{$p->{'mark'}}($li, $div, $item, $c, $cur, $this) if $p->{'mark'};
	    next;
	}
	# recurse into child nodes and append submenu to $li, if any
	{ # this block acts as dummy loop for the following "next" statements
	    next if $p->{'flat'}; # don't recurse, e.g. for producing tabs
	    next if $c->get("${domain}stop"); # e.g. for switch to tabs
	    if (defined $p->{'maxdepth'} && $sdepth > $p->{'maxdepth'}) {
		die("maxdepth exceeded\n") if defined $p->{'maxdepthretry'};
		next;
	    }
	    next if $duplicate; # don't recurse into repeat of parent node
	    next unless
		(($isonpathto ||
		  !defined $p->{'topopen'} || $sdepth < $p->{'topopen'} ||
		  ($cur->isonpathto($c) && $rdepth < $p->{'forwardopen'})));
	    # recursively process child nodes and append their menu
	    $li->append(_navbar($cur, $c, $curdepth, $startdepth, $p));
	}
	# on touch-pads, we don't want menu items that have a sub menu
	# to be clickable, if the menu becomes visible with hover,
	# as touch-pads can't distinguish between hover and click
	if ($p->{'touchpad'} && $li->listlen > 1) {
	    if ($p->{'touchpad'} == 1) {
		# remove entire <a> element (while preserving its content)
		$item->move($item->cl(0));
	    } elsif ($p->{'touchpad'} == 2) {
		# remove only the href attribute, leaving the <a> element
		$item->deletekey(text 'href');
	    }
	}
	# we place all the following callbacks down here so the callee
	# can now see from $li->listlen > 1 if the node has a submenu
	if ($c->nid eq $cur->nid ||
	    ($isonpathto && $c->get("${domain}stop"))) {
	    # this item links to the current page
	    # (or is as close as we can get if navstop=1)
	    &{$p->{'markcurrent'}}($li, $div, $item) if $p->{'markcurrent'};
	} elsif ($isonpathto && !$duplicate) {
	    # this item links to a page that is on the navigation
	    # path to the current one (excluding a navtopinclude node)
	    &{$p->{'markonpath'}}($li, $div, $item) if $p->{'markonpath'};
	}
	if ($c->get("${domain}top") || !$c->param('navbar') ||
	    $c->str =~ /\.(?:pdf|doc)\z/i) {
	    # the page linked will have a different navigation tree
	    &{$p->{'markswitch'}}($li, $div, $item) if $p->{'markswitch'};
	}
	# generic callback to allow caller to decorate the links
	&{$p->{'mark'}}($li, $div, $item, $c, $cur, $this) if $p->{'mark'};
    }
    return $ul;
}

# Output the path to the current position in the navigation tree
# as a sequence of links. By default, the result is a <p> element.
#
# Arguments:
#
#  $cur                 current position in the tree
#  parameters, ...
#
# Parameter list:
#
#  'separator'  => ...  what to insert between links (e.g., text(' > '))
#  'ul'         => 1    output <ul> instead of default <p> element
#  'includecur' => 1    finish with the current page (rather than its parent)
#  'class'      => ...  class of <p> or <ul>
#  'id'         => ...  id of <p> or <ul>
#  'firstclass' => ...  class of first <li>
#  'divclass'   => ...  class of encapsulating <div>
#  'nonempty'   =>
sub breadcrumbs {
    my ($cur, %p) = @_;
    my $i = 0;
    my $bc;
    if ($p{'ul'}) {
	$bc = c('*ul');
    } else {
	$bc = c('*p');
    }
    if ($cur->param('breadcrumbs')) {
	my $start = $cur->paramn('breadcrumbtop');
	unless ($start) {
	    $start = $cur->top;
	    if (my $prefix = relocate_html($cur->paramc('breadcrumbprefix'),
					   $cur)) {
		foreach my $c ($prefix->list) {
		    if ($i++ > 0 && exists $p{'separator'}) {
			$bc->append($p{'separator'});
		    }
		    my $crumb = $c;
		    $crumb = c('*li')->append($crumb) if $p{'ul'};
		    $bc->append($crumb);
		}
	    }
	}
	my @path = $cur->path;
	shift @path while @path && $path[0]->nid != $start->nid;
	pop @path if @path && !$p{'includecur'};
	foreach my $c (@path) {
	    if ($cur->cd('breadcrumbtop')) {
		# make this the first breadcrumb
		$bc->clear_list;
		$i = 0;
	    }
	    my $title = $c->navtitle;
	    next unless defined $title;
            next if $c->get('breadcrumbskip');
	    if ($i++ > 0 && exists $p{'separator'}) {
		$bc->append($p{'separator'});
	    }
	    my $crumb = text($title);
	    my $url = $c->rurl($cur);
	    $crumb = c('*a')
		->addkey('href', text($url))
		->append($crumb) if defined $url;
	    $crumb = c('*li')->append($crumb) if $p{'ul'};
	    $bc->append($crumb);
	}
    }
    if ($bc->listlen == 0) {
	return undef if $p{'nonempty'};
        # preserve breadcrumb space even if none are shown
        $bc->append(text(' ')) unless $p{'ul'};
    } elsif (exists $p{'firstclass'}) {
	$bc->cl(0)->setatt('class', $p{'firstclass'});
    }
    $bc->setatt('class', $p{'class'}) if exists $p{'class'};
    $bc->setatt('id', $p{'id'}) if exists $p{'id'};
    $bc = c('*div')
	->append($bc)
	->setatt('class', $p{'divclass'}) if exists $p{'divclass'};
    return $bc;
}

# Add index.html to all relative HREF links in
# A and LINK elements of an HTML document
sub add_index($) {
    my ($html, $index_fn) = @_;

    $index_fn = 'index.html' unless defined $index_fn;
    for my $a ($html->lfind('*link', 'all'), $html->lfind('*a', 'all'),
	       $html->lfind('*area', 'all')) {
	my $href = $a->cd('href');
	next unless $href && $href->tag == TEXT();
	my $path = $href->str;
	next if $path =~ /^[a-z]+:/; # this is already a full URL
	my $fragment;
	$fragment = $1 if $path =~ s/(\#[^\/]*)\z//;
	if ($path eq '.' || $path eq './') {
	    $path = $index_fn . $fragment;
	} elsif ($path eq '..') {
	    $path .= '/' . $index_fn . $fragment;
	} elsif ($path =~ /\/\z/) {
	    $path .= $index_fn . $fragment;
	} else {
	    next;
	}
	$href->setstr($path);
    }

    return $html;
}

# Convert the provided string into an obfuscated JavaScript string expression,
# such that the string becomes tedious to decode without using a JavaScript
# interpreter. Used by obfuscate_email.
sub obfuscate_jsstring {
    my @s = split('',shift);
    my @e;
    my @ff = ('%d', '%2$d^0x%3$x', '0x%4$x-%2$d'); # numeric expressions
    my @f = ('%c', '\\x%02x', '\\x%02X', '\\u%04x'); # string literal
    while (@s) {
	if (rand()<0.25) {
	    # encode single character as a numeric expression
	    my $c = ord(shift @s);  # character to encode
	    my $r = int(rand(256)); # code offset
	    my $e = sprintf($ff[int(rand(@ff))], $c, $r, $c^$r, $c+$r);
	    push @e, "String.fromCharCode($e)";
	} else {
	    # encode $l characters as a single string literal (with escapes)
	    my $l = int(rand(6))+2;
	    my @ss = splice(@s, 0, $l);
	    my ($c, $t);
	    do {
		my @c = map( { sprintf($f[int(rand(@f))], ord($_)) } @ss);
		$c = join('', @c);
		$t = rand() < 0.5 ? "'" : '"';
	    } while index($c, $t) != -1;  # ensure absence of terminator
	    push @e, $t . $c . $t;
	}
	@e[$#e] .= "\n" if rand() < 0.25;  # sprinkle with line feeds
    }
    return join('+',@e);
}

# Search an HTML parse tree from $node downwards for anything that
# looks either like an <a href="mailto:..."> element or a plain-text
# email address, and then obfuscate it in a way such that a normal web
# browser still displays it as originally intended, but any simpler
# address harvester (without JavaScript interpreter) will miss
# the address, or get it wrong.
sub obfuscate_email($) {
    my ($node) = @_;

    # compatible alternative to document.currentScript according to
    # http://www.2ality.com/2014/05/current-script.html
    my $currentscript = "document.currentScript";
    $currentscript .= "||".
	"(function(){var scripts=document.getElementsByTagName('script');".
	"return scripts[scripts.length-1];})()";

    # Process <a href="mailto:..."> elements
    my @links = grep { $_->get('href') =~ /^mailto:./ } $node->lfind(c('*a'),'all');
    for my $n (@links) {
	my $link = $n->get('href');
	$n->cd('href')->set(text '');
	my $jslink = obfuscate_jsstring($link);
	my $script = trans "<script type=\"text/javascript\">".
	    "var s=$currentscript;s.parentNode.setAttribute('href', $jslink);".
	    "s.remove();</script>";
	$n->splice(0,0, $script);
    }
    # List of plaintext elements containing a potential email address
    my @text = grep { $_->str_text =~ /[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]@[a-zA-Z0-9]/ } $node->lfind(undef, 'all');
    for my $n (@text) {
	next if $n->parent->str_meta eq 'script';
	#print "full:".$n->str."\n";
	# http://www.w3.org/TR/html5/forms.html#valid-e-mail-address
	my @a = split /([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+)(@)([a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)((?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)/, $n->str;
	#print "split:".join(':',@a)."\n";
	my @b;
	while (@a >= 5) {
	    my $filler = shift @a;
	    my $localpart = shift @a;
	    my $atsign = shift @a;
	    my $host = shift @a;
	    my $domain = shift @a;
	    my $show = "$localpart(a)$host..."; # dummy text to be replaced
	    $show = '[Javascript required]';
	    push @b, text $filler if $filler ne '';
	    push @b, meta('span')->append(text  $show);
	    my $jsemail = obfuscate_jsstring($localpart.$atsign.$host.$domain);
	    push @b, trans("<script type=\"text/javascript\">".
			   "var s=$currentscript;".
			   "s.parentNode.replaceChild(document.createTextNode($jsemail),s.previousSibling);".
			   "s.remove();</script>");
	}
	push @b, text shift @a if @a;
	die('oops, leftover in @a: '.join(':',@a)) if @a;
	$n->parent->splice($n->pos,1,@b);
    }
}

# take a multi-line string and return each line
# prefixed with a line number
sub add_linenumbers($) {
    my $n;
    my @lines = split("\n", $_[0]);

    return join("\n", map { ++$n; sprintf("%3d: %s", $n, $_); } @lines );
}

# Process <?perl?> instructions embedded in HTML document $out, which is
# identified by $fn in error messages. Variables $cur and $src are
# just passed on to the embedded scripts.
# Returns the number of instructions found.
sub process_perl_instructions {
    my ($out, $fn, $cur, $src) = @_;

    my @pis = $out->lfind('+perl', 'all');
    foreach my $c (@pis) {
	# execute script
	{
	    local $SIG{__WARN__} = sub {
		print STDERR
		    "$fn:warning in <?perl?> script: $_[0]" .
		    add_linenumbers($c->getl(0)) . "\n";
	    };
	    eval $c->getl(0);
	}
	if ($@ ne '') {
	    die "$fn:error in <?perl?> script: " .
		PlexTree::print_error($@) . "\n" .
		add_linenumbers($c->getl(0)) . "\n";
	}
	# remove the script from the tree
	$c->cut;
    }
    return scalar(@pis);
}

# Given a PlexTree node from a uconfig.txt parameter that contains a piece
# of HTML or a link list as such as ((href='...', 'text'),...)
# (e.g. the value of a footlinks parameter), make a copy and adjust in there
# all href= and src= attributes that contain a relative URL to be valid from
# $cur.
sub relocate_html {
    my ($h, $cur) = @_;
    return $h unless $h;
    my $prefix = $h->upclass('NavTree')->url;
    $h = PlexTreeMem->new->copyfrom($h);
    for my $e ($h->lfind(undef, 'all')) {
	for my $att ('href', 'src') {
	    my $v = $e->cd($att);
	    next unless $v && defined ($v = $v->get) && is_relpath($v);
	    $e->setatt($att, NavTree::rurl(prefix_url($prefix, $v), $cur));
	}
    }
    # for backwards compatibility: add *a if element string is empty
    for my $e ($h->list) {
	$e->settstr(META, 'a')
	    if $e->len == 0 && $e->tag == CTRL;
    }
    return $h;
}

# prepare for template $t an index of HTML element attributes that
# contain relative URLs, which need to be adjusted
sub index_urls {
    my ($t) = @_;
    for my $e ($t->{html}->lfind(sub { $_[0]->tag == META }, 'all')) {
	my $s = $e->bstr;
	my $a;
	# look for one of these attributes:
	# <img src="..."> or <input src="...">
	# <script src="...">
	# <link rel="stylesheet" href="...">
	# and index them under urls_{img,script,stylesheet} respectively.
	if (($s eq 'img' || $s eq 'input') &&
	    ($a = $e->cd('src')) && is_relpath($a->str_text)) {
	    push @{$t->{urls_img}}, $a;
	} elsif ($s eq 'script' &&
		 ($a = $e->cd('src')) && is_relpath($a->str_text)) {
	    push @{$t->{urls_script}}, $a;
	} elsif ($s eq 'link' && $e->get('rel') eq 'stylesheet' &&
		 ($a = $e->cd('href')) && is_relpath($a->str_text)) {
	    push @{$t->{urls_stylesheet}}, $a;
	}
    }
}

# adjust the relative URLs that index_urls previously found
# in the cloned template $t to point to the relevant asset directory
# where these files are located, from the location of the current page
sub adjust_urls {
    my ($t, $cur) = @_;

    # Assets are additional files required by a template.
    # We distinguish between three types of assets, which can
    # (but don't have to be) stored in separate directories:
    # stylesheets (CSS), scripts (JavaScript), and images.
    #
    # To adjust any relative URLs used to address asset files
    # in the template, we need to know what base URL to use for these.
    #
    # The asset-directory URLs to be used can be set in many ways,
    # in the following priority order (where $style = $t->{style}
    # is the name of the active style) :
    #
    #   - via uconfig.txt:
    #
    #       * $style={stylesheets_url=...,
    #                 javascript_url=...,
    #                 images_url=...}
    #
    #       * $style={style_url=...},
    #
    #       * stylesheets_url=...,
    #         javascript_url=...,
    #         images_url=...,
    #
    #       * style_url=...,
    #
    #   - via $style.pl:
    #
    #       * stylesheets_url => '...',
    #         javascript_url => '...',
    #         images_url => '...',
    #
    #   - if none of the above is defined, and ucampas can determine
    #     the url of the $style.html file, then it will assume that
    #     that is a valid base URL to use

    # use a default value defined by the template Perl script or
    # use the URL of the template HTML file
    my %u;
    my $style_url;
    for my $s ('stylesheets_url', 'javascript_url', 'images_url') {
	# scan NavTree for asset-directory URL parameters ...
	my $p =
	    $cur->style_paramc($s) ||
	    ($style_url // ($style_url = $cur->style_paramc('style_url')));
	if ($p) {
	    # ... and use them if we have any
	    $t->{$s} = $p->get_url;
	} else {
	    # otherwise use any asset-directory URL set in template.pl
	    # or (as a last resort) the URL of the directory that
	    # contains the template
	    my $template_dir;
	    $template_dir = $t->{node}->parent->url if $t->{node};
	    # and convert relative URLs into full URLs accordingly
	    $t->{$s} = prefix_url($template_dir, $t->{$s} // '');
	}
	# then convert asset-directory URLs into relative paths from $cur
	$t->{$s} = NavTree::rurl($t->{$s}, $cur) if defined $t->{$s};
	# note that $t is a *cloned* template, so the above will only
	# affect the formatting of $cur, and not other pages

	# now take all the previously indexed relative pathnames
	# of asset files found in the template, and prefix them
	# with the appropriate asset-directory URL
	my @urls;
	if ($s eq 'stylesheets_url') {
	    @urls = @{$t->{'urls_stylesheet'}} if $t->{'urls_stylesheet'};
	} elsif ($s eq 'javascript_url') {
	    @urls = @{$t->{'urls_script'}} if $t->{'urls_script'};
	} elsif ($s eq 'images_url') {
	    @urls = @{$t->{'urls_img'}} if $t->{'urls_img'};
	} else {
	    die("s='$s'");
	}
	warn("Warning: using URL prefix '$t->{$s}'; ",
	     "check parameter $s= or url=\n")
	    if @urls && $t->{$s} =~ /^file:/ && !$cur->param('file_access');
	for my $a (@urls) {
	    $a->setstr(prefix_url($t->{$s}, $a->str));
	}
    }
}

#
# Load and cache style templates
#
# Ucampas has a fairly complex system for locating template files,
# which enables them to be located (or their location to be specified)
# both at various global default places, as well as locally, for
# only a subtree of the site. We load template files only once,
# as this involves parsing HTML and/or compiling Perl. We then make
# a deep copy of each template object for each page, as the template
# will modify its own template structure while formatting a page.

# cached template structures, indexed by the abs_path
# where they were loaded from
my %template;

# pathnames of template objects that were not loaded
# via a template= attribute found along the NavTree path,
# indexed by the style name associated with the template
my %template_path;

sub find_template {
    my ($cur) = @_;

    # identify style name of the template to use
    my $style = $cur->param('style') || 'default';

    # make sure the style name does not contain filesystem-unsafe characters
    die("Style name '$style' contains forbidden characters\n")
	unless $style =~ /^\w*\z/;

    # the location where to load the template from, which can either
    # end in a '/', in which case it is meant to be a directory and
    # we will append "template.{html,pl}", otherwise it is a filename
    # without extension, and we will append only ".{pl,html}".
    my $path;

    my @tried;

    # search along NavTree path for an attribute named $style with
    # a subattribute 'template' that specifies the value for $path
    my $temp = $cur->paramc($style, 'template');
    if ($temp) {
	my $path = $temp->get;
	$path = File::Spec->catfile($temp->up(2)->fdirname, $path)
	    unless File::Spec->file_name_is_absolute($path);
	my $a = abs_path($path);
	die("no abs_path for '$path'") unless $a;
	my $t = load_template($a, $style, \@tried);
	return $t if $t;
	die("Cannot find for style='$style' any of the template files\n",
	    map { "$_\n" } @tried);
    }

    # if there was no location explicitly specified in uconfig.txt,
    # try a global fall-back directory
    my $path = $template_path{$style};
    return $template{$path} if defined $path;
    my @template_dirs = ("$libdir/templates/$style");
    for $path (@template_dirs) {
	my $a = abs_path($path);
	die("no abs_path for '$path'") unless $a;
	my $t = load_template($a, $style, \@tried);
	next unless $t;
	$template_path{$style} = $a;
	return $t;
    }
    die("Cannot find for style='$style' any of the template files\n",
	map { "$_\n" } @tried);
}

# given an absolute path, load, cache and return a template structure
# that contains the result of parsing the relevant HTML and/or Perl files
sub load_template {
    my ($path, $style, $tried) = @_;

    my $a = abs_path($path);
    die("no abs_path for '$path'") unless $a;

    # do we have something cached at the provided abs_path?
    my $t = $template{$a};
    return $t if $t;

    # if we got a directory, append 'template' as the filename
    my $fn = $path;
    $fn = File::Spec->catfile($a, 'template')
	if $path =~ /\/\z/ || -d $path;

    if (@locked_template_dirs) {
	# if there is a restricted list of allowed prefixes from
	# where template files can be loaded, does this one match?
	die("$fn:\n",
	    "this template file is not under any directory prefix permitted by option -T:\n",
	    map { "$_\n" } @locked_template_dirs)
	    unless
	    grep { $_ eq substr($fn, 0, length($_)) } @locked_template_dirs;
    }

    # load the template file(s)
    my $t;
    if (-f "$fn.html") {
	$t = {
	    fn_html => "$fn.html",
	    id => {}
	};
	# parse template HTML
	$t->{html} = load_html($t->{fn_html}, $t->{id});
	# also determine the location of the template file in URL space
	# (this information helps later with adjusting relative URLs)
	$t->{node} = NavTree::find_file($t->{fn_html});
	index_urls($t);
    }
    if (-f "$fn.pl") {
	$t->{fn_perl} = "$fn.pl";
	# The file should have the form
	# {
	#   'images_url' => 'https://www.cl.cam.ac.uk/images/',
	#   'adjust' => sub { ... }
	# }
	# i.e. define a list of parameters that can be fed
	# into the template hash.
	my $r = do $t->{fn_perl};
	unless (defined $r) {
	    die($@) if $@;
	    die($t->{fn_perl}, ": ", $!);
	};
	unless (ref $r eq 'HASH') {
	    die($t->{fn_perl},
		": expected hash reference { ... => ..., ... }\n");
	}
	$t = { %{$t}, %{$r} };
    }
    if ($t) {
	$t->{style} = $style;
	$template{$a} = $t;
	return $t;
    }
    push @{$tried}, "$fn.{html,pl}" if $tried;  # report the filesnames tried

    return undef;
}

# make a deep copy of a template (using a single call to dclone)
sub clone_template {
    my ($t) = @_;
    my %s;  # temporary holding space for CODE references

    for my $p (keys %{$t}) {
	if (ref $t->{$p} eq 'CODE') {
	    # move CODE references out, as dclone can't handle them
	    $s{$p} = $t->{$p};
	    $t->{$p} = undef;
	}
    }
    my $c = dclone($t);
    # restore CODE references
    for my $p (keys %s) {
	$t->{$p} = $c->{$p} = $s{$p};
    }
    return $c;
}

sub apply_style($$$) {
    my ($cur, $src, $notes) = @_;

    # process Perl instructions embedded in source
    process_perl_instructions($src, $cur->fpath, $cur) if $embedded_perl;

    my $t = clone_template(find_template($cur));

    # adjust URLs in template to be correct relative paths to
    # the directories holding template assets (images, scripts, CSS, etc.)
    adjust_urls($t, $cur);

    my $out = $t->{html};

    # preserve prescript (things appearing before the DOCTYPE declaration)
    if ($src->cd(meta 'prescript')) {
	$out->addkey(meta 'prescript')->copyfrom($src->cd(meta 'prescript'));
    }

    merge_heads($cur, $src, $out);

    if ($t->{adjust}) {
	# process Perl template subroutine
	&{$t->{adjust}}($t, $out, $cur, $src);
    } elsif (process_perl_instructions($out, $t->{fn_html}, $cur, $src)) {
	# we processed Perl instructions embedded in template (deprecated)
    } else {
	# default adjustment subroutine:
	# just copy over body classes and content
	my $body = $src->cd('.l(*html)', '.l(*body)')
            or die('No <body> found in source!\n');
	my $outbody = $out->cd('.l(*html)', '.l(*body)')
	    or die('No <body> found in template!\n');
	merge_body_attributes($outbody, $body);
	$outbody->movelist($body);
    }

    # process <div class="ucampas-...">...</div> elements
    push @{$notes}, process_ucampas_divs($out, $cur, $t);

    # add index.html if requested by option -i or parameter file_access=1
    # (which used to be add_index=1)
    add_index($out) if $cur->param('file_access') or $cur->param('add_index');

    # obfuscate email addresses if requested by parameter obfuscate_email=1
    if ($cur->param('obfuscate_email')) {
	srand($cur->depth<<8+$cur->pos); # deterministic random seed
	obfuscate_email($out->cd('.l(*html)', '.l(*body)'));
    }

    print $out->print(), "\n" if 0;

    return $out;
}

# Process various <div class="ucampas-..."> elements found in HTML tree $out
# and transform them automatically into some feature (table of contents, etc.),
# where $cur is the location of the web page in the navigation tree.
sub process_ucampas_divs {
    my ($out, $cur, $t) = @_;

    my @notes;
    my $toc;
  DIV:
    my @divs = $out->lfind('*div', 'all');
    while (@divs) {
	my $div = shift @divs;
	my $class = $div->get('class');
	my @features = grep s/^ucampas-([a-z0-9-]+)\z/$1/, split(/\s+/a, $class);
	next unless @features;
	warn("More than one ucampas feature requested in <div class='$class'>\n")
	    if @features > 1;
	my $feature = @features[0];
	my $p;
	my $content = $div->cl(0);
	if ($content) {
	    if (!eval { $p = c('('.$content->str.' )'); }) {
		warn "\nContent of <div class='$class'>\n";
		warn $@->isa('PlexTree') ? $@->print_error : $@;
		next DIV;
	    }
	    $content->cut;
	} else {
	    $p = c('()');  # empty parameter list
	}

	# At this point, $div points to the currently processed HTML
	# element <div class='ucampas-$feature'> element. Its content
	# has already been removed and parsed into the
	# PlexTree $p. Feature processors can now extract their
	# parameters from $p and append their output to $div.

	# The following parameters are equally applied to all features
        # at the end. We read them already into variables here, such
        # that each feature may set them to their own default values,
        # in case the user left them undefined.

	# In case $div remains empty after processing, add $emptymsg
	# as its content.
	my $emptymsg = $p->cd('emptymsg');

	# In case $div has now content (including $emptymsg), prepend
	# and append a prelude or postscriptum, if defined,
	# respectively.
	my $prelude = $p->cd('prelude');
	my $postscriptum = $p->cd('postscriptum');

	# Specify what to do with the <div class='ucampas-$feature'>...</div>
	# wrapper:
	# $dropdiv > 0: always remove it, leaving its content in its place
	# $dropdiv = 0: remove it only if it lacks content
	# $dropdiv < 0: never remove it, even if it lacks content
	my $dropdiv = $p->get('dropdiv');

	# Now process the various features

	if ($feature eq 'toc') {

	    # table of contents
	    unless ($toc) {
		$toc = PlexTree->new;
		my $body = $out->cd('.l(*html)', '.l(*body)');
		my @levels = ();
		my @section_numbers;
		my $down = $toc;
		my %used;
		foreach my $h ($body->lfind(undef, 'all')) {
		    if ($h->str_meta =~ /^h([1-6])\z/i) {
			my $level = $1;
			my $id = $h->get('id');
			next unless defined $id; # ignore headings without id
			if (defined $used{$id}) {
			    die("Duplicate id attribute: <" .
				$h->str_meta . " id='$id'>\n");
			}
			$used{$id} = $h;
			if (!@levels || $level > $levels[$#levels]) {
			    $toc = $down->append0->set('*ul');
			    push @levels, $level;
			    push @section_numbers, 0;
			} else {
			    while (@levels && $level < $levels[$#levels]) {
				$toc = $toc->up(2);
				pop @levels;
				pop @section_numbers;
			    }
			}
			if ($#section_numbers < 0) {
			    warn("Inconsistent hierarchy of ".
				 "<h? id=\"...\"> elements\n");
			    next DIV;
			}
			$section_numbers[$#section_numbers]++;
			my $li = c('*li');
			if (defined $id) {
			    # append to list item an <a href='...'>
			    $li = $li->append0
				->set('*a')
				->addkey('href', text('#' . $id));
			}
			# copy heading text, but flatten any
			# embedded <a> elements (preserving their contents)
			$li->copyfrom_list($h);
			map { $_->flatten() } map { $_->lfind('*a', 'all') } $li->list;
			# prefix heading text with section numbers
			#$li->splice(0, 0, c("*span(class=sn)")
			#	    ->append(text(join('.', @section_numbers) .'  ')));
			$down = $toc->append0->set($li->top);
		    }
		}
		$toc = $toc->top->cl(0);
		# print "\n\$toc = " . $toc->print() . "\n";
	    }
	    if ($toc && $toc->listlen) {
		# fill generated table of contents into the div element
		my $c = $div->append0->set('*table')
		    ->append0->set('*tbody')
		    ->append0->set('*tr')
		    ->append0->set('*td');
		$c->append('*p("Contents")');
		$c->append($toc);
	    }

	} elsif ($feature eq 'sitemap') {

	    # sitemap
	    my $start = $cur->top;
	    my $startpath = $p->get('start');
	    if (defined $startpath) {
		$start = NavTree::find_file($cur->fsubdirname . $startpath);
	    }
            my $showpath = $p->get('showpath');
            my $checkurl = $p->get('checkurl');
	    my @opt;
            push @opt,
		mark => sub {
                    my ($li, $div, $item, $c, $cur, $this) = @_;
                    my @status;
                    if ($checkurl) {
                        my $url = $c->url;
                        if ($url =~ /^https?:\/\//) {
                            my @classes;
                            my $response = HTTP::Tiny->new->get($url);
                            my $redirects = 0;
                            if ($response->{redirects}) {
                                for my $redirect (@{$response->{redirects}}) {
                                    $redirects++;
                                    push @status, $redirect->{status};
                                    push @classes,
                                        "s${redirects}_$redirect->{status}";
                                }
                            }
                            push @status, $response->{status};
                            push @classes, "sn_$response->{status}";
                            if ($response->{url} =~
                                /^https?:\/\/([a-z0-9-\.]+)/) {
                                my $server = $1;
                                $server =~ s/\./_/g;
                                push @classes,
                                    "sn$response->{status}_${server}";
                            }
                            add_class($item, @classes);
                        };
                    }
                    if ($showpath) {
                        # append below each link the path from the root
                        my @t = ($c->apath());
                        if (@status && !(@status == 1 && $status[0] == 200)) {
                            # if we have done a link check, also append status
                            push @t, @status;
                        }
                        $div->append(meta 'br');
                        $div->append(meta('tt')->
                                     append(text(join(' ', @t)) ) );
                    }
            } if $showpath || $checkurl;
	    my $sitemap = navbar($cur, $start,
				 class          => 'sitemap',
				 showinvisibles => 1,
				 marklink => sub { $_[1]->append(text ' →') },
				 @opt
		);
	    $div->append($sitemap);
	    $dropdiv=1;

	} elsif ($feature eq 'subnodes') {

	    # list of links to subnodes
	    my $subnodes = navbar($cur, $cur,
                                  domain  => 'sub',
				  class   => 'subnodes',
				  topopen => 0,
		                  navtopinclude => 0);
	    $div->append($subnodes);
	    $dropdiv=1;

	} elsif ($feature eq 'tabs') {

	    # series of <ul> lists to generate navigation tabs
	    append_navtabs($div, $cur, undef,
			   domain => 'tab',
			   'markcurrent' =>
			   sub { $_[1]->parent->setatt(class => 'active') });

	} elsif ($feature eq 'filelist') {

	    # filelists
	    foreach my $pattern ($p->list) {
		my $glob = $pattern->str;
		my $usetitle = $pattern->get('usetitle');
		my $neatfilename = $pattern->get('neatfilename');
		my $mergeextensions = $neatfilename;
		my $dates = $pattern->get('dates'); # filenames start with ISO 8601 date
                my $clusteryears = $pattern->get('clusteryears');
		my $directory = $cur->fdirname;
		my @list = glob($directory . $glob);
		# remove $directory prefix from @list elements
		@list = grep {
		    index($_, $directory) == 0 ?
			(substr($_, 0, length($directory)) = '', 1) :
			0
		    } @list;
		# remove *-b.html files that have a derived *.html file
		@list = grep {
		    !($_ =~ /^(.+)-b(\.(?:html?|php))\z/i &&
		      grep({$_ eq "$1$2"} @list));
		} @list;
		# remove uconfig.txt files
		@list = grep { $_ !~ /^uconfig2?\.txt\z/ } @list;
		my %path;
		my %basename;
		my %extension;
		my %title;
		my %author;
		my %sortkey;
		my %date;
		my %year;
		# annotate each file with attributes
		for my $fn (@list) {
		    $basename{$fn} = $fn;
		    $basename{$fn} =~ s/^.*\///;
		    $extension{$fn} = '';
		    if ($basename{$fn} =~ s/(\.[^\.]{1,6})\z//) {
			$extension{$fn} = $1;
		    }
		    if ($dates) {
			if ($basename{$fn} =~ s/^((\d{4})(?:-(\d{2})(?:-\d{2}(?!\d))?)?)[-_]?//) {
			    $date{$fn} = $1;
			    $year{$fn} = $2;
			    my $month = $3;
			    if ($clusteryears =~ /^a/) {
				# academic year: 1 October to 30 September
				if ($month >= 10) {
				    $year{$fn} = sprintf("%4d/%02d", $year{$fn}, ($year{$fn}+1) % 100);
				} else {
				    $year{$fn} = sprintf("%4d/%02d", $year{$fn}-1, $year{$fn} % 100);
				}
			    }
			}
		    }
		    if ($neatfilename) {
			# make filename more presentable
			$basename{$fn} =~ s/_/ /g;
		    }
		    if ($usetitle) {
			if ($fn =~ /.pdf\z/i) {
			    my $pdfinfo;
			    my %pdfinfo;
			    if (open($pdfinfo,
				     "pdfinfo '$directory$fn' 2>/dev/null |")) {
				while (<$pdfinfo>) {
				    if (substr($_, 0, 16) =~ /^(.*):\s*$/) {
					$pdfinfo{$1} = substr($_, 16);
				    }
				}
				$title{$fn} = $pdfinfo{'Title'};
				$author{$fn} = $pdfinfo{'Author'};
				chomp $title{$fn};
				chomp $author{$fn};
			    }
			} elsif ($fn =~ /\.(?:html?|php)\z/i) {
			    # use *-b.html version if there is one
			    my $sourcefn = $fn;
			    $sourcefn =~ s/(\.(?:html?|php))\z/-b$1/i;
			    $sourcefn = $fn unless -r $directory.$sourcefn;
			    $title{$fn} =
				NavTree::head_scan($directory.$sourcefn)
				->get('title');
			} elsif (-d $directory.$fn) {
			    foreach my $indexfn ('index-b.html', 'index.html',
						 'index.htm',
                                                 'index-b.php', 'index.php') {
				if (-r $directory.$fn.$indexfn) {
				    $title{$fn} =
					NavTree::head_scan($directory.
							   $fn.$indexfn)
					->get('title');
				    last;
				}
			    }
			}
		    }
		    if (defined $date{$fn}) {
			my @month=('???', 'January', 'February', 'March', 'April', 'May', 'June',
				   'July', 'August', 'September', 'October', 'November', 'December');
			my $date;
			if ($date{$fn} =~ /^(\d{4})-?(\d{2})-?(\d{2})$/) {
			    $date = sprintf("%d ", $3) . $month[$2];
			} elsif ($date{$fn} =~ /^(\d{4})-?(\d{2})$/) {
			    $date = $month[$2];
			} elsif ($date{$fn} =~ /^(\d{4})$/) {
			    $date = "";
			} else {
			    die("Date error: '$date{$fn}'");
			}
			unless ($clusteryears) {
			    # append year
			    $date .= ' ' if defined $date;
			    $date .= $1;
			}
			if (defined $title{$fn}) {
			    $title{$fn} = $date . ' ' . $title{$fn};
			} else {
			    $title{$fn} = $date;
			}
		    }
		    $sortkey{$fn} = $date{$fn} . '~' . uc($basename{$fn}) .
			"~$basename{$fn}~$extension{$fn}~$fn";
		}
		@list = sort { $sortkey{$a} cmp $sortkey{$b} } @list;
		my %extension_preference = ( '.pdf' => 1, '.doc' => 2 );
		my @li;
		while (my $fn = shift @list) {
		    my @extensions = ($extension{$fn});
		    if ($mergeextensions) {
			while (@list and
			       $basename{$fn} eq $basename{$list[0]}) {
			    push @extensions, $extension{shift @list};
			}
			@extensions = sort( {
			    $extension_preference{$a} <=>
				$extension_preference{$b}
			} @extensions);
		    }
		    my $base = substr($fn, 0, length($fn) -
				      length($extension{$fn}));
		    my $linktext;
		    my $a = meta('a')->addkey(text 'href',
					      text($base.$extensions[0]));
		    if ($title{$fn}) {
			$linktext = "$title{$fn}";
		    } else {
			$linktext = $basename{$fn} ? $basename{$fn} : $fn;
			$linktext .= "$extension{$fn}" unless $mergeextensions;
		    }
		    $a->append(text $linktext);
		    my $li = meta('li')->append($a);
		    if ($mergeextensions) {
			$li->append(text '  (');
			while (my $extension = shift @extensions) {
			    my $neatextension = $extension;
			    $neatextension =~ s/^\.//;
			    $neatextension = uc($neatextension);
			    $li->append(meta('a')
					->addkey(text 'href',
						 text("$base$extension"))
					->append(text $neatextension));
			    $li->append(text ', ') if @extensions;
			}
			$li->append(text ')');
		    }
		    push @li, $li;
		    $year{$li} = $year{$fn};
		}
		if ($clusteryears) {
		    my @nli;
		    my $nli;
		    my $year;
		    while (my $li = shift @li) {
			if (defined $year && $year{$li} eq $year) {
			    $nli->append(text ', ', $li->list);
			} else {
			    push @nli, $nli if defined $nli;
			    $year = $year{$li};
			    $nli = meta('li');
			    $nli->append(text "$year: ") if defined $year;
			    $nli->append($li->list);
			}
		    }
		    push @nli, $nli if defined $nli;
		    @li = @nli;
		}
		if (@li) {
		    @li = reverse @li if $pattern->get('reverse');
		    if ($pattern->get('type') eq 'p') {
			my $p = c('*p')->append(shift(@li)->list);
			while (@li) {
			    $p->append(text ', ', shift(@li)->list);
			}
		        $div->append($p);
		    } else {
			# default: type=ul
			$div->append(c('*ul')->append(@li));
		    }
		}
	    } # foreach my $pattern

	} elsif ($feature eq 'newsfeed') {

	    # RSS newsfeed
	    # (intended and tested so far only for WordPress RSS 2.0 feeds)
	    next unless $p;
	    my $rssurl = $p->get('rssurl');
	    die("no rssurl specified for newsfeed\n") unless defined $rssurl;
	    eval {
		my $nodescription = $p->get('nodescription');
		my $noitemlink = $p->get('noitemlink');
		my $type = $p->get('type') || 'div';
		my $maxdays = $p->get('maxdays');
		my $maxitems = $p->get('maxitems');
		my $response = HTTP::Tiny->new->get($rssurl);
		die("GET request failed with status $response->{status} ",
		    "$response->{reason}\n",
		    $response->{status} == 599 ? $response->{content} : ())
		    unless $response->{status} == 200;
		my $xml = $response->{content};
		die("GET request returned empty file\n") unless $xml;
		my $rss=text($xml)->cd('.sgmldec');
		die("SGML decoding failed\n") unless $rss;
		my $top;
		my $rssversion;
		if ($top = $rss->cd('.l(*rss)')) {
		    # http://www.rssboard.org/rss-specification
		    $rssversion = $top->get('version');
		} elsif ($top = $rss->cd('.l(*rdf:rdf)')) {
		    # http://web.resource.org/rss/1.0/
		    $rssversion = '1.0';
		} else {
		    print STDERR $rss->print . "\n";
		    die("not an RSS file");
		}
		my $channel=$top->cd('.l(*channel)');
		die("no channel found\n") unless $channel;
		my $channel_title = $p->get('title') ||
		    $channel->cd('.l(*title)')->getl(0);
		my $channel_link  = $p->cd('link') ||
		    $channel->cd('.l(*link)');
		my @items = $channel->lfind('*item', 'all', 1); # RSS 2.0
		unless (@items) {
		    @items = $top->lfind('*item', 'all', 1); # RSS 1.0
		}
		splice(@items, $maxitems) if $maxitems;
		push @notes, scalar(@items) . ' newsfeed '
		    . (scalar(@items) == 1 ? 'item' : 'items');
		next unless @items;
		foreach my $item (@items) {
		    # prepare title link
		    my $title  = $item->cd('.l(*title)')->getl(0);
		    my $link   = $item->cd('.l(*link)') ||
			$item->cd('.l(*guid)');
		    my $url = $link->getl(0);
		    my $t = text($title); # HTML formatted item title
		    unless ($noitemlink) {
			$t = c('*a')
			    ->setatt('href' => $url)
			    ->append($t);
		    }
		    # prepare and check date
		    my $pubdate = $item->cd('.l(*pubdate)');
		    $pubdate = $pubdate->getl(0) if $pubdate;
		    my $date;
		    if ($pubdate) {
			$date = str2time($pubdate);
			#print "\n$pubdate => ", strftime("%Y-%m-%d %H:%M:%SZ", gmtime($date)), "\n";
		    }
		    next if (defined $date && defined $maxdays &&
			     $date + $maxdays * (60*60*24) < time);
		    # generate HTML for item
		    my $s = $div->append0;
		    if ($type eq 'div') {
			$s->set(c('*div'));
			$s->append(c('*h3')->append($t));
		    } elsif ($type eq 'ul') {
			$s->set(c('*li'));
			$s->append($t);
		    } elsif ($type eq 'ucam2008-rss') {
			$s->set(c('*div(class="news_item")'));
			$t->setatt('class', 'news_item_right_link')
			    if ($t->tag == META);
			$s->append(c('*p(class=news_item_date)')
				   ->append(text strftime("%e %b %Y", localtime $date)));
			$s->append(c('*p')->append($t));
		    }
		    unless ($nodescription) {
			# prepare article content
			my $content_encoded =
			    $item->cd('.l(*content:encoded)')->getl(0) //
			    $item->cd('.l(*description)')->getl(0);
			my $content;
			if (defined $content_encoded) {
			    # try to HTML parse content of <content:encoded> or <description>
			    eval {
				$content =
				    text('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">' .
					 "\n<body>\n" .
					 $content_encoded)->cd('.sgmldec')
					 || die("undefined content");
			    };
			    if ($@) {
				warn(PlexTree::print_error($@, $rssurl,
							   'skiprows' => 2));
				$content = c('*body')
				    ->append(trans($content_encoded));
			    } else {
				$content = $content->cd('.l(*body)')
				    || die("cannot find <body>");
				# strip redundant top-level <div> wrappers (e.g. emitted by Drupal)
				while ($content->listlen == 1 && $content->cl(0)->str_meta eq 'div') {
				    $content->cl(0)->flatten;
				}
				foreach my $c ($content->list) {
				    # prune whitespace between paragraphs as well as empty paragraphs
				    $c->cut if $c->str_text =~ /^\s+$/ ||
					($c->str_meta eq 'p' && $c->listlen == 0);
				}
			    }
			}
			# append article content
			if ($content) {
			    my $maxparagraphs = $p->get('maxparagraphs');
			    if ($maxparagraphs > 0) {
				# truncate after the first few paragraphs
				my $lastp = $content
				    ->lfind(c('*p'), $maxparagraphs-1, 1);
				if ($lastp &&
				    $lastp->pos < $content->listlen - 1) {
				    $content->splice($lastp->pos+1);
				    my $more = $p->cd('more');
				    $more = c('*a(" (more...)")')
					unless $more;
				    # eg: more=*p(class=more, *a("Read more"))
				    my $a = $more->lfind('*a');
				    die('attribute more lacks element *a\n')
					unless $a;
				    $a->setatt('href' => $url);
				    if ($more->match('*p') ||
					$more->match('*div')) {
					$content->append($more);
				    } else {
					$lastp->append($more);
				    }
				}
			    }
			    $s->append($content->list);
			}
		    }
		}
		return unless $div->listlen;
		# prepend channel title+link as heading
		if ($channel_title && !$prelude) {
		    $prelude = text $channel_title;
		    if ($channel_link->tag == TEXT) {
			$prelude = c('*a')
			    ->setatt('href' => $channel_link->str)
			    ->append($prelude);
		    }
		    $prelude = c('*h2')->append($prelude) unless $type eq 'ucam2008-rss';
		}
		# In case there was at least one item ...
		if ($type eq 'ul') {
		    $div->append(c('*ul')->movelist($div));
		} elsif ($type eq 'ucam2008-rss') {
		    # there probably ought to be a hook such that
		    # all this style-specific hackery can move into
		    # a template file
		    my $images_url = $t->{'images_url'};
		    my $dt = c('*dt');
		    $dt->append(c('*a')
				->setatt(href => $rssurl)
				->append(c('*img(alt="[rss feed]", class="news_rss_image", title="RSS feed")')
					 ->setatt(src => NavTree::rurl($images_url . "rssfeed.gif", $cur))));
		    $dt->append($prelude);
		    $div->append(c('*dl(class=news_box)')
				 ->append($dt)
				 ->append(c('*dd')->movelist($div)));
		    undef $prelude;
		}
	    };
	    if ($@) {
		die PlexTree::print_error($@, "newsfeed $rssurl");
	    }
	} elsif ($feature eq 'include-text') {
	    $dropdiv = 1 unless defined $dropdiv;
	    foreach my $file ($p->list) {
		my $fn = $file->ustr_text;
		$fn = $cur->fdirname . $fn unless $fn =~ /^\//;
		my $f;
		unless (open($f, '<:utf8', $fn)) {
		    warn("Cannot open file '$fn' requested in " .
			 "<div class='ucampas-$feature'>: $!\n");
		    next;
		}
		while (<$f>) {
		    $div->append(text $_);
		}
		close $f;
	    }
	} elsif ($feature eq 'include-html') {
	    foreach my $file ($p->list) {
		my $fn = $file->ustr_text;
		$fn = $cur->fdirname . $fn unless $fn =~ /^\//;
		my $f;
		unless (-r $fn) {
		    warn("Cannot read file '$fn' requested in " .
			 "<div class='ucampas-$feature'>: $!\n");
		    next;
		}
		my $src = load_html($fn);
		# recursively process any <div>s found in there
		my $there = NavTree::find_file($fn);
		process_ucampas_divs($src, $there, $t);
		# execute optional filter instructions to trim the retrieved tree
		my $body = $src->cd('.l(*html)', '.l(*body)');
		for my $filter ($file->list) {
		    my $type = $filter->ustr_text;
		    if ($type eq 'find') {
			my $pattern = $filter->cl(0);
			die("Missing pattern in find filter in <div class='ucampas-$feature'>!\n")
			    unless $pattern;
			my $maxdepth = $filter->get('maxdepth');
			$maxdepth = 1 unless defined $maxdepth;
			my $match = $body->lfind($pattern, $filter->get('skip'), $maxdepth);
			my $op = $filter->get('op');
			if (!defined $op || $op eq 'this') {
			    $body->splice(0, undef, $match);
			} elsif ($op eq 'content') {
			    $body->splice(0, undef, $match->list);
			} else {
			    next unless $match;
			    my $pos = $match->pos;
			    my $body = $match->parent;
			    if ($op eq 'cutbefore') {
				$body->splice(0, $pos);
			    } elsif ($op eq 'cutuntil') {
				$body->splice(0, $pos+1);
			    } elsif ($op eq 'cutfrom') {
				$body->splice($pos, undef);
			    } elsif ($op eq 'cutafter') {
				$body->splice($pos+1, undef);
			    } else {
				die("Unknown operation '$op' in <div class='ucampas-$feature'>:\n" .
				    $filter->print . "\n");
			    }
			}
		    }
		}
		if ($body) {
		    # adjust relative URLs
		    my $prefix = $there->url($cur);
		    $prefix =~ s/[^\/]+$//;
		    for my $e ($body->lfind(undef, 'all')) {
			for my $att ('href', 'src') {
			    my $v = $e->cd($att);
			    next unless $v && defined ($v = $v->get) && is_relpath($v);
			    next if $v eq '' || $v =~ /^[#?]/;
			    $e->setatt($att, NavTree::rurl(prefix_url($prefix, $v), $cur));
			}
		    }
		    # do the include
		    $div->movelist($body);
		}
	    }
	} else {
	    die("Unknown ucampas feature: " .
		"<div class='ucampas-$feature'>...</div>\n");
	}

	# apply parameters common to all features
	if (!$div->list) {
	    $div->append($emptymsg)
		if $emptymsg && !$prelude->isempty;
	}
	if ($div->list) {
	    # add prelude
	    $div->splice(0,0,$prelude)
		if $prelude && !$prelude->isempty;
	    # add postscriptum
	    $div->append($postscriptum)
		if $postscriptum && !$postscriptum->isempty;
	    # option to remove the <div class="ucampas-...">...</div> wrapper
	    $div->flatten if $dropdiv > 0;
	} else {
	    # remove empty <div class="ucampas-..."></div>
	    $div->cut unless $dropdiv < 0;
	}

    }  # foreach my $div

    return @notes;
}

# Parse the provided string $html as an HTML document
# and return a PlexTree cursor to its root. Check for
# duplicate ID attributes. If a hash reference $id is
# provided, fill it with a mapping from ID strings to the
# PlexTree cursors of the corresponding element nodes,
# aborting if new ID attributes collide with any existing
# ones.
sub parse_html {
    my ($html, $id) = @_;
    $id = {} unless defined $id;
    my $arg = PlexTree->new();
    $arg->append(text($html));
    $arg->setatt(public_id => '-//W3C//DTD HTML 4.01//EN');
    return sgmldec(undef, undef, $arg, 's',
		   { attribute_added => sub {
		       my ($elem, $name, $value) = @_;
		       if ($name eq 'id') {
			   # check for duplicate ID attributes
			   if (exists $id->{$value}) {
			       my $e1 = $elem->str;
			       my $e0 = $id->{$value}->str;
			       die("duplicate id='$value' ",
				   "in <$e0> and <$e1>, ",
				   "must be unique\n");
			   }
			   $id->{$value} = $elem;
		       }
		     }
		   } );
}

# Check an array @{$lines} of all lines (binary strings)
# loaded from UTF-8 plaintext file $fn and die with a
# detailed error message if non-UTF-8 characters are encountered.
sub check_utf8 {
    my ($lines, $fn) = @_;
    my $ln = 0;
    my (@error, @error_fixed);

    foreach my $line (@{$lines}) {
	$ln++;
	my $b = $line;
	my $u = decode('UTF-8', $b, Encode::FB_QUIET);
	if (length($b)) {
	    push @error, "$fn:$ln:".(length($u)+1).":Invalid UTF-8 sequence\n";
	    # show malformed UTF-8 sequences as replacement character
	    push @error, encode('UTF-8', decode('UTF-8', $line));
	    # Check if it might have been Windows CP-1252
	    eval {
		$u = decode('cp1252', $line, Encode::FB_CROAK);
	    };
	    push @error_fixed, encode('UTF-8', $u) unless $@;
	    last if @error > 18;
	}
    }
    if (@error) {
	push @error, <<'EOT';

The input contained characters that were not encoded using the
expected Unicode UTF-8 method. Often these are CP1252 characters from
Microsoft Office documents. On Windows you could fix them with the
Notepad++ function “Convert to UTF-8 without BOM”.
Common culprits: ‘’“”•€£°±²³µäè
EOT
	push @error, <<'EOT', @error_fixed if @error_fixed;

After CP-1252 to UTF-8 conversion, these lines would read:

EOT
	die @error;
    }
}

# Like parse_html(), but receives an input filename rather
# than the HTML file's content as a string.
sub load_html {
    my ($fn, $id) = @_;
    my $f;
    my @html;

    open($f, '<', $fn) or die "$fn: cannot read input file, $!\n";
    while (<$f>) {
	push @html, $_;
    }
    close($f);
    check_utf8(\@html, $fn);

    # parse HTML source
    my $src;
    eval {
	$src = parse_html(join('', @html), $id);
    } or die PlexTree::print_error($@, $fn);
    return $src;
}


# global counters
my $files = 0;
my $files_unchanged;
my $errors = 0;

# other global variables
my @suffixes = ('php', 'htm', 'html');
my %debug;

# track source files we have recursed from, for cycle detection
my @srcfiles;

# This function is called for each filename $fn listed on the command-line,
# as well as for each file representing a HTML subpage if recursion is on,
# to format that web page. The main part of the filename is sufficient in $fn,
# suffixes like "-b.html" and ".html" for the input and output filename
# are added automatically.
sub process_file {
    my ($fn,$recurse,@compound_options) = @_;
    my ($input_fn, $output_fn);
    my @notes; # to collect informative messages (no change, etc.)

    $files++;
    eval {
	# determine input and output filenames
	($input_fn, $output_fn) =
	    NavTree::preprocess_filename($fn, 'input', 'output');
    };
    if ($@) {
	# handle error messages
	print STDERR $@;
	$errors++;
	return;
    }

    # Let the NavTree library prepare the navigation-tree information
    # (by scanning the entire tree of uconfig.txt files)
    my $cur = NavTree::find_file($output_fn);
    # merge in configuration options from the command line (e.g. with -c)
    foreach my $c (@compound_options) {
	# TODO: worry about duplicate copies
	$cur->copyfrom_dir($c);
    }
    if ($cur->get('unreachable')) {
	push @notes, "unreachable from parent";
    }
    if ($cur->get('nopage')) {
	# This node is not associated with a web page, it is just a
	# heading in the navigation tree that links to its first child
	goto RECURSE;
    }
    unless (-f $input_fn) {
	print STDERR "$input_fn: source file does not exist\n";
	$errors++;
	return;
    }
    # read some inode data
    $cur->fstat;
    # try to read subversion metadata
    $cur->svninfo if $cur->param('svninfo');

    # some debugging output
    say $cur->print()         if $debug{'cur'};
    say $cur->parent->print() if $debug{'parent'};
    say $cur->top->print()    if $debug{'nav'};
    if ($debug{'path'} || $debug{'pathprint'}) {
	my $i = 0;
	foreach my $p ($cur->path) {
	    say "path depth $i: ", $p->apath;
	    say "  ->fpath = '" . $p->fpath . "'";
	    say "  ->fpath('url') = '" . $p->fpath('url') . "'";
	    say "  ->url(\$cur) = '" . $p->url($cur) . "'";
	    say "  ->rurl(\$cur) = '" . $p->rurl($cur) . "'";
	    say $p->print if $debug{'pathprint'};
	    $i++;
	}
    }

    # announce what is about to happen
    print STDERR "Updating " . $cur->dpath . " ..." unless $quiet;
    my $srcfile = $cur->srcfile;

    # catch all errors while we have not yet written \n
    eval {
        # check for cycle in recursion
        if (grep { $_ eq $srcfile } @srcfiles) {
            $recurse = 0;  # don't dig any deeper
            die("$srcfile: we've been here already\n",
                "Do you have a cycle (duplicate of an ancestor file) in your navigation tree?\n");
        }

	# load source file
	our $src = load_html($input_fn);

        say $src->print if $debug{'src'};

	# extract configuration information from it
	if (my $head = $src->cd('.l(*html)', '.l(*head)')) {
	    foreach my $meta ($head->lfind('*meta(name="ucampas-config")',
					   'all', 1)) {
		my $content = $meta->get('content');
		my $uconfig;
		eval {
		    $uconfig = c("( $content)");
		} or
		    die PlexTree::print_error($@, 'Content of ' .
					      '<meta name="ucampas-config">');
		$cur->addkey('noninherit')->copyfrom_dir($uconfig);
		# we do not copy the list here
		# (list changes would affect other nodes, too)
		$meta->cut;
	    }
	    if (my $base = $head->lfind('*base', 0, 1)) {
		my $href = $base->get('href');
		$cur->setatt('base' => $href) if $href;
	    }
	}
	# prepare formatted page
	my $out = apply_style($cur, $src, \@notes);
        say $out->print if $debug{'out'};
	my $h = $out->get(ctrl('sgmlenc')
			  ->addkey('noxmldecl', undef)
			  ->addkey('ncr', undef)
	                  ->addkey('comment',
			           text('DO NOT EDIT: file automatically '.
				   "generated by ucampas from\n     ".
			           $srcfile)));
	if (!defined $h) {
	    # not sure this still can happen
	    die "$input_fn:$.: no output defined\n";
	}

	my $old_html;
	if ($cur->param('change_check')) {
	    # check whether there is any change compared to the old file
	    my $oldin;
	    if (-f $output_fn && open($oldin, '<', $output_fn)) {
		local $/;
		$old_html = <$oldin>;
		close $oldin;
	    }
	}

	# change umask of this process if the user wants so
	my $umask = $cur->param('umask');
	my $orig_umask;
	$orig_umask = umask oct($umask) if defined $umask;

	if (defined $old_html && $old_html eq $h) {
	    # we don't rewrite the file unless necessary,
	    # to make life simpler for some backup schemes
	    push @notes, "no change";
	    $files_unchanged++;
	    if ($cur->param('change_check') eq 't') {
		# update ("touch") the timestamp (for the benefit of "make")
		utime(undef, undef, $output_fn);
	    }
	} else {
	    # write the resulting HTML file
	    my $tmp_fn = $output_fn . "~$$";
	    my $sigabort = 0;
	    # handle aborting signals while tmpfile exists
	    local $SIG{INT} = sub { $sigabort = 1; };
	    local $SIG{HUP} = $SIG{INT};
	    local $SIG{TERM} = $SIG{INT};
	    if (open(OUT, '>', $tmp_fn)) {
		if (!print OUT $h) {
		    die "$output_fn: " .
			"failed to write temporary output file '$tmp_fn', $!\n"
		}
		if (!close(OUT)) {
		    die "$output_fn: " .
			"failed to close temporary output file '$tmp_fn', $!\n";
		}
		rename $output_fn, $output_fn . '~'
		    if $cur->param('keepbackup');
		if (!rename $tmp_fn, $output_fn) {
		    die "$output_fn: " .
			"failed to rename temporary output file '$tmp_fn', $!\n";
		}
	    } else {
		die "$output_fn: " .
		    "cannot write temporary output file '$tmp_fn', $!\n";
	    }
	    die("Aborting due to received signal!\n") if $sigabort;
	}

	# restore umask
	umask $orig_umask if defined $orig_umask;

	# debug output
	if ($debug{'neighbours'}) {
	    #print "\nCurrent: " . $cur->fpath . " = " . $cur->url() . "\n";
	    print "\n";
	    my $head = $out->lfind('*html')->lfind('*head');
	    for my $rel ('Next', 'Up') {
		my $n = $head->lfind("*link(rel=$rel)");
		printf("%7s: %s\n", $rel, $n->get('href')) if $n;
	    }
	}

    };
    if ($@) {
	my $error = $@;
	# handle error messages
	if ($quiet) {
	    print STDERR $cur->apath . ': ';
	} else {
	    print STDERR " failed:\n";
	}
        if (Scalar::Util::blessed($error) && $error->isa('PlexTree')) {
            # we received an error compound
            die(PlexTree::print_error($error));
	} elsif ($error =~ /!\n\z/) {
	    # exceptions ending in exclamation mark are serious
	    die $error;
	}
	print STDERR $error;
	$errors++;
    } else {
	unless ($quiet) {
	    printf STDERR ' (' . join(', ', @notes) . ')' if @notes;
	    print STDERR "\n";
	}

	if ($display) {
	    # display the resulting page in a browser
	    my %substitute = (
		'%%' => '%',
		'%a' => abs_path($output_fn),
		'%c' => $output_fn,
		'%u' => $cur->url,
		);
	    my $cmd = $cur->param('display');  # command line to call browser
	    if (!$cmd) {
		$cmd =
		    "echo 'To use display option -d, ".
		    "specify a command line in attribute \"display\".'\n" .
		    "echo 'These substitutions will be made:'\n" .
		    join(';',
			 map { "echo ' ' \%$_ = '$_'" }
			 sort keys %substitute);
		$display = 0;
	    }
	    $cmd =~ s/(@{[join "|", keys %substitute]})/$substitute{$1}/g;
	    system($cmd);
	}
    }

    # recurse
  RECURSE:
    if ($recurse != 0) {
        push @srcfiles, $srcfile;
        recurse($cur, $recurse);
        pop @srcfiles;
    }
}

sub recurse($$) {
    my ($cur, $recurse) =@_;
    foreach my $c ($cur->list) {
	next if $c->tag != TEXT;    # do not recurse into cross-links
	next if $c->get('stoprecursion'); # we shall not descend here
	if ($c->get('nopage')) {
	    recurse($c);
	    next;
	}
	next if $c->get('missing'); # we warned already about missing dir
	my $fn = $c->fpath;
	next if $fn =~ /\.(?:ps|pdf|txt|docx?)\z/i;
	$fn .= '/' unless $fn =~ /\.(?:html?|php)\z/i || $fn =~ /\/\z/;
	process_file($fn, $recurse - 1);
    }
}


# Parse the command-line options

my $usage = <<'EOT';
ucampas -- University of Cambridge web page augmentation system -- Markus Kuhn

Usage: ucampas [options] file ...

Normal options:

  -r[depth]     Process not only the listed input files, but also recurse into
                all their child nodes listed in "uconfig.txt" files
                (up to "depth" levels deep)

  -i            Append "index.html" to relative HREF links to directories,
                such that they also work through file://... URLs

  -q            Quiet mode, output only errors and warnings

  -p            Execute any <?perl ... ?> processing instructions embedded
                in source HTML files. (Default is to remove them.)

  -d            Display the resulting page in a web browser by executing the
                command line specified in the "display" attribute.

  -b            Keep a backup copy of the previous output file (suffix: ~)

  -c 'options'  Add further options to those in the top-level "uconfig.txt"

  -T 'path'     Set directory where to search for style template
                files (repeat if needed) and disable attribute template_dirs

  --debug help  Show list of available debug output options

  --            Stop parsing options, any further argument is a filename.
                With no filename argument, do not process 'index' instead.

Documentation:

  https://www.cl.cam.ac.uk/local/web/ucampas/

EOT

my $debug_usage = <<'EOT';
Option --debug keyword1,keyword2,... enables additional
debugging outputs. The following keywords are supported:

  cur           Print the NavTree starting from the current node

  parent        Print the NavTree starting from the parent
                of the current node

  nav           Print the entire NavTree

  path          For each NavTree node from the root down to the
                current node, print the output of the methods
                fpath(), fpath('url'), url($cur), rurl($cur)

  printpath     Like path, but also print the entire NavTree
                starting from each NavTree node on the path

  src           Print the parsed HTML source document tree

  out           Print the output document tree (not yet HTML encoded)

  neighbours    Print the Next and Up links in the output document

  help          Print this help message

EOT

# parse the global uconfig.txt file
NavTree::set_global_uconfig("$libdir/uconfig.txt");

my $recurse = 0;
my $parse_options = 1;
my @compound_options;
while (@ARGV) {
    $_ = shift @ARGV;
    if ($parse_options && /^-/) {
	if (/^-r\z/) {
	    $recurse = -1;
	} elsif (/^-r(\d+)\z/) {
	    $recurse = $1;
	} elsif (/^-i\z/) {
	    push @compound_options, c('{file_access=1}');
	} elsif (/^-p\z/) {
	    $embedded_perl = 1;
	} elsif (/^-d\z/) {
	    $display = 1;
	} elsif (/^-q\z/) {
	    $quiet = 1;
	} elsif (/^-c\z/) {
	    my $t = shift @ARGV;
	    my $c;
	    die("Missing argument after -c.\n") unless defined $t;
	    if (!eval {
		$c = c("( $t)");
		}) {
		print STDERR PlexTree::print_error($@, 'Argument of option -c');
		exit(1);
	    }
	    push @compound_options, $c;
	} elsif (/^-b\z/) {
	    # keep the previous *.html file as *.html~
	    push @compound_options, c('{keepbackup=1}');
	} elsif (/^-T(.*)/) {
	    my $d = abs_path($1 || shift @ARGV);
	    $d .= '/' if $d !~ /\/\z/;
	    push @locked_template_dirs, $d;
	} elsif (/^--debug\z/) {
	    my $debug = shift @ARGV;
	    die("Missing argument after --debug.\n") unless defined $debug;
	    map { $debug{$_} = 1 } split(/,/, $debug);
            $Carp::Verbose = 1 if $debug{'backtrace'};
            if ($debug{help}) {
                print $debug_usage;
                exit 0;
            }
	} elsif (/^-h/ || /^--help/) {
	    print $usage;
	    exit 0;
	} elsif (/^--\z/) {
	    $parse_options = 0;
	} else {
	    die("Unknown command line option '$_'!\n\n" . $usage);
	}
    } else {
	process_file($_, $recurse, @compound_options);
    }
}

if ($files == 0 && $parse_options) {
    # default filename: index
    process_file('index', $recurse, @compound_options);
}

if (!$quiet && $files > 1) {
    printf STDERR "$files files processed";
    printf STDERR ", $errors of which skipped because of errors"
	if $errors > 0;
    printf STDERR ", $files_unchanged files unchanged"
	if defined $files_unchanged;
    printf STDERR ".\n";
}
exit $errors > 0;
