# API for reading and writing the Technical Report database files
#
# Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/>

package TechReports;

use 5.016;
use utf8;
use lib '../../scripts', '/anfs/www/VH-cl/scripts';
use CLWeb ();
use Digest::MD5 ();
use POSIX ();
use Time::Local ();

# Load the named tr-database.txt database file and return a hash table
# that contains both parameters and TechReport objects indexed by
# report number.
#
# That hash table also contains a few special entries aimed at the
# save routine, to help it preserve the order of included
# parameters as well as any comments or empty lines that precede
# a parameter or tech-report line.
sub load {
    my ($class, $dbfile, $absfile, $timestampfile) = @_;

    my $db = bless { } => $class;
    my @missing;
    my $nr = 0;
    my $dbf;
    open($dbf, "<:encoding(UTF-8)", $dbfile)
	|| die("Can't open technical-report database '$dbfile':\n$!");

    my @comments;
    my @params;
    while (<$dbf>) {
	if (/^\s*\#/ || /^\s*$/) {
	    # record comments and empty lines
	    push @comments, $_;
	} elsif (/^(\d+)\|([^|]*)\|([^|]*)\|([^|]*)\|(\d*)\|(.*)$/) {
	    # read and create report entry
	    if (exists $db->{$1}) {
		die("$dbfile:$.:repeat occurrence of report number $1\n$_");
	    }
	    if ($1 < $nr) {
		warn("$dbfile:$.:non-monotonic numbering from $nr to $1\n");
	    } elsif ($1 != $nr + 1) {
		push @missing, ($nr+1)..($1-1);
	    }
	    $nr = $1;

	    my $tr = bless({ 'nr' => $nr, 'date' => $2, 'title' => $3,
			     'pages' => $5, 'db' => $db }
			   => 'TechReport');
	    my @notes = split(/\|/, $6, -1);
	    if (@comments) {
		$tr->{'_comment'} = [ @comments ];
		@comments = ();
	    }
	    eval {
		$tr->{'authors'} = [ map { TRAuthor::new($_) }
				     split(/, /,$4,-1) ];
	    };
	    # should contributors go into an extension of the author field
	    # rather than into a contrib= note?
	    die("$dbfile:$.:$@") if $@;
	    $tr->{'title'} =~ s/\\\\/\N{LINE SEPARATOR}/g;
	    die("$dbfile:$.:date field syntax error\n$_")
		unless $tr->{date} =~ /^|\d{4}|\d{4}-\d{2}|\d{4}-\d{2}-\d{2}$/;
	    warn("$dbfile:$.:no author\n$_")
		unless @{$tr->{'authors'}};

	    $db->{$nr} = $tr;
	    for my $note (@notes) {
		if ($note =~ /^([a-z0-9-]+)=(.*)$/) {
		    my $key = $1;
		    my $value = $2;
		    die("$dbfile:$.:duplicate note '$key'\n")
			if exists $tr->{$key};
		    $value =~ s/\\\\/\N{LINE SEPARATOR}/g;
		    $value =~ s/\\_/\N{PARAGRAPH SEPARATOR}/g;
		    $tr->{$key} = $value;
		    push @{$tr->{'_notes'}}, $key;
		} elsif ($note =~ /^([a-z0-9-]+)$/) {
		    die("$dbfile:$.:duplicate note '$1'\n") if exists $tr->{$1};
		    $tr->{$1} = undef;
		    push @{$tr->{'_notes'}}, $1;
		} else {
		    die("$dbfile:$.:syntax error in notes field '$note'\n$_");
		}
	    }
	} elsif (/^([a-z]+)=(.*)$/) {
	    # series parameters
	    my $key = $1;
	    my $value = $2;
	    push @params, $key;
	    $value =~ s/\\\\/\N{LINE SEPARATOR}/g;
	    $value =~ s/\\_/\N{PARAGRAPH SEPARATOR}/g;
	    $db->{$key} = $value;
	    if (@comments) {
		$db->{"_$key comment"} = [ @comments ];
		@comments = ();
	    }
	} else {
	    die("$dbfile:$.:syntax error\n$_");
	}
    }

    close($dbf);
    $db->{'_final comment'} = [ @comments ] if @comments;
    $db->{'_params'} = [ @params ] if @params;
    warn("$dbfile:missing report numbers " . join(', ', @missing) . "\n")
	if @missing && 0;

    $db->load_abstracts($absfile) if defined $absfile;
    $db->load_timestamps($timestampfile) if defined $timestampfile;

    return $db;
}

# Read the abstracts into an already existing TechReports table
sub load_abstracts {
    my ($db, $absfile) = @_;

    # read in abstracts
    my $abs;
    open($abs, "<:encoding(UTF-8)", $absfile)
	|| die("$absfile: cannot open abstracts database: $!\n");
    # record comments and skip empty lines
    my @comments;
    while (<$abs>) {
	last unless /^\s*\#/ || /^\s*$/;
	push @comments, $_ unless /^\s*$/;
    }
    $db->{'_abstracts comment'} = [ @comments ] if @comments;
    my $nr;
    /^==([0-9]+)==+$/
	or die("$absfile:$.:unexpected text before first separator line\n$_");
    $nr = $1;
    while (!eof($abs)) {
	my $tr = $db->{int($nr)};
	warn("$absfile:$.:found abstract for ".
	     "non-existing report number $nr\n") unless $tr;
	die("$absfile:$.:found second abstract for number $nr\n")
	    if $tr && exists $tr->{'abstract'};
	my @abstract;
	while (<$abs>) {
	    if (/^==([0-9]+)==+$/) {
		$nr = $1;
		last;
	    };
	    push @abstract, $_;
	}
	my $abstract = join('', @abstract);
	$abstract =~ s/\n+$/\n/s;
	$abstract =~ s/(\S)\n(\S)/$1 $2/sg;
	$tr->{'abstract'} = $abstract if $tr;
    }
    close($abs);
}

# list of technical reports
sub trs {
    my ($db, $all) = @_;
    my @trs = map { $db->{$_} } sort {$a <=> $b} grep { /^[0-9]+$/ } keys %$db;
    return @trs if $all;
    return grep {!exists $_->{deleted}} @trs;
}

# Convert a string indicating a range of report numbers (such as
# "100", "100-105", "-100", "100-") into a list of TechReport references.
# Several range-specifiers can be concatenated by comma.
sub range($) {
    my ($db, $rlist) = @_;
    my @list = ();

    return $db->trs unless $rlist;
    for $_ (split /,/, $rlist) {
	my $first;
	my $last;
	if (/^(\d+)$/) {
	    $first = $last = $1;
	} elsif (/^-(\d+)$/) {
	    $last = $1;
	} elsif (/^(\d+)-$/) {
	    $first = $1;
	} elsif (/^(\d+)-(\d+)$/) {
	    $first =$1;
	    $last = $2;
	} else {
	    die("Unrecognized range specification '$rlist'!\n");
	}
	push @list, grep(!(defined $first && $_->{nr} < $first or
			   defined $last  && $_->{nr} > $last),
			 $db->trs);
    }
    die("'$rlist' did not match any existing report number!\n")
	unless @list;
    return @list;
}

# Save a database of TR objects into a tr-database.txt file
sub save {
    my ($db, $dbfile) = @_;

    my @db;
    my %params;
    foreach my $p (grep { /^[a-z]+$/ } keys %{$db}) { $params{$p} = 1 };
    # output loaded parameters in order in which there were loaded
    # including any comments that preceded each parameter
    foreach my $p (@{$db->{'_params'}}) {
	push @db, @{$db->{"_$p comment"}};
	next unless exists $params{$p};
	my $value=$db->{$p};
	$value =~ s/\N{LINE SEPARATOR}/\\\\/g;
	$value =~ s/\N{PARAGRAPH SEPARATOR}/\\_/g;
	push @db, "$p=$value\n";
	delete $params{$p};
    }
    # then also output any newly added parameters
    foreach my $p (sort keys %params) {
	push @db, "$p=$db->{$p}\n";
    }
    foreach my $tr ($db->trs('all')) {
	push @db, @{$tr->{'_comment'}} if $tr->{'_comment'};
	push @db, $tr->db_line;
    }
    push @db, @{$db->{'_final comment'}} if exists $db->{'_final comment'};

    CLWeb::write_file($dbfile, @db);
}

sub save_abstracts {
    my ($db, $absfile) = @_;

    my @abs;
    push @abs, @{$db->{'_abstracts comment'}}
        if exists $db->{'_abstracts comment'};
    foreach my $tr ($db->trs('all')) {
	next unless exists $tr->{'abstract'};
	push @abs, "\n==$tr->{nr}================================================================\n";
	push @abs, $tr->{'abstract'};
    }
    CLWeb::write_file($absfile, @abs);
}

sub load_timestamps {
    my ($db, $timestampfile) = @_;

    my $tf;
    open($tf, "<", $timestampfile)
	|| die("$timestampfile: cannot open timestamp database: $!\n");
    my @modified;
    my @deleted;
    while (<$tf>) {
	next if /^\s*\#/ || /^\s*$/; # skip comments and empty lines;
	if (/^([0-9]+)\|(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})(\d{2})Z\|([0-9a-f]{32})$/) {
	    my $nr = $1;
	    my $lastmod = Time::Local::timegm($7, $6, $5, $4, $3-1, $2-1900);
	    my $lastdigest = $8;
	    die("$timestampfile:$.:odd timestamp\n$_")
		unless $lastmod;
	    if (exists $db->{$nr}) {
		my $tr = $db->{$nr};
		if ($lastdigest ne $tr->digest) {
		    push @modified, $nr;
		} else {
		    $tr->{'_lastmod'} = $lastmod;
		}
	    } else {
		push @{$db->{'_deleted'}}, $nr;
	    }
	} else {
	    die("Syntax error in timestamp database line $.:\n$_");
	}
    }
    close($tf);
    # also find any newly added TechReports
    my @added = map { $_->{nr} } grep { !exists $_->{'_lastmod'} } $db->trs;
    push @{$db->{'_updated'}}, @modified, @added;
    push @{$db->{'_deleted'}}, @deleted;
    my $timestamp = time;
    for my $nr (@modified, @added) {
	$db->{$nr}{'_lastmod'} = $timestamp;
    }
}

sub save_timestamps {
    my ($db, $timestampfile) = @_;
    my @ts = ("# TECHNICAL REPORT DATABASE MODIFICATION TIMESTAMPS\n".
	      "# AUTOMATICALLY GENERATED FILE -- DO NOT EDIT\n".
	      "# Format: nr|timestamp|md5-record-digest\n");

    my $timestamp = time;
    foreach my $tr ($db->trs('all')) {
	$tr->{'_lastmod'} = $timestamp unless $tr->{'_lastmod'};
	push @ts, ($tr->{nr}, '|', POSIX::strftime('%Y%m%dT%H%M%SZ',
						   gmtime($tr->{'_lastmod'})),
		   '|',$tr->digest,"\n");
    }

    CLWeb::write_file($timestampfile, @ts);
}

package TechReport;

# generate a single-line record of a TR in tr-database.txt format
sub db_line {
    my ($tr) = @_;

    my $title = $tr->{title};
    $title =~ s/\N{LINE SEPARATOR}/\\\\/g;
    my $line = "$tr->{nr}|$tr->{date}|$title|".
	join(', ', map { $_->save } $tr->authors)."|$tr->{pages}|";
    my @notes;
    # append notes
    foreach my $note (@{$tr->{'_notes'}}) {
	my $value = $tr->{$note};
	if (defined $value) {
	    $value =~ s/\N{LINE SEPARATOR}/\\\\/g;
	    $value =~ s/\N{PARAGRAPH SEPARATOR}/\\_/g;
	    push @notes, "$note=$value";
	} else {
	    push @notes, $note;
	}
    }
    return $line . join('|', @notes) . "\n";
}

sub digest {
    my ($tr) = @_;

    my $line = "$tr->{nr}.1|$tr->{date}|$tr->{title}|".  # the '.1' was part no
	join(', ', map { $_->save } $tr->authors)."|$tr->{pages}|";
    my @notes;
    # append notes
    foreach my $note (@{$tr->{'_notes'}}) {
	push @notes, defined $tr->{$note} ? "$note=$tr->{$note}" : $note;
    }
    $line .= join('|', @notes) . "\n";

    my $preimage = Encode::encode_utf8($line . $tr->{'abstract'});
    return Digest::MD5::md5_hex($preimage);
}

# full TR number
sub code {
    my ($tr) = @_;
    return "$tr->{db}{code}-$tr->{nr}";
}

# all available download files
sub files {
    my ($tr) = @_;
    my @files;
    for my $suffix ( 'pdf', 'ps.gz', 'dvi.gz' ) {
	my $fn = $tr->code.'.'.$suffix;
	if (-r $fn && ((stat(_))[2] & Fcntl::S_IROTH)) {
	    push @files, $fn;
	}
    }
    return @files;
}

# all available download formats (e.g. for DataCite Metadata Schema)
sub formats {
    my ($tr) = @_;
    my @formats;
    for my $suffix ( 'pdf', 'ps.gz', 'dvi.gz' ) {
        my $format = $suffix;
        $format =~ s/\.gz$//;
	my $fn = $tr->code.'.'.$suffix;
	if (-r $fn && ((stat(_))[2] & Fcntl::S_IROTH)) {
	    push @formats, uc($format);
	}
    }
    return @formats;
}

# URL of the full document file
sub url_file {
    my ($tr) = @_;

    return $tr->{url} if $tr->{url};
    my @files = $tr->files;
    $tr->{url} = $tr->{db}{reporturl} . $files[0] if @files;

    return $tr->{url};
}

sub downloads {
    my ($tr) = @_;
    my @html;

    for my $fn ($tr->files) {
	$fn =~ /\.(\w+)(?:\.gz)?$/;
	my $type = uc($1);
	my $size = (stat($fn))[7];
	push @html, '<a href="' . $fn . '">' . $type . '</a>' .
	    sprintf(' (%.1f MB)', $size/1e6);
    }

    return join('<br />', @html);
}

# URL of the abstracts web page
sub url_abs {
    my ($tr) = @_;
    return "$tr->{db}{reporturl}$tr->{db}{code}-$tr->{nr}.html";
}

# TR title in a single line, i.e. with line separators replaced
sub titleline {
    my ($tr) = @_;
    my $s = $tr->{title};
    return undef unless defined $s;
    # Substitute all line separators in a string with " : " or
    # -- if preceeded by punctuation or followed by parenthesis -- with " "
    $s =~ s/([\p{Pd}\.:,;?!])\N{LINE SEPARATOR}/$1 /g;
    $s =~ s/\N{LINE SEPARATOR}([\(])/ $1/g;
    $s =~ s/\N{LINE SEPARATOR}/ : /g;
    return $s;
}

sub html_title {
    my ($tr) = @_;
    return CLWeb::utf8_to_sgml($tr->titleline) || '<i>title missing</i>';
}

sub html_doc {
    my ($tr) = @_;
    return '<a href="'.$tr->url_doc.'">'.$tr->html_title.'</a>';
}

sub html_abs {
    my ($tr) = @_;
    return '<a href="'.$tr->url_abs.'">'.$tr->html_title.'</a>';
}

sub authors {
    my ($tr, $param) = @_;
    my @authors = @{$tr->{authors}};
    if ($param->{maxauthors} > 0 &&
	@authors > $param->{maxauthors}) {
	splice(@authors, $param->{maxauthors} - 1);
	push @authors, $TRAuthor::et_al;
    }
    return @authors;
}

sub html_authors {
    my ($tr, $param) = @_;
    return join(', ', map {$_->html} $tr->authors($param));
}

# Does the authors field actually list the editors of the report?
# Assume yes if the title starts with "Proceedings ".
sub editors {
    return $_[0]->{title} =~ /^Proceedings /;
}

# parse the contrib field and return a list
# of contributions and contributors the form
# ( contribution1 => [ contributor1, controbutor2 ],
#   contributionw => [ contributor3 ], ... )
sub contributions {
    my ($tr) = @_;
    my @contributions;
    for my $c (split(';;', $tr->{contrib})) {
        if ($c =~ /^(.*)::(.*)$/) {
            my $contribution = $1;
            my $contributors = $2;
            my @contributors;
            eval {
                @contributors =
                    map { TRAuthor::new($_) } split(/, /,$contributors,-1);
            };
            die($tr->code.":cannot parse contrib field:$@") if $@;
            push @contributions, $contribution => \@contributors;
        }
    }
    return @contributions;
}

# get just a list of the contributors, without identifying the contribution
sub contributors {
    my ($tr) = @_;
    my @contributions = $tr->contributions;
    my @contributors;
    while (@contributions) {
        my $contribution = shift @contributions;
        my $contributors = shift @contributions;
        push @contributors, @{$contributors};
    }
    return @contributors;
}

# if it is a thesis, return degree, college and submission-date
sub thesis {
    my ($tr) = @_;
    if (my @thesis = grep(/^phd|ophd|mphil|cst/, @{$tr->{_notes}})) {
	if ($tr->{$thesis[0]} =~ /^(?:([^,]*)(?:,([\d-]*))?)?$/) {
	    my $degree=$thesis[0];
	    my $college=$1;
	    my $submission=$2;
	    die("TR-$tr->{nr}:syntax error in submission date '$submission'")
		unless ($submission =~ /^(\d{4}(-\d{2}(-\d{2})?)?)?$/);
	    return ($degree, $college, $submission);
	} else {
	    die("TR-$tr->{nr}:",
		"invalid thesis note '$thesis[0]=$tr->{$thesis[0]}'\n");
	}
    }
    return;
}

# is this a CL PhD?
sub is_phd($) {
    my ($self) = @_;
    return exists $self->{phd};
}

sub submitted($) {
    return ($_[0]->thesis)[2];
}

# estimate publication year (needed e.g. for DOI registration)
sub year {
    my ($tr, $dir) = @_;
    # if we have a publication date, simply take the year from that
    if ($tr->{date} =~ /^(\d{4})/) { return $1 };
    # otherwise, if the nearest preceeding and following report
    # with a year show the same year, take that
    if ($dir) {
        # recurse one way only subsequently
        return $tr->{db}{$tr->{nr}+$dir}->year($dir)
            if $tr->{db}{$tr->{nr}+$dir};
    } else {
        # recurse both ways at first
        my ($prev, $next);
        $prev = $tr->{db}{$tr->{nr}-1}->year(-1)
            if $tr->{db}{$tr->{nr}-1};
        $next = $tr->{db}{$tr->{nr}+1}->year(+1)
            if $tr->{db}{$tr->{nr}+1};
        if ($prev eq $next) { return $prev; }
    }
    # otherwise, if the report is based on a thesis,
    # use the submission date recorded for that
    # (this can often be a year earlier than submission,
    # due to the length of the examination process)
    if ($tr->submitted =~ /^(\d{4})/) { return $1 };
    return;
}

# returns the licence name, URL, and short-code applicable to the report,
# or an empty list in case the default licence applies
sub licence {
    my ($tr) = @_;

    if (my $cc = $tr->{'cc'}) {
        if ($cc eq 'by') {
            return
                ('Creative Commons Attribution 4.0 International (CC BY 4.0)',
                 'https://creativecommons.org/licenses/by/4.0/', $cc);
        } elsif ($cc eq 'by-sa') {
            return
                ('Creative Commons Attribution-ShareAlike 4.0 International (CC BY-SA 4.0)',
                 'https://creativecommons.org/licenses/by-sa/4.0/', $cc);
        } elsif ($cc eq 'by-nd') {
            return
                ('Creative Commons Attribution-NoDerivatives 4.0 International (CC BY-ND 4.0)',
                 'https://creativecommons.org/licenses/by-nd/4.0/', $cc);
        } else {
            die("Unknown CC licence '$cc' in ", $tr->code, "\n");
        }
    }

    return;
}

# a bibliographic reference for the report
sub html {
    my ($tr) = @_;
    my @html;

    push @html, $tr->html_authors . ': ' if $tr->authors;
    push @html, $tr->html_abs;
    push @html, ', ' . expand_date($tr->{date}) if $tr->{date};
    push @html, ".";
    my @ss = ();
    push @ss, "$tr->{pages} pages" if $tr->{pages};
    push @ss, "PhD thesis"
        if grep(/^o?phd/, @{$tr->{_notes}});
    push @ss, "MPhil thesis"
        if grep(/^mphil/, @{$tr->{_notes}});
    push @html, ' (' . join(', ', @ss) . ')' if @ss;

    return @html;
}

sub doi {
    my ($tr) = @_;
    return if $tr->{'to-appear'};
    my $doiprefix = $tr->{db}{doiprefix};
    return "$doiprefix/tr-$tr->{nr}" if $doiprefix;
    return;
}

# https://blog.datacite.org/introducing-datacite-json/
# https://doi.test.datacite.org/dois/10.81026%2Ftr-577
sub datacite_json {
    my ($tr) = @_;
    my $doi = $tr->doi;
    die("No DOI prefix defined\n") unless $doi;
    my $j = {
	id  => "https://doi.org/" . $tr->doi,
	doi => $doi,
	url => $tr->url_abs,
	types => {
	    bibtex => 'TechReport',
	    resourceType => 'technical report',
	    resourceTypeGeneral => 'Report'
	},
    };
    for my $author ($tr->authors) {
        my $c = {
            name => $author->fullname,
        };
        if ($author->{forenames} && $author->{surname}) {
            $c->{givenName}  = $author->{forenames};
            $c->{familyName} = $author->{surname};
            $c->{nameType}   = 'Personal';
        }
        push @{$j->{creators}}, $c;
    };
    push @{$j->{titles}}, {
        lang => 'en',
        title => $tr->{title},
    } if $tr->{title};
    $j->{publisher} = $tr->{db}->{department} .
        ", University of Cambridge" if $tr->{db}->{department};
    my $year = $tr->year;
    $j->{publicationYear} = $tr->year if $year;
    push @{$j->{sizes}}, "$tr->{pages} pages" if $tr->{pages};
    my @formats = $tr->formats;
    push @{$j->{formats}}, @formats if @formats;
    push @{$j->{descriptions}}, {
        lang => 'en',
        description => $tr->{abstract},
        descriptionType => 'Abstract',
    } if $tr->{abstract};
    return $j;
}

# some auxiliary functions

use Exporter 'import';
our @EXPORT_OK = qw(expand_date expand_date_us);

our @month = ('???', 'January', 'February', 'March', 'April', 'May', 'June',
              'July', 'August', 'September', 'October', 'November', 'December');

# turn '2002-01-14' into '14 January 2002', etc.
sub expand_date {
    my ($d) = @_;

    if ($d =~ /^(\d{4})-?(\d{2})-?(\d{2})$/) {
	return sprintf("%d ", $3) . $month[$2] . " $1";
    } elsif ($d =~ /^(\d{4})-?(\d{2})$/) {
	return $month[$2] . " $1";
    }

    return $d;
}

# turn '2002-01-14' into 'January 14, 2002', etc.
sub expand_date_us {
    my ($d) = @_;

    if ($d =~ /^(\d{4})-?(\d{2})-?(\d{2})$/) {
	return $month[$2] . sprintf(" %d", $3) . ", $1";
    } elsif ($d =~ /^(\d{4})-?(\d{2})$/) {
	return $month[$2] . " $1";
    }

    return $d;
}

package TRAuthor;

use strict;
use utf8;

our $et_al = bless { surname => 'et al.' } => 'TRAuthor';

sub new {
    my ($s) = @_;
    my $a = bless {} => 'TRAuthor';

    if ($s =~ /^(?:(.+) )?([^ <>]+)(?: <([a-z]+[0-9]*)>)?$/) {
	$a->{'forenames'} = $1 if $1;
	$a->{'surname'} = $2;
	$a->{'crsid'} = $3 if $3;
	$a->{'surname'} =~ s/ / /g; # any NBSPs in surname are actually spaces
    } else {
	die("Unexpected format of author name: '$s'\n");
    }
    return $a;
}

sub fullname {
    my ($self) = @_;
    return join(' ', grep { defined } $self->{forenames}, $self->{surname});
}

sub surnamefirst {
    my ($self) = @_;
    return join(', ', grep { defined } $self->{surname}, $self->{forenames});
}

sub sortname {
    my ($self) = @_;
    return join(' ', $self->{surname}, $self->{forenames}, $self->{crsid});
}

sub url  {
    my ($self) = @_;
    return $self->{url} if $self->{url};
    my $url;
    my $verbose = 0;
    if (!$self->{url} && $self->{crsid}) {
        print STDERR "homepage('$self->{crsid}') = " if $verbose;
        $url = CLWeb::homepage_url($self->{crsid});
        print STDERR "'$url'\n" if $verbose;
	$self->{url} = $url if $url;
    }
    return $self->{url};
}

sub html {
    my ($self, $param) = @_;
    my $name = $param->{surnamefirst} ? $self->surnamefirst : $self->fullname;
    my $html = CLWeb::utf8_to_sgml(CLWeb::nobreak $name);
    my $url = $self->url;
    $html = '<a href="' . $url . '">' . $html . '</a>' if $url;
    return $html;
}

# return author data as a string as formatted in tr-database.txt
sub save($) {
    my ($a) = @_;
    my $s = $a->{surname};
    $s =~ s/ / /g; # encode any space in surname as NBSP
    $s = $a->{forenames}.' '.$s if $a->{forenames};
    $s .= " <$a->{crsid}>" if $a->{crsid};
    return $s;
}

1;