#!/usr/bin/perl
# Produce the data file entities.pm included by SGML.pm with:
#
# $ wget https://html.spec.whatwg.org/entities.json
# $ ./entities.pl entities.json >entities.pm
#
# See: https://html.spec.whatwg.org/multipage/named-characters.html

use 5.016;
use JSON::PP;

binmode STDOUT, ":utf8";
binmode STDERR, ":utf8";

# JSON text of the entity table; handed to decode_json below.
my $f;

# Set to 1 to download the table instead of reading it from the file named
# on the command line (or from standard input).
my $fetch_from_web = 0;

if ($fetch_from_web) {
    # Loaded at run time so that HTTP::Tiny is only pulled in when this branch
    # is actually taken; a "use" here would be executed at compile time
    # regardless of the condition.
    require HTTP::Tiny;
    my $r = HTTP::Tiny->new->get('https://html.spec.whatwg.org/entities.json');
    die "$r->{status} $r->{reason}\n" unless $r->{success};
    $f = $r->{content};
} else {
    # Undefined input record separator: one read returns the whole input.
    local $/;
    $f = <>;
}

# Fail with a clear message instead of a JSON parser error on empty input.
die "no JSON input\n" unless defined $f && length $f;

my $j = decode_json $f;

# The rest of the script treats $j as a hash keyed by entity name.
die "expected a JSON object at the top level\n" unless ref $j eq 'HASH';

# Keep only the keys of the form "&...;"; keys without the trailing
# semicolon do not match and are dropped.
my @e = grep { /^&.*;\z/ } keys %$j;

# Order by first code point, then by second code point.  An entity with a
# single code point has no second element; "// 0" spells out the value the
# numeric comparison would use for it anyway.  The final name comparison
# breaks ties between entities that map to the same code points; without it
# their relative order follows the order returned by keys(), which is not
# guaranteed to be the same from one run to the next.
@e = sort {
    $j->{$a}{codepoints}[0] <=> $j->{$b}{codepoints}[0]
        || ($j->{$a}{codepoints}[1] // 0) <=> ($j->{$b}{codepoints}[1] // 0)
        || $a cmp $b
} @e;

# Print a Perl hash literal that maps each entity name (without the leading
# "&" and trailing ";") to a double-quoted string of \N{U+...} escapes, one
# escape per code point.
say 'our %entities = (';
for my $e (@e) {
    my $codepoints = $j->{$e}{codepoints};

    # The sort above only looks at the first two code points, so anything
    # longer is treated as a fatal surprise rather than silently emitted.
    die "$e has more than two code points" if @$codepoints > 2;

    # Strip the "&" and ";" delimiters; skip a key with nothing in between.
    next unless $e =~ /^&(.+);\z/;
    my $name = $1;

    my $escapes = join '', map { sprintf '\\N{U+%x}', $_ } @$codepoints;
    printf qq{  %-25s => "%s",\n}, $name, $escapes;
}
say ');';
