#!/usr/bin/perl
# Generate an ASCII listing of the Microsoft/Adobe WGL4.0 Character Set
# Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/>

# Pick the site from which the WGL4 tables are fetched.  $source is the base
# URL, $intro the page named in the generated header line, and @tables the
# pages that are downloaded and scanned for code points.  The first branch
# is the one that runs; the Adobe settings are kept, disabled, in the other.
if (1) {
    $source = 'https://docs.microsoft.com/en-gb/typography/opentype/spec/';
    $intro  = 'wgl4';
    @tables = qw(wgl4b wgl4c wgl4d wgl4e);
} else {
    # Alternative Adobe base URLs, left commented out:
    #$source='http://www.adobe.com/supportservice/devrelations/opentype/';
    #$source='http://partners.adobe.com/asn/tech/type/opentype/appendices/';
    $source = 'http://partners.adobe.com/public/developer/opentype/';
    $intro  = 'index_wgl.html';
    @tables = qw(
        index_wgl1.html index_wgl2.html
        index_wgl3.html index_wgl4.html
    );
}

$unicodedata = "UnicodeData.txt";

# Read the list of all Unicode names.
#
# Every line of the database must consist of exactly 15 semicolon-separated
# fields, the first being a code point written as 4-8 uppercase hex digits.
# The second field (the character name) is stored in %name and the twelfth
# field in %comment, both keyed by the numeric code point.  Any line that
# does not have this shape aborts the script.
open(my $udata, '<', $unicodedata)
    or die("Can't open Unicode database '$unicodedata': $!\n");
while (my $line = <$udata>) {
    # Strip the line terminator so it neither ends up in the last field nor
    # in the error message below.
    chomp $line;

    # A limit of -1 keeps trailing empty fields, so the count is reliable.
    my @field = split /;/, $line, -1;

    # The code point is hex digits only (the old pattern also let a ','
    # through by accident).
    if (@field == 15 && $field[0] =~ /\A[0-9A-F]{4,8}\z/) {
        my $ucs = hex($field[0]);
        $name{$ucs}    = $field[1];
        $comment{$ucs} = $field[11];
    } else {
        die("Syntax error in line '$line' in file '$unicodedata'");
    }
}
close($udata);

# Emit the listing on STDOUT: a two-line header, one "0xXXXX<TAB># NAME" line
# per code point found in the downloaded tables, and a trailing count.
# Progress and problems are reported on STDERR.
print "# Windows Glyph List 4.0 (WGL4) Unicode subset generated from tables on\n";
print "# <$source$intro>.\n";
my $count = 0;
for my $table (@tables) {
    my $url = "$source$table";
    print STDERR "loading $url\n";

    # List-form pipe open runs wget directly, so the URL is never
    # interpreted by a shell.
    open(my $wgl4, '-|', 'wget', '-O', '-', $url)
        or die "Can't call 'wget'";
    while (my $html = <$wgl4>) {
        # Only lines holding a single table cell of the form "U+XXXX"
        # (exactly four hex digits) contribute a code point.
        if ($html =~ /^\s*<td.*>\s*U\+([0-9a-fA-F]{4})\s*(<\/td>)?\s*$/) {
            my $ucs = hex($1);
            # Code points without an entry in %name are listed with an
            # empty name.
            my $label = defined $name{$ucs} ? $name{$ucs} : '';
            printf("0x%04X\t# %s\n", $ucs, $label);
            $count++;
        }
    }

    # close() on a pipe returns false when the command exited non-zero;
    # report that instead of silently producing a short listing.
    close($wgl4)
        or warn "Warning: 'wget' failed for $url (exit status "
              . ($? >> 8) . ")\n";
}
print "# $count characters in above table\n";

# Sanity check against the number of characters the tables are expected to
# yield.
my $expected = 657;
warn "Warning: found $count characters, expected $expected!\n"
    if $count != $expected;
