#!/usr/bin/perl
# Generate an ASCII listing of the Microsoft/Adobe WGL4.0 Character Set
# Markus Kuhn <mkuhn@acm.org>

# Locations of the WGL4 HTML pages.  Each entry holds the base URL, the
# name of the introductory page and the names of the pages that carry the
# character tables.
my %wgl4_site = (
    adobe => {
        base   => 'http://www.adobe.com/supportservice/devrelations/opentype/',
        intro  => 'wgl4.htm',
        tables => ['wgl4b.htm', 'wgl4c.htm', 'wgl4d.htm', 'wgl4e.htm'],
    },
    microsoft => {
        base   => 'http://www.microsoft.com/typography/OTSPEC/',
        intro  => 'WGL4.HTM',
        tables => ['WGL4B.HTM', 'WGL4C.HTM', 'WGL4D.HTM', 'WGL4E.HTM'],
    },
);

# Change the key below to 'microsoft' to read the tables from the other site.
my $site = $wgl4_site{adobe};

# The rest of the script reads these three package variables.
$source = $site->{base};
$intro  = $site->{intro};
@tables = @{ $site->{tables} };

# Load the Unicode character database.
#
# Reads the semicolon-separated file named by $unicodedata and fills two
# package hashes keyed by the code point written as upper-case hex digits:
#   %name    - second field of each record (the character name)
#   %comment - twelfth field of each record (filled here; nothing else in
#              this script reads it)
#
# Dies if the file cannot be opened, or if any record does not consist of
# exactly 15 fields whose first field is a 4- to 6-digit hex code point.
$unicodedata = "UnicodeData-Latest.txt";

# read list of all Unicode names
open(my $udata, '<', $unicodedata)
    or die("Can't open Unicode database '$unicodedata': $!\n");
while (my $line = <$udata>) {
    # Drop the line terminator (LF or CRLF) so that the last field and the
    # error message below do not carry it.
    $line =~ s/\r?\n\z//;

    # A limit of -1 keeps trailing empty fields, so the field count is exact.
    my @field = split(/;/, $line, -1);

    # Code points beyond the BMP use 5 or 6 hex digits; accept those as well
    # as the classic 4-digit form, and nothing but hex digits.
    if (@field == 15 && $field[0] =~ /\A[0-9A-F]{4,6}\z/) {
        $name{$field[0]}    = $field[1];
        $comment{$field[0]} = $field[11];
    } else {
        die("Syntax error in line '$line' in file '$unicodedata'");
    }
}
close($udata);

# Emit the WGL4 listing on STDOUT.
#
# For every page named in @tables (relative to $source) the page is fetched
# by running the external 'webcopy' program and reading its standard output.
# Each line that holds a table cell of the form "U+xxxx" yields one output
# line "0xXXXX<TAB># <name>", where the name is looked up in %name.
# Progress messages and warnings go to STDERR.  Dies if 'webcopy' cannot be
# started.
print "# Windows Glyph List 4.0 (WGL4) Unicode subset generated from tables on\n";
print "# <$source$intro>.\n";
my $count = 0;
for my $table (@tables) {
    my $url = "$source$table";
    print STDERR "loading $url ...\n";

    # List-form pipe open: 'webcopy' gets the same arguments as before
    # ("-o" and the URL) but the URL is never handed to a shell.
    open(my $wgl4, '-|', 'webcopy', '-o', $url) or die "Can't call 'webcopy'";
    while (my $line = <$wgl4>) {
        next unless $line =~ /^\s*<TD.*>\s*U\+([0-9a-f]{4})\s*(<\/TD>)?\s*$/;

        # The pattern only admits lower-case hex digits; the keys of %name
        # are upper-case.
        my $ucs = uc $1;

        # A code point missing from the database still gets its line (with
        # an empty name, as before), but the gap is reported.
        my $uname = $name{$ucs};
        unless (defined $uname) {
            print STDERR "Warning: no Unicode name known for U+$ucs\n";
            $uname = '';
        }

        print "0x$ucs\t# $uname\n";
        $count++;
    }

    # close() on a pipe returns false when the child exited with a non-zero
    # status ($! is then 0 and $? holds the status) or when the close itself
    # failed.  Report it, but carry on with the remaining tables.
    unless (close($wgl4)) {
        print STDERR $!
            ? "Warning: error closing pipe from 'webcopy': $!\n"
            : "Warning: 'webcopy' exited with status $? for $url\n";
    }
}
print "# $count characters in above table\n";

# Plain print: the message is already fully interpolated and must not be
# interpreted as a printf format.
print STDERR "Warning: found $count characters, expected 654!\n" if ($count != 654);
