`MLLEX.ML`;

let type MLDialect = [| SPML; HTML; SML |];

let type MLLexRep = [|
    Ide:  string;
    QualIde: string list;
    NumLab: int;
    Tyvar: string;
    Nonide: string;
    Int:  int;
    Real:  real;
    Tok:  string;
    TokList:  string list;
    Unit: unit;
    Eof:  unit  |];

let type MLLex <=> MLLexRep;

let type MLLexMode <=> [| Expr; Type |];


let type MLCharClass = [|
    blank; letter; prime; underbar; hash; digit;
    delim; dot; lftpar; lftbra; rhtbra; pardec; symbol;
    tilde; star; apex; quote; percent; eof; illegal |];

let { MLCharClassFn a =
    if (a>=9 And a<=13)%HT LF VT FF CR% Or a=32%SP% then [|blank|]
    else if (a>=65 And a<=90)%A..Z% Or (a>=97 And a<=122)%a..z% then [|letter|]
    else if a=39%'% then [|prime|]
    else if a=95%_% then [|underbar|]
    else if a=35%#% then [|hash|]
    else if a>=48 And a<=57 %0..9% then [|digit|]
    else if a=44%,% Or a=59%;% then [|delim|]
    else if a=46%.% then [|dot|]
    else if a=40%(% then [|lftpar|]
    else if a=91%[% Or a=123%{% then [|lftbra|]
    else if a=41%)% Or a=93%]% Or a=125%}% then [|rhtbra|]
    else if a=124%|% then [|pardec|]
    else
        if
            a=33%!% Or a=36%$% Or a=38%&% Or a=43%+%
            Or a=45%-% Or a=47%/% Or a=58%:% Or (a>=60 And a<=64)%<=>?@%
            Or a=92%\% Or a=94%^%
        then
            [|symbol|]
    else if a=126%~% then [|tilde|]
    else if a=42%*% then [|star|]
    else if a=96%`% then [|apex|]
    else if a=34%"% then [|quote|]
    else if a=37%percent% then [|percent|]
    else if a= ~1 then [|eof|]
    else [|illegal|]: MLCharClass

ins MLCharClassArr = ArrayOfFn(129,\i.MLCharClassFn(i-2)) }

ins MLCharClass ch = MLCharClassArr sub (asc(ch)+2);


let { { { rec chars'sub(p,n,l) =
    if n>=0 then
        let ch=chr(n) in
            chars'sub(p,n-1,if p(MLCharClass(ch)) then ch::l else l)
    else l }
ins
    chars p = implode(chars'sub(p,127,nil)) }

enc { blanks = chars(\cl. cl is blank)

and alphanumerics = chars(\cl.
    cl is letter Or cl is prime Or cl is underbar Or cl is digit)

and digits = chars(\cl. cl is digit)

and pardecs = chars(\cl. cl is pardec)

and SPMLSymbols = chars(\cl.
    cl is symbol Or cl is hash Or cl is tilde Or cl is star Or cl is pardec)

and HTMLSymbols = chars(\cl.
    cl is symbol Or cl is hash Or cl is tilde Or cl is star Or cl is pardec)

and { isSMLSymbol( cl: MLCharClass ) =
    cl is symbol Or cl is tilde Or cl is pardec Or cl is apex Or cl is percent

enc SMLSymbols = chars(isSMLSymbol) }

and toklist'breaks =
    `_//` #@ chars(\cl.cl is quote Or cl is blank Or cl is illegal)

and PMLCommentBreaks = `/R/L` #@ chars(\cl. cl is percent)

and SMLCommentBreaks = `/R/L(*` }

enc { { rec spaces'sub n = if n=0 then [] else `/1` :: spaces'sub(n-1) }
ins
    spaces n = implode(spaces'sub(n)) } }


ins MLReadLexFromBufStream (dialect: MLDialect) bufStream =
%(Used to be called "MLTokenize")%

    let { { scanpossibleexponent(ch) =
        if ch = `E` Or ch = `e` then
            let cl = MLCharClass(PeekChar(2,bufStream)) in
                if
                    if cl is digit then true
                    else
                        if
                            if cl is tilde
                            then MLCharClass(PeekChar(3,bufStream)) is digit
                            else false
                        then ( Do(ScanChar(bufStream)); true )
                        else false
                then (
                    Do(ScanChar(bufStream)); Do(ScanWhileIn(digits,bufStream));
                    true )
                else false
        else false

    and scantokescchar(ch) =
        if ch = `^` then
            let a = asc(GetChar(bufStream)) in
                if a>=64 And a<=95 then chr(a-64)
                else failwith `Scan`
        else
            let a = asc(ch) in
                if a>=49 And a<=57 %1..9% then spaces(a-48)
                else if ch = `0` then spaces(10)
                else if ch = `R` then `/R`
                else if ch = `L` then `/L`
                else if ch = `T` then `/T`
                else if ch = `B` then `/B`
                else if ch = `E` then `/E`
                else if ch = `N` then `/N`
                else if ch = `D` then `/D`
                else if MLCharClass(ch) is illegal then failwith `Scan`
                else ch

    and scanstringescchar(ch) =
        let a = asc(ch) in
            if ch = `^` then
                let a = asc(GetChar(bufStream)) in
                    if a>=64 And a<=95 then chr(a-64)
                    else failwith `Scan`
            else if a>=48 And a<=57 %0..9% then
                let a = a-48 and b = asc(GetChar(bufStream)) - 48 in
                    if b>=0 And b<=9 then
                        let c = asc(GetChar(bufStream)) - 48 in
                            if c>=0 And c<=9 then
                                let n = (a*10+b)*10+c in
                                    if n<=255 then chr(n) else failwith `Scan`
                            else failwith `Scan`
                    else failwith `Scan`
            else if ch = `n` then `/L`
            else if ch = `t` then `/T`
            else if ch = `"` Or ch = `\` then ch
            else if MLCharClass(ch) is blank then (
                Do(ScanWhileIn(blanks,bufStream)); SkipTok(bufStream);
                if GetChar(bufStream) = `\` then ``
                else failwith `Scan` )
            else failwith `Scan` }

    enc rec { scan(m: MLLexMode): MLLex =
        let cl = MLCharClass(ScanChar(bufStream)) in absMLLex(
            if cl is blank then () else SetPrompt(`= `,bufStream);
            case cl of [|
            blank.  scanblank(m);
            letter.  scanpossqualide(scanletter());
            prime.  scanprime();
            underbar.  scanunderbar();
            hash.  scanhash();
            digit.  scandigit();
            delim.  [| Nonide = GetTok(bufStream) |];
            dot.  scandot();
            lftpar.  scanlftpar(m);
            lftbra.  scanlftbra();
            rhtbra.  [| Nonide = GetTok(bufStream) |];
            pardec.  scanpardec();
            symbol.  scansymbol();
            tilde.  scantilde();
            star.  scanstar(m);
            apex.  scanapex();
            quote.  scanquote();
            percent.  scanpercent(m);
            eof.  [| Eof |];
            illegal.  failwith `Scan`  |] )

    and scanblank(m) = (
        Do(ScanWhileIn(blanks,bufStream)); SkipTok(bufStream);
        repMLLex(scan(m)) )

    and scanpossqualide(s) =
        case dialect of [|
        SPML . [| Ide = s |];
        HTML . [| Ide = s |];
        SML .
            if PeekChar(1,bufStream)=`.` then [| QualIde = s::scanqualide() |]
            else [| Ide = s |] |]

    % The following function is only applicable in SML %
    and scanqualide() =
        let _ = GetChar(bufStream)
        enc cl = MLCharClass(PeekChar(1,bufStream))
        enc s =
            if cl is letter then scanletter()
            else if isSMLSymbol(cl) Or cl is star then scansymbol'()
            else failwith `Scan`
        in
            s :: if PeekChar(1,bufStream)=`.` then scanqualide() else []

    and scanletter() = (
        Do(ScanWhileIn(alphanumerics,bufStream));
        GetTok(bufStream) )

    and scanprime() = [| Tyvar = scanletter() |]
	% RH: was [| Tyvar = implode(`*`::tl(explode(scanletter()))) |] %
        % Memo:  Temporary hack for type-checker %

    and scanunderbar() =
        case dialect of [|
        SPML . [| Ide = scanletter() |];
        HTML . [| Nonide = GetTok(bufStream) |];
        SML . [| Nonide = GetTok(bufStream) |] |]

    and scanhash() =
        case dialect of [|
        SPML . scansymbol();
        HTML . scansymbol();
        SML .  [| Nonide = GetTok(bufStream) |]
%          if MLCharClass(PeekChar(1,bufStream)) is digit then
                [| NumLab = (
                    SkipTok(bufStream);
                    Do(ScanWhileIn(digits,bufStream));
                    intoftok(GetTok(bufStream))
                        ?? [`IntOfString`] failwith `Scan` ) |]
            else
                failwith `Scan` 
%       |]

    and scandigit() =
        let ch = ScanWhileIn(digits,bufStream) in
            if
                if
                    if ch = `.`
                    then MLCharClass(PeekChar(2,bufStream)) is digit
                    else false
                then (
                    Do(ScanChar(bufStream)); Do(ScanChar(bufStream));
                    Do(scanpossibleexponent(ScanWhileIn(digits,bufStream)));
                    true )
                else
                    scanpossibleexponent(ch)
            then
                [| Real =
                    realofstring(GetTok(bufStream))
                        ?? "FloatOfString" failwith `Scan` |]
            else
                [| Int =
                    intoftok(GetTok(bufStream))
                        ?? [`IntOfString`] failwith `Scan` |]

    and scandot() = (
        Do(ScanWhileIn(`.`,bufStream));
        [| Nonide = GetTok(bufStream) |] )

    and scanlftpar(m) =
        let ch = PeekChar(1,bufStream) in
            if ch = `)` then
                ( Do(ScanChar(bufStream)); SkipTok(bufStream); [| Unit |] )
            else if ch = `*` then
                case dialect of [|
                SPML . scanlftbra();
                HTML . scanlftbra();
                SML . ( Do(ScanChar(bufStream)); scanSMLcomment(m,0) ) |]
            else
                scanlftbra()

    and scanSMLcomment(m,n) = (
        Do(ScanUntilIn(SMLCommentBreaks,bufStream)); SkipTok(bufStream);
        let cl = MLCharClass(GetChar(bufStream)) in
            if cl is lftpar then
                if PeekChar(1,bufStream) = `*` then
                    ( Do(ScanChar(bufStream)); scanSMLcomment(m,n+1) )
                else
                    scanSMLcomment(m,n)
            else if cl is star then
                if PeekChar(1,bufStream) = `)` then (
                    Do(GetChar(bufStream));
                    if n=0 then repMLLex(scan(m)) else scanSMLcomment(m,n-1) )
                else
                    scanSMLcomment(m,n)
            else if cl is eof then repMLLex(scan(m))
            else scanSMLcomment(m,n) )
            
    and scanlftbra() = (
        if case dialect of [| SPML . true; HTML . true; SML . false |]
        then Do(ScanWhileIn(pardecs,bufStream))
        else ();
        [| Nonide = GetTok(bufStream) |] )

    and scanpardec() =
        if case dialect of [| SPML . true; HTML . true; SML . false |] then
            let cl = MLCharClass(ScanWhileIn(pardecs,bufStream))
            in
                if cl is rhtbra then (
                    Do(ScanChar(bufStream)); [| Nonide = GetTok(bufStream) |] )
                else
                    scansymbol()
        else
            scansymbol()

    and scansymbol() = scanpossqualide(scansymbol'())

    and scansymbol'() =
        let ch = ScanWhileIn(
            case dialect of [|
                SPML . SPMLSymbols;
                HTML . HTMLSymbols;
                SML . SMLSymbols |],
            bufStream)
        in
            if (dialect is SML) And (ch = `*`) then
                if PeekChar(2,bufStream) = `)` then failwith `Scan`
                else ( Do(ScanChar(bufStream)); scansymbol'() )
            else
                GetTok(bufStream)

    and scantilde() =
        if MLCharClass(PeekChar(1,bufStream)) is digit then scandigit()
        else scansymbol()

    and scanstar(m) =
        if case dialect of [| SPML . true; HTML . true; SML . false |] then
            case repMLLexMode(m) of [|
            Expr . scansymbol();
            Type . (
                outtok(`***Warning:  Old-style type variable`); newline(1);
                Do(ScanWhileIn(`*`,bufStream));
		[| Tyvar = implode(map(\c.if c=`*` then `'` else c)(explode(scanletter()))) |] ) |]
                % RH: was[| Tyvar = scanletter() |] ) |]%
		%Allows "*_a" etc.%
        else
            if (dialect is SML) And (PeekChar(1,bufStream) = `)`) then
                ( Do(ScanChar(bufStream)); [| Nonide = GetTok(bufStream) |] )
            else
                scansymbol()

    and scanapex() =
        if case dialect of [| SPML . true; HTML . true; SML . false |]
        then ( SkipTok(bufStream); scantok(nil) )
        else scansymbol()

    and scanquote() = (
        SkipTok(bufStream);
        case dialect of [|
        SPML . scantoklist(nil,nil);
        HTML . scantoklist(nil,nil);
        SML . scanstring(nil) |] )

    and scantok(l) =
        let l = (Do(ScanUntilIn(`/`//`,bufStream)); GetTok(bufStream)) :: l
        in
            if GetChar(bufStream) = `//` then
                scantok(scantokescchar(GetChar(bufStream))::l)
            else %cl is apex Or cl is eof% [| Tok = implode(rev(l)) |]

    and scanstring(l) =
        let l = (Do(ScanUntilIn(`"\`,bufStream)); GetTok(bufStream)) :: l
        in
            if GetChar(bufStream) = `\` then
                scanstring(scanstringescchar(GetChar(bufStream))::l)
            else %cl is quote Or cl is eof% [| Tok = implode(rev(l)) |]

    and scantoklist(l,lt) =
        let _ = ScanUntilIn(toklist'breaks,bufStream)
        enc l = GetTok(bufStream) :: l
        enc ch = GetChar(bufStream)
        enc cl = MLCharClass(ch)
        in
            if cl is quote Or cl is eof then
                let t = implode(rev(l)) in
                    [| TokList = rev(if t=`` then lt else t::lt) |]
            else if cl is blank then
                let t = implode(rev(l)) in
                    scantoklist(nil,if t=`` then lt else t::lt)
            else if ch = `_` then scantoklist(` `::l,lt)
            else if ch = `//` then
                let ch = GetChar(bufStream) in
                    if ch = `\` then
                        let t = implode(rev(l)) in
                            scantoklist(
                                nil,
                                if t=`` then ``::lt else ``::(t::lt) )
                    else
                        scantoklist(scantokescchar(ch)::l,lt)
            else %cl is illegal% failwith `Scan`

    and scanpercent(m) =
        case dialect of [|
        SPML . scanpercent'(m);
        HTML . scanpercent'(m);
        SML . scansymbol() |]

    and scanpercent'(m) = (
        Do(ScanUntilIn(PMLCommentBreaks,bufStream)); SkipTok(bufStream);
        if MLCharClass(GetChar(bufStream)) is percent then repMLLex(scan(m))
        else scanpercent'(m) ) } }

    in
        scan;



% The following function assumes that the string corresponds to a valid Lex,
  it should really check it is valid. %
% It is not defined yet for Lex variants Tok and TokList %
% It does not expect qualified names. %
% It should really take a LexMode parameter!! %

let { { rec lexOfValidTokPardec(to,l): MLLexRep =
    if null l then [| Ide = to |]
    else
        let cl = MLCharClass(hd(l)) in
            if cl is pardec then lexOfValidTokPardec(to,tl(l))
            else if cl is rhtbra then [| Nonide = to |]
            else [| Ide = to |] }

and { rec lexOfValidTokStar(to,l): MLLexRep =
    if null l then [| Ide = to |]
    else
        let cl = MLCharClass(hd(l)) in
            if cl is star then lexOfValidTokStar(to,tl(l))
            else if
                cl is letter Or cl is prime Or cl is underbar Or cl is digit
            then [| Tyvar = to |]
            else %cl is symbol ...% [| Ide = to |] } }

ins MLLexOfValidTok (dialect: MLDialect) (to): MLLex =
    let l = explode to in
        absMLLex(
            if null l then failwith `LexOfValidTok`
            else
                case MLCharClass(hd(l)) of [|
                blank. failwith `LexOfValidTok`;
                letter. [| Ide = to |];
                prime.
                    case dialect of [|
                    SPML . [| Ide = to |];
                    HTML . [| Ide = to |];
                    SML . [| Tyvar = to |] |];
                underbar.
                    case dialect of [|
                    SPML . [| Ide = to |];
                    HTML . [| Nonide = to |];
                    SML . [| Nonide = to |] |];
                hash.
                    case dialect of [|
                    SPML . [| Ide = to |];
                    HTML . [| Ide = to |];
                    SML .  [| Nonide = to |]
%                          [| NumLab =
                        intoftok(substring(to,2,LengthTok(to)-1))
                            ?? [`IntOfString`] failwith `LexOfValidTok` |] 
%                   |];
                digit. failwith `LexOfValidTok`;
                delim. [| Nonide = to |];
                dot. [| Nonide = to |];
                lftpar. [| Nonide = to |];  % Should check whether "()" %
                lftbra. [| Nonide = to |];
                rhtbra. [| Nonide = to |];
                pardec. lexOfValidTokPardec(to,tl(l));
                symbol. [| Ide = to |];
                tilde.
                    if
                        if null(tl(l)) then false
                        else MLCharClass(hd(tl(l))) is digit
                    then failwith `LexOfValidTok`
                    else [| Ide = to |];
                star. lexOfValidTokStar(to,tl(l));
                apex.
                    case dialect of [|
                    SPML . failwith `LexOfValidTok`;
                    HTML . failwith `LexOfValidTok`;
                    SML . [| Ide = to |] |];
                quote. failwith `LexOfValidTok`;
                percent.
                    case dialect of [|
                    SPML . failwith `LexOfValidTok`;
                    HTML . failwith `LexOfValidTok`;
                    SML . [| Ide = to |] |];
                eof. failwith `LexOfValidTok`;
                illegal. failwith `LexOfValidTok` |] );


let MLTokOfLex(lex: MLLex) =
    case repMLLex(lex) of [|
    Ide = to . to;
    QualIde = l . MLQualIdeOfStringList(l);
    NumLab = i . MLNumLabOfInt(i);
    Tyvar = to . to;
    Nonide = to . to;
    Int . failwith `TokOfLex`;
    Real . failwith `TokOfLex`;
    Tok . failwith `TokOfLex`;
    TokList . failwith `TokOfLex`;
    Unit . failwith `TokOfLex`;
    Eof . failwith `TokOfLex` |];

let MLLexHasWord( lex: MLLex, wd: string ): bool =
    case repMLLex(lex) of [|
    Ide = to . to = wd;
    QualIde = l . MLQualIdeOfStringList(l) = wd;
    NumLab = i . MLNumLabOfInt(i) = wd;
    Tyvar = to . to = wd;
    Nonide = to . to = wd;
    Int . false;
    Real . false;
    Tok . false;
    TokList . false;
    Unit . false;
    Eof . false |];

let { printtoklist(l) = (outtok `"`; listprint(l,outtok,` `); outtok `"`)

and outqualide(l) = listprint(l,outtok,`.`) }

ins { MLOutLex lx =
    case repMLLex(lx) of [|
    Ide = wd . outtok(wd);
    QualIde = l . outqualide(l);
    NumLab = i . outtok(MLNumLabOfInt(i));
    Tyvar = wd . outtok(wd);
    Nonide = wd . outtok(wd);
    Int = i . printint(i);
    Real = r . printreal(r);
    Tok = t . printtok(t);
    TokList = l . printtoklist(l);
    Unit . outtok(`()`);
    Eof . outtok(`<Eof>`) |]

and MLPrLex lx =
    case repMLLex(lx) of [|
    Ide = wd . printtok(wd);
    QualIde = l . ( outtok(`/``); outqualide(l); outtok(`/``) );
    NumLab = i . printtok(MLNumLabOfInt(i));
    Tyvar = wd . printtok(wd);
    Nonide = wd . printtok(wd);
    Int = i . printint(i);
    Real = r . printreal(r);
    Tok = t . printtok(t);
    TokList = l . printtoklist(l);
    Unit . printtok(`()`);
    Eof . outtok(`<Eof>`) |] };
