File ‹TPTP_Parser/ml_yacc_lib.ML›
signature STREAM =
sig type 'xa stream
val streamify : (unit -> '_a) -> '_a stream
val cons : '_a * '_a stream -> '_a stream
val get : '_a stream -> '_a * '_a stream
end
signature LR_TABLE =
sig
datatype ('a,'b) pairlist = EMPTY | PAIR of 'a * 'b * ('a,'b) pairlist
datatype state = STATE of int
datatype term = T of int
datatype nonterm = NT of int
datatype action = SHIFT of state
| REDUCE of int
| ACCEPT
| ERROR
type table
val numStates : table -> int
val numRules : table -> int
val describeActions : table -> state ->
(term,action) pairlist * action
val describeGoto : table -> state -> (nonterm,state) pairlist
val action : table -> state * term -> action
val goto : table -> state * nonterm -> state
val initialState : table -> state
exception Goto of state * nonterm
val mkLrTable : {actions : ((term,action) pairlist * action) array,
gotos : (nonterm,state) pairlist array,
numStates : int, numRules : int,
initialState : state} -> table
end
signature TOKEN =
sig
structure LrTable : LR_TABLE
datatype ('a,'b) token = TOKEN of LrTable.term * ('a * 'b * 'b)
val sameToken : ('a,'b) token * ('a,'b) token -> bool
end
signature LR_PARSER =
sig
structure Stream: STREAM
structure LrTable : LR_TABLE
structure Token : TOKEN
sharing LrTable = Token.LrTable
exception ParseError
val parse : {table : LrTable.table,
lexer : ('_b,'_c) Token.token Stream.stream,
arg: 'arg,
saction : int *
'_c *
(LrTable.state * ('_b * '_c * '_c)) list *
'arg ->
LrTable.nonterm *
('_b * '_c * '_c) *
((LrTable.state *('_b * '_c * '_c)) list),
void : '_b,
ec : { is_keyword : LrTable.term -> bool,
noShift : LrTable.term -> bool,
preferred_change : (LrTable.term list * LrTable.term list) list,
errtermvalue : LrTable.term -> '_b,
showTerminal : LrTable.term -> string,
terms: LrTable.term list,
error : string * '_c * '_c -> unit
},
lookahead : int
} -> '_b *
(('_b,'_c) Token.token Stream.stream)
end
signature LEXER =
sig
structure UserDeclarations :
sig
type ('a,'b) token
type pos
type svalue
end
val makeLexer : (int -> string) -> unit ->
(UserDeclarations.svalue,UserDeclarations.pos) UserDeclarations.token
end
signature ARG_LEXER =
sig
structure UserDeclarations :
sig
type ('a,'b) token
type pos
type svalue
type arg
end
val makeLexer : (int -> string) -> UserDeclarations.arg -> unit ->
(UserDeclarations.svalue,UserDeclarations.pos) UserDeclarations.token
end
signature PARSER_DATA =
sig
type pos
type svalue
type arg
type result
structure LrTable : LR_TABLE
structure Token : TOKEN
sharing Token.LrTable = LrTable
structure Actions :
sig
val actions : int * pos *
(LrTable.state * (svalue * pos * pos)) list * arg->
LrTable.nonterm * (svalue * pos * pos) *
((LrTable.state *(svalue * pos * pos)) list)
val void : svalue
val extract : svalue -> result
end
structure EC :
sig
val is_keyword : LrTable.term -> bool
val noShift : LrTable.term -> bool
val preferred_change : (LrTable.term list * LrTable.term list) list
val errtermvalue : LrTable.term -> svalue
val showTerminal : LrTable.term -> string
val terms: LrTable.term list
end
val table : LrTable.table
end
signature PARSER =
sig
structure Token : TOKEN
structure Stream : STREAM
exception ParseError
type pos
type result
type arg
type svalue
val makeLexer : (int -> string) ->
(svalue,pos) Token.token Stream.stream
val parse : int * ((svalue,pos) Token.token Stream.stream) *
(string * pos * pos -> unit) * arg ->
result * (svalue,pos) Token.token Stream.stream
val sameToken : (svalue,pos) Token.token * (svalue,pos) Token.token ->
bool
end
signature ARG_PARSER =
sig
structure Token : TOKEN
structure Stream : STREAM
exception ParseError
type arg
type lexarg
type pos
type result
type svalue
val makeLexer : (int -> string) -> lexarg ->
(svalue,pos) Token.token Stream.stream
val parse : int * ((svalue,pos) Token.token Stream.stream) *
(string * pos * pos -> unit) * arg ->
result * (svalue,pos) Token.token Stream.stream
val sameToken : (svalue,pos) Token.token * (svalue,pos) Token.token ->
bool
end
functor Join(structure Lex : LEXER
structure ParserData: PARSER_DATA
structure LrParser : LR_PARSER
sharing ParserData.LrTable = LrParser.LrTable
sharing ParserData.Token = LrParser.Token
sharing type Lex.UserDeclarations.svalue = ParserData.svalue
sharing type Lex.UserDeclarations.pos = ParserData.pos
sharing type Lex.UserDeclarations.token = ParserData.Token.token)
: PARSER =
struct
structure Token = ParserData.Token
structure Stream = LrParser.Stream
exception ParseError = LrParser.ParseError
type arg = ParserData.arg
type pos = ParserData.pos
type result = ParserData.result
type svalue = ParserData.svalue
val makeLexer = LrParser.Stream.streamify o Lex.makeLexer
val parse = fn (lookahead,lexer,error,arg) =>
(fn (a,b) => (ParserData.Actions.extract a,b))
(LrParser.parse {table = ParserData.table,
lexer=lexer,
lookahead=lookahead,
saction = ParserData.Actions.actions,
arg=arg,
void= ParserData.Actions.void,
ec = {is_keyword = ParserData.EC.is_keyword,
noShift = ParserData.EC.noShift,
preferred_change = ParserData.EC.preferred_change,
errtermvalue = ParserData.EC.errtermvalue,
error=error,
showTerminal = ParserData.EC.showTerminal,
terms = ParserData.EC.terms}}
)
val sameToken = Token.sameToken
end
functor JoinWithArg(structure Lex : ARG_LEXER
structure ParserData: PARSER_DATA
structure LrParser : LR_PARSER
sharing ParserData.LrTable = LrParser.LrTable
sharing ParserData.Token = LrParser.Token
sharing type Lex.UserDeclarations.svalue = ParserData.svalue
sharing type Lex.UserDeclarations.pos = ParserData.pos
sharing type Lex.UserDeclarations.token = ParserData.Token.token)
: ARG_PARSER =
struct
structure Token = ParserData.Token
structure Stream = LrParser.Stream
exception ParseError = LrParser.ParseError
type arg = ParserData.arg
type lexarg = Lex.UserDeclarations.arg
type pos = ParserData.pos
type result = ParserData.result
type svalue = ParserData.svalue
val makeLexer = fn s => fn arg =>
LrParser.Stream.streamify (Lex.makeLexer s arg)
val parse = fn (lookahead,lexer,error,arg) =>
(fn (a,b) => (ParserData.Actions.extract a,b))
(LrParser.parse {table = ParserData.table,
lexer=lexer,
lookahead=lookahead,
saction = ParserData.Actions.actions,
arg=arg,
void= ParserData.Actions.void,
ec = {is_keyword = ParserData.EC.is_keyword,
noShift = ParserData.EC.noShift,
preferred_change = ParserData.EC.preferred_change,
errtermvalue = ParserData.EC.errtermvalue,
error=error,
showTerminal = ParserData.EC.showTerminal,
terms = ParserData.EC.terms}}
)
val sameToken = Token.sameToken
end;
structure LrTable : LR_TABLE =
struct
open Array List
infix 9 sub
datatype ('a,'b) pairlist = EMPTY
| PAIR of 'a * 'b * ('a,'b) pairlist
datatype term = T of int
datatype nonterm = NT of int
datatype state = STATE of int
datatype action = SHIFT of state
| REDUCE of int
| ACCEPT
| ERROR
exception Goto of state * nonterm
type table = {states: int, rules : int,initialState: state,
action: ((term,action) pairlist * action) array,
goto : (nonterm,state) pairlist array}
val numStates = fn ({states,...} : table) => states
val numRules = fn ({rules,...} : table) => rules
val describeActions =
fn ({action,...} : table) =>
fn (STATE s) => action sub s
val describeGoto =
fn ({goto,...} : table) =>
fn (STATE s) => goto sub s
fun findTerm (T term,row,default) =
let fun find (PAIR (T key,data,r)) =
if key < term then find r
else if key=term then data
else default
| find EMPTY = default
in find row
end
fun findNonterm (NT nt,row) =
let fun find (PAIR (NT key,data,r)) =
if key < nt then find r
else if key=nt then SOME data
else NONE
| find EMPTY = NONE
in find row
end
val action = fn ({action,...} : table) =>
fn (STATE state,term) =>
let val (row,default) = action sub state
in findTerm(term,row,default)
end
val goto = fn ({goto,...} : table) =>
fn (a as (STATE state,nonterm)) =>
case findNonterm(nonterm,goto sub state)
of SOME state => state
| NONE => raise (Goto a)
val initialState = fn ({initialState,...} : table) => initialState
val mkLrTable = fn {actions,gotos,initialState,numStates,numRules} =>
({action=actions,goto=gotos,
states=numStates,
rules=numRules,
initialState=initialState} : table)
end;
structure Stream :> STREAM =
struct
datatype 'a str = EVAL of 'a * 'a str Unsynchronized.ref | UNEVAL of (unit->'a)
type 'a stream = 'a str Unsynchronized.ref
fun get(Unsynchronized.ref(EVAL t)) = t
| get(s as Unsynchronized.ref(UNEVAL f)) =
let val t = (f(), Unsynchronized.ref(UNEVAL f)) in s := EVAL t; t end
fun streamify f = Unsynchronized.ref(UNEVAL f)
fun cons(a,s) = Unsynchronized.ref(EVAL(a,s))
end;
signature FIFO =
sig type 'a queue
val empty : 'a queue
exception Empty
val get : 'a queue -> 'a * 'a queue
val put : 'a * 'a queue -> 'a queue
end
structure LrParser :> LR_PARSER =
struct
structure LrTable = LrTable
structure Stream = Stream
fun eqT (LrTable.T i, LrTable.T i') = i = i'
structure Token : TOKEN =
struct
structure LrTable = LrTable
datatype ('a,'b) token = TOKEN of LrTable.term * ('a * 'b * 'b)
val sameToken = fn (TOKEN(t,_),TOKEN(t',_)) => eqT (t,t')
end
open LrTable
open Token
val DEBUG1 = false
val DEBUG2 = false
exception ParseError
exception ParseImpossible of int
structure Fifo :> FIFO =
struct
type 'a queue = ('a list * 'a list)
val empty = (nil,nil)
exception Empty
fun get(a::x, y) = (a, (x,y))
| get(nil, nil) = raise Empty
| get(nil, y) = get(rev y, nil)
fun put(a,(x,y)) = (x,a::y)
end
type ('a,'b) elem = (state * ('a * 'b * 'b))
type ('a,'b) stack = ('a,'b) elem list
type ('a,'b) lexv = ('a,'b) token
type ('a,'b) lexpair = ('a,'b) lexv * (('a,'b) lexv Stream.stream)
type ('a,'b) distanceParse =
('a,'b) lexpair *
('a,'b) stack *
(('a,'b) stack * ('a,'b) lexpair) Fifo.queue *
int ->
('a,'b) lexpair *
('a,'b) stack *
(('a,'b) stack * ('a,'b) lexpair) Fifo.queue *
int *
action option
type ('a,'b) ecRecord =
{is_keyword : term -> bool,
preferred_change : (term list * term list) list,
error : string * 'b * 'b -> unit,
errtermvalue : term -> 'a,
terms : term list,
showTerminal : term -> string,
noShift : term -> bool}
local
val println = fn s => (TextIO.print s; TextIO.print "\n")
val showState = fn (STATE s) => "STATE " ^ (Int.toString s)
in
fun printStack(stack: ('a,'b) stack, n: int) =
case stack
of (state,_) :: rest =>
(TextIO.print("\t" ^ Int.toString n ^ ": ");
println(showState state);
printStack(rest, n+1))
| nil => ()
fun prAction showTerminal
(stack as (state,_) :: _, next as (TOKEN (term,_),_), action) =
(println "Parse: state stack:";
printStack(stack, 0);
TextIO.print(" state="
^ showState state
^ " next="
^ showTerminal term
^ " action="
);
case action
of SHIFT state => println ("SHIFT " ^ (showState state))
| REDUCE i => println ("REDUCE " ^ (Int.toString i))
| ERROR => println "ERROR"
| ACCEPT => println "ACCEPT")
| prAction _ (_,_,action) = ()
end
val ssParse =
fn (table,showTerminal,saction,fixError,arg) =>
let val prAction = prAction showTerminal
val action = LrTable.action table
val goto = LrTable.goto table
fun parseStep(args as
(lexPair as (TOKEN (terminal, value as (_,leftPos,_)),
lexer
),
stack as (state,_) :: _,
queue)) =
let val nextAction = action (state,terminal)
val _ = if DEBUG1 then prAction(stack,lexPair,nextAction)
else ()
in case nextAction
of SHIFT s =>
let val newStack = (s,value) :: stack
val newLexPair = Stream.get lexer
val (_,newQueue) =Fifo.get(Fifo.put((newStack,newLexPair),
queue))
in parseStep(newLexPair,(s,value)::stack,newQueue)
end
| REDUCE i =>
(case saction(i,leftPos,stack,arg)
of (nonterm,value,stack as (state,_) :: _) =>
parseStep(lexPair,(goto(state,nonterm),value)::stack,
queue)
| _ => raise (ParseImpossible 197))
| ERROR => parseStep(fixError args)
| ACCEPT =>
(case stack
of (_,(topvalue,_,_)) :: _ =>
let val (token,restLexer) = lexPair
in (topvalue,Stream.cons(token,restLexer))
end
| _ => raise (ParseImpossible 202))
end
| parseStep _ = raise (ParseImpossible 204)
in parseStep
end
val distanceParse =
fn (table,showTerminal,saction,arg) =>
let val prAction = prAction showTerminal
val action = LrTable.action table
val goto = LrTable.goto table
fun parseStep(lexPair,stack,queue,0) = (lexPair,stack,queue,0,NONE)
| parseStep(lexPair as (TOKEN (terminal, value as (_,leftPos,_)),
lexer
),
stack as (state,_) :: _,
queue,distance) =
let val nextAction = action(state,terminal)
val _ = if DEBUG1 then prAction(stack,lexPair,nextAction)
else ()
in case nextAction
of SHIFT s =>
let val newStack = (s,value) :: stack
val newLexPair = Stream.get lexer
in parseStep(newLexPair,(s,value)::stack,
Fifo.put((newStack,newLexPair),queue),distance-1)
end
| REDUCE i =>
(case saction(i,leftPos,stack,arg)
of (nonterm,value,stack as (state,_) :: _) =>
parseStep(lexPair,(goto(state,nonterm),value)::stack,
queue,distance)
| _ => raise (ParseImpossible 240))
| ERROR => (lexPair,stack,queue,distance,SOME nextAction)
| ACCEPT => (lexPair,stack,queue,distance,SOME nextAction)
end
| parseStep _ = raise (ParseImpossible 242)
in parseStep : ('_a,'_b) distanceParse
end
fun mkFixError({is_keyword,terms,errtermvalue,
preferred_change,noShift,
showTerminal,error,...} : ('_a,'_b) ecRecord,
distanceParse : ('_a,'_b) distanceParse,
minAdvance,maxAdvance)
(lexv as (TOKEN (term,value as (_,leftPos,_)),_),stack,queue) =
let val _ = if DEBUG2 then
error("syntax error found at " ^ (showTerminal term),
leftPos,leftPos)
else ()
fun tokAt(t,p) = TOKEN(t,(errtermvalue t,p,p))
val minDelta = 3
val stateList =
let fun f q = let val (elem,newQueue) = Fifo.get q
in elem :: (f newQueue)
end handle Fifo.Empty => nil
in f queue
end
val (_, numStateList) =
List.foldr (fn (a,(num,r)) => (num+1,(a,num)::r)) (0, []) stateList
datatype ('a,'b) change = CHANGE of
{pos : int, distance : int, leftPos: 'b, rightPos: 'b,
new : ('a,'b) lexv list, orig : ('a,'b) lexv list}
val showTerms = String.concat o map (fn TOKEN(t,_) => " " ^ showTerminal t)
val printChange = fn c =>
let val CHANGE {distance,new,orig,pos,...} = c
in (TextIO.print ("{distance= " ^ (Int.toString distance));
TextIO.print (",orig ="); TextIO.print(showTerms orig);
TextIO.print (",new ="); TextIO.print(showTerms new);
TextIO.print (",pos= " ^ (Int.toString pos));
TextIO.print "}\n")
end
val printChangeList = app printChange
fun parse (lexPair,stack,queuePos : int) =
case distanceParse(lexPair,stack,Fifo.empty,queuePos+maxAdvance+1)
of (_,_,_,distance,SOME ACCEPT) =>
if maxAdvance-distance-1 >= 0
then maxAdvance
else maxAdvance-distance-1
| (_,_,_,distance,_) => maxAdvance - distance - 1
fun catList l f = List.foldr (fn(a,r)=> f a @ r) [] l
fun keywordsDelta new = if List.exists (fn(TOKEN(t,_))=>is_keyword t) new
then minDelta else 0
fun tryChange{lex,stack,pos,leftPos,rightPos,orig,new} =
let val lex' = List.foldr (fn (t',p)=>(t',Stream.cons p)) lex new
val distance = parse(lex',stack,pos+length new-length orig)
in if distance >= minAdvance + keywordsDelta new
then [CHANGE{pos=pos,leftPos=leftPos,rightPos=rightPos,
distance=distance,orig=orig,new=new}]
else []
end
fun tryDelete n ((stack,lexPair as (TOKEN(term,(_,l,r)),_)),qPos) =
let fun del(0,accum,left,right,lexPair) =
tryChange{lex=lexPair,stack=stack,
pos=qPos,leftPos=left,rightPos=right,
orig=rev accum, new=[]}
| del(n,accum,left,right,(tok as TOKEN(term,(_,_,r)),lexer)) =
if noShift term then []
else del(n-1,tok::accum,left,r,Stream.get lexer)
in del(n,[],l,r,lexPair)
end
fun tryInsert((stack,lexPair as (TOKEN(_,(_,l,_)),_)),queuePos) =
catList terms (fn t =>
tryChange{lex=lexPair,stack=stack,
pos=queuePos,orig=[],new=[tokAt(t,l)],
leftPos=l,rightPos=l})
fun trySubst ((stack,lexPair as (orig as TOKEN (term,(_,l,r)),lexer)),
queuePos) =
if noShift term then []
else
catList terms (fn t =>
tryChange{lex=Stream.get lexer,stack=stack,
pos=queuePos,
leftPos=l,rightPos=r,orig=[orig],
new=[tokAt(t,r)]})
fun do_delete(nil,lp as (TOKEN(_,(_,l,_)),_)) = SOME(nil,l,l,lp)
| do_delete([t],(tok as TOKEN(t',(_,l,r)),lp')) =
if eqT (t, t')
then SOME([tok],l,r,Stream.get lp')
else NONE
| do_delete(t::rest,(tok as TOKEN(t',(_,l,r)),lp')) =
if eqT (t,t')
then case do_delete(rest,Stream.get lp')
of SOME(deleted,l',r',lp'') =>
SOME(tok::deleted,l,r',lp'')
| NONE => NONE
else NONE
fun tryPreferred((stack,lexPair),queuePos) =
catList preferred_change (fn (delete,insert) =>
if List.exists noShift delete then []
else case do_delete(delete,lexPair)
of SOME(deleted,l,r,lp) =>
tryChange{lex=lp,stack=stack,pos=queuePos,
leftPos=l,rightPos=r,orig=deleted,
new=map (fn t=>(tokAt(t,r))) insert}
| NONE => [])
val changes = catList numStateList tryPreferred @
catList numStateList tryInsert @
catList numStateList trySubst @
catList numStateList (tryDelete 1) @
catList numStateList (tryDelete 2) @
catList numStateList (tryDelete 3)
val findMaxDist = fn l =>
List.foldr (fn (CHANGE {distance,...},high) => Int.max(distance,high)) 0 l
val maxDist = findMaxDist changes
val changes = catList changes
(fn(c as CHANGE{distance,...}) =>
if distance=maxDist then [c] else [])
in case changes
of (l as change :: _) =>
let fun print_msg (CHANGE {new,orig,leftPos,rightPos,...}) =
let val s =
case (orig,new)
of (_::_,[]) => "deleting " ^ (showTerms orig)
| ([],_::_) => "inserting " ^ (showTerms new)
| _ => "replacing " ^ (showTerms orig) ^
" with " ^ (showTerms new)
in error ("syntax error: " ^ s,leftPos,rightPos)
end
val _ =
(if length l > 1 andalso DEBUG2 then
(TextIO.print "multiple fixes possible; could fix it by:\n";
app print_msg l;
TextIO.print "chosen correction:\n")
else ();
print_msg change)
val findNth = fn n =>
let fun f (h::t,0) = (h,rev t)
| f (h::t,n) = f(t,n-1)
| f (nil,_) = let exception FindNth
in raise FindNth
end
in f (rev stateList,n)
end
val CHANGE {pos,orig,new,...} = change
val (last,queueFront) = findNth pos
val (stack,lexPair) = last
val lp1 = List.foldl(fn (_,(_,r)) => Stream.get r) lexPair orig
val lp2 = List.foldr(fn(t,r)=>(t,Stream.cons r)) lp1 new
val restQueue =
Fifo.put((stack,lp2),
List.foldl Fifo.put Fifo.empty queueFront)
val (lexPair,stack,queue,_,_) =
distanceParse(lp2,stack,restQueue,pos)
in (lexPair,stack,queue)
end
| nil => (error("syntax error found at " ^ (showTerminal term),
leftPos,leftPos); raise ParseError)
end
val parse = fn {arg,table,lexer,saction,void,lookahead,
ec=ec as {showTerminal,...} : ('_a,'_b) ecRecord} =>
let val distance = 15
val minAdvance = 1
val maxAdvance = Int.max(lookahead,0)
val lexPair = Stream.get lexer
val (TOKEN (_,(_,leftPos,_)),_) = lexPair
val startStack = [(initialState table,(void,leftPos,leftPos))]
val startQueue = Fifo.put((startStack,lexPair),Fifo.empty)
val distanceParse = distanceParse(table,showTerminal,saction,arg)
val fixError = mkFixError(ec,distanceParse,minAdvance,maxAdvance)
val ssParse = ssParse(table,showTerminal,saction,fixError,arg)
fun loop (lexPair,stack,queue,_,SOME ACCEPT) =
ssParse(lexPair,stack,queue)
| loop (lexPair,stack,queue,0,_) = ssParse(lexPair,stack,queue)
| loop (lexPair,stack,queue,distance,SOME ERROR) =
let val (lexPair,stack,queue) = fixError(lexPair,stack,queue)
in loop (distanceParse(lexPair,stack,queue,distance))
end
| loop _ = let exception ParseInternal
in raise ParseInternal
end
in loop (distanceParse(lexPair,startStack,startQueue,distance))
end
end;
;