Return-path: <ted.briscoe@cl.cam.ac.uk>
Delivery-date: Mon, 23 Sep 1996 14:04:34 +0100
Received: from cl.cam.ac.uk [128.232.0.139] (ejb)
	by heaton.cl.cam.ac.uk with esmtp (Exim 0.52 #2)
	id E0v5Agb-0004MR-00; Mon, 23 Sep 1996 14:04:33 +0100
To: Miles.Osborne@cl.cam.ac.uk
Subject: code
Date: Mon, 23 Sep 1996 14:04:31 +0100
From: Ted Briscoe <Ted.Briscoe@cl.cam.ac.uk>
Message-Id: <E0v5Agb-0004MR-00@heaton.cl.cam.ac.uk>


This isn't quite what you want but you can copy the stuff which embeds
execute-with-timeout around parse

;;; COMP-STATS.LSP

;;; Modify fparse to add flag 'STatistics', printing out for each sentence
;;; a list consisting of:
;;;
;;; - number of parses
;;;
;;; - if zero parses, a list of edges of the form
;;;
;;;   (<pre-verb> <rule-name> <word>_<tag> {* | <compl-lex>} ...)
;;;   where <pre-verb> is either ({* | <subj-lex>}+) or PASSIVE {* | <dir-obj-lex>}
;;;
;;;   otherwise a list of tree nodes from successful parses of the form
;;;   ((<pre-verb> <rule-name> <word>_<tag> {* | <compl-head>} ...) . <freq>)
;;;   where <pre-verb> is either {* | <subj-lex>} or PASSIVE {* | <dir-obj-lex>}
;;;
;;; where <word> and <tag> have been specified as of interest, and occur
;;; as the first daughter of the edge/node. The complement lexical items are
;;; nouns or particles/prepositions tagged with one of the labels in +noun-tags+
;;; or +prt/prep-tags+. Additionally, nouns must lie on a path, all of whose
;;; categories agree on non-variable values with +noun-cats+; particles/prepositions
;;; must be the 1st daughter in a path of categories which agree with
;;; +prt/prep-cats. The frequency is the number of times a node of that
;;; form occurs in the parse trees.
;;;
;;; Example calls:
;;;
;;; !(parse-file1 "tag/sents" 'statistics '(|go| VVN))
;;; !(parse-file1 "tag/sents" 'statistics '(* VBDZ VBDR VBZ VVD VVG VV0 VB0 VVN VVZ))
;;; fpar st * VBDZ VBDR VVD VVG VV0 VB0 VVN VVZ VHD
;;; tag/sents

;;; 3rd argument is a list containing the word and possible tags for it. The
;;; word can be '*', meaning any word with the given tags.
;;;
;;; Example output from fparse:
;;;
;;; (Jackie_NP1 have_VHD go_VVN into_II the_AT station_NNL1) 3
;;; (3 (("V/pp" |go_VVN| |into_II|) . 1) (("V/have_ppart" |have_VHD| *) . 3)
;;;    (("V/0" |go_VVN|) . 2))

;;; Can also do:
;;;
;;; v st * VBDZ VBDR VVD VVG VV0 VB0 VVN VVZ VHD
;;; (pprint (stats-from-edges (car *chart-edges) '* '("VBDZ" "VBDR" "VBZ" "VVD" "VVG" "VV0" "VB0" "VVN" "VVZ" "VHD")))
;;;

(defparameter +parse-timeout+ 45)
(defparameter +verb-cats+
   '("[N -, V +, BAR 0, PLU @p, AUX @a, VFORM @v, INV @i, FIN @f, CONJ @c, TA @t]."
     "[N -, V +, BAR 0, PLU @p, VSUBCAT @s, PRT @r, AUX @a, VFORM @v, FIN @f, CONJ @c, TA @t]."
     "[N -, V +, BAR 1, PLU @p, MOD @m, AUX @a, VFORM @v, FIN @f, CONJ @c, SCOLON @sc]."
     "[N -, V +, BAR 2, PLU @p, WH @w, AUX @a, VFORM @v, INV @i, FIN @f, CONJ @c, SCOLON @sc, COLON @co, DASH @da, TA @ta, BAL @ba, BRACK @br, COMMA @co, TXTCAT @tc, TXT @tx]."
     ))

(defparameter +noun-cats+
    '("[N +, V -, BAR 0, PLU @p, POSS @o, NTYPE @n, WH @w, MOD @m, CONJ @c, TA @a]."
      "[N +, V -, BAR @b, PLU @p, POSS @o, NTYPE @n, WH @w, MOD @m, CONJ @c, SCOLON @sc, COLON @cl, DASH @d, TA @a, BAL @l, BRACK @br, COMMA @co, TXTCAT @t, TXT @tx]."
      "[N +, V -, BAR 2, PLU @p, POSS @o, NTYPE @n, WH @w, MOD @m, CONJ @c, SCOLON @sc]."
      ))
     
(defparameter +adj-cats+
   '("[N +, V +, BAR @b, ATYPE @at, AFORM @af, ADV @ad, MOD @m, CONJ @c, TA @t]."
     "[N +, V +, BAR 2, ATYPE @at, AFORM @af, ADV @ad, MOD @m, CONJ @c, SCOLON @s, TA @t]."
     ))

(defparameter +prt/prep-cats+
    '("[N -, V -, BAR 0, PSUBCAT @s, PFORM @p, ADJ @a, MOD @m, CONJ @c, TA @t]." 
      "[N -, V -, BAR 1, PFORM @p, ADJ @a, WH @w, MOD @m, CONJ @c]." 
      "[N -, V -, BAR 2, PFORM @p, ADJ @a, WH @w, MOD @m, CONJ @c, SCOLON @sc]." 
      "[N +, V -, BAR 2, PLU @p, POSS @o, NTYPE PRO, WH +, MOD @m, CONJ -, SCOLON @sc, COLON @cl, DASH @d, TA @a, BAL @l, BRACK @br, COMMA @co, TXTCAT @t, TXT @tx]."
      "[MINOR PRT]." "[MINOR COMP, TA @t]."
      "[N +, V +, BAR @b, ATYPE @a, AFORM NONE, ADV +, MOD @m, CONJ -, TA @t]."
      "[N -, V +, BAR 2, PLU @p, WH @w, AUX @a, VFORM @v, INV @i, FIN @f,  CONJ @c, SCOLON @sc, COLON @cl, DASH @d, TA @t, BAL @bl, BRACK @br, COMMA @co, TXTCAT UNIT, TXT CL]."
      ))


(defparameter +aux-pass-rules+ '("V/be_ppart/-" "V/be_ppart/+"))
(defparameter +lex-part-rules+ '("V/0_p"))
(defparameter +aux-pass-tags+
   '("VB0" "VBDR" "VBDZ" "VBG" "VBM" "VBN" "VBR" "VBZ"))


(defvar *verb-cats*)
(defvar *noun-cats*)
(defvar *adj-cats*)
(defvar *prt/prep-cats*)

(proclaim '(special *chart-edges *parser-fparse-commands))


;;; Add new option to GDE parse loop fparse and view commands

(eval-when (load eval)
   ;; (unembed display-parse-trees2)
   ;; (unembed invoke-parser)
   ;; (unembed parse-file-sentence)
   ;; (unembed parse-sentences)
   (embed parse-sentences
      (lambda nil
         (setup-category-defining-info)
         (parse-sentences)))
   (embed parse-file-sentence
      (lambda (sent verbose command-options)
         (let ((n-parses 0))
            (execute-with-timeout
               (+parse-timeout+
                   ;; ran out of time during parsing/unpacking/stats
                   (format t "-1~%"))
               (setq n-parses
                  (parse-file-sentence sent verbose command-options)))
            (when (eql n-parses 0)
               (let ((word
                        (if (eq (car command-options) '*)
                           '* (string (car command-options))))
                     (tags (mapcar #'string (cdr command-options))))
                  (print-comp-stats
                     (stats-from-edges (car *chart-edges) word tags)))))))
   (embed invoke-parser
      (lambda (words verbose)
         (invoke-parser
            (let ((n 0))
               (mapcar
                  #'(lambda (w)
                     (let* ((str (string w))
                            (underscore (position #\_ str)))
                        (if underscore
                           (intern
                              (format nil "~A:~A~A" (subseq str 0 underscore)
                                 (incf n) (subseq str underscore)))
                           w)))
                  words))
            verbose)))
   (embed display-parse-trees2
      (lambda (trees mode command-options)
         (fresh-line *standard-output*)
         (if (eq mode 'statistics)
            (let ((word
                     (if (eq (car command-options) '*)
                        '* (string (car command-options))))
                  (tags (mapcar #'string (cdr command-options))))
               (print-comp-stats (stats-from-parses trees word tags)))
            (display-parse-trees2 trees mode command-options))))
   (pushnew
      (make-command-entry :shortest 2 :name
         'statistics :action '(parse-file 'statistics))
      *parser-fparse-commands :test #'equal)
   (pushnew
      (make-command-entry :shortest 2 :name
         'statistics :action '(display-parse-trees 'statistics))
      *parser-view-commands :test #'equal)
   )


(defun print-comp-stats (stats &optional stream)
   (fresh-line stream)
   (write stats :stream stream :pretty nil :escape t)
   (when stream (finish-output stream)))


(defun setup-category-defining-info nil
   (setq *verb-cats* (category-defining-info +verb-cats+))
   (setq *noun-cats* (category-defining-info +noun-cats+))
   (setq *adj-cats* (category-defining-info +adj-cats+))
   (setq *prt/prep-cats* (category-defining-info +prt/prep-cats+)))

(defun category-defining-info (strs)
   (mapcar
      #'(lambda (str)
         (let*
            ((*input-text (get-reply1 str))
               (*input-comments nil) (highest-binding-no 0)
               (*current-item (pop *input-text))
               (binding-list (parse-category-binding nil nil)))
            (declare
               (special *input-text *input-comments highest-binding-no
                  *current-item))
            (let ((conv
                     (car
                        (convert-category-to-parser
                           (car binding-list) (cdr binding-list) nil))))
               (cons (svref conv 0)
                  (do* ((ind 1 (1+ ind)) (res nil))
                     ((eql ind (length conv)) (nreverse res))
                     (unless (g-varp (svref conv ind))
                        (push (cons ind (svref conv ind)) res)))))))
      strs))
            

(defun maybe-packed-nodes (node)
   (let
      ((tail (cdar node)) (packed nil))
      (loop
         (when (or (atom tail) (atom (car tail)))
            (return
               (if packed
                  (cons (cons (cons (caar node) tail) (cdr node)) packed)
                  (list node))))
         (push (cddr (cdar tail)) packed)
         (setq tail (cdr tail)))))


(defun tree-node-category-value (cat vt feature)
   (let
      ((pos
          (position feature (svref *index-category-table (svref cat 0))
             :test #'eq)))
      (if pos
         (let ((val (svref cat (1+ pos))))
            (when (g-varp val) (setq val (g-dereference val vt)))
            (if (symbolp val) val nil)) ; return nil for variables
         nil)))


(defun category-match-p (cat type)
   (dolist (item type nil)
      (when
         (and (eql (car item) (svref cat 0))
            (dolist (sub (cdr item) nil)
               (when (eql (cdr sub) (svref cat (car sub)))
                  (return t))))
         (return t))))


(defun split-lexical-rep (word-and-tag)
   (let* ((str (string word-and-tag))
          (underscore-pos (position #\_ str)))
       (if underscore-pos
          (list
             (list
                (intern (subseq str 0 underscore-pos))
                (intern (subseq str (1+ underscore-pos)))))
          (error "No underscore in ~S" word-and-tag))))


;;;; SUCCESSFUL PARSES

;;; [To reduce space requirement, could record in a bit vector / integer
;;; successful analyses in parse forest, using only 1-2 bits per node; then
;;; traverse forest again picking info up in accordance with this]

(defun stats-from-parses (nvts-and-trees word tags)
   (let ((res nil))
      (dolist (nvt-and-tree nvts-and-trees)
         (dolist
            (local
               (get-local-trees (cdr nvt-and-tree) (car nvt-and-tree)
                  word tags nil nil))
            (let ((found (assoc local res :test #'equal)))
               (if found
                  (incf (cdr found))
                  (push (cons local 1) res)))))
      (cons (length nvts-and-trees) res)))


(defun get-local-trees (tree vt word tags chain res)
   (if (and (cdr tree) (atom (cadr tree)))
      res
      (progn
         (dolist
             (rep (tree-node-representations tree vt (car tree) word tags chain)
                res)
            (if (eq (car rep) 'PASSIVE)
               (push rep res)
               (push
                  (cons (local-tree-subject-representation chain vt (list tree))
                     rep)
                  res)))
         (dolist (daughter (cdr tree))
            (when (consp daughter)
               (setq res
                  (get-local-trees
                     daughter vt word tags (cons tree chain) res))))
         res)))


(defun local-tree-subject-representation (chain vt done)
   ;; find lowest enclosing S node -
   ;; look for N* consitituent as first daughter and go down N* path ending
   ;; in an N lexical entry -
   ;; otherwise find S/VP sibling and find a path ending in a V 
   (if chain
      (let ((tree (car chain)))
         (if (and (category-match-p (caar tree) *verb-cats*)
                (eq (tree-node-category-value (caar tree) vt 'BAR) '|2|))
            (or
               (local-tree-subject-representation1
                  (car (last (cdar chain))) done *noun-cats*)
               (some
                  #'(lambda (d)
                      (local-tree-subject-representation1 d done *verb-cats*))
                  (cdar chain))
               nil)
            (local-tree-subject-representation (cdr chain) vt
               (cons (car chain) done))))
      nil))


(defun local-tree-subject-representation1 (tree done possible-cats)
   (if (member tree done :test #'eq)
      nil
      (some-representation1 tree possible-cats)))
 

;;; Compute required representation(s) (without subject info) for a node from parse tree.
;;; Return nil or list containing them

(defun tree-node-representations (tree vt mother word tags chain)
   (mapcan
      #'(lambda (first-d)
         (when
            (and (cdr first-d) (atom (cadr first-d))
               (word-and-tag-match-p (string (cadr first-d)) word tags))
            ;; (? word_tag ...)
            (tree-node-representations1 mother vt first-d tree chain)))
      (maybe-packed-nodes (car (last (cdr tree))))))


(defun tree-node-representations1 (mother vt first-d tree chain)
   (when (search "/cj_" (cadr mother))
      ;; prevent e.g. (V/cj_int/- met:5_VVD (V/cj_end and:6_CC helped:7_VVD))
      ;; since otherwise helped gets mistakenly identified as a complement
      (return-from tree-node-representations1 nil))
   (let ((rep
            (list*
               (verbal-category-representation (caar first-d) vt)
               (split-lexical-rep (cadr first-d)) ; head word
               (reverse
                  (mapcar #'(lambda (d) (complement-representation d vt))
                     (butlast (cdr tree)))))))
      (cond
         ((and chain (aux-pass-node-p (car chain)))
            ;; (N1/n_pprt ...) or (V/be_ppart *_VB* ...) immediately enclosing
            (list
               (list* 'PASSIVE
                  (local-tree-subject-representation chain vt (list tree))
                  rep)))
         ((and chain
               (member (cadr mother) +lex-part-rules+ :test #'equal))
            ;; V/0_p mother - eg (V/ing (V/0_p thinking_VVG of_IO) (V/0 leaving_VVG))
            ;; build representation one level up, with current rep e.g.
            ;; ((VSUBCAT VPING) ((|thinking:3| VVG)) ((|of:4| IO)))
            ;; spliced in as first args but ignoring 1st component 
            (list
               (list*
                  (verbal-category-representation (car mother) vt) 
                  (second rep) (third rep)
                  (reverse
                     (mapcar #'(lambda (d) (complement-representation d vt))
                        (butlast (cdar chain)))))))
         (t
            (list rep)))))


(defun verbal-category-representation (cat vt)
   (let
      ((valv (tree-node-category-value cat vt 'VSUBCAT))
       (valp (tree-node-category-value cat vt 'PRT)))
      (cond
         ((null valv) (list 'VSUBCAT '|?|))
         ((null valp) (list 'VSUBCAT valv))
         (t (list 'VSUBCAT valv 'PRT valp)))))


(defun aux-pass-node-p (tree)
   ;; matches (N1/n_pprt ...) or (V/be_ppart/- or /+ *_VB* ?)
   (or
      (string= (find-if #'stringp (cdar tree)) "N1/n_pprt")
      (and
         (member (find-if #'stringp (cdar tree)) +aux-pass-rules+ :test #'string=)
         (let ((first-d (car (maybe-packed-nodes (second (cdr tree))))))
            (and 
               (cdr first-d) (atom (cadr first-d))
               (word-and-tag-match-p
                  (symbol-name (cadr first-d)) '* +aux-pass-tags+))))))
         

;;; Check if a word_tag symbol has word as first component (* matches all)
;;; and tag is a member of poss-tags

(defun word-and-tag-match-p (x word poss-tags)
   (let ((len-x (length (the simple-string x)))
         (number-pos (position #\: (the simple-string x)))
         (tag-pos (position #\_ (the simple-string x))))
      (and
         (or (eq word '*) (fast-string= x word 0 number-pos))
         (or (null poss-tags)
            (dolist (tag poss-tags nil)
               (when (fast-string= x tag (1+ tag-pos) len-x)
                  (return t)))))))


(defun fast-string= (x y start-x end-x)
   (declare (simple-string x y) (fixnum start-x end-x))
   (do
      ((x-ind start-x (1+ x-ind))
         (y-ind 0 (1+ y-ind))
         (x-final (length (the simple-string x)))
         (y-final (length (the simple-string y))))
      ((or (eql x-ind x-final) (eql y-ind y-final))
         (and (eql x-ind end-x) (eql y-ind y-final)))
      (declare (fixnum x-ind y-ind x-final y-final))
      (unless (char= (schar x x-ind) (schar y y-ind)) (return nil))))


;;; Return a representation for tree (a complement of a verb). Only
;;; interested in head verb/noun/adj (rightmost daughter) or
;;; dominating prt/prep.

(defun complement-representation (tree vt)
   (cond
      ((and (cdr tree) (atom (cadr tree)))
         (if (or (category-match-p (caar tree) *prt/prep-cats*)
                (category-match-p (caar tree) *adj-cats*)
                (category-match-p (caar tree) *noun-cats*)
                (category-match-p (caar tree) *verb-cats*))
            (split-lexical-rep (cadr tree))
            nil))
      ((category-match-p (caar tree) *verb-cats*)
         (let ((wh (tree-node-category-value (caar tree) vt 'WH))
               (rep (or (some-representation (cdr tree) *verb-cats*) nil)))
            (if (eq wh '+)
               (list (list 'WH '+) rep) rep)))
      ((category-match-p (caar tree) *noun-cats*)
         (or (some-representation (cdr tree) *noun-cats*) nil))
      ((category-match-p (caar tree) *adj-cats*)
         (or (some-representation (cdr tree) *adj-cats*) nil))
      ((category-match-p (caar tree) *prt/prep-cats*)
         (or (some #'(lambda (d) (prt/prep-representation d vt))
                (cdr tree))
             nil))
      (t nil)))


(defun some-representation (nodes possible-cats)
   (some
      #'(lambda (n) (some-representation1 n possible-cats))
      nodes))

(defun some-representation1 (tree possible-cats)
   (cond
      ((not (category-match-p (caar tree) possible-cats))
         nil)
      ((and (cdr tree) (atom (cadr tree)))
         (split-lexical-rep (cadr tree)))
      ((search "cj_int" (cadar tree))
         (let ((r
                (mapcan
                   #'(lambda (d) (some-representation1 d possible-cats))
                   (cdr tree))))
            (or r nil)))
      (t
         (some-representation (cdr tree) possible-cats))))


(defun prt/prep-representation (tree vt)
   ;; all structure introduced by lexical Ps in tree
   (cond
      ((not (category-match-p (caar tree) *prt/prep-cats*))
         nil)
      ((and (cdr tree) (atom (cadr tree)))
         (split-lexical-rep (cadr tree)))
      ((cddr tree)
          (let ((first-d (car (last (cdr tree)))))
            (if
               (and (cdr first-d) (atom (cadr first-d))
                  (category-match-p (caar first-d) *prt/prep-cats*))
               ;; first daughter is a lexical P
               (list*
                  (prepositional-category-representation (caar first-d) vt)
                  (split-lexical-rep (cadr first-d)) ; head P
                  (reverse
                     (mapcar #'(lambda (d) (complement-representation d vt))
                        (butlast (cdr tree)))))
              (some
                 #'(lambda (n) (prt/prep-representation n vt))
                 (cdr tree)))))
      (t
        (some
           #'(lambda (n) (prt/prep-representation n vt))
           (cdr tree)))))


(defun prepositional-category-representation (cat vt)
   (let ((val (tree-node-category-value cat vt 'PSUBCAT)))
      (list 'PSUBCAT (or val '|?|))))


;;;; FAILED PARSES

(defun stats-from-edges (edges word tags)
   (let ((res nil))
      (dolist (edge edges (cons 0 res))
         (unless (g-chart-edge-needed edge)
            (let ((found (g-chart-edge-found edge)))
               (unless (and (cdr found) (atom (cadr found)))
                  (dolist
                     (rep
                        (inactive-edge-representation found
                           (g-chart-edge-nvt edge) word tags
                           edges (g-chart-edge-start edge)))
                     (pushnew (cons rep 1) res :test #'equal))))))))


(defun inactive-edge-representation (node vt word tags edges start)
   ;; might contain packed nodes, so always return a list of repns
   (let ((res nil))
      (dolist (node (maybe-packed-nodes node) res)
         (unless (and (cdr node) (atom (cadr node)))
            (dolist (new (tree-node-representations node vt (car node) word tags nil))
               (if (eq (car new) 'PASSIVE)
                  (push new res)
                  (push
                     (cons (inactive-edge-subject-representation edges start) new) 
                     res)))))))


(defun inactive-edge-subject-representation (edges end)
   ;; if inactive edge with given end vertex is an N* consitituent, then
   ;; collect N lexical entries at end of N* paths
   (let ((res nil))
      (dolist (edge edges (cons 0 res))
         (when
            (and (null (g-chart-edge-needed edge))
               (eql (g-chart-edge-end edge) end))
            (dolist (node (maybe-packed-nodes (g-chart-edge-found edge)))
               (cond
                  ((and (cdr node) (atom (cadr node)))
                     (when (category-match-p (caar node) *noun-cats*)
                        (pushnew (car (split-lexical-rep (cadr node))) res
                           :test #'equal)))
                  ((category-match-p (caar node) *noun-cats*)
                     (dolist (r (some-representation (cdr node) *noun-cats*))
                        (pushnew r res :test #'equal)))))))
      res))


;;; End of file


