;;; generates binary and ternary rules from a list of edges.


;;; test string:  
; The_AT research_NN1 scientists_NN2 :q say_VV0 that_CST to_TO claim_VV0 that_CST all_DB events_NN2 follow_VV0 these_DD2 patterns_NN2 is_VBZ a_AT1 philosophical_JJ position_NN1 and_CC not_XX a_AT1 scientific_JJ one_NN1

(defvar *generate-edges* t) ;;; true to generate edges

(defvar *rules* (make-hash-table :test #'equal)) ;;; holds rules induced for current parse.

(defvar *counter* 5) ; see invent-new-edges
(defvar *ternary* t) ;;; (yet another flag!) true to invent ternary rules.

(defvar *max-cont-edges* 200) ;;; invent this many contiguous edges max.

;;; these flags control usage of rule templates.

(defvar *b1* t)
(defvar *b2* t)
(defvar *b3* t)
(defvar *b4* t)
(defvar *b5* t)
(defvar *t1* t)
(defvar *t2* t)
(defvar *t3* t)
(defvar *t4* t)
(defvar *t5* t)

;;; definition of learnt rule.
(defstruct rule name mother body static-length 
  freq backbone daughters exception parse parse-number)

(defun get-contiguous-edges (all-edges)

;;; return list of edges that might be combined together
;;; to form a larger edge, but haven't been combined already.
;;; (ie all possible joins of broken substrings).

;;; NB: place upper limit on number of joins to conserve space.

  (let* ((edges (filter-active-edges all-edges))
	 (max (+ 1 (get-max-end edges))) (tot 0)
	 (buckets (make-array max))
	 (joins nil))

    (dolist (e edges)
	    (push e (aref buckets (g-chart-edge-end e))))

    (dolist (e edges (filter-existing-edges joins edges))
	    (let ((start (g-chart-edge-start e)))
	      (dolist (y (aref buckets start))
		      (setf tot (+ 1 tot))
		      (when (> tot *max-cont-edges*) 
			    (return-from 
			     get-contiguous-edges
			     (filter-existing-edges joins edges)))
		      
		      (push (list y e) joins)
		      (when *ternary*
			(dolist (z (aref buckets (g-chart-edge-start y)))
				(setf tot (+ 1 tot))
				(when (> tot *max-cont-edges*) 
					  (return-from 
					   get-contiguous-edges
					   (filter-existing-edges 
					    joins edges)))
				    (push (list z y e) joins))))))))

(defun get-max-end (edges)

;;; return maximum vertex end of edge in chart.

  (let ((max -1))
    (dolist (e edges max)
	    (when (> (g-chart-edge-end e) max)
		  (setf max (g-chart-edge-end e))))))
	    
(defun filter-active-edges (edges)

;;; remove all actives edges from edges

  (let ((res nil))
    (dolist (e edges res)
	    (when (null (g-chart-edge-needed e))
		  (push e res)))))


(defun filter-existing-edges (pairs edges)

;;; throw away all pairs whose substring might have been
;;; generated by  existing manually written rule(s).  Ignore
;;; text rules.

  (let ((res nil) (end (length *previous-sentence)))
    (dolist (p  pairs res)
	    (unless (pair-member p edges end)
		    (push p res)))))

(defun filter-existing-edges (pairs edges)
  pairs)

(defun pair-member (pair edges sent-end)
  (let ((start1 (g-chart-edge-start (car pair)))
	(end2 (g-chart-edge-end (car (last pair)))))

    (dolist (e edges nil)
	    (when (and (= start1 (g-chart-edge-start e))
		       (= end2 (g-chart-edge-end e))
		       (not (text-rule-p
			     (extract-name
			      (g-chart-edge-found e))))
		       (not (learnt-rule-p e)))
		  (return-from pair-member t)))))

(defun learnt-rule-p (edge)
;; true if edge originated from a learnt rule.

  (let* ((name (extract-name (g-chart-edge-found edge)))
	 (res (fetch-rule-entry name)))
    (rule-body res)))

(defvar *overgen-rules* 
'(
  "T/s_ex_s/+" "T/rmta_a1" 
  "T/lmta_prep" "Tacl/colon1" "T/txt-sc1/++-"
  "T/txt-sc1/+-+" "T/txt-sc1/-++"  "T/npq"
  "Tph/pp/-" "Taph/comma-b"
  "Tacl/brack/+"  "Tacl/dash+/-"
  "Tacl/dash-/+" "Tcl/interj_tph"  "T/rmta_s/--" 
  "T/lmta_np" "Tacl/comma+"  "T/txt-sc1/+++"
  "Tph/pp/+" "Tph/np/-" "Tacl/dash+/+" 
  "T/rmta_s/+-" "T/rmta_s/-+" "T/txt-cl1/-" 
  "T/txt-cl3" "Taph/colon" 
  "T/txt-sc2/--" 
  "T/rmta_n1" "T/txt-cl2" "Tph/np/+" "T/lmta_auxv"
  "T/rmta_s/++" "T/txt-cl1/+"
  "T/lmta_n" 
  "Tph/a2/-"
  "T/txt-sc2/+-"
  "T/txt-sc2/-+" 
  "T/s_qu_s/-"  "T/leta_cl" 
  "N1/n1_dir" 
  "Tacl/comma-e" 
  "T/lmta_comp" "Tph/a2/+" 
  "T/txt-sc2/++"
  "T/s_qu_s/+"
  "T/leta_s" 
  "Taph/comma+/-" 
  "Taph/brack" 
  "Taph/dash-"  
  "T/lmta_n1" 
  "T/npqcj" "Taph/comma+/+" "Tph/n1/-" 
  "Taph/dash+" "Tph/vp/-" "T/lmta_conj"
  "Tph/n1/+" 
  "T/txt-sc1/---" 
  "Tph/vp/+"
  "T/s_ex_s/-" "T/lmta_v" "T/Tph"
  "T/txt-sc1/+--" "T/txt-sc1/-+-" "T/txt-sc1/--+" 
  "T/lmta_adv" "Tacl/brack/-" "Tacl/dash-/-"))


(defun text-rule-p (name)
;;; true if rule is a text rule.
;;; assume text rules begin with "T".
  (or (equalp (aref name 0) #\T))) ;;; quick hack
      ;(member name *overgen-rules* :test #'string=)))

(defun print-edge (edge)
  (print (map-parser-cat-to-term-cat (g-chart-edge-res edge)))
  (format t "start ~A end ~A ~%" 
	  (g-chart-edge-start edge)
	  (g-chart-edge-end edge)))
	
(defun dump-edges (edges)
  (dolist (e edges)
	  (print-edge e)))
	        
(defun make-new-name (direction pair)
  (let* ((a (extract-name-map (g-chart-edge-found (car pair))))
	(b (extract-name-map (g-chart-edge-found (second pair))))
	(tern (= 3 (length pair)))
	(c (if tern (extract-name-map (g-chart-edge-found (third pair)))
	     nil)))
	
    (cond
     ((equal direction 'left)
      (if tern
	(format nil "~A:[~A-~A-~A]" c a b c)
	(format nil "~A:[~A-~A]" b a b )))	
     ((equal direction 'right)
      (if tern
	  (format nil "~A:[~A-~A-~A]" a a b c)
      (format nil "~A:[~A-~A]" a a b)))
     ((equal direction 'leftp)
      (if tern
	(format nil "~Ap:[~A-~A-~A]" c a b c)
	(format nil "~Ap:[~A-~A]" b a b )))	
     ((equal direction 'rightp)
      (if tern
	  (format nil "~Ap:[~A-~A-~A]" a a b c)
      (format nil "~Ap:[~A-~A]" a a b)))
     ((equal direction 'top)
      (if tern
	  (format nil "top:[~A-~A-~A]" a b c)
	(format nil "top:[~A-~A]" a b))))))

(defvar *name-table* nil)

(defun extract-name-map (tree)
;;; extract name from cat.  Maps cats to names.

  (let ((res (car (last (car tree)))))

    (cond
     ((stringp res) (map-cat-to-name (caar tree))); res)
     ((arrayp res) (map-cat-to-name (car 
				     (last (car tree)))))
                   ;(format nil "~A" (svref res 0)))
     ;;; think gaps changes ordering?
     (t (map-cat-to-name (caar tree)))))) 

(defun extract-name (tree)
;;; extract name from cat.  If word_tag, keep just cat index

  (let ((res (car (last (car tree)))))

    (cond
     ((stringp res)  res)
     ((arrayp res) (format nil "~A" (svref res 0)))
     (t (error "can't extract from ~A ~A" tree res)))))

(defun map-cat-to-name (cat)
;;; give a unique name to each distinct lhs in grammar
  (unless *name-table*
	  (setf *name-table* (make-hash-table :test #'equal)))

  (let ((res nil) (len (length cat)))
    ;;;normalise cat
    (dotimes (x len)
	     (let ((thing (svref cat x))) 
	       (if (and (not (numberp thing)) (g-varp thing))
		   (push 'x res)
		 (push thing res))))
    (let ((entry (gethash res *name-table*)))
      (if entry
	  entry
	(progn
	  (let ((name (format nil "~A" (gensym))))
	    (setf (gethash res *name-table*) name)
	    name))))))

(defun raise-bar-level (parser-cat vt)

;;; given a cat of form X, see if XP (ie bar level raised
;;; by one) exists in grammar.  Return it if true.

;;; don't bother with instantiated variables

  (let* ((cat-binding (car (convert-from-parser-format nil parser-cat t)))
	 (cat (category-binding-category cat-binding))
	 (bar nil)
	 (bar-fv nil))

    (dolist (f-v cat   (progn (when bar (setf (cdr bar-fv) bar)) nil))
	    (when (eq (car f-v) 'BAR)
		  (setf bar (cdr f-v))
		  (setf bar-fv f-v)
		  (cond
		   ((eq bar '|0|) (setf (cdr bar-fv) '|1|))
		   ((eq bar '|1|) (setf (cdr bar-fv) '|2|))
		   (t 
		    (return-from raise-bar-level 
				 nil)))  ;;; can't exceed max bar level. 
		  		  
		  (let ((parser-cat (car (convert-category-to-parser 
					  cat-binding vt nil))))
		    (setf (cdr bar-fv) bar)
		    (return-from raise-bar-level 
				 parser-cat))))))


(defun fake-inactive-edge1 (pair)

;;; given a pair (taken to be the rhs of some invented
;;; rule), invent an inactive edge to fool parser 
;;; into thinking the rule is actually in the grammar.

;;; it seems cats are reversed in an edge.

;;; given rhs AB, create rule B --> A(C)B

  (let ((l (length pair)))
    (if (and (not *b2*) (= 2 l))
	(return-from fake-inactive-edge1 nil))
    (if (and (not *t2*) (= 3 l))
	(return-from fake-inactive-edge1 nil)))
    
  (let* ((needed nil)
	 (mother nil)
	 (found nil)	 	 
	(end (g-chart-edge-end (car (last pair))))
	(start (g-chart-edge-start (car pair)))
	(vts (cons (g-chart-edge-vts (car (last pair)))
		   (g-chart-edge-vts (car pair))))
	(res (list (g-copy-category
		    (caar (g-chart-edge-found (car pair)))
		    (g-chart-edge-rvt (car pair)) nil nil nil)
		   (make-new-name  'right pair))))

    (dolist (c pair) (push (g-chart-edge-found c) found))
    (make-g-chart-edge needed found end vts res start)))


(defun fake-inactive-edge2 (pair)

;;; given rhs AB, create rule A --> AB(C)

  (let ((l (length pair)))
    (if (and (not *b1*) (= 2 l))
	(return-from fake-inactive-edge2 nil))
    (if (and (not *t1*) (= 3 l))
	(return-from fake-inactive-edge2 nil)))

  (let* ((needed nil)
	 (mother nil)
	 (found nil)
	 (end (g-chart-edge-end (car (last  pair))))
	 (start (g-chart-edge-start (car pair)))
	 (vts (cons (g-chart-edge-vts (car (last  pair)))
		    (g-chart-edge-vts (car pair))))
	 (res (list (g-copy-category 
		     (caar (g-chart-edge-found (car (last  pair))))
		     (g-chart-edge-rvt (car (last pair))) nil nil nil)
		    (make-new-name  'left pair))))

    (dolist (c pair) (push (g-chart-edge-found c) found))

    (make-g-chart-edge needed found end vts res start)))


(defun fake-inactive-edge3 (pair)

;;; given a pair (taken to be the rhs of some invented
;;; rule), invent an inactive edge to fool parser 
;;; into thinking the rule is actually in the grammar.

;;; it seems cats are reversed in an edge.

;;; given rhs AB, create rule XB --> A(C)B

  (let ((l (length pair)))
    (if (and (not *b5*) (= 2 l))
	(return-from fake-inactive-edge3 nil))
    (if (and (not *t5*) (= 3 l))
	(return-from fake-inactive-edge3 nil)))

  (let* ((needed nil)
	 (mother nil)
	 (found nil)	 	 
	(end (g-chart-edge-end (car (last pair))))
	(start (g-chart-edge-start (car pair)))
	(vts (cons (g-chart-edge-vts (car (last pair)))
		   (g-chart-edge-vts (car pair))))
	(xp (let ((xp2 (raise-bar-level (car (g-chart-edge-found 
					       (car pair)))
					vts)))
	      (if xp2 
		  xp2
		(return-from fake-inactive-edge3 nil))))
	(res (list (g-copy-category
		    xp
		    (g-chart-edge-rvt (car pair)) nil nil nil)
		   (make-new-name  'rightp pair))))

    (dolist (c pair) (push (g-chart-edge-found c) found))
    (make-g-chart-edge needed found end vts res start)))


(defun fake-inactive-edge4 (pair)

;;; given rhs AB, create rule XA --> AB(C)

    (let ((l (length pair)))
    (if (and (not *b4*) (= 2 l))
	(return-from fake-inactive-edge4 nil))
    (if (and (not *t4*) (= 3 l))
	(return-from fake-inactive-edge4 nil)))
    
  (let* ((needed nil)
	 (mother nil)
	 (found nil)
	 (end (g-chart-edge-end (car (last  pair))))
	 (start (g-chart-edge-start (car pair)))
	 (vts (cons (g-chart-edge-vts (car (last  pair)))
		    (g-chart-edge-vts (car pair))))
	 (xp (let ((xp2 (raise-bar-level (car (g-chart-edge-found 
						(car (last pair))))
					 vts)))
	       (if xp2 
		   xp2
		 (return-from fake-inactive-edge4 nil))))
	 (res (list (g-copy-category 
		     xp
		     (g-chart-edge-rvt (car (last pair))) nil nil nil)
		    (make-new-name  'leftp pair))))

    (dolist (c pair) (push (g-chart-edge-found c) found))

    (make-g-chart-edge needed found end vts res start)))

(defvar *top-lhs* nil)
(defvar *use-top-rules* t)

(defun fake-top-edge (pair)

;;; we have an edge that spans chart, so pretend it is 
;;; a sentence.
  
    (let ((l (length pair)))
      (if (and (not *b3*) (= 2 l))
	  (return-from fake-top-edge nil))
      (if (and (not *t3*) (= 3 l))
	  (return-from fake-top-edge nil)))
    
  (let* ((needed nil)
	 (mother nil)
	 (found nil)
	 (end (g-chart-edge-end (car (last pair))))
	 (start (g-chart-edge-start (car pair)))
	 (vts (cons (g-chart-edge-vts (car pair))
		   (g-chart-edge-vts (car (last pair)))))
	 (new (if *top-lhs* *top-lhs*
		(progn
		  (setf *top-lhs* 
			(index-bundle-for-parser 
			 (category-binding-category 
			  (caar (top-declaration-categories 
				 (normalise-top-definition (car *top)))))
			 nil nil))
		  *top-lhs*)))
	 (res (list new
		    (make-new-name  'top pair))))

    (dolist (c pair) (push (g-chart-edge-found c) found))
    (make-g-chart-edge needed found end vts res start)))

(defun spanning-edge (edges)
  (let ((end (length *previous-sentence)))
    (dolist (e edges nil)
	    (when (and (= 0 (g-chart-edge-start e))
		       (= end (g-chart-edge-end e)))
		  (return-from spanning-edge t)))))

(defun spanning-edges (edges) nil)


(defun map-actives-to-inactives (edges)

  (let ((res nil))
    (dolist (e edges res)
	    (let ((found (cons (g-chart-edge-res e)
			       (g-chart-edge-found e))))
				
	      (setf (g-chart-edge-res e) nil)
	      (setf (g-chart-edge-found e) found))
	    (push e res))))

(defun invent-new-edges (chart)

  (unless *generate-edges* (return-from invent-new-edges nil))

  (format t "Inventing edges ~%")

  (clear-rule-table *rules*)

  
;;; under acl, image grows very large and swapping occurs.  Try to
;;; get gabage collection to deal with this.

  
  ;(setf (sys:gsgc-switch :gc-old-before-expand) t)

  (let ((counter *counter*) (invented nil))
  (loop
    
   (let ((new-edges nil)
	 (pairs (get-contiguous-edges (car *chart-edges)))
	 (sent-length (length *previous-sentence)))

     (when (= 0 counter)
	   (return-from invent-new-edges t))

     ;(dump-pairs pairs)
     (setf counter (- counter 1))

     (dolist (p pairs)

	     (let ((new (fake-inactive-edge1 p)))
	       (when new
		     (push new new-edges)))

	     (let ((xp1 (fake-inactive-edge3 p)))
	       (when xp1 (push xp1 new-edges)))

	     ;;; if rule spans sentence, create rule top --> A B.
	     (when (and  *use-top-rules* *top
			 (= 0 (g-chart-edge-start (car p)))
			(=  sent-length (g-chart-edge-end (car (last p))))
			(top-category-indexes))
		   (let ((new (fake-top-edge p)))
		     (when new
			   (push new new-edges))))

	     ;;; don't create two rules if p of form A A
	     (unless (= (svref 
			 (caar 
			  (g-chart-edge-found (car p))) 0)
			(svref 
			 (caar 
			  (g-chart-edge-found (second p))) 0))

		     (let ((xp1 (fake-inactive-edge4 p)))
		       (when xp1 (push xp1 new-edges)))

		     (let ((new (fake-inactive-edge2 p)))
		       (when new
			     (push new new-edges)))))

               ;;; throw away edges already generated

     (let ((new-edges2 nil))
       (dolist (e new-edges)
	       (unless (edge-generated-already e invented)
		       (push e new-edges2)
		       (push (list (car (last (g-chart-edge-res e)))
				   (g-chart-edge-start e)
				   (g-chart-edge-end e)) invented)))
       (setf new-edges new-edges2))

     ;;; throw away edges that are not allowed (see filter.lisp)
     (setf new-edges (filter-block new-edges))

     (when *evaluate*
	   ;;;throw away any edges that haven't been
	   ;;;learnt already
	   (let ((temp nil))
	     (dolist (e new-edges) 				    	       
		     (when (gethash (car (last (g-chart-edge-res e)))
				    *rule-info*)
			     (push e temp)))
	     (setf new-edges temp)))

     (when (null new-edges) (return-from invent-new-edges nil))

     ;;; rank rules using prior and take first n
     (when t;(not *evaluate*)
	   (setf new-edges (n-best new-edges)))

     (setf new-edges (map-actives-to-inactives new-edges))          

     (dolist (e new-edges) 	     
	     (g-process-inactive e chart 
				 (= sent-length 
				    (g-chart-edge-end e))))))))

(defun edge-generated-already (edge old)
;;; true if we've invented edge already (ie same name + span)

  (let ((name (car (last (g-chart-edge-res edge))))
	(start (g-chart-edge-start edge))
	(end (g-chart-edge-end edge)))

    (dolist (o old nil)
	    (let ((old-name (car o))
		  (old-start (second o))
		  (old-end (third o)))
	      (when (and (string= old-name name)
			 (= old-start start) (= old-end end))
		    (return-from edge-generated-already t))))))


(defun add-rule-to-table (edge table)
  
;;; update rule table

  (let* ((name (cadr (g-chart-edge-res edge)))
	 (res (gethash name table)))

    (if res
	;(error "name already in table")
	(progn
	  ;(setf (rule-freq res) (+ 1 (rule-freq res)))
	  (return-from add-rule-to-table t))

      (let ((entry (make-rule :mother
			      (svref (car (g-chart-edge-res edge))
				     0)
			      :name name
			      :body (map-edge-to-rule edge)
			      :static-length (encode-learnt-rule edge)
			      ;:exception (exceptions edge)
			      :daughters (let ((temp nil))
					   (dolist (y (cdr (g-chart-edge-found 
						       edge)) temp)
						   (push nil temp)))
			      :freq 1)))
	(setf (gethash name table) entry)
	(let ((res2 (gethash name table)))
	  (if (null res2) (error "entry not stored?")))))))

(defun get-rule (name table)
  (gethash name table))

(defun clear-rule-table (table)
  (clrhash table))

	    
(defun keep-rules-used nil

;;; keep only those rules actually used in a parse.

  (let ((rules (extract-rules-from-trees *current-parse-trees)))
    (maphash #'(lambda (key val)
		 (when key
		       (unless (member key rules :test #'string=)
			       (setf (gethash key *rules*) nil))))
	     *rules*)))

(defun extract-rules-from-trees (trees)
  (let ((res nil))
    (dolist (tr trees res)
	    (setf res (append 
		       (collect-rules
			(get-rule-labelling-from-parse-tree
			 (cdr tr))) res)))))

(defun collect-rules (tree)
  (cond
   ((null tree) nil)
   ((listp (car tree)) 
    (append (collect-rules (car tree)) (collect-rules (cdr tree))))
   ((stringp (car tree)) (cons (car tree) (collect-rules (cdr tree))))
   (t (collect-rules (cdr tree)))))
    
(defun map-edge-to-rule (edge)
  (let* ((cats (g-chart-edge-found edge))
	 (mother (car (g-chart-edge-res edge)))
	 (body nil))    
    (dolist (c cats)
	    (push (caar c) body))
    (push mother body)))