;;; MDL-based learner for ANLT System.

;;; Assumes a grammar has been read in already.  Uses JAC's prob decision
;;; model.

;;; This file contains redefinitions of JAC routines.
;;;; g-parse, v-unpack and v-unpack-daughters

(defvar *total-parsed* 0)
(defvar *total-processed* 0)
(defvar *total-learnt-parses* 0)

(defvar *succ* nil) ;;; true if the current sentence can be generated
(defvar *parse-probs* nil) ;;; holds derivation probs of current sentence.
(defvar *trace* nil) ;;; true to record parse and sentence for learnt rule

(setf +n-best-retained+ 100)

(defvar *grammar-size* 0) ;;; number of rules in grammar + learnt rules

(defvar *max-sent-length* 30) ;;; don't parse sentences longer than this.

;;; for some reason this hasn't been picked-up.
(defun ncons (x) (cons x nil))

(defun g-parse (words &optional (top-indexes t))
  
  ;;; initialise various tables
  (unless *rule-denom* (create-rule-freq-table))

  
  (when (and (not *evaluate*)
	     (> (length words) *max-sent-length*))
    (return-from g-parse nil))

  (setf *succ* t)

  (let ((res 
	 (g-parse1 words top-indexes)))
    
    (setf *total-processed* (+ 1 *total-processed*))

    (when res
	  (unless *succ* (setf *total-learnt-parses* 
			       (1+ *total-learnt-parses*)))
	  (setf *total-parsed* (1+ *total-parsed*)))    

          
    (when (and (not *evaluate*) (not *succ*) *generate-edges* res)
	  (update-rule-freqs res))
    res))

;;; Parse a list of words. GDE passes optional variable top-indexes so parser
;;; won't unpack categories which cannot possibly be top. Argument passed as
;;; t if no top constraint is in force.
;;; Active edges indexed on vertex number and category index of next needed
;;; category, inactive on start vertex and category of constituent.

(defun g-parse1 (words top-indexes )
   (let*
      ((nvertices (1+ (length words)))
         (ncats (1+ *current-category-index))
         (chart
            (cons
               (make-array (list nvertices ncats) :initial-element nil)
               (and g-packing
                  (make-array (list nvertices ncats) :initial-element nil))))
         (word-no 0)
         (g-result nil))
      (setq *chart-edges nil)
      (loop
         (cond
            ((null words)
               (g-insert-traces word-no chart t)
               (return
		(let ((res2 
		       (if g-packing
			   (let
			       ((res
				 (mapcan
				  #'(lambda (p)
				      (when
                                       (or (not (listp top-indexes))
					   (member (g-category-index (caadr p))
						   top-indexes))
                                       (g-unpack (cdr p) (car p))))
				  g-result)))
			     (dolist (p res) (g-rempack (cdr p)))
			     res)
			 g-result)))
		  (if res2
		      res2
		    (progn
		      (setf *succ* nil)
		      (invent-new-edges chart)
		      (let ((res3 
			     (if g-packing
				 (let
				     ((res
				       (mapcan
					#'(lambda (p)
					    (when
					     (or (not (listp top-indexes))
						 (member 
						  (g-category-index (caadr p))
						  top-indexes))
					     (g-unpack (cdr p) (car p))))
					g-result)))
				   (dolist (p res) (g-rempack (cdr p)))
				   res)
			 g-result)))
			res3))))))
		    
            ((and (not *g-compute-all-edges)
                  (> word-no 0)
                  (dotimes (n ncats t)
			   (when (aref (car chart) word-no n) 
				 (return nil))))
	     (return nil)))
         (g-insert-traces word-no chart nil)
         (dolist (defn (g-defns (car words)))
            (let ((cat (caar defn)))
               (when (g-category-index cat)
                  (multiple-value-bind (renamed-cat re-entrant-p)
                     (g-copy-category cat nil nil nil nil)
                     (g-process-inactive
                        (make-g-chart-edge nil
                           (cons (cons renamed-cat (cdar defn)) (cdr defn))
                           (1+ word-no) (cons nil nil) re-entrant-p word-no)
                        chart (null (cdr words)))))))
         (setq word-no (1+ word-no))
         (setq words (cdr words)))))

(defun v-unpack (tree vt score)
   (g-perform-stack-check)
   (let
      ((tail (cdar tree)) (unpacked nil) (unpacked-scores nil) mother)
      (loop
         (when (or (atom tail) (atom (car tail)))
            (setq mother (cons (caar tree) tail)) (return))
         (let ((packed (cdar tail)))
            (multiple-value-bind (success new-rvt new-nvt)
               (g-unify
                  (caar tree) (caar (cddr packed)) vt (cadr packed))
               (declare (ignore new-rvt))
               (when success
                  (multiple-value-bind (unp unp-s)
                      (v-unpack (cddr packed) new-nvt score)
                     (dolist (u unp)
                        (push
                           ;; (car packed) is the nvt of packed-into node at
                           ;; the time of packing
                           (cons (nconc (ldiff vt (car packed)) (car u))
                              (cdr u))
                           unpacked))
                       (dolist (u unp-s) (push u unpacked-scores))))))
         (setq tail (cdr tail)))
      (cond
         ((and (cdr tree) (atom (cadr tree)))
            ;; a word node
            (values (cons (cons vt (cons mother (cdr tree))) unpacked)
               (cons score unpacked-scores)))
         (t
            (let*
                ((daughters-alts (list (list vt)))
                 (entry
                    (assoc (intern (cadr mother)) (cdr *production-probs*)
                       :test #'eq))
		 (mo-entry (let ((rule-ent 
				  (fetch-rule-entry (cadr mother))))
			     (if rule-ent
				 (rule-daughters rule-ent)
			       nil)))
                 (score
                    (if +level-one-probs+
                       (cons
                          (+ (car score) 
			     (parse-selection-prob (cadr entry)
						   (cadr mother)))	     
                          (1+ (cdr score)))
                       score))
                 (daughters-alts-scores (list score))
		 (mo-entries (reverse mo-entry))
                 (d-entries (reverse (cddr entry)))) ; !!! inefficient
               (dolist (d (cdr tree))
                  (multiple-value-setq (daughters-alts daughters-alts-scores)
                     (v-unpack-daughter
                        d daughters-alts daughters-alts-scores
                        (if d-entries (cdr (pop d-entries)) nil)
			(if mo-entries (pop mo-entries) nil))))
               (v-unpack-prune
                  (append unpacked
                     (if (cdr daughters-alts)
                        (mapcar
                           #'(lambda (alt)
                              (list* (car alt) mother (reverse (cdr alt))))
                           daughters-alts)
                        (list (cons vt tree))))
                  (append unpacked-scores daughters-alts-scores)))))))
                   
(defun v-unpack-daughter 
  (d daughters-alts daughters-alts-scores d-entries mo-entry)
   (let ((unpacked nil) (unpacked-scores nil))
      (dolist (alt daughters-alts)
         (multiple-value-bind (unp unp-s)
             (v-unpack d (car alt) (pop daughters-alts-scores))
            (dolist (up unp) 

               (let*
                  ((name ; nb packing may still be present until rempack
                     (cond
		      ((atom (cadr (cdr up)))
                        nil) ; a word
		      ((stringp (car (cdar (cdr up)))) ;;; deal with semantics
		       (car (cdar (cdr up))))
		      (t
		       ;;;  this isn't qutie right (as packing may results
		       ;;; in more than one daughter ...)
		       (dolist (x (cdar (cdr up)) nil)
			       (when (stringp x)
				     (return x))))))
		       ;(car (last (cdar (cdr up)))
                   (n
                      (if (and d-entries +level-two-probs+)
                         (daughter-selection-prob d-entries mo-entry name)
                         (log 1.0 10)))
                   (s (pop unp-s)))
                  (push (list* (car up) (cdr up) (cdr alt)) unpacked)
                  (push (if +level-two-probs+ (cons (+ n (car s)) (1+ (cdr s))) s)
                     unpacked-scores)))))
      (values unpacked unpacked-scores)))
         

;;; jac routine can't tokenise things like ,_, properly (when not expecting
;;; tagged input) so when file parsing, always expect input to be
;;; tokenised.  This will prob. need fixing properly later.

(defun parse-file1 (input-file verbose command-options)
   (let
      ((*suppress-dict-messages t))
      (with-open-stream
         (*standard-input*
            (open input-file :direction :input))
         (let* ((sent nil) (tail-sent nil))
            (loop
               (setf sent
                  (nconc sent
                     (mapcan
                        #'(lambda (word)
                             (unless (gde-comment-p word)
                                (ncons word)))
			;;;; change
                        (get-reply t))))
               (setf tail-sent (parse-file-get-tail sent))
               (cond
                  (tail-sent
                     (setf sent (ldiff sent tail-sent))
                     (setf *previous-sentence sent)
                     (parse-file-sentence sent verbose command-options)
                     (setf sent (cdr tail-sent))))
               (cond
                  ((eql
                      (peek-char nil *standard-input* nil
                         *eof-marker)
                      *eof-marker)
                     (return nil))))
            (if sent
               (parse-file-sentence
                  (ldiff sent (parse-file-get-tail sent))
                  verbose command-options))))))

(defun report nil
  (format 
   t 
   "n-best: ~A LM window: ~A rules : ~A Parsed ~A ~A Generated~%"
   +n-best-retained+   *parse-length*
   (let ((tot 0))
     (maphash #'(lambda (key val)
			 (when (and key val)
			       (setf tot (1+ tot))))
	      *rule-info*)
     tot)

   *total-parsed* (if (= 0 *total-processed*)
			 0
		    (* 100.0 (/ *total-parsed* *total-processed*))))  
  (let ((bin 0) (tern 0))
    (maphash #'(lambda (key val)
			 (when (and key val (rule-body val))
			       (if (= 4 (length (rule-body val)))
				   (setf tern (+ 1 tern))
				 (setf bin (+ 1 bin)))))
	      *rule-info*)
    (format t "~A binary rules ~A ternary rules ~%" bin tern))
	  
  (format t "Q = ~A~%" *q*)
  (format t "Max rhs cands (*max-cont-edges*) = ~A" *max-cont-edges*)
  (if *generate-edges*
      (format t "~%Using MDL learner~%")
          (format t "~%Not using MDL learner~%"))
  
  (if *use-drules* (format t "Using drules (~A present) ~%" 
			     (length *drules))
    (format t "Not using drules~%"))

  (if (and *headed*  *filter-block*)
      (format t "Blocking non NP right-headed  rules ~%"))
  (when *type-check* (format t "Type checking ~%"))
  (if *trace* (format t "Recording rule contexts ~%"))
  (if *evaluate* (format t "Evaluation mode ~%"))
  (when (or *headed* *type-check* *use-drules*)
	(format t "Thresholding rules with score > ~A ~%" 
		(/ *exception-threshold* 10)))
  (unless *parameter-prior* (format t "Not using parameter prior~%"))
  (unless *structure-prior* (format t "Not using structure prior~%"))
  (unless *likelihood* (format t "Not using likelihood~%"))
  (if (not *filter-ternary*)
      (format t "Not filtering ternary rules~%")
    (format t "Filtering ternary rules~%"))

  (when *b1* (format t "Using template b1 (A --> AB) ~%"))
  (when *b2* (format t "Using template b2 (B --> AB) ~%"))
  (when *b3* (format t "Using template b3 (S --> AB) ~%"))
  (when *b4* (format t "Using template b4 (AP --> AB) ~%")) 
  (when *b5* (format t "Using template b5 (BP --> AB) ~%"))
  (when *t1* (format t "Using template t1 (A --> ABC) ~%"))
  (when *t2* (format t "Using template t2 (C --> ABC) ~%"))
  (when *t3* (format t "Using template t3 (S --> ABC) ~%"))
  (when *t4* (format t "Using template t4 (AP --> ABC) ~%"))
  (when *t5* (format t "Using template t5 (CP --> ABC) ~%"))
  (format t "Maximum sentence length = ~A words" *max-sent-length*))

(defun learnt-model-size nil
 ;;; return length of learnt rules + length of learnt rule parameters

  (let ((s 0)
	(theta 0))

    (maphash #'(lambda (name rule)
		       (when (and name rule (rule-body rule))
			     (setf s (+ s (rule-static-length rule)))
			     (setf theta (+ theta (parameter-prior rule)))))
	     *rule-info*)
    (format t "Structure length: ~A Parameter length: ~A ~%"
	    s theta)))


(defun parse-top-loop nil
  (report)
   (top-loop #'gde-top-print
      #'parse-top-eval "Parse"
      (concat-string "GDE Parser ("
         (if *lr1-parse "LR1" "chart")
         " parser, top category " (if *top "" "not ") "defined)")))
	 

(defun learner-on nil (setf *generate-edges* t))
(defun learner-off nil (setf *generate-edges* nil))

(defun evaluate-on nil 
  (setf *evaluate* t) (setf +n-best-retained+ 1) 
  (setf *parse-length* 2))

(defun evaluate-off nil 
  (setf *evaluate* nil) (setf +n-best-retained+ 20)
  (setf *parse-length* 200))

(defun reset-counters nil 
  (setf *total-parsed* 0) (setf *total-processed* 0)
  (setf *total-learnt-parses* 0)))

(defun set-window (n)
  (cond
   ((null n) (gde-warn "null window length"))
   ((> 0 n) (gde-warn "Can't have a negative window length"))
   (t
    (setf *parse-length* n))))

(defun set-n-best (n)
  (cond
   ((null n) (gde-warn "null n-best"))
   ((> 0 n) (gde-warn "Can't have a negative n-best"))
   (t
    (setf +n-best-retained+ n))))

(defun g-unpack (tree vt)
   ;; redefinition
   (multiple-value-bind (vts-and-trees scores)
       (v-unpack tree vt (cons (log 1.0 10) 0))
      ;; rank them
      (let*
           ((sorted
              (sort
                 (mapcar #'(lambda (vt-t s) (cons vt-t (/ (car s) (cdr s))))
                    vts-and-trees scores)
                 #'> :key #'cdr))
            (pruned (ldiff sorted (nthcdr +n-best-retained+ sorted))))
         (print (mapcar #'cdr pruned))
	 ;;; keep derivation probs for later use
	 (setf *parse-probs* (mapcar #'cdr pruned))
         (mapcar #'car pruned))))

(defun parameter-prior-on nil (setf *parameter-prior* t))
(defun parameter-prior-off nil (setf *parameter-prior* nil))

(defun structure-prior-on nil (setf *structure-prior* t))
(defun structure-prior-off nil (setf *structure-prior* nil))

(defun likelihood-on nil (setf *likelihood* t))
(defun likelihood-off nil (setf *likelihood* nil))

(defun domain-on nil (setf *domain* t))
(defun domain-off nil (setf *domain* nil))









