;;; search for best model 

(defvar *parse-length* 200) ;;; n of above	
(defvar *parse-queue* nil) ;;; holds n previous parses.

(defvar *verbose* nil)

(defun add-to-bounded-lifo (a l n)

;;; add element a to queue l, of length n.
  
  (let ((res nil))
    (push a l)
    (dotimes (x (- n 1) res)
	     (push (car l) res)
	     (pop l))
    (reverse res)))

(defun update-rule-freqs (parses)

  (let ((models (calc-posterior parses))
	(best nil)
	(best-score -0)
	(index 0)
	(counter 0))
   
    (dolist (sc models)
	    (let* ((prior (model-prior sc)) (like (model-likelihood sc))
		   (sc1 (+ prior like)))
	      (when (or (null best) (> sc1 best-score))
		(setf index counter)
		(setf best-score sc1)
		(setf best sc))
	      (setf counter (+ 1 counter))))    

    (when parses
	  (let ((likelihood-rules (model-rules best)))
	    ;;; keep parse.
	    (setf *parse-queue*
		  (add-to-bounded-lifo (get-learnt-rules likelihood-rules)
				       *parse-queue* *parse-length*))
	    (dolist (r-d likelihood-rules) 
		    (let* ((r (car r-d))
			   (d (cadr r-d))
			   (entry (fetch-rule-entry r))
			   (entry2 (gethash r *rule-info*))
			   (dfreqs (rule-daughters entry)))
		      
		      (when (and (null entry2) entry)
			    (when *trace*
				  (setf (rule-parse entry)
					(get-rule-labelling-from-parse-tree
					 (cdr (nth index
					     parses))))
				  (setf (rule-parse-number entry)
					*total-processed*))
			    (setf *grammar-size* (1+ *grammar-size*)))
		      
		      (when (rule-body entry)
			    (setf (svref *rule-denom* (rule-mother entry))
				  (1+ (svref *rule-denom* 
					     (rule-mother entry))))	       
			    (if entry2
				(setf (rule-freq entry2) (+ 1 
							    (rule-freq 
							     entry2)))
			      (setf (gethash r *rule-info*) entry))
			    (dolist (dau d)
				    (setf (car dfreqs)
					  (add-daughter dau (car dfreqs)))
				    (pop dfreqs)))))

	    (format t "Rules: ~A " 
		    *grammar-size*)

	    (when *verbose*
	      (format *verbose* "~A ~A ~A ~%" *total-learnt-parses* 
		      ;;; prior like
		      (model-prior best) (model-likelihood best)))))))
	          		    		
(defun get-learnt-rules (likerules)
  (let ((res nil))
    (dolist (r likerules res)
	    (let ((rule (fetch-rule-entry (car r))))
	      (when (rule-body rule)
		    (push rule res))))))
  
(defun add-counts (likelihood-rules)

;;; updates all counts, inc daughter freqs

  (let ((learnt nil))
    (dolist (r-d likelihood-rules learnt)
	    (setf *freq-total* (+ 1 *freq-total*))
	    (let* ((r (car r-d))
		   (d (cadr r-d))
		   (entry2 (fetch-rule-entry r))
		   (dfreqs (rule-daughters entry2)))

	      (when (null entry2) (error "no rule ~A" r))

	      ;(when (rule-body entry2) (push entry2 learnt))
	      (setf (svref *rule-denom* (rule-mother entry2))
		    (1+ (svref *rule-denom* (rule-mother entry2))))    
	
	      (let ((temp nil))
		(dolist (dd dfreqs) 
			(when (null d) (return))
			(push (add-daughter (car d) dd) temp)
			(pop d))

		(when d
		;;;okay, kleene op has added extra stuff
		      (dolist (dd d) 
			      (push (add-daughter dd nil) temp)))
		(setf (rule-daughters entry2) (reverse temp)))
	      (setf (rule-freq entry2) (+ 1 (rule-freq entry2)))))))

(defun add-counts-lite (likelihood-rules)

;;; updates  counts of just learnt rules, inc daughter freqs

  (let ((learnt nil))
    (dolist (r-d likelihood-rules learnt)
	    (setf *freq-total* (+ 1 *freq-total*))
	    (let* ((r (car r-d))
		   (d (cadr r-d))
		   (entry2 (fetch-rule-entry r))
		   (dfreqs (rule-daughters entry2)))

	      (when (null entry2) (error "no rule ~A" r))

	      (when (rule-body entry2) 
		    (push entry2 learnt)
		    (setf (svref *rule-denom* (rule-mother entry2))
			  (1+ (svref *rule-denom* (rule-mother entry2))))    
		    
		    (let ((temp nil))
		      (dolist (dd dfreqs) 
			      (when (null d) (return))
			      (push (add-daughter (car d) dd) temp)
			      (pop d))

		      (when d
		;;;okay, kleene op has added extra stuff
			    (dolist (dd d) 
				    (push (add-daughter dd nil) temp)))
		      (setf (rule-daughters entry2) 
			    (reverse temp)))
		    (setf (rule-freq entry2) (+ 1 (rule-freq entry2))))))))

(defun minus-counts (likelihood-rules)

;;; reverse of add-counts
  (dolist (r-d likelihood-rules)
	  (setf *freq-total* (- *freq-total* 1))
	  (let* ((r (car r-d)) (d (cadr r-d))
		 (entry2 (fetch-rule-entry r)) 
		 (dfreqs (rule-daughters entry2)))

	    (dolist (dau d)
		    (setf (car dfreqs)
			  (remove-daughter dau (car dfreqs)))
		    (pop dfreqs))
		  
	    (setf (svref *rule-denom* (rule-mother entry2))
		  (- (svref *rule-denom* (rule-mother entry2)) 1))
	    (setf (rule-freq entry2) (-  (rule-freq entry2) 1)))))


(defun minus-counts-lite (likelihood-rules)

;;; reverse of add-counts-lits
  (dolist (r-d likelihood-rules)
	  (setf *freq-total* (- *freq-total* 1))
	  (let* ((r (car r-d)) (d (cadr r-d))
		 (entry2 (fetch-rule-entry r)) 
		 (dfreqs (rule-daughters entry2)))

	    (when (rule-body entry2)
		  (dolist (dau d)
			  (setf (car dfreqs)
				(remove-daughter dau (car dfreqs)))
			  (pop dfreqs))
		  
		  (setf (svref *rule-denom* (rule-mother entry2))
			(- (svref *rule-denom* (rule-mother entry2)) 1))
		  (setf (rule-freq entry2) (-  (rule-freq entry2) 1))))))
    

(defvar *parameter-prior* t)

(defun rule-prior (rules)
  (when (null rules) (return-from rule-prior 0.0))
  (unless *parameter-prior*  (return-from rule-prior 0.0))
  (let ((tot 0))
    (dolist (r rules (* -1.0 tot))
	    (setf tot (+ tot (parameter-prior r))))))


(defun parameter-prior (rule)
    ;;; encode freq, inverted.
     (encode-integer (- 500000 (rule-freq rule))))    		     

;;; these routines add derivation prob to rules (held logarithmically)

(defvar *psm-prior* t) ;;; true to use psm prior
(defun psm-prior-on nil (setf  *psm-prior* t))
(defun psm-prior-off nil (setf  *psm-prior* nil))

(defun keep-r-rule (rule prob table)
  (let ((in (gethash rule table)))
    (if in
	(push prob (gethash rule table))
      (setf (gethash rule table) (list prob)))))

(defun compact-r-table (table)
  (maphash #'(lambda (name probs)
	       (when (and name (rule-body (fetch-rule-entry name)))
		     (setf (gethash name table)
			   (* -1.0 (list-log-add probs)))))
	   table))

(defun list-log-add (probs)
  (let ((tot nil))
    (if (= 1 (length probs)) (return-from list-log-add (car probs)))
      (setf tot (car probs))
      (dolist (p (cdr probs) tot)
	      (setf tot (log (+ (expt 10 tot) (expt 10 p )) 10)))))
		     
(defun smooth-model nil

;;; good-turing smoothing of model.

;;; only smooths P(rule | lhs) currently

  (let ((buckets (make-array (length *rule-denom*) :initial-element nil)))
    (maphash #'(lambda (key val)
		 (when key
		       (if (aref buckets (rule-mother val))
			   (push (list key (rule-freq val))
				 (aref buckets (rule-mother val)))
			 (setf (aref buckets (rule-mother val))
			       (list (list key (rule-freq val)))))))
	     *rule-info*)

    (dotimes (x (length *rule-denom*))
	     (setf (aref *rule-denom* x) 0))

    (dotimes (x (length *rule-denom*))
	     (let ((pdf (aref buckets x)))
	       (when pdf
		     (let ((smoothed (smooth pdf)))
		       (dolist (sm smoothed)
			       (setf (aref *rule-denom* x)
				     (+ (aref *rule-denom* x) (cadr sm)))
			       (let ((entry (fetch-rule-entry (car sm))))
				 (setf (rule-freq entry) (cadr sm))))))))))

(defun add-daughter (d daughters)

;;; d is the name of some rule ; daughters is a list of
;;; the form (f-tot (d f) (d f) ...)
;;; where f-tot is total of all f, each d is some daughter, with freq f

(when (null d) (error  "d nil") (return-from add-daughter daughters))

  (if (null daughters)
      ;;; add one for unseen
      (list 2 (list (cons d 1)))
    (let ((ds (cadr daughters)))
      (setf (car daughters) (+ 1 (car daughters)))
      (dolist (pair ds)
	      (when (string= (car pair) d)
		    (setf (cdr pair) (+ 1 (cdr pair)))
		    (return-from add-daughter daughters)))
      (push (cons d 1) (cadr daughters))
      daughters)))

(defun remove-daughter (d daughters)
  (if (null daughters)
      (error "null daughters")
    (let ((ds (cadr daughters)))
      (setf (car daughters) (- (car daughters) 1))
      (dolist (pair ds)
	      (when (string= (car pair) d)
		    (setf (cdr pair) (- (cdr pair) 1))
		    (return-from remove-daughter daughters)))
      (error "daughter not found in list"))))

(defun prune-daughters (daughters)
  (let ((new nil))
    (dolist (d daughters (reverse new))
	    (let ((res nil))
	      (dolist (dd (cadr d))
		      (unless (= 0 (cdr dd))
			      (push dd res)))
	      (if res
		  (push (list (car d) res) new)
		(push nil new))))))

(defun daughter-likelihood (d freqs)

  (dolist (f (cadr freqs))
	  (when (and (string= (car f) d) (> (cdr f) 0))
		(return-from daughter-likelihood
		 (log (/ (cdr f) (car freqs)) 10))))
  ;;;unseen
  (if freqs
      (log (/ 1 (car freqs)) 10)
    (log 0.00001 10))) ;;; low score for completely novel rules

(defvar *likelihood* t)

(defun rule-likelihood (rules)
;;; take one (one added for unseen psm, not needed for likelihood)

  (when (not *likelihood*) (return-from rule-likelihood 0.0))

  (when (null rules) (return-from rule-likelihood 0.0))
  (let ((tot 0))
    (dolist (rule rules tot)
	    (setf tot (+ tot (log (/ (rule-freq rule)
				     (- (svref *rule-denom*
					    (rule-mother rule)) 1)) 10))))))






