;;; various routines to manipulate the data structures

(defun dump-prob-table (table n)

  (if (null *feats)
      (progn
	(cond
	 ((null *features)
	  (error "Need to read in a grammar first"))
	 (t
	  (setf *feats (remove 'H *features))))))
	 

  (format t "Cat ~A ~%" n)
  (let ((c-table (aref table n)))
    (dotimes (y (length c-table))
	     (format t "~A ~%" (nth y *feats))
	     (maphash #'(lambda (key val)
			  (when key
				(format t "~A ~A " key val)))
		      (aref c-table y))
	     (terpri))))

(defun dump-rule-freq-table (&optional (learnt nil))
  (maphash #'(lambda (k v)
	       (when (and k v)
		     (if (and learnt (rule-body v))
			 (progn
			   (format t "~A " k)
			   
			   (pprint-cat t
			    (map-parser-cat-to-term-cat
			     (list (first (rule-body v)))))
			   (format t " --> ")
			   (pprint-cat t
				  (map-parser-cat-to-term-cat
				   (list (second (rule-body v)))))
			   (pprint-cat t
			    (map-parser-cat-to-term-cat
			     (list (third (rule-body v)))))
			   (format t " ~A ~A ~%" 
				   (rule-static-length v)
				   (rule-freq v))))))
		     
	   
	   *rule-info*))

(defun fetch-rule-entry (name)
  (let ((res (gethash name *rule-info*)))
    (if res
       res
      (let ((res2 (gethash name *rules*)))
	res2))))
	  


(defun pprint-cat (fp fv)
  (format fp "[")
  (if (= 1 (length fv))
      (format fp "~A ~A" (caar fv) (cdar fv))
    (progn
      (dolist (f-v (butlast fv))
	      (format fp "~A ~A, " (car f-v) (cdr f-v)))
      (format fp "~A ~A" (caar (last fv)) (cdar (last fv)))))
  (format fp "]"))

(defun grammar-report nil
  (let ((tot 0)
       (probs nil)
       (len 0))
    
    (bmaphash #'(lambda (key val)
		 (when key
		       (setf len (1+ len))
		       (push (tree-likelihood key) probs)))
	     *rule-info*)

    (format t "Rules: ~A in Grammar ~%" len)))

			       		
(defun trace-growth (fp)
  (setf *verbose* (open fp :direction :output)))

(defun reset-tables nil

  ;;; file to record prior + like growth.
  (when *verbose* (close *verbose*))	

  (setf *parse-queue* nil)
  (dotimes (x *parse-length*)
	   (push nil *parse-queue*))
  
  (reset-counters)
  (dotimes (x (length *rule-denom*))
	   (setf (svref *rule-denom* x) 0))
  (setf *grammar-size* 0)
  (evaluate-off)
  (setf *table* nil)
  (setf *freq-total* 0)
  (setf *rule-denom* nil)
  (clrhash *rule-info*))

(defun map-parser-cat-to-cat (cat)

;;; need to ensure all feats are present.

    (if (null *feats)
      (progn
	(cond
	 ((null *features)		  
	  (error "Need to read in a grammar first"))
	 (t (setf *feats (remove 'H *features))))))

  (let ((f1 (svref *index-category-table (svref (car cat) 0)))
	(f-v nil)
	(f nil))

    (setf f f1)
    
    (dotimes (x (length f))
	     (push (cons (car f) (svref (car cat) (+ 1 x)))
		   f-v)
	     (pop f))

    (if (= (length *feats) (length f-v))
	f-v
      (dolist (f2 *feats f-v)
	      (if (not (member f2 f1))
		  (push (cons f2 '@x) f-v))))))

(defun map-parser-cat-to-term-cat (cat)

;;; version of above that doesn't include missing variables.

  (let ((f1 (svref *index-category-table (svref (car cat) 0)))
	(f-v nil)
	(f nil))

    (setf f f1)
    
    (dotimes (x (length f) f-v)
	     (push (cons (car f) (svref (car cat) (+ 1 x)))
		   f-v)
	     (pop f))))


(defun get-rules-given-name (name)

;;; return a list of expanded rules  given a rule name.

  (normalise-idrule-definition name))


(defun get-rules-given-name (name)

;;; return a list of expanded rules  given a rule name.

  (get name 'compiled-idrules))

(defun get-rules-given-name (name)

  (let ((compiled (compile-idrule name)))
    (mapcar #'(lambda (compiled-idrule)
		(convert-idrule-to-parser
		 compiled-idrule))
	    compiled)))
    
(defun normalise-variables (vals)

;;; map shared variables to same value ???

  (let ((res nil))
    (dolist (v vals res)
	    (cond
	     ((g-varp v) 
	      (push '\@y res))
	     (t
	      (push v res))))))

(defun normalise-variables (vals)

;;; map variables eg. @x @y to same val 

  (let ((res nil))
    (dolist (v vals res)
	    (if (variable-p v)
		(push '@x res)
	      (push v res)))))

	
(defun get-cats-given-rule (rule)

  (let ((daughters  nil) (mother 
			  (map-parser-cat-to-cat 
			   (list
			    (caar rule)))))
    (dolist (rhs (cdr rule))
	    (push (map-parser-cat-to-term-cat rhs) daughters))
    (setf daughters (reverse daughters))
    (push mother daughters)))

(defun get-value-given-feat-and-cat (f c)

;;; return the value of a feature in a given category

;;; if feature not present, assume it is optional 

  (let ((res nil))
    (dolist (pair c '\@)
	    (when (equal (car pair) f)
		  (return-from get-value-given-feat-and-cat 
			       (cdr pair))))))

(defun get-all-values-given-feat-and-cat-index  (f index rules)
  
  (let ((res nil))
    (dolist (r rules (normalise-variables res))
	    (let ((cat (nth index r)))
	      (when cat
		    (let ((val (get-value-given-feat-and-cat f cat)))
		      (push  val res)))))))

(defun variable-p (var)
  (or (eql var '\@)
      (g-varp var)))


(defun optional (var)
  (cond
   ((symbolp var) nil)
   ((g-optionalp var) t)))

(defun feature-to-index (f)
  (let ((x 0))
    (dolist (ff *feats x)
	    (if (equal f ff) (return-from feature-to-index x)
	      (setf x (+ 1 x))))))

(defun build-value-prob-table nil

;;; for each cat in a rule, for each feature in that cat,
;;; work out prob of feature taking any given value.
;;; Does not deal with re-entrant values yet.

  (let ((rules nil)
	(table nil)
	(longest 0))

    ;;;first get all rules in grammar

    (dolist (rule *id-rules)
	    (let ((raw-rules (get-rules-given-name rule)))
	      (dolist (r raw-rules)
		      (push (get-cats-given-rule r) rules))))

    ;;; find length of longest rule

    (dolist (r rules)
	    (when (> (length r) longest) (setf longest (length r))))

    (setf table (make-array longest))

    (dotimes (x longest table)
	     (let ((cat-table (make-array (length *feats))))
	       (dolist (f *feats)
		       (let ((values 
			      (get-all-values-given-feat-and-cat-index
			       f x rules))
			     (count 0)
			     (f-table (make-hash-table)))

			 (setf count (length values))

			 (dolist (v values)
				 (let ((res (gethash v f-table)))
				   (if res
				       (setf (gethash v f-table)
					     (+ 1 (gethash v f-table)))
				     (setf (gethash v f-table) 1))))
			 
			 (let ((res (gethash 'total f-table)))
			   (if res
			       (error "total used?")
			     (setf (gethash 'total f-table) count)))

			 (setf (aref  cat-table (feature-to-index f))
			       f-table)))
	       (setf (aref  table x) cat-table)))))
			       
			 

(defun get-val-prob-given-cat (f v cat table)
  (cond
   ((g-varp v)
    (get-val-prob-given-cat1 f '\@y cat table))
   (t
    (get-val-prob-given-cat1 f v cat table))))

(defun get-val-prob-given-cat (f v cat table)
  (if (variable-p v)
      (get-val-prob-given-cat1 f '@x cat table)
    (get-val-prob-given-cat1 f v cat table)))

    
(defun get-val-prob-given-cat1 (f v cat table)

  (when (null table)
	(setf *table* (build-value-prob-table))
	(setf table *table*))

  (let* ((cat-table (aref table cat))
	 (f-table (aref cat-table (feature-to-index f)))
	 (f-tot (gethash 'total f-table))
	 (v1 (gethash v f-table)))
    
    ;;; if v1 is null, feature doesn't have a given
    ;;; value in some cat eg. T + in cat 2 is not defined.
    ;;; given a small value instead.

    (if (null v1)
	(setf v1 1))

    (/ v1 (+ 1 f-tot))))
    
(defun remove-duplicate-edges (score-edge-pairs)

;;; an edge (rule) is a considered a duplicate if
;;; it spans the same substring as another edge plus
;;; it has the same name.

  (let ((res nil))
    (dolist (pair score-edge-pairs res)
	    (unless (dup-edge pair res)
		    (push pair res)))))

(defun dup-edge (pair pairs)

  (let ((start (g-chart-edge-start (cdr pair)))
	(end (g-chart-edge-end (cdr pair)))
	(name (car (last (g-chart-edge-res (cdr pair))))))

  (dolist (p pairs nil)
	  (let ((pp (cdr p)))
	    (when (and (string= name 
			      (car (last (g-chart-edge-res
					  pp))))
		       (= (g-chart-edge-start pp) start)
		     (= (g-chart-edge-end pp) end))
		(return-from dup-edge t))))))

(defun bits-to-logs (bits)

;;; map a description length to log10
  (log (expt 2 (* bits -1)) 10))


(defun rule-index-to-rule (index rules)

;;; map index to rule.  If start, return "start", if end, "end".

  (cond
   ((= 0 index) "start")
   ((= -1 index) "start")
   ;((= (length rules) index) "end")
   (t
    (nth (- index 1) rules))))

(defun save-model (fp)

;;; save bumpf to file

  (prune-ds)
  (with-open-file (out fp :direction :output)

		  
		  ;;; save cat names
		  (maphash #'(lambda (key val)
			       (when key
				 (format out "~S~%" (list key val))))
			   *name-table*)
		  (format out "end~%")

		  ;;; save details of how model learnt
		  (format out 
			  " ~A ~A ~A "
			  +n-best-retained+  *n-best*  *parse-length*)	     
		  (format out " ~A " *use-top-rules*)	       
		  (format out " ~A " *q*)	       
		  (format out " ~A " *parameter-prior*)	       
		  (format out " ~A " *structure-prior*)	       
		  (format out " ~A " *likelihood*)	       
		  (format out " ~A ~%" *use-drules*)	       
		  (format out " ~A ~A ~A ~A ~A ~A ~A ~A ~A ~A ~% " 
			  *t1* *t2* *t3* *t4* *t5* 
			  *b1* *b2* *b3* *b4* *b5*)
		  (format out " ~A " *counter*)	
		  (format out " ~A " *max-cont-edges*)	
		  (format out " ~A " *max-sent-length*)
		  ;;; dump rule table 
		  (maphash #'(lambda (key val)
			       (when key
				     (format out "~S~%" (cons key val))))
			   *rule-info*)
		  
		  ;;; save non-term info (for checking that still valid later)

		  (format out "~A ~%" (length *index-category-table))
		  (format out "~A ~%" (length *id-rules))		
       		  		  
		  ;;; now dump parameter info
		  (dotimes (x (length *rule-denom*))
			  (format out "~A ~%" (svref *rule-denom* x)))))
		  

(defun load-model (fp)

;;; reverse of above.  Assumes that manually
;;; written grammar is resident (same as when config saved).

  (reset-tables)   
  (prune-ds)

  (with-open-file (in fp :direction :input)
		  (if *name-table*
		      (clrhash *name-table*)
		    (setf *name-table* (make-hash-table :test #'equal)))	
		  (loop
		   (let ((cat-name (read in nil 'eof nil)))
		     (cond
		      ((equal cat-name 'end)
		       (return))
		      ((equal cat-name 'eos)
		       (error "eos found?"))
		      (t
		       (setf (gethash (car cat-name) *name-table*)
			     (second cat-name))))))
  
  
		  ;;; read details of how  model learnt
		  (setf  +n-best-retained+ (read in nil 'eof nil))
		  (setf  *n-best*  (read in nil 'eof nil))
		  (setf  *parse-length* (read in nil 'eof nil))
		  (setf  *use-top-rules* (read in nil 'eof nil))
		  (setf  *q* (read in nil 'eof nil))
		  (setf  *parameter-prior* (read in nil 'eof nil))
		  (setf  *structure-prior* (read in nil 'eof nil))
		  (setf  *likelihood* (read in nil 'eof nil))
		  (setf  *use-drules* (read in nil 'eof nil))
		  (setf  *t1* (read in nil 'eof nil))
		  (setf  *t2* (read in nil 'eof nil))
		  (setf  *t3* (read in nil 'eof nil))
		  (setf  *t4* (read in nil 'eof nil))
		  (setf  *t5* (read in nil 'eof nil))
		  
		  (setf  *b1* (read in nil 'eof nil))
		  (setf  *b2* (read in nil 'eof nil))
		  (setf  *b3* (read in nil 'eof nil))
		  (setf  *b4* (read in nil 'eof nil))
		  (setf  *b5* (read in nil 'eof nil))
		  
		  (setf  *counter* (read in nil 'eof nil))
		  (setf  *max-cont-edges* (read in nil 'eof nil))
		  (setf  *max-sent-length* (read in nil 'eof nil))
		  
		    
		  ;;; read rule table 
		  (format t "reading rule table ~%")
		  (loop
		   (let ((res (read in nil 'eof nil)))
		       (if (numberp res)
			   (progn
					;(setf cat-len res)
			     (return))
			 (setf (gethash (car res) *rule-info*)
			       (cdr res)))))
		  
		  (when (not (= (length *id-rules) 
				(read in nil 'eof nil)))
		    (error "grammar incompatibility"))
		  
		  
		  (format t "reading denom table ~%")
		    (setf *freq-total* 0)
		    
		    (let ((nums nil))
		      (loop
		       (let ((num (read in nil 'eof nil)))
			 (cond
			  ((equal 'eof num)
			   (setf *rule-denom* (make-array 
					       (length nums)))
			   (setf nums (reverse nums))
			   (dotimes (x (length nums))
				    (setf (svref *rule-denom* x)
					  (car nums))
				    (setf *freq-total* 
					  (+ *freq-total* (car nums)))
				    (pop nums))
			   (report)
			   (return-from load-model t))
			  (t (push num nums))))))))
(defun prune-ds nil

;;; remove junk from daughters

  (maphash #'(lambda (k v)
	       (when (and k v)
		     (if (= 0 (rule-freq v))
			 (remhash k *rule-info*)
			; (setf (gethash k *rule-info*) nil)
		       (setf (rule-daughters v) (prune-daughters
						 (rule-daughters v))))))
	   *rule-info*))

  
(defun save-learnt-rules (fp &optional (context nil))

;;; dump learnt rules to a file in a form gde can read in

;;; print freq info (as comment).

;;; freq info of learnt rules saved in rule fp-freqs

;;; context will also dump parse rule learnt in, along with sentence number.
;;; set *trace* before learning to record this info.

    (setf *print-pretty* nil)
    (let ((names nil))
      (with-open-file (out fp :direction :output)		     
		      (maphash #'(lambda (key val)			      
				   (when (and key val
					      (rule-body val))
					 (let ((name (if context
							 key
						       (gensym))))
					   (push (cons name (rule-freq val))
						 names)
					   (format out
						   "psrule ~A : " name)	       
					   (pprint-learnt-rule out val)
					   (when context
						 
						 (format out "~%~%~S~%~A~%"
						 (rule-parse val)
						 (+ 1 
						    (rule-parse-number val))))
					   (format out " . ~A ~A ~%~%~%" 
						   #\;
						   (rule-freq val)))))
				   *rule-info*))
      (with-open-file (out2 (format nil "~A-freqs" fp)
			   :direction :output)
		      (format out2 "~A" names))))


(defun pprint-learnt-rule (fp rule)
  (let ((mother (map-parser-cat-to-term-cat (list (car (rule-body rule))))))
    (pprint-cat fp (reverse mother))
    (format fp " --> ")
    (dolist (cat (cdr (rule-body rule)))
	    (pprint-cat fp (reverse 
			    (map-parser-cat-to-term-cat (list cat)))))))










