;;; Code for the MDL/MLE comparison.  Configure the learner to be liberal
;;; and don't do predicate invention ranking.

(load "parseval.lsp")
(load "eval-analysis.lisp")

(load "diag.lisp")

;;; Global settings applied when this file is loaded.
;;; NOTE(review): *max-candidates* and *hard* are not defined in this file;
;;; presumably they are declared in one of the files loaded above -- confirm.
(setf *max-candidates* 20)

(setf *hard* nil)

;;; Training corpus most recently handed to TRAIN-AND-TEST (which assigns it).
(defvar *training-set* nil)


(defun mdl-test ()
  "Call RUN with both its STRUC and PARAM arguments true, i.e. with the
structure prior and the parameter prior switched on."
  (run t t))

(defun mle-test ()
  "Call RUN with both its STRUC and PARAM arguments false, i.e. with the
structure prior and the parameter prior switched off."
  (run nil nil))

(defun train-and-test (train)
  "Train a model and evaluate it.
Sets *GENERATE-EDGES* to T and *EVALUATE* to NIL, records TRAIN in
*TRAINING-SET*, and returns whatever (PARSE-FILE1 TRAIN NIL NIL) returns."
  ;; A multi-pair SETF assigns sequentially, so the order of the three
  ;; assignments is the same as writing them as separate forms.
  (setf *generate-edges* t
        *evaluate* nil
        *training-set* train)
  (parse-file1 train nil nil))

(defun run (struc param)
  "Configure the learner, pre-train, then train on the unparsed BNC corpus.
STRUC true switches the structure prior on, false switches it off.
PARAM true switches the parameter prior on, false switches it off.
Returns the value of TRAIN-AND-TEST."
  (evaluate-off)
  (setf +n-best-retained+ 20            ; number of new rules to consider
        *parse-length* 2000)            ; size of window
  (filter-punct nil)                    ; allow [punct]
  (filter-zero t)                       ; disallow [bar 0]
  (set-filter-block-on)                 ; allow filtering of above
  (domain-off)                          ; no use of domain knowledge
  (filter-ternary nil)                  ; allow any ternary rule

  (reset-tables)

  ;; Select the priors requested by the caller.
  (cond (struc (structure-prior-on))
        (t (structure-prior-off)))
  (cond (param (parameter-prior-on))
        (t (parameter-prior-off)))

  (report)
  (pre-train "../data/sustr.trees1")
  (let ((corpus (format nil "/usr/groups/corpora/bnc-tagged/unparsed")))
    (train-and-test corpus)))
	   	  	  

(defun iter-test (n mle block punct zero)
  "Configure the learner and make one training pass over ../data/unparsed.bnc.
N      is accepted for interface compatibility but is not used here.
MLE    true switches both the structure and the parameter prior off;
       false switches both on.
BLOCK  true turns the filter block on, false turns it off.
PUNCT and ZERO are passed straight to FILTER-PUNCT and FILTER-ZERO.
Returns the value of TRAIN-AND-TEST.

NOTE(review): the original header comment described iterating around the
training corpus and discarding infrequent rules after each run.  This body
performs a single pass and never prunes; presumably the caller was meant to
loop and call PRUNE-LOW-EVENTS itself -- confirm."
  (declare (ignore n))
  (setf *n-best* 1.0)
  (setf +n-best-retained+ 20)
  (setf *parse-length* 250)
  (setf *likelihood* t)

  (if block
      (set-filter-block-on)
      (set-filter-block-off))
  (filter-punct punct)
  (filter-zero zero)
  (set-block-on)
  (domain-on)
  (reset-tables)
  ;; NOTE(review): unlike RUN, pre-training happens here *before* the priors
  ;; are selected.  The order is kept as written; confirm it is intended.
  (pre-train "../data/sustr.trees1")
  (if mle
      (progn
        (structure-prior-off)
        (parameter-prior-off))
      (progn
        (structure-prior-on)
        (parameter-prior-on)))
  (report)
  ;; FORMAT with no directives just yields a fresh copy of the path string.
  (train-and-test
   (format nil "../data/unparsed.bnc")))
	     

(defun prune-low-events (n)
  "Remove from *RULE-INFO* every rule whose frequency is below N.
Only entries with a non-NIL name, a non-NIL entry and a non-NIL RULE-BODY are
considered.  For each removed rule its RULE-FREQ is subtracted from the
*RULE-DENOM* slot indexed by its RULE-MOTHER.
Signals an error if that subtraction would leave the denominator at zero or
below; in that case neither the denominator nor the table is touched for the
offending rule.  Returns NIL."
  (maphash #'(lambda (name entry)
               (when (and name entry (rule-body entry)
                          (> n (rule-freq entry)))
                 (let* ((mother (rule-mother entry))
                        (remaining (- (svref *rule-denom* mother)
                                      (rule-freq entry))))
                   ;; Validate before mutating, so an error never leaves the
                   ;; denominator decremented while the rule is still present.
                   ;; As in the original, zero is rejected as well as
                   ;; negative values.
                   (when (>= 0 remaining)
                     (error "rule denom negative"))
                   (setf (svref *rule-denom* mother) remaining)
                   ;; Removing the entry currently being visited is the one
                   ;; table modification MAPHASH permits during traversal.
                   (remhash name *rule-info*))))
           *rule-info*))


(defun zipf ()
  "Write a frequency-of-frequencies table for *RULE-INFO* to the file \"temp\".
Each output line is \"F C \" where C (> 0) is the number of rules whose
RULE-FREQ equals F.  Only frequencies below 500 are written.  An existing
\"temp\" file is replaced.  Returns NIL.
Assumes RULE-FREQ yields a non-negative integer for every entry -- it is used
directly as an array index."
  (let ((highest 0))
    ;; First pass: find the largest frequency so the histogram can be sized.
    (maphash #'(lambda (key val)
                 (when (and key (< highest (rule-freq val)))
                   (setf highest (rule-freq val))))
             *rule-info*)
    (let* ((size (+ 1 highest))
           (freqs (make-array size :initial-element 0)))
      ;; Second pass: count how many rules share each frequency.
      (maphash #'(lambda (key val)
                   (when key
                     (incf (svref freqs (rule-freq val)))))
               *rule-info*)
      ;; :SUPERSEDE lets the function be re-run; without it OPEN signals an
      ;; error on most implementations once "temp" exists.
      (with-open-file (out "temp" :direction :output :if-exists :supersede)
        ;; Clamp to the histogram size: the fixed bound of 500 indexed past
        ;; the end of FREQS whenever the highest frequency was below 499.
        (dotimes (x (min 500 size))
          (when (< 0 (svref freqs x))
            (format out "~A ~A ~%" x (svref freqs x))))))))




(defun parse-selection-prob (entry name)
  "Constant stub: ignores ENTRY and NAME and always returns 1.0."
  (declare (ignore entry name))
  1.0)
(defun daughter-selection-prob (d-entries mo-entry name)
  "Constant stub: ignores D-ENTRIES, MO-ENTRY and NAME and always returns 1.0."
  (declare (ignore d-entries mo-entry name))
  1.0)
(defun single-rule-score (n)
  "Constant stub: ignores N and always returns 1.0."
  (declare (ignore n))
  1.0)
