;;; Parseval-style evaluation.
 ;;uses jac code for massaging trees 

(defvar *evaluate* nil) ;;; true to evaluate 
(defvar *bracketed-trees* nil)
(defvar *training-sentences* nil)
(defvar *smooth* nil) ;;; true to smooth

(load "analysis.lisp")

(if (probe-file "parseval.lsp")
    (load "parseval")
  (format t "Did not load parseval ~%"))

(defun evaluate (fp out-fp)

;;; read in training set fp, writing results to out; call parseval over
;;; out

  (let ((old-n-best-retained +n-best-retained+)
	(old-parse-length *parse-length*))

    (setf +n-best-retained+ 1) 
    (setf *parse-length* 2)
    (setf *evaluate* t)
    (setf *print-pretty* t)

    (read-in-training-set fp)
    
    (with-open-file (raw-out (format nil "~A-raw" out-fp)
			     :direction :output)
        (with-open-file (fail-out (format nil "~A-fail" out-fp)
			     :direction :output)
    (with-open-file 
     (sent-out 
      (format nil "~A-sentences" out-fp)
      :direction :output)
     (with-open-file 
      (test-out (format nil "~A-test" out-fp)
		:direction :output)
      (with-open-file 
     (bench-out 
      (format nil "~A-bench" out-fp)
      :direction :output)
     
     (format test-out "%LB ( ~%%RB ) ~%")
     (format bench-out "%LB ( ~%%RB ) ~%")
     
     (let ((parses nil) (counter 0))
       (dolist (p *training-sentences*)
	       (setf *previous-sentence p)
	       (setf counter (1+ counter))
	       (invoke-parser p nil)	       

	       (let ((result (get-words-from-tree
			      (get-bracketing-from-parse-tree
			       (cdar *current-parse-trees)))))
		 (when (and *current-parse-trees
			    (not (= (length p)
				    (length result))))
		       (pprint (get-rule-labelling-from-parse-tree
				(car *current-parse-trees)))
		       (pprint p) (terpri)(pprint result) 
		       (format t "error in parse ")
		       (setf *current-parse-trees nil)))
	       
	       (when (not *current-parse-trees)
		   (format fail-out "~A ~% ~A~%" counter p))
		   
	       (when *current-parse-trees
		     
		     (format test-out "~A ~S ~%" counter 
			     (tree-to-string-tree
			      (lr1-parse-analysis-rule-tree 
			       (cdar *current-parse-trees))))
		     (format raw-out "~A ~S ~%" counter 
			     
			     (get-rule-labelling-from-parse-tree
			      (cdar *current-parse-trees)))
		     (format bench-out "~A ~S ~%" counter
			     (tree-to-string-tree 
			      (car *bracketed-trees*)))
		     (format sent-out "~A ~%" counter)
		     
		     (dolist (word p)
			     (format sent-out "~S " 
				     (car 
				      (tree-to-string-tree
				       (list
					(car (phrasal-parse-structure-word 
					      (format nil "~A" word))))))))
		     
		     (format sent-out "~%~%"))	       
	       (pop *bracketed-trees*))))))
    
    (parseval (format nil "~A-sentences" out-fp)
	      (format nil "~A-bench" out-fp)
	      (format nil "~A-test" out-fp))
    
    (setf +n-best-retained+ old-n-best-retained)
    (setf *parse-length* old-parse-length)
    (setf *evaluate* nil)))))
  

(defun tree-to-string-tree (tree)

;;; idiotic parseval reads in sentences using normal lisp convention
;;; for case preserving tokens eg |fred|; for sentences, these are 
;;; read as fred, for trees, it treats | as a character. !!!!

;;; also, it can't deal with (_( or )_) etc

  (cond
   ((null tree) nil)
   ((atom tree) tree)
   ((or (and (stringp (car tree))
	     (char-member  '(#\] #\[ #\> #\< #\)
			     #\, #\$ #\?  #\( #\; #\&) (car tree)))
	(and (atom (car tree)) (char-member
				'(#\] #\[ #\> #\< #\)
			     #\, #\$ #\?  #\( #\; #\&) (format nil "~S"
							       (car tree)))))
    (cons "unk" (tree-to-string-tree (cdr tree))))
   ((listp (car tree))
    (cons (tree-to-string-tree (car tree)) 
		 (tree-to-string-tree (cdr tree))))
   ((numberp (car tree)) (cons "unk" (tree-to-string-tree (cdr tree))))
   ((member (car tree) '(  "-_-") ;;; some rules have underscores in them
	    :test #'string=)			       
    (cons "unk" (tree-to-string-tree (cdr tree))))
   ((stringp (car tree)) (cons
			  (car tree) (tree-to-string-tree (cdr tree))))
   (t
    (cons (format nil "~A" (car tree))
		 (tree-to-string-tree (cdr tree))))))
      
(defun char-member (char string)
  (dotimes (x (length string) nil)
	   (let ((temp (aref string x)))
	     (dolist (c char nil)
		     (when (equal c temp)
			   (return-from char-member t))))))

(defun read-in-training-set (fp)

  (setf *bracketed-trees* nil)
  (setf *training-sentences* nil)

  (with-open-file (in fp :direction :input)
		  (loop
		   (let ((sentence (read in nil 'eof nil))
			 (tree (read in nil 'eof nil)))
		     (cond
		      ((equal sentence 'eof)
		       (return-from read-in-training-set t))
		      (t

		       (push  sentence
			     *training-sentences*)
		       (push tree
			     *bracketed-trees*)))))))
(defun mean (l)
  (let ((sum nil))
    (dolist (ll l)
	    (setf sum (+ sum ll)))
    (/ sum (length l))))

(defun remove-nonterminals-from-tree (tree)
  (cond
   ((null tree) nil)
   ((atom tree) tree)
   ((listp (car tree))
    (let ((res
 (remove-nonterminals-from-tree (cdar tree))))
      (if res
	  (cons res (remove-nonterminals-from-tree (cdr tree)))
	(remove-nonterminals-from-tree (cdr tree)))))
   (t
    (cons (car tree) (remove-nonterminals-from-tree (cdr tree))))))

(defun lobotomise-grammar (n)

;;; randomly delete n id rules from grammar.

  (let ((rules nil))
    (dolist (x (make-random-set (length *id-rules) n))
	    (push (nth x  *id-rules) rules))
    (delete-idrule-list rules)))

(defun make-random-set (l n)

;;; make a list of random numbers s.t no repeats

  (let ((num nil))
    (dotimes (x n num)
	     (loop
	      (let ((res (random l)))
		(unless (member res num :test #'=)
			(push res num)
			(return)))))))

(defun get-words-from-tree (tree)
  (cond
   ((null tree) nil)
   ((listp (car tree))
    (append (get-words-from-tree (car tree))
	    (get-words-from-tree (cdr tree))))
   ((stringp (car tree)) (get-words-from-tree (cdr tree)))
   (t (cons (car tree) (get-words-from-tree (cdr tree))))))


