;;; semco.cl - Semantic cooccurrence analysis for parse selection
;;; Adaptation of PhD project for the New Alvey Natural Language Tools
;;; Copyright (C) Eirik Hektoen 1998


;;; NB Implementation dependencies
;;; ==============================
;;; The program uses an error handler for robust corpus parsing, but this is
;;; disabled for AKCL Lisp which does not implement conditions as defined in
;;; Steele's CLTL2.  Timeouts and GDE errors use catch/throw and are not
;;; affected.  (Search for `AKCL'.)
;;;
;;; The program uses Allegro Lisp-specific hooks to detects garbage collections
;;; in order to reset timeouts.  (Search for `ALLEGRO'.)


;;; NB Parser Environment Dependence
;;; ================================
;;; This program is intended to run as part of the GDE.  If required to run
;;; with another parsing system, adapt functions calling requires-the-gde.


;;; NB Includes GDE Patches
;;; =======================
;;; This file includes substitutes for the GDE functions g-insert-traces and
;;; g-unpack (for parser timeout), and g-defns (for tag-based word lookup).
;;; The original functions are renamed with the prefix "original-gde-".


;;; Corpus file format for training and testing
;;; ===========================================
;;; 1. Lines starting with # (comments) and blank lines are ignored.
;;; 2. Lines starting with [ or ] contain bracketting.  Each [ is optionally
;;;    followed by a category label.  Each line may contain multiple brackets.
;;; 3. Lines consisting entirely of a * represent empty syntactic gaps.
;;; 4. All other lines represent the original text, with one word/punctuation
;;;    symbol per line.  This is optionally followed by one or more tag-strings
;;;    of the following form (where [] indicates optional parts):
;;;
;;;      [lemma[+suffix]_]tag[:probability]
;;;
;;; 5. Spaces (incl. tabs) serve to delimit labels, words and tag-strings, but
;;;    are otherwise ignored.  There must be no space within a tag-string.
;;; 6. Any \ in the input indicates that the next character is part of an
;;;    alphanumeric element (label, word, lemma, suffix or tag).  This is
;;;    mainly required for # [ ] * \ and space, but works for any character.


;;; User accessible status variables
;;; ================================
;;; *semco-trees, *semco-forms, *semco-coocs and *semco-probs hold the parse
;;; trees, forms, cooccurrences and probabilities after calls to semco-parse or
;;; semco-probs.  The last three are lists of lists of forms etc. reflecting
;;; the fact that each parse may have multiple possible semantic forms.

;;; If *semco-sort is non-NIL, semco-parse and semco-probs sort the parses and
;;; the forms for each parse in decreasing order of probability, parses
;;; according to their most probable form.  If it is an integer n, the list is
;;; truncated after the n most probable parses.  This affects the GDE's list of
;;; parse trees in *current-parse-trees as well as the derived Semco variables.
;;; (The alternative forms of the parses included are not affected.)

;;; NB: *semco-display is automatically switched off for corpus processing.

(defvar *semco-trees nil)               ; current parse trees (Semco style)
(defvar *semco-forms nil)               ; current semantic forms
(defvar *semco-coocs nil)               ; current semantic cooccurrences
(defvar *semco-probs nil)               ; current parse/form probabilities

(defvar *semco-sort t)                  ; sort/cutoff GDE list of parses
(defvar *semco-display t)               ; show details of Semco analysis


;;; Customisation parameters
;;; ========================

(defvar *cooc-db-dir    "semco-db/")    ; cooccurrence data base directory
(defvar *cooc-db-files  100)            ; number of files in db
(defvar *cooc-db-freq   100)            ; number of sents per db update
(defvar *corpus-train   "semco-sample") ; training corpus (default)
(defvar *corpus-test    "semco-sample") ; testing corpus (default)
(defvar *results-file   "semco-test-results"); test results file (default)
(defvar *parser-timeout 120)            ; timeout/sec, or nil to disable
(defvar *max-looseness  0.3)            ; requirement for corpus/parse match
(defvar *max-exp-coocs  100000)         ; max expected number of coocs
(defvar *corpus-skip    '(|,| |.|))     ; list of words (symbols) to skip
(defvar *parser-gaps    '(e))           ; list of parser gap markers


;;; Preliminary global declarations
;;; ===============================

#- AKCL
(declaim
 (optimize (speed 3) (space 0) (safety 0) (debug 0))
 (special *cooc-db-hash *cooc-db-touch *cooc-db-conv-begun
          *semco-n-sentences *semco-corpus-stats *chart-edges
          *current-parse-trees)
 (inline requires-the-gde))

(defun requires-the-gde ()
  "Call initially in functions that depend on GDE variables or functions."
  (unless (find-symbol "*GDE-VERSION-NO")
    (error "This function requires the GDE")))


;;; Parse selection
;;; ===============
;;; These functions depend on a previously trained cooccurrence data base in
;;; the directory named in *cooc-db-dir.  If the default name in this variable
;;; is wrong, set it either directly or with cooc-db-init before first call.

(defun semco-parse (sentence)
  "Parse sentence with front-end parser and compute parse probabilities"
  (when (eq (semco-front-end-parser sentence) 'parse-ok)
    (semco-probs)))

(defun semco-probs ()
  "Compute the probability distribution of current parse trees and forms"
  ;; The result is stored in *semco-probs and returned.  It is a list of lists
  ;; of probabilities, reflecting the fact that the parser returns a list of
  ;; possible forms for each parse.  The probability normalisation is based on
  ;; the maximum prob for each parse and not adjusted after any cutoff.
  (cooc-db-init)
  (get-semco-coocs)
  (compute-parse-probs)
  (semco-sort)
  (when *semco-display (semco-show-coocs))
  *semco-probs)

(defun semco-sort ()
  "Sort and cut off the lists of parses, forms, coocs and probabilities."
  ;; NB: Modifies the GDE's *current-parse-trees as well as *semco-trees,
  ;; *semco-forms, *semco-coocs, *semco-probs.
  (requires-the-gde)
  (when *semco-sort
    (get-semco-trees)
    (get-semco-forms)
    (let ((q (mapcar #'list* *semco-probs *semco-forms *semco-trees
                     *semco-coocs *current-parse-trees)))
      (dolist (r q)
        (when (cdar r)                  ; sort possible forms
          (let ((s (mapcar #'cons (car r) (cadr r))))
            (setq s (sort s #'> :key #'car))
            (setf (car r) (mapcar #'car s) (cadr r) (mapcar #'cdr s)))))
      (setq q (sort q #'> :key #'caar))
      (when (integerp *semco-sort)
        (setf (cdr (nthcdr (1- *semco-sort) q)) nil))
      (setq *semco-probs (mapcar #'car q)
            *semco-forms (mapcar #'cadr q)
            *semco-trees (mapcar #'caddr q)
            *semco-coocs (mapcar #'cadddr q)
            *current-parse-trees (mapcar #'cddddr q))))
  *semco-probs)


;;; Training
;;; ========

(defun semco-train-corpus (&optional corpus db-dir)
  "Train or continue training Semco with the parse trees in the corpus"
  (format t "~2&Starting Semco training...~%")
  (cooc-db-init db-dir)
  (when corpus (setq *corpus-train corpus))
  (process-corpus *corpus-train #'semco-train-tree
                  #'check-cooc-db #'update-cooc-db)
  (format t "~2&Semco training complete.~%")
  (show-corpus-status))

(defun semco-train-tree (tree)
  "Increment Semco training data for one parse tree"
  ;; Extract and parse sentence, find parse matching tree, increment cooc db.
  ;; tree = a tagged parse tree as returned by read-corpus-tree
  (cooc-db-init)
  (when *cooc-db-conv-begun
    (error "Cannot continue training after parse probability computations"))
  (let ((correct-coocs (parse-and-match tree)))
    ;; correct-coocs = subset of *semco-coocs representing "correct" parses
    (let* ((m (length *semco-coocs)) (mc (length correct-coocs)))
      (when (and (< 0 mc m) (<= (/ (float (1- mc)) (1- m)) *max-looseness))
        (let ((temp nil))
          (dolist (parse *semco-coocs)  ; each parse is a list of forms
            (let ((wgt (/ 1.0 (length parse))))
              (dolist (form parse)      ; each form is a list of coocs
                (dolist (cooc form)
                  (let ((x (assoc cooc temp :test #'equal)))
                    (unless x (push (setq x (list* cooc 0.0 0.0)) temp))
                    (incf (cadr x) wgt)
                    (when (member parse correct-coocs)
                      (incf (cddr x) wgt)))))))
          (setq m (float m))
          (dolist (x temp)
            (let ((cooc (car x)) (n (cadr x)) (nc (cddr x)))
              (when (< 0.0 n m)
                (let ((a (- 1.0 (/ n (- m n))))
                      (pos (/ nc mc))
                      (neg (/ (- mc nc) mc)))
                  (let ((h (assoc a (gethash cooc *cooc-db-hash))))
                    (if h (progn (incf (cadr h) pos) (incf (cddr h) neg))
                      (push (list* a pos neg)
                            (gethash cooc *cooc-db-hash)))))
                (setf (svref *cooc-db-touch (cooc-db-index cooc)) t)))))))))


;;; Testing
;;; =======
;;; To allow flexible retrospective analysis, detailed test results are saved
;;; to file for individual sentences as well as for the corpus as a whole.
;;; The data in the test results file are (see function for precise format):
;;;  - corpus file position
;;;  - number of sentences (*semco-n-sentences)
;;;  - corpus sentence statistics (*semco-corpus-stats)
;;;  - reverse list of sentence profiles (*test-corpus-results):
;;;     - number of words
;;;     - number of parses
;;;     - number of `correct' parses (with identical max matching score)
;;;     - best probabilistic ranking of a `correct' parse
;;;     - worst such ranking

(defvar *test-corpus-results nil)

(defun semco-test-corpus (&optional corpus db-dir results-file)
  "Test Semco on the given corpus"
  (format t "~2&Starting Semco testing...~%")
  (cooc-db-init db-dir)
  (when corpus (setq *corpus-test corpus))
  (when results-file (setq *results-file results-file))
  (setq *test-corpus-results nil)
  (process-corpus *corpus-test #'semco-test-tree
                  #'check-test-results #'save-test-results)
  (format t "~2&Semco testing complete.~%")
  (show-corpus-status))

(defun semco-test-tree (tree)
  "Increment Semco testing data for one sentence"
  ;; Parse sentence, select most probable parse with Semco, also find the parse
  ;; best matching the tree, compare them and increment the test results.
  ;; tree = a tagged parse tree as returned by read-corpus-tree
  (cooc-db-init)
  (multiple-value-bind (correct-coocs n-words) (parse-and-match tree)
    ;; correct-coocs = subset of *semco-coocs representing "correct" parses
    ;; n-words = number of words in the sentence
    (let ((best-rank 0) (worst-rank 0))
      (when *semco-coocs
        (compute-parse-probs)
        (let ((probs (mapcar #'(lambda (x) (reduce #'max (cons 0 x)))
                             *semco-probs))
              (max-corr 0))
          (mapc #'(lambda (par prob)
                    (when (and (member par correct-coocs) (> prob max-corr))
                      (setq max-corr prob)))
                *semco-coocs probs)
          (setq best-rank 1)
          (dolist (prob probs)
            (when (>= prob max-corr)
              (incf worst-rank)
              (when (> prob max-corr) (incf best-rank))))))
      (push (list n-words (length *semco-coocs) (length correct-coocs)
                  best-rank worst-rank)
            *test-corpus-results)))
  (when *semco-display (semco-show-coocs))
  (car *test-corpus-results))

(defun save-test-results (file-pos)
  (when (probe-file *results-file)
    (when (probe-file "semco-results-tmp")
      (delete-file "semco-results-tmp"))
    (rename-file *results-file "semco-results-tmp"))
  (with-open-file (f *results-file :direction :output)
    (format f "~s~%~s~%~s~%~s~%" file-pos *semco-n-sentences
            *semco-corpus-stats *test-corpus-results))
  (when (probe-file "semco-results-tmp")
    (delete-file "semco-results-tmp")))

(defun check-test-results ()
  (when (and (not (probe-file *results-file))
             (probe-file "semco-results-tmp"))
    (rename-file "semco-results-tmp" *results-file))
  (with-open-file (f *results-file :if-does-not-exist nil)
    (when f
      (format t "Found ~a~%" *results-file)
      (let ((file-pos (read f)))
        (setq *semco-n-sentences (read f)
              *semco-corpus-stats (read f)
              *test-corpus-results (read f))
        file-pos))))


;;; Common corpus operations for training and testing
;;; =================================================

(defvar *semco-n-sentences 0)
(defvar *semco-corpus-stats nil)
(defvar *tagged-sentence nil)           ; ((word tag-string.. ).. )
(defvar *tagged-sentence-pos 0)

(defun process-corpus (corpus sentence-fn &optional check-fn update-fn)
  (format t "Corpus file: ~a~%" corpus)
  (setq *semco-n-sentences 0 *semco-corpus-stats nil)
  (setq *semco-display nil)
  (with-open-file (tr corpus)
    (let ((check-pos (when check-fn (funcall check-fn))))
      (when check-pos
        (format t "Resuming from checkpoint at file pos ~d.~%" check-pos)
        (show-corpus-status)
        (file-position tr check-pos)))
    (let ((upd *semco-n-sentences))
      (loop (handler-case
                (let ((tree (read-corpus-tree tr)))
                  (unless tree (return))
                  (incf *semco-n-sentences)
                  (funcall sentence-fn tree))
              (serious-condition (c)
                (setq *chart-edges nil) ; free memory
                (warn "Sentence ~d aborted by error:~%  ~a"
                      *semco-n-sentences c)))
            (when (zerop (mod *semco-n-sentences *cooc-db-freq))
              (funcall update-fn (file-position tr))
              (setf upd *semco-n-sentences)))
      (when (> *semco-n-sentences upd)
        (funcall update-fn (file-position tr))))))

(defun parse-and-match (tree)
  "Extract sentence from tree, parse and find best matches in the result"
  ;; tree = a tagged parse tree as returned by read-corpus-tree
  ;; Returns 2 values: the best parses as a subset of *semco-coocs (or NIL on
  ;; parse error), the number of words in the sentence.
  (format t "~<~%~15:;[~d~>" *semco-n-sentences)
  (setq *tagged-sentence (extract-tagged-sentence tree)
        *tagged-sentence-pos 0)
  (let* ((sentence (mapcar #'(lambda (x) (if (consp x) (car x) x))
                           *tagged-sentence))
         (status (semco-front-end-parser sentence))
         (correct-coocs nil))
    (if (not (eq status 'parse-ok)) (format t " ~(~a~)" status)
      (let ((correct-trees (find-most-similar-trees *semco-trees tree)))
        (get-semco-coocs)
        (setq correct-coocs
              (mapcan
               #'(lambda (x y) (when (member x correct-trees) (list y)))
               *semco-trees *semco-coocs))))
    (setq *chart-edges nil)             ; free memory
    (format t "] ")
    (values correct-coocs (length sentence))))

;;; ---------------------------------------------------------------------------
;;; This patched version of the GDE's function g-defns (called by the parser to
;;; look up word definitions) backs off first to the down-cased word and then
;;; to the associated tags when the normal word lookup fails.  It depends on
;;; *tagged-sentence as source for the tags and lemmas, and will skip the first
;;; *tagged-sentence-pos entries in this list (to distinguish multiple
;;; occurrences of the same word).  Process-corpus and parse-and-match set
;;; these variables before parsing a sentence.
;;; ---------------------------------------------------------------------------

(when (and (fboundp 'g-defns)
           (not (fboundp 'original-gde-g-defns)))
  (setf (symbol-function 'original-gde-g-defns) #'g-defns))

;;; MO 31 July 1998. 
;;; Doesn't deal properly with in-lexicon words
;;; that are upper-case (but lower case in lexicon)
;;; eg. I_PPIS1 v i_PPIS1.

(defun g-defns (word)                   ; patched version of GDE function
  (let ((tags nil))
    (or (when (get word 'word) (original-gde-g-defns word))
        (let ((p (position #\_ (string word))))
          (when p
            (let ((wd (intern (subseq (string word) 0 p)))
		  (lower-wd (intern (subseq (string (string-downcase word))
					    0 p)))
                  (tag (when p (intern (subseq (string word) p)))))
              (or (when (get wd 'word) (original-gde-g-defns wd))
		  (when (get lower-wd 'word) (original-gde-g-defns lower-wd))
                  (when (get tag 'word) (original-gde-g-defns tag))))))
		  
			
        (when (some #'upper-case-p (string word))
          (let* ((low (intern (string-downcase word))))
            (when (get low 'word) (original-gde-g-defns low))))
        (let ((pos (position
                    word *tagged-sentence
                    :test #'(lambda (w x) (and (consp x) (eql w (car x))))
                    :start *tagged-sentence-pos)))
          (when pos
            (setq tags (sort (mapcar #'parse-tag-string
                                     (cdr (nth pos *tagged-sentence)))
                             #'> :key #'cddr)
                  *tagged-sentence-pos (1+ pos))
            (dolist (x tags)
              (let ((lemma (or (car x) word))
                    (tag (intern (concatenate 'string "_" (cadr x)))))
                (when (get tag 'word)
                  (let ((def (copy-tree (original-gde-g-defns tag))))
                    (when def
                      (setq lemma (intern lemma))
                      ;; Change tag-based defs to refer to the original word
                      ;; form and to use the lemma for semantic predicate:
                      (dolist (d def)
                        (setf (second d) word ; visible word form
                              (third d) lemma)) ; semantic form
                      (return def))))))))
        (original-gde-g-defns word))))

(defun parse-tag-string (s)
  ;; Returns: (lemma tag . probability)
  ;; Tag string format: [lemma[+suffix]_]tag[:probability]
  (let ((a (position #\+ s :from-end t))
        (b (position #\_ s :from-end t))
        (c (position #\: s :from-end t)))
    (let* ((lemma (when (or a b) (subseq s 0 (or (and a b (min a b)) a b))))
    (tag (subseq s (if lemma (+ (or (and a b (max a b)) a b) 1) 0) c))
    (prob (or #- AKCL
              (ignore-errors
                  (let ((x (read-from-string (subseq s (+ c 1)))))
                    (when (numberp x) x)))
              #+ AKCL
              (let ((x (read-from-string (subseq s (+ c 1)))))
                (when (numberp x) x))
              0.0)))
      (list* lemma tag prob))))

(defun show-corpus-status ()
  (format t "Sents: ~d (~(~:{~a ~d~:^, ~}~))~@
             Coocs: ~d~2%"
          *semco-n-sentences *semco-corpus-stats
          (hash-table-count *cooc-db-hash)))


;;; Cooccurrence data base
;;; ======================
;;; The hash maps each cooc to an assoc list ((a pos . neg)... ) of pos and neg
;;; obs for each a = (- 1.0 (/ n (- m n))) where the cooc is in n of m parses.
;;; The hash is mapped to a number of files in the cooc db directory.

(defvar *cooc-db-hash)                  ; hash: cooc -> ((a pos . neg)... )
(defvar *cooc-db-touch)                 ; array: cooc db file -> changed/read
(defvar *cooc-db-pathname)
(defvar *cooc-db-conv-begun)            ; records start of conversion of hash

(defun cooc-db-init (&optional db-dir)
  "Initialise cooc db data structures and pathname"
  (unless (and (boundp '*cooc-db-hash)
               (or (null db-dir) (eql *cooc-db-dir db-dir)))
    (when db-dir (setq *cooc-db-dir db-dir))
    (setq
     *cooc-db-hash (make-hash-table :size *max-exp-coocs :test #'equal)
     *cooc-db-touch (make-array *cooc-db-files :initial-element nil)
     *cooc-db-pathname (merge-pathnames *cooc-db-dir)
     *cooc-db-conv-begun nil)
    ;; If dir is specified as a file, convert to sub-directory:
    (when (pathname-name *cooc-db-pathname)
      (let ((d (pathname-directory *cooc-db-pathname))
            (n (file-namestring *cooc-db-pathname)))
      (setq *cooc-db-pathname
            (make-pathname :host (pathname-host *cooc-db-pathname)
                           :device (pathname-device *cooc-db-pathname)
                           :directory (if (consp d) (append d (list n))
                                        (list :relative n))))))
    (format t "Cooc db dir: ~a~%" *cooc-db-pathname)))


;;; Cooccurrence data base file operations
;;; ======================================

(defun update-cooc-db (file-pos)
  "Update cooc db with files that have changed since last call (for training)"
  ;; Write data in temporary files, delete matching old files, and rename.
  ;; The corpus file pos is stored in "done".
  (format t "~2&Updating cooc db at file pos ~d...~%" file-pos)
  (maphash
   #'(lambda (c h)
       (let* ((i (cooc-db-index c)) (d (svref *cooc-db-touch i)))
       (cond ((eq d t) (setf (svref *cooc-db-touch i) (list (cons c h))))
             (d (push (cons c h) (svref *cooc-db-touch i))))))
   *cooc-db-hash)
  (let ((files nil))
    (dotimes (i *cooc-db-files)
      (let ((x (svref *cooc-db-touch i)))
      (when x
        (let ((file (cooc-db-pathname i t)))
          (with-open-file (f file :direction :output)
            (let ((*print-pretty* nil)) (format f "(~%~{~s~%~})~%" x)))
          (push file files)))))
    (let ((file (cooc-db-pathname "done" t)))
      (with-open-file (f file :direction :output)
      (format f "~s~%~s~%~s~%" file-pos *semco-n-sentences
              *semco-corpus-stats))
      (push file files))
    (dolist (file (nreverse files)) (rename-temp-cooc-db-file file)))
  (fill *cooc-db-touch nil)
  (format t "Sents: ~d (~(~:{~a ~d~:^, ~}~))~@
             Coocs: ~d~2%"
          *semco-n-sentences *semco-corpus-stats
          (hash-table-count *cooc-db-hash)))

(defun check-cooc-db ()
  "Check status of cooc db, fixing it after mid-update interruptions"
  ;; If "done.s-tmp" exists, finish renaming temp files, otherwise delete them.
  ;; When a checkpoint exists, load cooc db and return file pos.
  (let* ((dir (directory (cooc-db-pathname :wild t)))
         (done (find "done" dir :key #'pathname-name :test #'equal))
         (done-name (cooc-db-pathname "done")))
    (when dir
      (format t "~&[~:[Deleting~;Renaming~] temp files in cooc db...]~2%" done)
      (if (not done) (dolist (file dir) (delete-file file))
      (dolist (file (append (delete done dir) (list done)))
        (rename-temp-cooc-db-file file))))
    (with-open-file (f done-name :if-does-not-exist nil)
      (when f
        (format t "Found ~a~%" done-name)
        (let ((file-pos (the integer (read f))))
          (setq *semco-n-sentences (read f)
                *semco-corpus-stats (read f))
          (dotimes (i *cooc-db-files) (load-cooc-db-file i))
          (fill *cooc-db-touch nil)
          file-pos)))))

(defun load-cooc-db-file (i)
  "Load the cooc db file with index i into the hash"
  (with-open-file (f (cooc-db-pathname i) :if-does-not-exist nil)
    (dolist (x (when f (read f)))
      (setf (gethash (car x) *cooc-db-hash) (cdr x))))
  (setf (svref *cooc-db-touch i) t))

(defun cooc-db-index (cooc)
  "Determine the cooc db file index of a cooc"
  ; Uses a simple programmed hash fn for compatibility across Lisp versions!
  (let ((a (symbol-name (car cooc))) (b (symbol-name (cdr cooc)))
        (hash 0) (k 11))
    (dotimes (i (length a)) (incf hash (* (incf k 2) (char-code (char a i)))))
    (dotimes (i (length b)) (incf hash (* (incf k 2) (char-code (char b i)))))
    (mod hash *cooc-db-files)))

(defun cooc-db-pathname (index &optional tmp)
  "Determine pathname of indexed or named file in cooc db directory"
  (let ((name (if (integerp index) (format nil "~4,'0d" index) index))
        (type (if tmp "s-tmp" "s")))
    (merge-pathnames (make-pathname :name name :type type)
                     *cooc-db-pathname)))

(defun rename-temp-cooc-db-file (file)
  "Rename a temporary cooc db file to its permanent name"
  (let ((new (file-namestring (cooc-db-pathname (pathname-name file)))))
    (rename-file file new)))


;;; Corpus input
;;; ============
;;; Read-corpus-tree returns an S-structure with category labels represented as
;;; symbols, gaps by the symbol *, and words by (:word symbol tag-string.. ).
;;; Typical example (based on the style of the WSJ corpus):
;;;   ((S (NP (:word he "he_NP:1"))
;;;       (VP (:word left "leave_VVD:0.8" "left_JJ:0.2")))
;;;    (:word |.| "._.:1"))
;;; Trees and subtrees do not necessarily contain a category label.  The tag
;;; strings are copied from the corpus with escape resolution and may be more
;;; complex than this.  At EOF, the function returns nil.

(defun read-corpus-tree (f)
  "Read next parse tree in the open corpus file"
  (let ((q nil)) ; a stack of nested, incomplete, reverse lists of daughters
    (loop
     (let ((tok (read-corpus-token f)))
       (case tok
         ((nil) (when q (warn "EOF in mid-sentence")) (return nil))
         (#\[ (push nil q))
         (#\] (when q
                (setf (car q) (nreverse (car q)))
                (if (null (cdr q)) (return (car q))
                  (let ((x q)) (setf q (cdr q) (cdr x) (car q) (car q) x)))))
         (t (unless (and (listp tok) (member (cadr tok) *corpus-skip))
              (if q (push tok (car q))
                (warn "Top-level atomic token in corpus: ~s" tok)))))))))

(defun extract-tagged-sentence (tree)
  "Extract list of tagged words from parse tree"
  (cond ((atom tree) nil)
        ((eq (car tree) :word) (list (cdr tree)))
        (t (mapcan #'extract-tagged-sentence tree))))

(defvar *read-token-buf
  (make-array '(30) :element-type 'character :adjustable t :fill-pointer 0))

(defun read-corpus-token (f)
  ;; Returns: #\[, #\], *, label, (:word symbol tag-string.. ), nil
  (let (ch (comment nil) (newline nil))
    (loop (setq ch (read-char f nil nil))
          (cond ((eql ch #\#) (setq comment t))
                ((eql ch #\newline) (setq newline t comment nil))
                ((not (or comment (member ch '(#\space #\tab)))) (return))))
    (if (member ch '(#\[ #\] nil)) ch
      (let ((buf nil))
        (loop
         (setf (fill-pointer *read-token-buf) 0)
         (loop (when (eql ch #\\)
                 (setq ch (read-char f nil nil))
                 (unless ch (return)))
               (vector-push-extend ch *read-token-buf)
               (setq ch (read-char f nil nil))
               (when (member ch '(#\[ #\] #\* #\space #\tab #\newline nil))
                 (return)))
         (if buf (push (copy-seq *read-token-buf) buf)
           (push (intern *read-token-buf) buf))
         (loop (unless (member ch '(#\space #\tab)) (return))
               (setq ch (read-char f nil nil)))
         (when (or (not newline) (member ch '(#\newline nil))) (return)))
        (when ch (unread-char ch f))
        (cond ((null buf) (assert (null ch)) nil)
              ((or (not newline) (eq (car buf) '*)) (car buf))
              (t (cons :word (nreverse buf))))))))


;;; Front end parser interface
;;; ==========================
;;; Semco-front-end-parser tries to parse a sentence with the GDE parser and
;;; returns PARSE-OK if successful within *parser-timeout (if non-NIL) seconds.
;;; Otherwise it returns NO-PARSE, GDE-ERROR, TIMEOUT, OVERFLOW, or ERROR.

(defvar *semco-trees-base nil)          ; the underlying current parse trees

(defun check-semco-trees-base (&optional clear)
  (when clear (setq *current-parse-trees nil))
  (when (or clear (not (eq *semco-trees-base *current-parse-trees)))
    (setq *semco-trees-base *current-parse-trees
          *semco-trees nil *semco-forms nil *semco-coocs nil)))

#+ AKCL (warn "Robust parsing wrt overflow disabled for AKCL Lisp")

(defun semco-front-end-parser (sentence)
  "Parse sentence robustly with current GDE parser/grammar/lexicon"
  (requires-the-gde)
  (check-semco-trees-base t)
  (start-parser-timeout)
  (let ((status
         (block nil                     ; for local returns
           (catch 'timeout              ; thrown by check-parser-timeout
             (catch 'break              ; thrown by GDE errors
               #+ AKCL
               (invoke-parser sentence nil)
               #- AKCL
               (handler-case
                   (invoke-parser sentence nil)
                 (storage-condition ()
                   (setq *chart-edges nil) ; free memory
                   (warn "Parser memory overflow - calling gc...")
                   (handler-case (gc t)
                     (storage-condition ()
                       (warn "Another memory overflow during gc!"))
                     (serious-condition (c)
                       (warn "Another error during gc:~%  ~a" c)))
                   (return 'overflow))
                 (serious-condition ()
                   (setq *chart-edges nil)
                   (return 'error)))
               (unless *current-parse-trees (return 'no-parse))
               (get-semco-trees)
               (get-semco-forms)
               (return 'parse-ok))
             (return 'gde-error))
           (return 'timeout))))
    (stop-parser-timeout)
    (unless (eql status 'parse-ok) (check-semco-trees-base t))
    (let ((q (assoc status *semco-corpus-stats)))
      (if q (incf (cadr q))
        (push (list status 1) *semco-corpus-stats)))
    status))

(defun get-semco-trees ()
  "Extract parse trees from current parse, set *semco-trees and return."
  ;; Returns a list of trees: (tree.. )
  (requires-the-gde)
  (check-semco-trees-base)
  (or *semco-trees
      (let ((trees nil))
        (dolist (x *current-parse-trees)
          (check-parser-timeout)
          (push (get-rule-labelling-from-parse-tree (cdr x)) trees))
        (setq *semco-trees (nreverse trees)))))

(defun get-semco-forms ()
  "Extract semantic forms from current parse, set *semco-forms and return."
  ;; Returns ((form.. ).. ) to allow multiple possible forms per parse.
  (requires-the-gde)
  (check-semco-trees-base)
  (or *semco-forms
      (setq *semco-forms
            (mapcar #'(lambda (x)
                        (check-parser-timeout)
                        (simplify-lambda-formula
                         (extract-semantics-from-parse-tree (cdr x) (car x))))
                    *current-parse-trees))))

(defun get-semco-coocs ()
  "Compute distinguishing cooccurrence sets from current semantic forms."
  ;; Excludes cooccurrences present in all forms of all parses.
  ;; Returns ((cooc.. ).. ) to allow multiple possible forms per parse.
  (get-semco-forms)
  (unless *semco-coocs
    (setq *semco-coocs
          (mapcar #'(lambda (p) (mapcar #'logical-form-coocs p))
                  *semco-forms))
    (let ((in-all (copy-list (caar *semco-coocs))))
      (dolist (p *semco-coocs)
        (dolist (f p)
          (setq in-all (delete-if-not #'(lambda (c) (member c f :test #'equal))
                                      in-all))))
      (dolist (c in-all)
        (dolist (p *semco-coocs)
          (map-into p #'(lambda (f) (delete c f :test #'equal)) p)))))
  *semco-coocs)


;;; Parser timeout
;;; ==============

(defvar *parser-timeout-limit nil)      ; internal run time (or nil)

(defvar *parser-timeout-restart nil)    ; one per sentence allowed for GC

(defun start-parser-timeout (&optional restart)
  (unless (and *parser-timeout-restart restart)
    (setq *parser-timeout-restart restart
          *parser-timeout-limit (when *parser-timeout
                                  (+ (get-internal-run-time)
                                     (* *parser-timeout
                                        internal-time-units-per-second))))))

(declaim (inline check-parser-timeout))

(defun check-parser-timeout ()
  (when (and *parser-timeout-limit
             (> (get-internal-run-time) *parser-timeout-limit)) 
    (throw 'timeout nil)))

(defun check-parser-timeout () nil) ;;; doesn't seem to work ??

(defun stop-parser-timeout ()
  (setq *parser-timeout-limit nil))

;;; GDE patches for timeout:

(when (and (fboundp 'g-insert-traces)
           (not (fboundp 'original-gde-g-insert-traces)))
  (setf (symbol-function 'original-gde-g-insert-traces) #'g-insert-traces))

(when (and (fboundp 'g-unpack)
           (not (fboundp 'original-gde-g-unpack)))
  (setf (symbol-function 'original-gde-g-unpack) #'g-unpack))

(defun g-insert-traces (&rest args)
  "GDE core function patched for timeout"
  (check-parser-timeout)
  (apply #'original-gde-g-insert-traces args))

(defun g-unpack (&rest args)
  "GDE core function patched for timeout"
  (check-parser-timeout)
  (apply #'original-gde-g-unpack args))

;;; Garbage collection hook:

#+ ALLEGRO
(progn
  (setq excl:*gc-after-hook*
        #'(lambda (&rest x) (declare (ignore x)) (start-parser-timeout t)))
  (setf (sys:gsgc-switch :hook-after-gc) t))
#- ALLEGRO
(warn "Timeout reset on GC disabled for non-Allegro Lisp")
  

;;; Parse/corpus tree matching
;;; ==========================
;;; The default function for parse tree matching is based on maximising the
;;; value of simi = s1 + s2 - 3*s3 where
;;;
;;;   s1 = number of shared constituents
;;;   s2 = number of shared constituents with matching labels
;;;   s3 = number of crossing constituents (in corpus relative to parse)
;;;
;;; Constituents must include 2+ words.  Identical constituents count as one.
;;; Labels match when the max alphanumeric prefixes are equal ignoring case.

(defun find-most-similar-trees (trees match)
  "Find the parse tree or trees most similar to the corpus tree"
  (let ((match-cons (corpus-tree-constituents match)) (simi nil))
    (dolist (tree trees)
      (let ((tree-cons (parser-tree-constituents tree)))
        (push (similarity-measure tree-cons match-cons) simi)))
    (let ((max (reduce #'max simi)))
      (mapcan #'(lambda (x tr) (when (>= x max) (list tr)))
              (nreverse simi) trees))))

(defun similarity-measure (tree-cons match-cons)
  (let ((s1 0) (s2 0) (s3 0))
    (dolist (x match-cons (+ s1 s2 (* -3 s3)))
      (let ((b1 nil) (b2 nil) (b3 nil))
        (dolist (y tree-cons)
          (unless b2
            (when (and (= (car x) (car y)) (= (cadr x) (cadr y)))
              (unless b1 (incf s1) (setq b1 t))
              (when (member-if
                     #'(lambda (a)
                         (member-if
                          #'(lambda (b) (matching-labels a b))
                          (cddr y)))
                     (cddr x))
                (incf s2) (setq b2 t))))
          (unless b3
            (when (or (< (car x) (car y) (cadr x) (cadr y))
                      (> (car x) (car y) (cadr x) (cadr y)))
              (incf s3) (setq b3 t))))))))

(defun parser-tree-constituents (tree)
  (extract-constituents
   tree 0 #'(lambda (x) (and (atom x) (not (member x *parser-gaps))))))

(defun corpus-tree-constituents (tree)
  (extract-constituents
   tree 0 #'(lambda (x) (and (listp x) (eq (car x) :word)))))

(defun extract-constituents (tree start word-p)
  "Get all constituents of two or more words in tree"
  ;; Two values: ((v1 v2 label... )... ), end-position
  (cond
   ((funcall word-p tree) (values nil (1+ start)))
   ((atom tree) (values nil start))
   (t (let ((consts nil) (end start))
        (dolist (dau (if (atom (car tree)) (cdr tree) tree))
          (multiple-value-bind (x y) (extract-constituents dau end word-p)
            (setq consts (nconc consts x) end y)))
        (when (> (- end start) 1)
          (let ((q (find-if
                    #'(lambda (s)
                        (and (eql (car s) start) (eql (cadr s) end)))
                    consts)))
            (unless q (push (setq q (list start end)) consts))
            (when (and (atom (car tree)) (not (member (car tree) (cddr q))))
              (push (car tree) (cddr q)))))
        (values consts end)))))

(defun matching-labels (a b)
  "Do these labels match (for tree comparison purposes)?"
  ;; Called from above, a is from the corpus, b is from the parser.
  ;; Requires non-empty identical alphanumeric prefixes ignoring case.
  (let ((i (string-not-equal (string a) (string b))))
    (or (not i)
        (and (>= i (or (position-if-not #'alphanumericp (string a)) 0) 1)
             (>= i (or (position-if-not #'alphanumericp (string b)) 0) 1)))))


;;; Cooccurrence logic
;;; ==================
;;; A cooc is the coincidence of some value as the argument of two predicates.
;;; Quantifiers and logical operators are ignored except as far as they define
;;; variable scopes.  A cooc depends only on the presence of such coincidences
;;; in the logical form, and not on the logical status of the different parts
;;; of the expression.  Non-atomic predicate arguments are excluded from coocs
;;; but subject to a recursive call for non-first order robustness.

;;; A cooc is represented as a dotted pair of predarg symbols in case-sensitive
;;; alphabetical order.  A predarg symbol is formed by concatenating the
;;; predicate, a period (".") and the argument position (from 0).

(defun logical-form-coocs (lf)
  "Determine cooc set for predicate calculus logical form"
  (let ((preds (adjust-pred-list (logical-form-preds lf))) (res nil))
    (do ((p1 preds (cdr p1)))
        ((null p1))
      (do ((p2 (cdr p1) (cdr p2)))
          ((null p2))
        (do ((v1 (cdar p1) (cdr v1)) (n1 0 (1+ n1)))
            ((null v1))
          (when (atom (car v1))
            (do ((v2 (cdar p2) (cdr v2)) (n2 0 (1+ n2)))
                ((null v2))
              (if (and (eql (car v1) (car v2))
                       (not (and (eql (caar p1) (caar p2)) (eql n1 n2))))
                  (push (build-cooc (caar p1) n1 (caar p2) n2) res)))))))
    (delete-duplicates (nreverse res) :test #'equal)))

(defun adjust-pred-list (preds)
  "Optional predicate list adjustment - this default version does nothing!"
  preds)

(defun build-cooc (p1 n1 p2 n2)
  (let ((a (intern-predarg p1 n1))
        (b (intern-predarg p2 n2)))
    (if (string< a b) (cons a b) (cons b a))))

(defvar *intern-predarg-buf
  (make-array 32 :element-type 'character :adjustable t :fill-pointer 0))

(defun intern-predarg (p n)
  (let ((list (get p 'predarg-symbol-list)))
    (or (nth n list)
        (progn
          (setf (fill-pointer *intern-predarg-buf) 0)
          (with-output-to-string (s *intern-predarg-buf)
            (format s "~a.~d" p n))
          (let ((pa (intern *intern-predarg-buf)))
            (if list (let ((ext (- (1+ n) (length list))))
                       (when (plusp ext) (nconc list (make-list ext))))
              (setf (get p 'predarg-symbol-list)
                    (setq list (make-list (1+ n)))))
            (setf (nth n list) pa))))))

(defvar logical-form-vars nil)

(defun logical-form-preds (lf)
  "Compute list of predicates in pred calc logical form"
  (setq logical-form-vars nil)
  (delete-duplicates (logical-form-preds2 lf) :test #'equal))

(defun logical-form-preds2 (lf &optional exch)
  (when (consp lf)
    (let ((key (first lf)) (q nil))
      (when (atom key)
        (cond
         ((and (consp (cadr lf)) (atom (caadr lf)) (null (cdadr lf)))
                                        ; logical quantifier
          (let ((var (caadr lf)))
            (when (member var logical-form-vars)
              (let ((n (gentemp))) (push (cons var n) exch) (setq var n)))
            (push var logical-form-vars))
          (setq lf (cddr lf)))
         ((some #'atom (rest lf)) ; predicate, at least one atomic arg
          (pop lf)
          (setq q (mapcar #'(lambda (v) (or (cdr (assoc v exch)) v)) lf))
          (setq q (list (cons key q))))))
      (nconc q (mapcan #'(lambda (x) (logical-form-preds2 x exch)) lf)))))


;;; Parse probability distribution
;;; ==============================
;;; (Note: The unclustered Model 2 from the PhD project is assumed here.)

(defun compute-parse-probs ()
  "Compute the probability distribution of the current set of parses"
  ;; The result is stored in *semco-probs and returned.
  (get-semco-coocs)
  (let ((probs (mapcar                  ; initially a list of lists of 1.0
                #'(lambda (p)
                    (mapcar #'(lambda (f) (declare (ignore f)) 1.0) p))
                *semco-coocs))
        (coocs nil))
    (dolist (p *semco-coocs)
      (dolist (f p) (dolist (c f) (pushnew c coocs :test #'equal))))
    (setq coocs (nreverse coocs))
    (dolist (cooc coocs)
      (let* ((p (cooc-prob cooc)) (q (- 1.0 p)))
        (mapl #'(lambda (aa bb)
                  (mapl #'(lambda (a b)
                            (setf (car a)
                                  (* (car a)
                                     (if (member cooc (car b) :test #'equal)
                                         p q))))
                        (car aa) (car bb)))
              probs *semco-coocs)))
    (let ((sum (reduce #'+ (mapcar #'(lambda (aa) (reduce #'max (cons 0 aa)))
                                   probs))))
      (if (zerop sum) (warn "All parse probabilities are zero!")
        (mapl
         #'(lambda (aa)
             (mapl #'(lambda (a) (setf (car a) (/ (car a) sum))) (car aa)))
         probs)))
    (setq *semco-probs probs)))

(defun semco-show-coocs (&optional coocs)
  "Display a table of the distinguishing coocs in the parses"
  (get-semco-coocs)
  (unless coocs
    (dolist (p *semco-coocs)
      (dolist (f p) (dolist (c f) (pushnew c coocs :test #'equal))))
    (setq coocs (nreverse coocs)))
  (let ((w1 0) (w2 0) (m 0))
    (dolist (par *semco-coocs) (dolist (form par) (incf m)))
    (format t "~2&Parses: ~a, semantic forms: ~a, cooccurrences: ~a~2%"
            (length *semco-coocs) m (length coocs))
    (dolist (c coocs)
      (setq w1 (max w1 (length (string (car c))))
            w2 (max w2 (length (string (cdr c))))))
    (format t "Coocs~vt prob  parses (0-9)/forms (.)~%" (+ w1 w2 1))
    (let ((j 1) (k (- 79 w1 w2 8)))
      (loop
       (when (= j 1) (format t "~vt" (+ w1 w2 8)))
       (let ((p 0) (i 0))
         (dolist (par *semco-coocs)
           (incf p)
           (when par
             (when (<= j (incf i) k) (format t "~a" (mod p 10)))
             (dolist (form (cdr par))
               (when (<= j (incf i) k) (format t "."))))))
       (format t "~%")
       (dolist (c coocs)
         (when (= j 1)
           (format t "~va ~va ~5,3f " w1 (car c) w2 (cdr c) (cooc-prob c)))
         (let ((i 0))
           (dolist (par *semco-coocs)
             (dolist (form par)
               (when (<= j (incf i) k)
                 (format t "~:[-~;+~]" (member c form :test #'equal))))))
         (format t "~%"))
       (format t "~%")
       (when (>= k m) (return))
       (setq j (+ k 1) k (+ k 79))))))


;;; Cooccurrence probability estimation
;;; ===================================
;;; (Note: The unclustered Model 2 from the PhD project is assumed here.)

(defvar *unseen-cooc-prob nil)

(defun cooc-prob (cooc)
  "Determine the cooccurrence probability of this cooc"
  ;; The *cooc-db-hash is a two-stage cache for cooc scan data and probs.
  (setq *cooc-db-conv-begun t)          ; no more training possible!
  (let ((x (gethash cooc *cooc-db-hash)))
    (cond
     ((numberp x) x)
     ((consp x) (setf (gethash cooc *cooc-db-hash) (compute-cooc-prob x)))
     ((null x)
      (let ((i (cooc-db-index cooc)))
        (if (svref *cooc-db-touch i)
            (setf (gethash cooc *cooc-db-hash)
                  (or *unseen-cooc-prob
                      (setq *unseen-cooc-prob (compute-cooc-prob nil))))
          (progn (load-cooc-db-file i)
                 (setf (svref *cooc-db-touch i) t)
                 (cooc-prob cooc))))))))

;;; ---------------------------------------------------------------------------
;;; Mathematically, a cooccurrence probability estimate p~ is defined by
;;;
;;;   ln(p~ / (1-p~)) = [int f(p) ln(p / (1-p)) dp] / [int f(p) dp],
;;;
;;; where f(p) is defined in terms of the cooc scan data and both integrals are
;;; from 0 to 1.  To avoid infinite logarithms at p=0 and p=1, however, the rhs
;;; nominator is computed with the mathematically equivalent formula
;;;
;;;   int [(f(p) - f(0)) ln(p) - (f(p) - f(1)) ln(1-p)] dp + f(1) - f(0).
;;;
;;; The numeric integration uses an adaptive sampling method which concentrates
;;; the computation near the peak of f(p).
;;;
;;; See sections 4.3.3 and 5.4.2 in the PhD thesis [Hektoen 1997].
;;; ---------------------------------------------------------------------------

(defun compute-cooc-prob (scan)
  "Compute the cooc prob estimate corresponding to the given scan data"
  (let* ((nom 0.0) (den 0.0) (sam (compute-cooc-prob-samples scan))
         (f0 (cadar sam)) (f1 (cadar (last sam))))
    (let ((a nil) (ga nil) (b nil) (gb nil))
      (dolist (q sam)
        (let* ((fb (cadr q)) (trap (cddr q)))
          (shiftf a b (car q))
          (shiftf ga gb (- (* (- fb f0) (log b))
                           (* (- fb f1) (log (- 1.0 b)))))
          (incf den trap)
          (when a (incf nom (* (- b a) (+ ga gb) 0.5))))))
    (/ 1.0 (+ 1.0 (/ 1.0 (exp (/ (+ nom (- f1 f0)) den)))))))

(defparameter prob-m2-const -0.36d0)    ; const a in P(p) = 1/(1-ap)^2
(defparameter prob-m2-no-samples 30)    ; number of samples
(defparameter prob-m2-base              ; base for f(p) super exponent
    (expt 10.0d0
          (floor (log (* 0.4d0 (sqrt most-positive-double-float))
                      10.0d0))))

;;; Compute the required number of samples of f(p) from 0.0 to 1.0 such that
;;; the corresponding trapeziums are approx equal, and scaled so max is 1.0.
;;; Returns ((p f(p) . trap)... ), max unscaled f(p) and its super exponent.
;;; Uses double float internally, but returns single float (except max f(p)).
;;; Last trap in list is always 0.0.  Assumes dividing any number by
;;; prob-m2-base twice gives zero!

(defun compute-cooc-prob-samples (scan)
  (let ((sam nil) (emin nil))
    (dotimes (i prob-m2-no-samples)
      (let ((q sam) (p 0.0d0) a b fa fb)
        (cond ((cdr q)
               (mapl #'(lambda (x) (when (> (cddar x) (cddar q)) (setq q x)))
                     (cdr sam))
               (setq a (caar q) b (caadr q) fa (cadar q) fb (cadadr q))
               (setq p (if (= fa fb) (/ (+ a b) 2)
                         (+ a (* (- (sqrt (/ (+ (* fa fa) (* fb fb)) 2)) fa)
                                 (/ (- b a) (- fb fa)))))))
              (q (setq p 1.0d0 a (caar q) fa (cadar q))))
        (multiple-value-bind (fp e) (compute-cooc-prob-f p scan)
          (cond ((= fp 0.0d0))
                ((null emin) (setq emin e))
                ((> e emin)
                 (setq fp (if (> e (1+ emin)) 0.0d0 (/ fp prob-m2-base))))
                ((< e emin)
                 (if (< e (1- emin))
                     (dolist (q sam) (setf (cadr q) 0.0d0 (cddr q) 0.0d0))
                   (dolist (q sam)
                     (setf (cadr q) (/ (cadr q) prob-m2-base)
                           (cddr q) (/ (cddr q) prob-m2-base))))
                 (setq emin e)))
          (let ((s (list* p fp 0.0d0)))
            (if q (push s (cdr q)) (push s sam)))
          (when (cddr q) (setf (cddadr q) (* (- b p) (+ fp fb) 0.5d0)))
          (when q (setf (cddar q) (* (- p a) (+ fa fp) 0.5d0))))))
    (let ((m (cadar sam)))
      (dolist (q (cdr sam)) (when (> (cadr q) m) (setq m (cadr q))))
      (dolist (q sam)
        (setf (car q) (single-float (car q))
              (cadr q) (single-float (/ (cadr q) m))
              (cddr q) (single-float (/ (cddr q) m))))
      (values sam m emin))))

;;; Compute continuous probability distribution f(p) = Prod(fi(p)) * P(p).
;;; Returns two values, f and e, representing (/ f (expt prob-m2-base e)),
;;; where e is the smallest value such that a nonzero f >= 1.  Assumes no
;;; single iteration divides f by more than prob-m2-base.

(defun compute-cooc-prob-f (p scan)
  (let ((f (expt (- 1.0d0 (* prob-m2-const p)) -2)) (e 0))
    (when (< f 1.0d0) (setq f (* f prob-m2-base)) (incf e))
    (cond
     ((= p 0.0d0)
      (when (some #'(lambda (x) (/= (cadr x) 0.0d0)) scan) (setq f 0.0d0 e 0)))
     ((= p 1.0d0)
      (when (some #'(lambda (x) (/= (cddr x) 0.0d0)) scan) (setq f 0.0d0 e 0)))
     (t
      (dolist (ax scan)
        (let ((a (car ax)) (pos (cadr ax)) (neg (cddr ax)))
          (unless (= pos 0.0)
            (setq f (* f (expt (/ (* (- 1.0d0 a) p) (- 1.0d0 (* a p))) pos))))
          (unless (= neg 0.0)
            (setq f (* f (expt (/ (- 1.0d0 p) (- 1.0d0 (* a p))) neg)))))
        (when (< f 1.0d0) (setq f (* f prob-m2-base)) (incf e)))))
    (values f e)))











