;;
;;      Title : mafuncs
;;
;;      Function : This holds some of the basic user accessible functions
;;                 for the morphological analyser and dictionary
;;
;;      Author :  Alan W Black   28th Oct 1985
;;                Dept of A.I.   University of Edinburgh
;;     
;;      Copyright Graeme Ritchie, Alan Black,
;;                Steve Pulman and Graham Russell  1987
;;
;;         ---------------------------------------------
;;         |    Not to be used for military purposes   |
;;         ---------------------------------------------
;;
;;      Description :
;;          This file MUST be loaded within code that is to use the analyser
;;          and dictionary functions.
;;
;;      Parameters :
;;      Returns :
;;      Side Effect :
;;
;;      External references :
;;
;;      System functions used :
;;        Name     type      Args #  type                 Comment
;;
;;      Restrictions :
;;
;;

(declare
   (special
      D-FEASIBLES              ;; feasible pairs in the automata
      D-AUTOMATA               ;; the spelling rule automata
      D-ALIASES                ;; feature aliases
      D-VARIABLES              ;; feature variables and value variables
      D-FEATURES               ;; features
      D-FSD                    ;; feature specification defaults
      D-WHEAD                  ;; head features
      D-WDAUGHTER              ;; daughter features
      D-MORPHOLOGYONLY         ;; morphological features that can be removed
      D-SURFACEALPHABET        ;; surface alphabet
      D-LEXICALALPHABET        ;; lexical alphabet
      D-GRAMMAR                ;; expnaded grammar rules
      D-UNARYRULES             ;; grammar rules with only one daughter
      D-USERGRAM               ;; grammar rules as user specified
      D-BASICVARIABLES         ;; variable names
      D-DISTINGUISHEDCATEGORY  ;; top category of grammar
      D-LCATEGORIES            ;; lexical category definitions
      D-INITCONFIG             ;; initial configuration of spelling rules
      D-em                     ;; end marker lexicon
      D-LEXICON                ;; morpheme dictionary tree
      D-SIMPLELEXICON          ;; new morpheme dictionary tree
      D-LOOKUPFORMAT           ;; format of returned word form
      D-TRANSITIONSLIST        ;; automata transitions list
      D-FINALSLIST
      D-TRACEFLAG
      D-TRACEFLAGSWITCH
      D-NONINFLECTFLAG
      D-NONINFLECTFLAGSWITCH
      D-INCOREFLAG
      D-INCOREFLAGSWITCH
      D-LOADEDPARTS
      D-LEXICON-STATE          ;; add to file, or just tree of lexicon
      D-ENTRYFILEID
      D-AUTOSYMBOLS
      D-NULLPOSSIBLES
      D-CATEGORYDEFINITIONS
      D-STRINGSON
   )
   (localf
      D-PrintList
      D-InitGlobals                   
      D-MorphoSplit                   
      D-IntegrateCategoryDefinitions
   )
)

(include "macros")
(include "keywords")
(include "subrout")
(include "catrouts")

;;; ***

(declare (special D-SEMANTICS))


(defun D-LoadAll (sp gr di)
;;
;; Loads in all three sections of the analyser, the spelling rules
;; word grammar and dictionary 
;;
;; WARNING  NO CONSISTENCY CHECK IS DONE YET
;;
   (prog ()
      (setq D-CATEGORYDEFINITIONS nil)  ;; so you can reload everything
      (D-LoadSpRules sp)       ;; load spelling automata
      (D-LoadWordGrammar gr)
      (D-LoadLexicon di)
   )
)

(defun D-LoadSpRules (name)
;;
;;  Loads in the spelling rule automata and other junk in the
;;  .sp.ma file
;;
   (let ( (fileid (infile (concat name (DK-SP-MA)))) version )
      (D-MarkUnLoad 'sp)
      (setq D-TRANSITIONSLIST nil)  ;; this is to save time in gc
      (setq version (read fileid))   ;; find version of spelling rules
      (cond
	 ((equal version (DK-VERSION-STAMP))
	    (setq D-LEXICALALPHABET (read fileid))
	    (setq D-SURFACEALPHABET (read fileid))
	    (setq D-TRANSITIONSLIST (read fileid))
	    (setq D-UNUSUALFEASIBLES (read fileid))
	    (setq D-SURFTOLEXSETS (read fileid))
	    (setq D-NULLPOSSIBLES (read fileid))
	    (setq D-AUTOSYMBOLS (read fileid))
	    (setq D-TRANSITIONSLIST    ;; declare and call translist
	       (eval D-TRANSITIONSLIST))
	    (setq D-INITCONFIG nil)    ;; Initial spelling rule configuration
	    (D-MarkLoaded (list 'sp name))
	    (close fileid))
         (t       ;; file is not compiled by the right version of code
            (close fileid)
	    (error "File was made by different version of system")))
   )
)

(defun D-LoadWordGrammar (name)
;;
;;  Loads in the word grammar and feature declarations
;;
   (let ((fileid (infile (concat name (DK-GR-MA)))))
      (D-MarkUnLoad 'gr)
      (cond
	 ((equal (read fileid) (DK-VERSION-STAMP))
	    (setq D-GRAMMAR (read fileid))
	    (setq D-ALIASES (read fileid))
	    (setq D-FEATURES (read fileid))
	    (setq D-BASICVARIABLES (read fileid))
	    (setq D-CATVALFEAT (read fileid))
	    (setq D-WHEAD (read fileid))
	    (setq D-WDAUGHTER (read fileid))
	    (setq D-FSD (read fileid))
	    (setq D-VARIABLES D-BASICVARIABLES)
	    (setq D-DISTINGUISHEDCATEGORY (read fileid))
	    (setq D-MORPHOLOGYONLY (read fileid))
	    (setq D-LCATEGORIES (read fileid))
	    (setq D-DTREE (read fileid))
	    (setq D-CATEGORYDEFINITIONS 
	       (D-IntegrateCategoryDefinitions (read fileid)))
	    (setq D-CANMAKE (read fileid))
	    (setq D-DISTINGUISHEDCATEGORY 
	       (D-SetUpDistCat D-DISTINGUISHEDCATEGORY))
	    (setq D-GRAMMAR (D-ProcessGRules D-GRAMMAR))
	    (setq D-UNARYRULES
	       (mapcan 
		  #'(lambda (rule)
		     (cond
			((eq (length rule) 4)  ;; a unary rule
			   (ncons rule))
			(t nil)))
                  D-GRAMMAR))
;;; ***
            (setq D-SEMANTICS
               (mapcar
                  #'(lambda (rule-sem)
                       (cons (car rule-sem)
                          (sublis
                             '((\1 . 1) (\2 . 2) (\3 . 3) (\4 . 4)
                               (\5 . 5) (\6 . 6) (\7 . 7))
                             (cdr rule-sem))))
                  (read fileid)))
            (D-MarkLoaded (list 'gr name))
            (close fileid))
         (t       ;; file is not compiled by the right version of code
            (close fileid)   ;; tidy up so system can continue
	    (error "File was made by different version of system")))
   )
)

(defun D-LoadLexicon (name)
;;
;;  load in the lexicon tree and open the port to the entries 
;;  file.  This removes all other previous lexicons loaded
;;
   (D-MarkUnLoad 'di)
   (mapcar           ;; close entryfile id of lexicons
      #'(lambda (lexicon)
	 (close (car lexicon)))
      D-LEXICON)
   (D-ReadInLexiconFile (concat name (DK-LE-MA)))
   (setq D-LEXICON (ncons D-SIMPLELEXICON))
   (rplaca (car D-LEXICON) (infile (concat name (DK-EN-MA))))
   (D-MarkLoaded (list 'di name))
)

(defun D-ReadInLexiconFile (filename)
;;
;;   reads in the s-expressions in a lexicon file.  this is so the 
;;   variable names are always internal to the system and these funny
;;   systems that have upper case only will now have less excuse for being
;;   confused
;;
   (let  ((fileid (infile filename)))
      (cond
	 ((equal (read fileid) (DK-VERSION-STAMP))
	    (setq D-SIMPLELEXICON (read fileid))
	    (setq D-FEATURES (read fileid))
	    (setq D-CATVALFEAT (read fileid))
	    (setq D-CATEGORYDEFINITIONS
	       (D-IntegrateCategoryDefinitions (read fileid)))
	    (close fileid))
         (t       ;; file is not compiled by the right version of code
            (close fileid)   ;; tidy up so system can continue
	    (error "File was made by different version of system")))
   )
)

(defun D-AddLexicon (name)
;;
;;  adds the given lexicon to the currently loaded ones
;;  if this fails the lexicon is marked unloaded - to be safe
;;
   (cond
      ((not (assq 'di D-LOADEDPARTS))
	 (error "Cannot add lexicon when none loaded")
      )
      (t
	 (let ((currentlexes (cdr (assq 'di D-LOADEDPARTS))))
	    (D-MarkUnLoad 'di)
            (D-ReadInLexiconFile (concat name (DK-LE-MA)))
	    (cond
	       ((memq name currentlexes) ;; is this to be re-loaded
		  (let ( (newthings
		     (D-RemoveLexicon name currentlexes D-LEXICON)) )
                    (princ "Replaced lexicon: ") (princ name) (terpri)
                    (setq D-LEXICON (cadr newthings))
		    (setq currentlexes (car newthings))))
                  (t
                    (princ "Added lexicon: ") (princ name) (terpri)))
            (setq D-LEXICON (cons D-SIMPLELEXICON D-LEXICON))
            (rplaca (car D-LEXICON) (infile (concat name (DK-EN-MA))))
            (D-MarkLoaded
	       (cons
		  'di
		  (cons name currentlexes)))
	 )
      )
   )
)

(defun D-RemoveLexicon (name names lexicons)
;;
;;  Returns a pair of new names and new lexicons with the lexicon (name)
;;  removed.
;;
   (cond
      ((or (null names) (null lexicons))
	 (princ "Lexicons loaded and names of loaded lexicons inconsistant")
	 (error "try clearing system and reloading"))
      ((eq name (car names))
	 (close (caar lexicons))    ;; close the port to the entries file
	 (list
	    (cdr names) (cdr lexicons)))
      (t 
	 (D-RemoveLexicon name (cdr names) (cdr lexicons)))
   )
)

(defun D-PrintLoadedSections ()
;;
;;   This prints out to the terminal the names of the currently
;;   loaded sections (grammar spelling rules, and lexicons)
;;
   (let ((lex (assq 'di D-LOADEDPARTS))
	 (sprule (assq 'sp D-LOADEDPARTS))
	 (gram (assq 'gr D-LOADEDPARTS)))
      (princ "Currently loaded sections are:") (terpri)
      (cond
	 ((null sprule) (princ "   No Spelling Rules loaded") (terpri))
	 (t   (princ "   Spelling Rules: ") (princ (cadr sprule)) (terpri)))
      (cond
	 ((null gram) (princ "   No Word Grammar loaded") (terpri))
	 (t   (princ "   Word Grammar: ") (princ (cadr gram)) (terpri)))
      (cond
	 ((null lex) (princ "   No Lexicons loaded") (terpri))
	 (t   (princ "   Lexicons: ") (D-PrintList (cdr lex)) (terpri)))
   )
)

(defun D-PrintList (list)
;;
;;   pritns out each item in the list with an intermediate space
;;
   (cond
      ((null list) nil)
      (t
	 (princ (car list)) (princ " ")
	 (D-PrintList (cdr list))
      )
   )
)



(defun D-Initialise ()
;;
;;   This function must be called before any use can be made
;;   of the dictionary and analyser system
;;
   (D-InitGlobals)       ;; initialise the global variables
   't
)

(defun D-InitGlobals ()
;;
;;  This initialises the global variables so that no previous 
;;  junk can appear in the analyser
;;
   (D-if (boundp 'D-LEXICON) then
      (mapcar           ;; close entryfile id of lexicons
         #'(lambda (lexicon)
	    (errset (close (car lexicon))))
         D-LEXICON))
   (setq D-FEASIBLES nil)         ;; feasible pairs in the automata
   (setq D-AUTOMATA nil)          ;; the spelling rule automata
   (setq D-ALIASES nil)           ;; feature aliases
   (setq D-VARIABLES nil)         ;; feature variables and value variables
   (setq D-BASICVARIABLES nil)
   (setq D-FEATURES nil)          ;; features
   (setq D-FSD nil)               ;; feature specification defaults
   (setq D-CATVALFEAT nil)        ;; category valued features
   (setq D-WHEAD nil)             ;; head features
   (setq D-WDAUGHTER nil)         ;; daughter features
   (setq D-MORPHOLOGYONLY nil)    ;; local morphology features
   (setq D-LCATEGORIES nil)       ;; lexical category definitions
   (setq D-SURFACEALPHABET nil)   ;; surface alphabet
   (setq D-LEXICALALPHABET nil)   ;; lexical alphabet
   (setq D-TRANSITIONSLIST nil)   ;; compiled rules
   (setq D-UNUSUALFEASIBLES nil)  ;; unusual feasible pairs in spelling rules
   (setq D-SURFTOLEXSETS nil)     ;; which surface correspond to what lexs.
   (setq D-AUTOSYMBOLS nil)       ;; symbols in the transducers
   (setq D-GRAMMAR nil)           ;; expnaded grammar rules
   (setq D-USERGRAM nil)          ;; grammar rules as user specified
   (setq D-em nil)                ;; end marker lexicon
   (setq D-LEXICON nil)           ;; morpheme dictionary tree
   (setq D-SIMPLELEXICON nil) 
   (setq D-DISTINGUISHEDCATEGORY nil)
   (setq D-LOOKUPFORMAT 'D-CATEGORYFORM) ;; format of returned words
   (setq D-TRACEFLAGSWITCH (DK-OFF))        ;; initially no tracing
   (setq D-LEXICON-STATE 'D-ADD)  ;; lexicon in addition mode
   (setq D-LOADEDPARTS nil)       ;; for checking what is loaded
   (setq D-NONINFLECTS nil)       ;; non inflectable categories
   (setq D-VARIABLENUMBERS 0)
   (setq D-NONINFLECTFLAG (DK-OFF))
   (setq D-INCOREFLAG (DK-OFF))
   (setq D-NONINFLECTFLAGSWITCH (DK-OFF))
   (setq D-INCOREFLAGSWITCH (DK-OFF))
   (setq D-DTREE nil)            ;; discrimination list for bau
   (setq D-NULLPOSSIBLES nil)
   (setq D-CATEGORYDEFINITIONS nil)
   (setq D-CANMAKE nil)          ;; can make tree for term unification
   (setq D-STRINGSON nil)  ;; if set to t strings are kept to as strings
			   ;; here they are changed into symbols in readatom
)

(defun D-VersionHeading ()
;;
;;  prints out the current version and heading
;;
   (princ "Morphological Analyser  - ")
   (princ (DK-VERSION-STAMP))
   (terpri)
)

(defun D-ChangeLookUpFormat (format)
;;
;;  This function sets the global variable that determines the format
;;  out the output returned from D-LookUp
;;  This function is externally callable
;;
   (cond
      ((and (atom format)
            (memq format '(D-CATEGORYFORM 
			   D-WORDSTRUCTURE
			   D-STRINGSEGMENTCAT
			   D-STRINGSEGMENTWS)))
	 (setq D-LOOKUPFORMAT format)
      )
      (t
	 (error "invalid lookup format")
      )
   )
)

(defun D-LookUp (word)
;;
;;   Basic look up routine for analyser and dicitonary
;;   This routine is externably callable
;;   returned format is dependant on global D-LOOKUPFORMAT
;;
   (cond
      ((not (equal (length D-LOADEDPARTS) 3))
	 (error "Lexicon, grammar, or spelling rules not loaded")
      )
      ((eq D-LOOKUPFORMAT 'D-CATEGORYFORM)
	 (D-GetCategoryForm word);; syntactic category only
      )
      ((eq D-LOOKUPFORMAT 'D-WORDSTRUCTURE)
	 (D-GetWordStructure word)  ;; full word structure format
      )
      ((eq D-LOOKUPFORMAT 'D-STRINGSEGMENTCAT)
	 (D-StringSegmentCat word)
      )
      ((eq D-LOOKUPFORMAT 'D-STRINGSEGMENTWS)
	 (D-StringSegmentWStructure word)
      )
      ((eq D-LOOKUPFORMAT 'D-DEBUG)
	 (D-AnalyseWordWithDebug word)
      )
      (t
	 (error (concat "Unknown look up format " D-LOOKUPFORMAT))
      )
   )
)

(defun D-FastLookUp (word)
;;
;;   This is a faster lookup of dictionary.  This is done be before
;;   releasing the full analysis program on the given word a direct
;;   morpheme lookup is done (which is much faster) only if that fails
;;   does a full analysis take place.
;;
;;   This has a problem as if a word is a morpheme but can also be
;;   analysed into morphological constiuents, the second lookup will not
;;   occur.  There will be be corrections to this along the lines of
;;   allowing a specification at compile time with a grammar/lexicon
;;   to tag words that are both morphemes and have separate analyses
;;   
;;   This lookup does not work for the STRINGSEGMENT lookup formats
;;   as these are intended for analyses of strings.
;;
   (cond
      ((not (equal (length D-LOADEDPARTS) 3))
	 (error "Lexicon, grammar, or spelling rules not loaded")
      )
      ((eq D-LOOKUPFORMAT 'D-CATEGORYFORM)
	 (let ( (morphemes (D-Morpheme word)) )
	    (cond
	       (morphemes
		  (mapcar
		     #'(lambda (morphentry)
			(D-Syntax-Field morphentry))
                     morphemes)
               )
	       (t    ;; not a direct morpheme
	          (D-GetCategoryForm word);; syntactic category only
               )
            )
         )
      )
      ((eq D-LOOKUPFORMAT 'D-WORDSTRUCTURE)
	 (let ( (morphemes (D-Morpheme word)) )
	    (cond
	       (morphemes
		  (mapcar
		     #'(lambda (morphentry)
;;;  *** JAC 14/1/88
                        (D-ApplyLCategoryTree
                           (cons (caddr morphentry)
                              (list 'ENTRY morphentry))))


;;; ***			(list 'ENTRY morphentry))
                     morphemes)
               )
	       (t    ;; not a direct morpheme
	          (D-GetWordStructure word)  ;; full word structure format
               )
            )
         )
      )
      ((eq D-LOOKUPFORMAT 'D-STRINGSEGMENTCAT)
	 nil  ;; this is an error situation  
      )
      ((eq D-LOOKUPFORMAT 'D-STRINGSEGMENTWS)
	 nil  ;; this is an error situation  
      )
      (t
	 (error (concat "Unknown look up format " D-LOOKUPFORMAT))
      )
   )
)

(defun D-Morpheme (morpheme)
;;
;;  Looks a morpheme directly up in the lexicon.  This function
;;  is externally callable
;;
   (cond
      ((assq 'di D-LOADEDPARTS)
         (D-LookUpDict          ;; look up the dictionary direct
            morpheme
         )
      )
      (t
	 (error "No lexicon loaded")
      )
   )
)

(defun D-MorphemeConcat (morphs flags)
;;
;;  concatenates the given lexical forms to give surface forms with 
;;  respect to the currently loaded spelling rules.
;;  flags allow different forms of processing to be done on the 
;;  resulting surface forms  - NONE IMPLEMENTED YET
;;
   (cond
      ((assq 'sp D-LOADEDPARTS)
	 (cond
	    ((memq (DK-NONULLS-FLAG) flags)
	       (mapcar
		  #'(lambda (surf)
		     (implode (remove (DK-NULL) (aexplodec surf))))
                  (D-MorphConcat morphs))
	    )
	    ((not (null flags))
	       (error "Unknown option for morpheme concatenation"))
	    (t    
	       (D-MorphConcat morphs))
         )
      )
      (t
	 (error "No spelling rules loaded"))
   )
)

(defun D-Segment (word)
;;
;;   This segments a word morphographemically, ie it does the
;;   segementation irrespective of the word grammar rules.
;;   This function is externally callable.
;;
   (cond
      ((and (assq 'sp D-LOADEDPARTS)
	    (assq 'di D-LOADEDPARTS))
         (D-MorphoSplit (D-Tokenize word))
      )
      (t
	 (error "Spelling Rules or Lexicon not loaded")
      )
   )
)

(defun D-MorphoSplit (word)
;;
;;  Takes a word and segments it into individual morphemes
;;  returns a list of all possible segmentations
;;
   (cond
      ((eq word 'END) '(nil))
      (t
         (mapcan
            #'(lambda (firstmorph)
	        (mapcan
	           #'(lambda (remains)
		       (mapcar
		          #'(lambda (entry)
			     (cons 
				(list
				   (D-CitationForm entry)
				   (D-PhonologicalForm entry)
				   (D-MakePCategory (D-Syntax-Field entry))
				   (D-Semantic-Field entry)
				   (D-User-Field entry))
				remains))
		          (car firstmorph))
                     )
                   (D-MorphoSplit (cadr firstmorph))))
            (D-Recog word)
         )
      )
   )
)

(defun D-AddToRest (remains newthings)
;;
)

(defun D-IntegrateCategoryDefinitions (catdefs)
;;
;;   Adds the given category definitions to those already
;;   loaded if they are inconsistent an error is signalled.
;;   
   (cond
      ((null catdefs)
	 D-CATEGORYDEFINITIONS)
      ((null (assq (caar catdefs) D-CATEGORYDEFINITIONS))
	 (cons           ;; definition not already there
	    (car catdefs)
	    (D-IntegrateCategoryDefinitions (cdr catdefs)))
      )
      ((equal (car catdefs) (assq (caar catdefs) D-CATEGORYDEFINITIONS))
	 (D-IntegrateCategoryDefinitions (cdr catdefs)))
      (t     ;; the category is defined but with different values
	 (error "Incompatible category definitions")
      )
   )
)

