/* Statistical labeller - phrase common functions.

   21-05-93	Created from other source files

   Copyright (C) David Elworthy 1995

   Principal external functions:
	add_tag_to_list, free_tags_list
	tag_phrase,
	build_subsumed_node
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "common.h"
#include "list.h"
#include "map.h"
#include "trans.h"
#include "diction.h"
#include "label.h"
#include "stack.h"
#include "phrase.h"

/*============================================================================
  Low level functions
*/

/*---------------------------------------------------------------------------
    add_tag_to_list

    Add a Tag to a sorted Tags list whose head is at 'head', returning the new
    head. Duplicate tags are reported on stderr.
----------------------------------------------------------------------------*/

/* Comparison callback handed to list_search_and_add: both keys are treated
   as pointers to Tag values and the result is the first minus the second. */
static int tag_cmp(Key *c1, Key *c2)
{
    return *(Tag *)c1 - *(Tag *)c2;
}

Tags add_tag_to_list(Tags head, Tag tag)
{
    BOOL already_present;
    Tags entry;

    /* Look the tag up in the list, using tag_cmp for ordering; 'head' is
       passed by address so the call can update it. The final argument
       receives the duplicate flag. */
    entry = list_search_and_add((void **)&head, (Key *)&tag, tag_cmp,
                                sizeof(TagsSt), "Tags list",
                                &already_present);

    if (!already_present)
    {
        /* Not a duplicate: store the tag in the returned entry */
        entry->tag = tag;
        return head;
    }

    /* Duplicate: report it and leave the returned entry untouched */
    fprintf(stderr, "Duplicate tag ignored '%s'", unmap_tag(tag));
    return head;
}

/*---------------------------------------------------------------------------
    free_tags_list

    Release every cell of a Tags list with free(). The 'next' pointer of each
    cell is read before the cell itself is released. Passing NULL is a no-op.
----------------------------------------------------------------------------*/

void free_tags_list(Tags tags)
{
    while (tags != NULL)
    {
        Tags following = tags->next;

        free(tags);
        tags = following;
    }
}

/*---------------------------------------------------------------------------
    match_tags_list

    Takes a Tags list and a list of scored hypotheses, and builds an unscored
    hypotheses list of those that match. If 'lex_only' is set, limits the
    match to lexical hypotheses.

    For each tag, only the first hypothesis carrying that tag is linked into
    the result. Returns NULL when nothing matches, which includes the case
    where either 'tags' or 'hyps' is an empty (NULL) list.
----------------------------------------------------------------------------*/

Link match_tags_list(Tags tags, Link hyps, BOOL lex_only)
{
    Link head = NULL, *new_at = &head;
    Tags t;
    BOOL single_hyps;

    /* An empty hypothesis list cannot match anything. Get out before
       hyps->next is examined below. */
    if (hyps == NULL) return NULL;

    single_hyps = (hyps->next == NULL);

    /* Test each tag in item against each hypothesis */
    for (t = tags ; t != NULL ; t = t->next)
    {
        Tag tag;
        Link h;

        tag = t->tag;
        for (h = hyps ; h != NULL ; h = h->next)
        {
            /* The unscored hypothesis underneath the scored one */
            Hyp base_hyp = h->u.shyp->hyp;

            if (base_hyp->tag == tag &&
                (!lex_only || base_hyp->type == LexHyp))
            {
                /* new_at is the slot where the next result link belongs;
                   after creating a link, move it on to that link's 'next' */
                Link l = create_link(NULL, FALSE, base_hyp, NULL, new_at);
                new_at = &(l->next);

                /* Special case to make a fast get out if we had a single
                    entry in hyps and we just matched it */
                if (single_hyps) return head;
                else break;     /* first match for this tag is enough */
            }
        }
    }

    return head;
}

/*============================================================================
  I/O and debugging functions
*/

/*---------------------------------------------------------------------------
    dump_phrase_hyp

    Print a hypothesis to 'out' as
	(start id,end id) [start text...end text]_tag:score
    using the lexemes in its lex_start and lex_end fields. No newline is
    written.
----------------------------------------------------------------------------*/

void dump_phrase_hyp(FILE *out, Hyp hyp)
{
    fprintf(out, "(%d,%d) [%s...%s]_%s:%g",
            hyp->lex_start->id, hyp->lex_end->id,
            LexemeText(hyp->lex_start), LexemeText(hyp->lex_end),
            unmap_tag(hyp->tag), hyp->score);
}

/*============================================================================
  Tagging functions
*/

/*---------------------------------------------------------------------------
    build_subsumed_node

    Build a subsumed node for a phrase. The node is created with
    create_node(NULL, NULL) and has 'succ' as its successor; when 'succ' is
    not NULL its pred pointer is set to the new node. One scored hypothesis
    is made from 'hyp' with the new node as both its start and end parent,
    and the node's start and end lists each hold a single link to it.
    Returns the new node.
----------------------------------------------------------------------------*/

Node build_subsumed_node(Node succ, Hyp hyp)
{
    Node fresh;
    SHyp scored;

    /* New node, chained in front of 'succ' when there is one */
    fresh = create_node(NULL, NULL);
    fresh->succ = succ;
    if (succ != NULL)
    {
        succ->pred = fresh;
    }

    /* The single scored hypothesis, and a separate link to it for each of
       the start and end lists */
    scored = create_shyp(hyp, fresh, fresh);
    fresh->start = create_link(NULL, TRUE, NULL, scored, NULL);
    fresh->end   = create_link(NULL, TRUE, NULL, scored, NULL);

    return fresh;
}

/*-----------------------------------------------------------------------------
    make_node

    Make a node with a given successor and hypothesis list. The node comes
    from create_node(NULL, NULL); its start list is a copy of 'hyps' made by
    copy_hyps with the new node passed as both node arguments, and its end
    list is made by copy_links from the start list. Does not link the new
    node in: 'succ' is not modified.

    NOTE: 'start' and 'end' are accepted but not used by the current code.
-----------------------------------------------------------------------------*/

Node make_node(Node succ, Link hyps, Node start, Node end)
{
    Node node = create_node(NULL, NULL);

    /* Forward pointer only; 'succ' is not pointed back at the new node */
    node->succ = succ;

    /* Hypothesis lists: start is copied from 'hyps', end from start */
    node->start = copy_hyps(hyps, node, node);
    node->end   = copy_links(node->start);

    return node;
}

/*---------------------------------------------------------------------------
    tag_phrase

    Call the tagger on a phrase, specified by a phrasal hypothesis. Will not
    recurse, so all subsumed phrases must have been tagged. The probability
    returned by the tagger is multiplied into the hypothesis score.

    A temporary node is placed at each end of the phrase for the duration of
    the call. Its single hypothesis carries the anchor tag if anchor_bracket
    is set, and otherwise the phrase internal tag derived from the
    hypothesis tag. The phrase's outward pred/succ pointers are restored and
    the temporary nodes and hypotheses are freed before returning.
----------------------------------------------------------------------------*/

void tag_phrase(Hyp h, Trans *trans, Trans *new)
{
    Node  first = h->p.phrase.start;
    Node  last  = h->p.phrase.end;
    Node  lead, trail;
    Hyp   lead_hyp, trail_hyp;
    Tag   edge_tag;
    Score seq_prob;

    /* Choose the tag carried by the two temporary end hypotheses */
    if (Option(anchor_bracket))
        edge_tag = anchor_tag;
    else
        edge_tag = to_inphrase_tag(h->tag);

    /* One extra hypothesis per end of the phrase */
    lead_hyp  = create_lex_hyp(edge_tag, 1.0, NULL, NULL);
    trail_hyp = create_lex_hyp(edge_tag, 1.0, NULL, NULL);

    /* Wrap each in its own node */
    lead  = build_subsumed_node(NULL, lead_hyp);
    trail = build_subsumed_node(NULL, trail_hyp);

    /* Splice the nodes in just outside the phrase. Each temporary node
       keeps a copy of the outward pointer it displaces. */
    lead->pred  = first->pred;
    lead->succ  = first;
    first->pred = lead;
    trail->succ = last->succ;
    trail->pred = last;
    last->succ  = trail;

    /* Tag the extended sequence */
    seq_prob = tag(lead, trail, trans, new);

    /* Put the displaced pointers back, then discard the temporaries */
    first->pred = lead->pred;
    last->succ  = trail->succ;
    free_nodes(lead, lead);
    free_nodes(trail, trail);
    free(lead_hyp);
    free(trail_hyp);

    /* Fold the sequence probability into the hypothesis score */
    h->score *= seq_prob;
}
