/* Statistical labeller: information about dictionary and transitions

   26-02-93	Created

   Copyright (C) David Elworthy 1995

   Usage: dtinfo root [mapping]

   Reads a dictionary and transitions matrix and reports information about
   distribution of tags and transitions to/from.
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "common.h"
#include "map.h"
#include "diction.h"
#include "trans.h"

/* Threshold used by analyse_tran: a transition is counted only when its
   score is greater than or equal to this value; anything lower is ignored. */
#define MinTrans (1e-300)

/*----------------------------------------------------------------------------
    analyse_dict

    Prints a histogram of the number of tags per dictionary word (how many
    words carry 1 tag, 2 tags, ...) followed by the average number of tags
    per word.  Entries of the dictionary array that have no text are skipped
    and contribute neither to the histogram nor to the average.

    d	dictionary to analyse; it is only read.
-----------------------------------------------------------------------------*/

static void analyse_dict(Dict *d)
{
    int *counts;
    int i;
    DictWord word;
    int total_tags  = 0;
    int total_words = 0;

    /* counts[n] will hold the number of words carrying exactly n tags */
    Allocate(counts, sizeof(int) * (tags_max + 1), "dict counts array");
    for (i = 0 ; i <= tags_max ; i++) counts[i] = 0;

    /* Scan the dict array, ignoring entries with no text */
    for (i = 0, word = d->d ; i < d->size ; i++, word++)
    {
	if (word->text)
	{
	    /* NOTE(review): assumes 0 <= ntag <= tags_max -- confirm */
	    counts[word->ntag] += 1;
	    total_tags  += word->ntag;
	    total_words += 1;
	}
    }

    /* Print the results */
    printf("N(tags)\tN(words)\n");
    for (i = 0 ; i <= tags_max ; i++)
    {
	if (counts[i] != 0)
	{
	    printf("%d\t%d\n", i, counts[i]);
	}
    }

    /* Average over the words actually seen, not over every array entry,
       so that skipped entries do not drag the figure down; report 0 rather
       than dividing by zero when there were no words at all. */
    printf("\nAverage tags per word %g\n\n",
	   (total_words > 0) ? ((double)total_tags) / total_words : 0.0);

    free(counts);
}

/*----------------------------------------------------------------------------
    analyse_tran
-----------------------------------------------------------------------------*/

static void analyse_tran(Trans *t)
{
    int *from, *to;
    int i, j;
    Score *trans = t->trans;
    int   size   = t->size;
    int total_from = 0, total_to = 0;

    Allocate(from, sizeof(int) * (tags_all + 1), "from counts array");
    Allocate(to,   sizeof(int) * (tags_all + 1), "to counts array");
    for (i = 0 ; i <= tags_all ; i++) from[i] = 0;
    for (i = 0 ; i <= tags_all ; i++) to[i] = 0;

    /* Accumulate the totals */
    for (i = 0 ; i < tags_all ; i++)
    {
	int from_i = 0, to_i = 0;

	for (j = 0 ; j < tags_all ; j++)
	    if (Trans(i,j,size) >= MinTrans)
		from_i += 1;

	for (j = 0 ; j < tags_all ; j++)
	    if (Trans(j,i,size) >= MinTrans)
		to_i += 1;

	from[from_i] += 1;
	to[to_i]     += 1;
    }

    /* Write the results */
    printf("From:\nN(tr)\tN(tags)\n");
    for (i = 0 ; i <= tags_all ; i++)
	if (from[i] != 0)
	{
	    printf("%d\t%d\n", i, from[i]);
	    total_from += 1;
	}
    printf("\nAverage %g\n", ((double)total_from) / tags_all);

    printf("\nTo:\nN(tr)\tN(tags)\n");
    for (i = 0 ; i <= tags_all ; i++)
	if (to[i] != 0)
	{
	    printf("%d\t%d\n", i, to[i]);
	    total_to += 1;
	}
    printf("\nAverage %g\n", ((double)total_to) / tags_all);

    free(to);
    free(from);
}

/*----------------------------------------------------------------------------
    main

    Usage: dtinfo root [map]

    Builds the dictionary and transitions file names from `root', reads the
    tag mapping (from `map' if given, otherwise from "tags.map"), reads the
    dictionary and transitions, and prints the analysis of each.
-----------------------------------------------------------------------------*/

int main(int argc, char *argv[])
{
    char  mapname[MAXFN], dictname[MAXFN], tranname[MAXFN];
    const char *map_arg;
    Trans t;
    Dict  d;

    InitDict(d)
    InitTrans(t)
    InitOptions;

    /* NOTE(review): error_exit is assumed not to return -- confirm */
    if (argc < 2)
	error_exit("Usage: dtinfo root [map]\n");

    /* Get main file names */
    make_names(argv[1], dictname, tranname, MAXFN);

    /* Get mappings.  The name may come straight from the command line, so
       make sure it fits in mapname before copying it. */
    map_arg = (argc < 3) ? "tags.map" : argv[2];
    if (strlen(map_arg) >= MAXFN)
	error_exit("dtinfo: mapping file name too long\n");
    strcpy(mapname, map_arg);
    read_mapping(mapname);

    /* Read dictionary */
    read_named_dict(dictname, &d, -1);

    /* Read transitions */
    read_named_trans(tranname, &t);

    /* Analyse them */
    analyse_dict(&d);
    analyse_tran(&t);

    return 0;
}
