/******************************************************************************/
/*                                                                            */
/*  PACKAGE:  BIbliometric-based Retrieval of Documents                       */
/*                                                                            */
/*  MODULE: BIRDCode.cpp                                                      */
/*                                                                            */
/*  PURPOSE: Defines methods of classes which are related to execution of     */
/*           BIRD code.                                                       */
/*                                                                            */
/*  WRITTEN BY:  David Caron   25/04/96                                       */
/*                                                                            */
/*  DEPENDENT UPON: BIRDCode.h                                                */
/*                                                                            */
/*  NEEDED BY: Makefile                                                       */
/*                                                                            */
/******************************************************************************/
#include "BIRDCode.h"
#include "TCP.h"
#include <math.h>
#include <sys/ddi.h>

// NOTE(review): the next line is a bare identifier with no type and no
// initializer; it does not look like a valid declaration and nothing in the
// visible part of this file refers to it -- confirm whether it is leftover junk.
a_Variable_with_underscores;

// Global pointers to the two top-level BIRD implementations. They are not
// assigned anywhere in the visible part of this file.
GetCandidateSet *BIRD_GetCandidateSet;
Compare *BIRD_Compare;
// Global registries of document lists and vector lists. The print() methods
// below call findIndexOf() on them to turn a list pointer into an index.
DocLists TheDocLists;
VectLists TheVectLists;


//================================(Document)=================================
int Document::compareTo(Document *aDocument)
// Orders this document against aDocument by identifier, using strcmp():
//   == 0  the two identifiers are equal,
//    < 0  this identifier sorts before that of aDocument,
//    > 0  this identifier sorts after that of aDocument.
// aDocument is dereferenced unconditionally, so it must not be NULL.
{
	const int ordering = strcmp(identifier, aDocument->getIdentifier());
	return ordering;
}


Document *Document::deepCopy(Document *aDocument)
// Overwrites the fields of this instance with the values obtained from the
// accessors of aDocument and returns this instance.
// NOTE(review): despite the name, every field is assigned straight from the
// accessor result; identifier, parents and children look like pointers that
// end up shared with aDocument rather than duplicated -- confirm ownership.
// aDocument must not be NULL.
{
	Document *const source = aDocument;

	identifier = source->getIdentifier();
	position   = source->getPosition();
	references = source->ref();
	parents    = source->getParents();
	children   = source->getChildren();

	return this;
}


//============================(BinarySearchTree)=============================
long BinarySearchTree::add(char *anIdentifier, DocList *aDocList)
// Looks up "anIdentifier" in the tree rooted at this node and records it if it
// is not there yet.
// A node stores only a document index (docIndex); "aDocList" is needed to reach
// the identifier string behind that index.
// Returns:
//   the (0-based) document index of the matching node if the identifier is
//   already in the tree;
//   -1 if it was not found. In that case a new leaf is created whose index is
//   the current length of aDocList, i.e. the slot the document will occupy once
//   the caller appends it to aDocList (presumably the caller's duty -- the
//   callers visible in this file do so).
// Note the orientation: identifiers that compare lower go to the RIGHT
// subtree, higher ones to the LEFT.
{
	const int ordering =
		strcmp(anIdentifier, (*aDocList)[docIndex]->getIdentifier());

	// Exact match: this node already represents the identifier.
	if( ordering == 0 ) return(docIndex);

	if( ordering < 0 )
	{
		if( right != NULL ) return( right->add(anIdentifier, aDocList) );
		right = new BinarySearchTree( aDocList->getLength() );
		return(-1);
	}

	if( left != NULL ) return( left->add(anIdentifier, aDocList) );
	left = new BinarySearchTree( aDocList->getLength() );
	return(-1);
}


/*long BinarySearchTree::findIndexOf(char *anIdentifier, DocList *aDocList)
// This method is almost the same as add above except that only the index of 
// anIdentifier is returned. No new leaf node is created if anIdentifier is 
// unique. 
{
   int compare = strcmp(anIdentifier, (*aDocList)[docIndex]->getIdentifier() );
   if( compare == 0 ) return(docIndex); 
   if( compare < 0 )
   { 
	if( right == NULL) return(-1);
	else return( right->findIndexOf(anIdentifier, aDocList) );
   } else if( left == NULL) return(-1);
	  else return( left->findIndexOf(anIdentifier, aDocList) );
}	
*/ 

Vector *BinarySearchTree::listIndexesInOrder()
// Builds and returns a newly allocated Vector holding the document indexes of
// this subtree: first those of the right subtree, then this node's docIndex,
// then those of the left subtree. Because add() sends lower identifiers to the
// right, this order lists the documents sorted by identifier.
// The caller receives the returned Vector and is responsible for it.
//
// WARNING (kept from the original author): in a recursive method, allocate
// with "new" BEFORE the method calls itself, never after.
{
	// Allocate the result first, before any recursion (see warning above).
	Vector *ordered = new Vector();

	if( right != NULL )
	{
		Vector *fromRight = right->listIndexesInOrder();
		ordered->add(fromRight);
		delete fromRight;
	}

	ordered->add(docIndex);

	if( left != NULL )
	{
		Vector *fromLeft = left->listIndexesInOrder();
		ordered->add(fromLeft);
		delete fromLeft;
	}

	return(ordered);
}


void BinarySearchTree::print(ostream& os)
// Writes the docIndex of every node to "os", one per line, visiting the right
// subtree, then this node, then the left subtree.
{
	if( right != NULL )
	{
		right->print(os);
	}

	os << docIndex << endl;

	if( left != NULL )
	{
		left->print(os);
	}
}


//===================================(Stem)==================================
// concatenates the Stem by one directory by chopping off everything after the
// last '/' in the instance variable identifier.  The new end of string is 
// determined by the index of the returned variable aLength.
// If the only thing left is "http", then zero is returned instead.
long Stem::truncate() 
{
	char temp[1024];
	strcpy(temp,identifier);
	temp[length] = '\0'; 
	long aLength = strlen(temp)-strlen( strrchr(temp, (int)'/'));
	char *root = identifier + aLength -2; 
	if( !strncmp(root,":/",2) && (length < 8) ) return(0);
	return(aLength); 
}

//================================(Document)=================================
DocList *Document::getCiting(mode aMode)
// Returns the parents of this document (the documents citing it) as a DocList.
// If the instance variable "parents" is already set it is returned directly.
// Otherwise getDocumentsCiting() is asked to fill a stream for this identifier
// and the stream is parsed into a new DocList. That list is remembered in
// "parents" only when aMode == STORE.
// If getDocumentsCiting() fails, a new empty DocList is returned and nothing
// is remembered.
// NOTE(review): a list that is not stored belongs to nobody but the caller --
// confirm who is expected to delete it.
{
	// Parents already known: hand back the stored list.
	if( parents != NULL ) return( parents );

	fstream hotList;

	// Original author's note: call lycos search engine from here.
	if( !getDocumentsCiting(hotList, identifier) )
	{
#ifdef DEBUG
		cout << "CITING: " << identifier << " (primitive error)"
		     << endl;
#endif
		return(new DocList());
	}

#ifdef DEBUG
	cout << "CITING: " << identifier;
#endif
	DocList *citing = (new DocList())->ReadURLList(hotList);
	if( aMode == STORE ) parents = citing;
	return( citing );
}


DocList *Document::getCitedBy(mode aMode)
// Returns the children of this document (the documents it cites) as a DocList.
// If the instance variable "children" is already set it is returned directly.
// Otherwise getDocumentsCitedBy() is asked to fill a stream for this
// identifier and the stream is parsed into a new DocList. That list is
// remembered in "children" only when aMode == STORE.
// If getDocumentsCitedBy() fails, a new empty DocList is returned and nothing
// is remembered.
// NOTE(review): a list that is not stored belongs to nobody but the caller --
// confirm who is expected to delete it.
{
	// Children already known: hand back the stored list.
	if( children != NULL ) return( children );

	fstream hotList;

	// Original author's note: call get URL.
	if( !getDocumentsCitedBy(hotList, identifier) )
	{
#ifdef DEBUG
		cout << "CITEDBY: " << identifier << " (primitive error)"
		     << endl;
#endif
		return(new DocList());
	}

#ifdef DEBUG
	cout << "CITEDBY: " << identifier;
#endif
	DocList *cited = (new DocList())->ReadURLList(hotList);
	if( aMode == STORE ) children = cited;
	return( cited );
}


void Document::print(ostream& os)
// Default print method for class Document: writes the identifier wrapped as
// Doc(<identifier>), without a trailing newline.
{
	os << "Doc(";
	os << identifier;
	os << ")";
}


//================================(DocList)==================================
// Returns the index of the element of this list whose identifier equals that
// of aDocument (as decided by Document::compareTo), or -1 if there is none.
// An empty list or a NULL aDocument also yields -1.
// When the list is flagged as sorted a binary search is used (ascending
// identifier order is assumed); otherwise the list is scanned from the front
// and NULL slots are skipped.
long DocList::findIndexOf(Document *aDocument)
{
    // Nothing can match in an empty list; the binary search below would
    // otherwise dereference data[0].
    if( (aDocument == NULL) || (length <= 0) ) return(-1);

    if( sorted )
    {
	long low = 0;
	long high = length - 1;
	while( low <= high )
	{
		long middle = low + (high - low) / 2;
		int compare = data[middle]->compareTo(aDocument);
		if( compare == 0 ) return(middle);
		// compare < 0: data[middle] sorts before aDocument.
		if( compare < 0 ) low = middle + 1;
		else high = middle - 1;
	}
	return(-1);
    }

    for(long index=0; index < length; index++)
	if( (data[index] != NULL) && !data[index]->compareTo(aDocument) )
		return(index);
    return(-1);
}


DocList *DocList::ReadURLList(fstream& hotList)
// Parses the already opened stream "hotList" into Documents which are added
// to this instance, then closes the stream. Returns this instance.
// Each line is expected to hold a URL optionally followed by an integer
// position ("<url> <position>"); a missing position is taken as 0. Lines
// without a URL are skipped.
// Only unique URLs are added: a temporary BinarySearchTree rooted at index 0
// of this list is used to detect duplicates. The first URL parsed is added
// unconditionally.
// If the stream is in a failed state on entry nothing is read and the stream
// is not closed.
// With DEBUG defined, the resulting length and the current time are written
// to stdout.
{
	char buf[257], url[256];
	long position;
	int firstEntry = 1;
	BinarySearchTree aTree(0);

	if( !hotList ) { return(this); }
	while( hotList.getline(buf,256) )
	{
		position = 0;
		// %255s bounds the write into url; %li matches the long.
		if( sscanf(buf,"%255s%li", url, &position) < 1 ) continue;
		if( firstEntry )
		{	//First hotlink MUST be unique, naturally!
			add( new Document( strdup(url) , position) );
			firstEntry = 0;
		}
		else if( aTree.add(url, this) < 0 )
			add(new Document(strdup(url), position));
	}
	hotList.close();
#ifdef DEBUG
	cout << "  (" << length << ")" << endl;
	cout << "hotlinks parsed: ";
	printTime(cout);
#endif
	return(this);
}

DocList *DocList::ReadURLList(char *sysCall)
// Executes the system command contained in sysCall (via popen) and parses its
// stdout into Documents which are added to this instance. Returns this
// instance; if the command cannot be started the list is returned unchanged.
// Each output line is expected to hold a URL optionally followed by an integer
// position ("<url> <position>"); a missing position is taken as 0. Lines
// without a URL are skipped.
// Only unique URLs are added: a temporary BinarySearchTree rooted at index 0
// of this list is used to detect duplicates. The first URL parsed is added
// unconditionally.
// Because of the bottleneck delay induced by the system call, the current
// system time is streamed to this program's stdout when DEBUG is defined.
// NOTE: sysCall is handed to the shell as is; callers must not build it from
// untrusted text.
{
	char buf[257], url[256];
	long position;
	int firstEntry = 1;
	FILE *perlstdout = popen(sysCall,"r");
	if( perlstdout == NULL ) return(this);	// command could not be started
	BinarySearchTree aTree(0);

	while( fgets(buf,256, perlstdout) )
	{
		position = 0;
		// %255s bounds the write into url; %li matches the long.
		if( sscanf(buf,"%255s%li", url, &position) < 1 ) continue;
		if( firstEntry )
		{	//First hotlink MUST be unique, naturally!
			add( new Document( strdup(url) , position) );
			firstEntry = 0;
		}
		else if( aTree.add(url, this) < 0 )
			add(new Document(strdup(url), position));
	}
	pclose(perlstdout);
#ifdef DEBUG
	cout << "  (" << length << ")" << endl;
	cout << "hotlinks parsed: ";
	printTime(cout);
#endif
	return(this);
}
 

void DocList::stem(long levels)
// This method is used to stem a DocList by directory. It does nothing if the
// instance variable "superStem" has already been built.
// Initially a stem is created for each document and stored in "stemList". These
// stems have not been truncated. They are only used to get the main loop 
// described below, started. All future truncated stems are real. 
// All the stems are stored in the temporary variable "stemList". An instance
// variable called "superStem" is then created whose index represents the 
// associated stems in "stemList". The following procedure is repeated "levels" 
// number of times ("levels" is the parameter of this method); each pass only
// visits the stems appended since the previous pass, because "stemIndex" is
// not reset between passes:
//	For each stem in "stemList" the following is performed:
//	1. A truncated stem is produced. 
//		( In other words the super directory of the stem is found.) 
//	2. If the truncated stem has length 0 (only a root is left) then the
//		procedure continues with the next stem.
// 	3. The truncated stem is compared to the stems in "stemList" that come
//	   after the per-document entries.
//	4. The index of the matching stem (or of the newly appended stem if
//	   the search did not succeed), plus one, is stored in "superStem" at
//	   the index of the stem that was truncated.
//
//	The values stored in "superStem" are always one larger than the actual
//	"stemList" index because the value 0 represents no index or empty.
{
	long index;
	// NOTE(review): this pointer is never used; it is shadowed by the local
	// "Stem aStem" object declared inside the loop below.
	Stem *aStem;
	if( superStem != NULL ) return;
	// Seed stemList with one untruncated stem per document.
	List<Stem> *stemList = new List<Stem>();
	for(long biblioIndex=0; biblioIndex < length; biblioIndex++)
		stemList->add(new Stem(data[biblioIndex]->getIdentifier()) );

	superStem = new Vector();
	long stemIndex=0;
	for(int height=0; height < levels; height++)
	{
	   // Snapshot of the list length: stems appended during this pass are
	   // left for the next pass.
	   long stems = stemList->getLength();
	   for(; stemIndex < stems; stemIndex++)
	   {	
		Stem aStem = Stem((*stemList)[stemIndex]->getIdentifier(),
					(*stemList)[stemIndex]->truncate());	
		if( aStem.getLength() == 0 ) continue;
		index = length;   // previous entries are docs not stems
		while( (index < stemList->getLength() ) &&
			!( aStem == (*stemList)[index]) ) index++;
		// NOTE(review): the test is against "stems", not the current
		// list length, so a stem that matched one appended earlier in
		// this same pass is appended again -- confirm this is intended.
		if( index >= stems ) stemList->add(new Stem(&aStem)); 
 		*(*superStem)[stemIndex] = index+1;
	   }
	}

#ifdef DEBUG 
//*****************For Debugging purposes only
	// For every document, print its identifier followed by the chain of
	// super stems reached by following superStem until a 0 entry.
	for(index=0; index < length; index++)
	{
	    cout << data[index]->getIdentifier() << endl;
	    long nextIndex = index +1;
	    if( (nextIndex = *(*superStem)[ nextIndex -1] ) > 0 )
	    {
	    	cout << "\t" << *( (*stemList)[ nextIndex -1 ]);
		while( (nextIndex = *(*superStem)[nextIndex -1 ]) > 0 )
			cout << "\t" << *( (*stemList)[ nextIndex -1 ]);
	    }
	}
#endif
	// The temporary stem list is emptied and released; only superStem
	// is kept.
	stemList->initialize();
	delete stemList;
}


DocList *DocList::deepCopy(DocList *aList)
// Appends a copy of every element of "aList" to this instance and returns
// this instance. Each element is copied into a newly allocated Document via
// Document::deepCopy; a NULL element of aList is carried over as NULL so that
// indexes stay aligned.
// The sorted flag is taken over from aList when this instance was empty
// beforehand; when elements are appended to a non-empty list the flag is
// cleared, because the combined order is not known.
// NOTE: THIS HAS BEEN WRITTEN AS A PRIMITIVE TO INCREASE SPEED.  
{
	const int wasEmpty = (length == 0);
	const long incoming = aList->getLength();
	if( (incoming+length) >= size ) 
		growBy( incoming+length -size+1 );
	for(long index=0; index < incoming; index++)
	{
		Document *original = (*aList)[index];
		if( original == NULL ) data[length++] = NULL;
		// The parentheses around the new-expression are required by
		// standard C++ before "->" can be applied.
		else data[length++] = (new Document())->deepCopy( original );
	}
	if( wasEmpty ) sorted=aList->sortedById();
	else if( incoming > 0 ) sortedById(FALSE);
	return(this);
}


void DocList::print(ostream& os)
// Default print method for class DocList: writes every non-NULL Document,
// one per line. No newline is written after the last element.
{
	// Declared outside the loop because it is needed after it; relying on
	// a for-init variable outliving the loop is not standard C++.
	long index;
	for(index=0; index < length-1; index++)
		if( data[index] != NULL ) os << *data[index] << endl;
	// index is now the last valid position (if any); skip it when NULL,
	// like the slots before it.
	if( (length > 0) && (data[index] != NULL) ) os << *data[index];
}


//================================(DocLists)=================================
long DocLists::findIndexOf(DocList *aDocList)
// Returns the index at which the pointer aDocList is stored in this instance,
// or -1 if it is not stored here. Lists are compared by address, not by
// content.
{
	long index = 0;
	while( index < length )
	{
		if( data[index] == aDocList ) return(index);
		index++;
	}
	return(-1);
}


DocList *DocLists::operator[](long index)
// Returns the DocList at "index".
// If index is at or beyond the current length, the list is extended (growing
// the storage when needed) and a new, empty DocList is placed at index.
// If index is within the length but the slot holds NULL, a new, empty DocList
// is created there.
// NULL is never returned.
{
	const int beyondEnd = ( index >= length );

	if( beyondEnd )
	{
		if( index >= size ) growBy(index-size+1);
		length = index+1;
	}

	if( beyondEnd || (data[index] == NULL) )
		data[index] = new DocList();

	return( data[index] );
}

 
void DocLists::print(ostream& os)
// Default print method for class DocLists: writes every list framed as
//   Document List [<index>]:(
//   <contents>)
// followed by an empty line. A slot that holds NULL (operator[] only fills
// slots on demand) is written like an empty list instead of being
// dereferenced.
{
	for(long index=0; index < length; index++)
	{
		os << "Document List [" << index << "]:(" << endl;
		if( data[index] != NULL ) os << *data[index];
		os << ")" << endl << endl;
	} 	
}


//================================(Vector)==================================
long *Vector::operator[](long index)
// Returns a pointer to the long value stored at "index".
// If index is at or beyond the current length, the vector is extended
// (growing the storage when needed) and a new long, initialized to zero, is
// placed at index.
// If index is within the length but the slot holds NULL, a new long
// initialized to zero is created there.
// NULL is never returned.
{
	const int beyondEnd = ( index >= length );

	if( beyondEnd && (index >= size) ) growBy(index-size+1);

	if( beyondEnd || (data[index] == NULL) )
	{
		long *fresh = new long;
		*fresh = 0;
		data[index] = fresh;
	}

	if( beyondEnd ) length = index+1;

	return( data[index] );
}


Vector *Vector::deepCopy(Vector *aList)
// Appends a copy of every value of "aList" to this instance and returns this
// instance. Each value is stored in a newly allocated long. The source is
// read through its operator[], which supplies 0 for a NULL slot.
{
	if( (aList->getLength()+length) >= size ) 
		growBy( aList->getLength()+length -size+1 );

	long index = 0;
	while( index < aList->getLength() )
	{
		long *slot = new long;
		*slot = *(*aList)[index];
		data[length] = slot;
		length++;
		index++;
	}
	return(this);	
}
	

void Vector::print(ostream& os)
// Default print method for class Vector: writes the values as
// Vect(v0,v1,...,vn). A slot that holds NULL is written as 0.
{
	os << "Vect(";
	// Declared outside the loop because it is needed after it; relying on
	// a for-init variable outliving the loop is not standard C++.
	long index;
	for(index=0; index < length-1; index++)
		if( data[index] == NULL ) os << "0,";
		else os << *data[index] << ",";
	// Last element: no trailing comma, and NULL is treated like the
	// slots before it.
	if( length > 0 )
	{
		if( data[index] == NULL ) os << "0";
		else os << *data[index];
	}
	os << ")";
}


//================================(VectList)==================================
Vector *VectList::operator[](long index)
// Returns the Vector at "index".
// If index is at or beyond the current length, the list is extended (growing
// the storage when needed) and a new, empty Vector is placed at index.
// If index is within the length but the slot holds NULL, a new, empty Vector
// is created there.
// NULL is never returned.
{
	const int beyondEnd = ( index >= length );

	if( beyondEnd && (index >= size) ) growBy(index-size+1);

	if( beyondEnd || (data[index] == NULL) )
		data[index] = new Vector();

	if( beyondEnd ) length = index+1;

	return( data[index] );
}


VectList *VectList::deepCopy(VectList *aList)
// Appends a copy of every Vector of "aList" to this instance and returns this
// instance. Each Vector is copied into a newly allocated Vector via
// Vector::deepCopy. The source is read through its operator[], which creates
// an empty Vector for a NULL slot, so no NULL is ever copied.
// NOTE: THIS HAS BEEN WRITTEN AS A PRIMITIVE TO INCREASE SPEED.  
{
	if( (aList->getLength()+length) >= size ) 
		growBy( aList->getLength()+length -size+1 );
	for(long index=0; index < aList->getLength(); index++)
	{
		// The parentheses around the new-expression are required by
		// standard C++ before "->" can be applied.
		data[length++] = (new Vector())->deepCopy( (*aList)[index] );
	}
	return(this);
}


void VectList::print(ostream& os)
// Default print method for class VectList: writes one Vector per line, with
// no newline after the last one.
// Note that it is possible for the instance list to contain a NULL pointer if
// the corresponding Document in the DocList was actually a duplicate; such a
// slot is written as the text NULL.
{
	// Declared outside the loop because it is needed after it; relying on
	// a for-init variable outliving the loop is not standard C++.
	long index;
	for(index=0; index < length-1; index++)
		if( data[index] == NULL ) os << "NULL" << endl;
		else os << *data[index] << endl;
	// Last element: no trailing newline, same NULL handling as above.
	if( length > 0 )
	{
		if( data[index] == NULL ) os << "NULL";
		else os << *data[index];
	}
}


//================================(VectLists)=================================
long VectLists::findIndexOf(VectList *aVectList)
// Returns the index at which the pointer aVectList is stored in this
// instance, or -1 if it is not stored here. Lists are compared by address,
// not by content.
{
	long index = 0;
	while( index < length )
	{
		if( data[index] == aVectList ) return(index);
		index++;
	}
	return(-1);
}

VectList *VectLists::operator[](long index)
// Returns the VectList at "index".
// If index is at or beyond the current length, the list is extended (growing
// the storage when needed) and a new, empty VectList is placed at index.
// If index is within the length but the slot holds NULL, a new, empty
// VectList is created there, matching the other operator[] methods in this
// file.
// NULL is never returned.
{
	if( index >= length )
	{
		if(index >=size) growBy(index-size+1);
		data[index] = new VectList();
		length=index+1;
	}
	// Without this branch an in-range NULL slot would be handed back,
	// contradicting the "never NULL" promise above.
	else if( data[index] == NULL ) data[index] = new VectList();
	return( data[index] );
}


void VectLists::print(ostream& os)
// Default print method for class VectLists: writes every list framed as
//   Vector List [<index>]:(
//   <contents>)
// A slot that holds NULL is written like an empty list instead of being
// dereferenced.
{
	for(long index=0; index < length; index++)
	{
		os << "Vector List [" << index << "]:(" << endl;
		if( data[index] != NULL ) os << *data[index];
		os << ")" << endl;
	} 
}


//================================(ExecList)==================================
void ExecList::execute()
// Runs the list: calls execute() on every element of the instance, in index
// order. Elements are dereferenced unconditionally.
{
	long index = 0;
	while( index < length )
	{
		data[index]->execute();
		index++;
	}
}	


//==============================(GetCandidateSet)=============================
DocList *GetCandidateSet::execute(DocList *targetSet)
// This method executes the executable BIRD subclasses which find the
// candidateSet of the passed targetSet. The subclasses pass data between each
// other via the instance variables of this class. 
// Initially the instance variable "targetList" is emptied (if it contains a
// targetSet from a previous run) and the new targetSet is loaded; "retList"
// is emptied as well. Then the BIRD code is executed.
// The resulting candidate set ("retList") is copied into a newly allocated
// DocList which is returned to the caller. 
// According to the original author this method is called directly by the main
// function. 
{
	targetList->initialize();
	targetList->add(targetSet);
	retList->initialize();
	BIRDCode->execute();
	// The parentheses around the new-expression are required by standard
	// C++ before "->" can be applied.
	return( (new DocList())->deepCopy(retList) );
}


void GetCandidateSet::print(ostream& os)
// Default print method for class GetCandidateSet: writes the header line
// naming the target DocList, then the BIRD code, then the line naming the
// returned DocList. List numbers are the positions of the lists in
// TheDocLists minus DOCINDEX_OFFSET.
{
	os << "implementationOf getCandidateSet for DocList["
	   << ( TheDocLists.findIndexOf(targetList) - DOCINDEX_OFFSET )
	   << "]" << endl;

	os << *BIRDCode;

	os << "return DocList["
	   << ( TheDocLists.findIndexOf(retList) - DOCINDEX_OFFSET )
	   << "]" << endl;
}


//==================================(Compare)=================================
VectList *Compare::execute(DocList *selectSet, DocList *alternateSet)
// This method executes the executable BIRD subclasses which vectorize and 
// generate a correlation matrix between the candidateSet and targetSet.
// "selectList" and "alternateList" are emptied and reloaded from selectSet and
// alternateSet, together with their sorted flags; "retList" is emptied; then
// the BIRD code is executed.
// The final vector list is stored in the instance variable "retList". A copy
// of "retList" is made in a newly allocated VectList and returned to the
// caller. 
// According to the original author this method is called directly by the main
// function. 
{

	selectList->initialize();
	selectList->add(selectSet);
	selectList->sortedById(selectSet->sortedById());
	alternateList->initialize();
	// NOTE(review): here the sorted flag is set BEFORE add(), whereas for
	// selectList it is set after -- confirm add() does not reset the flag.
	alternateList->sortedById(alternateSet->sortedById());
	alternateList->add(alternateSet);
	retList->initialize();
	BIRDCode->execute();
	// The parentheses around the new-expression are required by standard
	// C++ before "->" can be applied.
	return( (new VectList())->deepCopy(retList) );
}


void Compare::print(ostream& os)
// Default print method for class Compare: writes the header line naming the
// two compared DocLists, then the BIRD code, then the line naming the
// returned vector list. DocList numbers are the positions of the lists in
// TheDocLists minus DOCINDEX_OFFSET.
{
	os << "implementationOf compare DocList["
	   << ( TheDocLists.findIndexOf(selectList) - DOCINDEX_OFFSET )
	   << "] and DocList["
	   << ( TheDocLists.findIndexOf(alternateList) - DOCINDEX_OFFSET )
	   << "]" << endl;

	os << *BIRDCode;

	os << "return ";
	printList(os,  TheVectLists.findIndexOf(retList) );
	os << endl;
}


//==============================(GetSamples)==================================
void GetSamples::execute()
// This method randomly selects documents from the targetList and adds them to
// "retList", which is emptied first. The number of documents selected is
// determined by the instance variable "numOfDocuments". Each pick is
// independent (rand() modulo the list length), so the same document may be
// selected more than once.
// If the targetList is empty nothing can be sampled and "retList" stays empty.
{
	retList->initialize();
	const long available = targetList->getLength();
	// rand() % 0 would be a division by zero.
	if( available <= 0 ) return;
	for(long index=0; index < numOfDocuments; index++)
		retList->add( (*targetList)[rand() % available] );
}


void GetSamples::print(ostream& os)
// Default print method for class GetSamples: writes one line of the form
//   DocList[<ret>] = get <n> samples of DocList[<target>]
// where the list numbers are the positions of the lists in TheDocLists minus
// DOCINDEX_OFFSET.
{
	os << "DocList["
	   << ( TheDocLists.findIndexOf(retList) - DOCINDEX_OFFSET )
	   << "] = "
	   << "get " << numOfDocuments <<" samples of DocList["
	   << ( TheDocLists.findIndexOf(targetList) - DOCINDEX_OFFSET )
	   << "]" << endl;
}


//================================(FindDoc)===================================
// For each document in "sourceList", one of the following is performed:
//  If the instance variable "citeType" == CITING
//	then the parents are found and passed to removeDupAndAdd().
//	else the children are found and passed to removeDupAndAdd().
// removeDupAndAdd() collects the unique documents in "unSortedDocList" and
// their indexes in "searchTree". Afterwards the collected documents are
// visited in identifier order; those with at least "numOfReferences"
// references are added to "retList", which is then flagged as sorted.
// A document with fewer references is not added; it is deleted ONLY if the
// instance variable "modeType" == FIND, not STORE.
// If no document was collected at all, "retList" is simply left empty.
void FindDoc::execute()
{
	retList->initialize();
	searchTree = NULL;
	unSortedDocList = NULL;
	long index;
	if( citeType == CITING )
	   for(index=0; index < sourceList->getLength(); index++)
		removeDupAndAdd( (*sourceList)[index]->getCiting(modeType) );
	else // (citeType == CITEDBY )
	   for(index=0; index < sourceList->getLength(); index++)
		removeDupAndAdd( (*sourceList)[index]->getCitedBy(modeType) );

//remove documents with insufficient references
	// searchTree is only created once a non-empty list has been seen.
	Vector *orderedIndexes = NULL;
	if( searchTree != NULL )
	{
		orderedIndexes = searchTree->listIndexesInOrder();
		for(index=0; index < orderedIndexes->getLength(); index++)
		{
			long docIndex = *(*orderedIndexes)[index];
			if( (*unSortedDocList)[docIndex]->ref() < numOfReferences )
			{
			    if( modeType == FIND )
				delete (*unSortedDocList)[docIndex];
			} 
			else retList->add((*unSortedDocList)[docIndex]);
		}
	}
	retList->sortedById(TRUE);
	delete unSortedDocList;
	unSortedDocList = NULL;
	if( orderedIndexes != NULL )
	{
		orderedIndexes->initialize(); //Also frees indexes in searchTree.
		delete orderedIndexes;
	}
	delete searchTree;
	searchTree = NULL;
#ifdef DEBUG
	cout << "Number of returned documents accepted: " 
	     <<  retList->getLength() << endl;
#endif
}


void FindDoc::removeDupAndAdd(DocList *aDocList)
// This method accepts aDocList (assumed to contain no duplicates itself) and
// merges its documents into "unSortedDocList", using "searchTree" to detect
// identifiers that were already collected.
// For every document, searchTree->add() is asked for its identifier:
//   a negative result means the identifier is new, and the document pointer
//   is appended to "unSortedDocList";
//   otherwise the result is the index of the document already collected, and
//   the reference count of that document is incremented (incRef) so that the
//   number of duplicates found is tracked.
// On the very first non-empty list the tree and "unSortedDocList" are created
// and seeded with the first document, so scanning starts at the second one;
// for every later list scanning starts at the first document.
// The Document pointers are shared with aDocList; no copies are made here.
// An empty aDocList is ignored.
{
	long duplicateIndex, index, start;
	Document *aDocument;
	if( aDocList->getLength() == 0 ) 
	{
#ifdef DEBUG
		cout << endl;
#endif 
		return;
	}
#ifdef DEBUG
	cout << "Removing duplicates... ";
#endif 
	if( searchTree == NULL )
	{
	    searchTree = new BinarySearchTree(0);
	    unSortedDocList = new DocList( (*aDocList)[0] );
	    start=1;
	} else start=0;

	// Begin at "start": beginning at 1 unconditionally would silently drop
	// the first document of every list after the first.
	for(index=start; index < aDocList->getLength(); index++)
	{
	    aDocument = (*aDocList)[index];
	    if( (duplicateIndex=searchTree
		     ->add(aDocument->getIdentifier(), unSortedDocList) ) < 0 )
		unSortedDocList->add(aDocument);
	    else (*unSortedDocList)[duplicateIndex]->incRef();
	}

#ifdef DEBUG
	printTime(cout);
	cout << endl;
#endif 
}
	
void FindDoc::print(ostream& os)
// Default print method for class FindDoc: writes (without a trailing newline)
//   DocList[<ret>] = find documents <citing|citedBy> atLeast <n> in
//   DocList[<source>] with pathLimit of <pathLimit>
// List numbers are the positions of the lists in TheDocLists minus
// DOCINDEX_OFFSET. Nothing is written for a citeType other than CITING or
// CITEDBY.
{
	os << "DocList["
	   << ( TheDocLists.findIndexOf(retList) - DOCINDEX_OFFSET )
	   << "] = find documents ";

	if( citeType == CITING ) os << "citing ";
	else if( citeType == CITEDBY ) os << "citedBy ";

	os << "atLeast " << numOfReferences << " in DocList["
	   << ( TheDocLists.findIndexOf(sourceList) - DOCINDEX_OFFSET )
	   << "] with pathLimit of "
	   << pathLimit;
}

//===============================(Vectorize)==================================
// Initially this method stems the biblioDocList if the instance variable 
// "stemType" == DIRECTORY which is selected in the BIRD code.
// For each document in the biblioDocList, the following is performed:
// 	If the instance variable "biblioType" == METRIC 
//		then the children are mapped onto the selectDocList. 
// 		else the parents are mapped onto the selectDocList.    
void Vectorize::execute()
{
	long biblioIndex, docIndex;
	Document *aDocument;
	retList->initialize();
	long length=biblioDocList->getLength();

	if( stemType == DIRECTORY ) biblioDocList->stem(levels);

#ifdef DEBUG
    	cout << "Vectorizing...";
#endif
	if( biblioType == METRIC )
	   	for(biblioIndex=0; biblioIndex < length; biblioIndex++)
	   	{
			aDocument= (*biblioDocList)[biblioIndex];
			mapUsing(biblioIndex,  aDocument->getCitedBy(STORE));
	   	}
	else  // biblioType == GRAPHIC 
		for(biblioIndex=0; biblioIndex < length; biblioIndex++)
	    	{
			aDocument= (*biblioDocList)[biblioIndex];
			mapUsing(biblioIndex,  aDocument->getCiting(STORE));
	   	}
#ifdef DEBUG
  	cout << "Done(";
  	printTime(cout);
  	cout << ")" << endl;
#endif
}

		
void Vectorize::mapUsing(long biblioIndex, DocList *aDocList)
// Given a biblio document referenced by "biblioIndex" and "aDocList", 
// for each document in "aDocList" the following is performed:
// 1. The index of that document is looked up in the selectDocList.
// 2. If it is found (index != -1), that index selects the vector in the
//    "retList" matrix and "biblioIndex" selects the element in that vector.
// 3. The value ADDED to that element depends on the instance variable
//    "vectType":
//    If INDICATOR then 1.
//    If RANK then the index (rank) of the current document within aDocList.
//    If POSITION then the position of the current document.
//    For any other vectType, 0 is added.
// Furthermore, if "stemType" == DIRECTORY, the same value is also added for
// the super stems of the biblio document: biblioDocList->stemOf() is followed
// up to "levels" times, and each positive result N adds the value at element
// N-1 of the same vector. This makes a citation involving a document count
// for the stems above it as well.
// NOTE(review): stemOf() appears to return a 1-based index with 0 meaning
// "none"; the original comment mentions reducing stem indexes by the number
// of documents, which the code does not do -- confirm.
{
	for(long rank=0; rank < aDocList->getLength(); rank++)
	{		    
	    long docIndex = selectDocList->findIndexOf( (*aDocList)[rank] );
	    if( docIndex != -1 )
	    {
		long value;
		switch(vectType) 
		{
			case INDICATOR: value = 1; break;
			case RANK: value = rank; break;
			case POSITION: value = (*aDocList)[rank]->getPosition();
				       break;
			// Never add an indeterminate value.
			default: value = 0; break;
		}
		*(*(*retList)[docIndex])[biblioIndex] += value;
	    	if( (stemType == DIRECTORY) && (levels > 0) )
		{
		    long nextIndex = biblioIndex;
		    for(int height=0; height < levels; height++)
		   	if((nextIndex = biblioDocList->stemOf(nextIndex) ) > 0 )
			    *(*(*retList)[docIndex])[nextIndex-1]+=value;
		}	
	    }	 
	}
}
	

void Vectorize::print(ostream& os)
// Default print method for class Vectorize: writes (without a trailing
// newline) a description of the form
//   VectList[<ret>] = vectorize citations from|to DocList[<biblio>]
//   to|from DocList[<select>] by indicator|rank|position
// followed, when stemming by directory, by the stemming clause.
// DocList numbers are positions in TheDocLists minus DOCINDEX_OFFSET; the
// VectList number is the plain position in TheVectLists.
{
	const int metric = ( biblioType == METRIC );

	os << "VectList[" << TheVectLists.findIndexOf(retList)
	   << "] = vectorize citations ";

	if( metric ) os << "from DocList[";
	else os << "to DocList[";
	os << ( TheDocLists.findIndexOf(biblioDocList) -DOCINDEX_OFFSET );

	if( metric ) os << "] to DocList[";
	else os << "] from DocList[";
	os << ( TheDocLists.findIndexOf(selectDocList) - DOCINDEX_OFFSET );

	os << "] by ";
	if( vectType == INDICATOR ) os << "indicator ";
	else if( vectType == RANK ) os << "rank ";
	else if( vectType == POSITION ) os << "position ";

	if( stemType != DIRECTORY ) return;

	os << "steming by directory";
	if(levels > 0) os << " for " << levels;
	// The plural is used only for more than one level.
	if(levels > 1) os << " levels";
	else os << " level";
}


//===============================(GenerateCor)================================
void GenerateCor::execute()
// Empties "retList" and fills it with the correlation matrix:
// for each vector in "candidList" and each vector in "targetList", the value
// of cosAngle() for the pair is stored in "retList" in vector number
// "candIndex" at element "targIndex".
{
    retList->initialize();
#ifdef DEBUG
    cout << "Generating Correlation...";
#endif
    long candIndex = 0;
    while( candIndex < candidList->getLength() )
    {
	long targIndex = 0;
	while( targIndex < targetList->getLength() )
	{
	   Vector *candidVect = (*candidList)[candIndex];
	   Vector *targVect = (*targetList)[targIndex];
	   *(*(*retList)[candIndex])[targIndex] =
						cosAngle(candidVect, targVect);
	   targIndex++;
	}
	candIndex++;
    }
#ifdef DEBUG
    cout << "Done(";
    printTime(cout);
    cout << ")" << endl;
#endif
}


long GenerateCor::cosAngle(Vector *candidVect, Vector *targetVect)
// Returns the cosine of the angle between the two vectors, multiplied by 1000
// and truncated to a long. 0 is returned if either vector has zero magnitude.
//
// WARNING! VECTORS MAY HAVE TRAILING ZEROS NOT STORED, so the two vectors need
// not have the same length. Elements beyond the end of the shorter vector are
// treated as zero; the longer vector still contributes all of its elements to
// its own magnitude. Neither vector is extended by this method.
//
//Uses formula: Cos angle = u.v /( |u||v| )
//		where u.v = u1*v1 + u2*v2 + u3*v3 + ...
//		 and  |u| = sqrt(u1*u1 + u2*u2 + u3*u3 + ... )
//		 and  |v| = sqrt(v1*v1 + v2*v2 + v3*v3 + ... )
{
    long dotProd =0;
    long candidSum =0;
    long targetSum =0;
    const long candidLength = candidVect->getLength();
    const long targetLength = targetVect->getLength();
    const long longest =
		(candidLength > targetLength) ? candidLength : targetLength;

    for(long index=0; index < longest; index++)
    {
	long candidValue = 0;
	long targetValue = 0;
	long *ptr;
	// operator[] is only used inside each vector's own length, because
	// indexing past the end would grow the vector.
	if( (index < candidLength) && ((ptr = (*candidVect)[index]) != NULL) )
		candidValue = *ptr;
	if( (index < targetLength) && ((ptr = (*targetVect)[index]) != NULL) )
		targetValue = *ptr;
	dotProd += candidValue*targetValue;
	candidSum += candidValue*candidValue;
	targetSum += targetValue*targetValue;
    }

    double temp = 0;
    const double mag = sqrt((double)candidSum)*sqrt((double)targetSum);
    if( mag != 0 ) temp = (double)dotProd / mag;
    return( (long) (temp*1000) );
}		


void GenerateCor::print(ostream& os)
// Default print method for class GenerateCor: writes (without a trailing
// newline)
//   <ret> = generate correlation matrix of <candid> with <target>
// where each list is written by printList() from its position in
// TheVectLists.
{
	const long retIndex = TheVectLists.findIndexOf(retList);
	printList(os, retIndex );
	os << " = generate correlation matrix of ";

	const long candidIndex = TheVectLists.findIndexOf(candidList);
	printList(os, candidIndex );
	os << " with ";

	const long targetIndex = TheVectLists.findIndexOf(targetList);
	printList(os, targetIndex );
}


//===============================(Collapse)================================
void Collapse::execute()
// Empties "retList" and fills it with a collapsed form of the instance
// variable "collapsable", according to the instance variables "aDir" and
// "aFunc".
// If aDir == ROWS, every vector of "collapsable" is reduced to one value, and
//   a one-value Vector holding it is appended to "retList".
// Otherwise, "retList" receives a single Vector whose element r is the
//   reduction of element r over all vectors of "collapsable". The number of
//   rows is taken from the first vector; IT IS ASSUMED THAT ALL VECTORS HAVE
//   THE SAME NUMBER OF ROWS.
// Reductions:
//   AVERAGE  sum of the values divided by the number of values (integer
//            division; 0 when there are no values),
//   MAXIMUM  largest value, starting from 0 (so never below 0),
//   MINIMUM  smallest value, starting from 2000000000.
// For any other aFunc nothing is computed.
{
  long vectIndex, rowIndex, index, value, *ptr;
  Vector *aVector;
#ifdef DEBUG
  cout << "Collapsing..." << endl;
#endif
  retList->initialize();
  if( aDir == ROWS )
  {
    switch(aFunc)
    {
	case AVERAGE: 
	   for(vectIndex=0; vectIndex < collapsable->getLength(); vectIndex++)
	   {
		aVector = (*collapsable)[vectIndex];
	   	value=0;  
		for(index=0; index < aVector->getLength(); index++)
			if( (ptr = (*aVector)[index]) != NULL) value += *ptr;
		// index now equals the number of values; dividing by
		// index+1 would under-report every average.
	   	retList->add( new Vector( (index > 0) ? value / index : 0 ) );
	   }
	   break;
	
	case MAXIMUM: 
	   for(vectIndex=0; vectIndex < collapsable->getLength(); vectIndex++)
	   {
		aVector = (*collapsable)[vectIndex];
	   	value=0; 
		for(index=0; index < aVector->getLength(); index++)
			if( ((ptr = (*aVector)[index]) != NULL)
							&& (*ptr > value) )
				value = *ptr;
	   	retList->add( new Vector(value) );
	   }
	   break;

	case MINIMUM: 
	   for(vectIndex=0; vectIndex < collapsable->getLength(); vectIndex++)
	   {
		aVector = (*collapsable)[vectIndex];
	   	value= 2000000000L; 
		for(index=0; index < aVector->getLength(); index++)
			if( ((ptr = (*aVector)[index]) != NULL)
							&& (*ptr < value) )
				value = *ptr;
	   	retList->add( new Vector(value) );
	   }
	   break;
    };
  }
  else  // IT IS ASSUMED THAT ALL VECTORS HAVE THE SAME NUMBER OF ROWS!!!
  {
    retList->add( new Vector() );
    switch(aFunc) 
    {
	case AVERAGE:
	   for(rowIndex=0; rowIndex <(*collapsable)[0]->getLength(); rowIndex++)
 	   {
	   	value=0;  
		for(index=0; index < collapsable->getLength(); index++)
			if( (ptr = (*(*collapsable)[index])[rowIndex]) != NULL) 
				value += *ptr;
		// index now equals the number of vectors (see above).
	   	*(*(*retList)[0])[rowIndex] = (index > 0) ? value / index : 0;
	   }
	   break;
	
	case MAXIMUM: 
	   for(rowIndex=0; rowIndex <(*collapsable)[0]->getLength(); rowIndex++)
	   {
	   	value=0; 
		for(index=0; index < collapsable->getLength(); index++)
			if( ((ptr = (*(*collapsable)[index])[rowIndex]) != NULL)
							&& (*ptr > value) )
				value = *ptr;
	   	*(*(*retList)[0])[rowIndex] = value ;
	   }
	   break;

	case MINIMUM: 
	   for(rowIndex=0; rowIndex <(*collapsable)[0]->getLength(); rowIndex++)
	   {
	   	value= 2000000000L; 
		for(index=0; index < collapsable->getLength(); index++)
			if( ((ptr = (*(*collapsable)[index])[rowIndex]) != NULL)
							&& (*ptr < value) )
				value = *ptr;
	   	*(*(*retList)[0])[rowIndex] = value ;
	   }
	   break;
    }
  }
#ifdef DEBUG
  cout << "Done(";
  printTime(cout);
  cout << ")" << endl;
#endif
}


void Collapse::print(ostream& os)
// Default print method for class Collapse: writes (without a trailing
// newline) the returned list, " = collapse ", the collapsed list, the
// direction (rows or columns) and the function (average, minimum or maximum).
// The lists are written by printList() from their positions in TheVectLists.
// Nothing is written for an aFunc other than the three named ones.
{
	printList(os, TheVectLists.findIndexOf(retList) );
	os << " = collapse ";
	printList(os, TheVectLists.findIndexOf(collapsable) );
	os << " by ";

	if ( aDir == ROWS )
	{
		os << " rows ";
	}
	else
	{
		os << " columns ";
	}

	os << " using ";
	if( aFunc == AVERAGE ) os << "average ";
	else if( aFunc == MINIMUM ) os << "minimum ";
	else if( aFunc == MAXIMUM ) os << "maximum ";
}

	
//===============================(Copy)===================================
void Copy::execute()
// This method performs a deepcopy of the instance variable "source". 
// "source" and "retList" are of type VectList.
{
	retList->initialize();
	for(long index=0; index< source->getLength(); index++)
		retList->add( new Vector( *(*(*source)[index])[0] ) );
}

void Copy::print(ostream& os)
// Default print method for class Copy: writes the returned list immediately
// followed by the source list, each via printList() from its position in
// TheVectLists. Nothing is written between or after the two.
{
	const long retIndex = TheVectLists.findIndexOf(retList);
	printList(os, retIndex );

	const long sourceIndex = TheVectLists.findIndexOf(source);
	printList(os, sourceIndex );
}
