
/* c166 lex.c */

/* cbgc 
 *
 * ANSI -Wall version.
 *
 * (C) 1990-95 Tenison Technology
 * DJ Greaves
 * Tenison Technology
 * 10 Tenison Road
 * Cambridge CB1 2DW
 */

#include <stdio.h>
#include <malloc.h>
#include "ccchdr.h"
#include <ctype.h>



/* Forward declarations of lexer routines defined further down this file. */
void mks(char a, char *s);
uchar nextsymb();
uchar nextsymb1();
uchar nextsymb2();
uchar l_readnumber();
uchar l_readalpha();
CONS *srcptr = NULL;                 /* list to which clinein() appends a copy of every line read */

extern char *buffer1;                /* source text; filereset() starts scanning here */
char * scanpoi;                      /* read position in the source text, advanced by clinein() */
/* symb: current token; unreadsymb: token pushed back by unnextsymb()
 * (0 when none); lastsymb: token that was current before symb. */
uchar symb, unreadsymb, lastsymb;
/* wordnode values saved alongside lastsymb and unreadsymb respectively. */
char *lastwordnode, *unwordnode;
int linenumber;                      /* Set to 1 by filereset(), bumped by nextsymb2() per line fetched */





/* Size in bytes of both the line buffer and the token scratch buffer. */
#define maxline 500
char inbuffer[maxline];              /* current input line, NUL terminated by clinein() */
char tagbuffer[maxline];             /* scratch text of the identifier or string being read */
char * poi;                          /* pointer to next char in inbuffer */


/*
 * Read one character of a string or character literal at poi, decoding a
 * backslash escape if there is one, and advance poi past what was read.
 *
 * Recognised escapes: \\ \n \t \r \" \' \f \a \b \v \?, octal \o, \oo, \ooo
 * and hexadecimal \xh... (value reduced modulo 256).
 *
 * Returns the decoded character.  A backslash that is the last character
 * of the line is returned as itself and poi is left on the terminator so
 * that the caller can detect the end of the line.
 */
char lex_sescapes()
{ char first = *(poi++);
  char esc;
  if (first != '\\') return first;
  if (*poi == 0) return first;       /* trailing backslash: do not step over the NUL */
  esc = *(poi++);
  switch (esc)
   { case '\\': return '\\';
     case 'n': return '\n';
     case 't': return 9;
     case 'r': return 13;
     case '"': return '"';
     case '\'': return '\'';
     case 'f': return 12;
     case 'a': return 7;
     case 'b': return 8;
     case 'v': return 11;
     case '?': return '?';

     case '0': case '1': case '2': case '3':
     case '4': case '5': case '6': case '7':
      { /* octal escape: at most three digits in total */
        int value = esc - '0';
        int digits = 1;
        while (digits < 3 && *poi >= '0' && *poi <= '7')
         { value = value * 8 + (*(poi++) - '0');
           digits++;
         }
        return (char) value;
      }

     case 'x':
      { /* hexadecimal escape: consume every hex digit that follows */
        int value = 0;
        int seen = 0;
        for (;;)
         { char h = *poi;
           int d;
           if (h >= '0' && h <= '9') d = h - '0';
           else if (h >= 'a' && h <= 'f') d = h - 'a' + 10;
           else if (h >= 'A' && h <= 'F') d = h - 'A' + 10;
           else break;
           value = (value * 16 + d) & 0xFF;
           seen = 1;
           poi++;
         }
        if (seen) return (char) value;
        break;                       /* "\x" without digits: treat as unknown */
      }

     default: break;
   }
  /* Unknown escape.  Historic behaviour, kept unchanged: the result is the
   * low five bits of the character FOLLOWING the escape letter, and that
   * character is not consumed.
   * NOTE(review): this looks unintended - confirm what was meant here. */
  return *poi & 0x1F;
}

/* Lexical symbol table: a chained hash table keyed on the spelling. */
typedef struct lexitem 
{ char * textual;                    /* heap copy of the spelling, made by lookup() */
  char atrib;                        /* token code: s_newid unless mks() installed another */
  struct lexitem * nlitem;           /* next entry in the same hash bucket */
} LEXITEM;


#define lextabmask  127              /* bucket index mask; the table has lextabmask+1 buckets */
struct lexitem *lexstab[lextabmask+1];  /* bucket heads, cleared by lexinit() */
struct lexitem *stnode;              /* entry found or created by the latest lookup() */
/* Value attached to the current token: the spelling for words
 * (l_readalpha), a heap copy of the text for string literals, or the
 * integer value cast to a pointer for numbers and character literals. */
char * wordnode;

/*
 * Find the spelling s in the lexical symbol table, adding it when absent.
 *
 * On return stnode points at the entry for s.  A newly created entry gets
 * the attribute s_newid and its own heap copy of the spelling.
 *
 * Returns 1 when s was already present, 0 when a new entry was created.
 *
 * NOTE(review): neither allocation is checked for failure here.
 */
int lookup(char *s)
{ struct lexitem * p;
  unsigned int h = 0;
  char *ss = s;
  /* Unsigned arithmetic: the doubling wraps harmlessly on long spellings
   * (signed overflow would be undefined) and only the low bits are kept. */
  while (*ss) { h = h + h + (unsigned char) *(ss++); }
  h = h & lextabmask;

  for (p = lexstab[h]; p; p = p->nlitem)
   { if (strcmp(p->textual, s)==0)
      { stnode = p;
        return 1;
      }
   }

  /* not found: create an entry and push it on the front of the bucket */
  p = NEWZ(struct lexitem);
  p->atrib = s_newid;
  p->textual = (char *) malloc(strlen(s)+1);
  strcpy(p->textual, s);
  p->nlitem = lexstab[h];
  lexstab[h] = p;
  stnode = p;
  return 0; /* return 0 for a new id */
}

/*
 * Rewind the scanner: line counting restarts at 1 and the read position
 * goes back to the beginning of buffer1.
 */
void filereset()
{
  linenumber = 1;
  scanpoi = buffer1;
}

struct lexdecode
{ 
  int s; char *string;
} lexdecodes[] = 

{
 { s_newid,	"s_newid" },		 
 { s_auto,	"s_auto" },		 
 { s_break,	"s_break" },		 
 { s_case,	"s_case" },		 
 { s_char,	"s_char" },		 
 { s_const,	"s_const" },		 
 { s_continue,	"s_continue" },		 
 { s_double,	"s_double" },		 
 { s_do,	"s_do" },		 
 { s_leaf,	"s_leaf" },		 
 { s_default,	"s_default" },		 
 { s_else,	"s_else" },		 
 { s_extern,	"s_extern" },		 
 { s_for,	"s_for" },		 
 { s_int,	"s_int" },		 
 { s_goto,	"s_goto" },		 
 { s_if,	"s_if" },		 
 { s_long,	"s_long" },		 
 { s_register,	"s_register" },		 
 { s_return,	"s_return" },		 
 { s_short,	"s_short" },		 
 { s_sizeof,	"s_sizeof" },		 
 { s_static,	"s_static" },		 
 { s_struct,	"s_struct" },		 
 { s_switch,	"s_switch" },		 
 { s_typedef,	"s_typedef" },		 
 { s_void,	"s_void" },		 
 { s_while,	"s_while" },		 
 { s_union,	"s_union" },		 
 { s_volatile,	"s_volatile" },		 
 { s_unsigned,	"s_unsigned" },		 
 { s_signed,	"s_signed" },		 

 { s_commentstart,	"s_commentstart" },		 
 { s_eof,	"s_eof" },		 
 { s_commentend,	"s_commentend" },		 
 { s_number,	"s_number" },		 
 { s_lsect,	"s_lsect" },		 
 { s_rsect,	"s_rsect" },		 
 { s_lparen,	"s_lparen" },		 
 { s_rparen,	"s_rparen" },		 
 { s_lsquare,	"s_lsquare" },		 
 { s_rsquare,	"s_rsquare" },		 
 { s_dot,	"s_dot" },		 
 { s_comma,	"s_comma" },		 
 { s_semicolon,	"s_semicolon" },		 
 { s_percent,	"s_percent" },		 
 { s_colon,	"s_colon" },		 
 { s_query,	"s_query" },		 

 { s_plus,	"s_plus" },		 
 { s_plusplus,	"s_plusplus" },		 
 { s_pluseq,	"s_pluseq" },		 
 { s_minus,	"s_minus" },		 
 { s_minusminus,	"s_minusminus" },		 
 { s_minuseq,	"s_minuseq" },		 
 { s_ampersand,	"s_ampersand" },		 
 { s_logand,	"s_logand" },		 
 { s_andeq,	"s_andeq" },		 
 { s_bitor,	"s_bitor" },		 
 { s_logor,	"s_logor" },		 
 { s_oreq,	"s_oreq" },		 
 { s_equals,	"s_equals" },		 
 { s_eqeq,	"s_eqeq" },		 
 { s_pling,	"s_pling" },		 
 { s_plingeq,	"s_plingeq" },		 
 { s_greater,	"s_greater" },		 
 { s_greatereq,	"s_greatereq" },		 
 { s_rshift,	"s_rshift" },		 
 { s_lesser,	"s_lesser" },		 
 { s_lessereq,	"s_lessereq" },		 
 { s_lshift,	"s_lshift" },		 
 { s_arrow,	"s_arrow" },		 
 { s_slash,	"s_slash" },		 
 { s_slasheq,	"s_slasheq" },		 
 { s_star,	"s_star" },		 
 { s_stareq,	"s_stareq" },		 
 { s_string,	"s_string" },		 
 { s_eor,	"s_eor" },		 
 { s_coloneq,	"s_coloneq" },		 
 { s_define,	"s_define" },		 
 { s_macro,	"s_macro" },	 	 
 { s_tilda,	"s_tilda" },	          
 { s_lshifteq,	"s_lshifteq" },	       
 { s_rshifteq,	"s_rshifteq" },	       
 { s_bit,	"s_bit" },
 { s_enum, "s_enum"},
 { -1, "" }
};



char *lextok(int i)
{
  int j = 0;
  if (i == s_newid)
    {
      return wordnode;
    }

  while (lexdecodes[j].s >= 0)
    {
      if (lexdecodes[j].s == i) return lexdecodes[j].string;
      j += 1;
    }
  printf("bad %i\n", i);
  return "bad";
}
/*
 * Prepare the lexer for a fresh scan: empty the symbol table, rewind the
 * source, install the reserved words and reset the token state.
 *
 * The parameter s is not used by this routine.
 */
void lexinit(char * s)
{
  /* reserved words, installed in this order */
  static struct { int code; char *spelling; } reserved[] =
  {
    { s_auto, "auto" },
    { s_break, "break" },
    { s_case, "case" },
    { s_char, "char" },
    { s_const, "const" },
    { s_continue, "continue" },
    { s_double, "double" },
    { s_do, "do" },
    { s_enum, "enum" },
    { s_leaf, "leaf" },
    { s_default, "default" },
    { s_else, "else" },
    { s_extern, "extern" },
    { s_for, "for" },
    { s_int, "int" },
    { s_goto, "goto" },
    { s_if, "if" },
    { s_long, "long" },
    { s_register, "register" },
    { s_return, "return" },
    { s_short, "short" },
    { s_sizeof, "sizeof" },
    { s_static, "static" },
    { s_struct, "struct" },
    { s_switch, "switch" },
    { s_typedef, "typedef" },
    { s_union, "union" },
    { s_unsigned, "unsigned" },
    { s_signed, "signed" },
    { s_void, "void" },
    { s_volatile, "volatile" },
    { s_while, "while" },
    { s_define, "#define" },
    { s_abs, "abs" }
  };
  int k;

  /* start from an empty hash table */
  for (k = 0; k <= lextabmask; k++) lexstab[k] = NULL;

  filereset();

  for (k = 0; k < (int) (sizeof reserved / sizeof reserved[0]); k++)
    mks(reserved[k].code, reserved[k].spelling);

  /* an empty line buffer makes the first token request fetch a line */
  poi = inbuffer;
  *poi = 0;
  unreadsymb = 0;
  symb = s_semicolon;                /* innocuous initial token */
}

/*
 * Enter the spelling s in the lexical symbol table (unless it is already
 * there) and give its entry the token code a.
 */
void mks(char a, char *s)
{
  (void) lookup(s);                  /* leaves the entry for s in stnode */
  stnode->atrib = a;
}

/*
 * Advance to the next token and return it (also left in symb).
 *
 * A token pushed back by unnextsymb() is delivered first; otherwise the
 * current token is remembered in lastsymb/lastwordnode and a new one is
 * read with nextsymb1().
 */
uchar nextsymb()
{
  if (!unreadsymb)
   {
     /* keep the outgoing token so that unnextsymb() can restore it */
     lastwordnode = wordnode;
     lastsymb = symb;
     symb = nextsymb1();
   }
  else
   {
     /* deliver the pushed-back token and clear the slot */
     wordnode = unwordnode;
     symb = unreadsymb;
     unreadsymb = 0;
   }

  if (0)                             /* token trace, compiled but disabled */
    {
      printf("symb %s\n", (symb == s_newid) ? wordnode : lextok(symb));
    }
  return symb;
}

uchar nextsymb1()
{
  symb = nextsymb2();
  while (symb == s_commentstart)
  {
    while (symb != s_commentend)
    { symb = nextsymb2();
      if (symb==s_eof) return symb;
    }
    nextsymb1();
  }
  if (0) assembly("; symb %s \n",   lextok(symb)); 
  return symb;
}

/*
 * Push the current token back so that the next nextsymb() delivers it
 * again, and make the previous token current.  Only one token can be
 * held at a time.
 */
void unnextsymb()
{
  /* park the current token in the push-back slot */
  unreadsymb = symb;
  unwordnode = wordnode;

  /* step back to the token before it */
  wordnode = lastwordnode;
  symb = lastsymb;
}

/*
 * Print the current source line on stdout for an error report, with the
 * marker "<-<<" inserted at the scan position poi.
 *
 * The marker is also printed when poi rests on the end of the line, which
 * is where it is after the last token of a line has been read.
 */
void errorline()
{
  char * k = inbuffer;
  while(*k)
  {
    if (k==poi) printf("<-<<");
    putchar(*(k++));
  }
  if (k==poi) printf("<-<<");        /* scan position is the end of the line */
  putchar('\n');
}

uchar clinein()
{
  CONS **m = &srcptr;
  char *d = inbuffer;
  if (*scanpoi == (char) 0) return s_eof;
  while (*scanpoi)
  {
    char c = *scanpoi++;
    if (c == '\n') break;
    *d++ = c;
  }
  *d = 0;
  
  while (*m) m = &((*m)->cdr);/* chain along and put text on end */
  *m = cons(strdup(inbuffer), NULL);
  return 0;
}


uchar nextsymb2()
{ 
  char c;
  while (1)
  {
    while (*poi == 0)
    {
      if (clinein() == s_eof) return s_eof;
      poi = inbuffer;
      linenumber ++;
    }
  c = *poi;
  if (isdigit(c)) return l_readnumber();
  if ((isupper(c)) || (islower(c)) || (c == '_'))
                  return l_readalpha();
  if (c == '#' && poi == inbuffer)
  {
    return l_readalpha();
  }
  poi++;
  switch(c)
   { default:                        /* ignore all other src chars */
       continue;

     case '~':
       return s_tilda;

     case '^':
       switch(*poi)
         { 
	 default: return s_eor;
	 case '=': poi++; return s_eoreq;
         }

     
   case '{':
     return s_lsect;
   case '}':
       return s_rsect;
     case '(':
       return s_lparen;
     case ')':
       return s_rparen;
     case '[':
       return s_lsquare;
     case ']':
       return s_rsquare;
     case '.':
       switch(*poi)
         { default: return s_dot;
           case '.': poi++; poi++; return s_3dots;
         }

     case ',':
       return s_comma;
     case ';':
       return s_semicolon;
     case '%':
       return s_percent;
     case ':':
       switch(*poi)
         { default: return s_colon;
           case '=': poi++; return s_coloneq;
         }
     case '?':
       return s_query;

     case '+':
       switch(*poi)
         { default: return s_plus;
           case '+': poi++; return s_plusplus;
           case '=': poi++; return s_pluseq;
         }
     case '-':
       switch(*poi)
         { default: return s_minus;
           case '-': poi++; return s_minusminus;
           case '>': poi++; return s_arrow;
           case '=': poi++; return s_minuseq;
         }
     case '&':
       switch(*poi)
         { default: return s_ampersand;
           case '&': poi++; return s_logand;
           case '=': poi++; return s_andeq;
         }
     case '|':
       switch(*poi)
         { default: return s_bitor;
           case '|': poi++; return s_logor;
           case '=': poi++; return s_oreq;
         }
     case '=':
       switch(*poi)
         { default: return s_equals;
           case '=': poi++; return s_eqeq;
         }
     case '!':
       switch(*poi)
         { default: return s_pling;
           case '=': poi++; return s_plingeq;
         }
     case '>':
       switch(*poi)
         { default: return s_greater;
           case '=': poi++; return s_greatereq;
           case '>': poi++; 
             if (*poi == '=') { poi++; return s_rshifteq; }
             return s_rshift;
         }
     case '<':
       switch(*poi)
         { default: return s_lesser;
           case '=': poi++; return s_lessereq;
           case '<': poi++;
             if (*poi == '=') { poi++; return s_lshifteq; }
             return s_lshift;
         }
     case '/':
       switch(*poi)
         { default: return s_slash;
           case '=': poi++; return s_slasheq;
           case '/': *poi = 0; continue; /* ignore to end of line */
           case '*': poi++; return s_commentstart;
         }
     case '*':
       switch(*poi)
         { default: return s_star;
           case '=': poi++; return s_stareq;
           case '/': poi++; return s_commentend;
         }

     case '\'':
         { int cdc = lex_sescapes() & 0xFF;
           wordnode = (char *) cdc;
           if (*(poi++) != '\'') error("mismatched quote");
           return s_number;
         }

     case '"':                       /* Adjacent strings are catted in eval */
     { char *o = tagbuffer;
       int limit = maxline;
       while (*poi != '"')
       {
         char cc = lex_sescapes();
         if (limit-- <= 0) error("string too long");
         else *(o++) = cc;
       }
       poi++;
       *(o++) = (char) 0;
     }
     wordnode = (char *) malloc(strlen(tagbuffer)+1);
     strcpy(wordnode, tagbuffer);
     return s_string;
   }
  }
}


/*
 * Read an integer constant starting at poi and leave poi just after it.
 *
 * A leading "0x" or "0X" selects base 16, any other leading '0' base 8,
 * otherwise base 10.  The letters a-f/A-F count as digits in base 16
 * only.  The decimal digits 8 and 9 are still accepted in an octal
 * constant, as they always were.  Any run of U/L suffix letters (either
 * case) is consumed and ignored.
 *
 * The value is accumulated in unsigned arithmetic so that an over-large
 * constant wraps instead of overflowing a signed int.
 *
 * The value is stored in wordnode, cast to a pointer.
 * Returns s_number.
 */
uchar l_readnumber()
{ unsigned int base = 10;
  unsigned int r = 0;
  if (*poi == '0')
   { poi++;
     if (*poi == 'x' || *poi == 'X')
      { base = 16;
        poi ++;
      }
     else base = 8;
   }
  while (1)
   { char c = *poi;
     unsigned int digit;
     if ( (c>='0') && (c <= '9'))
      { digit = c - '0'; }
     else if (base == 16 && ( c >= 'a' ) && ( c <= 'f') )
      { digit = c - 'a' + 10; }
     else if (base == 16 && ( c >= 'A' ) && ( c <= 'F') )
      { digit = c - 'A' + 10; }
     else break;
     r = r*base + digit;
     poi++;
   }

  /* Constant suffix: any combination of U and L, e.g. 10UL */
  while (*poi == 'u' || *poi == 'U' || *poi == 'l' || *poi == 'L') poi++;

  wordnode = (char *) (int) r;
  return s_number;
}

uchar l_readalpha()
{ char *o = tagbuffer;
  char c = *(poi++);
  while (islower(c) || c == '#' || c == '_' || isupper(c) || isdigit(c))
  { *(o++) = c;
    c = *(poi++);
  }
  *o = (char) 0;
  poi--;
  lookup(tagbuffer);
  wordnode = stnode->textual;
  return stnode->atrib;
}



/* end of lex.c */
