#include <stdio.h>
#include <string.h>
#include "kernel.h"
#include "pcre.h"
#include "iconv.h"

/* Pattern text.  Filled from the command line or a prompt in main(), or
   from the first line of the input file by setuppatt(); setuppatt() passes
   it to pcre_compile().  Room for 79 characters plus the terminator. */
char patt[80];
/* Compiled pattern returned by pcre_compile() in setuppatt(); used by
   matchfile() and released in main(). */
pcre *regexp;
/* opts: bitmask of the OPT_* flags defined below.
   matchopts: option bits passed to pcre_compile(). */
int opts, matchopts;

/* Flag bits stored in the global opts */
#define OPT_pattfile 1 /* not referenced by the code visible in this file */
#define OPT_time 2     /* -t: time each initial pcre_exec call */
#define OPT_utf 4      /* -u: UTF-8 mode (pattern and output go through iconv) */
#define OPT_capture 8  /* -c: print captured substrings as well */
#define OPT_global 16  /* -g: report every match in a subject, not just the first */

/* SWI numbers passed to _kernel_swi().  Timer_Start/Timer_Stop bracket the
   pcre_exec call in matchfile(), which prints R0 and R1 after Timer_Stop as
   seconds and a six-digit fraction.  OS_Module is used by checkmods() to
   test whether a module is available. */
#define Timer_Start 0x490C0
#define Timer_Stop 0x490C1
#define OS_Module 0x1E

/*
 * Convert the NUL-terminated UTF-8 string `in` to ISO-8859-1 and store the
 * NUL-terminated result in `out`.
 *
 * out: destination buffer of at least 80 bytes (the caller in this file
 *      passes an 80-byte array).  At most 79 characters plus the terminator
 *      are written; output that does not fit is dropped.
 * in:  source string; it is only read.
 *
 * A character that iconv will not convert is written as a "<U+XXXX>" escape
 * built from its decoded code point.  If no converter can be opened the
 * input bytes are copied unconverted (and truncated to fit).
 */
void copy_utf(char *out, char *in) {
  enum { OUT_CAPACITY = 80 };
  iconv_t charset;
  char *s_in, *s_out;
  char escape[16];
  size_t isize, osize, seqlen, k;
  unsigned int lead, ucp;
  int esclen;

  s_in=in; s_out=out;
  isize=strlen(in);
  osize=OUT_CAPACITY-1;   /* always keep one byte for the terminator */

  charset=iconv_open("ISO-8859-1", "UTF-8");
  if(charset==(iconv_t) -1) {
    if(isize>osize)
      isize=osize;
    memcpy(out, in, isize);
    out[isize]='\0';
    return;
  }

  while(iconv(charset, &s_in, &isize, &s_out, &osize)==(size_t) -1) {
    if(isize==0)
      break;
    /* Read the lead byte as unsigned so the range tests below do not
       depend on whether plain char is signed. */
    lead=(unsigned char) *s_in;
    if(lead<0x80)
      break;              /* not stopped by a multi-byte character */
    if(lead<0xE0) {
      seqlen=2; ucp=lead & 0x1F;
    } else if(lead<0xF0) {
      seqlen=3; ucp=lead & 0x0F;
    } else {
      seqlen=4; ucp=lead & 0x07;
    }
    if(seqlen>isize)
      break;              /* sequence is cut short by the end of the input */
    for(k=1; k<seqlen; k++)
      ucp=(ucp<<6) | ((unsigned char) s_in[k] & 0x3F);

    /* Format into a scratch buffer first so the escape is only copied
       when it fits completely. */
    esclen=sprintf(escape, "<U+%04X>", ucp);
    if(esclen<0 || (size_t) esclen>osize)
      break;
    memcpy(s_out, escape, (size_t) esclen);
    s_out+=esclen;
    osize-=(size_t) esclen;
    /* Step over the bytes decoded by hand and keep the remaining length in
       step, so the next iconv call stays inside the input. */
    s_in+=seqlen;
    isize-=seqlen;
  }
  *s_out='\0';
  iconv_close(charset);
}

/*
 * Copy substring number `idx` of a match into buf (bufsize bytes).
 * pcre_copy_substring is tried first; if it reports an error (for example
 * because the substring does not fit), buf receives as much of the
 * substring as fits, or an empty string when the group is unset or out of
 * range.  In UTF-8 mode a truncated copy is shortened further so that it
 * does not end in the middle of a multi-byte character.
 */
static void fetchcapture(char *subj, int *outvec, int veclen, int idx,
                         char *buf, int bufsize) {
  int start, len;

  if(pcre_copy_substring(subj, outvec, veclen, idx, buf, bufsize)>=0)
    return;
  buf[0]='\0';
  if(idx<0 || idx>=veclen)
    return;
  start=outvec[2*idx];
  len=outvec[2*idx+1]-start;
  if(start<0 || len<=0)
    return;
  if(len>bufsize-1) {
    len=bufsize-1;
    if(opts & OPT_utf) {
      /* subj[start+len] is the first byte left out; a continuation byte
         there means the cut fell inside a character */
      while(len>0 && (subj[start+len] & 0xC0)==0x80)
        len--;
    }
  }
  memcpy(buf, subj+start, (size_t) len);
  buf[len]='\0';
}

/*
 * Print the result of one successful pcre_exec call.
 *
 * outvec: offset vector filled in by pcre_exec
 * veclen: value returned by pcre_exec (number of substrings set)
 * subj:   subject string that was matched
 *
 * The overall match and its byte offset are always printed.  When OPT_capture
 * is set and there is at least one captured group, the groups follow on a
 * second line.  In UTF-8 mode every printed substring is passed through
 * copy_utf first.  Substrings longer than 79 bytes are printed truncated.
 *
 * Returns 1 when only the overall match was printed, otherwise veclen.
 */
int printcaptures(int *outvec, int veclen, char *subj) {
  int i;
  char ssub[80], utfsub[80];

  fetchcapture(subj, outvec, veclen, 0, ssub, 80);
  if(!(opts & OPT_utf)) {
    printf("Matched %s at byte %d\n", ssub, outvec[0]);
  } else {
    copy_utf(utfsub, ssub);
    printf("Matched %s at byte %d\n", utfsub, outvec[0]);
  }
  if(veclen<=1 || !(opts & OPT_capture))
    return(1);

/* Otherwise print captured substrings as well */
  printf("%d captured strings: ", veclen-1);
  for(i=1; i<veclen; i++) {
    fetchcapture(subj, outvec, veclen, i, ssub, 80);
    if(!(opts & OPT_utf)) {
      printf("(%s) ", ssub);
    } else {
      /* same conversion as the overall match, so both lines agree */
      copy_utf(utfsub, ssub);
      printf("(%s) ", utfsub);
    }
  }
  putchar('\n');
  return(veclen);
}

/*
 * Read one subject string from fp into strptr.
 *
 * A line whose last character before the newline is '/' is continued: the
 * '/' is replaced by a newline and the next line is appended.  Lines are
 * read in pieces of at most 79 characters.  The trailing newline, if any,
 * is kept in the result.
 *
 * strptr: destination buffer.  It must hold at least 256 bytes (the size
 *         matchfile() allocates); continuation stops once another piece
 *         might not fit in that space, and the rest of the input is then
 *         read as further subjects.
 * fp:     stream to read from.
 *
 * Returns 0 when nothing could be read (end of file or read error),
 * otherwise the length of the string stored, which is never 0.
 */
int readsubj(char *strptr, FILE *fp) {
  enum { CHUNK = 80, CAPACITY = 256 };
  size_t used;

  if(!fgets(strptr, CHUNK, fp))
    return(0);
  used=strlen(strptr);
  while(used>=2 && strptr[used-1]=='\n' && strptr[used-2]=='/'
        && used-1+CHUNK<=CAPACITY) {
    /* turn "/\n" into "\n" and append the next line right after it */
    strptr[used-2]='\n';
    strptr[used-1]='\0';
    used--;
    if(!fgets(strptr+used, CHUNK, fp))
      break;              /* nothing follows: keep what has been read */
    used+=strlen(strptr+used);
  }
  return((int) used);
}

/*
 * Match the compiled pattern (global regexp) against every subject string
 * that readsubj() returns from fp, printing the outcome for each.
 *
 * With OPT_time set, the first pcre_exec call on each subject is bracketed
 * by the Timer_Start/Timer_Stop SWIs and the elapsed time is printed.
 * With OPT_global set, matching is repeated from the end of each match
 * until no further match is found or a match starts at the end of the
 * subject.
 *
 * Returns 0 after the whole input has been processed, or -1 if the pattern
 * could not be examined or memory could not be allocated.
 */
int matchfile(FILE *fp) {
  int rc, subjlen, startpos;
  int lineno, veclen;
  int *pcreout;
  char *subj;
  _kernel_swi_regs r;

  lineno=1;
  veclen=0;
  memset(&r, 0, sizeof r);   /* do not hand uninitialised registers to the SWIs */
/* find number of captures and allocate memory */
  rc=pcre_fullinfo(regexp, NULL, PCRE_INFO_CAPTURECOUNT, &veclen);
  if(rc<0) {
    printf("Error code %d while examining pattern\n", rc);
    return(-1);
  }
  /* three ints per substring: start, end, and one for pcre_exec's own use */
  veclen=(veclen+1)*3;
  pcreout=pcre_malloc(veclen*sizeof(int));
  subj=pcre_malloc(256);
  if(pcreout==NULL || subj==NULL) {
    printf("Not enough memory to match\n");
    if(pcreout!=NULL)
      pcre_free(pcreout);
    if(subj!=NULL)
      pcre_free(subj);
    return(-1);
  }
  while(readsubj(subj, fp)) {
      subjlen=strlen(subj);
      if(opts & OPT_time)
        _kernel_swi(Timer_Start, &r, &r);
      rc=pcre_exec(regexp, NULL, subj, subjlen, 0, 0, pcreout, veclen);
      if(opts & OPT_time) {
        _kernel_swi(Timer_Stop, &r, &r);
        printf("pcre_exec completed in %d.%06d sec.", r.r[0], r.r[1]);
      }
      printf("Line %d: ", lineno);
      if(rc==-1)
        printf("No match!\n");
      else if(rc<0)
        printf("Error code %d while matching string\n", rc);
      else {
        printcaptures(pcreout, rc, subj);
      }
      lineno++;
      if(!(opts & OPT_global))
        continue;

/* otherwise look for all matches */
      while(rc>0 && pcreout[0]<subjlen) {
        startpos=pcreout[1];
        if(pcreout[0]==pcreout[1])
          /* skip one character if we found an empty string */
          startpos++;
        if(opts & OPT_utf) {
          /* advance by an entire UTF-8 character */
          while((subj[startpos] & 0xC0) == 0x80) startpos++;
        }
        rc=pcre_exec(regexp, NULL, subj, subjlen, startpos, 0, pcreout, veclen);
        if(rc > 0)
          printcaptures(pcreout, rc, subj);
        else if(rc < -1)
          /* -1 only means "no more matches"; anything lower is a failure */
          printf("Error code %d while matching string\n", rc);
      }
  }
  pcre_free(pcreout);
  pcre_free(subj);
  return(0);
}

int checkmods(void) {
  _kernel_oserror *errorp;
  _kernel_swi_regs r;
  char modname[40];
  int result;

  result=0;
  if(opts & OPT_time) {
    r.r[0]=18;
    strcpy(modname, "TimerMod");
    r.r[1]=(int) modname;
    errorp=_kernel_swi(OS_Module, &r, &r);
    if(errorp != NULL) {
      printf("TimerMod is needed to time matching\n");
      opts ^= OPT_time;
      result += 1;
    }
  }
  if(opts & OPT_utf) {
    r.r[0]=18;
    strcpy(modname, "Iconv");
    r.r[1]=(int) modname;
    errorp=_kernel_swi(OS_Module, &r, &r);
    if(errorp != NULL) {
      printf("Iconv is needed to use UTF-8\n");
      opts ^= OPT_utf;
      result += 2;
    }
  }
  return(result);
}

/*
 * Prepare the global pattern and compile it into the global regexp.
 *
 * If patt is still empty, the next line of fp is read and used as the
 * pattern (without its newline) and echoed.  In UTF-8 mode the pattern is
 * converted from ISO-8859-1 to UTF-8 before it is compiled with the
 * options in matchopts.
 *
 * Returns 0 on success.  Returns -1, after printing a message, when there
 * is no pattern, when it cannot be converted to UTF-8 within the size of
 * patt, or when pcre_compile rejects it.
 */
int setuppatt(FILE *fp) {
  const char *errstr;
  int errpos;
  iconv_t charset;
  char utfbuf[sizeof patt];
  char *pattsrc, *utfpatt;
  size_t pattsize, utfsize, converted;

  if(patt[0]=='\0') {
    if(!fgets(patt, sizeof patt, fp))
      patt[0]='\0';       /* nothing to read: reported as "no pattern" below */
    /* remove the newline only if there is one */
    patt[strcspn(patt, "\n")]='\0';
    printf("Using pattern: %s\n", patt);
  }
  if(patt[0]=='\0') {
    printf("No pattern string!\n");
    return(-1);
  }
  if(opts & OPT_utf) {
    charset=iconv_open("UTF-8", "ISO-8859-1");
    if(charset==(iconv_t) -1) {
      printf("Unable to convert pattern to UTF-8\n");
      return(-1);
    }
    pattsrc=patt;
    utfpatt=utfbuf;
    pattsize=strlen(patt);
    utfsize=sizeof utfbuf-1;   /* leave room for the terminator */
    converted=iconv(charset, &pattsrc, &pattsize, &utfpatt, &utfsize);
    iconv_close(charset);
    if(converted==(size_t) -1) {
      /* refuse rather than compile a silently truncated pattern */
      printf("Pattern cannot be converted to UTF-8 within %d bytes\n",
             (int) (sizeof utfbuf-1));
      return(-1);
    }
    *utfpatt='\0';
    strcpy(patt, utfbuf);
  }
  regexp=pcre_compile(patt, matchopts, &errstr, &errpos, NULL);
  if(!regexp) {
    printf("Error compiling pattern: %s at byte %d\n", errstr, errpos);
    return(-1);
  }
  return(0);
}

/*
 * Entry point.  Usage: *regex [options] <filename>
 *
 *   -p <pattern>  take the pattern from the next argument
 *   -i            prompt for the pattern on standard input
 *   -t            time matching (OPT_time)
 *   -u            UTF-8 mode (OPT_utf and PCRE_UTF8)
 *   -s            caseless matching (PCRE_CASELESS)
 *   -c            print captured substrings (OPT_capture)
 *   -g            report all matches in each subject (OPT_global)
 *
 * Any argument not starting with '-' is taken as the input file name; the
 * last one wins.  If no pattern is given, setuppatt() reads it from the
 * first line of the file.  The pattern is limited to what fits in patt.
 */
int main(int argc, char *argv[]) {
  int i;
  char *fname;
  FILE *fp;

  opts=0;
  matchopts=0;
  fname=NULL;
  strcpy(patt, "");
/* check there's at least one parameter */
  if(argc<2) {
    printf("Usage: *regex [options] <filename>\n");
    exit(0);
  }
  for(i=1; i<argc; i++) {
    if(*argv[i] != '-') {
      fname=argv[i];
    } else {
      switch(argv[i][1]) {
      case 'p':
        /* the pattern is the following argument: make sure it exists and
           fits before copying it */
        if(i+1>=argc) {
          printf("Option -p needs a pattern\n");
          exit(0);
        }
        i++;
        if(strlen(argv[i])>=sizeof patt) {
          printf("Pattern is too long (limit %d characters)\n",
                 (int) (sizeof patt-1));
          exit(0);
        }
        strcpy(patt, argv[i]);
        break;
      case 'i': printf("Enter pattern: ");
        fflush(stdout);   /* show the prompt before waiting for input */
        if(!fgets(patt, sizeof patt, stdin))
          patt[0]='\0';
        patt[strcspn(patt, "\n")]='\0';
        break;
      case 't': opts |= OPT_time;
        break;
      case 'u': opts |= OPT_utf;
        matchopts |= PCRE_UTF8;
        break;
      case 's': matchopts |= PCRE_CASELESS;
        break;
      case 'c': opts |= OPT_capture;
        break;
      case 'g': opts |= OPT_global;
        break;
      default: printf("Unknown option -%c\n", argv[i][1]);
      }
    }
  }
/* check filename and try to open it */
  if(fname==NULL) {
    printf("No filename specified\n");
    exit(0);
  }
  fp=fopen(fname, "r");
  if(fp==NULL) {
    printf("Unable to open file %s\n", fname);
    exit(0);
  }
  if((opts & OPT_time) || (opts & OPT_utf))
    checkmods();
  if(!setuppatt(fp)) {
    matchfile(fp);
    pcre_free(regexp);
  }
  fclose(fp);
  exit(0);
}
