/*  -*- Mode: C;  -*- */

/******************************************************************************
*                                                                             *
*   Copyright 2005 University of Cambridge Computer Laboratory.               *
*                                                                             *
*   This file is part of Nprobe.                                              *
*                                                                             *
*   Nprobe is free software; you can redistribute it and/or modify            *
*   it under the terms of the GNU General Public License as published by      *
*   the Free Software Foundation; either version 2 of the License, or         *
*   (at your option) any later version.                                       *
*                                                                             *
*   Nprobe is distributed in the hope that it will be useful,                 *
*   but WITHOUT ANY WARRANTY; without even the implied warranty of            *
*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             *
*   GNU General Public License for more details.                              *
*                                                                             *
*   You should have received a copy of the GNU General Public License         *
*   along with Nprobe; if not, write to the Free Software                     *
*   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA *
*                                                                             *
******************************************************************************/

#include <stdio.h>
#include <setjmp.h>
#include <stdlib.h>
#include <unistd.h>
#include <ctype.h>
#include <assert.h>
#include <sys/param.h>
#include <string.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/socket.h>
#ifdef __alpha__
#include <sys/mbuf.h>
#endif
#include <net/route.h>
#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#undef __STDC__
#include <netinet/ip.h>

#ifdef __alpha__
#include <netinet/ip_var.h>
#endif
#include <netinet/tcp.h>
#include <netinet/if_ether.h>
#include <limits.h>
#include <linux/limits.h>

#include <sys/stat.h>
#include <sys/fcntl.h>

#include <errno.h>

#include "list.h"
#include "pkt.h"
#include "interface.h"
#include "flows.h"
#include "service.h"
#include "http.h"
#include "tcp.h"
#include "udp.h"
#include "udp_ns.h"
#include "seq.h"
#include "fingerprint.h"
#include "pool.h"

#include "if_nprobe.h"
#include "output.h"
#include "interesting.h"
#include "content_t.h"

#ifdef PRINT_OUT
#include "print_util.h"
#endif

static jmp_buf stop_parse;


/*
 * Templates for tags that carry a single URL attribute of interest: the
 * attribute name to search for and the record type written to the links
 * buffer when it is found.  parse_tag() selects rows by fixed array index,
 * so the rows must stay in this order.
 */
simple_parse_template_t stemps[] = 
{
  {"href", LR_LINK}, /* [0] ANCHOR 'A ' */
  {"href", LR_LINK}, /* [1] 'AREA' ? inline or link ?*/
  {"href", LR_REL_BASE},		/* [2] BASE */
  {"background", LR_INLINE},	/* [3] BODY */
  {"action", LR_LINK}, /* [4] FORM */
  {"usemap", LR_INLINE},	/* [5] MAP */
  {"src", LR_INLINE}		/* [6] SCRIPT */
};

/*
 * Templates for tags that may carry several URL attributes.  Each entry is
 * a list of (attribute name, record type) pairs terminated by an entry with
 * an empty attribute name; parse_multi() stops at that terminator.
 * parse_tag() selects entries by fixed array index, so the entries must
 * stay in this order.
 */
multi_parse_template_t dtemps[] = 
{
  /* [0] APPLET */
  {
    {
      {"codebase", LR_REL_SCRIPT_BASE},
      {"code", LR_INLINE},
      {"object", LR_INLINE},
      {"archive", LR_SCRIPT_ARCHIVE},
      {"", DUMMY_VALUE}
    }
  }, 
  /* [1] FRAME/IFRAME */
  {
    {
      {"src", LR_INLINE},
      {"longdesc", LR_INLINE},
      {"", DUMMY_VALUE}
    }
  },
  /* [2] IMG */
  {
    {
      {"src", LR_INLINE},
      {"usemap", LR_INLINE},
      {"longdesc", LR_INLINE},
      {"", DUMMY_VALUE}
    }
  },
  /* [3] INPUT */
  {
    {
      {"src", LR_INLINE},
      {"usemap", LR_INLINE},
      {"", DUMMY_VALUE}
    }
  },
  /* [4] OBJECT */
  {
    {
      {"codebase", LR_REL_SCRIPT_BASE},
      {"data", LR_INLINE},
      {"classid", LR_INLINE},
      {"archive", LR_SCRIPT_ARCHIVE},
      {"usemap", LR_INLINE},
      {"", DUMMY_VALUE}
    }
  }
};

/*
 * Close off the current links buffer and append a fresh one to the chain.
 *
 * The fill count of the outgoing buffer is recorded in the buffer itself
 * and added to the running total, then lc is re-pointed at the start of
 * the new buffer.  If the chain already holds LINKS_MAX_BUFS buffers no
 * buffer is added and control leaves via
 * longjmp(bail, HTML_LINKS_BUFS_EXHUSTED).
 *
 * Declared without `inline': a plain `inline' definition emits no external
 * symbol under C99 and later, which breaks linking of unoptimised builds.
 */
void 
chain_new_links_buf(struct links_chars *lc, jmp_buf bail)
{
  links_buf_t *current;
  links_buf_t *next;

  current = lc->current;
  current->nchars = lc->nchars;

  if (++lc->nbufs > LINKS_MAX_BUFS)
    {
      /* maximum number of buffers exceeded - undo the count and bail out */
      --lc->nbufs;
      longjmp(bail, HTML_LINKS_BUFS_EXHUSTED);
    }

  lc->totchars += current->nchars;
  next = get_links_buffer();
  current->next = next;
  lc->nchars = 0U;
  lc->buf = next->buf;
  lc->current = next;

  /* a freshly obtained buffer must be the tail of the chain */
  assert(next->next == NULL);
}
  
/*
 * Start a links-buffer chain: obtain the first buffer and initialise all
 * of lc's bookkeeping (write pointer, chain head, current buffer, counts).
 *
 * Declared without `inline': a plain `inline' definition emits no external
 * symbol under C99 and later, which breaks linking of unoptimised builds.
 */
void 
chain_first_links_buf(struct links_chars *lc)
{
  links_buf_t *bp = get_links_buffer();

  lc->buf = bp->buf;
  lc->chain = bp;
  lc->current = bp;
  lc->nbufs = 1;
  lc->nchars = 0U;
  lc->totchars = 0U;

  /* a freshly obtained buffer must not already be linked to another */
  assert(bp->next == NULL);
}

/*
 * Switch tag accumulation to a large tag buffer.
 *
 * Marks the parse state P_LARGE_BUF, obtains a buffer from get_tag_buf(),
 * records its size as LARGE_TAGBUF_SZ and copies the OB_TAGBUF_INDX bytes
 * accumulated so far from OB_TAGBUF_BUF into it.  Always returns 0.
 *
 * Declared without `inline': a plain `inline' definition emits no external
 * symbol under C99 and later, which breaks linking of unoptimised builds.
 */
int 
set_large_pbuf(struct tcp_conn *tconnp)
{
  SET_OB_PARSE_STATE(P_LARGE_BUF);
  OB_TAGBUF = get_tag_buf();
  OB_TAGBUF_SZ = LARGE_TAGBUF_SZ;
  /* carry over what has already been saved of the current tag */
  memcpy(OB_TAGBUF, OB_TAGBUF_BUF, OB_TAGBUF_INDX);

  return 0;
}

/*
 * Find the end of a URL starting at s, looking at no more than len bytes.
 *
 * Some HTML doesn't end the HREF URL with a ", so the URL is taken to end
 * at the first of: double quote, single quote, '>', '?', ';', space or '#'.
 * Returns a pointer to that terminator, or s+len if none is found.
 *
 * Declared without `inline': a plain `inline' definition emits no external
 * symbol under C99 and later, which breaks linking of unoptimised builds.
 */
char *
find_url_end(char *s, int len)
{
  char *endp = s+len;

  for (; s < endp; s++)
    {
      switch (*s)
        {
        case '"':
        case '\'':
        case '>':
        case '?':
        case ';':
        case ' ':
        case '#':
          return s;
        default:
          break;
        }
    }

  return endp;
}

/*
 * Find the end of a link reference starting at s, looking at no more than
 * len bytes.  The reference ends at the first double quote, '>' or space.
 *
 * Returns a pointer to that terminator, or NULL if none occurs within the
 * len bytes.
 *
 * Declared without `inline': a plain `inline' definition emits no external
 * symbol under C99 and later, which breaks linking of unoptimised builds.
 */
char *
find_linkref_end(char *s, int len)
{
  char *endp = s+len;

  for (; s < endp; s++)
    {
      if (*s == '"' || *s == '>' || *s == ' ')
        return s;
    }

  return NULL;
}

/*
 * Find the end of a URL inside a style definition.
 *
 * Scans [s, endp) and returns a pointer to the first ')', double quote,
 * single quote, '>', ';' or space; returns endp if none is found.
 *
 * Declared without `inline': a plain `inline' definition emits no external
 * symbol under C99 and later, which breaks linking of unoptimised builds.
 */
char *
find_style_url_end(char *s, char *endp)
{
  for (; s < endp; s++)
    {
      switch (*s)
        {
        case ')':
        case '"':
        case '\'':
        case '>':
        case ';':
        case ' ':
          return s;
        default:
          break;
        }
    }

  return endp;
}


/*
 * Cursor over the data being parsed: buf points at the next unconsumed
 * byte and len is the number of bytes remaining.
 */
struct buf 
{
  int len;
  char *buf;
};

/*
 * Shorthands for the fields of a local variable that must be named `buf'
 * (see parse_rep_body()).
 */
#define START buf.buf
#define LEN buf.len

/*
 * Disabled local definition of PULL (consume adj bytes from the cursor).
 * NOTE(review): PULL is still used by parse_rep_body(), so the live
 * definition presumably comes from one of the project headers - confirm.
 */
#if 0
#define PULL(buf, adj) \
MACRO_BEGIN   \
assert(adj <= buf.len);  \
(buf).len-=(adj);                  \
(buf).buf+=(adj); \
MACRO_END
#endif

/*
 * Count the leading bytes of [start, end) that belong to the set of
 * characters in the NUL-terminated string chars.
 *
 * Returns the number of bytes to skip (0 if the range is empty, the set is
 * empty, or the first byte is not in the set).  The terminating NUL of
 * chars is never treated as a member of the set.
 *
 * Declared without `inline': a plain `inline' definition emits no external
 * symbol under C99 and later, which breaks linking of unoptimised builds.
 */
int 
clear_chars(char *start, char *end, char *chars)
{
  size_t setlen = strlen(chars);
  char *cp = start;

  /* memchr over exactly setlen bytes so that a NUL in the data never matches */
  while (cp < end && memchr(chars, *cp, setlen) != NULL)
    cp++;

  return (int)(cp - start);
}

/*
 * Strip trailing characters that belong to the set in the NUL-terminated
 * string chars, working backwards from end.
 *
 * Note that end is inclusive here: the byte AT end is the first one
 * examined.  Returns a pointer to the last byte that is not in the set, or
 * start if the scan reaches it (the byte at start itself is never tested).
 *
 * Declared without `inline': a plain `inline' definition emits no external
 * symbol under C99 and later, which breaks linking of unoptimised builds.
 */
char * 
clear_chars_rev(char *start, char *end, char *chars)
{
  size_t setlen = strlen(chars);
  char *cp = end;

  /* memchr over exactly setlen bytes so that a NUL in the data never matches */
  while (cp > start && memchr(chars, *cp, setlen) != NULL)
    cp--;

  return cp;
}

/*
 * Count the leading space characters (' ' only) in [start, end).
 * Returns the number of bytes to skip.
 *
 * Declared without `inline': a plain `inline' definition emits no external
 * symbol under C99 and later, which breaks linking of unoptimised builds.
 */
int 
clear_space(char *start, char *end)
{
  char *cp = start;

  while (cp < end && *cp == ' ')
    cp++;

  return (int)(cp - start);
}

#ifdef DUMP_DEBUG
/*
 * Debug aid: dump the bytes from cp+offset up to (not including) end on
 * stderr, preceded by label.  Printable bytes are written as-is, others as
 * their numeric value in square brackets.  Always returns 0.
 */
int 
print_stderr(char *cp, char *end, int offset, char *label)
{
  cp = cp + offset;
  fprintf(stderr, "\n%s:-", label);
  while (cp < end)
    {
      /* <ctype.h> functions require an unsigned char value (or EOF) */
      if (isprint((unsigned char)*cp))
	fprintf(stderr, "%c", *cp);
      else
	fprintf(stderr, "[%d]", *cp);
      cp++;
    }
  fprintf(stderr, "\n");

  return 0;
}

/*
 * Debug aid: dump the bytes from cp+offset up to (not including) end on
 * stdout, preceded by label.  Printable bytes are written as-is, others as
 * their numeric value in square brackets.  Always returns 0.
 */
int 
print(char *cp, char *end, int offset, char *label)
{
  cp = cp + offset;
  printf("\n%s:-", label);
  while (cp < end)
    {
      /* <ctype.h> functions require an unsigned char value (or EOF) */
      if (isprint((unsigned char)*cp))
	printf("%c", *cp);
      else
	printf("[%d]", *cp);
      cp++;
    }
  printf("\n");

  return 0;
}

#endif /* DUMP_DEBUG */

/* 
 * Record end of base scope for relative URL resolution: append the single
 * marker byte `type' to the connection's links buffer.
 *
 * The links chain must already exist (the current buffer is dereferenced
 * unconditionally).  No check is made that the buffer has room for the
 * byte.
 *
 * Declared without `inline': a plain `inline' definition emits no external
 * symbol under C99 and later, which breaks linking of unoptimised builds.
 */
void
write_base_scope_end(struct tcp_conn *tconnp, unsigned char type)
{
  struct links_chars *chars = &OB_LINKS_CHARS;

  /* always writing into the tail buffer of the chain */
  assert(chars->current->next == NULL);

  *chars->buf++ = type;
  chars->nchars++;
}
  

/*
 * Write timestamp into links buffer: an LR_TS marker byte followed by
 * offset_us in host byte order (sizeof(int) bytes).
 *
 * The value is stored with memcpy because the write position follows
 * arbitrary byte-oriented data and is not guaranteed to be int-aligned; a
 * direct store through a cast pointer is undefined behaviour and faults on
 * strict-alignment machines.  The bytes written are the same as before.
 *
 * Declared without `inline': a plain `inline' definition emits no external
 * symbol under C99 and later, which breaks linking of unoptimised builds.
 */
void 
write_tstamp(struct links_chars *chars, int offset_us)
{
  *chars->buf++ = LR_TS;
  memcpy(chars->buf, &offset_us, sizeof(offset_us)); /* time stamp */
  chars->buf += sizeof(offset_us);
  chars->nchars += (1 + sizeof(offset_us));
}

/*
 * Append the len bytes at start to the links buffer as a NUL-terminated
 * string, chaining further buffers as the current one fills up.
 *
 * Leaves via longjmp(bail, HTML_ERR_RUNAWAY_URL) if len exceeds
 * MAX_URL_LEN, and (through chain_new_links_buf()) via
 * longjmp(bail, HTML_LINKS_BUFS_EXHUSTED) if no more buffers may be
 * chained.  Nothing at all is written when len is 0.
 *
 * The room left in the current buffer is computed in signed arithmetic so
 * that the clamp to zero works even if the fill count is an unsigned type
 * and already exceeds LINKS_BUFLEN (an unsigned difference would wrap to a
 * huge value and defeat the clamp).
 *
 * NOTE(review): the terminating NUL is written without a room check, so it
 * can land at offset LINKS_BUFLEN; presumably the buffers are allocated
 * with slack for this - confirm against the links_buf_t definition.
 *
 * Declared without `inline': a plain `inline' definition emits no external
 * symbol under C99 and later, which breaks linking of unoptimised builds.
 */
void
write_url(struct links_chars *chars, char *start, int len, jmp_buf bail)
{
  int room;
  int cl;

  if (len > MAX_URL_LEN)
    longjmp(bail, HTML_ERR_RUNAWAY_URL);
  
  while (len)
    {
      room = (int)LINKS_BUFLEN - (int)chars->nchars;
      cl = MAX(0, MIN(len, room));
      memcpy(chars->buf, start, cl);
      len -= cl;
      chars->nchars += cl;
      if (len)
        {
          /* more to come - continue at the start of a fresh buffer */
          start += cl;
          chain_new_links_buf(chars, bail);
        }
      else
        {
          /* last piece - step over it and terminate the string */
          chars->buf += cl;
          *(chars->buf++) = '\0';
          chars->nchars += 1;
        }
    }

  return;
}

  

/*
 * Copy found URL into links buffer - return ptr to end if something copied
 * - see also get_location
 *
 * start points just past the attribute name; any '=', space or single
 * quote characters and one opening double quote are skipped first.  The
 * URL then runs to the terminator located by find_url_end() (or to end).
 *
 * On success the record written is: an optional timestamp record (once per
 * packet, while P_NEED_LINK_TIMESTAMP is set), the url_type byte, then the
 * NUL-terminated URL.  Returns a pointer to the URL's terminator, or NULL
 * if the URL is empty (nothing is written in that case).
 *
 * May leave via longjmp on the file-level stop_parse context if the URL is
 * too long or the links buffers are exhausted (see write_url()).
 *
 * All look-ahead reads are bounded by end, so an attribute that finishes
 * exactly at the end of the tag never causes a read past it.
 */
char * 
get_url(struct tcp_conn *tconnp, char *start, char *end, unsigned char url_type)
{
  int copylen;
  struct links_chars *chars = &OB_LINKS_CHARS;

  if (chars->chain == NULL)
    chain_first_links_buf(chars);

  assert(chars->current->next == NULL);

  start += clear_chars(start, end, "= \'");
  if (start < end && *start == '"')
    {
      start++;
      if (start < end && *start == '"')
        {
          /* empty url */
          return NULL;
        }
    }

  end = find_url_end(start, end-start);
  if ((copylen = end-start) <= 0)
    return NULL;

  if (OB_PARSE_STATE & P_NEED_LINK_TIMESTAMP)
    {
      /* first link found in this packet - stamp it */
      OB_PARSE_STATE &=  ~P_NEED_LINK_TIMESTAMP;
      write_tstamp(chars, (int)(TCP_SSOLID_TM - HTTP_FIRSTREP_SEEN));
    }

  *(chars->buf++) = url_type; 
  chars->nchars += 1; 

  write_url(chars, start, copylen, stop_parse);
  
  return end;
}

/*
 * Copy found URL list into links buffer 
 *
 * start points just past the attribute name.  The list is taken to extend
 * up to the space preceding the next attribute (located through its '='),
 * or to end if there is no further attribute.  Each space-separated item
 * is written as its own record of type url_type via get_url().  Always
 * returns 0.
 *
 * Two details matter here:
 *  - spaces as well as the '=' are skipped before searching for the next
 *    attribute, so that `archive = "..."' does not mistake its own '=' for
 *    the start of the next attribute;
 *  - the list end is placed ON the space before the next attribute, not
 *    one byte before it, so that the final character of an unquoted value
 *    is not lost.
 */
int 
get_url_list(struct tcp_conn *tconnp, char *start, char *end, unsigned char url_type)
{
  char *cp;
  char *next_attr;

  /* step over the separator between attribute name and value */
  cp = start + clear_chars(start, end, " =");

  /* find next attribute if any */
  next_attr = seqchr(cp, '=', end-cp);
  if (next_attr)
    {
      /* back off to the space in front of the attribute name */
      while (next_attr > start && *next_attr != ' ')
        next_attr--;
      /* if no space was found this is start, i.e. an empty list */
      end = next_attr;
    }

  /*
   * Each call is handed the byte after the previous stopping point: the
   * first call steps over the leading separator, later calls over the
   * delimiter that ended the previous item.
   */
  cp = start;
  while (cp < end)
    {
      cp = get_url(tconnp, cp+1, end, url_type);
      if (cp == NULL)
        break;
    }
  
  return 0;
}

/*
 * Parse tag with simple template: look through [start, end) for the one
 * attribute named by the template and, if present, record its URL with the
 * template's record type.  Always returns 0.
 */
int 
parse_simple(tcp_conn_t *tconnp, simple_parse_template_t *temp, 
	     char *start, char *end)
{
  char *scan = start;

  for (;;)
    {
      int remaining = end-scan;
      int matched;
      char *hit;

      /* candidate positions are those matching the attribute's first letter */
      hit = ci_seqchr(scan, *(temp->url_attr), remaining);
      if (hit == NULL)
        return 0;

      matched = ci_seqstrncmp(hit, temp->url_attr, end-hit);
      if (matched < 0)
        return 0;

      if (matched > 0)
        {
          get_url(tconnp, hit+matched, end, temp->record_type);
          return 0;
        }

      /* false start - resume just past this candidate */
      scan = hit+1;
    }
}

/*
 * Parse tag with multi-attribute template: for every attribute listed in
 * the template (up to the entry with an empty name) search [begin, end)
 * for its first occurrence and record the URL(s) it carries.
 *
 * Attributes whose record type has LR_REL_BASE set open a base scope; if
 * any such URL was recorded, an LR_END_SCRIPT_BASE marker is written once
 * all attributes have been processed.  Attributes with LR_ARCHIVE set are
 * treated as URL lists.  Always returns 0.
 */
int 
parse_multi(tcp_conn_t *tconnp, multi_parse_template_t *temp, 
	     char *begin, char *end)
{
  int base_urls = 0;
  int idx;

  for (idx = 0; strlen(temp->temps[idx].url_attr) != 0; idx++)
    {
      char *name = temp->temps[idx].url_attr;
      char *scan = begin;

      for (;;)
        {
          int remaining = end-scan;
          int matched;
          char *hit;

          hit = ci_seqchr(scan, *name, remaining);
          if (hit == NULL)
            break;

          matched = ci_seqstrncmp(hit, name, end-hit);
          if (matched < 0)
            break;

          if (matched == 0)
            {
              /* false start - resume just past this candidate */
              scan = hit+1;
              continue;
            }

          /* attribute found - dispatch on its record type, then next attribute */
          {
            unsigned char rtype = temp->temps[idx].record_type;

            if (rtype & LR_REL_BASE)
              {
                if (get_url(tconnp, hit+matched, end, rtype) != NULL)
                  base_urls++;
              }
            else if (rtype & LR_ARCHIVE)
              {
                get_url_list(tconnp, hit+matched, end, rtype);
              }
            else
              {
                get_url(tconnp, hit+matched, end, rtype);
              }
          }
          break;
        }
    }

  /* always scripted base here */
  if (base_urls)
    write_base_scope_end(tconnp, LR_END_SCRIPT_BASE);

  return 0;
}

/*
 * Parse LINK tag - interpret according to its rel attribute.
 *
 * If the tag carries rel=stylesheet its href is recorded as LR_INLINE,
 * otherwise as LR_LINK.  Always returns 0.
 *
 * The separator between `rel' and its value is skipped starting from the
 * position just after the attribute name; measuring it from the start of
 * the search region instead would leave the comparison sitting on the '='
 * and "stylesheet" would never match.
 */
int 
parse_LINK(tcp_conn_t *tconnp, char *begin, char *end)
{
  int want_it = 1;		/* index into temps[]: 1 = link, 0 = inline */
  int matchlen;
  char *start = begin;
  char *ap;
  int len;
  simple_parse_template_t temps[] = 
    {
      {"href", LR_INLINE},
      {"href", LR_LINK}
    };
      
  /* looking for "rel=stylesheet" */
  while (1)
    {
      len = end-start;
      if (!(ap = ci_seqchr(start, 'r', len)))
        break;
      matchlen = ci_seqstrncmp(ap, "rel", end-ap);
      if (matchlen > 0)
        {
          assert (matchlen == 3);
          ap += matchlen;
          /* found rel attribute - step over '=', spaces and opening quote */
          ap += clear_chars(ap, end, "= \"");
          matchlen = ci_seqstrncmp(ap, "stylesheet", end-ap);
          if (matchlen > 0)
            /* stylesheet */
            want_it = 0;
          break;
        }
      else if (matchlen < 0)
        {
          break;
        }
      else
        {
          /* some other word beginning with 'r' - keep looking */
          start = ap+1;
          continue;
        }
    }

  parse_simple(tconnp, &temps[want_it], begin, end);
  
  return 0;
}

/*
 * Write a META refresh record into the links buffer.
 *
 * Record layout: an optional timestamp record (once per packet, while
 * P_NEED_LINK_TIMESTAMP is set), the LR_HIGH marker byte, the high byte of
 * LR_REFRESH_URL or LR_REFRESH_SELF, the refresh period `per' as a host
 * `long', and - for LR_REFRESH_URL only - the NUL-terminated target URL.
 *
 * end is treated as pointing AT the last URL character (as returned by
 * clear_chars_rev()), hence the length is extended by one when a URL is
 * present.  start == end therefore selects the "refresh self" form.
 *
 * The period is stored with memcpy because the write position is not
 * guaranteed to be long-aligned; a direct store through a cast pointer is
 * undefined behaviour and faults on strict-alignment machines.  The bytes
 * written are the same as before.
 *
 * May leave via longjmp on the file-level stop_parse context (see
 * write_url()).
 *
 * Declared without `inline': a plain `inline' definition emits no external
 * symbol under C99 and later, which breaks linking of unoptimised builds.
 */
void 
save_refresh(struct tcp_conn *tconnp, char *start, char *end, long per)
{
  int copylen = end-start;
  struct links_chars *chars = &OB_LINKS_CHARS;
  unsigned char l_type;

  if (chars->chain == NULL)
    chain_first_links_buf(chars);

  if (OB_PARSE_STATE & P_NEED_LINK_TIMESTAMP)
    {
      OB_PARSE_STATE &=  ~P_NEED_LINK_TIMESTAMP;
      write_tstamp(chars, (int)(TCP_SSOLID_TM - HTTP_FIRSTREP_SEEN));
    }

  if (copylen)
    {
      l_type = LR_REFRESH_URL >> 8; 
      copylen += 1;		/* end is inclusive */
    }
  else
    {
      l_type = LR_REFRESH_SELF >> 8;
    }

  *(chars->buf++) = LR_HIGH;
  *(chars->buf++) = l_type;
  memcpy(chars->buf, &per, sizeof(per));
  chars->buf += sizeof(per);
  chars->nchars += (sizeof(per) + 2);
  if (copylen)
    write_url(chars, start, copylen, stop_parse);

  return;
}

  
  

/*
 * Parse an HTML META TAG
 * XXX TODO - generalise for all relevant META tags
 *
 * Only the form
 *     http-equiv=refresh content="<seconds>[; url=<target>]"
 * is recognised, with the attributes in exactly that order; anything else
 * is ignored.  A recognised refresh is recorded via save_refresh().
 * Always returns 0, but leaves via longjmp(stop_parse, HTML_ERR_PARSE) if
 * the refresh period is out of range for a long.
 *
 * The period is parsed as decimal (a leading zero must not select octal),
 * and errno is cleared before the conversion so that a stale ERANGE from
 * earlier code cannot be mistaken for a conversion failure.
 *
 * NOTE(review): strtol() is not bounded by end; it relies on a non-digit
 * byte following the number within the buffer - confirm this always holds.
 */
int parse_meta(tcp_conn_t *tconnp, char *start, char *end)
{
  char *cp = start;
  int matchlen;
  long per;

  cp += clear_space(start, end);
  if ((matchlen = ci_seqstrncmp(cp, "http-equiv", end-cp)) <= 0)
    return 0;
  cp += matchlen;
  cp += clear_chars(cp, end, " =\"");

  if ((matchlen = ci_seqstrncmp(cp, "refresh", end-cp)) <= 0)
    return 0;
  cp += matchlen;
  cp += clear_chars(cp, end, " =\"");

  if ((matchlen = ci_seqstrncmp(cp, "content", end-cp)) <= 0)
    return 0;
  cp += matchlen;
  cp += clear_chars(cp, end, " =\"");

  /* find refresh period */
  errno = 0;
  per = strtol(cp, &cp, 10);
  if (errno == ERANGE)
    longjmp(stop_parse, HTML_ERR_PARSE);
  cp += clear_chars(cp, end, "; ");

  /* dispose of any trailing stuff - end now points AT the last wanted byte */
  end = clear_chars_rev(cp, end, ">\" /");
  if ((matchlen = ci_seqstrncmp(cp, "url", end-cp)) > 0)
    {
      cp += matchlen;
      cp += clear_chars(cp, end, " =");
    }
  save_refresh(tconnp, cp, end, per);

  return 0;
}

/*
 * Parse any style element looking for declared URLs
 *
 * Repeatedly locates "url" within [start, end) using seqstr_l(), skips any
 * following space, '(' and double-quote characters, and records the URL
 * that follows as LR_INLINE.  Stops at the first search or extraction
 * that yields nothing.  Always returns 0.
 *
 * NOTE(review): this relies on seqstr_l() returning a position from which
 * skipping " (\"" reaches the URL itself - presumably just past the
 * matched "url"; confirm against its definition.
 */
int parse_style(tcp_conn_t *tconnp, char *start, char *end)
{
  char *cp = start;
  char *url_end;

  while (cp < end)
    {
      if ((cp = seqstr_l(cp, "url", end - cp)) == NULL)
        return 0;

      cp += clear_chars(cp, end, " (\"");
      url_end = find_style_url_end(cp, end);
      if ((cp = get_url(tconnp, cp, url_end, LR_INLINE)) == NULL)
        return 0;
    }
  
  return 0;
}

      


/*
 * Parse an element opening tag - the whole tag is present 
 *
 * start points at the first character of the tag name.  The tag name
 * (followed by a single space) selects the template or special-case parser
 * to apply to the remainder of the tag; tags of no interest are ignored.
 * STYLE tags are deliberately not handled here.  Always returns 0.
 */
int parse_tag(tcp_conn_t *tconnp, char *start, char *end)
{
  char *tag = start;
  int taglen = end - start;
  int n;

  /* is it something we want? - dispatch on the first letter of the name */
  switch (*tag)
    {
    case 'A':
    case 'a':
      n = ci_seqstrncmp(tag, "a ", taglen);
      if (n > 0)
        {
          parse_simple(tconnp, &stemps[0], tag+n, end);
          break;
        }
      n = ci_seqstrncmp(tag, "area ", taglen);
      if (n > 0)
        {
          parse_simple(tconnp, &stemps[1], tag+n, end);
          break;
        }
      n = ci_seqstrncmp(tag, "applet ", taglen);
      if (n > 0)
        parse_multi(tconnp, &dtemps[0], tag+n, end);
      break;

    case 'B':
    case 'b':
      n = ci_seqstrncmp(tag, "base ", taglen);
      if (n > 0)
        {
          parse_simple(tconnp, &stemps[2], tag+n, end);
          break;
        }
      n = ci_seqstrncmp(tag, "body ", taglen);
      if (n > 0)
        parse_simple(tconnp, &stemps[3], tag+n, end);
      break;

    case 'F':
    case 'f':
      n = ci_seqstrncmp(tag, "frame ", taglen);
      if (n > 0)
        {
          parse_multi(tconnp, &dtemps[1], tag+n, end);
          break;
        }
      n = ci_seqstrncmp(tag, "form ", taglen);
      if (n > 0)
        parse_simple(tconnp, &stemps[4], tag+n, end);
      break;

    case 'I':
    case 'i':
      n = ci_seqstrncmp(tag, "img ", taglen);
      if (n > 0)
        {
          parse_multi(tconnp, &dtemps[2], tag+n, end);
          break;
        }
      n = ci_seqstrncmp(tag, "iframe ", taglen);
      if (n > 0)
        {
          parse_multi(tconnp, &dtemps[1], tag+n, end);
          break;
        }
      n = ci_seqstrncmp(tag, "input ", taglen);
      if (n > 0)
        parse_multi(tconnp, &dtemps[3], tag+n, end);
      break;

    case 'L':
    case 'l':
      n = ci_seqstrncmp(tag, "link ", taglen);
      if (n > 0)
        /* special case - no template */
        parse_LINK(tconnp, tag+n, end);
      break;

    case 'M':
    case 'm':
      n = ci_seqstrncmp(tag, "map ", taglen);
      if (n > 0)
        {
          parse_simple(tconnp, &stemps[5], tag+n, end);
          break;
        }
      n = ci_seqstrncmp(tag, "meta ", taglen);
      if (n > 0)
        parse_meta(tconnp, tag+n, end);
      break;

    case 'O':
    case 'o':
      n = ci_seqstrncmp(tag, "object ", taglen);
      if (n > 0)
        parse_multi(tconnp, &dtemps[4], tag+n, end);
      break;

    case 'S':
    case 's':
      n = ci_seqstrncmp(tag, "script ", taglen);
      if (n > 0)
        parse_simple(tconnp, &stemps[6], tag+n, end);
      break;

    default:
      /* not a tag we are interested in */
      return 0;
    }

  return 0;
}


/*
 * Scan one packet's worth of an HTTP response body for HTML start tags and
 * hand each complete tag to parse_tag() for link extraction.
 *
 *   pp     - packet record; pp->buf is the start of the data to scan
 *   tconnp - connection the data belongs to (the OB_, HTTP_ and TCP_ macros
 *            presumably resolve through it - TODO confirm)
 *   len    - number of bytes available at pp->buf
 *   code   - not referenced in this function
 *
 * Parsing state is carried between calls in OB_WHERE_STATE, so a tag that
 * straddles a packet boundary is accumulated in the tag buffer and finished
 * when the next packet arrives.
 *
 * Returns 1 if the links buffers were already exhausted on entry, 0 on
 * normal completion (including exhaustion during this call), or the
 * non-zero error code that caused the parse to be abandoned.
 */
int
parse_rep_body(prec_t *pp, struct tcp_conn *tconnp, int len, short code)
{
  
  char *cp;
  int jump;
  /* 
   * for keeping track within this parse 
   * - START and LEN refer to this structure 
   */
  struct buf buf;

  /* links buffers already used up for this transaction - nothing to do */
  if (HTTP_REP_TRANS_STATUS & TRANS_LINKS_CHARS_EX)
    return 1;

  /*
   * This is where we return if the links buffer is full or on error
   */

  /*
   * NOTE(review): cp, len and buf are modified after this setjmp() and are
   * not volatile, so their values after a longjmp() back here are
   * indeterminate; the DUMP_DEBUG code on the error path reads buf.
   */
  jump = setjmp(stop_parse);
  if (jump == HTML_LINKS_BUFS_EXHUSTED)
    {
      /* buff full */
      HTTP_REP_TRANS_STATUS |= TRANS_LINKS_CHARS_EX;
      goto out;
    }
  else if (jump != 0)
    {
      /* error */
      goto error;
    }
  
  buf.buf = pp->buf;
  buf.len = len;

  
  /* New packet - need timestamp */
  OB_PARSE_STATE |=  P_NEED_LINK_TIMESTAMP;


  cp = START;
  len = LEN;
  
  /* resume where the previous packet left off - note the jumps INTO the loop */
  switch (OB_WHERE_STATE)
    {
    case P_FOUND_TAG: goto p_found_tag; break;
    case P_FINDING_TAG_END: goto p_finding_tag_end; break;
    case P_FOLLOW_ERROR: goto out; break;
    case P_NOT_STARTED:
    default:
      break;
    }
  
  /* seqchr() won't run us past the end */
  while ((cp = seqchr(START, '<', LEN)) != NULL)
    {
      char *tag_start, *tag_end;
      /* starting a new tag - discard anything saved for the previous one */
      OB_TAGBUF_INDX = 0;
      CLEAR_OB_PARSE_STATE(P_TAG_SAVED);
      if (OB_PARSE_STATE & P_LARGE_BUF)
	{
	  /* give back the large buffer and revert to the default one */
	  CLEAR_OB_PARSE_STATE(P_LARGE_BUF);
	  recycle_tag_buf(OB_TAGBUF);
	  OB_TAGBUF = OB_TAGBUF_BUF; 
	  OB_TAGBUF_SZ = TAGBUF_SZ;
	}

#if 0

	  SET_OB_PARSE_STATE(P_LARGE_BUF);
	  OB_TAGBUF = get_tag_buf();
       OB_TAGBUF_SZ = LARGE_TAGBUF_SZ;
#endif
      
      /* Jump over '<' */
      PULL(buf, (cp+1) - START);
      if (!LEN)
	{
	  /* '<' was the last byte of the packet - pick up here next time */
	  SET_OB_WHERE_STATE(P_FOUND_TAG);
	  goto out;
	}
    p_found_tag:
      /* NOTE(review): JUMP_SPACE is defined elsewhere; presumably it skips
	 white space and records the given state if the data runs out */
      JUMP_SPACE(buf, P_FOUND_TAG);
      cp = START;
      len = LEN;
      if (*cp == '/'  /* closing tag - don't want it */ 
	  || *cp == '!')/* TODO - script? */
	{
	  SET_OB_WHERE_STATE(P_NOT_STARTED);
	  continue;
	}
      
    p_finding_tag_end:
      SET_OB_WHERE_STATE(P_FINDING_TAG_END);
      /* Find end of start tag */
      while (1)
	{
	  if (*cp == '<')
	    {
	      /* another tag opens before this one closed - stop in front of it */
	      break;
	    }
	  else if (*cp == '>')
	    {
	      /* step past the '>' so that it is consumed with the tag */
	      cp++;
	      break;
	    }
	  if (--len == 0)
	    {
	      /* fallen off pkt end */
	      //printf("\n***- BREAK -***\n");
	      //SET_OB_WHERE_STATE(P_FOUND_TAG);
	      SET_OB_PARSE_STATE(P_TAG_SAVED);
	      /*
	       * Save what has been seen of the tag so far.
	       * NOTE(review): cp still points at the last byte examined, so
	       * that byte is not included in the cp-START bytes copied -
	       * verify whether dropping it is intended.
	       */
	      len = cp-START;
	      if (len > (OB_TAGBUF_SZ - OB_TAGBUF_INDX))
		{
		  if (OB_PARSE_STATE & P_LARGE_BUF)
		    {
		      /* already using the large buffer - give up on this tag */
		      fprintf(stderr, "copy ");
		      jump = HTML_TAG_BUF_EXHUSTED;
		      goto error;
		    }
		  else
		    {
		      set_large_pbuf(tconnp);
		    }
		}
	      memcpy(&OB_TAGBUF[OB_TAGBUF_INDX], START, len);
	      OB_TAGBUF_INDX += len;    
	      goto out;
	    }
	  cp++;
	}
      
      /* 
       * Have now found end of tag with whole tag either in 
       * the pkt or the tag buffer
       */
      if (OB_PARSE_STATE & P_TAG_SAVED)
	{
	  /* accumulate balance */
	  len = cp-START;
	  if (len > (OB_TAGBUF_SZ - OB_TAGBUF_INDX))
	    {
	      if (OB_PARSE_STATE & P_LARGE_BUF)
		{
		  fprintf(stderr, "final copy ");
		  jump = HTML_TAG_BUF_EXHUSTED;
		  goto error;
		}
	      else
		{
		  set_large_pbuf(tconnp);
		}
	    }
	  memcpy(&OB_TAGBUF[OB_TAGBUF_INDX], START, len);
	  /* parse the reassembled tag out of the tag buffer */
	  tag_start = OB_TAGBUF;
	  tag_end = tag_start + OB_TAGBUF_INDX + len -1;
	}
      else 
	{
	  /* whole tag lies within this packet - parse it in place */
	  tag_start = START;
	  tag_end = cp-1;
	}
   
      parse_tag(tconnp, tag_start, tag_end);   
      SET_OB_WHERE_STATE(P_NOT_STARTED);
      
      /* stop scanning once the links buffers are flagged as exhausted */
      if (HTTP_REP_TRANS_STATUS & TRANS_LINKS_CHARS_EX)
	return 0;
      
      /* consume the tag and look for the next one */
      PULL(buf, cp - START);
      
      
      
    } /* end while */
  
  
 out:
  
  return 0;
  
 error:

#ifdef  DUMP_DEBUG
  /* dump the saved part of the tag, then the remainder of the packet */
  fprintf(stderr, "parse Error\n");
  fprintf(stderr, "Buffer:\n");
  for (cp = OB_TAGBUF, len = OB_TAGBUF_INDX; len > 0; cp++, len--)
    {
      if (isprint(*cp))
	fprintf(stderr, "%c", *cp);
      else
	fprintf(stderr, "[%d]", *cp);
    }
  /* NOTE(review): this heading and the final newline go to stdout while
     the dump itself goes to stderr */
  printf("Following packet:\n");
  for (cp = buf.buf, len = buf.len; len > 0; cp++, len--)
    {
      if (isprint(*cp))
	fprintf(stderr, "%c", *cp);
      else
	fprintf(stderr, "[%d]", *cp);
    }
  putchar ('\n');

#endif /* DUMP_DEBUG */

  /*
   * NOTE(review): P_FOLLOW_ERROR is set and then immediately replaced by
   * P_NOT_STARTED (assuming SET_OB_WHERE_STATE assigns rather than ORs),
   * so the P_FOLLOW_ERROR case in the switch above would never be
   * reached - confirm which state is intended.
   */
  SET_OB_WHERE_STATE(P_FOLLOW_ERROR);
  SET_OB_WHERE_STATE(P_NOT_STARTED);
  CLEAR_OB_PARSE_STATE(P_TAG_SAVED);
  OB_TAGBUF_INDX = 0;
  
  return jump;
}


/*
 * end parse_object.c 
 */


