
/*  -*- Mode: C;  -*- */

/******************************************************************************
*                                                                             *
*   Copyright 2005 University of Cambridge Computer Laboratory.               *
*                                                                             *
*   This file is part of Nprobe.                                              *
*                                                                             *
*   Nprobe is free software; you can redistribute it and/or modify            *
*   it under the terms of the GNU General Public License as published by      *
*   the Free Software Foundation; either version 2 of the License, or         *
*   (at your option) any later version.                                       *
*                                                                             *
*   Nprobe is distributed in the hope that it will be useful,                 *
*   but WITHOUT ANY WARRANTY; without even the implied warranty of            *
*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             *
*   GNU General Public License for more details.                              *
*                                                                             *
*   You should have received a copy of the GNU General Public License         *
*   along with Nprobe; if not, write to the Free Software                     *
*   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA *
*                                                                             *
******************************************************************************/


#ifndef _PARSE_OBJECT_H__
#define _PARSE_OBJECT_H__

#include <setjmp.h>

/*****************************************************************************/

/*
 * Enable building of a chain of char buffers to hold references
 * - additional buffers are appended as necessary
 * - dumped as a continuous set of records - type indicated by initial byte
 * - records may be either:
 *   - NUL terminated character sequences (ie strings) representing URLs
 *     which may be normal URLs or base URLs for relative resolution
 *   - end of relative scope markers
 *   - or four bytes representing a 32-bit signed integer time stamp giving
 *     the offset from the first response packet arrival time in microseconds.
 *     This time stamp applies to all following records until another time
 *     stamp is encountered.
 */

/* Size in bytes of the character array held by each links buffer */
#define LINKS_BUFSZ (128 * 16)

/*
 * Usable length of a links buffer when writing variable length records
 * (ie urls).
 *
 * Writes of known-size records are not individually checked for room, so
 * variable length writes are checked against this shorter length and always
 * leave space for the largest known-size record.
 *
 * The 12 bytes held back allow for two record terminators plus an (optional)
 * time stamp and preamble byte, and possibly a refresh preamble.
 */
#define LINKS_BUFLEN (LINKS_BUFSZ - 12)

/* Maximum number of links buffers (presumably per chain - confirm at use) */
#define LINKS_MAX_BUFS 32

/*
 * Arbitrary ceiling on url length - intended to catch obvious runaway urls
 * produced by malformed HTML or by parse failures.
 */
#define MAX_URL_LEN 4096

/*
 * One buffer of link records. Buffers are singly linked through `next`,
 * either into a chain that is in use or into a pool.
 */
typedef struct links_buf
{
  /* next buffer - chained in use or in pool */
  struct links_buf *next;
  /* bytes of buf in use - including delimiters */
  unsigned short nchars;
  /* record storage */
  char buf[LINKS_BUFSZ];
} links_buf_t;

/*
 * Write-side bookkeeping for a chain of links buffers.
 */
struct links_chars
{
  /* first buffer in chain */
  struct links_buf *chain;
  /* buffer currently being filled */
  struct links_buf *current;
  /* where currently writing */
  char *buf;
  /* buffer count (presumably buffers in the chain - confirm at use) */
  unsigned char nbufs;
  /* usage of current buffer */
  unsigned short nchars;
  /* total o/a buffers */
  unsigned int totchars;
};

/*****************************************************************************/

/*
 * Link record type codes.
 *
 * The basic codes are single bits so that they can be OR-ed together and
 * tested with masks of varying discrimination.
 */

#define DUMMY_VALUE         0x00
#define LR_TS               0x00

/* included by scripts, or archive (script) objects - default HTML included */
#define LR_SCRIPTED         0x01
#define LR_ARCHIVE          0x02

/* in-line (fetched as part of load), link (fetched later), or unknown */
#define LR_INLINE           0x04
#define LR_LINK             0x08
#define LR_UNKNOWN          0x10

/* base-url for relative resolution (may be nested) and its end of scope */
#define LR_REL_BASE         0x20
#define LR_END_BASE         0x40

/* link is a redirection */
#define LR_REDIRECT         0x80

/* Combined codes built from the single-bit codes above */
#define LR_SCRIPT_ARCHIVE   (LR_SCRIPTED | LR_ARCHIVE | LR_INLINE)
#define LR_SCRIPTED_LINK    (LR_SCRIPTED | LR_LINK)
#define LR_SCRIPTED_INLINE  (LR_SCRIPTED | LR_INLINE)
#define LR_SCRIPTED_UNKNOWN (LR_SCRIPTED | LR_UNKNOWN)
#define LR_REL_SCRIPT_BASE  (LR_SCRIPTED | LR_REL_BASE)
#define LR_END_SCRIPT_BASE  (LR_SCRIPTED | LR_END_BASE)
#define LR_REDIRECT_INLINE  (LR_REDIRECT | LR_INLINE)
#define LR_REDIRECT_LINK    (LR_REDIRECT | LR_LINK)

/* Allow for extension link types */
#define LR_HIGH             0xff

/* Refresh codes - these lie above the low eight bits */
#define LR_REFRESH_SELF     0x100
#define LR_REFRESH_URL      0x200
#define LR_REFRESH          (LR_REFRESH_SELF | LR_REFRESH_URL)

/* Mask of the codes listed here as having a URL */
#define LR_HAS_URL \
  (LR_INLINE | LR_LINK | LR_UNKNOWN | LR_REL_BASE | LR_REFRESH_URL)


#ifndef SWIG

struct tcp_conn;		/* forward */

/*
 * A template controlling the parse of specific element tags where only one
 * attribute may give a URL reference.
 *
 * NOTE(review): record_type is an unsigned char, so it cannot represent the
 * LR_REFRESH_SELF / LR_REFRESH_URL codes (0x100 / 0x200) - confirm that
 * templates never need them.
 */
typedef struct simple_parse_template
{
  /* name of the attribute carrying the URL */
  char *url_attr;
  /* link record type: inline/link/scripted/ etc */
  unsigned char record_type;
} simple_parse_template_t;

/*
 * A template controlling the parse of specific element tags where more than
 * one attribute may give a URL reference. It holds a fixed array of ten
 * simple templates.
 *
 * NOTE(review): how unused slots are marked is not visible in this header -
 * confirm against the template tables and parse_multi().
 */
typedef struct multi_parse_template
{
  simple_parse_template_t temps[10];
} multi_parse_template_t;

/* State allows HTML body searches to span packets */

/* Size of the normal tag buffer, and of the larger one used when required */
#define TAGBUF_SZ       512
#define LARGE_TAGBUF_SZ 5096

/*
 * A large tag buffer. The storage is overlaid with a pointer to another
 * tag_buf (presumably a free-list/queue link while not in use - confirm).
 */
union tag_buf
{
  union tag_buf *q;
  char buf[LARGE_TAGBUF_SZ];
};

typedef union tag_buf tag_buf_t;

/*
 * Per-object parse state, including the buffer in which a tag is
 * accumulated.
 */
struct ob_parse_state
{
  /* flag bits - see the P_TAG_SAVED etc. definitions */
  unsigned int parse_state;
  /* position in the parse - see the P_NOT_STARTED etc. definitions */
  unsigned int where_state;
  /* normal buffer for tag acquisition */
  char tagbuf_buf[TAGBUF_SZ];
  /* a larger one if required */
  char *tagbuf;
  /* tag buffer size and index (presumably of the buffer in use - confirm) */
  int tagbuf_sz;
  int tagbuf_indx;
};

typedef struct ob_parse_state ob_parse_state_t;

/* Values for where state */
#define P_NOT_STARTED     0
#define P_FOUND_TAG       1
#define P_FINDING_TAG_END 2
#define P_FOLLOW_ERROR    3

/* Flag bits for parse state */
#define P_TAG_SAVED 0x1
#define P_LARGE_BUF 0x2

/* Top bit of the 32-bit state word */
#define P_NEED_LINK_TIMESTAMP 0x80000000

/*
 * Shorthand accessors for the object parse state reached through a variable
 * named `tconnp`, which must be in scope wherever these are expanded. Each
 * expands to a parenthesised lvalue.
 *
 * NOTE(review): ob_parse_state_t as declared in this header has no `needed`,
 * `match_indx`, `tmpp` or `temp_slot` member, so the last four accessors
 * cannot compile against that declaration if expanded - confirm whether they
 * are still used.
 */
#define OB_TAGBUF      (tconnp->su.http.reptrans->inner.ob_p_state.tagbuf)
#define OB_TAGBUF_BUF  (tconnp->su.http.reptrans->inner.ob_p_state.tagbuf_buf)
#define OB_TAGBUF_INDX (tconnp->su.http.reptrans->inner.ob_p_state.tagbuf_indx)
#define OB_TAGBUF_SZ   (tconnp->su.http.reptrans->inner.ob_p_state.tagbuf_sz)
#define OB_PARSE_STATE (tconnp->su.http.reptrans->inner.ob_p_state.parse_state)
#define OB_WHERE_STATE (tconnp->su.http.reptrans->inner.ob_p_state.where_state)
#define OB_ATTR_NEEDED (tconnp->su.http.reptrans->inner.ob_p_state.needed)
#define OB_PARSE_STATE_MATCH_INDX \
  (tconnp->su.http.reptrans->inner.ob_p_state.match_indx)
#define OB_PARSE_STATE_TMPP (tconnp->su.http.reptrans->inner.ob_p_state.tmpp)
#define OB_PTEMP_SLOT  (tconnp->su.http.reptrans->inner.ob_p_state.temp_slot)
  


/*
 * State update helpers (all act on the state reached via `tconnp`).
 *
 * SET_OB_PARSE_STATE ORs the given bits into the parse state and
 * CLEAR_OB_PARSE_STATE masks them out again; SET_OB_WHERE_STATE overwrites
 * the where state with the given value.
 *
 * Original note: apart from top nibble (time stamp state) states are
 * exclusive.
 */
#define SET_OB_PARSE_STATE(state)      \
MACRO_BEGIN                            \
  OB_PARSE_STATE |= (state);           \
MACRO_END

#define CLEAR_OB_PARSE_STATE(state)    \
MACRO_BEGIN                            \
  OB_PARSE_STATE &= ~(state);          \
MACRO_END

#define SET_OB_WHERE_STATE(state)      \
MACRO_BEGIN                            \
  OB_WHERE_STATE = (state);            \
MACRO_END

/*
 * Disabled earlier form of PULL (excluded from compilation by #if 0): it
 * asserts that adj does not exceed buf.len rather than jumping to an
 * `error` label.
 */
#if 0
#define PULL(buf, adj) \
MACRO_BEGIN   \
assert(adj <= buf.len);  \
(buf).len-=(adj);                  \
(buf).buf+=(adj); \
MACRO_END
#endif

#define PULL(buf, adj) \
MACRO_BEGIN   \
if (adj > (buf).len)  \
     goto error;   \
(buf).len-=(adj);                  \
(buf).buf+=(adj); \
MACRO_END


#define JUMP_SPACE(buf, where)              \
MACRO_BEGIN  \
  while (*(buf).buf == ' ')  \
   PULL(buf, 1);              \
   if ((buf).len == 0)     \
     {     \
       SET_OB_WHERE_STATE(where); \
       goto out;       \
     }      \
 MACRO_END

/*
 * Disabled variant of JUMP_SPACE (excluded from compilation by #if 0): as
 * the live macro, but tests a LEN macro - not defined in this header -
 * instead of (buf).len.
 */
#if 0
#define JUMP_SPACE(buf, where)              \
MACRO_BEGIN  \
  while (*(buf).buf == ' ')  \
   PULL(buf, 1);              \
   if (LEN == 0)     \
     {     \
       SET_OB_WHERE_STATE(where); \
       goto out;       \
     }      \
 MACRO_END
#endif

/*
 * Second disabled variant (excluded from compilation by #if 0). Note that
 * without braces the body of the while loop is only the `if` statement;
 * the PULL follows the loop rather than being part of it.
 */
#if 0
#define JUMP_SPACE(buf, where)              \
MACRO_BEGIN  \
while (*(buf).buf == ' ')  \
  if (LEN == 0)     \
    {     \
      SET_OB_WHERE_STATE(where); \
      goto out;       \
    }      \
  PULL(buf, 1);              \
MACRO_END
#endif


/*****************************************************************************/

struct tcp_conn; /* forward */


/*
 * Functions implemented in parse_object.c
 *
 * None of these is declared `inline` here. A function with external linkage
 * that is declared `inline` must also be defined in the same translation
 * unit (C99/C11 6.7.4), and this header carries no definitions - so an
 * `inline` prototype would be invalid in every other file that includes it
 * and can leave calls without an external definition to link against.
 * parse_object.c may still mark its definitions `inline`; with these plain
 * declarations visible they remain ordinary external definitions.
 *
 * NOTE(review): prec_t is not declared in this header - the including file
 * must already have its declaration in scope.
 */

/* links buffer chain management; `bail` is a jmp_buf supplied by the caller */
void chain_new_links_buf(struct links_chars *lc, jmp_buf bail);
void chain_first_links_buf(struct links_chars *lc);

/* tag buffer handling */
int set_large_pbuf(struct tcp_conn *tconnp);

/* scanning helpers over a character range */
char *find_url_end(char *s, int len);
char *find_linkref_end(char *s, int len);
int clear_chars(char *start, char *end, char *chars);

/* link record output */
void write_base_scope_end(struct tcp_conn *tconnp, unsigned char type);
void write_tstamp(struct links_chars *chars, int offset_us);
void write_url(struct links_chars *chars, char *start, int len, jmp_buf bail);

/* parsing entry points */
char *get_url(struct tcp_conn *tconnp, char *start, char *end,
	      unsigned char url_type);
int parse_rep_body(prec_t *pp, struct tcp_conn *tconnp, int len, short code);
int parse_simple(struct tcp_conn *tconnp, simple_parse_template_t *temp,
		 char *start, char *end);
int parse_multi(struct tcp_conn *tconnp, multi_parse_template_t *temp,
		char *start, char *end);

#endif /* ifndef SWIG */
#endif /* _PARSE_OBJECT_H__ */


/*
 * end parse_object.h 
 */
