/*
 *  byterun/hash256.c
 *
 *  Hashing of Caml values to 256-bit digests.
 *
 *  (c) Copyright 2005, 2006, Mark R. Shinwell.
 * 
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice,
 *  this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *  notice, this list of conditions and the following disclaimer in the
 *  documentation and/or other materials provided with the distribution.
 *  3. The names of the authors may not be used to endorse or promote products
 *  derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
 *  NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 *  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 *  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "alloc.h"
#include "fail.h"
#include "intext.h"
#include "memory.h"
#include "mlvalues.h"
#include "sha2.h"

#include <assert.h>
#include <setjmp.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

extern char* strnstr_freebsd(const char*, const char*, size_t);

int hash_count = 0;

static void dump_rec(value v)
{
    CAMLparam1(v);

    if (Is_block(v)) {
        printf("%p=", (void*) v);

        int x;

        switch (Tag_val(v)) {
            case 0: printf("tuple=["); break;
            case Closure_tag: printf("closure=["); break;
            case Double_array_tag: printf("<double array>"); break;
            case Abstract_tag: printf("<abstract>"); break;
            case Custom_tag: printf("<custom>"); break;
            case String_tag: printf("<string>"); break;
            default: printf("tag %d=[", (int) Tag_val(v)); break;
        }

        fflush(stdout);

        if (Tag_val(v) < No_scan_tag) {
    
        for (x = (Tag_val(v) == Closure_tag ? 1 : 0);
             x < Wosize_val(v);
             x++) {
            dump_rec(Field(v, x));
            if (x < Wosize_val(v) - 1) {
                printf(", ");
            }
        }
        printf("]");

        }
    }
    else {
        if (!Is_in_heap(v)) printf("<out of heap>");
        
        if (!Is_block(v)) {
            if (v == 0) {
                printf("<null>");
            }
            else {
                printf("%d",Int_val(v));
                /*printf("<unboxed>");*/
            }
        }
        else {
            printf("<unstructured>");
        }
    }

    fflush(stdout);

    CAMLreturn0;
}

CAMLprim value dump(value v)
{
    CAMLparam1(v);

    dump_rec(v);

    printf("\n");
    fflush(stdout);

    CAMLreturn(Val_unit);
}

/* Hash a Caml string to a SHA-256 digest. */
static value hash256_raw(value v_marshalled)
{
    CAMLparam1(v_marshalled);
    CAMLlocal2(hash, input);
    unsigned char* hash_ptr;
    int input_size;
    int len;

    assert(Is_block(v_marshalled));
    assert(Tag_val(v_marshalled) == String_tag);

    /* the length of "v_marshalled" must be at least
       SHA256_BLOCK_SIZE, otherwise a memcpy in sha256_update()
       (sha2.c) can attempt to access memory beyond the string. */
    len = caml_string_length(v_marshalled);
    if (len > SHA256_BLOCK_SIZE) {
        input = v_marshalled;
        input_size = len;
    }
    else {
        unsigned char* input_ptr;

        input = caml_alloc_string(SHA256_BLOCK_SIZE);
        input_ptr = (unsigned char*) String_val(input);

        memcpy(input_ptr, String_val(v_marshalled), len);
        bzero(&input_ptr[len], SHA256_BLOCK_SIZE - len);
        input_size = SHA256_BLOCK_SIZE;
    }

    /* allocate space for the SHA-256 hash */
    hash = caml_alloc_string(SHA256_DIGEST_SIZE);
    hash_ptr = (unsigned char*) String_val(hash);

    /* compute the hash */
    sha256((unsigned char*) String_val(input), input_size, hash_ptr);

    hash_count++;

    CAMLreturn(hash);
}

/* Hash a Caml value to a SHA-256 digest. */
CAMLprim value hash256(value v)
{
    CAMLparam1(v);
    CAMLlocal3(flags, flags2, v_marshalled);

    /* marshal v to a byte-string */
    flags = caml_alloc_tuple(2);
    flags2 = caml_alloc_tuple(2);

#if 0
    /* doesn't work -- cyclic structures in stdlib/hashtbl.ml */

    Store_field(flags, 0, Val_int(1) /* Marshal.No_sharing */);
    Store_field(flags, 1, flags2);
    Store_field(flags2, 0, Val_int(1) /* Marshal.Closures */);
    Store_field(flags2, 1, Val_int(0));
#endif

    Store_field(flags, 0, Val_int(1) /* Marshal.Closures */);
    Store_field(flags, 1, Val_int(0));
    v_marshalled = caml_output_value_to_string(v, flags);

    /* do the hashing */
    CAMLreturn(hash256_raw(v_marshalled));
}

/* Make a type representation that corresponds to an unbound type
   variable.  These are "infectious", and will propagate via hash256_checked()
   so that any hash into which they are combined (via HashPackage) is
   flagged as another null hash with the flag set.  In this way we can
   detect attempts to marshal values whose type involves uninstantiated
   type variables.
*/

/* This isn't used any more -- now we check for __DUMMY__ entries in
   runtime type representation blocks. */
CAMLprim value make_unbound_tv_hash(value unit)
{
    CAMLparam1(unit);
    CAMLreturn(caml_alloc_string(1));
}

/* Flattening and hashing of runtime type representation blocks to strings,
   raising an exception if the type being represented contains any
   (uninstantiated) type variables. */
static char* flatten_buffer;
static int flatten_size;
static int flatten_used;
static jmp_buf env;

static void flatten_typerep_block_rec(value v)
{
    int i;

    for (i = 0; i < Wosize_val(v); i++) {
        value field = Field(v, i);
        if (!Is_block(field)){
            printf("failure on field %d\n", i);
            dump(v);
            assert(0);
        }
        assert(Is_block(field));
        assert(Tag_val(field) == String_tag || Tag_val(field) == 0);

        if (Tag_val(field) == 0) {
            /* indirection to another runtime typerep block... */
            flatten_typerep_block_rec(field);
        }
        else {
            /* constant string... */
            char* str;
            char* dest;
            int length;

            str = String_val(field);

            /* check for free type variable flag */
            if (strnstr_freebsd(str, "__DUMMY__", 9)) {
                /* throw "exception" to instantly unwind stack back
                   to flatten_typerep_block().  Yum yum. */
                longjmp(env, 1);
            }

            length = caml_string_length(field);
            if (flatten_size - flatten_used < length) {
                /* increase size of buffer */
                flatten_size *= 2;
                flatten_size += length;
                flatten_buffer = (char*) realloc(flatten_buffer, flatten_size);
            }

            /* determine where to copy this string */
            dest = &flatten_buffer[flatten_used];
            
            /* copy the string */
            strncpy(dest, str, length);
            flatten_used += length;
        }
    }
}

CAMLprim value flatten_typerep_block(value v)
{
    CAMLparam1(v);
    CAMLlocal1(ret);

    /* Do everything on the C heap, then copy to a Caml string. */
    flatten_size = Wosize_val(v) * 500;
    flatten_used = 0;
    flatten_buffer = (char*) malloc(flatten_size);

    if (setjmp(env) == 0) {
        flatten_typerep_block_rec(v);
    }
    else {
        /* "Exception" was thrown from within flatten_typerep_block_rec(). */
        free(flatten_buffer);
        caml_invalid_argument("Dynamic type contains free type variables");
    }

    ret = caml_alloc_string(flatten_used);
    strncpy(String_val(ret), flatten_buffer, flatten_used);
    free(flatten_buffer);

    CAMLreturn(hash256_raw(ret));
}

CAMLprim value hash256_checked(value v)
{
    /* DEPRECATED  --  use hash256 instead */
    return hash256(v);
}

