/*----------------------------------------------------------------------------

                                brutessl 1.03

                        Copyright (C) 1995 Andrew Roos
                             All Rights Reserved

This program is licensed for academic and educational use only. It may not be
used  for  any commercial purpose. You may modify the program if you wish to,
provided that the original copyright  notice  and  license  restrictions  are
retained,  and  that  you include a notice stating that you have modified the
program and giving details of the changes that you have made. This program is
distributed  without  any warranty including, but not limited to, the implied 
warranty of merchantability or fitness for a particular purpose.  

synopsis:  A program for brute-force searching SSL data which has been 
           encrypted using 40-bit (export) RC4 keys.

usage:	   brutessl [-q] -t [hh:[mm]]
		   brutessl [-q] <file> <checksum> <start segment> <no of segments>
		   brutessl [-q] -r <start segment> [<no of segments>]

Andrew Roos <andrewr@vironix.co.za>
----------------------------------------------------------------------------*/

#include "brutessl.h"
#if	defined(__DOS) || defined(__DOS__)
#include <memory.h>
#endif

/* MD5 auxiliary functions (RFC 1321, section 3.4). F and G are bitwise  */
/* selects, written here in XOR form: each bit of F is taken from y      */
/* where x is set and from z elsewhere; each bit of G is taken from x    */
/* where z is set and from y elsewhere. Arguments are evaluated more     */
/* than once, so pass only side-effect-free expressions.                 */
#define function_F(x,y,z)   ((z)^((x)&((y)^(z))))
#define function_G(x,y,z)   ((y)^((z)&((x)^(y))))
#define function_H(x,y,z)   ((z)^(y)^(x))
#define function_I(x,y,z)   (((x)|~(z))^(y))

/*-------------------------------- Constants -------------------------------*/

/* Identity permutation: entry n holds the value n. The portable           */
/* rc4_check_key() copies this table into its working state with memcpy()  */
/* before key scheduling, instead of filling the state with a loop.        */
static const rc4_byte rc4_initial_state[256] = {
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
};

/* key search: helpers private to this file */

/* run the MD5 steps that depend only on input[0..2] and store the state */
static void md5_precomp(const word *input, word *precomp);
/* finish MD5 for one input block, starting from the precomp state */
static void md5_digest(const word* input, const word *precomp, word *digest);
/* return 1 if key makes RC4 produce the bytes in stream, else 0 */
static int  rc4_check_key(const byte *key, const byte *stream);
/* reverse the byte order of each of the n words at w, in place */
static void reverse_byte_order(word *w, int n);

int search_18_bits(ssl_block *ssl, const byte *stream)
/* Try every key in one 18-bit segment of the keyspace. ssl is the MD5     */
/* input block that selects the segment; it is viewed as an array of words */
/* and its fourth word is stepped by 0x100 after each failed candidate.    */
/* stream is the RC4 generator output (at 16 bytes offset) that the right  */
/* key must reproduce. Returns 1 as soon as a candidate matches, leaving   */
/* ssl set to the input that produced it; returns 0 once all 262144        */
/* candidates have failed, with the fourth word advanced by 262144 steps.  */
/* On big-endian machines ssl is word-swapped for the duration of the      */
/* search and swapped back before returning.                               */
{
	union {
		byte rc4_key[16];
		word digest[4];
	} candidate;			/* MD5 output doubles as the RC4 key */
	word precomp[4];
	word *block = (word*)ssl;
	word attempt;
	int found = 0;

	/* MD5 works on native words, so swap the block on big-endian hosts */
	if (big_endian) reverse_byte_order(block,13);
	md5_precomp(block,precomp);

	/* 262144 == 2^18 candidates per segment */
	for (attempt=0;attempt<262144;attempt++)
	{
		md5_digest(block,precomp,candidate.digest);
		if (rc4_check_key(candidate.rc4_key,stream))
		{
			found = 1;
			break;
		}
		block[3] += 0x100;
	}

	/* hand ssl back in its byte-oriented format on every exit path */
	if (big_endian) reverse_byte_order(block,13);
	return found;
}

void md5_precomp(const word *input, word *precomp)
/* Run the part of MD5 round 1 that depends only on input[0..2]: steps 1-3 */
/* in full, plus the additions of step 4 that do not involve input[3]      */
/* (md5_digest() adds input[3] and applies the rotation). Those three      */
/* words do not change within a segment, so this is done once per segment  */
/* rather than once per key. precomp receives the words A, B, C, D in that */
/* order.                                                                  */
{
	/* standard MD5 initial chaining values */
	word reg_a = 0x67452301;
	word reg_b = 0xefcdab89;
	word reg_c = 0x98badcfe;
	word reg_d = 0x10325476;
	DECLARE_ROTATE_VARS

	/* step 1 */
	reg_a += function_F(reg_b,reg_c,reg_d) + input[0] + 0xd76aa478;
	reg_a = reg_b + rotate_left(reg_a,7);

	/* step 2 */
	reg_d += function_F(reg_a,reg_b,reg_c) + input[1] + 0xe8c7b756;
	reg_d = reg_a + rotate_left(reg_d,12);

	/* step 3 */
	reg_c += function_F(reg_d,reg_a,reg_b) + input[2] + 0x242070db;
	reg_c = reg_d + rotate_left(reg_c,17);

	/* step 4, input-independent additions only */
	reg_b += function_F(reg_c,reg_d,reg_a) + 0xc1bdceee;

	precomp[0] = reg_a;
	precomp[1] = reg_b;
	precomp[2] = reg_c;
	precomp[3] = reg_d;
}

void md5_digest(const word *input, const word *precomp, word *digest)
/* This is an implementation of the RSA, Inc. MD5 message digest algorithm, */
/* specialized for a single block. It resumes from the state left by        */
/* md5_precomp() in precomp (steps 1-3 plus the input-independent part of   */
/* step 4) and reads input[0..12] only. The remaining message words are     */
/* never read: words 13 and 15 (counting from zero) are taken to be zero,   */
/* and word 14 is taken to be 0x188, which has been added into the          */
/* constants of the four steps that use it. The four result words are       */
/* written to digest. According to profiler output, md5_digest accounts     */
/* for 20% of execution time.                                               */
{
    word word_A = precomp[0];
    word word_B = precomp[1];
    word word_C = precomp[2];
    word word_D = precomp[3];
    DECLARE_ROTATE_VARS

    /* Round 1 (function F): finish step 4, then steps 5-16 */
    word_B = word_C + rotate_left(word_B + input[3],22);
    word_A = word_B + rotate_left(word_A + function_F(word_B,word_C,word_D) + input[4] + 0xf57c0faf,7);
    word_D = word_A + rotate_left(word_D + function_F(word_A,word_B,word_C) + input[5] + 0x4787c62a,12);
    word_C = word_D + rotate_left(word_C + function_F(word_D,word_A,word_B) + input[6] + 0xa8304613,17);
    word_B = word_C + rotate_left(word_B + function_F(word_C,word_D,word_A) + input[7] + 0xfd469501,22);
    word_A = word_B + rotate_left(word_A + function_F(word_B,word_C,word_D) + input[8] + 0x698098d8,7);
    word_D = word_A + rotate_left(word_D + function_F(word_A,word_B,word_C) + input[9] + 0x8b44f7af,12);
    word_C = word_D + rotate_left(word_C + function_F(word_D,word_A,word_B) + input[10] + 0xffff5bb1,17);
    word_B = word_C + rotate_left(word_B + function_F(word_C,word_D,word_A) + input[11] + 0x895cd7be,22);
    word_A = word_B + rotate_left(word_A + function_F(word_B,word_C,word_D) + input[12] + 0x6b901122,7);
    word_D = word_A + rotate_left(word_D + function_F(word_A,word_B,word_C) + 0xfd987193,12);
    word_C = word_D + rotate_left(word_C + function_F(word_D,word_A,word_B) + 0xa6794516,17); /* 0xa679438e + 0x188 */
    word_B = word_C + rotate_left(word_B + function_F(word_C,word_D,word_A) + 0x49b40821,22);

    /* Round 2 (function G): steps 17-32 */
    word_A = word_B + rotate_left(word_A + function_G(word_B,word_C,word_D) + input[1] + 0xf61e2562,5);
    word_D = word_A + rotate_left(word_D + function_G(word_A,word_B,word_C) + input[6] + 0xc040b340,9);
    word_C = word_D + rotate_left(word_C + function_G(word_D,word_A,word_B) + input[11] + 0x265e5a51,14);
    word_B = word_C + rotate_left(word_B + function_G(word_C,word_D,word_A) + input[0] + 0xe9b6c7aa,20);
    word_A = word_B + rotate_left(word_A + function_G(word_B,word_C,word_D) + input[5] + 0xd62f105d,5);
    word_D = word_A + rotate_left(word_D + function_G(word_A,word_B,word_C) + input[10] + 0x02441453,9);
    word_C = word_D + rotate_left(word_C + function_G(word_D,word_A,word_B) + 0xd8a1e681,14);
    word_B = word_C + rotate_left(word_B + function_G(word_C,word_D,word_A) + input[4] + 0xe7d3fbc8,20);
    word_A = word_B + rotate_left(word_A + function_G(word_B,word_C,word_D) + input[9] + 0x21e1cde6,5);
    word_D = word_A + rotate_left(word_D + function_G(word_A,word_B,word_C) + 0xc337095e,9); /* 0xc33707d6 + 0x188 */
    word_C = word_D + rotate_left(word_C + function_G(word_D,word_A,word_B) + input[3] + 0xf4d50d87,14);
    word_B = word_C + rotate_left(word_B + function_G(word_C,word_D,word_A) + input[8] + 0x455a14ed,20);
    word_A = word_B + rotate_left(word_A + function_G(word_B,word_C,word_D) + 0xa9e3e905,5);
    word_D = word_A + rotate_left(word_D + function_G(word_A,word_B,word_C) + input[2] + 0xfcefa3f8,9);
    word_C = word_D + rotate_left(word_C + function_G(word_D,word_A,word_B) + input[7] + 0x676f02d9,14);
    word_B = word_C + rotate_left(word_B + function_G(word_C,word_D,word_A) + input[12] + 0x8d2a4c8a,20);

    /* Round 3 (function H): steps 33-48 */
    word_A = word_B + rotate_left(word_A + function_H(word_B,word_C,word_D) + input[5] + 0xfffa3942,4);
    word_D = word_A + rotate_left(word_D + function_H(word_A,word_B,word_C) + input[8] + 0x8771f681,11);
    word_C = word_D + rotate_left(word_C + function_H(word_D,word_A,word_B) + input[11] + 0x6d9d6122,16);
    word_B = word_C + rotate_left(word_B + function_H(word_C,word_D,word_A) + 0xfde53994,23); /* 0xfde5380c + 0x188 */
    word_A = word_B + rotate_left(word_A + function_H(word_B,word_C,word_D) + input[1] + 0xa4beea44,4);
    word_D = word_A + rotate_left(word_D + function_H(word_A,word_B,word_C) + input[4] + 0x4bdecfa9,11);
    word_C = word_D + rotate_left(word_C + function_H(word_D,word_A,word_B) + input[7] + 0xf6bb4b60,16);
    word_B = word_C + rotate_left(word_B + function_H(word_C,word_D,word_A) + input[10] + 0xbebfbc70,23);
    word_A = word_B + rotate_left(word_A + function_H(word_B,word_C,word_D) + 0x289b7ec6,4);
    word_D = word_A + rotate_left(word_D + function_H(word_A,word_B,word_C) + input[0] + 0xeaa127fa,11);
    word_C = word_D + rotate_left(word_C + function_H(word_D,word_A,word_B) + input[3] + 0xd4ef3085,16);
    word_B = word_C + rotate_left(word_B + function_H(word_C,word_D,word_A) + input[6] + 0x04881d05,23);
    word_A = word_B + rotate_left(word_A + function_H(word_B,word_C,word_D) + input[9] + 0xd9d4d039,4);
    word_D = word_A + rotate_left(word_D + function_H(word_A,word_B,word_C) + input[12] + 0xe6db99e5,11);
    word_C = word_D + rotate_left(word_C + function_H(word_D,word_A,word_B) + 0x1fa27cf8,16);
    word_B = word_C + rotate_left(word_B + function_H(word_C,word_D,word_A) + input[2] + 0xc4ac5665,23);

    /* Round 4 (function I): steps 49-64 */
    word_A = word_B + rotate_left(word_A + function_I(word_B,word_C,word_D) + input[0] + 0xf4292244,6);
    word_D = word_A + rotate_left(word_D + function_I(word_A,word_B,word_C) + input[7] + 0x432aff97,10);
    word_C = word_D + rotate_left(word_C + function_I(word_D,word_A,word_B) + 0xab94252f,15); /* 0xab9423a7 + 0x188 */
    word_B = word_C + rotate_left(word_B + function_I(word_C,word_D,word_A) + input[5] + 0xfc93a039,21);
    word_A = word_B + rotate_left(word_A + function_I(word_B,word_C,word_D) + input[12] + 0x655b59c3,6);
    word_D = word_A + rotate_left(word_D + function_I(word_A,word_B,word_C) + input[3] + 0x8f0ccc92,10);
    word_C = word_D + rotate_left(word_C + function_I(word_D,word_A,word_B) + input[10] + 0xffeff47d,15);
    word_B = word_C + rotate_left(word_B + function_I(word_C,word_D,word_A) + input[1] + 0x85845dd1,21);
    word_A = word_B + rotate_left(word_A + function_I(word_B,word_C,word_D) + input[8] + 0x6fa87e4f,6);
    word_D = word_A + rotate_left(word_D + function_I(word_A,word_B,word_C) + 0xfe2ce6e0,10);
    word_C = word_D + rotate_left(word_C + function_I(word_D,word_A,word_B) + input[6] + 0xa3014314,15);
    word_B = word_C + rotate_left(word_B + function_I(word_C,word_D,word_A) + 0x4e0811a1,21);
    word_A = word_B + rotate_left(word_A + function_I(word_B,word_C,word_D) + input[4] + 0xf7537e82,6);
    word_D = word_A + rotate_left(word_D + function_I(word_A,word_B,word_C) + input[11] + 0xbd3af235,10);
    word_C = word_D + rotate_left(word_C + function_I(word_D,word_A,word_B) + input[2] + 0x2ad7d2bb,15);
    word_B = word_C + rotate_left(word_B + function_I(word_C,word_D,word_A) + input[9] + 0xeb86d391,21);

    /* add the initial chaining values (the ones md5_precomp() started from) */
    digest[0] = word_A + 0x67452301;
    digest[1] = word_B + 0xefcdab89;
    digest[2] = word_C + 0x98badcfe;
    digest[3] = word_D + 0x10325476;
}

/* Select the rc4_check_key() variant. The preprocessor cannot compare     */
/* type names, so int is temporarily #defined to 99: (RC4_BYTE != int) is  */
/* then false when RC4_BYTE itself expands to int. The portable version    */
/* below is compiled unless the Alpha and OSF platform macros are both     */
/* defined and RC4_BYTE is int. RC4_BYTE is not defined in this file;      */
/* presumably it comes from brutessl.h.                                    */
#define	int	99
#if !(defined(__alpha) || defined(__alpha__)) || !(defined(__OSF) || defined(__osf__)) || (RC4_BYTE != int)
#undef	int
/* Set when the portable variant is the one compiled in; not referenced by */
/* any code visible in this file.                                          */
int using_std_rc4_check_key = 1;
int rc4_check_key(const byte *key, const byte *stream)
/* Check to see whether the given key generates the required stream at an     */
/* offset of skip bytes. If this is a big-endian machine, the key is in a     */
/* strange order. Return 1 if this is the correct key, 0 otherwise. According */
/* to profiler, rc4_check_key() accounts for 70% of execution time, so we do  */
/* everything possible to speed it up...                                      */
{
	static rc4_byte state[256];		/* declared static for performance only */
	rc4_byte *state_p = state;
	unsigned tmp, index=0, i;
	unsigned k0,k1,k2,k3,k4,k5,k6,k7,k8,k9,k10,k11,k12,k13,k14,k15;

	/* In order to avoid having to call reverse_byte_order in the inner  */
	/* loop on big endian platforms, the RC4 key comes in the native     */
	/* word order, just as it was returned by md5_digest(). The idea of  */
	/* caching the key in local variables comes from Adam's original     */
	/* bruter4.c.                                                        */

	if (big_endian)
	{
		k0=key[3];   k1=key[2];   k2=key[1];   k3=key[0];
		k4=key[7];   k5=key[6];   k6=key[5];   k7=key[4];
		k8=key[11];  k9=key[10];  k10=key[9];  k11=key[8];
		k12=key[15]; k13=key[14]; k14=key[13]; k15=key[12];
	} else {
		k0=key[0];   k1=key[1];   k2=key[2];   k3=key[3];
		k4=key[4];   k5=key[5];   k6=key[6];   k7=key[7];
		k8=key[8];   k9=key[9];   k10=key[10]; k11=key[11];
		k12=key[12]; k13=key[13]; k14=key[14]; k15=key[15];
	}

	/* set up the RC4 state table - much faster than a loop */
	memcpy(state,rc4_initial_state,sizeof rc4_initial_state);

	/* prepare the key */
	for (i=0;i<16;i++)
	{
		tmp = *state_p;
		index = (index + k0 + tmp) & 0xFF;
		*state_p++ = state[index];
		state[index] = (rc4_byte)tmp;

		tmp = *state_p;
		index = (index + k1 + tmp) & 0xFF;
		*state_p++ = state[index];
		state[index] = (rc4_byte)tmp;

		tmp = *state_p;
		index = (index + k2 + tmp) & 0xFF;
		*state_p++ = state[index];
		state[index] = (rc4_byte)tmp;

		tmp = *state_p;
		index = (index + k3 + tmp) & 0xFF;
		*state_p++ = state[index];
		state[index] = (rc4_byte)tmp;

		tmp = *state_p;
		index = (index + k4 + tmp) & 0xFF;
		*state_p++ = state[index];
		state[index] = (rc4_byte)tmp;

		tmp = *state_p;
		index = (index + k5 + tmp) & 0xFF;
		*state_p++ = state[index];
		state[index] = (rc4_byte)tmp;

		tmp = *state_p;
		index = (index + k6 + tmp) & 0xFF;
		*state_p++ = state[index];
		state[index] = (rc4_byte)tmp;

		tmp = *state_p;
		index = (index + k7 + tmp) & 0xFF;
		*state_p++ = state[index];
		state[index] = (rc4_byte)tmp;

		tmp = *state_p;
		index = (index + k8 + tmp) & 0xFF;
		*state_p++ = state[index];
		state[index] = (rc4_byte)tmp;

		tmp = *state_p;
		index = (index + k9 + tmp) & 0xFF;
		*state_p++ = state[index];
		state[index] = (rc4_byte)tmp;

		tmp = *state_p;
		index = (index + k10 + tmp) & 0xFF;
		*state_p++ = state[index];
		state[index] = (rc4_byte)tmp;

		tmp = *state_p;
		index = (index + k11 + tmp) & 0xFF;
		*state_p++ = state[index];
		state[index] = (rc4_byte)tmp;

		tmp = *state_p;
		index = (index + k12 + tmp) & 0xFF;
		*state_p++ = state[index];
		state[index] = (rc4_byte)tmp;

		tmp = *state_p;
		index = (index + k13 + tmp) & 0xFF;
		*state_p++ = state[index];
		state[index] = (rc4_byte)tmp;

		tmp = *state_p;
		index = (index + k14 + tmp) & 0xFF;
		*state_p++ = state[index];
		state[index] = (rc4_byte)tmp;

		tmp = *state_p;
		index = (index + k15 + tmp) & 0xFF;
		*state_p++ = state[index];
		state[index] = (rc4_byte)tmp;
	}

	state_p = state;
	index = 0;

	for (i=0;i<4;i++)
	{
	
	tmp = *++state_p;
	index = (index + tmp) & 0xFF;
	*state_p = state[index];
	state[index] = (rc4_byte)tmp;

	tmp = *++state_p;
	index = (index + tmp) & 0xFF;
	*state_p = state[index];
	state[index] = (rc4_byte)tmp;

	tmp = *++state_p;
	index = (index + tmp) & 0xFF;
	*state_p = state[index];
	state[index] = (rc4_byte)tmp;

	tmp = *++state_p;
	index = (index + tmp) & 0xFF;
	*state_p = state[index];
	state[index] = (rc4_byte)tmp;
	}

	/* Generate the first output byte */
	tmp = *++state_p;
	index = (index + tmp) & 0xFF;
	*state_p = state[index];
	state[index] = (rc4_byte)tmp;

	/* Test first output byte */
	if (state[(tmp + *state_p)&0xFF] != *stream) return 0;

	/* We only get here 1/256 of the time, so it's not worth unrolling */
	for (i=0; i<7; i++)
	{
		tmp = *++state_p;
		index = (index + tmp) & 0xFF;
		*state_p = state[index];
		state[index] = (rc4_byte)tmp;
		if (state[(tmp + *state_p)&0xFF] != *++stream) return 0;
	}

	/* got it! */
	return 1;
}
#else /* now code for the 64bit little endian alpha */
#undef	int

/* Set when the Alpha/OSF variant is the one compiled in; not referenced */
/* by any code visible in this file.                                     */
int using_alpha_OSF1_rc4_check_key = 1;
int rc4_check_key(const byte *key, const byte *stream)
/* Alpha/OSF variant. Check whether the given key generates the required      */
/* stream: the cipher is keyed with the 16 key bytes, 16 generator steps are  */
/* run without looking at their output, and the output of the next 8 steps    */
/* is compared with stream[0..7]. Return 1 if this is the correct key, 0      */
/* otherwise. According to profiler, rc4_check_key() accounts for 70% of      */
/* execution time, so we do everything possible to speed it up...             */
{
	static rc4_byte state[256];   /* declared static for performance only */
	rc4_byte *state_p = state;
	unsigned tmp, index=0, i;

	/* the 16 key bytes, held as two unsigned longs of 8 bytes each */
	unsigned long k0, k1;
	
	/* NOTE(review): reads key through an unsigned long pointer, so key */
	/* must be suitably aligned for that access - confirm at the caller */
	k0 = ((unsigned long *)key)[0];
	k1 = ((unsigned long *)key)[1];

	/* just writing is of course faster than read and write */
	/* Fill state[] with 0..255 using unsigned long stores. This variant  */
	/* is only compiled when RC4_BYTE is int, so (assuming rc4_byte is    */
	/* that int and half the size of unsigned long) each store sets two   */
	/* adjacent entries: val starts as the pair (0,1) and both halves     */
	/* grow by 2 after every store.                                       */
	{
	    unsigned long *ptr;
	    unsigned long val = 0x0000000100000000UL;
	    
	    for(ptr = (unsigned long *) state;
	        ptr < (unsigned long *) &state[256];
		ptr += 8)
	    {
#define BAZ(x) ptr[(x)] = val; val += 0x0000000200000002UL;
		BAZ(0); BAZ(1); BAZ(2); BAZ(3); BAZ(4); BAZ(5); BAZ(6); BAZ(7);
	    }
	}

	/* Teach it to use extbl */
	/* FOO(x): key-schedule step for state_p[x] using byte x of k0 */
#define FOO(x) \
	tmp = state_p[(x)]; \
    index = (index + ((k0 >> (x * 8)) & 0xff) + tmp) & 0xFF; \
    state_p[(x)] = state[index]; \
    state[index] = (rc4_byte)tmp;
    
	/* BAR(x): key-schedule step for state_p[x+8] using byte x of k1 */
#define BAR(x) \
	tmp = state_p[(x)+8]; \
    index = (index + ((k1 >> (x * 8)) & 0xff) + tmp) & 0xFF; \
    state_p[(x)+8] = state[index]; \
    state[index] = (rc4_byte)tmp;
    
	/* prepare the key */
	/* 16 passes of 16 steps each cover all 256 state entries */
	for (i=0;i<16;i++)
	{
	    	FOO(0); FOO(1); FOO(2); FOO(3); FOO(4); FOO(5); FOO(6); FOO(7);
	    	BAR(0); BAR(1); BAR(2); BAR(3); BAR(4); BAR(5); BAR(6); BAR(7);
		state_p += 16;
	}

	state_p = state;
	index = 0;

	/* LEM(x): generator step on state_p[x+1]; tmp keeps the value that */
	/* was moved to state[index]                                        */
#define LEM(x) \
	tmp = state_p[x+1]; \
	index = (index + tmp) & 0xFF; \
	state_p[x+1] = state[index]; \
	state[index] = (rc4_byte)tmp;

	/* 16 generator steps whose output is not examined */
	LEM(0);  LEM(1);  LEM(2);  LEM(3);  LEM(4);  LEM(5);  LEM(6);  LEM(7);
	LEM(8);  LEM(9);  LEM(10); LEM(11); LEM(12); LEM(13); LEM(14); LEM(15);

	/* Generate the first output byte */
	LEM(16);

	/* point state_p at state[17], the entry LEM(16) just processed */
	state_p += 17;

	/* Test first output byte */
	if (state[(tmp + *state_p)&0xFF] != *stream) return 0;

	/* We only get here 1/256 of the time, so it's not worth unrolling */
	for (i=0; i<7; i++)
	{
		tmp = *++state_p;
		index = (index + tmp) & 0xFF;
		*state_p = state[index];
		state[index] = (rc4_byte)tmp;
		if (state[(tmp + *state_p)&0xFF] != *++stream) return 0;
	}

	/* got it! */
	/* NOTE(review): BAZ, FOO, BAR and LEM are never #undef'd and stay */
	/* defined for the rest of the translation unit                    */
	return 1;
}
#endif /* alpha */

void reverse_byte_order(word *data, int nwords)
/* Reverse the byte order of the low 32 bits of each of the nwords words   */
/* starting at data, in place: 0xAABBCCDD becomes 0xDDCCBBAA. Every byte   */
/* is masked explicitly, so nothing above bit 31 can leak into the result  */
/* if word is wider than 32 bits, and the expression no longer relies on   */
/* the relative precedence of | and &. A count of zero or less leaves the  */
/* buffer untouched.                                                       */
{
	word tmp;

	for (; nwords > 0; nwords--, data++)
	{
		tmp = *data;
		*data = ((tmp >> 24) & 0xFFU)
		      | ((tmp >>  8) & 0xFF00U)
		      | ((tmp <<  8) & 0xFF0000U)
		      | ((tmp << 24) & 0xFF000000U);
	}
}
