
; ********************************************************************
; 8051 implementation of the Twofish cryptosystem
;
; Author : Francois Koeune (C) UCL Crypto Group 1998
; Version 1.3 : since 1.2, many exhaustive code replacements, to avoid the 
;	(quite inefficient) routines ADD32 and DOUB32, for example
; ********************************************************************

$MOD51
; Internal-RAM layout: consecutive work areas starting at 20H.
	Mo	DATA	20H	; key words 1 and 3: 8 bytes, Mo..Mo+7 (filled by Startkeysched)
	Me	DATA	Mo+8	; key words 0 and 2: 8 bytes, Me..Me+7 (filled by Startkeysched)
	S	DATA	Me+8	; the two RS results: 8 bytes, S..S+7 (filled by Startkeysched)
	Ekey	DATA	S+8	; place to store temporary expanded keys (first word, 4 bytes)
	Ekey2	DATA	EKey+4	; second expanded-key word written by Expkey (4 bytes)
	G0	DATA	EKey2+4	; 4+4 bytes used internally by Encrypt
	G1	DATA	G0+4
	TEMP	DATA	G1+4	; 4 bytes used internally by Expkey (TEMP..TEMP+3) and by Encrypt

	INTERN	DATA	TEMP+11	; top of a scratch area that grows down :
				; 8 bytes (INTERN-7..INTERN) used internally by h_fun (MDS)

	SAMPLE	IDATA	128	; 16-byte data block, written only by the test code (JUNK, TEST)
	SAMPKEY	IDATA	0D0H	; 16-byte key, written only by the test code (TEST)
				; NOTE(review): both test buffers sit at 80H and above, i.e. in
				; indirect-only RAM - confirm that the target part provides it

        JMP     TEST		; first instruction of the program: skip routines and tables

; ********************************************************************
; RS function
; Apply the transformation RS to the 4 bytes (p0) starting from @R0 and
;	the 4 bytes (p1) starting from @R1
; Both words are handled most-significant byte first.
; Result starting from @R1 (!!!) ; R1 itself holds its entry value again on return
; A, R0 (incremented by 4), R2-R4, the carry flag and the zone @R1 are destroyed
; The zone @R0 is only read, never written.
; The work is done in 8 steps: 4 that shift in one byte of p0 each (RS_LOO),
;	then 4 that shift in zero bytes (RS_LO2).
; NOTE: this code is derived from a C implementation due to Brian Gladman
; ********************************************************************

RS:	MOV	R2,#4		; !! first 4 steps (R2 counts 4 down to 1)
RS_LOO:	MOV	A,@R1		; !!	t = p1>>24
	MOV	R3,A		;	R3 keeps t for the whole step
	INC	R1
	INC	R1
	INC	R1		; !!	p1 = (p1 <<8) | (p0>>24) ; we don't update p0, but only use parts we need instead
	MOV	A,@R0		;	next byte of p0 (R0 walks through p0, one byte per step)
	INC	R0
	XCH	A,@R1		;	the XCH chain moves every byte of p1 one position up
	DEC	R1
	XCH	A,@R1
	DEC	R1
	XCH	A,@R1
	DEC	R1
	MOV	@R1,A
	MOV	A,R3		; !!	u=(t<<1);
	CLR	C
	RLC	A		;	carry = bit 7 of t
	JNC	RS_SKP		; !!	if (t & 0x80) u^=G_MOD
	XRL	A,#4DH
RS_SKP:	MOV	R4,A		;	R4 = u
	INC	R1		; !!	p1^=u<<16
	XRL	A,@R1
	MOV	@R1,A
	INC	R1
	INC	R1
	MOV	A,R3		; !!	p1^=t
	XRL	A,@R1
	MOV	@R1,A
	MOV	A,R3		; !!	u^=(t>>1)
	CLR	C
	RRC	A		;	carry = bit 0 of t (XRL below leaves the carry alone)
	XRL	A,R4
	JNC	RS_SK2		; !!	if (t & 0x01) u^=0xA6
	XRL	A,#0A6H
RS_SK2:	MOV	R4,A
	DEC	R1
	XRL	A,@R1		; !!	p1^=u<<8
	MOV	@R1,A
	DEC	R1
	DEC	R1		;	R1 is back on the most-significant byte of p1
	MOV	A,@R1		; !!	p1^=u<<24
	XRL	A,R4
	MOV	@R1,A
	DJNZ	R2,RS_LOO

	MOV	R2,#4		; !! last 4 steps: same as above, but zero bytes are shifted in
RS_LO2:	MOV	A,@R1		; !!	t = p1>>24
	MOV	R3,A
	INC	R1
	INC	R1
	INC	R1		; !!	p1 = (p1 <<8) | 0
	CLR	A		;	p0 is used up: shift in 0
	XCH	A,@R1
	DEC	R1
	XCH	A,@R1
	DEC	R1
	XCH	A,@R1
	DEC	R1
	MOV	@R1,A
	MOV	A,R3		; !!	u=(t<<1);
	CLR	C
	RLC	A
	JNC	RS_SK3		; !!	if (t & 0x80) u^=G_MOD
	XRL	A,#4DH
RS_SK3:	MOV	R4,A
	INC	R1		; !!	p1^=u<<16
	XRL	A,@R1
	MOV	@R1,A
	INC	R1
	INC	R1
	MOV	A,R3		; !!	p1^=t
	XRL	A,@R1
	MOV	@R1,A
	MOV	A,R3		; !!	u^=(t>>1)
	CLR	C
	RRC	A
	XRL	A,R4
	JNC	RS_SK4		; !!	if (t & 0x01) u^=0xA6
	XRL	A,#0A6H
RS_SK4:	MOV	R4,A
	DEC	R1
	XRL	A,@R1		; !!	p1^=u<<8
	MOV	@R1,A
	DEC	R1
	DEC	R1
	MOV	A,@R1		; !!	p1^=u<<24
	XRL	A,R4
	MOV	@R1,A
	DJNZ	R2,RS_LO2
	RET

; ********************************************************************
; FFM_** macros
; Macros allowing quick multiplication by '5B' and 'EF' in GF(2^8),
;	implemented as a lookup in a 256-byte table in code memory
; Multiply byte by '5B' or 'EF'
; Result in dest
; A is destroyed
; IMPORTANT : it is up to the user to load the base address (TAB_5B or TAB_EF)
;		in DPTR before invoking the macro !!!
; NOTE: this code is derived from a C implementation due to Brian Gladman
; ********************************************************************

; TAB_5B: 256-byte lookup table in code memory, entry x = x times '5B'
; (entry 1 = 5Bh, entry 2 = 0B6h, entry 4 = 05h after reduction).
; H_FUN loads its address into DPTR before using the FFM_5B macro.
TAB_5B: 
	DB 000h, 05Bh, 0B6h, 0EDh, 005h, 05Eh, 0B3h, 0E8h
	DB 00Ah, 051h, 0BCh, 0E7h, 00Fh, 054h, 0B9h, 0E2h
	DB 014h, 04Fh, 0A2h, 0F9h, 011h, 04Ah, 0A7h, 0FCh
	DB 01Eh, 045h, 0A8h, 0F3h, 01Bh, 040h, 0ADh, 0F6h
	DB 028h, 073h, 09Eh, 0C5h, 02Dh, 076h, 09Bh, 0C0h
	DB 022h, 079h, 094h, 0CFh, 027h, 07Ch, 091h, 0CAh
	DB 03Ch, 067h, 08Ah, 0D1h, 039h, 062h, 08Fh, 0D4h
	DB 036h, 06Dh, 080h, 0DBh, 033h, 068h, 085h, 0DEh
	DB 050h, 00Bh, 0E6h, 0BDh, 055h, 00Eh, 0E3h, 0B8h
	DB 05Ah, 001h, 0ECh, 0B7h, 05Fh, 004h, 0E9h, 0B2h
	DB 044h, 01Fh, 0F2h, 0A9h, 041h, 01Ah, 0F7h, 0ACh
	DB 04Eh, 015h, 0F8h, 0A3h, 04Bh, 010h, 0FDh, 0A6h
	DB 078h, 023h, 0CEh, 095h, 07Dh, 026h, 0CBh, 090h
	DB 072h, 029h, 0C4h, 09Fh, 077h, 02Ch, 0C1h, 09Ah
	DB 06Ch, 037h, 0DAh, 081h, 069h, 032h, 0DFh, 084h
	DB 066h, 03Dh, 0D0h, 08Bh, 063h, 038h, 0D5h, 08Eh
	DB 0A0h, 0FBh, 016h, 04Dh, 0A5h, 0FEh, 013h, 048h
	DB 0AAh, 0F1h, 01Ch, 047h, 0AFh, 0F4h, 019h, 042h
	DB 0B4h, 0EFh, 002h, 059h, 0B1h, 0EAh, 007h, 05Ch
	DB 0BEh, 0E5h, 008h, 053h, 0BBh, 0E0h, 00Dh, 056h
	DB 088h, 0D3h, 03Eh, 065h, 08Dh, 0D6h, 03Bh, 060h
	DB 082h, 0D9h, 034h, 06Fh, 087h, 0DCh, 031h, 06Ah
	DB 09Ch, 0C7h, 02Ah, 071h, 099h, 0C2h, 02Fh, 074h
	DB 096h, 0CDh, 020h, 07Bh, 093h, 0C8h, 025h, 07Eh
	DB 0F0h, 0ABh, 046h, 01Dh, 0F5h, 0AEh, 043h, 018h
	DB 0FAh, 0A1h, 04Ch, 017h, 0FFh, 0A4h, 049h, 012h
	DB 0E4h, 0BFh, 052h, 009h, 0E1h, 0BAh, 057h, 00Ch
	DB 0EEh, 0B5h, 058h, 003h, 0EBh, 0B0h, 05Dh, 006h
	DB 0D8h, 083h, 06Eh, 035h, 0DDh, 086h, 06Bh, 030h
	DB 0D2h, 089h, 064h, 03Fh, 0D7h, 08Ch, 061h, 03Ah
	DB 0CCh, 097h, 07Ah, 021h, 0C9h, 092h, 07Fh, 024h
	DB 0C6h, 09Dh, 070h, 02Bh, 0C3h, 098h, 075h, 02Eh

; TAB_EF: 256-byte lookup table in code memory, entry x = x times 'EF'
; (entry 1 = 0EFh, entry 2 = 0B7h after reduction).
; H_FUN loads its address into DPTR before using the FFM_EF macro.
TAB_EF:
	DB 000h, 0EFh, 0B7h, 058h, 007h, 0E8h, 0B0h, 05Fh
	DB 00Eh, 0E1h, 0B9h, 056h, 009h, 0E6h, 0BEh, 051h
	DB 01Ch, 0F3h, 0ABh, 044h, 01Bh, 0F4h, 0ACh, 043h
	DB 012h, 0FDh, 0A5h, 04Ah, 015h, 0FAh, 0A2h, 04Dh
	DB 038h, 0D7h, 08Fh, 060h, 03Fh, 0D0h, 088h, 067h
	DB 036h, 0D9h, 081h, 06Eh, 031h, 0DEh, 086h, 069h
	DB 024h, 0CBh, 093h, 07Ch, 023h, 0CCh, 094h, 07Bh
	DB 02Ah, 0C5h, 09Dh, 072h, 02Dh, 0C2h, 09Ah, 075h
	DB 070h, 09Fh, 0C7h, 028h, 077h, 098h, 0C0h, 02Fh
	DB 07Eh, 091h, 0C9h, 026h, 079h, 096h, 0CEh, 021h
	DB 06Ch, 083h, 0DBh, 034h, 06Bh, 084h, 0DCh, 033h
	DB 062h, 08Dh, 0D5h, 03Ah, 065h, 08Ah, 0D2h, 03Dh
	DB 048h, 0A7h, 0FFh, 010h, 04Fh, 0A0h, 0F8h, 017h
	DB 046h, 0A9h, 0F1h, 01Eh, 041h, 0AEh, 0F6h, 019h
	DB 054h, 0BBh, 0E3h, 00Ch, 053h, 0BCh, 0E4h, 00Bh
	DB 05Ah, 0B5h, 0EDh, 002h, 05Dh, 0B2h, 0EAh, 005h
	DB 0E0h, 00Fh, 057h, 0B8h, 0E7h, 008h, 050h, 0BFh
	DB 0EEh, 001h, 059h, 0B6h, 0E9h, 006h, 05Eh, 0B1h
	DB 0FCh, 013h, 04Bh, 0A4h, 0FBh, 014h, 04Ch, 0A3h
	DB 0F2h, 01Dh, 045h, 0AAh, 0F5h, 01Ah, 042h, 0ADh
	DB 0D8h, 037h, 06Fh, 080h, 0DFh, 030h, 068h, 087h
	DB 0D6h, 039h, 061h, 08Eh, 0D1h, 03Eh, 066h, 089h
	DB 0C4h, 02Bh, 073h, 09Ch, 0C3h, 02Ch, 074h, 09Bh
	DB 0CAh, 025h, 07Dh, 092h, 0CDh, 022h, 07Ah, 095h
	DB 090h, 07Fh, 027h, 0C8h, 097h, 078h, 020h, 0CFh
	DB 09Eh, 071h, 029h, 0C6h, 099h, 076h, 02Eh, 0C1h
	DB 08Ch, 063h, 03Bh, 0D4h, 08Bh, 064h, 03Ch, 0D3h
	DB 082h, 06Dh, 035h, 0DAh, 085h, 06Ah, 032h, 0DDh
	DB 0A8h, 047h, 01Fh, 0F0h, 0AFh, 040h, 018h, 0F7h
	DB 0A6h, 049h, 011h, 0FEh, 0A1h, 04Eh, 016h, 0F9h
	DB 0B4h, 05Bh, 003h, 0ECh, 0B3h, 05Ch, 004h, 0EBh
	DB 0BAh, 055h, 00Dh, 0E2h, 0BDh, 052h, 00Ah, 0E5h

; FFM_5B src,dst : dst = table entry selected by the byte operand src.
; The table base must already be in DPTR (TAB_5B for a multiplication by '5B').
; A is destroyed.
FFM_5B	MACRO	src,dst
	MOV	A,src		; A = table index
	MOVC	A,@A+DPTR	; A = code byte at DPTR + index
	MOV	dst,A		; deliver the product
ENDM

; FFM_EF src,dst : dst = table entry selected by the byte operand src.
; The table base must already be in DPTR (TAB_EF for a multiplication by 'EF').
; A is destroyed.
FFM_EF	MACRO	src,dst
	MOV	A,src		; A = table index
	MOVC	A,@A+DPTR	; A = code byte at DPTR + index
	MOV	dst,A		; deliver the product
ENDM

; ********************************************************************
; H_FUN function
; Apply the transformation H to the 4 bytes (X) starting from @R0 and
;	the 8 bytes (L) starting from @R1
; Result starting from @R0 (R0 itself holds its entry value again on return)
; A, B, DPTR, R1 (incremented by 7) and the zone pointed to by R0 (result there) 
;	are destroyed 
; uses the 8 bytes INTERN..INTERN-7
; IMPORTANT NOTE: compared to the official description, the order of the
;		  two 32-bit words L0,L1 is reversed
; ********************************************************************

; Q0: 256-entry byte substitution table in code memory, indexed by the input byte.
; Read by H_FUN through MOVC A,@A+DPTR with DPTR = #Q0.
; Presumably the fixed permutation q0 of the Twofish specification - verify against the reference.
Q0:	DB	0A9H,67H,0B3H,0E8H,04H,0FDH,0A3H,76H,9AH,92H,80H,78H,0E4H,0DDH,0D1H,38H
	DB	0DH,0C6H,35H,98H,18H,0F7H,0ECH,6CH,43H,75H,37H,26H,0FAH,13H,94H,48H,0F2H
	DB	0D0H,8BH,30H,84H,54H,0DFH,23H,19H,5BH,3DH,59H,0F3H,0AEH,0A2H,82H,63H,01H
	DB	83H,2EH,0D9H,51H,9BH,7CH,0A6H,0EBH,0A5H,0BEH,16H,0CH,0E3H,61H,0C0H,8CH,3AH
	DB	0F5H,73H,2CH,25H,0BH,0BBH,4EH,89H,6BH,53H,6AH,0B4H,0F1H,0E1H,0E6H,0BDH,45H
	DB	0E2H,0F4H,0B6H,66H,0CCH,95H,03H,56H,0D4H,1CH,1EH,0D7H,0FBH,0C3H,8EH,0B5H
	DB	0E9H,0CFH,0BFH,0BAH,0EAH,77H,39H,0AFH,33H,0C9H,62H,71H,81H,79H,09H,0ADH,24H
	DB	0CDH,0F9H,0D8H,0E5H,0C5H,0B9H,4DH,44H,08H,86H,0E7H,0A1H,1DH,0AAH,0EDH,06H
	DB	70H,0B2H,0D2H,41H,7BH,0A0H,11H,31H,0C2H,27H,90H,20H,0F6H,60H,0FFH,96H,5CH
	DB	0B1H,0ABH,9EH,9CH,52H,1BH,5FH,93H,0AH,0EFH,91H,85H,49H,0EEH,2DH,4FH,8FH,3BH
	DB	47H,87H,6DH,46H,0D6H,3EH,69H,64H,2AH,0CEH,0CBH,2FH,0FCH,97H,05H,7AH,0ACH
	DB	7FH,0D5H,1AH,4BH,0EH,0A7H,5AH,28H,14H,3FH,29H,88H,3CH,4CH,02H,0B8H,0DAH
	DB	0B0H,17H,55H,1FH,8AH,7DH,57H,0C7H,8DH,74H,0B7H,0C4H,9FH,72H,7EH,15H,22H,12H
	DB	58H,07H,99H,34H,6EH,50H,0DEH,68H,65H,0BCH,0DBH,0F8H,0C8H,0A8H,2BH,40H,0DCH
	DB	0FEH,32H,0A4H,0CAH,10H,21H,0F0H,0D3H,5DH,0FH,00H,6FH,9DH,36H,42H,4AH,5EH,0C1H,0E0H

; Q1: 256-entry byte substitution table in code memory, indexed by the input byte.
; Read by H_FUN through MOVC A,@A+DPTR with DPTR = #Q1.
; Presumably the fixed permutation q1 of the Twofish specification - verify against the reference.
Q1:	DB	75H,0F3H,0C6H,0F4H,0DBH,7BH,0FBH,0C8H,4AH,0D3H,0E6H,6BH,45H,7DH,0E8H,4BH
	DB	0D6H,32H,0D8H,0FDH,37H,71H,0F1H,0E1H,30H,0FH,0F8H,1BH,87H,0FAH,06H,3FH,5EH
	DB	0BAH,0AEH,5BH,8AH,00H,0BCH,9DH,6DH,0C1H,0B1H,0EH,80H,5DH,0D2H,0D5H,0A0H,84H
	DB	07H,14H,0B5H,90H,2CH,0A3H,0B2H,73H,4CH,54H,92H,74H,36H,51H,38H,0B0H,0BDH,5AH
	DB	0FCH,60H,62H,96H,6CH,42H,0F7H,10H,7CH,28H,27H,8CH,13H,95H,9CH,0C7H,24H,46H
	DB	3BH,70H,0CAH,0E3H,85H,0CBH,11H,0D0H,93H,0B8H,0A6H,83H,20H,0FFH,9FH,77H,0C3H
	DB	0CCH,03H,6FH,08H,0BFH,40H,0E7H,2BH,0E2H,79H,0CH,0AAH,82H,41H,3AH,0EAH,0B9H
	DB	0E4H,9AH,0A4H,97H,7EH,0DAH,7AH,17H,66H,94H,0A1H,1DH,3DH,0F0H,0DEH,0B3H,0BH
	DB	72H,0A7H,1CH,0EFH,0D1H,53H,3EH,8FH,33H,26H,5FH,0ECH,76H,2AH,49H,81H,88H,0EEH
	DB	21H,0C4H,1AH,0EBH,0D9H,0C5H,39H,99H,0CDH,0ADH,31H,8BH,01H,18H,23H,0DDH,1FH
	DB	4EH,2DH,0F9H,48H,4FH,0F2H,65H,8EH,78H,5CH,58H,19H,8DH,0E5H,98H,57H,67H,7FH
	DB	05H,64H,0AFH,63H,0B6H,0FEH,0F5H,0B7H,3CH,0A5H,0CEH,0E9H,68H,44H,0E0H,4DH,43H
	DB	69H,29H,2EH,0ACH,15H,59H,0A8H,0AH,9EH,6EH,47H,0DFH,34H,35H,6AH,0CFH,0DCH,22H
	DB	0C9H,0C0H,9BH,89H,0D4H,0EDH,0ABH,12H,0A2H,0DH,52H,0BBH,02H,2FH,0A9H,0D7H,61H
	DB	1EH,0B4H,50H,04H,0F6H,0C2H,16H,25H,86H,56H,55H,09H,0BEH,91H

; H_FUN body.
; Part 1: each of the 4 bytes at @R0 goes through three Q0/Q1 lookups, with a byte
;	of the first word of L XORed in after layer 1 and a byte of the second word
;	after layer 2. DPTR is deliberately left on the table used last, so each
;	"MOV DPTR" below appears only where the table actually changes.
;	R1 moves +4, -3, +4, -3, ... and ends 7 above its entry value.
; Part 2: the four results are multiplied by '5B' and 'EF' into INTERN-7..INTERN
;	and recombined (MDS step); R0 holds its entry value again at the RET.
H_FUN:	MOV	DPTR,#Q1	; ---> note to understand comments: here we number big-endian ...
	MOV	A,@R0
	MOVC	A,@A+DPTR	; layer 1: Q1(0)
	XRL	A,@R1		; layer 1: XOR L0_0 (and not L1_0 : cf. above note)
	MOVC	A,@A+DPTR	; layer 2: Q1(0)
	INC	R1
	INC	R1
	INC	R1
	INC	R1		; same byte position in the second word of L
	XRL	A,@R1		; layer 2: XOR L1_0
	MOV	DPTR,#Q0
	MOVC	A,@A+DPTR	; layer 3: Q0(0)
	MOV	@R0,A
	INC	R0
				; second byte
	MOV	A,@R0
	MOVC	A,@A+DPTR	; layer 1: Q0 (DPTR still holds #Q0)
	DEC	R1
	DEC	R1
	DEC	R1		; back to the first word of L, next byte position
	XRL	A,@R1		; layer 1: XOR
	MOV	DPTR,#Q1
	MOVC	A,@A+DPTR	; layer 2:Q1
	INC	R1
	INC	R1
	INC	R1
	INC	R1
	XRL	A,@R1		; layer 2: XOR
	MOVC	A,@A+DPTR	; layer 3: Q1
	MOV	@R0,A
	INC	R0
				; third byte
	MOV	A,@R0
	MOVC	A,@A+DPTR	; layer 1: Q1 (DPTR still holds #Q1)
	DEC	R1
	DEC	R1
	DEC	R1
	XRL	A,@R1		; layer 1: XOR
	MOV	DPTR,#Q0
	MOVC	A,@A+DPTR	; layer 2:Q0
	INC	R1
	INC	R1
	INC	R1
	INC	R1
	XRL	A,@R1		; layer 2: XOR
	MOVC	A,@A+DPTR	; layer 3: Q0
	MOV	@R0,A
	INC	R0
				; fourth byte
	MOV	A,@R0
	MOVC	A,@A+DPTR	; layer 1: Q0 (DPTR still holds #Q0)
	DEC	R1
	DEC	R1
	DEC	R1
	XRL	A,@R1		; layer 1: XOR
	MOVC	A,@A+DPTR	; layer 2:Q0
	INC	R1
	INC	R1
	INC	R1
	INC	R1
	XRL	A,@R1		; layer 2: XOR
	MOV	DPTR,#Q1
	MOVC	A,@A+DPTR	; layer 3: Q1
	MOV	@R0,A		; R0 now points at the last of the 4 bytes (called b0 below)

				; ---> ... but here we number little-endian (as in C code)
				; #INTERN-i corresponds to m5b_bi, #INTERN-4-i corresponds to mef_bi
	MOV	DPTR,#TAB_5B
FFM_5B	@R0,%INTERN		;	  m5b_b0
	DEC	R0
FFM_5B	@R0,%(INTERN-1)		;	  m5b_b1
	DEC	R0
FFM_5B	@R0,%(INTERN-2)		;	  m5b_b2
	DEC	R0
FFM_5B	@R0,%(INTERN-3)		; compute m5b_b3

	MOV	DPTR,#TAB_EF
FFM_EF	@R0,%(INTERN-7)		;	  mef_b3
	INC	R0
FFM_EF	@R0,%(INTERN-6)		;	  mef_b2
	INC	R0
FFM_EF	@R0,%(INTERN-5)		;	  mef_b1
	INC	R0
FFM_EF	@R0,%(INTERN-4)		;	  mef_b0

	MOV	A,@R0		; compute b0
	XRL	A,INTERN-5	;	    ^mef_b1
	XRL	A,INTERN-2	;		   ^m5b_b2
	XRL	A,INTERN-3	;			  ^m5b_b3
	MOV	@R0,A		; stored in place (the products were all saved beforehand)
	DEC	R0
	MOV	A,@R0		; compute b1
	XRL	A,INTERN-4	;	    ^mef_b0
	XRL	A,INTERN-6	;		   ^mef_b2
	XRL	A,INTERN-3	;			  ^m5b_b3
	MOV	B,A		; parked in B: it is stored two bytes further down, below
	DEC	R0
	MOV	A,@R0		; compute b2
	XRL	A,INTERN-4	;	    ^mef_b0
	XRL	A,INTERN-1	;		   ^m5b_b1
	XRL	A,INTERN-7	;			  ^mef_b3
	MOV	@R0,A
	DEC	R0
	MOV	A,@R0		; compute b3
	XRL	A,INTERN	;	    ^m5b_b0
	XRL	A,INTERN-5	;		   ^mef_b1
	XRL	A,INTERN-6	;			  ^mef_b2
	MOV	@R0,B		; put b1 at its place
	INC	R0
	INC	R0
	MOV	@R0,A		; put b3 at its place
	DEC	R0
	DEC	R0		; R0 = entry value again
        RET

; ********************************************************************
; Startkeysched function
; Derives the three double-words (64 bits) Mo, Me and S from the 16-byte key starting at @R1
;	and initializes Rho (in R4) to 0
; Result stored in memory zones Mo, Me, S
; A, R0-R4, the carry flag and part of the zone @R1 are destroyed :
;	key bytes 4..7 and 12..15 are overwritten with the RS results (RS works in place)
; uses the memory zones Mo,Me,S
; NOTE: both pointer computations use two chained SUBB with a single CLR C, i.e. they
;	rely on the first subtraction not producing a borrow
; IMPORTANT NOTE: compared to the official description, the order of the
;		  word pairs (e.g. Mo[0] and Mo[1]) is reversed, in order to speed up H_FUN
; ********************************************************************

Startkeysched:
	MOV	Me+4,@R1	; key bytes 0..3   -> Me+4..Me+7
	INC	R1
	MOV	Me+5,@R1
	INC	R1
	MOV	Me+6,@R1
	INC	R1
	MOV	Me+7,@R1
	INC	R1
	MOV	Mo+4,@R1	; key bytes 4..7   -> Mo+4..Mo+7
	INC	R1
	MOV	Mo+5,@R1
	INC	R1
	MOV	Mo+6,@R1
	INC	R1
	MOV	Mo+7,@R1
	INC	R1
	MOV	Me,@R1		; key bytes 8..11  -> Me..Me+3
	INC	R1
	MOV	Me+1,@R1
	INC	R1
	MOV	Me+2,@R1
	INC	R1
	MOV	Me+3,@R1
	INC	R1
	MOV	Mo,@R1		; key bytes 12..15 -> Mo..Mo+3
	INC	R1
	MOV	Mo+1,@R1
	INC	R1
	MOV	Mo+2,@R1
	INC	R1
	MOV	Mo+3,@R1	; R1 = key+15 here
	CLR	C
	MOV	A,R1
	SUBB	A,#3
	MOV	R1,A		; R1 = key+12 (p1 for RS)
	SUBB	A,#4
	MOV	R0,A		; R0 = key+8  (p0 for RS)
	CALL	RS		; result replaces key bytes 12..15, R1 unchanged
	MOV	S+4,@R1
	INC	R1
	MOV	S+5,@R1
	INC	R1
	MOV	S+6,@R1
	INC	R1
	MOV	S+7,@R1		; R1 = key+15 again
	CLR	C
	MOV	A,R1
	SUBB	A,#11
	MOV	R1,A		; R1 = key+4 (p1 for RS)
	SUBB	A,#4
	MOV	R0,A		; R0 = key   (p0 for RS)
	CALL	RS		; result replaces key bytes 4..7
	MOV	S,@R1
	INC	R1
	MOV	S+1,@R1
	INC	R1
	MOV	S+2,@R1
	INC	R1
	MOV	S+3,@R1
	CLR	A
	MOV	R4,A		; R4 = 0 : the next Expkey call produces the first subkey pair
	RET

; ********************************************************************
; Expkey function
; Computes the next (starting at 0, incrementing at each call) part of the subkey, on the
; 	basis on Mo, Me and S
; The position in the key schedule is taken from R4, which is advanced by 2 at each call
;	(a caller may reload R4 to jump to another position, as Encrypt does)
; Result stored in memory zone  Ekey (8 bytes : EKey..EKey+3 and EKey2..EKey2+3)
; A, B, DPTR, R0-R1, R4 and the flags are destroyed 
; uses the memory zones Mo,Me, Ekey, TEMP, INTERN..INTERN-7
; ********************************************************************

Expkey:	MOV	A,R4		; transfer 2*i*Rho and update for next call
	MOV	EKey,A		; (the same byte is replicated into all 4 bytes of the word)
	MOV	EKey+1,A
	MOV	EKey+2,A
	MOV	EKey+3,A
	INC	R4
	MOV	R0,#EKey
	MOV	R1,#Me
	CALL	H_FUN		; A=h_fun(2*i*0x01010101,Me)
	MOV	A,R4		; transfer (2*i+1)*Rho and update for next call
	MOV	TEMP,A
	MOV	TEMP+1,A
	MOV	TEMP+2,A
	MOV	TEMP+3,A
	INC	R4
	MOV	R0,#TEMP
	MOV	R1,#Mo
	CALL	H_FUN		; B=h_fun((2*i+1)*0x01010101,Mo)
	MOV	EKey2,EKey	; copy A to EKey2 (already in EKey)
	MOV	EKey2+1,EKey+1
	MOV	EKey2+2,EKey+2
	MOV	EKey2+3,EKey+3
	MOV	A,TEMP		; B=(B<<8)|(B>>24) not executed : we will traverse the word in a different order
				; (byte order of the rotated B, low to high : TEMP, TEMP+3, TEMP+2, TEMP+1)
	ADD	A,EKey+3	; A+B
	MOV	EKey+3,A
	MOV	A,TEMP+3
	ADDC	A,EKey+2
	MOV	EKey+2,A
	MOV	A,TEMP+2
	ADDC	A,EKey+1
	MOV	EKey+1,A
	MOV	A,TEMP+1
	ADDC	A,EKey
	MOV	EKey,A		; EKey = A+B (first subkey word of the pair)
	CLR	C		; B=2*B
				; doubling and adding are interleaved byte by byte : two carries are
				; alive at the same time, the shift carry is parked in B.7 and the
				; addition carry in B.6

	MOV	A,TEMP
	RLC	A
	MOV	B.7,C		; save shift carry
	ADD	A,EKey2+3
	MOV	EKey2+3,A
	MOV	B.6,C		; save addition carry
	MOV	C,B.7		; shift carry back for the next RLC

	MOV	A,TEMP+3
	RLC	A
	MOV	B.7,C
	MOV	C,B.6
	ADDC	A,EKey2+2
	MOV	EKey2+2,A
	MOV	B.6,C
	MOV	C,B.7

	MOV	A,TEMP+2
	RLC	A
	MOV	B.7,C
	MOV	C,B.6
	ADDC	A,EKey2+1
	MOV	EKey2+1,A
	MOV	B.6,C
	MOV	C,B.7

	MOV	A,TEMP+1
	RLC	A		; top byte : the bit shifted out is dropped
	MOV	C,B.6
	ADDC	A,EKey2
	MOV	EKey2,A		; EKey2 = A+2*B

	MOV	A,EKey2+3	; <<< 9  (bytes move up one position and the word rotates left by 1 bit)
	RLC	A		; bit 0 of the result is whatever the carry held : fixed below
	MOV	B,A		; kept in B : its bit 0 (the wrapped-around bit) is patched in later
	MOV	A,EKey2+2
	RLC	A
	XCH	A,EKey2+1
	RLC	A
	XCH	A,EKey2
	RLC	A
	MOV	EKey2+3,A
	MOV	B.0,C		; bit rotated out of the old top byte
	MOV	EKey2+2,B
	RET

; ********************************************************************
; XOR8
; In-place XOR of two RAM buffers, R2 bytes long
; In  : R0 -> first operand (receives the result)
;	R1 -> second operand (only read)
;	R2 =  number of bytes
; Out : @R0 = @R0 XOR @R1 for each of the R2 bytes
; A and R2 are destroyed; R0 and R1 are left advanced by the byte count
; ********************************************************************
XOR8:	
X_LOOP:	MOV	A,@R1		; second operand first : XOR commutes
	XRL	A,@R0
	MOV	@R0,A
	INC	R1
	INC	R0
	DJNZ	R2,X_LOOP	; next byte
	RET

; ********************************************************************
; RAMCOPY function
; Copy a run of bytes inside RAM, lowest address first
; In  : R0 -> source, R1 -> destination, R2 = number of bytes
; A and R2 are destroyed; R0 and R1 are left advanced by the byte count
; ********************************************************************

RAMCOPY:
R_LOO:	MOV	A,@R0		; fetch ...
	MOV	@R1,A		; ... and store one byte
	INC	R0
	INC	R1
	DJNZ	R2,R_LOO	; until R2 bytes are done
	RET

; ********************************************************************
; Encrypt function
; Encrypt the 16-byte block starting at @R0 using the 128-bit key starting at @R1
; Result starting at @R0 (in place); R0 holds its entry value again on return
; A, B, DPTR, R1-R6, the flags and the zone @R1 (i.e, the key) are destroyed
;	(R5 keeps the block address, R6 is the round counter / a saved pointer;
;	 R7 is not touched)
; uses the memory zones Mo,Me,S, Ekey,Ekey2, G0,G1, TEMP, INTERN..INTERN-7
; Structure : input whitening with subkeys 0..3, 16 rounds using subkeys 8..39,
;	undo of the last swap, output whitening with subkeys 4..7
; ********************************************************************

Encrypt:
	; initial phase
	; =============
	MOV	A,R0		; save R0
	MOV	R5,A
	CALL	Startkeysched	; from now on, we can forget R1
	CALL	Expkey		; subkeys 0 and 1 (Startkeysched left R4 = 0)
	MOV	A,R5
	MOV	R0,A
	MOV	R1,#EKey
	MOV	R2,#8
	CALL	XOR8		; block[0..7] ^= subkeys 0,1
	MOV	A,R0		; XOR8 left R0 on block+8 : keep it across Expkey
	MOV	R6,A
	CALL	Expkey		; subkeys 2 and 3
	MOV	A,R6
	MOV	R0,A
	MOV	R1,#EKey
	MOV	R2,#8
	CALL	XOR8		; block[8..15] ^= subkeys 2,3

	MOV	R4,#8		; round subkeys start at index 8 (4..7 are used at the end)

	MOV	A,R5
	MOV	R0,A
	MOV	R6,#16		; for i=1 to 16
En_loo:	MOV	R1,#G0
	MOV	R2,#4
	CALL	RAMCOPY		;	copy M0, M1 to buffers (and rotate M1)
	MOV	A,@R0		;	G1 = M1 rotated left by 8 bits
	MOV	G1+3,A
	INC	R0
	MOV	A,@R0
	MOV	G1,A
	INC	R0
	MOV	A,@R0
	MOV	G1+1,A
	INC	R0
	MOV	A,@R0
	MOV	G1+2,A
	MOV	R1,#S
	MOV	R0,#G0
	CALL	H_FUN		;	h_fun(M0,S)
	MOV	R0,#G1
	MOV	R1,#S		;	(H_FUN moved R1 : reload it)
	CALL	H_FUN		;	h_fun(M1,S)
	CALL	Expkey		;	the two subkeys of this round -> EKey, EKey2
	MOV	A,G0+3		;	F0=g0+g1
	ADD	A,G1+3
	MOV	TEMP+3,A
	MOV	A,G0+2
	ADDC	A,G1+2
	MOV	TEMP+2,A
	MOV	A,G0+1
	ADDC	A,G1+1
	MOV	TEMP+1,A
	MOV	A,G0
	ADDC	A,G1
	MOV	TEMP,A
				;	F0+=EKey[0]
	MOV	A,TEMP+3
	ADD	A,EKey+3
	MOV	TEMP+3,A
	MOV	A,TEMP+2
	ADDC	A,EKey+2
	MOV	TEMP+2,A
	MOV	A,TEMP+1
	ADDC	A,EKey+1
	MOV	TEMP+1,A
	MOV	A,TEMP
	ADDC	A,EKey
	MOV	TEMP,A
				;	F1=g0+EKey[1]
	MOV	A,G0+3
	ADD	A,EKey2+3
	MOV	G0+3,A
	MOV	A,G0+2
	ADDC	A,EKey2+2
	MOV	G0+2,A
	MOV	A,G0+1
	ADDC	A,EKey2+1
	MOV	G0+1,A
	MOV	A,G0
	ADDC	A,EKey2
	MOV	G0,A
				;	2*g1
	MOV	A,G1+3
	CLR	C
	RLC	A
	MOV	G1+3,A
	MOV	A,G1+2
	RLC	A
	MOV	G1+2,A
	MOV	A,G1+1
	RLC	A
	MOV	G1+1,A
	MOV	A,G1
	RLC	A
	MOV	G1,A
				;	F1+=2*g1
	MOV	A,G0+3
	ADD	A,G1+3
	MOV	G0+3,A
	MOV	A,G0+2
	ADDC	A,G1+2
	MOV	G0+2,A
	MOV	A,G0+1
	ADDC	A,G1+1
	MOV	G0+1,A
	MOV	A,G0
	ADDC	A,G1
	MOV	G0,A

	MOV	A,R5		;	M0=M2^F0
	ADD	A,#8
	MOV	R1,A
	MOV	A,TEMP
	XRL	A,@R1
	MOV	TEMP,A
	INC	R1
	MOV	A,TEMP+1
	XRL	A,@R1
	MOV	TEMP+1,A
	INC	R1
	MOV	A,TEMP+2
	XRL	A,@R1
	MOV	TEMP+2,A
	INC	R1
	MOV	A,TEMP+3
	XRL	A,@R1
	MOV	TEMP+3,A

	MOV	A,R5		;	put former M0 in M2
	MOV	R0,A
	ADD	A,#8
	MOV	R1,A
	MOV	R2,#4
	CALL	RAMCOPY	

	MOV	A,R5		;	new M0 = (M2^F0) rotated right by 1 bit
	MOV	R0,A
	INC	R0
	MOV	A,TEMP		
	RRC	A		;	bit 7 is whatever the carry held : fixed below
	MOV	B,A
	MOV	A,TEMP+1
	RRC	A
	MOV	@R0,A
	INC	R0
	MOV	A,TEMP+2
	RRC	A
	MOV	@R0,A
	INC	R0
	MOV	A,TEMP+3
	RRC	A
	MOV	@R0,A
	MOV	B.7,C		;	bit rotated out of the low byte enters at the top
	MOV	A,R5
	MOV	R0,A
	MOV	@R0,B

	MOV	A,R5		;	TEMP = M3 rotated left by 1 bit
	ADD	A,#15
	MOV	R0,A
	MOV	A,@R0
	RLC	A		;	bit 0 is whatever the carry held : fixed below
	MOV	B,A
	DEC	R0
	MOV	A,@R0
	RLC	A
	MOV	TEMP+2,A
	DEC	R0
	MOV	A,@R0
	RLC	A
	MOV	TEMP+1,A
	DEC	R0
	MOV	A,@R0
	RLC	A
	MOV	TEMP,A
	MOV	B.0,C		;	bit rotated out of the top byte enters at the bottom
	MOV	TEMP+3,B

	MOV	A,R5
	ADD	A,#4
	MOV	R0,A
	ADD	A,#8
	MOV	R1,A
	MOV	R2,#4
	CALL	RAMCOPY		;	put former M1 in M3

	DEC	R0		;	M1'=M3^F1  (RAMCOPY left R0 on block+8 : walk M1 downwards)
	MOV	A,TEMP+3
	XRL	A,G0+3
	MOV	@R0,A
	DEC	R0
	MOV	A,TEMP+2
	XRL	A,G0+2
	MOV	@R0,A
	DEC	R0
	MOV	A,TEMP+1
	XRL	A,G0+1
	MOV	@R0,A
	DEC	R0
	MOV	A,TEMP
	XRL	A,G0
	MOV	@R0,A
	MOV	A,R5
	MOV	R0,A
	DEC	R6
	MOV	A,R6
	JZ	En_finish
	JMP	En_loo		; end for (presumably DEC/JZ/JMP because the body is too long for DJNZ)

En_finish:
	MOV	R1,#G0		; undo last swap (there are faster ways, but is it so usefull ?)
	MOV	R2,#8
	CALL	RAMCOPY		;	G0..G1+3 = block[0..7]
	MOV	A,R5
	MOV	R1,A
	ADD	A,#8
	MOV	R0,A		;	source = block+8, set explicitly instead of relying on
				;	the value the previous RAMCOPY happened to leave in R0
	MOV	R2,#8
	CALL	RAMCOPY		;	block[0..7] = block[8..15]
	MOV	R0,#G0
	MOV	R2,#8
	CALL	RAMCOPY		;	block[8..15] = saved first half (R1 is on block+8 here)
	MOV	R4,#4		; output whitening uses subkeys 4..7

	CALL	Expkey
	MOV	A,R5
	MOV	R0,A
	MOV	R1,#EKey
	MOV	R2,#8
	CALL	XOR8		; block[0..7] ^= subkeys 4,5
	MOV	A,R0
	MOV	R6,A
	CALL	Expkey
	MOV	A,R6
	MOV	R0,A
	MOV	R1,#EKey
	MOV	R2,#8
	CALL	XOR8		; block[8..15] ^= subkeys 6,7
	MOV	A,R5
	MOV	R0,A		; give the block address back to the caller

	RET

;$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$


; JUNK: leftover key-schedule experiment.
; No jump or call to JUNK appears in this file (the program starts with JMP TEST),
; so as it stands this code is not executed. If it were entered it would write
; 16 bytes at SAMPLE, run Startkeysched and Expkey on them as a key, and then
; fall straight through into TEST.
JUNK:	MOV	R0,#SAMPLE
	MOV	@R0,#0A1H
	INC	R0
	MOV	@R0,#09BH
	INC	R0
	MOV	@R0,#073H
	INC	R0
	MOV	@R0,#08EH
	INC	R0
	MOV	A,R0		; R1 = SAMPLE+4 ; never used : R1 is reloaded with #SAMPLE below
	MOV	R1,A
	MOV	@R0,#019H
	INC	R0
	MOV	@R0,#054H
	INC	R0
	MOV	@R0,#09FH
	INC	R0
	MOV	@R0,#078H
	INC	R0
	MOV	@R0,#001H
	INC	R0
	MOV	@R0,#023H
	INC	R0
	MOV	@R0,#045H
	INC	R0
	MOV	@R0,#067H
	INC	R0
	MOV	@R0,#0ABH
	INC	R0
	MOV	@R0,#0CDH
	INC	R0
	MOV	@R0,#0EFH
	INC	R0
	MOV	@R0,#01H
	MOV	R1,#SAMPLE	; the 16 bytes just written are used as the key
	CALL	Startkeysched
	CALL	Expkey		; first subkey pair -> EKey, EKey2


; TEST: program entry (reached through the JMP at the start of the file).
; Writes a fixed 16-byte data block at SAMPLE and a fixed 16-byte key at SAMPKEY,
; encrypts the block in place, then halts in an endless loop so that the result
; can be inspected at SAMPLE..SAMPLE+15. Without the halt the CPU would continue
; into whatever follows the program in code memory once Encrypt returns.
; Note that Encrypt overwrites part of the key at SAMPKEY.
TEST:	MOV	R0,#SAMPLE

	MOV	@R0,#16H	; data block, 16 bytes
	INC	R0
	MOV	@R0,#0DBH
	INC	R0
	MOV	@R0,#091H
	INC	R0
	MOV	@R0,#0D4H
	INC	R0
	MOV	@R0,#9EH
	INC	R0
	MOV	@R0,#0C3H
	INC	R0
	MOV	@R0,#0B1H
	INC	R0
	MOV	@R0,#0E7H
	INC	R0
	MOV	@R0,#6BH
	INC	R0
	MOV	@R0,#08H
	INC	R0
	MOV	@R0,#0CBH
	INC	R0
	MOV	@R0,#086H
	INC	R0
	MOV	@R0,#19H
	INC	R0
	MOV	@R0,#54H
	INC	R0
	MOV	@R0,#09FH
	INC	R0
	MOV	@R0,#078H
	INC	R0

	MOV	R0,#SAMPKEY
	MOV	@R0,#5CH	; key, 16 bytes
	INC	R0
	MOV	@R0,#9FH
	INC	R0
	MOV	@R0,#058H
	INC	R0
	MOV	@R0,#09FH
	INC	R0
	MOV	@R0,#32H
	INC	R0
	MOV	@R0,#2CH
	INC	R0
	MOV	@R0,#012H
	INC	R0
	MOV	@R0,#0F6H
	INC	R0
	MOV	@R0,#2FH
	INC	R0
	MOV	@R0,#0ECH
	INC	R0
	MOV	@R0,#0BFH
	INC	R0
	MOV	@R0,#0B6H
	INC	R0
	MOV	@R0,#5AH
	INC	R0
	MOV	@R0,#0C3H
	INC	R0
	MOV	@R0,#0E8H
	INC	R0
	MOV	@R0,#02AH

	MOV	R0,#SAMPLE
	MOV	R1,#SAMPKEY
	CALL ENCRYPT
	SJMP	$		; done : stay here instead of running off the end of the program
	END