/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *              Pentium Pro/II routines:
 *              Alexander Kjeldaas <astor@guardian.no>
 *              Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *                   converted to pure assembler
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

				
/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*	
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */
		
.text
/*
 * unsigned int csum_partial(const unsigned char *buff, int len,
 *                           unsigned int sum)
 *
 * Adds the bytes buff[0..len-1], taken as little-endian 16-bit words, to the
 * 32-bit accumulator 'sum' using end-around carry, and returns the new
 * accumulator in %eax.  The caller folds the result to 16 bits.
 *
 * Arguments are read from the stack (buff, len, sum in that order above the
 * return address).  %esi and %ebx are saved and restored; %ecx, %edx and the
 * flags are clobbered.
 *
 * Register roles:
 *   %eax  running sum
 *   %esi  read cursor
 *   %ecx  bytes remaining, then number of whole 128-byte blocks
 *   %edx  copy of the remaining length, kept for the 1-3 byte tail
 *   %ebx  scratch / computed jump target
 *
 * Alignment handling:
 *   - An odd buff has its first byte summed on its own and the accumulator
 *     is rotated left by 8 bits, so that the following aligned data lands in
 *     the correct byte lane; the accumulator is rotated by 8 again on exit.
 *     The returned 32-bit value is therefore only meaningful after folding.
 *   - A buff that is 2- but not 4-byte aligned has one 16-bit word summed
 *     first.
 *   After that %esi is 4-byte aligned, which is what makes the masked 4-byte
 *   load used for the final 1-3 bytes safe (it cannot cross into another
 *   aligned dword, hence not into another page).
 *
 * len == 0 returns 'sum' unchanged for every alignment.
 */
.align 4
.globl csum_partial
csum_partial:
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf

	testl $3, %esi		# buf already 4-byte aligned?
	jnz 25f			# no: fix up alignment first
10:
	/* %esi is 4-byte aligned here; %ecx = bytes still to sum. */
	movl %ecx, %edx		# keep the length for the tail at 50:
	movl %ecx, %ebx
	andl $0x7c, %ebx	# bytes in whole dwords short of a 128-byte block
	shrl $7, %ecx		# number of whole 128-byte blocks
	addl %ebx,%esi		# step over the partial block; it is read via negative offsets
	shrl $2, %ebx		# bytes -> dwords
	negl %ebx
	/*
	 * Enter the unrolled chain that many adcl instructions before 45:.
	 * The scale of 3 relies on every adcl disp8(%esi),%eax below being
	 * three bytes long.
	 */
	lea 45f(%ebx,%ebx,2), %ebx
	testl %esi, %esi	# clears CF so the first adcl sees no stale carry
	jmp *%ebx

	# Handle 2-byte-aligned regions
20:	addw (%esi), %ax
	lea 2(%esi), %esi	# lea keeps CF from the addw intact
	adcl $0, %eax
	jmp 10b

25:
	testl $1, %esi
	jz 30f			# even but not 4-aligned: one leading word
	# buf is odd: sum the first byte alone, then work in a rotated frame
	dec %ecx
	jl 90f			# len == 0: nothing to sum, no rotation to undo
	movzbl (%esi), %ebx
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax		# undone by the roll at 80:
	inc %esi
	testl $2, %esi
	jz 10b			# now 4-byte aligned

30:	subl $2, %ecx
	ja 20b			# more than 2 bytes left: sum a word, then the aligned path
	je 32f			# exactly 2 bytes left
	addl $2, %ecx		# fewer than 2: restore the count
	jz 80f			# 0 bytes left: do not touch memory
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

	/* One 128-byte block: 32 dwords summed in a single carry chain. */
40: 
	addl -128(%esi), %eax
        .irp p,124,120,116,112,108,104,100, 96, 92, 88, 84, 80, 76, 72, 68,  \
                64, 60, 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12,  8,  4
        adcl -\p(%esi),%eax
        .endr
45:
	lea 128(%esi), %esi	# lea keeps the pending carry intact
	adcl $0, %eax		# fold the carry out of the chain
	dec %ecx
	jge 40b			# another whole block to do
	movl %edx, %ecx
50:	andl $3, %ecx		# 0-3 bytes left after the last whole dword
	jz 80f

	# Handle the last 1-3 bytes without jumping
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx		# ebx = 0xff, 0xffff or 0xffffff
	andl -128(%esi),%ebx	# esi is 4-aligned here, so the dword read stays in one page
	addl %ebx,%eax
	adcl $0,%eax
80: 
	testl $1, 12(%esp)	# was the original buf odd?
	jz 90f
	roll $8, %eax		# rotate back into the lane layout of the caller
90: 
	popl %ebx
	popl %esi
	ret
				

/*
unsigned int csum_partial_copy(const char *src, char *dst, int len, int sum)
 */ 

/*
 * Copy from ds while checksumming, otherwise like csum_partial
 */

/* XXX KAF: we don't need to worry about protection faults here, so the fault handling has been removed; SRC()/DST() below expand to the bare instruction. */
/*
 * SRC() wraps each load from the source buffer and DST() each store to the
 * destination buffer.  Both expand to the wrapped instruction followed by
 * ';' and emit nothing else.
 */
#define SRC(y...) y;
#define DST(y...) y;

.align 4
.globl csum_partial_copy
				
/* Version for PentiumII/PPro */

/*
 * ROUND1(x) / ROUND(x): load the 32-bit word at x(%esi) into %ebx, add it to
 * the running sum in %eax and store it to x(%edi).  ROUND1 uses addl and so
 * ignores any incoming carry; ROUND uses adcl and so continues the carry
 * chain of the preceding round.
 */
#define ROUND1(x) \
	SRC(movl x(%esi), %ebx	)	;	\
	addl %ebx, %eax			;	\
	DST(movl %ebx, x(%edi)	)	; 

#define ROUND(x) \
	SRC(movl x(%esi), %ebx	)	;	\
	adcl %ebx, %eax			;	\
	DST(movl %ebx, x(%edi)	)	;

/*
 * Bytes occupied by the three registers pushed on entry; the stack arguments
 * are addressed as ARGBASE+n(%esp).
 */
#define ARGBASE 12
		
/*
 * csum_partial_copy: copy len bytes from src to dst while adding them, as
 * 32-bit words with end-around carry, to sum.  The new sum is left in %eax.
 *
 * Arguments are read from the stack in the order src, dst, len, sum.
 * %ebx, %edi and %esi are saved and restored; %ecx and %edx are overwritten.
 *
 * Register roles:
 *   %eax  running sum
 *   %esi  source cursor
 *   %edi  destination cursor
 *   %ecx  number of whole 64-byte blocks left
 *   %edx  copy of len, later the assembled 1-3 trailing bytes
 *   %ebx  scratch / computed jump target / word being copied
 */
csum_partial_copy:
	pushl %ebx
	pushl %edi
	pushl %esi
	movl ARGBASE+4(%esp),%esi	#src
	movl ARGBASE+8(%esp),%edi	#dst	
	movl ARGBASE+12(%esp),%ecx	#len
	movl ARGBASE+16(%esp),%eax	#sum
	movl %ecx, %edx  		/* keep len for the 1-3 byte tail at 4: */
	movl %ecx, %ebx  
	shrl $6, %ecx     		/* ecx = number of whole 64-byte blocks */
	andl $0x3c, %ebx  		/* ebx = bytes in whole words short of a block */
	negl %ebx
	subl %ebx, %esi  		/* step both cursors past those words; they are */
	subl %ebx, %edi  		/* accessed through the negative offsets below  */
	/*
	 * Jump target is 3: minus twice that byte count, i.e. one ROUND per
	 * leftover word.  NOTE(review): this presumes each ROUND assembles to
	 * exactly 8 bytes - confirm if SRC/DST or the offsets ever change.
	 */
	lea 3f(%ebx,%ebx), %ebx
	testl %esi, %esi 		/* test clears CF, so the first adcl reached sees no carry */
	jmp *%ebx         
	/* One 64-byte block: 16 words copied and summed in one carry chain. */
1:	addl $64,%esi
	addl $64,%edi 
	ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)	
	ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)	
	ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)	
	ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)	
3:	adcl $0,%eax			/* fold the carry out of the chain */
	dec %ecx
	jge 1b				/* another whole block to do */
	/* Tail: the 0-3 bytes that remain after the last whole word. */
4:      andl $3, %edx
	jz 7f				/* nothing left */
	cmpl $2, %edx
	jb 5f				/* exactly one byte */
SRC(	movw (%esi), %dx         )
	leal 2(%esi), %esi		/* leal leaves the cmpl flags intact for the je below */
DST(	movw %dx, (%edi)         )
	leal 2(%edi), %edi
	je 6f				/* exactly two bytes: %edx holds them */
	shll $16,%edx			/* three bytes: move the word up to free %dl */
5:
SRC(	movb (%esi), %dl         )
DST(	movb %dl, (%edi)         )
6:	addl %edx, %eax			/* add the assembled tail to the sum */
	adcl $0, %eax
7:
	popl %esi
	popl %edi
	popl %ebx
	ret
				
#undef ROUND
#undef ROUND1		
