/* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * IP/TCP/UDP checksumming routines * * Authors: Jorge Cwik, * Arnt Gulbrandsen, * Tom May, * Pentium Pro/II routines: * Alexander Kjeldaas * Finn Arne Gangstad * Lots of code moved from tcp.c and ip.c; see those files * for more names. * * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception * handling. * Andi Kleen, add zeroing on error * converted to pure assembler * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ /* * computes a partial checksum, e.g. for TCP/UDP fragments */ /* unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) */ .text .align 4 .globl _csum_partial #ifndef CONFIG_X86_USE_PPRO_CHECKSUM /* * Experiments with Ethernet and SLIP connections show that buff * is aligned on either a 2-byte or 4-byte boundary. We get at * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. * Fortunately, it is easy to convert 2-byte alignment to 4-byte * alignment for the unrolled loop. */ _csum_partial: pushl %esi pushl %ebx movl 20(%esp),%eax # Function arg: unsigned int sum movl 16(%esp),%ecx # Function arg: int len movl 12(%esp),%esi # Function arg: unsigned char *buff testl $3, %esi # Check alignment. jz 2f # Jump if alignment is ok. testl $1, %esi # Check alignment. jz 10f # Jump if alignment is boundary of 2bytes. // buf is odd dec %ecx jl 8f movzbl (%esi), %ebx adcl %ebx, %eax roll $8, %eax inc %esi testl $2, %esi jz 2f 10: subl $2, %ecx # Alignment uses up two bytes. jae 1f # Jump if we had at least two bytes. addl $2, %ecx # ecx was < 2. Deal with it. jmp 4f 1: movw (%esi), %bx addl $2, %esi addw %bx, %ax adcl $0, %eax 2: movl %ecx, %edx shrl $5, %ecx jz 2f testl %esi, %esi 1: movl (%esi), %ebx adcl %ebx, %eax movl 4(%esi), %ebx adcl %ebx, %eax movl 8(%esi), %ebx adcl %ebx, %eax movl 12(%esi), %ebx adcl %ebx, %eax movl 16(%esi), %ebx adcl %ebx, %eax movl 20(%esi), %ebx adcl %ebx, %eax movl 24(%esi), %ebx adcl %ebx, %eax movl 28(%esi), %ebx adcl %ebx, %eax lea 32(%esi), %esi dec %ecx jne 1b adcl $0, %eax 2: movl %edx, %ecx andl $0x1c, %edx je 4f shrl $2, %edx # This clears CF 3: adcl (%esi), %eax lea 4(%esi), %esi dec %edx jne 3b adcl $0, %eax 4: andl $3, %ecx jz 7f cmpl $2, %ecx jb 5f movw (%esi),%cx leal 2(%esi),%esi je 6f shll $16,%ecx 5: movb (%esi),%cl 6: addl %ecx,%eax adcl $0, %eax 7: testl $1, 12(%esp) jz 8f roll $8, %eax 8: popl %ebx popl %esi ret #else /* Version for PentiumII/PPro */ csum_partial: pushl %esi pushl %ebx movl 20(%esp),%eax # Function arg: unsigned int sum movl 16(%esp),%ecx # Function arg: int len movl 12(%esp),%esi # Function arg: const unsigned char *buf testl $3, %esi jnz 25f 10: movl %ecx, %edx movl %ecx, %ebx andl $0x7c, %ebx shrl $7, %ecx addl %ebx,%esi shrl $2, %ebx negl %ebx lea 45f(%ebx,%ebx,2), %ebx testl %esi, %esi jmp *%ebx # Handle 2-byte-aligned regions 20: addw (%esi), %ax lea 2(%esi), %esi adcl $0, %eax jmp 10b 25: testl $1, %esi jz 30f # buf is odd dec %ecx jl 90f movzbl (%esi), %ebx addl %ebx, %eax adcl $0, %eax roll $8, %eax inc %esi testl $2, %esi jz 10b 30: subl $2, %ecx ja 20b je 32f addl $2, %ecx jz 80f movzbl (%esi),%ebx # csumming 1 byte, 2-aligned addl %ebx, %eax adcl $0, %eax jmp 80f 32: addw (%esi), %ax # csumming 2 bytes, 2-aligned adcl $0, %eax jmp 80f 40: addl -128(%esi), %eax adcl -124(%esi), %eax adcl -120(%esi), %eax adcl -116(%esi), %eax adcl -112(%esi), %eax adcl -108(%esi), %eax adcl -104(%esi), %eax adcl -100(%esi), %eax adcl -96(%esi), %eax adcl -92(%esi), %eax adcl -88(%esi), %eax adcl -84(%esi), %eax adcl -80(%esi), %eax adcl -76(%esi), %eax adcl -72(%esi), %eax adcl -68(%esi), %eax adcl -64(%esi), %eax adcl -60(%esi), %eax adcl -56(%esi), %eax adcl -52(%esi), %eax adcl -48(%esi), %eax adcl -44(%esi), %eax adcl -40(%esi), %eax adcl -36(%esi), %eax adcl -32(%esi), %eax adcl -28(%esi), %eax adcl -24(%esi), %eax adcl -20(%esi), %eax adcl -16(%esi), %eax adcl -12(%esi), %eax adcl -8(%esi), %eax adcl -4(%esi), %eax 45: lea 128(%esi), %esi adcl $0, %eax dec %ecx jge 40b movl %edx, %ecx 50: andl $3, %ecx jz 80f # Handle the last 1-3 bytes without jumping notl %ecx # 1->2, 2->1, 3->0, higher bits are masked movl $0xffffff,%ebx # by the shll and shrl instructions shll $3,%ecx shrl %cl,%ebx andl -128(%esi),%ebx # esi is 4-aligned so should be ok addl %ebx,%eax adcl $0,%eax 80: testl $1, 12(%esp) jz 90f roll $8, %eax 90: popl %ebx popl %esi ret #endif