1 /*
2 * COPYRIGHT: See COPYING in the top level directory
3 * PROJECT: ReactOS TCP/IP protocol driver
4 * FILE: include/tcpcore.h
5 * PURPOSE: Transmission Control Protocol definitions
6 * REVISIONS:
7 * CSH 01/01-2003 Ported from linux kernel 2.4.20
8 */
9
10 /*
11 * INET An implementation of the TCP/IP protocol suite for the LINUX
12 * operating system. INET is implemented using the BSD Socket
13 * interface as the means of communication with the user level.
14 *
15 * Definitions for the TCP module.
16 *
17 * Version: @(#)tcp.h 1.0.5 05/23/93
18 *
19 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
20 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 */
27 #ifndef __TCPCORE_H
28 #define __TCPCORE_H
29
30 #include "tcpdef.h"
31
32
33 struct socket;
34
35
36
37 #if 1 /* skbuff */
38
39 #define HAVE_ALLOC_SKB /* For the drivers to know */
40 #define HAVE_ALIGNABLE_SKB /* Ditto 8) */
41 #define SLAB_SKB /* Slabified skbuffs */
42
43 #define CHECKSUM_NONE 0
44 #define CHECKSUM_HW 1
45 #define CHECKSUM_UNNECESSARY 2
46
47 #define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1))
48 #define SKB_MAX_ORDER(X,ORDER) (((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1))
49 #define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X),0))
50 #define SKB_MAX_ALLOC (SKB_MAX_ORDER(0,2))
51
52 /* A. Checksumming of received packets by device.
53 *
54 * NONE: device failed to checksum this packet.
55 * skb->csum is undefined.
56 *
 57 * UNNECESSARY: device parsed the packet and claims to have verified the
 58 * checksum. skb->csum is undefined.
 59 * It is a bad option, but, unfortunately, many vendors do this.
 60 * Apparently with the secret goal of selling you a new device when you
 61 * add a new protocol to your host. F.e. IPv6. 8)
62 *
 63 * HW: the most generic way. The device supplied the checksum of the
 64 * _whole_ packet as seen by netif_rx in skb->csum.
 65 * NOTE: Even if the device supports only some protocols but is
 66 * able to produce some skb->csum, it MUST use HW,
 67 * not UNNECESSARY.
68 *
69 * B. Checksumming on output.
70 *
71 * NONE: skb is checksummed by protocol or csum is not required.
72 *
73 * HW: device is required to csum packet as seen by hard_start_xmit
74 * from skb->h.raw to the end and to record the checksum
75 * at skb->h.raw+skb->csum.
76 *
77 * Device must show its capabilities in dev->features, set
78 * at device setup time.
 79 * NETIF_F_HW_CSUM - a clever device that is able to checksum
 80 * everything.
 81 * NETIF_F_NO_CSUM - loopback or reliable single hop media.
 82 * NETIF_F_IP_CSUM - the device is dumb: it is able to csum only
 83 * TCP/UDP over IPv4. Sigh. Vendors prefer it this
 84 * way for some unknown reason. Though, see the comment above
 85 * about CHECKSUM_UNNECESSARY. 8)
86 *
87 * Any questions? No questions, good. --ANK
88 */
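/*
 * Illustrative sketch, not part of the original comment: a receive path
 * that follows the convention above would do roughly
 *
 *	skb->csum      = hw_csum;
 *	skb->ip_summed = CHECKSUM_HW;
 *	netif_rx(skb);
 *
 * where hw_csum is a hypothetical checksum of the whole packet as reported
 * by the hardware.  A driver with no checksum support leaves ip_summed as
 * CHECKSUM_NONE and lets the protocol layers verify the checksum in
 * software.
 */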
89
90 #ifdef __i386__
91 #define NET_CALLER(arg) (*(((void**)&arg)-1))
92 #else
93 #define NET_CALLER(arg) __builtin_return_address(0)
94 #endif
95
96 #ifdef CONFIG_NETFILTER
97 struct nf_conntrack {
98 atomic_t use;
99 void (*destroy)(struct nf_conntrack *);
100 };
101
102 struct nf_ct_info {
103 struct nf_conntrack *master;
104 };
105 #endif
106
107 struct sk_buff_head {
108 /* These two members must be first. */
109 struct sk_buff * next;
110 struct sk_buff * prev;
111
112 __u32 qlen;
113 spinlock_t lock;
114 };
115
116 struct sk_buff;
117
118 #define MAX_SKB_FRAGS 6
119
120 typedef struct skb_frag_struct skb_frag_t;
121
122 struct skb_frag_struct
123 {
124 struct page *page;
125 __u16 page_offset;
126 __u16 size;
127 };
128
129 /* This data is invariant across clones and lives at
130 * the end of the header data, ie. at skb->end.
131 */
132 struct skb_shared_info {
133 atomic_t dataref;
134 unsigned int nr_frags;
135 struct sk_buff *frag_list;
136 skb_frag_t frags[MAX_SKB_FRAGS];
137 };
138
139 struct sk_buff {
140 /* These two members must be first. */
141 struct sk_buff * next; /* Next buffer in list */
142 struct sk_buff * prev; /* Previous buffer in list */
143
144 struct sk_buff_head * list; /* List we are on */
145 struct sock *sk; /* Socket we are owned by */
146 struct timeval stamp; /* Time we arrived */
147 struct net_device *dev; /* Device we arrived on/are leaving by */
148
149 /* Transport layer header */
150 union
151 {
152 struct tcphdr *th;
153 struct udphdr *uh;
154 struct icmphdr *icmph;
155 struct igmphdr *igmph;
156 struct iphdr *ipiph;
157 struct spxhdr *spxh;
158 unsigned char *raw;
159 } h;
160
161 /* Network layer header */
162 union
163 {
164 struct iphdr *iph;
165 struct ipv6hdr *ipv6h;
166 struct arphdr *arph;
167 struct ipxhdr *ipxh;
168 unsigned char *raw;
169 } nh;
170
171 /* Link layer header */
172 union
173 {
174 struct ethhdr *ethernet;
175 unsigned char *raw;
176 } mac;
177
178 struct dst_entry *dst;
179
180 /*
181 * This is the control buffer. It is free to use for every
182 * layer. Please put your private variables there. If you
183 * want to keep them across layers you have to do a skb_clone()
184 * first. This is owned by whoever has the skb queued ATM.
185 */
186 char cb[48];
187
188 unsigned int len; /* Length of actual data */
189 unsigned int data_len;
190 unsigned int csum; /* Checksum */
191 unsigned char __unused, /* Dead field, may be reused */
192 cloned, /* head may be cloned (check refcnt to be sure). */
193 pkt_type, /* Packet class */
194 ip_summed; /* Driver fed us an IP checksum */
195 __u32 priority; /* Packet queueing priority */
196 atomic_t users; /* User count - see datagram.c,tcp.c */
197 unsigned short protocol; /* Packet protocol from driver. */
198 unsigned short security; /* Security level of packet */
199 unsigned int truesize; /* Buffer size */
200
201 unsigned char *head; /* Head of buffer */
202 unsigned char *data; /* Data head pointer */
203 unsigned char *tail; /* Tail pointer */
204 unsigned char *end; /* End pointer */
205
206 void (*destructor)(struct sk_buff *); /* Destruct function */
207 #ifdef CONFIG_NETFILTER
208 /* Can be used for communication between hooks. */
209 unsigned long nfmark;
210 /* Cache info */
211 __u32 nfcache;
212 /* Associated connection, if any */
213 struct nf_ct_info *nfct;
214 #ifdef CONFIG_NETFILTER_DEBUG
215 unsigned int nf_debug;
216 #endif
217 #endif /*CONFIG_NETFILTER*/
218
219 #if defined(CONFIG_HIPPI)
220 union{
221 __u32 ifield;
222 } private;
223 #endif
224
225 #ifdef CONFIG_NET_SCHED
226 __u32 tc_index; /* traffic control index */
227 #endif
228 };
229
230 #define SK_WMEM_MAX 65535
231 #define SK_RMEM_MAX 65535
232
233 #if 1
234 //#ifdef __KERNEL__
235 /*
236 * Handling routines are only of interest to the kernel
237 */
238
239 extern void __kfree_skb(struct sk_buff *skb);
240 extern struct sk_buff * alloc_skb(unsigned int size, int priority);
241 extern void kfree_skbmem(struct sk_buff *skb);
242 extern struct sk_buff * skb_clone(struct sk_buff *skb, int priority);
243 extern struct sk_buff * skb_copy(const struct sk_buff *skb, int priority);
244 extern struct sk_buff * pskb_copy(struct sk_buff *skb, int gfp_mask);
245 extern int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask);
246 extern struct sk_buff * skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom);
247 extern struct sk_buff * skb_copy_expand(const struct sk_buff *skb,
248 int newheadroom,
249 int newtailroom,
250 int priority);
251 #define dev_kfree_skb(a) kfree_skb(a)
252 extern void skb_over_panic(struct sk_buff *skb, int len, void *here);
253 extern void skb_under_panic(struct sk_buff *skb, int len, void *here);
254
255 /* Internal */
256 #define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end))
257
258 /**
259 * skb_queue_empty - check if a queue is empty
260 * @list: queue head
261 *
262 * Returns true if the queue is empty, false otherwise.
263 */
264
265 static __inline int skb_queue_empty(struct sk_buff_head *list)
266 {
267 return (list->next == (struct sk_buff *) list);
268 }
269
270 /**
271 * skb_get - reference buffer
272 * @skb: buffer to reference
273 *
274 * Makes another reference to a socket buffer and returns a pointer
275 * to the buffer.
276 */
277
278 static __inline struct sk_buff *skb_get(struct sk_buff *skb)
279 {
280 atomic_inc(&skb->users);
281 return skb;
282 }
283
284 /*
 285 * If users==1, we are the only owner and can avoid a redundant
286 * atomic change.
287 */
288
289 /**
290 * kfree_skb - free an sk_buff
291 * @skb: buffer to free
292 *
293 * Drop a reference to the buffer and free it if the usage count has
294 * hit zero.
295 */
296
297 static __inline void kfree_skb(struct sk_buff *skb)
298 {
299 if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
300 __kfree_skb(skb);
301 }
302
303 /* Use this if you didn't touch the skb state [for fast switching] */
304 static __inline void kfree_skb_fast(struct sk_buff *skb)
305 {
306 if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
307 kfree_skbmem(skb);
308 }
309
310 /**
311 * skb_cloned - is the buffer a clone
312 * @skb: buffer to check
313 *
314 * Returns true if the buffer was generated with skb_clone() and is
315 * one of multiple shared copies of the buffer. Cloned buffers are
316 * shared data so must not be written to under normal circumstances.
317 */
318
319 static __inline int skb_cloned(struct sk_buff *skb)
320 {
321 return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1;
322 }
323
324 /**
325 * skb_shared - is the buffer shared
326 * @skb: buffer to check
327 *
328 * Returns true if more than one person has a reference to this
329 * buffer.
330 */
331
332 static __inline int skb_shared(struct sk_buff *skb)
333 {
334 return (atomic_read(&skb->users) != 1);
335 }
336
337 /**
338 * skb_share_check - check if buffer is shared and if so clone it
339 * @skb: buffer to check
340 * @pri: priority for memory allocation
341 *
342 * If the buffer is shared the buffer is cloned and the old copy
343 * drops a reference. A new clone with a single reference is returned.
 344 * If the buffer is not shared the original buffer is returned. When
 345 * being called from interrupt context or with spinlocks held, @pri must
 346 * be %GFP_ATOMIC.
347 *
348 * NULL is returned on a memory allocation failure.
349 */
350
351 static __inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri)
352 {
353 if (skb_shared(skb)) {
354 struct sk_buff *nskb;
355 nskb = skb_clone(skb, pri);
356 kfree_skb(skb);
357 return nskb;
358 }
359 return skb;
360 }
361
362
363 /*
364 * Copy shared buffers into a new sk_buff. We effectively do COW on
365 * packets to handle cases where we have a local reader and forward
366 * and a couple of other messy ones. The normal one is tcpdumping
367 * a packet thats being forwarded.
368 */
369
370 /**
371 * skb_unshare - make a copy of a shared buffer
372 * @skb: buffer to check
373 * @pri: priority for memory allocation
374 *
375 * If the socket buffer is a clone then this function creates a new
376 * copy of the data, drops a reference count on the old copy and returns
377 * the new copy with the reference count at 1. If the buffer is not a clone
378 * the original buffer is returned. When called with a spinlock held or
 379 * from interrupt context, @pri must be %GFP_ATOMIC.
380 *
381 * %NULL is returned on a memory allocation failure.
382 */
383
384 static __inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri)
385 {
386 struct sk_buff *nskb;
387 if(!skb_cloned(skb))
388 return skb;
389 nskb=skb_copy(skb, pri);
390 kfree_skb(skb); /* Free our shared copy */
391 return nskb;
392 }
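/*
 * Usage sketch (illustrative, not from the original header): code that is
 * about to modify a buffer it did not allocate typically does
 *
 *	skb = skb_share_check(skb, GFP_ATOMIC);	(clone if others hold a ref)
 *	if (skb == NULL)
 *		return;				(allocation failed)
 *	skb = skb_unshare(skb, GFP_ATOMIC);	(private data copy if cloned)
 *
 * GFP_ATOMIC is assumed to come from the allocation flag definitions pulled
 * in elsewhere; it is the pattern, not the names, that matters here.
 */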
393
394 /**
395 * skb_peek
396 * @list_: list to peek at
397 *
398 * Peek an &sk_buff. Unlike most other operations you _MUST_
399 * be careful with this one. A peek leaves the buffer on the
400 * list and someone else may run off with it. You must hold
401 * the appropriate locks or have a private queue to do this.
402 *
403 * Returns %NULL for an empty list or a pointer to the head element.
404 * The reference count is not incremented and the reference is therefore
405 * volatile. Use with caution.
406 */
407
408 static __inline struct sk_buff *skb_peek(struct sk_buff_head *list_)
409 {
410 struct sk_buff *list = ((struct sk_buff *)list_)->next;
411 if (list == (struct sk_buff *)list_)
412 list = NULL;
413 return list;
414 }
415
416 /**
417 * skb_peek_tail
418 * @list_: list to peek at
419 *
420 * Peek an &sk_buff. Unlike most other operations you _MUST_
421 * be careful with this one. A peek leaves the buffer on the
422 * list and someone else may run off with it. You must hold
423 * the appropriate locks or have a private queue to do this.
424 *
425 * Returns %NULL for an empty list or a pointer to the tail element.
426 * The reference count is not incremented and the reference is therefore
427 * volatile. Use with caution.
428 */
429
430 static __inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_)
431 {
432 struct sk_buff *list = ((struct sk_buff *)list_)->prev;
433 if (list == (struct sk_buff *)list_)
434 list = NULL;
435 return list;
436 }
437
438 /**
439 * skb_queue_len - get queue length
440 * @list_: list to measure
441 *
442 * Return the length of an &sk_buff queue.
443 */
444
445 static __inline __u32 skb_queue_len(struct sk_buff_head *list_)
446 {
447 return(list_->qlen);
448 }
449
450 static __inline void skb_queue_head_init(struct sk_buff_head *list)
451 {
452 spin_lock_init(&list->lock);
453 list->prev = (struct sk_buff *)list;
454 list->next = (struct sk_buff *)list;
455 list->qlen = 0;
456 }
457
458 /*
459 * Insert an sk_buff at the start of a list.
460 *
461 * The "__skb_xxxx()" functions are the non-atomic ones that
462 * can only be called with interrupts disabled.
463 */
464
465 /**
466 * __skb_queue_head - queue a buffer at the list head
467 * @list: list to use
468 * @newsk: buffer to queue
469 *
470 * Queue a buffer at the start of a list. This function takes no locks
471 * and you must therefore hold required locks before calling it.
472 *
473 * A buffer cannot be placed on two lists at the same time.
474 */
475
476 static __inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
477 {
478 struct sk_buff *prev, *next;
479
480 newsk->list = list;
481 list->qlen++;
482 prev = (struct sk_buff *)list;
483 next = prev->next;
484 newsk->next = next;
485 newsk->prev = prev;
486 next->prev = newsk;
487 prev->next = newsk;
488 }
489
490
491 /**
492 * skb_queue_head - queue a buffer at the list head
493 * @list: list to use
494 * @newsk: buffer to queue
495 *
496 * Queue a buffer at the start of the list. This function takes the
 497 * list lock and can be used safely with other locking &sk_buff
 498 * functions.
499 *
500 * A buffer cannot be placed on two lists at the same time.
501 */
502
503 static __inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
504 {
505 unsigned long flags;
506
507 spin_lock_irqsave(&list->lock, flags);
508 __skb_queue_head(list, newsk);
509 spin_unlock_irqrestore(&list->lock, flags);
510 }
511
512 /**
513 * __skb_queue_tail - queue a buffer at the list tail
514 * @list: list to use
515 * @newsk: buffer to queue
516 *
517 * Queue a buffer at the end of a list. This function takes no locks
518 * and you must therefore hold required locks before calling it.
519 *
520 * A buffer cannot be placed on two lists at the same time.
521 */
522
523
524 static __inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
525 {
526 struct sk_buff *prev, *next;
527
528 newsk->list = list;
529 list->qlen++;
530 next = (struct sk_buff *)list;
531 prev = next->prev;
532 newsk->next = next;
533 newsk->prev = prev;
534 next->prev = newsk;
535 prev->next = newsk;
536 }
537
538 /**
539 * skb_queue_tail - queue a buffer at the list tail
540 * @list: list to use
541 * @newsk: buffer to queue
542 *
543 * Queue a buffer at the tail of the list. This function takes the
 544 * list lock and can be used safely with other locking &sk_buff
 545 * functions.
546 *
547 * A buffer cannot be placed on two lists at the same time.
548 */
549
550 static __inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
551 {
552 unsigned long flags;
553
554 spin_lock_irqsave(&list->lock, flags);
555 __skb_queue_tail(list, newsk);
556 spin_unlock_irqrestore(&list->lock, flags);
557 }
558
559 /**
560 * __skb_dequeue - remove from the head of the queue
561 * @list: list to dequeue from
562 *
563 * Remove the head of the list. This function does not take any locks
564 * so must be used with appropriate locks held only. The head item is
565 * returned or %NULL if the list is empty.
566 */
567
568 static __inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
569 {
570 struct sk_buff *next, *prev, *result;
571
572 prev = (struct sk_buff *) list;
573 next = prev->next;
574 result = NULL;
575 if (next != prev) {
576 result = next;
577 next = next->next;
578 list->qlen--;
579 next->prev = prev;
580 prev->next = next;
581 result->next = NULL;
582 result->prev = NULL;
583 result->list = NULL;
584 }
585 return result;
586 }
587
588 /**
589 * skb_dequeue - remove from the head of the queue
590 * @list: list to dequeue from
591 *
592 * Remove the head of the list. The list lock is taken so the function
593 * may be used safely with other locking list functions. The head item is
594 * returned or %NULL if the list is empty.
595 */
596
597 static __inline struct sk_buff *skb_dequeue(struct sk_buff_head *list)
598 {
599 unsigned long flags;
600 struct sk_buff *result;
601
602 spin_lock_irqsave(&list->lock, flags);
603 result = __skb_dequeue(list);
604 spin_unlock_irqrestore(&list->lock, flags);
605 return result;
606 }
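/*
 * Illustrative sketch, not part of the original header: move one buffer
 * from one queue to another using the locked helpers above.  The function
 * name is hypothetical and exists only to demonstrate the API; the unlocked
 * __skb_*() variants would require the caller to already hold the relevant
 * list->lock.
 */
static __inline void skb_requeue_one_example(struct sk_buff_head *from,
                                             struct sk_buff_head *to)
{
	struct sk_buff *skb;

	/* Both helpers take and release the respective queue lock. */
	skb = skb_dequeue(from);
	if (skb != NULL)
		skb_queue_tail(to, skb);
}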
607
608 /*
609 * Insert a packet on a list.
610 */
611
612 static __inline void __skb_insert(struct sk_buff *newsk,
613 struct sk_buff * prev, struct sk_buff *next,
614 struct sk_buff_head * list)
615 {
616 newsk->next = next;
617 newsk->prev = prev;
618 next->prev = newsk;
619 prev->next = newsk;
620 newsk->list = list;
621 list->qlen++;
622 }
623
624 /**
625 * skb_insert - insert a buffer
626 * @old: buffer to insert before
627 * @newsk: buffer to insert
628 *
629 * Place a packet before a given packet in a list. The list locks are taken
630 * and this function is atomic with respect to other list locked calls
631 * A buffer cannot be placed on two lists at the same time.
632 */
633
634 static __inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
635 {
636 unsigned long flags;
637
638 spin_lock_irqsave(&old->list->lock, flags);
639 __skb_insert(newsk, old->prev, old, old->list);
640 spin_unlock_irqrestore(&old->list->lock, flags);
641 }
642
643 /*
644 * Place a packet after a given packet in a list.
645 */
646
647 static __inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk)
648 {
649 __skb_insert(newsk, old, old->next, old->list);
650 }
651
652 /**
653 * skb_append - append a buffer
654 * @old: buffer to insert after
655 * @newsk: buffer to insert
656 *
657 * Place a packet after a given packet in a list. The list locks are taken
658 * and this function is atomic with respect to other list locked calls.
659 * A buffer cannot be placed on two lists at the same time.
660 */
661
662
663 static __inline void skb_append(struct sk_buff *old, struct sk_buff *newsk)
664 {
665 unsigned long flags;
666
667 spin_lock_irqsave(&old->list->lock, flags);
668 __skb_append(old, newsk);
669 spin_unlock_irqrestore(&old->list->lock, flags);
670 }
671
672 /*
673 * remove sk_buff from list. _Must_ be called atomically, and with
674 * the list known..
675 */
676
677 static __inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
678 {
679 struct sk_buff * next, * prev;
680
681 list->qlen--;
682 next = skb->next;
683 prev = skb->prev;
684 skb->next = NULL;
685 skb->prev = NULL;
686 skb->list = NULL;
687 next->prev = prev;
688 prev->next = next;
689 }
690
691 /**
692 * skb_unlink - remove a buffer from a list
693 * @skb: buffer to remove
694 *
 695 * Remove a packet from a list. The list locks are taken and this
 696 * function is atomic with respect to other list locked calls.
697 *
698 * Works even without knowing the list it is sitting on, which can be
699 * handy at times. It also means that THE LIST MUST EXIST when you
700 * unlink. Thus a list must have its contents unlinked before it is
701 * destroyed.
702 */
703
704 static __inline void skb_unlink(struct sk_buff *skb)
705 {
706 struct sk_buff_head *list = skb->list;
707
708 if(list) {
709 unsigned long flags;
710
711 spin_lock_irqsave(&list->lock, flags);
712 if(skb->list == list)
713 __skb_unlink(skb, skb->list);
714 spin_unlock_irqrestore(&list->lock, flags);
715 }
716 }
717
718 /* XXX: more streamlined implementation */
719
720 /**
721 * __skb_dequeue_tail - remove from the tail of the queue
722 * @list: list to dequeue from
723 *
724 * Remove the tail of the list. This function does not take any locks
725 * so must be used with appropriate locks held only. The tail item is
726 * returned or %NULL if the list is empty.
727 */
728
729 static __inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
730 {
731 struct sk_buff *skb = skb_peek_tail(list);
732 if (skb)
733 __skb_unlink(skb, list);
734 return skb;
735 }
736
737 /**
 738 * skb_dequeue_tail - remove from the tail of the queue
 739 * @list: list to dequeue from
 740 *
 741 * Remove the tail of the list. The list lock is taken so the function
742 * may be used safely with other locking list functions. The tail item is
743 * returned or %NULL if the list is empty.
744 */
745
746 static __inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
747 {
748 unsigned long flags;
749 struct sk_buff *result;
750
751 spin_lock_irqsave(&list->lock, flags);
752 result = __skb_dequeue_tail(list);
753 spin_unlock_irqrestore(&list->lock, flags);
754 return result;
755 }
756
757 static __inline int skb_is_nonlinear(const struct sk_buff *skb)
758 {
759 return skb->data_len;
760 }
761
762 static __inline int skb_headlen(const struct sk_buff *skb)
763 {
764 return skb->len - skb->data_len;
765 }
766
767 #define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) out_of_line_bug(); } while (0)
768 #define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) out_of_line_bug(); } while (0)
769 #define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) out_of_line_bug(); } while (0)
770
771 /*
772 * Add data to an sk_buff
773 */
774
775 static __inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
776 {
777 unsigned char *tmp=skb->tail;
778 SKB_LINEAR_ASSERT(skb);
779 skb->tail+=len;
780 skb->len+=len;
781 return tmp;
782 }
783
784 /**
785 * skb_put - add data to a buffer
786 * @skb: buffer to use
787 * @len: amount of data to add
788 *
789 * This function extends the used data area of the buffer. If this would
790 * exceed the total buffer size the kernel will panic. A pointer to the
791 * first byte of the extra data is returned.
792 */
793
794 static __inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
795 {
796 #if 0
797 unsigned char *tmp=skb->tail;
798 SKB_LINEAR_ASSERT(skb);
799 skb->tail+=len;
800 skb->len+=len;
801 if(skb->tail>skb->end) {
802 skb_over_panic(skb, len, current_text_addr());
803 }
804 return tmp;
805 #else
806 return NULL;
807 #endif
808 }
809
810 static __inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
811 {
812 skb->data-=len;
813 skb->len+=len;
814 return skb->data;
815 }
816
817 /**
818 * skb_push - add data to the start of a buffer
819 * @skb: buffer to use
820 * @len: amount of data to add
821 *
822 * This function extends the used data area of the buffer at the buffer
823 * start. If this would exceed the total buffer headroom the kernel will
824 * panic. A pointer to the first byte of the extra data is returned.
825 */
826
827 static __inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
828 {
829 #if 0
830 skb->data-=len;
831 skb->len+=len;
832 if(skb->data<skb->head) {
833 skb_under_panic(skb, len, current_text_addr());
834 }
835 return skb->data;
836 #else
837 return NULL;
838 #endif
839 }
840
841 static __inline char *__skb_pull(struct sk_buff *skb, unsigned int len)
842 {
843 skb->len-=len;
844 if (skb->len < skb->data_len)
845 out_of_line_bug();
846 return skb->data+=len;
847 }
848
849 /**
850 * skb_pull - remove data from the start of a buffer
851 * @skb: buffer to use
852 * @len: amount of data to remove
853 *
854 * This function removes data from the start of a buffer, returning
855 * the memory to the headroom. A pointer to the next data in the buffer
856 * is returned. Once the data has been pulled future pushes will overwrite
857 * the old data.
858 */
859
860 static __inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len)
861 {
862 if (len > skb->len)
863 return NULL;
864 return __skb_pull(skb,len);
865 }
866
867 extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta);
868
869 static __inline char *__pskb_pull(struct sk_buff *skb, unsigned int len)
870 {
871 if (len > skb_headlen(skb) &&
872 __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL)
873 return NULL;
874 skb->len -= len;
875 return skb->data += len;
876 }
877
878 static __inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len)
879 {
880 if (len > skb->len)
881 return NULL;
882 return __pskb_pull(skb,len);
883 }
884
885 static __inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
886 {
887 if (len <= skb_headlen(skb))
888 return 1;
889 if (len > skb->len)
890 return 0;
891 return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL);
892 }
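/*
 * Usage sketch (illustrative, not from the original header): a protocol
 * input routine makes sure the header bytes it is about to read are in the
 * linear area before dereferencing them:
 *
 *	if (!pskb_may_pull(skb, hdrlen))
 *		goto drop;		(too short, or reallocation failed)
 *	th = skb->h.th;			(header is now safe to read)
 *
 * where hdrlen would be the size of the header in question.
 */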
893
894 /**
895 * skb_headroom - bytes at buffer head
896 * @skb: buffer to check
897 *
898 * Return the number of bytes of free space at the head of an &sk_buff.
899 */
900
901 static __inline int skb_headroom(const struct sk_buff *skb)
902 {
903 return skb->data-skb->head;
904 }
905
906 /**
907 * skb_tailroom - bytes at buffer end
908 * @skb: buffer to check
909 *
910 * Return the number of bytes of free space at the tail of an sk_buff
911 */
912
913 static __inline int skb_tailroom(const struct sk_buff *skb)
914 {
915 return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail;
916 }
917
918 /**
919 * skb_reserve - adjust headroom
920 * @skb: buffer to alter
921 * @len: bytes to move
922 *
923 * Increase the headroom of an empty &sk_buff by reducing the tail
924 * room. This is only allowed for an empty buffer.
925 */
926
927 static __inline void skb_reserve(struct sk_buff *skb, unsigned int len)
928 {
929 skb->data+=len;
930 skb->tail+=len;
931 }
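/*
 * Illustrative sketch, not part of the original header.  Buffer layout:
 *
 *	head                data                 tail                end
 *	 |<--- headroom --->|<------ data ------>|<---- tailroom --->|
 *
 * skb_reserve() grows the headroom of an empty buffer, skb_push()/skb_pull()
 * move the data pointer, and skb_put() moves the tail pointer.  The helper
 * below is hypothetical (as is the priority value 0; real callers pass
 * GFP_ATOMIC or GFP_KERNEL) and only shows the usual "reserve headroom
 * first" pattern for headers that will later be prepended with skb_push().
 */
static __inline struct sk_buff *alloc_skb_with_headroom_example(unsigned int headroom,
                                                                unsigned int datalen)
{
	struct sk_buff *skb = alloc_skb(headroom + datalen, 0);

	if (skb != NULL)
		skb_reserve(skb, headroom);	/* only legal while the buffer is empty */
	return skb;
}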
932
933 extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
934
935 static __inline void __skb_trim(struct sk_buff *skb, unsigned int len)
936 {
937 if (!skb->data_len) {
938 skb->len = len;
939 skb->tail = skb->data+len;
940 } else {
941 ___pskb_trim(skb, len, 0);
942 }
943 }
944
945 /**
946 * skb_trim - remove end from a buffer
947 * @skb: buffer to alter
948 * @len: new length
949 *
950 * Cut the length of a buffer down by removing data from the tail. If
951 * the buffer is already under the length specified it is not modified.
952 */
953
954 static __inline void skb_trim(struct sk_buff *skb, unsigned int len)
955 {
956 if (skb->len > len) {
957 __skb_trim(skb, len);
958 }
959 }
960
961
962 static __inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
963 {
964 if (!skb->data_len) {
965 skb->len = len;
966 skb->tail = skb->data+len;
967 return 0;
968 } else {
969 return ___pskb_trim(skb, len, 1);
970 }
971 }
972
973 static __inline int pskb_trim(struct sk_buff *skb, unsigned int len)
974 {
975 if (len < skb->len)
976 return __pskb_trim(skb, len);
977 return 0;
978 }
979
980 /**
981 * skb_orphan - orphan a buffer
982 * @skb: buffer to orphan
983 *
984 * If a buffer currently has an owner then we call the owner's
985 * destructor function and make the @skb unowned. The buffer continues
986 * to exist but is no longer charged to its former owner.
987 */
988
989
990 static __inline void skb_orphan(struct sk_buff *skb)
991 {
992 if (skb->destructor)
993 skb->destructor(skb);
994 skb->destructor = NULL;
995 skb->sk = NULL;
996 }
997
998 /**
 999 * skb_queue_purge - empty a list
1000 * @list: list to empty
1001 *
1002 * Delete all buffers on an &sk_buff list. Each buffer is removed from
1003 * the list and one reference dropped. This function takes the list
1004 * lock and is atomic with respect to other list locking functions.
1005 */
1006
1007
1008 static __inline void skb_queue_purge(struct sk_buff_head *list)
1009 {
1010 struct sk_buff *skb;
1011 while ((skb=skb_dequeue(list))!=NULL)
1012 kfree_skb(skb);
1013 }
1014
1015 /**
 1016 * __skb_queue_purge - empty a list
1017 * @list: list to empty
1018 *
1019 * Delete all buffers on an &sk_buff list. Each buffer is removed from
1020 * the list and one reference dropped. This function does not take the
1021 * list lock and the caller must hold the relevant locks to use it.
1022 */
1023
1024
1025 static __inline void __skb_queue_purge(struct sk_buff_head *list)
1026 {
1027 struct sk_buff *skb;
1028 while ((skb=__skb_dequeue(list))!=NULL)
1029 kfree_skb(skb);
1030 }
1031
1032 /**
1033 * __dev_alloc_skb - allocate an skbuff for sending
1034 * @length: length to allocate
1035 * @gfp_mask: get_free_pages mask, passed to alloc_skb
1036 *
1037 * Allocate a new &sk_buff and assign it a usage count of one. The
1038 * buffer has unspecified headroom built in. Users should allocate
1039 * the headroom they think they need without accounting for the
1040 * built in space. The built in space is used for optimisations.
1041 *
 1042 * %NULL is returned if there is no free memory.
1043 */
1044
1045 static __inline struct sk_buff *__dev_alloc_skb(unsigned int length,
1046 int gfp_mask)
1047 {
1048 struct sk_buff *skb;
1049
1050 skb = alloc_skb(length+16, gfp_mask);
1051 if (skb)
1052 skb_reserve(skb,16);
1053 return skb;
1054 }
1055
1056 /**
1057 * dev_alloc_skb - allocate an skbuff for sending
1058 * @length: length to allocate
1059 *
1060 * Allocate a new &sk_buff and assign it a usage count of one. The
1061 * buffer has unspecified headroom built in. Users should allocate
1062 * the headroom they think they need without accounting for the
1063 * built in space. The built in space is used for optimisations.
1064 *
 1065 * %NULL is returned if there is no free memory. Although this function
1066 * allocates memory it can be called from an interrupt.
1067 */
1068
1069 static __inline struct sk_buff *dev_alloc_skb(unsigned int length)
1070 {
1071 #if 0
1072 return __dev_alloc_skb(length, GFP_ATOMIC);
1073 #else
1074 return NULL;
1075 #endif
1076 }
1077
1078 /**
1079 * skb_cow - copy header of skb when it is required
1080 * @skb: buffer to cow
1081 * @headroom: needed headroom
1082 *
1083 * If the skb passed lacks sufficient headroom or its data part
1084 * is shared, data is reallocated. If reallocation fails, an error
1085 * is returned and original skb is not changed.
1086 *
1087 * The result is skb with writable area skb->head...skb->tail
1088 * and at least @headroom of space at head.
1089 */
1090
1091 static __inline int
1092 skb_cow(struct sk_buff *skb, unsigned int headroom)
1093 {
1094 #if 0
1095 int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
1096
1097 if (delta < 0)
1098 delta = 0;
1099
1100 if (delta || skb_cloned(skb))
1101 return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC);
1102 return 0;
1103 #else
1104 return 0;
1105 #endif
1106 }
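/*
 * Usage sketch (illustrative, not from the original header): a forwarding
 * or mangling path makes the header area writable before editing it:
 *
 *	if (skb_cow(skb, needed_headroom))
 *		goto drop;		(reallocation failed, skb unchanged)
 *	iph = skb->nh.iph;		(now safe to modify)
 *
 * where needed_headroom stands for whatever extra head space the caller
 * requires (often the outgoing device's hard header length).
 */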
1107
1108 /**
1109 * skb_linearize - convert paged skb to linear one
 1110 * @skb: buffer to linearize
1111 * @gfp: allocation mode
1112 *
1113 * If there is no free memory -ENOMEM is returned, otherwise zero
1114 * is returned and the old skb data released. */
1115 int skb_linearize(struct sk_buff *skb, int gfp);
1116
1117 static __inline void *kmap_skb_frag(const skb_frag_t *frag)
1118 {
1119 #if 0
1120 #ifdef CONFIG_HIGHMEM
1121 if (in_irq())
1122 out_of_line_bug();
1123
1124 local_bh_disable();
1125 #endif
1126 return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
1127 #else
1128 return NULL;
1129 #endif
1130 }
1131
1132 static __inline void kunmap_skb_frag(void *vaddr)
1133 {
1134 #if 0
1135 kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
1136 #ifdef CONFIG_HIGHMEM
1137 local_bh_enable();
1138 #endif
1139 #endif
1140 }
1141
1142 #define skb_queue_walk(queue, skb) \
1143 for (skb = (queue)->next; \
1144 (skb != (struct sk_buff *)(queue)); \
1145 skb=skb->next)
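/*
 * Usage sketch (illustrative, not from the original header): the walker
 * above takes no locks, so the queue lock (or exclusive ownership of the
 * queue) is the caller's responsibility:
 *
 *	spin_lock_irqsave(&queue->lock, flags);
 *	skb_queue_walk(queue, skb)
 *		total_len += skb->len;
 *	spin_unlock_irqrestore(&queue->lock, flags);
 *
 * The buffer must not be unlinked from inside the loop, since the walker
 * reads skb->next after the body runs.
 */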
1146
1147
1148 extern struct sk_buff * skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err);
1149 extern unsigned int datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait);
1150 extern int skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size);
1151 extern int skb_copy_datagram_iovec(const struct sk_buff *from, int offset, struct iovec *to,int size);
1152 extern int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump);
1153 extern int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, int hlen, struct iovec *iov);
1154 extern void skb_free_datagram(struct sock * sk, struct sk_buff *skb);
1155
1156 extern unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum);
1157 extern int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
1158 extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum);
1159 extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
1160
1161 extern void skb_init(void);
1162 extern void skb_add_mtu(int mtu);
1163
1164 #ifdef CONFIG_NETFILTER
1165 static __inline void
1166 nf_conntrack_put(struct nf_ct_info *nfct)
1167 {
1168 if (nfct && atomic_dec_and_test(&nfct->master->use))
1169 nfct->master->destroy(nfct->master);
1170 }
1171 static __inline void
1172 nf_conntrack_get(struct nf_ct_info *nfct)
1173 {
1174 if (nfct)
1175 atomic_inc(&nfct->master->use);
1176 }
1177 #endif
1178
1179
1180 #endif /* skbuff */
1181
1182
1183
1184
1185
1186 struct sock;
1187
1188 typedef struct sockaddr
1189 {
1190 int x;
1191 } _sockaddr;
1192
1193
1194 struct msghdr {
1195 void * msg_name; /* Socket name */
1196 int msg_namelen; /* Length of name */
1197 struct iovec * msg_iov; /* Data blocks */
1198 __kernel_size_t msg_iovlen; /* Number of blocks */
1199 void * msg_control; /* Per protocol magic (eg BSD file descriptor passing) */
1200 __kernel_size_t msg_controllen; /* Length of cmsg list */
1201 unsigned msg_flags;
1202 };
1203
1204
1205 /* IP protocol blocks we attach to sockets.
1206 * socket layer -> transport layer interface
1207 * transport -> network interface is defined by struct inet_proto
1208 */
1209 struct proto {
1210 void (*close)(struct sock *sk,
1211 long timeout);
1212 int (*connect)(struct sock *sk,
1213 struct sockaddr *uaddr,
1214 int addr_len);
1215 int (*disconnect)(struct sock *sk, int flags);
1216
1217 struct sock * (*accept) (struct sock *sk, int flags, int *err);
1218
1219 int (*ioctl)(struct sock *sk, int cmd,
1220 unsigned long arg);
1221 int (*init)(struct sock *sk);
1222 int (*destroy)(struct sock *sk);
1223 void (*shutdown)(struct sock *sk, int how);
1224 int (*setsockopt)(struct sock *sk, int level,
1225 int optname, char *optval, int optlen);
1226 int (*getsockopt)(struct sock *sk, int level,
1227 int optname, char *optval,
1228 int *option);
1229 int (*sendmsg)(struct sock *sk, struct msghdr *msg,
1230 int len);
1231 int (*recvmsg)(struct sock *sk, struct msghdr *msg,
1232 int len, int noblock, int flags,
1233 int *addr_len);
1234 int (*bind)(struct sock *sk,
1235 struct sockaddr *uaddr, int addr_len);
1236
1237 int (*backlog_rcv) (struct sock *sk,
1238 struct sk_buff *skb);
1239
1240 /* Keeping track of sk's, looking them up, and port selection methods. */
1241 void (*hash)(struct sock *sk);
1242 void (*unhash)(struct sock *sk);
1243 int (*get_port)(struct sock *sk, unsigned short snum);
1244
1245 char name[32];
1246
1247 struct {
1248 int inuse;
1249 } stats[32];
1250 // u8 __pad[SMP_CACHE_BYTES - sizeof(int)];
1251 // } stats[NR_CPUS];
1252 };
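/*
 * Usage sketch (illustrative, not from the original header): the socket
 * layer dispatches through this table, e.g. a sendmsg() call on a TCP
 * socket ends up in
 *
 *	err = sk->prot->sendmsg(sk, &msg, len);
 *
 * Each transport fills in one static struct proto (tcp_prot, udp_prot, ...)
 * with its entry points and hashing helpers at initialization time.
 */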
1253
1254
1255
1256
1257
1258
1259
1260 /* This defines a selective acknowledgement block. */
1261 struct tcp_sack_block {
1262 __u32 start_seq;
1263 __u32 end_seq;
1264 };
1265
1266
1267 struct tcp_opt {
1268 int tcp_header_len; /* Bytes of tcp header to send */
1269
1270 /*
1271 * Header prediction flags
1272 * 0x5?10 << 16 + snd_wnd in net byte order
1273 */
1274 __u32 pred_flags;
1275
1276 /*
1277 * RFC793 variables by their proper names. This means you can
1278 * read the code and the spec side by side (and laugh ...)
1279 * See RFC793 and RFC1122. The RFC writes these in capitals.
1280 */
1281 __u32 rcv_nxt; /* What we want to receive next */
1282 __u32 snd_nxt; /* Next sequence we send */
1283
1284 __u32 snd_una; /* First byte we want an ack for */
1285 __u32 snd_sml; /* Last byte of the most recently transmitted small packet */
1286 __u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */
1287 __u32 lsndtime; /* timestamp of last sent data packet (for restart window) */
1288
1289 /* Delayed ACK control data */
1290 struct {
1291 __u8 pending; /* ACK is pending */
1292 __u8 quick; /* Scheduled number of quick acks */
1293 __u8 pingpong; /* The session is interactive */
1294 __u8 blocked; /* Delayed ACK was blocked by socket lock*/
1295 __u32 ato; /* Predicted tick of soft clock */
1296 unsigned long timeout; /* Currently scheduled timeout */
1297 __u32 lrcvtime; /* timestamp of last received data packet*/
1298 __u16 last_seg_size; /* Size of last incoming segment */
1299 __u16 rcv_mss; /* MSS used for delayed ACK decisions */
1300 } ack;
1301
1302 /* Data for direct copy to user */
1303 struct {
1304 //struct sk_buff_head prequeue;
1305 struct task_struct *task;
1306 struct iovec *iov;
1307 int memory;
1308 int len;
1309 } ucopy;
1310
1311 __u32 snd_wl1; /* Sequence for window update */
1312 __u32 snd_wnd; /* The window we expect to receive */
1313 __u32 max_window; /* Maximal window ever seen from peer */
1314 __u32 pmtu_cookie; /* Last pmtu seen by socket */
1315 __u16 mss_cache; /* Cached effective mss, not including SACKS */
1316 __u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
1317 __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */
1318 __u8 ca_state; /* State of fast-retransmit machine */
1319 __u8 retransmits; /* Number of unrecovered RTO timeouts. */
1320
1321 __u8 reordering; /* Packet reordering metric. */
1322 __u8 queue_shrunk; /* Write queue has been shrunk recently.*/
1323 __u8 defer_accept; /* User waits for some data after accept() */
1324
1325 /* RTT measurement */
1326 __u8 backoff; /* backoff */
 1327 __u32 srtt; /* smoothed round trip time << 3 */
 1328 __u32 mdev; /* mean deviation */
1329 __u32 mdev_max; /* maximal mdev for the last rtt period */
1330 __u32 rttvar; /* smoothed mdev_max */
1331 __u32 rtt_seq; /* sequence number to update rttvar */
1332 __u32 rto; /* retransmit timeout */
1333
1334 __u32 packets_out; /* Packets which are "in flight" */
 1335 __u32 left_out; /* Packets which have left the network */
1336 __u32 retrans_out; /* Retransmitted packets out */
1337
1338
1339 /*
1340 * Slow start and congestion control (see also Nagle, and Karn & Partridge)
1341 */
1342 __u32 snd_ssthresh; /* Slow start size threshold */
1343 __u32 snd_cwnd; /* Sending congestion window */
1344 __u16 snd_cwnd_cnt; /* Linear increase counter */
1345 __u16 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
1346 __u32 snd_cwnd_used;
1347 __u32 snd_cwnd_stamp;
1348
1349 /* Two commonly used timers in both sender and receiver paths. */
1350 unsigned long timeout;
1351 struct timer_list retransmit_timer; /* Resend (no ack) */
1352 struct timer_list delack_timer; /* Ack delay */
1353
1354 struct sk_buff_head out_of_order_queue; /* Out of order segments go here */
1355
1356 struct tcp_func *af_specific; /* Operations which are AF_INET{4,6} specific */
1357 struct sk_buff *send_head; /* Front of stuff to transmit */
1358 struct page *sndmsg_page; /* Cached page for sendmsg */
1359 u32 sndmsg_off; /* Cached offset for sendmsg */
1360
1361 __u32 rcv_wnd; /* Current receiver window */
1362 __u32 rcv_wup; /* rcv_nxt on last window update sent */
1363 __u32 write_seq; /* Tail(+1) of data held in tcp send buffer */
1364 __u32 pushed_seq; /* Last pushed seq, required to talk to windows */
1365 __u32 copied_seq; /* Head of yet unread data */
1366 /*
1367 * Options received (usually on last packet, some only on SYN packets).
1368 */
1369 char tstamp_ok, /* TIMESTAMP seen on SYN packet */
1370 wscale_ok, /* Wscale seen on SYN packet */
1371 sack_ok; /* SACK seen on SYN packet */
1372 char saw_tstamp; /* Saw TIMESTAMP on last packet */
1373 __u8 snd_wscale; /* Window scaling received from sender */
1374 __u8 rcv_wscale; /* Window scaling to send to receiver */
1375 __u8 nonagle; /* Disable Nagle algorithm? */
1376 __u8 keepalive_probes; /* num of allowed keep alive probes */
1377
1378 /* PAWS/RTTM data */
1379 __u32 rcv_tsval; /* Time stamp value */
1380 __u32 rcv_tsecr; /* Time stamp echo reply */
1381 __u32 ts_recent; /* Time stamp to echo next */
1382 long ts_recent_stamp;/* Time we stored ts_recent (for aging) */
1383
1384 /* SACKs data */
1385 __u16 user_mss; /* mss requested by user in ioctl */
1386 __u8 dsack; /* D-SACK is scheduled */
1387 __u8 eff_sacks; /* Size of SACK array to send with next packet */
1388 struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
1389 struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/
1390
1391 __u32 window_clamp; /* Maximal window to advertise */
1392 __u32 rcv_ssthresh; /* Current window clamp */
1393 __u8 probes_out; /* unanswered 0 window probes */
1394 __u8 num_sacks; /* Number of SACK blocks */
1395 __u16 advmss; /* Advertised MSS */
1396
1397 __u8 syn_retries; /* num of allowed syn retries */
1398 __u8 ecn_flags; /* ECN status bits. */
1399 __u16 prior_ssthresh; /* ssthresh saved at recovery start */
1400 __u32 lost_out; /* Lost packets */
1401 __u32 sacked_out; /* SACK'd packets */
1402 __u32 fackets_out; /* FACK'd packets */
1403 __u32 high_seq; /* snd_nxt at onset of congestion */
1404
1405 __u32 retrans_stamp; /* Timestamp of the last retransmit,
1406 * also used in SYN-SENT to remember stamp of
1407 * the first SYN. */
1408 __u32 undo_marker; /* tracking retrans started here. */
1409 int undo_retrans; /* number of undoable retransmissions. */
1410 __u32 urg_seq; /* Seq of received urgent pointer */
1411 __u16 urg_data; /* Saved octet of OOB data and control flags */
1412 __u8 pending; /* Scheduled timer event */
1413 __u8 urg_mode; /* In urgent mode */
1414 __u32 snd_up; /* Urgent pointer */
1415
1416 /* The syn_wait_lock is necessary only to avoid tcp_get_info having
1417 * to grab the main lock sock while browsing the listening hash
1418 * (otherwise it's deadlock prone).
1419 * This lock is acquired in read mode only from tcp_get_info() and
1420 * it's acquired in write mode _only_ from code that is actively
1421 * changing the syn_wait_queue. All readers that are holding
1422 * the master sock lock don't need to grab this lock in read mode
1423 * too as the syn_wait_queue writes are always protected from
1424 * the main sock lock.
1425 */
1426 rwlock_t syn_wait_lock;
1427 struct tcp_listen_opt *listen_opt;
1428
1429 /* FIFO of established children */
1430 struct open_request *accept_queue;
1431 struct open_request *accept_queue_tail;
1432
1433 int write_pending; /* A write to socket waits to start. */
1434
1435 unsigned int keepalive_time; /* time before keep alive takes place */
1436 unsigned int keepalive_intvl; /* time interval between keep alive probes */
1437 int linger2;
1438
1439 unsigned long last_synq_overflow;
1440 };
1441
1442
1443
1444
1445 /* This is the per-socket lock. The spinlock provides a synchronization
1446 * between user contexts and software interrupt processing, whereas the
1447 * mini-semaphore synchronizes multiple users amongst themselves.
1448 */
1449 typedef struct {
1450 spinlock_t slock;
1451 unsigned int users;
1452 wait_queue_head_t wq;
1453 } socket_lock_t;
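/*
 * Usage sketch (illustrative; the helpers themselves are not part of this
 * header): process context conventionally takes the mini-semaphore, e.g.
 * via lock_sock(sk)/release_sock(sk), while softirq receive processing
 * takes the spinlock via bh_lock_sock(sk) and, if lock.users != 0, defers
 * the packet to sk->backlog instead of touching the socket directly.
 */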
1454
1455 struct sock {
1456 /* Socket demultiplex comparisons on incoming packets. */
1457 __u32 daddr; /* Foreign IPv4 addr */
1458 __u32 rcv_saddr; /* Bound local IPv4 addr */
1459 __u16 dport; /* Destination port */
1460 unsigned short num; /* Local port */
1461 int bound_dev_if; /* Bound device index if != 0 */
1462
1463 /* Main hash linkage for various protocol lookup tables. */
1464 struct sock *next;
1465 struct sock **pprev;
1466 struct sock *bind_next;
1467 struct sock **bind_pprev;
1468
1469 volatile unsigned char state, /* Connection state */
1470 zapped; /* In ax25 & ipx means not linked */
1471 __u16 sport; /* Source port */
1472
1473 unsigned short family; /* Address family */
1474 unsigned char reuse; /* SO_REUSEADDR setting */
1475 unsigned char shutdown;
1476 atomic_t refcnt; /* Reference count */
1477
1478 socket_lock_t lock; /* Synchronizer... */
1479 int rcvbuf; /* Size of receive buffer in bytes */
1480
1481 wait_queue_head_t *sleep; /* Sock wait queue */
1482 struct dst_entry *dst_cache; /* Destination cache */
1483 rwlock_t dst_lock;
1484 atomic_t rmem_alloc; /* Receive queue bytes committed */
1485 struct sk_buff_head receive_queue; /* Incoming packets */
1486 atomic_t wmem_alloc; /* Transmit queue bytes committed */
1487 struct sk_buff_head write_queue; /* Packet sending queue */
1488 atomic_t omem_alloc; /* "o" is "option" or "other" */
1489 int wmem_queued; /* Persistent queue size */
1490 int forward_alloc; /* Space allocated forward. */
1491 __u32 saddr; /* Sending source */
1492 unsigned int allocation; /* Allocation mode */
1493 int sndbuf; /* Size of send buffer in bytes */
1494 struct sock *prev;
1495
1496 /* Not all are volatile, but some are, so we might as well say they all are.
1497 * XXX Make this a flag word -DaveM
1498 */
1499 volatile char dead,
1500 done,
1501 urginline,
1502 keepopen,
1503 linger,
1504 destroy,
1505 no_check,
1506 broadcast,
1507 bsdism;
1508 unsigned char debug;
1509 unsigned char rcvtstamp;
1510 unsigned char use_write_queue;
1511 unsigned char userlocks;
1512 /* Hole of 3 bytes. Try to pack. */
1513 int route_caps;
1514 int proc;
1515 unsigned long lingertime;
1516
1517 int hashent;
1518 struct sock *pair;
1519
1520 /* The backlog queue is special, it is always used with
1521 * the per-socket spinlock held and requires low latency
 1522 * access. Therefore we special case its implementation.
1523 */
1524 struct {
1525 struct sk_buff *head;
1526 struct sk_buff *tail;
1527 } backlog;
1528
1529 rwlock_t callback_lock;
1530
1531 /* Error queue, rarely used. */
1532 struct sk_buff_head error_queue;
1533
1534 struct proto *prot;
1535
1536 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
1537 union {
1538 struct ipv6_pinfo af_inet6;
1539 } net_pinfo;
1540 #endif
1541
1542 union {
1543 struct tcp_opt af_tcp;
1544 #if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
1545 struct raw_opt tp_raw4;
1546 #endif
1547 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
1548 struct raw6_opt tp_raw;
1549 #endif /* CONFIG_IPV6 */
1550 #if defined(CONFIG_SPX) || defined (CONFIG_SPX_MODULE)
1551 struct spx_opt af_spx;
1552 #endif /* CONFIG_SPX */
1553
1554 } tp_pinfo;
1555
1556 int err, err_soft; /* Soft holds errors that don't
1557 cause failure but are the cause
1558 of a persistent failure not just
1559 'timed out' */
1560 unsigned short ack_backlog;
1561 unsigned short max_ack_backlog;
1562 __u32 priority;
1563 unsigned short type;
1564 unsigned char localroute; /* Route locally only */
1565 unsigned char protocol;
1566 // struct ucred peercred;
1567 int rcvlowat;
1568 long rcvtimeo;
1569 long sndtimeo;
1570
1571 #ifdef CONFIG_FILTER
1572 /* Socket Filtering Instructions */
1573 struct sk_filter *filter;
1574 #endif /* CONFIG_FILTER */
1575
1576 /* This is where all the private (optional) areas that don't
1577 * overlap will eventually live.
1578 */
1579 union {
1580 void *destruct_hook;
1581 // struct unix_opt af_unix;
1582 #if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
1583 struct inet_opt af_inet;
1584 #endif
1585 #if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
1586 struct atalk_sock af_at;
1587 #endif
1588 #if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
1589 struct ipx_opt af_ipx;
1590 #endif
1591 #if defined (CONFIG_DECNET) || defined(CONFIG_DECNET_MODULE)
1592 struct dn_scp dn;
1593 #endif
1594 #if defined (CONFIG_PACKET) || defined(CONFIG_PACKET_MODULE)
1595 struct packet_opt *af_packet;
1596 #endif
1597 #if defined(CONFIG_X25) || defined(CONFIG_X25_MODULE)
1598 x25_cb *x25;
1599 #endif
1600 #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
1601 ax25_cb *ax25;
1602 #endif
1603 #if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
1604 nr_cb *nr;
1605 #endif
1606 #if defined(CONFIG_ROSE) || defined(CONFIG_ROSE_MODULE)
1607 rose_cb *rose;
1608 #endif
1609 #if defined(CONFIG_PPPOE) || defined(CONFIG_PPPOE_MODULE)
1610 struct pppox_opt *pppox;
1611 #endif
1612 struct netlink_opt *af_netlink;
1613 #if defined(CONFIG_ECONET) || defined(CONFIG_ECONET_MODULE)
1614 struct econet_opt *af_econet;
1615 #endif
1616 #if defined(CONFIG_ATM) || defined(CONFIG_ATM_MODULE)
1617 struct atm_vcc *af_atm;
1618 #endif
1619 #if defined(CONFIG_IRDA) || defined(CONFIG_IRDA_MODULE)
1620 struct irda_sock *irda;
1621 #endif
1622 #if defined(CONFIG_WAN_ROUTER) || defined(CONFIG_WAN_ROUTER_MODULE)
1623 struct wanpipe_opt *af_wanpipe;
1624 #endif
1625 } protinfo;
1626
1627
1628 /* This part is used for the timeout functions. */
1629 struct timer_list timer; /* This is the sock cleanup timer. */
1630 struct timeval stamp;
1631
1632 /* Identd and reporting IO signals */
1633 struct socket *socket;
1634
1635 /* RPC layer private data */
1636 void *user_data;
1637
1638 /* Callbacks */
1639 void (*state_change)(struct sock *sk);
1640 void (*data_ready)(struct sock *sk,int bytes);
1641 void (*write_space)(struct sock *sk);
1642 void (*error_report)(struct sock *sk);
1643
1644 int (*backlog_rcv) (struct sock *sk,
1645 struct sk_buff *skb);
1646 void (*destruct)(struct sock *sk);
1647 };
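/*
 * Usage sketch (illustrative, not from the original header): protocol code
 * reaches its private per-connection state through the tp_pinfo union,
 * e.g. for TCP
 *
 *	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 *	tp->snd_cwnd = tp->snd_ssthresh;	(just an example assignment)
 */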
1648
1649
1650
1651
1652 #if 1 /* dst (_NET_DST_H) */
1653
1654 #if 0
1655 #include <linux/config.h>
1656 #include <net/neighbour.h>
1657 #endif
1658
1659 /*
1660 * 0 - no debugging messages
1661 * 1 - rare events and bugs (default)
1662 * 2 - trace mode.
1663 */
1664 #define RT_CACHE_DEBUG 0
1665
1666 #define DST_GC_MIN (1*HZ)
1667 #define DST_GC_INC (5*HZ)
1668 #define DST_GC_MAX (120*HZ)
1669
1670 struct sk_buff;
1671
1672 struct dst_entry
1673 {
1674 struct dst_entry *next;
1675 atomic_t __refcnt; /* client references */
1676 int __use;
1677 struct net_device *dev;
1678 int obsolete;
1679 int flags;
1680 #define DST_HOST 1
1681 unsigned long lastuse;
1682 unsigned long expires;
1683
1684 unsigned mxlock;
1685 unsigned pmtu;
1686 unsigned window;
1687 unsigned rtt;
1688 unsigned rttvar;
1689 unsigned ssthresh;
1690 unsigned cwnd;
1691 unsigned advmss;
1692 unsigned reordering;
1693
1694 unsigned long rate_last; /* rate limiting for ICMP */
1695 unsigned long rate_tokens;
1696
1697 int error;
1698
1699 struct neighbour *neighbour;
1700 struct hh_cache *hh;
1701
1702 int (*input)(struct sk_buff*);
1703 int (*output)(struct sk_buff*);
1704
1705 #ifdef CONFIG_NET_CLS_ROUTE
1706 __u32 tclassid;
1707 #endif
1708
1709 struct dst_ops *ops;
1710
1711 char info[0];
1712 };
1713
1714
1715 struct dst_ops
1716 {
1717 unsigned short family;
1718 unsigned short protocol;
1719 unsigned gc_thresh;
1720
1721 int (*gc)(void);
1722 struct dst_entry * (*check)(struct dst_entry *, __u32 cookie);
1723 struct dst_entry * (*reroute)(struct dst_entry *,
1724 struct sk_buff *);
1725 void (*destroy)(struct dst_entry *);
1726 struct dst_entry * (*negative_advice)(struct dst_entry *);
1727 void (*link_failure)(struct sk_buff *);
1728 int entry_size;
1729
1730 atomic_t entries;
1731 kmem_cache_t *kmem_cachep;
1732 };
1733
1734 #ifdef __KERNEL__
1735
1736 static __inline void dst_hold(struct dst_entry * dst)
1737 {
1738 atomic_inc(&dst->__refcnt);
1739 }
1740
1741 static __inline
1742 struct dst_entry * dst_clone(struct dst_entry * dst)
1743 {
1744 if (dst)
1745 atomic_inc(&dst->__refcnt);
1746 return dst;
1747 }
1748
1749 static __inline
1750 void dst_release(struct dst_entry * dst)
1751 {
1752 if (dst)
1753 atomic_dec(&dst->__refcnt);
1754 }
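/*
 * Usage sketch (illustrative, not from the original header): a cached
 * route is reference counted with the helpers above, e.g.
 *
 *	sk->dst_cache = dst_clone(dst);		(hold while cached)
 *	...
 *	dst_release(sk->dst_cache);		(drop when the cache is flushed)
 *	sk->dst_cache = NULL;
 */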
1755
1756 extern void * dst_alloc(struct dst_ops * ops);
1757 extern void __dst_free(struct dst_entry * dst);
1758 extern void dst_destroy(struct dst_entry * dst);
1759
1760 static __inline
1761 void dst_free(struct dst_entry * dst)
1762 {
1763 if (dst->obsolete > 1)
1764 return;
1765 if (!atomic_read(&dst->__refcnt)) {
1766 dst_destroy(dst);
1767 return;
1768 }
1769 __dst_free(dst);
1770 }
1771
1772 static __inline void dst_confirm(struct dst_entry *dst)
1773 {
1774 if (dst)
1775 neigh_confirm(dst->neighbour);
1776 }
1777
1778 static __inline void dst_negative_advice(struct dst_entry **dst_p)
1779 {
1780 struct dst_entry * dst = *dst_p;
1781 if (dst && dst->ops->negative_advice)
1782 *dst_p = dst->ops->negative_advice(dst);
1783 }
1784
1785 static __inline void dst_link_failure(struct sk_buff *skb)
1786 {
1787 struct dst_entry * dst = skb->dst;
1788 if (dst && dst->ops && dst->ops->link_failure)
1789 dst->ops->link_failure(skb);
1790 }
1791
1792 static __inline void dst_set_expires(struct dst_entry *dst, int timeout)
1793 {
1794 unsigned long expires = jiffies + timeout;
1795
1796 if (expires == 0)
1797 expires = 1;
1798
1799 if (dst->expires == 0 || (long)(dst->expires - expires) > 0)
1800 dst->expires = expires;
1801 }
1802
1803 extern void dst_init(void);
1804
1805 #endif /* dst */
1806
1807
1808
1809 #if 1
1810 /* dummy types */
1811
1812
1813 #endif
1814
1815 #define TCP_DEBUG 1
1816 #define FASTRETRANS_DEBUG 1
1817
1818 /* Cancel timers, when they are not required. */
1819 #undef TCP_CLEAR_TIMERS
1820
1821 #if 0
1822 #include <linux/config.h>
1823 #include <linux/tcp.h>
1824 #include <linux/slab.h>
1825 #include <linux/cache.h>
1826 #include <net/checksum.h>
1827 #include <net/sock.h>
1828 #else
1829 #include "linux.h"
1830 #endif
1831
1832 /* This is for all connections with a full identity, no wildcards.
1833 * New scheme, half the table is for TIME_WAIT, the other half is
1834 * for the rest. I'll experiment with dynamic table growth later.
1835 */
1836 struct tcp_ehash_bucket {
1837 rwlock_t lock;
1838 struct sock *chain;
1839 } __attribute__((__aligned__(8)));
1840
1841 /* This is for listening sockets, thus all sockets which possess wildcards. */
1842 #define TCP_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */
1843
1844 /* There are a few simple rules, which allow for local port reuse by
1845 * an application. In essence:
1846 *
1847 * 1) Sockets bound to different interfaces may share a local port.
1848 * Failing that, goto test 2.
1849 * 2) If all sockets have sk->reuse set, and none of them are in
1850 * TCP_LISTEN state, the port may be shared.
1851 * Failing that, goto test 3.
1852 * 3) If all sockets are bound to a specific sk->rcv_saddr local
1853 * address, and none of them are the same, the port may be
1854 * shared.
1855 * Failing this, the port cannot be shared.
1856 *
 1857 * The interesting point is test #2. This is what an FTP server does
1858 * all day. To optimize this case we use a specific flag bit defined
1859 * below. As we add sockets to a bind bucket list, we perform a
1860 * check of: (newsk->reuse && (newsk->state != TCP_LISTEN))
1861 * As long as all sockets added to a bind bucket pass this test,
1862 * the flag bit will be set.
1863 * The resulting situation is that tcp_v[46]_verify_bind() can just check
1864 * for this flag bit, if it is set and the socket trying to bind has
1865 * sk->reuse set, we don't even have to walk the owners list at all,
1866 * we return that it is ok to bind this socket to the requested local port.
1867 *
1868 * Sounds like a lot of work, but it is worth it. In a more naive
1869 * implementation (ie. current FreeBSD etc.) the entire list of ports
1870 * must be walked for each data port opened by an ftp server. Needless
1871 * to say, this does not scale at all. With a couple thousand FTP
1872 * users logged onto your box, isn't it nice to know that new data
1873 * ports are created in O(1) time? I thought so. ;-) -DaveM
1874 */
1875 struct tcp_bind_bucket {
1876 unsigned short port;
1877 signed short fastreuse;
1878 struct tcp_bind_bucket *next;
1879 struct sock *owners;
1880 struct tcp_bind_bucket **pprev;
1881 };
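/* Illustrative sketch (not part of the original header): how the fastreuse
 * hint described in the comment above is typically maintained as sockets are
 * added to a bind bucket. The helper name is hypothetical; the real logic
 * lives in the bind/get-port path.
 */
#if 0
static __inline void tcp_bind_bucket_update_fastreuse(struct tcp_bind_bucket *tb,
                                                      struct sock *newsk)
{
    if (tb->owners == NULL) {
        /* The first owner determines the initial value of the hint. */
        tb->fastreuse = (newsk->reuse && newsk->state != TCP_LISTEN);
    } else if (tb->fastreuse &&
               (!newsk->reuse || newsk->state == TCP_LISTEN)) {
        /* One non-conforming owner clears the hint for good. */
        tb->fastreuse = 0;
    }
}
#endif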
1882
1883 struct tcp_bind_hashbucket {
1884 spinlock_t lock;
1885 struct tcp_bind_bucket *chain;
1886 };
1887
1888 extern struct tcp_hashinfo {
1889 /* This is for sockets with full identity only. Sockets here will
1890 * always be without wildcards and will have the following invariant:
1891 *
1892 * TCP_ESTABLISHED <= sk->state < TCP_CLOSE
1893 *
1894 * First half of the table is for sockets not in TIME_WAIT, second half
1895 * is for TIME_WAIT sockets only.
1896 */
1897 struct tcp_ehash_bucket *__tcp_ehash;
1898
1899 /* Ok, let's try this, I give up, we do need a local binding
1900 * TCP hash as well as the others for fast bind/connect.
1901 */
1902 struct tcp_bind_hashbucket *__tcp_bhash;
1903
1904 int __tcp_bhash_size;
1905 int __tcp_ehash_size;
1906
1907 /* All sockets in TCP_LISTEN state will be in here. This is the only
1908 * table where wildcard'd TCP sockets can exist. Hash function here
1909 * is just local port number.
1910 */
1911 struct sock *__tcp_listening_hash[TCP_LHTABLE_SIZE];
1912
1913 /* All the above members are written once at bootup and
1914 * never written again _or_ are predominantly read-access.
1915 *
1916 * Now align to a new cache line as all the following members
1917 * are often dirty.
1918 */
1919 rwlock_t __tcp_lhash_lock ____cacheline_aligned;
1920 atomic_t __tcp_lhash_users;
1921 wait_queue_head_t __tcp_lhash_wait;
1922 spinlock_t __tcp_portalloc_lock;
1923 } tcp_hashinfo;
1924
1925 #define tcp_ehash (tcp_hashinfo.__tcp_ehash)
1926 #define tcp_bhash (tcp_hashinfo.__tcp_bhash)
1927 #define tcp_ehash_size (tcp_hashinfo.__tcp_ehash_size)
1928 #define tcp_bhash_size (tcp_hashinfo.__tcp_bhash_size)
1929 #define tcp_listening_hash (tcp_hashinfo.__tcp_listening_hash)
1930 #define tcp_lhash_lock (tcp_hashinfo.__tcp_lhash_lock)
1931 #define tcp_lhash_users (tcp_hashinfo.__tcp_lhash_users)
1932 #define tcp_lhash_wait (tcp_hashinfo.__tcp_lhash_wait)
1933 #define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock)
1934
1935 extern kmem_cache_t *tcp_bucket_cachep;
1936 extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
1937 unsigned short snum);
1938 extern void tcp_bucket_unlock(struct sock *sk);
1939 extern int tcp_port_rover;
1940 extern struct sock *tcp_v4_lookup_listener(u32 addr, unsigned short hnum, int dif);
1941
1942 /* These are AF independent. */
1943 static __inline int tcp_bhashfn(__u16 lport)
1944 {
1945 return (lport & (tcp_bhash_size - 1));
1946 }
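/* Illustrative note (not in the original header): the mask above assumes
 * tcp_bhash_size is a power of two, so it acts as a cheap modulo; e.g. local
 * port 8080 with tcp_bhash_size == 512 hashes to 8080 & 511 == 400.
 */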
1947
1948 /* This is a TIME_WAIT bucket. It works around the memory consumption
1949 * problems of sockets in such a state on heavily loaded servers, but
1950 * without violating the protocol specification.
1951 */
1952 struct tcp_tw_bucket {
1953 /* These _must_ match the beginning of struct sock precisely.
1954 * XXX Yes I know this is gross, but I'd have to edit every single
1955 * XXX networking file if I created a "struct sock_header". -DaveM
1956 */
1957 __u32 daddr;
1958 __u32 rcv_saddr;
1959 __u16 dport;
1960 unsigned short num;
1961 int bound_dev_if;
1962 struct sock *next;
1963 struct sock **pprev;
1964 struct sock *bind_next;
1965 struct sock **bind_pprev;
1966 unsigned char state,
1967 substate; /* "zapped" is replaced with "substate" */
1968 __u16 sport;
1969 unsigned short family;
1970 unsigned char reuse,
1971 rcv_wscale; /* It is also TW bucket specific */
1972 atomic_t refcnt;
1973
1974 /* And these are ours. */
1975 int hashent;
1976 int timeout;
1977 __u32 rcv_nxt;
1978 __u32 snd_nxt;
1979 __u32 rcv_wnd;
1980 __u32 ts_recent;
1981 long ts_recent_stamp;
1982 unsigned long ttd;
1983 struct tcp_bind_bucket *tb;
1984 struct tcp_tw_bucket *next_death;
1985 struct tcp_tw_bucket **pprev_death;
1986
1987 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1988 struct in6_addr v6_daddr;
1989 struct in6_addr v6_rcv_saddr;
1990 #endif
1991 };
1992
1993 extern kmem_cache_t *tcp_timewait_cachep;
1994
1995 static __inline void tcp_tw_put(struct tcp_tw_bucket *tw)
1996 {
1997 if (atomic_dec_and_test(&tw->refcnt)) {
1998 #ifdef INET_REFCNT_DEBUG
1999 printk(KERN_DEBUG "tw_bucket %p released\n", tw);
2000 #endif
2001 kmem_cache_free(tcp_timewait_cachep, tw);
2002 }
2003 }
2004
2005 extern atomic_t tcp_orphan_count;
2006 extern int tcp_tw_count;
2007 extern void tcp_time_wait(struct sock *sk, int state, int timeo);
2008 extern void tcp_timewait_kill(struct tcp_tw_bucket *tw);
2009 extern void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo);
2010 extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);
2011
2012
2013 /* Socket demux engine toys. */
2014 #ifdef __BIG_ENDIAN
2015 #define TCP_COMBINED_PORTS(__sport, __dport) \
2016 (((__u32)(__sport)<<16) | (__u32)(__dport))
2017 #else /* __LITTLE_ENDIAN */
2018 #define TCP_COMBINED_PORTS(__sport, __dport) \
2019 (((__u32)(__dport)<<16) | (__u32)(__sport))
2020 #endif
2021
2022 #if (BITS_PER_LONG == 64)
2023 #ifdef __BIG_ENDIAN
2024 #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
2025 __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr));
2026 #else /* __LITTLE_ENDIAN */
2027 #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
2028 __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr));
2029 #endif /* __BIG_ENDIAN */
2030 #define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
2031 (((*((__u64 *)&((__sk)->daddr)))== (__cookie)) && \
2032 ((*((__u32 *)&((__sk)->dport)))== (__ports)) && \
2033 (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
2034 #else /* 32-bit arch */
2035 #define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr)
2036 #define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
2037 (((__sk)->daddr == (__saddr)) && \
2038 ((__sk)->rcv_saddr == (__daddr)) && \
2039 ((*((__u32 *)&((__sk)->dport)))== (__ports)) && \
2040 (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
2041 #endif /* 64-bit arch */
2042
2043 #define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \
2044 (((*((__u32 *)&((__sk)->dport)))== (__ports)) && \
2045 ((__sk)->family == AF_INET6) && \
2046 !ipv6_addr_cmp(&(__sk)->net_pinfo.af_inet6.daddr, (__saddr)) && \
2047 !ipv6_addr_cmp(&(__sk)->net_pinfo.af_inet6.rcv_saddr, (__daddr)) && \
2048 (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
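/* Illustrative sketch (hypothetical helper, not the original lookup code):
 * how the cookie/port macros above are meant to be used when walking a
 * single tcp_ehash chain for a fully-identified IPv4 socket. Refcounting of
 * the returned sock is omitted here.
 */
#if 0
static __inline struct sock *__tcp_v4_lookup_established_sketch(u32 saddr, u16 sport,
                                                                u32 daddr, u16 hnum,
                                                                int dif, int hash)
{
    struct tcp_ehash_bucket *head = &tcp_ehash[hash];
    TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
    __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
    struct sock *sk;

    read_lock(&head->lock);
    for (sk = head->chain; sk != NULL; sk = sk->next) {
        if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
            break;  /* Full identity match, no wildcards. */
    }
    read_unlock(&head->lock);
    return sk;
}
#endif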
2049
2050 /* These can have wildcards, don't try too hard. */
2051 static __inline int tcp_lhashfn(unsigned short num)
2052 {
2053 #if 0
2054 return num & (TCP_LHTABLE_SIZE - 1);
2055 #else
2056 return 0;
2057 #endif
2058 }
2059
2060 static __inline int tcp_sk_listen_hashfn(struct sock *sk)
2061 {
2062 #if 0
2063 return tcp_lhashfn(sk->num);
2064 #else
2065 return 0;
2066 #endif
2067 }
2068
2069 #define MAX_TCP_HEADER (128 + MAX_HEADER)
2070
2071 /*
2072 * Never offer a window over 32767 without using window scaling. Some
2073 * poor stacks do signed 16bit maths!
2074 */
2075 #define MAX_TCP_WINDOW 32767U
2076
2077 /* Minimal accepted MSS. It is (60+60+8) - (20+20). */
2078 #define TCP_MIN_MSS 88U
2079
2080 /* Minimal RCV_MSS. */
2081 #define TCP_MIN_RCVMSS 536U
2082
2083 /* After receiving this amount of duplicate ACKs fast retransmit starts. */
2084 #define TCP_FASTRETRANS_THRESH 3
2085
2086 /* Maximal reordering. */
2087 #define TCP_MAX_REORDERING 127
2088
2089 /* Maximal number of ACKs sent quickly to accelerate slow-start. */
2090 #define TCP_MAX_QUICKACKS 16U
2091
2092 /* urg_data states */
2093 #define TCP_URG_VALID 0x0100
2094 #define TCP_URG_NOTYET 0x0200
2095 #define TCP_URG_READ 0x0400
2096
2097 #define TCP_RETR1 3 /*
2098 * This is how many retries it does before it
2099 * tries to figure out if the gateway is
2100 * down. Minimal RFC value is 3; it corresponds
2101 * to ~3sec-8min depending on RTO.
2102 */
2103
2104 #define TCP_RETR2 15 /*
2105 * This should take at least
2106 * 90 minutes to time out.
2107 * RFC1122 says that the limit is 100 sec.
2108 * 15 is ~13-30min depending on RTO.
2109 */
2110
2111 #define TCP_SYN_RETRIES 5 /* number of times to retry active opening a
2112 * connection: ~180sec is RFC minimum */
2113
2114 #define TCP_SYNACK_RETRIES 5 /* number of times to retry passive opening a
2115 * connection: ~180sec is RFC minimum */
2116
2117
2118 #define TCP_ORPHAN_RETRIES 7 /* number of times to retry on an orphaned
2119 * socket. 7 is ~50sec-16min.
2120 */
2121
2122
2123 #define TCP_TIMEWAIT_LEN (60*1000)
2124 //#define TCP_TIMEWAIT_LEN (60*HZ)
2125 /* how long to wait to destroy TIME-WAIT
2126 * state, about 60 seconds */
2127 #define TCP_FIN_TIMEOUT TCP_TIMEWAIT_LEN
2128 /* BSD style FIN_WAIT2 deadlock breaker.
2129 * It used to be 3min, new value is 60sec,
2130 * to combine FIN-WAIT-2 timeout with
2131 * TIME-WAIT timer.
2132 */
2133
2134 #define TCP_DELACK_MAX ((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */
2135 #if HZ >= 100
2136 #define TCP_DELACK_MIN ((unsigned)(HZ/25)) /* minimal time to delay before sending an ACK */
2137 #define TCP_ATO_MIN ((unsigned)(HZ/25))
2138 #else
2139 #define TCP_DELACK_MIN 4U
2140 #define TCP_ATO_MIN 4U
2141 #endif
2142 #define TCP_RTO_MAX ((unsigned)(120*HZ))
2143 #define TCP_RTO_MIN ((unsigned)(HZ/5))
2144 #define TCP_TIMEOUT_INIT ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value */
2145
2146 #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
2147 * for local resources.
2148 */
2149
2150 #define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */
2151 #define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */
2152 #define TCP_KEEPALIVE_INTVL (75*HZ)
2153
2154 #define MAX_TCP_KEEPIDLE 32767
2155 #define MAX_TCP_KEEPINTVL 32767
2156 #define MAX_TCP_KEEPCNT 127
2157 #define MAX_TCP_SYNCNT 127
2158
2159 /* TIME_WAIT reaping mechanism. */
2160 #define TCP_TWKILL_SLOTS 8 /* Please keep this a power of 2. */
2161 #define TCP_TWKILL_PERIOD (TCP_TIMEWAIT_LEN/TCP_TWKILL_SLOTS)
2162
2163 #define TCP_SYNQ_INTERVAL (HZ/5) /* Period of SYNACK timer */
2164 #define TCP_SYNQ_HSIZE 512 /* Size of SYNACK hash table */
2165
2166 #define TCP_PAWS_24DAYS (60 * 60 * 24 * 24)
2167 #define TCP_PAWS_MSL 60 /* Per-host timestamps are invalidated
2168 * after this time. It should be equal
2169 * to (or greater than) TCP_TIMEWAIT_LEN
2170 * to provide reliability equal to that
2171 * provided by the timewait state.
2172 */
2173 #define TCP_PAWS_WINDOW 1 /* Replay window for per-host
2174 * timestamps. It must be less than
2175 * minimal timewait lifetime.
2176 */
2177
2178 #define TCP_TW_RECYCLE_SLOTS_LOG 5
2179 #define TCP_TW_RECYCLE_SLOTS (1<<TCP_TW_RECYCLE_SLOTS_LOG)
2180
2181 /* If the elapsed time is greater than ~4 seconds we are on the "slow" path and no recycling
2182 is required, so we select the tick so that the TCP_TW_RECYCLE_SLOTS slots cover a range of
2183 about 4 seconds (e.g. for HZ <= 1024: tick 7, i.e. 128-jiffy slots, 32*128 ~= 4*HZ jiffies). */
2184
2185 #if 0
2186 #if HZ <= 16 || HZ > 4096
2187 # error Unsupported: HZ <= 16 or HZ > 4096
2188 #elif HZ <= 32
2189 # define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG)
2190 #elif HZ <= 64
2191 # define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG)
2192 #elif HZ <= 128
2193 # define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG)
2194 #elif HZ <= 256
2195 # define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG)
2196 #elif HZ <= 512
2197 # define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG)
2198 #elif HZ <= 1024
2199 # define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG)
2200 #elif HZ <= 2048
2201 # define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG)
2202 #else
2203 # define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG)
2204 #endif
2205 #else
2206 #define TCP_TW_RECYCLE_TICK (0)
2207 #endif
2208
2209 /*
2210 * TCP option
2211 */
2212
2213 #define TCPOPT_NOP 1 /* Padding */
2214 #define TCPOPT_EOL 0 /* End of options */
2215 #define TCPOPT_MSS 2 /* Segment size negotiating */
2216 #define TCPOPT_WINDOW 3 /* Window scaling */
2217 #define TCPOPT_SACK_PERM 4 /* SACK Permitted */
2218 #define TCPOPT_SACK 5 /* SACK Block */
2219 #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
2220
2221 /*
2222 * TCP option lengths
2223 */
2224
2225 #define TCPOLEN_MSS 4
2226 #define TCPOLEN_WINDOW 3
2227 #define TCPOLEN_SACK_PERM 2
2228 #define TCPOLEN_TIMESTAMP 10
2229
2230 /* But this is what stacks really send out. */
2231 #define TCPOLEN_TSTAMP_ALIGNED 12
2232 #define TCPOLEN_WSCALE_ALIGNED 4
2233 #define TCPOLEN_SACKPERM_ALIGNED 4
2234 #define TCPOLEN_SACK_BASE 2
2235 #define TCPOLEN_SACK_BASE_ALIGNED 4
2236 #define TCPOLEN_SACK_PERBLOCK 8
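/* Illustrative sketch (hypothetical helper): why TCPOLEN_TSTAMP_ALIGNED is 12
 * rather than 10 -- stacks pad the timestamp option with two NOPs so that the
 * option block stays 32-bit aligned on the wire.
 */
#if 0
static __inline void tcp_write_aligned_tstamp_sketch(__u32 *ptr, __u32 tsval, __u32 tsecr)
{
    /* NOP, NOP, kind=TIMESTAMP, len=10  ->  12 bytes total with padding. */
    *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                   (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
    *ptr++ = htonl(tsval);      /* TS Value */
    *ptr++ = htonl(tsecr);      /* TS Echo Reply */
}
#endif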
2237
2238 #define TCP_TIME_RETRANS 1 /* Retransmit timer */
2239 #define TCP_TIME_DACK 2 /* Delayed ack timer */
2240 #define TCP_TIME_PROBE0 3 /* Zero window probe timer */
2241 #define TCP_TIME_KEEPOPEN 4 /* Keepalive timer */
2242
2243 #if 0
2244 /* sysctl variables for tcp */
2245 extern int sysctl_max_syn_backlog;
2246 extern int sysctl_tcp_timestamps;
2247 extern int sysctl_tcp_window_scaling;
2248 extern int sysctl_tcp_sack;
2249 extern int sysctl_tcp_fin_timeout;
2250 extern int sysctl_tcp_tw_recycle;
2251 extern int sysctl_tcp_keepalive_time;
2252 extern int sysctl_tcp_keepalive_probes;
2253 extern int sysctl_tcp_keepalive_intvl;
2254 extern int sysctl_tcp_syn_retries;
2255 extern int sysctl_tcp_synack_retries;
2256 extern int sysctl_tcp_retries1;
2257 extern int sysctl_tcp_retries2;
2258 extern int sysctl_tcp_orphan_retries;
2259 extern int sysctl_tcp_syncookies;
2260 extern int sysctl_tcp_retrans_collapse;
2261 extern int sysctl_tcp_stdurg;
2262 extern int sysctl_tcp_rfc1337;
2263 extern int sysctl_tcp_abort_on_overflow;
2264 extern int sysctl_tcp_max_orphans;
2265 extern int sysctl_tcp_max_tw_buckets;
2266 extern int sysctl_tcp_fack;
2267 extern int sysctl_tcp_reordering;
2268 extern int sysctl_tcp_ecn;
2269 extern int sysctl_tcp_dsack;
2270 extern int sysctl_tcp_mem[3];
2271 extern int sysctl_tcp_wmem[3];
2272 extern int sysctl_tcp_rmem[3];
2273 extern int sysctl_tcp_app_win;
2274 extern int sysctl_tcp_adv_win_scale;
2275 extern int sysctl_tcp_tw_reuse;
2276 #endif
2277
2278 extern atomic_t tcp_memory_allocated;
2279 extern atomic_t tcp_sockets_allocated;
2280 extern int tcp_memory_pressure;
2281
2282 struct open_request;
2283
2284 struct or_calltable {
2285 int family;
2286 int (*rtx_syn_ack) (struct sock *sk, struct open_request *req, struct dst_entry*);
2287 void (*send_ack) (struct sk_buff *skb, struct open_request *req);
2288 void (*destructor) (struct open_request *req);
2289 void (*send_reset) (struct sk_buff *skb);
2290 };
2291
2292 struct tcp_v4_open_req {
2293 __u32 loc_addr;
2294 __u32 rmt_addr;
2295 struct ip_options *opt;
2296 };
2297
2298 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
2299 struct tcp_v6_open_req {
2300 struct in6_addr loc_addr;
2301 struct in6_addr rmt_addr;
2302 struct sk_buff *pktopts;
2303 int iif;
2304 };
2305 #endif
2306
2307 /* this structure is too big */
2308 struct open_request {
2309 struct open_request *dl_next; /* Must be first member! */
2310 __u32 rcv_isn;
2311 __u32 snt_isn;
2312 __u16 rmt_port;
2313 __u16 mss;
2314 __u8 retrans;
2315 __u8 __pad;
2316 __u16 snd_wscale : 4,
2317 rcv_wscale : 4,
2318 tstamp_ok : 1,
2319 sack_ok : 1,
2320 wscale_ok : 1,
2321 ecn_ok : 1,
2322 acked : 1;
2323 /* The following two fields can be easily recomputed I think -AK */
2324 __u32 window_clamp; /* window clamp at creation time */
2325 __u32 rcv_wnd; /* rcv_wnd offered first time */
2326 __u32 ts_recent;
2327 unsigned long expires;
2328 struct or_calltable *class;
2329 struct sock *sk;
2330 union {
2331 struct tcp_v4_open_req v4_req;
2332 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
2333 struct tcp_v6_open_req v6_req;
2334 #endif
2335 } af;
2336 };
2337
2338 /* SLAB cache for open requests. */
2339 extern kmem_cache_t *tcp_openreq_cachep;
2340
2341 #define tcp_openreq_alloc() kmem_cache_alloc(tcp_openreq_cachep, SLAB_ATOMIC)
2342 #define tcp_openreq_fastfree(req) kmem_cache_free(tcp_openreq_cachep, req)
2343
2344 static __inline void tcp_openreq_free(struct open_request *req)
2345 {
2346 req->class->destructor(req);
2347 tcp_openreq_fastfree(req);
2348 }
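/* Illustrative sketch (hypothetical helper): the usual life cycle of an
 * open_request -- allocate from the SLAB cache above, point it at a
 * per-family calltable, and eventually release it with tcp_openreq_free(),
 * which runs class->destructor() before returning the object to the cache.
 */
#if 0
static __inline struct open_request *tcp_openreq_new_sketch(struct or_calltable *class)
{
    struct open_request *req = tcp_openreq_alloc();

    if (req != NULL) {
        req->class = class;     /* rtx_syn_ack/send_ack/destructor/... */
        req->retrans = 0;
        req->sk = NULL;
    }
    return req;
}
#endif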
2349
2350 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
2351 #define TCP_INET_FAMILY(fam) ((fam) == AF_INET)
2352 #else
2353 #define TCP_INET_FAMILY(fam) 1
2354 #endif
2355
2356 /*
2357 * Pointers to address related TCP functions
2358 * (i.e. things that depend on the address family)
2359 *
2360 * BUGGG_FUTURE: the whole idea behind this struct is wrong.
2361 * It mixes the socket frontend with the transport function.
2362 * With port sharing between IPv6/v4 its only "advantage" is that
2363 * poor IPv6 has to permanently recheck that it
2364 * is still IPv6 8)8) It must be cleaned up as soon as possible.
2365 * --ANK (980802)
2366 */
2367
2368 struct tcp_func {
2369 int (*queue_xmit) (struct sk_buff *skb);
2370
2371 void (*send_check) (struct sock *sk,
2372 struct tcphdr *th,
2373 int len,
2374 struct sk_buff *skb);
2375
2376 int (*rebuild_header) (struct sock *sk);
2377
2378 int (*conn_request) (struct sock *sk,
2379 struct sk_buff *skb);
2380
2381 struct sock * (*syn_recv_sock) (struct sock *sk,
2382 struct sk_buff *skb,
2383 struct open_request *req,
2384 struct dst_entry *dst);
2385
2386 int (*remember_stamp) (struct sock *sk);
2387
2388 __u16 net_header_len;
2389
2390 int (*setsockopt) (struct sock *sk,
2391 int level,
2392 int optname,
2393 char *optval,
2394 int optlen);
2395
2396 int (*getsockopt) (struct sock *sk,
2397 int level,
2398 int optname,
2399 char *optval,
2400 int *optlen);
2401
2402
2403 void (*addr2sockaddr) (struct sock *sk,
2404 struct sockaddr *);
2405
2406 int sockaddr_len;
2407 };
2408
2409 /*
2410 * The next routines deal with comparing 32 bit unsigned ints
2411 * and worry about wraparound (automatic with unsigned arithmetic).
2412 */
2413
2414 extern __inline int before(__u32 seq1, __u32 seq2)
2415 {
2416 return (__s32)(seq1-seq2) < 0;
2417 }
2418
2419 extern __inline int after(__u32 seq1, __u32 seq2)
2420 {
2421 return (__s32)(seq2-seq1) < 0;
2422 }
2423
2424
2425 /* is s2<=s1<=s3 ? */
2426 extern __inline int between(__u32 seq1, __u32 seq2, __u32 seq3)
2427 {
2428 return seq3 - seq2 >= seq1 - seq2;
2429 }
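/* Illustrative only (hypothetical helper): the signed-difference trick above
 * is wraparound-safe. For example, with snd_una == 0x00000010 just after a
 * sequence wrap and end_seq == 0xfffffff0 from before the wrap,
 * !after(end_seq, snd_una) is still true, i.e. the segment counts as fully
 * acknowledged.
 */
#if 0
static __inline int tcp_seq_fully_acked_sketch(__u32 end_seq, __u32 snd_una)
{
    /* True when end_seq <= snd_una, even across a 2^32 sequence wrap. */
    return !after(end_seq, snd_una);
}
#endif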
2430
2431
2432 extern struct proto tcp_prot;
2433
2434 #ifdef ROS_STATISTICS
2435 extern struct tcp_mib tcp_statistics[NR_CPUS*2];
2436
2437 #define TCP_INC_STATS(field) SNMP_INC_STATS(tcp_statistics, field)
2438 #define TCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(tcp_statistics, field)
2439 #define TCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(tcp_statistics, field)
2440 #endif
2441
2442 extern void tcp_put_port(struct sock *sk);
2443 extern void __tcp_put_port(struct sock *sk);
2444 extern void tcp_inherit_port(struct sock *sk, struct sock *child);
2445
2446 extern void tcp_v4_err(struct sk_buff *skb, u32);
2447
2448 extern void tcp_shutdown (struct sock *sk, int how);
2449
2450 extern int tcp_v4_rcv(struct sk_buff *skb);
2451
2452 extern int tcp_v4_remember_stamp(struct sock *sk);
2453
2454 extern int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw);
2455
2456 extern int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size);
2457 extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
2458
2459 extern int tcp_ioctl(struct sock *sk,
2460 int cmd,
2461 unsigned long arg);
2462
2463 extern int tcp_rcv_state_process(struct sock *sk,
2464 struct sk_buff *skb,
2465 struct tcphdr *th,
2466 unsigned len);
2467
2468 extern int tcp_rcv_established(struct sock *sk,
2469 struct sk_buff *skb,
2470 struct tcphdr *th,
2471 unsigned len);
2472
2473 enum tcp_ack_state_t
2474 {
2475 TCP_ACK_SCHED = 1,
2476 TCP_ACK_TIMER = 2,
2477 TCP_ACK_PUSHED= 4
2478 };
2479
2480 static __inline void tcp_schedule_ack(struct tcp_opt *tp)
2481 {
2482 tp->ack.pending |= TCP_ACK_SCHED;
2483 }
2484
2485 static __inline int tcp_ack_scheduled(struct tcp_opt *tp)
2486 {
2487 return tp->ack.pending&TCP_ACK_SCHED;
2488 }
2489
2490 static __inline void tcp_dec_quickack_mode(struct tcp_opt *tp)
2491 {
2492 if (tp->ack.quick && --tp->ack.quick == 0) {
2493 /* Leaving quickack mode we deflate ATO. */
2494 tp->ack.ato = TCP_ATO_MIN;
2495 }
2496 }
2497
2498 extern void tcp_enter_quickack_mode(struct tcp_opt *tp);
2499
2500 static __inline void tcp_delack_init(struct tcp_opt *tp)
2501 {
2502 memset(&tp->ack, 0, sizeof(tp->ack));
2503 }
2504
2505 static __inline void tcp_clear_options(struct tcp_opt *tp)
2506 {
2507 tp->tstamp_ok = tp->sack_ok = tp->wscale_ok = tp->snd_wscale = 0;
2508 }
2509
2510 enum tcp_tw_status
2511 {
2512 TCP_TW_SUCCESS = 0,
2513 TCP_TW_RST = 1,
2514 TCP_TW_ACK = 2,
2515 TCP_TW_SYN = 3
2516 };
2517
2518
2519 extern enum tcp_tw_status tcp_timewait_state_process(struct tcp_tw_bucket *tw,
2520 struct sk_buff *skb,
2521 struct tcphdr *th,
2522 unsigned len);
2523
2524 extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb,
2525 struct open_request *req,
2526 struct open_request **prev);
2527 extern int tcp_child_process(struct sock *parent,
2528 struct sock *child,
2529 struct sk_buff *skb);
2530 extern void tcp_enter_loss(struct sock *sk, int how);
2531 extern void tcp_clear_retrans(struct tcp_opt *tp);
2532 extern void tcp_update_metrics(struct sock *sk);
2533
2534 extern void tcp_close(struct sock *sk,
2535 long timeout);
2536 extern struct sock * tcp_accept(struct sock *sk, int flags, int *err);
2537 extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait);
2538 extern void tcp_write_space(struct sock *sk);
2539
2540 extern int tcp_getsockopt(struct sock *sk, int level,
2541 int optname, char *optval,
2542 int *optlen);
2543 extern int tcp_setsockopt(struct sock *sk, int level,
2544 int optname, char *optval,
2545 int optlen);
2546 extern void tcp_set_keepalive(struct sock *sk, int val);
2547 extern int tcp_recvmsg(struct sock *sk,
2548 struct msghdr *msg,
2549 int len, int nonblock,
2550 int flags, int *addr_len);
2551
2552 extern int tcp_listen_start(struct sock *sk);
2553
2554 extern void tcp_parse_options(struct sk_buff *skb,
2555 struct tcp_opt *tp,
2556 int estab);
2557
2558 /*
2559 * TCP v4 functions exported for the inet6 API
2560 */
2561
2562 extern int tcp_v4_rebuild_header(struct sock *sk);
2563
2564 extern int tcp_v4_build_header(struct sock *sk,
2565 struct sk_buff *skb);
2566
2567 extern void tcp_v4_send_check(struct sock *sk,
2568 struct tcphdr *th, int len,
2569 struct sk_buff *skb);
2570
2571 extern int tcp_v4_conn_request(struct sock *sk,
2572 struct sk_buff *skb);
2573
2574 extern struct sock * tcp_create_openreq_child(struct sock *sk,
2575 struct open_request *req,
2576 struct sk_buff *skb);
2577
2578 extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk,
2579 struct sk_buff *skb,
2580 struct open_request *req,
2581 struct dst_entry *dst);
2582
2583 extern int tcp_v4_do_rcv(struct sock *sk,
2584 struct sk_buff *skb);
2585
2586 extern int tcp_v4_connect(struct sock *sk,
2587 struct sockaddr *uaddr,
2588 int addr_len);
2589
2590 extern int tcp_connect(struct sock *sk);
2591
2592 extern struct sk_buff * tcp_make_synack(struct sock *sk,
2593 struct dst_entry *dst,
2594 struct open_request *req);
2595
2596 extern int tcp_disconnect(struct sock *sk, int flags);
2597
2598 extern void tcp_unhash(struct sock *sk);
2599
2600 extern int tcp_v4_hash_connecting(struct sock *sk);
2601
2602
2603 /* From syncookies.c */
2604 extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
2605 struct ip_options *opt);
2606 extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
2607 __u16 *mss);
2608
2609 /* tcp_output.c */
2610
2611 extern int tcp_write_xmit(struct sock *, int nonagle);
2612 extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
2613 extern void tcp_xmit_retransmit_queue(struct sock *);
2614 extern void tcp_simple_retransmit(struct sock *);
2615
2616 extern void tcp_send_probe0(struct sock *);
2617 extern void tcp_send_partial(struct sock *);
2618 extern int tcp_write_wakeup(struct sock *);
2619 extern void tcp_send_fin(struct sock *sk);
2620 extern void tcp_send_active_reset(struct sock *sk, int priority);
2621 extern int tcp_send_synack(struct sock *);
2622 extern int tcp_transmit_skb(struct sock *, struct sk_buff *);
2623 extern void tcp_send_skb(struct sock *, struct sk_buff *, int force_queue, unsigned mss_now);
2624 extern void tcp_push_one(struct sock *, unsigned mss_now);
2625 extern void tcp_send_ack(struct sock *sk);
2626 extern void tcp_send_delayed_ack(struct sock *sk);
2627
2628 /* tcp_timer.c */
2629 extern void tcp_init_xmit_timers(struct sock *);
2630 extern void tcp_clear_xmit_timers(struct sock *);
2631
2632 extern void tcp_delete_keepalive_timer (struct sock *);
2633 extern void tcp_reset_keepalive_timer (struct sock *, unsigned long);
2634 extern int tcp_sync_mss(struct sock *sk, u32 pmtu);
2635
2636 extern const char timer_bug_msg[];
2637
2638 /* Read 'sendfile()'-style from a TCP socket */
2639 typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
2640 unsigned int, size_t);
2641 extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
2642 sk_read_actor_t recv_actor);
2643
2644 static __inline void tcp_clear_xmit_timer(struct sock *sk, int what)
2645 {
2646 #if 0
2647 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
2648
2649 switch (what) {
2650 case TCP_TIME_RETRANS:
2651 case TCP_TIME_PROBE0:
2652 tp->pending = 0;
2653
2654 #ifdef TCP_CLEAR_TIMERS
2655 if (timer_pending(&tp->retransmit_timer) &&
2656 del_timer(&tp->retransmit_timer))
2657 __sock_put(sk);
2658 #endif
2659 break;
2660 case TCP_TIME_DACK:
2661 tp->ack.blocked = 0;
2662 tp->ack.pending = 0;
2663
2664 #ifdef TCP_CLEAR_TIMERS
2665 if (timer_pending(&tp->delack_timer) &&
2666 del_timer(&tp->delack_timer))
2667 __sock_put(sk);
2668 #endif
2669 break;
2670 default:
2671 printk(timer_bug_msg);
2672 return;
2673 };
2674 #endif
2675 }
2676
2677 /*
2678 * Reset the retransmission timer
2679 */
2680 static __inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when)
2681 {
2682 #if 0
2683 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
2684
2685 if (when > TCP_RTO_MAX) {
2686 #ifdef TCP_DEBUG
2687 printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr());
2688 #endif
2689 when = TCP_RTO_MAX;
2690 }
2691
2692 switch (what) {
2693 case TCP_TIME_RETRANS:
2694 case TCP_TIME_PROBE0:
2695 tp->pending = what;
2696 tp->timeout = jiffies+when;
2697 if (!mod_timer(&tp->retransmit_timer, tp->timeout))
2698 sock_hold(sk);
2699 break;
2700
2701 case TCP_TIME_DACK:
2702 tp->ack.pending |= TCP_ACK_TIMER;
2703 tp->ack.timeout = jiffies+when;
2704 if (!mod_timer(&tp->delack_timer, tp->ack.timeout))
2705 sock_hold(sk);
2706 break;
2707
2708 default:
2709 printk(KERN_DEBUG "bug: unknown timer value\n");
2710 };
2711 #endif
2712 }
2713
2714 /* Compute the current effective MSS, taking SACKs and IP options,
2715 * and even PMTU discovery events into account.
2716 */
2717
2718 static __inline unsigned int tcp_current_mss(struct sock *sk)
2719 {
2720 #if 0
2721 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
2722 struct dst_entry *dst = __sk_dst_get(sk);
2723 int mss_now = tp->mss_cache;
2724
2725 if (dst && dst->pmtu != tp->pmtu_cookie)
2726 mss_now = tcp_sync_mss(sk, dst->pmtu);
2727
2728 if (tp->eff_sacks)
2729 mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
2730 (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
2731 return mss_now;
2732 #else
2733 return 0;
2734 #endif
2735 }
2736
2737 /* Initialize RCV_MSS value.
2738 * RCV_MSS is our guess about the MSS used by the peer.
2739 * We have no direct information about the MSS.
2740 * It's better to underestimate RCV_MSS than to overestimate it:
2741 * overestimations make us ACK less frequently than needed, while
2742 * underestimations are easier to detect and fix with tcp_measure_rcv_mss().
2743 */
2744
2745 static __inline void tcp_initialize_rcv_mss(struct sock *sk)
2746 {
2747 #if 0
2748 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
2749 unsigned int hint = min(tp->advmss, tp->mss_cache);
2750
2751 hint = min(hint, tp->rcv_wnd/2);
2752 hint = min(hint, TCP_MIN_RCVMSS);
2753 hint = max(hint, TCP_MIN_MSS);
2754
2755 tp->ack.rcv_mss = hint;
2756 #endif
2757 }
2758
2759 static __inline void __tcp_fast_path_on(struct tcp_opt *tp, u32 snd_wnd)
2760 {
2761 #if 0
2762 tp->pred_flags = htonl((tp->tcp_header_len << 26) |
2763 ntohl(TCP_FLAG_ACK) |
2764 snd_wnd);
2765 #endif
2766 }
2767
2768 static __inline void tcp_fast_path_on(struct tcp_opt *tp)
2769 {
2770 #if 0
2771 __tcp_fast_path_on(tp, tp->snd_wnd>>tp->snd_wscale);
2772 #endif
2773 }
2774
2775 static __inline void tcp_fast_path_check(struct sock *sk, struct tcp_opt *tp)
2776 {
2777 #if 0
2778 if (skb_queue_len(&tp->out_of_order_queue) == 0 &&
2779 tp->rcv_wnd &&
2780 atomic_read(&sk->rmem_alloc) < sk->rcvbuf &&
2781 !tp->urg_data)
2782 tcp_fast_path_on(tp);
2783 #endif
2784 }
2785
2786 /* Compute the actual receive window we are currently advertising.
2787 * Rcv_nxt can be after the window if our peer pushes more data
2788 * than the offered window.
2789 */
2790 static __inline u32 tcp_receive_window(struct tcp_opt *tp)
2791 {
2792 #if 0
2793 s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;
2794
2795 if (win < 0)
2796 win = 0;
2797 return (u32) win;
2798 #else
2799 return 0;
2800 #endif
2801 }
2802
2803 /* Choose a new window, without checks for shrinking, and without
2804 * scaling applied to the result. The caller does these things
2805 * if necessary. This is a "raw" window selection.
2806 */
2807 extern u32 __tcp_select_window(struct sock *sk);
2808
2809 /* TCP timestamps are only 32 bits; this causes a slight
2810 * complication on 64-bit systems since we store a snapshot
2811 * of jiffies in the buffer control blocks below. We deliberately
2812 * use only the low 32 bits of jiffies and hide the ugly
2813 * casts with the following macro.
2814 */
2815 #define tcp_time_stamp ((__u32)(jiffies))
2816
2817 /* This is what the send packet queueing engine uses to pass
2818 * TCP per-packet control information to the transmission
2819 * code. We also store the host-order sequence numbers
2820 * here. This is 36 bytes on 32-bit architectures and
2821 * 40 bytes on 64-bit machines; if this grows, please adjust
2822 * the skbuff.h:skbuff->cb[xxx] size appropriately.
2823 */
2824 struct tcp_skb_cb {
2825 union {
2826 #if 0
2827 struct inet_skb_parm h4;
2828 #endif
2829 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
2830 struct inet6_skb_parm h6;
2831 #endif
2832 } header; /* For incoming frames */
2833 __u32 seq; /* Starting sequence number */
2834 __u32 end_seq; /* SEQ + FIN + SYN + datalen */
2835 __u32 when; /* used to compute rtt's */
2836 __u8 flags; /* TCP header flags. */
2837
2838 /* NOTE: These must match up to the flags byte in a
2839 * real TCP header.
2840 */
2841 #define TCPCB_FLAG_FIN 0x01
2842 #define TCPCB_FLAG_SYN 0x02
2843 #define TCPCB_FLAG_RST 0x04
2844 #define TCPCB_FLAG_PSH 0x08
2845 #define TCPCB_FLAG_ACK 0x10
2846 #define TCPCB_FLAG_URG 0x20
2847 #define TCPCB_FLAG_ECE 0x40
2848 #define TCPCB_FLAG_CWR 0x80
2849
2850 __u8 sacked; /* State flags for SACK/FACK. */
2851 #define TCPCB_SACKED_ACKED 0x01 /* SKB ACK'd by a SACK block */
2852 #define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */
2853 #define TCPCB_LOST 0x04 /* SKB is lost */
2854 #define TCPCB_TAGBITS 0x07 /* All tag bits */
2855
2856 #define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */
2857 #define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)
2858
2859 #define TCPCB_URG 0x20 /* Urgent pointer advanced here */
2860
2861 #define TCPCB_AT_TAIL (TCPCB_URG)
2862
2863 __u16 urg_ptr; /* Valid when the URG flag is set. */
2864 __u32 ack_seq; /* Sequence number ACK'd */
2865 };
2866
2867 #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
2868
2869 #define for_retrans_queue(skb, sk, tp) \
2870 for (skb = (sk)->write_queue.next; \
2871 (skb != (tp)->send_head) && \
2872 (skb != (struct sk_buff *)&(sk)->write_queue); \
2873 skb=skb->next)
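/* Illustrative sketch (hypothetical helper): how TCP_SKB_CB() and
 * for_retrans_queue() are meant to be used together -- walk every segment
 * that has been sent but not yet cleanly ACKed and look at its per-packet
 * state, roughly what tcp_xmit_retransmit_queue() does.
 */
#if 0
static __inline void tcp_scan_retrans_queue_sketch(struct sock *sk, struct tcp_opt *tp)
{
    struct sk_buff *skb;

    for_retrans_queue(skb, sk, tp) {
        if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
            tcp_retransmit_skb(sk, skb);    /* marked lost: resend it */
    }
}
#endif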
2874
2875
2876 //#include <net/tcp_ecn.h>
2877
2878
2879 /*
2880 * Compute minimal free write space needed to queue new packets.
2881 */
2882 static __inline int tcp_min_write_space(struct sock *sk)
2883 {
2884 #if 0
2885 return sk->wmem_queued/2;
2886 #else
2887 return 0;
2888 #endif
2889 }
2890
2891 static __inline int tcp_wspace(struct sock *sk)
2892 {
2893 #if 0
2894 return sk->sndbuf - sk->wmem_queued;
2895 #else
2896 return 0;
2897 #endif
2898 }
2899
2900
2901 /* This determines how many packets are "in the network" to the best
2902 * of our knowledge. In many cases it is conservative, but where
2903 * detailed information is available from the receiver (via SACK
2904 * blocks etc.) we can make more aggressive calculations.
2905 *
2906 * Use this for decisions involving congestion control; use just
2907 * tp->packets_out to determine if the send queue is empty or not.
2908 *
2909 * Read this equation as:
2910 *
2911 * "Packets sent once on transmission queue" MINUS
2912 * "Packets left network, but not honestly ACKed yet" PLUS
2913 * "Packets fast retransmitted"
2914 */
2915 static __inline unsigned int tcp_packets_in_flight(struct tcp_opt *tp)
2916 {
2917 #if 0
2918 return tp->packets_out - tp->left_out + tp->retrans_out;
2919 #else
2920 return 0;
2921 #endif
2922 }
2923
2924 /* Recalculate snd_ssthresh; we want to set it to:
2925 *
2926 * one half the current congestion window, but no
2927 * less than two segments
2928 */
2929 static __inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp)
2930 {
2931 #if 0
2932 return max(tp->snd_cwnd >> 1U, 2U);
2933 #else
2934 return 0;
2935 #endif
2936 }
2937
2938 /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
2939 * The exception is the rate-halving phase, when cwnd is decreasing towards
2940 * ssthresh.
2941 */
2942 static __inline __u32 tcp_current_ssthresh(struct tcp_opt *tp)
2943 {
2944 #if 0
2945 if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery))
2946 return tp->snd_ssthresh;
2947 else
2948 return max(tp->snd_ssthresh,
2949 ((tp->snd_cwnd >> 1) +
2950 (tp->snd_cwnd >> 2)));
2951 #else
2952 return 0;
2953 #endif
2954 }
2955
2956 static __inline void tcp_sync_left_out(struct tcp_opt *tp)
2957 {
2958 #if 0
2959 if (tp->sack_ok && tp->sacked_out >= tp->packets_out - tp->lost_out)
2960 tp->sacked_out = tp->packets_out - tp->lost_out;
2961 tp->left_out = tp->sacked_out + tp->lost_out;
2962 #endif
2963 }
2964
2965 extern void tcp_cwnd_application_limited(struct sock *sk);
2966
2967 /* Congestion window validation. (RFC2861) */
2968
2969 static __inline void tcp_cwnd_validate(struct sock *sk, struct tcp_opt *tp)
2970 {
2971 #if 0
2972 if (tp->packets_out >= tp->snd_cwnd) {
2973 /* The network is fully fed. */
2974 tp->snd_cwnd_used = 0;
2975 tp->snd_cwnd_stamp = tcp_time_stamp;
2976 } else {
2977 /* Network starves. */
2978 if (tp->packets_out > tp->snd_cwnd_used)
2979 tp->snd_cwnd_used = tp->packets_out;
2980
2981 if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
2982 tcp_cwnd_application_limited(sk);
2983 }
2984 #endif
2985 }
2986
2987 /* Set the slow start threshold and cwnd without falling back to slow start */
2988 static __inline void __tcp_enter_cwr(struct tcp_opt *tp)
2989 {
2990 #if 0
2991 tp->undo_marker = 0;
2992 tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
2993 tp->snd_cwnd = min(tp->snd_cwnd,
2994 tcp_packets_in_flight(tp) + 1U);
2995 tp->snd_cwnd_cnt = 0;
2996 tp->high_seq = tp->snd_nxt;
2997 tp->snd_cwnd_stamp = tcp_time_stamp;
2998 TCP_ECN_queue_cwr(tp);
2999 #endif
3000 }
3001
3002 static __inline void tcp_enter_cwr(struct tcp_opt *tp)
3003 {
3004 #if 0
3005 tp->prior_ssthresh = 0;
3006 if (tp->ca_state < TCP_CA_CWR) {
3007 __tcp_enter_cwr(tp);
3008 tp->ca_state = TCP_CA_CWR;
3009 }
3010 #endif
3011 }
3012
3013 extern __u32 tcp_init_cwnd(struct tcp_opt *tp);
3014
3015 /* Slow start with delayed ACKs produces a burst of 3 packets, so
3016 * it is safe "de facto".
3017 */
3018 static __inline __u32 tcp_max_burst(struct tcp_opt *tp)
3019 {
3020 return 3;
3021 }
3022
3023 static __inline__ int tcp_minshall_check(struct tcp_opt *tp)
3024 {
3025 #if 0
3026 return after(tp->snd_sml,tp->snd_una) &&
3027 !after(tp->snd_sml, tp->snd_nxt);
3028 #else
3029 return 0;
3030 #endif
3031 }
3032
3033 static __inline void tcp_minshall_update(struct tcp_opt *tp, int mss, struct sk_buff *skb)
3034 {
3035 #if 0
3036 if (skb->len < mss)
3037 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
3038 #endif
3039 }
3040
3041 /* Return 0 if the packet can be sent now without violating Nagle's rules:
3042 1. It is full sized.
3043 2. Or it contains FIN.
3044 3. Or TCP_NODELAY was set.
3045 4. Or TCP_CORK is not set, and all sent packets are ACKed.
3046 With Minshall's modification: all sent small packets are ACKed.
3047 */
3048
3049 static __inline int
3050 tcp_nagle_check(struct tcp_opt *tp, struct sk_buff *skb, unsigned mss_now, int nonagle)
3051 {
3052 #if 0
3053 return (skb->len < mss_now &&
3054 !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
3055 (nonagle == 2 ||
3056 (!nonagle &&
3057 tp->packets_out &&
3058 tcp_minshall_check(tp))));
3059 #else
3060 return 0;
3061 #endif
3062 }
3063
3064 /* This checks if the data bearing packet SKB (usually tp->send_head)
3065 * should be put on the wire right now.
3066 */
3067 static __inline int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb,
3068 unsigned cur_mss, int nonagle)
3069 {
3070 #if 0
3071 /* RFC 1122 - section 4.2.3.4
3072 *
3073 * We must queue if
3074 *
3075 * a) The right edge of this frame exceeds the window
3076 * b) There are packets in flight and we have a small segment
3077 * [SWS avoidance and Nagle algorithm]
3078 * (part of SWS is done on packetization)
3079 * The Minshall version reads: there are no _small_
3080 * segments in flight (tcp_nagle_check).
3081 * c) We have too many packets 'in flight'
3082 *
3083 * Don't use the nagle rule for urgent data (or
3084 * for the final FIN -DaveM).
3085 *
3086 * Also, the Nagle rule does not apply to frames which
3087 * sit in the middle of the queue (they have no chance
3088 * of getting new data), nor when the room at the tail of the skb
3089 * is not enough to save anything seriously (<32 bytes for now).
3090 */
3091
3092 /* Don't be strict about the congestion window for the
3093 * final FIN frame. -DaveM
3094 */
3095 return ((nonagle==1 || tp->urg_mode
3096 || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) &&
3097 ((tcp_packets_in_flight(tp) < tp->snd_cwnd) ||
3098 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) &&
3099 !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd));
3100 #else
3101 return 0;
3102 #endif
3103 }
3104
3105 static __inline void tcp_check_probe_timer(struct sock *sk, struct tcp_opt *tp)
3106 {
3107 #if 0
3108 if (!tp->packets_out && !tp->pending)
3109 tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto);
3110 #endif
3111 }
3112
3113 static __inline int tcp_skb_is_last(struct sock *sk, struct sk_buff *skb)
3114 {
3115 #if 0
3116 return (skb->next == (struct sk_buff*)&sk->write_queue);
3117 #else
3118 return 0;
3119 #endif
3120 }
3121
3122 /* Push out any pending frames which were held back due to
3123 * TCP_CORK or an attempt at coalescing tiny packets.
3124 * The socket must be locked by the caller.
3125 */
3126 static __inline void __tcp_push_pending_frames(struct sock *sk,
3127 struct tcp_opt *tp,
3128 unsigned cur_mss,
3129 int nonagle)
3130 {
3131 #if 0
3132 struct sk_buff *skb = tp->send_head;
3133
3134 if (skb) {
3135 if (!tcp_skb_is_last(sk, skb))
3136 nonagle = 1;
3137 if (!tcp_snd_test(tp, skb, cur_mss, nonagle) ||
3138 tcp_write_xmit(sk, nonagle))
3139 tcp_check_probe_timer(sk, tp);
3140 }
3141 tcp_cwnd_validate(sk, tp);
3142 #endif
3143 }
3144
3145 static __inline void tcp_push_pending_frames(struct sock *sk,
3146 struct tcp_opt *tp)
3147 {
3148 #if 0
3149 __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk), tp->nonagle);
3150 #endif
3151 }
3152
3153 static __inline int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp)
3154 {
3155 #if 0
3156 struct sk_buff *skb = tp->send_head;
3157
3158 return (skb &&
3159 tcp_snd_test(tp, skb, tcp_current_mss(sk),
3160 tcp_skb_is_last(sk, skb) ? 1 : tp->nonagle));
3161 #else
3162 return 0;
3163 #endif
3164 }
3165
3166 static __inline void tcp_init_wl(struct tcp_opt *tp, u32 ack, u32 seq)
3167 {
3168 #if 0
3169 tp->snd_wl1 = seq;
3170 #endif
3171 }
3172
3173 static __inline void tcp_update_wl(struct tcp_opt *tp, u32 ack, u32 seq)
3174 {
3175 #if 0
3176 tp->snd_wl1 = seq;
3177 #endif
3178 }
3179
3180 extern void tcp_destroy_sock(struct sock *sk);
3181
3182
3183 /*
3184 * Calculate(/check) TCP checksum
3185 */
3186 static __inline u16 tcp_v4_check(struct tcphdr *th, int len,
3187 unsigned long saddr, unsigned long daddr,
3188 unsigned long base)
3189 {
3190 #if 0
3191 return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
3192 #else
3193 return 0;
3194 #endif
3195 }
3196
3197 static __inline int __tcp_checksum_complete(struct sk_buff *skb)
3198 {
3199 #if 0
3200 return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
3201 #else
3202 return 0;
3203 #endif
3204 }
3205
3206 static __inline int tcp_checksum_complete(struct sk_buff *skb)
3207 {
3208 #if 0
3209 return skb->ip_summed != CHECKSUM_UNNECESSARY &&
3210 __tcp_checksum_complete(skb);
3211 #else
3212 return 0;
3213 #endif
3214 }
3215
3216 /* Prequeue for VJ style copy to user, combined with checksumming. */