- Move from using include guards to pragma once.
[reactos.git] / reactos / drivers / network / tcpip / include / tcpcore.h
1 /*
2 * COPYRIGHT: See COPYING in the top level directory
3 * PROJECT: ReactOS TCP/IP protocol driver
4 * FILE: include/tcpcore.h
5 * PURPOSE: Transmission Control Protocol definitions
6 * REVISIONS:
7 * CSH 01/01-2003 Ported from linux kernel 2.4.20
8 */
9
10 /*
11 * INET An implementation of the TCP/IP protocol suite for the LINUX
12 * operating system. INET is implemented using the BSD Socket
13 * interface as the means of communication with the user level.
14 *
15 * Definitions for the TCP module.
16 *
17 * Version: @(#)tcp.h 1.0.5 05/23/93
18 *
19 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
20 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 */
27
28 #pragma once
29
30 #include "tcpdef.h"
31
32
33 struct socket;
34
35
36
37 #if 1 /* skbuff */
38
/* Feature markers; defined (with no value) so drivers can test for them. */
#define HAVE_ALLOC_SKB          /* For the drivers to know */
#define HAVE_ALIGNABLE_SKB      /* Likewise */
#define SLAB_SKB                /* Slabified skbuffs */

/* Checksum state codes; their meaning is described in the comment below. */
#define CHECKSUM_NONE           0
#define CHECKSUM_HW             1
#define CHECKSUM_UNNECESSARY    2

/*
 * SKB_DATA_ALIGN(X) adds SMP_CACHE_BYTES-1 to X and masks off the low bits,
 * i.e. rounds X up to a multiple of SMP_CACHE_BYTES when that constant is a
 * power of two.
 */
#define SKB_DATA_ALIGN(X) \
    (((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1))

/*
 * SKB_MAX_ORDER(X,ORDER): bytes left in PAGE_SIZE<<ORDER after removing X and
 * one struct skb_shared_info, with the low SMP_CACHE_BYTES-1 bits cleared.
 */
#define SKB_MAX_ORDER(X,ORDER) \
    (((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1))

/* SKB_MAX_ORDER specialised to order 0 with X bytes reserved. */
#define SKB_MAX_HEAD(X)         (SKB_MAX_ORDER((X),0))

/* SKB_MAX_ORDER specialised to order 2 with nothing reserved. */
#define SKB_MAX_ALLOC           (SKB_MAX_ORDER(0,2))
51
52 /* A. Checksumming of received packets by device.
53 *
54 * NONE: device failed to checksum this packet.
55 * skb->csum is undefined.
56 *
57 * UNNECESSARY: device parsed packet and wouldbe verified checksum.
58 * skb->csum is undefined.
59 * It is bad option, but, unfortunately, many of vendors do this.
60 * Apparently with secret goal to sell you new device, when you
61 * will add new protocol to your host. F.e. IPv6. 8)
62 *
63 * HW: the most generic way. Device supplied checksum of _all_
64 * the packet as seen by netif_rx in skb->csum.
65 * NOTE: Even if device supports only some protocols, but
66 * is able to produce some skb->csum, it MUST use HW,
67 * not UNNECESSARY.
68 *
69 * B. Checksumming on output.
70 *
71 * NONE: skb is checksummed by protocol or csum is not required.
72 *
73 * HW: device is required to csum packet as seen by hard_start_xmit
74 * from skb->h.raw to the end and to record the checksum
75 * at skb->h.raw+skb->csum.
76 *
77 * Device must show its capabilities in dev->features, set
78 * at device setup time.
79 * NETIF_F_HW_CSUM - it is clever device, it is able to checksum
80 * everything.
81 * NETIF_F_NO_CSUM - loopback or reliable single hop media.
82 * NETIF_F_IP_CSUM - device is dumb. It is able to csum only
83 * TCP/UDP over IPv4. Sigh. Vendors like this
84 * way by an unknown reason. Though, see comment above
85 * about CHECKSUM_UNNECESSARY. 8)
86 *
87 * Any questions? No questions, good. --ANK
88 */
89
/*
 * NET_CALLER(arg) - obtain a code address identifying the caller.
 *
 * On every target except __i386__ this is __builtin_return_address(0) and
 * @arg is ignored.  On __i386__ it reads the pointer-sized slot located
 * immediately below the address of @arg.
 * NOTE(review): the i386 form presumably expects @arg to be the function's
 * first stack-passed parameter so that the slot holds the return address --
 * confirm against the calling convention in use.
 */
#ifndef __i386__
#define NET_CALLER(arg) __builtin_return_address(0)
#else
#define NET_CALLER(arg) (*(((void **)&(arg)) - 1))
#endif
95
#ifdef CONFIG_NETFILTER
/*
 * Reference-counted connection tracking handle: a use counter plus the
 * callback invoked to destroy the object.
 */
struct nf_conntrack {
    atomic_t use;
    void (*destroy)(struct nf_conntrack *);
};

/* Per-buffer connection tracking info; points at the owning nf_conntrack. */
struct nf_ct_info {
    struct nf_conntrack *master;
};
#endif /* CONFIG_NETFILTER */
106
107 struct sk_buff_head {
108 /* These two members must be first. */
109 struct sk_buff * next;
110 struct sk_buff * prev;
111
112 __u32 qlen;
113 spinlock_t lock;
114 };
115
116 struct sk_buff;
117
118 #define MAX_SKB_FRAGS 6
119
120 typedef struct skb_frag_struct skb_frag_t;
121
122 struct skb_frag_struct
123 {
124 struct page *page;
125 __u16 page_offset;
126 __u16 size;
127 };
128
129 /* This data is invariant across clones and lives at
130 * the end of the header data, ie. at skb->end.
131 */
132 struct skb_shared_info {
133 atomic_t dataref;
134 unsigned int nr_frags;
135 struct sk_buff *frag_list;
136 skb_frag_t frags[MAX_SKB_FRAGS];
137 };
138
139 struct sk_buff {
140 /* These two members must be first. */
141 struct sk_buff * next; /* Next buffer in list */
142 struct sk_buff * prev; /* Previous buffer in list */
143
144 struct sk_buff_head * list; /* List we are on */
145 struct sock *sk; /* Socket we are owned by */
146 struct timeval stamp; /* Time we arrived */
147 struct net_device *dev; /* Device we arrived on/are leaving by */
148
149 /* Transport layer header */
150 union
151 {
152 struct tcphdr *th;
153 struct udphdr *uh;
154 struct icmphdr *icmph;
155 struct igmphdr *igmph;
156 struct iphdr *ipiph;
157 struct spxhdr *spxh;
158 unsigned char *raw;
159 } h;
160
161 /* Network layer header */
162 union
163 {
164 struct iphdr *iph;
165 struct ipv6hdr *ipv6h;
166 struct arphdr *arph;
167 struct ipxhdr *ipxh;
168 unsigned char *raw;
169 } nh;
170
171 /* Link layer header */
172 union
173 {
174 struct ethhdr *ethernet;
175 unsigned char *raw;
176 } mac;
177
178 struct dst_entry *dst;
179
180 /*
181 * This is the control buffer. It is free to use for every
182 * layer. Please put your private variables there. If you
183 * want to keep them across layers you have to do a skb_clone()
184 * first. This is owned by whoever has the skb queued ATM.
185 */
186 char cb[48];
187
188 unsigned int len; /* Length of actual data */
189 unsigned int data_len;
190 unsigned int csum; /* Checksum */
191 unsigned char __unused, /* Dead field, may be reused */
192 cloned, /* head may be cloned (check refcnt to be sure). */
193 pkt_type, /* Packet class */
194 ip_summed; /* Driver fed us an IP checksum */
195 __u32 priority; /* Packet queueing priority */
196 atomic_t users; /* User count - see datagram.c,tcp.c */
197 unsigned short protocol; /* Packet protocol from driver. */
198 unsigned short security; /* Security level of packet */
199 unsigned int truesize; /* Buffer size */
200
201 unsigned char *head; /* Head of buffer */
202 unsigned char *data; /* Data head pointer */
203 unsigned char *tail; /* Tail pointer */
204 unsigned char *end; /* End pointer */
205
206 void (*destructor)(struct sk_buff *); /* Destruct function */
207 #ifdef CONFIG_NETFILTER
208 /* Can be used for communication between hooks. */
209 unsigned long nfmark;
210 /* Cache info */
211 __u32 nfcache;
212 /* Associated connection, if any */
213 struct nf_ct_info *nfct;
214 #ifdef CONFIG_NETFILTER_DEBUG
215 unsigned int nf_debug;
216 #endif
217 #endif /*CONFIG_NETFILTER*/
218
219 #if defined(CONFIG_HIPPI)
220 union{
221 __u32 ifield;
222 } private;
223 #endif
224
225 #ifdef CONFIG_NET_SCHED
226 __u32 tc_index; /* traffic control index */
227 #endif
228 };
229
/* Upper bounds named for write (W) and read (R) memory; both are 65535. */
#define SK_WMEM_MAX     65535
#define SK_RMEM_MAX     65535
232
233 #if 1
234 //#ifdef __KERNEL__
235 /*
236 * Handling routines are only of interest to the kernel
237 */
238
239 extern void __kfree_skb(struct sk_buff *skb);
240 extern struct sk_buff * alloc_skb(unsigned int size, int priority);
241 extern void kfree_skbmem(struct sk_buff *skb);
242 extern struct sk_buff * skb_clone(struct sk_buff *skb, int priority);
243 extern struct sk_buff * skb_copy(const struct sk_buff *skb, int priority);
244 extern struct sk_buff * pskb_copy(struct sk_buff *skb, int gfp_mask);
245 extern int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask);
246 extern struct sk_buff * skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom);
247 extern struct sk_buff * skb_copy_expand(const struct sk_buff *skb,
248 int newheadroom,
249 int newtailroom,
250 int priority);
/* dev_kfree_skb() is simply another name for kfree_skb(). */
#define dev_kfree_skb(skb_) kfree_skb(skb_)
252 extern void skb_over_panic(struct sk_buff *skb, int len, void *here);
253 extern void skb_under_panic(struct sk_buff *skb, int len, void *here);
254
/*
 * Internal: view the memory at skb->end as the buffer's
 * struct skb_shared_info.
 */
#define skb_shinfo(skb_) ((struct skb_shared_info *)((skb_)->end))
257
258 /**
259 * skb_queue_empty - check if a queue is empty
260 * @list: queue head
261 *
262 * Returns true if the queue is empty, false otherwise.
263 */
264
265 static __inline int skb_queue_empty(struct sk_buff_head *list)
266 {
267 return (list->next == (struct sk_buff *) list);
268 }
269
270 /**
271 * skb_get - reference buffer
272 * @skb: buffer to reference
273 *
274 * Makes another reference to a socket buffer and returns a pointer
275 * to the buffer.
276 */
277
278 static __inline struct sk_buff *skb_get(struct sk_buff *skb)
279 {
280 atomic_inc(&skb->users);
281 return skb;
282 }
283
284 /*
285 * If users==1, we are the only owner and are can avoid redundant
286 * atomic change.
287 */
288
289 /**
290 * kfree_skb - free an sk_buff
291 * @skb: buffer to free
292 *
293 * Drop a reference to the buffer and free it if the usage count has
294 * hit zero.
295 */
296
297 static __inline void kfree_skb(struct sk_buff *skb)
298 {
299 if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
300 __kfree_skb(skb);
301 }
302
303 /* Use this if you didn't touch the skb state [for fast switching] */
304 static __inline void kfree_skb_fast(struct sk_buff *skb)
305 {
306 if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
307 kfree_skbmem(skb);
308 }
309
310 /**
311 * skb_cloned - is the buffer a clone
312 * @skb: buffer to check
313 *
314 * Returns true if the buffer was generated with skb_clone() and is
315 * one of multiple shared copies of the buffer. Cloned buffers are
316 * shared data so must not be written to under normal circumstances.
317 */
318
319 static __inline int skb_cloned(struct sk_buff *skb)
320 {
321 return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1;
322 }
323
324 /**
325 * skb_shared - is the buffer shared
326 * @skb: buffer to check
327 *
328 * Returns true if more than one person has a reference to this
329 * buffer.
330 */
331
332 static __inline int skb_shared(struct sk_buff *skb)
333 {
334 return (atomic_read(&skb->users) != 1);
335 }
336
/**
 * skb_share_check - check if buffer is shared and if so clone it
 * @skb: buffer to check
 * @pri: priority for memory allocation
 *
 * An unshared buffer is returned unchanged.  A shared buffer is cloned
 * with skb_clone(), the caller's reference on the old buffer is dropped,
 * and the clone is returned.  When called from interrupt context or with
 * spinlocks held @pri must be GFP_ATOMIC.
 *
 * NULL is returned on a memory allocation failure; the reference on the
 * original has been dropped in that case as well.
 */
static __inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri)
{
    struct sk_buff *clone;

    if (!skb_shared(skb))
        return skb;

    clone = skb_clone(skb, pri);
    kfree_skb(skb);
    return clone;
}
361
362
/*
 * Copy shared buffers into a new sk_buff.  This is effectively COW on
 * packets, for cases such as a local reader plus forwarding; the usual
 * example is tcpdumping a packet that is being forwarded.
 */

/**
 * skb_unshare - make a copy of a shared buffer
 * @skb: buffer to check
 * @pri: priority for memory allocation
 *
 * If the socket buffer is a clone, a new copy of the data is made with
 * skb_copy(), the reference on the old buffer is dropped and the copy is
 * returned.  If the buffer is not a clone the original is returned.
 * When called with a spinlock held or from interrupt state @pri must be
 * %GFP_ATOMIC.
 *
 * %NULL is returned on a memory allocation failure.
 */
static __inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri)
{
    if (skb_cloned(skb)) {
        struct sk_buff *copy = skb_copy(skb, pri);

        kfree_skb(skb);     /* Free our shared copy */
        return copy;
    }

    return skb;
}
393
394 /**
395 * skb_peek
396 * @list_: list to peek at
397 *
398 * Peek an &sk_buff. Unlike most other operations you _MUST_
399 * be careful with this one. A peek leaves the buffer on the
400 * list and someone else may run off with it. You must hold
401 * the appropriate locks or have a private queue to do this.
402 *
403 * Returns %NULL for an empty list or a pointer to the head element.
404 * The reference count is not incremented and the reference is therefore
405 * volatile. Use with caution.
406 */
407
408 static __inline struct sk_buff *skb_peek(struct sk_buff_head *list_)
409 {
410 struct sk_buff *list = ((struct sk_buff *)list_)->next;
411 if (list == (struct sk_buff *)list_)
412 list = NULL;
413 return list;
414 }
415
416 /**
417 * skb_peek_tail
418 * @list_: list to peek at
419 *
420 * Peek an &sk_buff. Unlike most other operations you _MUST_
421 * be careful with this one. A peek leaves the buffer on the
422 * list and someone else may run off with it. You must hold
423 * the appropriate locks or have a private queue to do this.
424 *
425 * Returns %NULL for an empty list or a pointer to the tail element.
426 * The reference count is not incremented and the reference is therefore
427 * volatile. Use with caution.
428 */
429
430 static __inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_)
431 {
432 struct sk_buff *list = ((struct sk_buff *)list_)->prev;
433 if (list == (struct sk_buff *)list_)
434 list = NULL;
435 return list;
436 }
437
438 /**
439 * skb_queue_len - get queue length
440 * @list_: list to measure
441 *
442 * Return the length of an &sk_buff queue.
443 */
444
445 static __inline __u32 skb_queue_len(struct sk_buff_head *list_)
446 {
447 return(list_->qlen);
448 }
449
450 static __inline void skb_queue_head_init(struct sk_buff_head *list)
451 {
452 spin_lock_init(&list->lock);
453 list->prev = (struct sk_buff *)list;
454 list->next = (struct sk_buff *)list;
455 list->qlen = 0;
456 }
457
458 /*
459 * Insert an sk_buff at the start of a list.
460 *
461 * The "__skb_xxxx()" functions are the non-atomic ones that
462 * can only be called with interrupts disabled.
463 */
464
465 /**
466 * __skb_queue_head - queue a buffer at the list head
467 * @list: list to use
468 * @newsk: buffer to queue
469 *
470 * Queue a buffer at the start of a list. This function takes no locks
471 * and you must therefore hold required locks before calling it.
472 *
473 * A buffer cannot be placed on two lists at the same time.
474 */
475
476 static __inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
477 {
478 struct sk_buff *prev, *next;
479
480 newsk->list = list;
481 list->qlen++;
482 prev = (struct sk_buff *)list;
483 next = prev->next;
484 newsk->next = next;
485 newsk->prev = prev;
486 next->prev = newsk;
487 prev->next = newsk;
488 }
489
490
491 /**
492 * skb_queue_head - queue a buffer at the list head
493 * @list: list to use
494 * @newsk: buffer to queue
495 *
496 * Queue a buffer at the start of the list. This function takes the
497 * list lock and can be used safely with other locking &sk_buff functions
498 * safely.
499 *
500 * A buffer cannot be placed on two lists at the same time.
501 */
502
503 static __inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
504 {
505 unsigned long flags;
506
507 spin_lock_irqsave(&list->lock, flags);
508 __skb_queue_head(list, newsk);
509 spin_unlock_irqrestore(&list->lock, flags);
510 }
511
512 /**
513 * __skb_queue_tail - queue a buffer at the list tail
514 * @list: list to use
515 * @newsk: buffer to queue
516 *
517 * Queue a buffer at the end of a list. This function takes no locks
518 * and you must therefore hold required locks before calling it.
519 *
520 * A buffer cannot be placed on two lists at the same time.
521 */
522
523
524 static __inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
525 {
526 struct sk_buff *prev, *next;
527
528 newsk->list = list;
529 list->qlen++;
530 next = (struct sk_buff *)list;
531 prev = next->prev;
532 newsk->next = next;
533 newsk->prev = prev;
534 next->prev = newsk;
535 prev->next = newsk;
536 }
537
538 /**
539 * skb_queue_tail - queue a buffer at the list tail
540 * @list: list to use
541 * @newsk: buffer to queue
542 *
543 * Queue a buffer at the tail of the list. This function takes the
544 * list lock and can be used safely with other locking &sk_buff functions
545 * safely.
546 *
547 * A buffer cannot be placed on two lists at the same time.
548 */
549
550 static __inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
551 {
552 unsigned long flags;
553
554 spin_lock_irqsave(&list->lock, flags);
555 __skb_queue_tail(list, newsk);
556 spin_unlock_irqrestore(&list->lock, flags);
557 }
558
559 /**
560 * __skb_dequeue - remove from the head of the queue
561 * @list: list to dequeue from
562 *
563 * Remove the head of the list. This function does not take any locks
564 * so must be used with appropriate locks held only. The head item is
565 * returned or %NULL if the list is empty.
566 */
567
568 static __inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
569 {
570 struct sk_buff *next, *prev, *result;
571
572 prev = (struct sk_buff *) list;
573 next = prev->next;
574 result = NULL;
575 if (next != prev) {
576 result = next;
577 next = next->next;
578 list->qlen--;
579 next->prev = prev;
580 prev->next = next;
581 result->next = NULL;
582 result->prev = NULL;
583 result->list = NULL;
584 }
585 return result;
586 }
587
588 /**
589 * skb_dequeue - remove from the head of the queue
590 * @list: list to dequeue from
591 *
592 * Remove the head of the list. The list lock is taken so the function
593 * may be used safely with other locking list functions. The head item is
594 * returned or %NULL if the list is empty.
595 */
596
597 static __inline struct sk_buff *skb_dequeue(struct sk_buff_head *list)
598 {
599 unsigned long flags;
600 struct sk_buff *result;
601
602 spin_lock_irqsave(&list->lock, flags);
603 result = __skb_dequeue(list);
604 spin_unlock_irqrestore(&list->lock, flags);
605 return result;
606 }
607
608 /*
609 * Insert a packet on a list.
610 */
611
612 static __inline void __skb_insert(struct sk_buff *newsk,
613 struct sk_buff * prev, struct sk_buff *next,
614 struct sk_buff_head * list)
615 {
616 newsk->next = next;
617 newsk->prev = prev;
618 next->prev = newsk;
619 prev->next = newsk;
620 newsk->list = list;
621 list->qlen++;
622 }
623
624 /**
625 * skb_insert - insert a buffer
626 * @old: buffer to insert before
627 * @newsk: buffer to insert
628 *
629 * Place a packet before a given packet in a list. The list locks are taken
630 * and this function is atomic with respect to other list locked calls
631 * A buffer cannot be placed on two lists at the same time.
632 */
633
634 static __inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
635 {
636 unsigned long flags;
637
638 spin_lock_irqsave(&old->list->lock, flags);
639 __skb_insert(newsk, old->prev, old, old->list);
640 spin_unlock_irqrestore(&old->list->lock, flags);
641 }
642
643 /*
644 * Place a packet after a given packet in a list.
645 */
646
647 static __inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk)
648 {
649 __skb_insert(newsk, old, old->next, old->list);
650 }
651
652 /**
653 * skb_append - append a buffer
654 * @old: buffer to insert after
655 * @newsk: buffer to insert
656 *
657 * Place a packet after a given packet in a list. The list locks are taken
658 * and this function is atomic with respect to other list locked calls.
659 * A buffer cannot be placed on two lists at the same time.
660 */
661
662
663 static __inline void skb_append(struct sk_buff *old, struct sk_buff *newsk)
664 {
665 unsigned long flags;
666
667 spin_lock_irqsave(&old->list->lock, flags);
668 __skb_append(old, newsk);
669 spin_unlock_irqrestore(&old->list->lock, flags);
670 }
671
672 /*
673 * remove sk_buff from list. _Must_ be called atomically, and with
674 * the list known..
675 */
676
677 static __inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
678 {
679 struct sk_buff * next, * prev;
680
681 list->qlen--;
682 next = skb->next;
683 prev = skb->prev;
684 skb->next = NULL;
685 skb->prev = NULL;
686 skb->list = NULL;
687 next->prev = prev;
688 prev->next = next;
689 }
690
691 /**
692 * skb_unlink - remove a buffer from a list
693 * @skb: buffer to remove
694 *
695 * Place a packet after a given packet in a list. The list locks are taken
696 * and this function is atomic with respect to other list locked calls
697 *
698 * Works even without knowing the list it is sitting on, which can be
699 * handy at times. It also means that THE LIST MUST EXIST when you
700 * unlink. Thus a list must have its contents unlinked before it is
701 * destroyed.
702 */
703
704 static __inline void skb_unlink(struct sk_buff *skb)
705 {
706 struct sk_buff_head *list = skb->list;
707
708 if(list) {
709 unsigned long flags;
710
711 spin_lock_irqsave(&list->lock, flags);
712 if(skb->list == list)
713 __skb_unlink(skb, skb->list);
714 spin_unlock_irqrestore(&list->lock, flags);
715 }
716 }
717
/* XXX: more streamlined implementation */

/**
 * __skb_dequeue_tail - remove from the tail of the queue
 * @list: list to dequeue from
 *
 * Removes the last element of the list.  No locks are taken, so
 * appropriate locks must already be held.  The tail item is returned,
 * or %NULL if the list is empty.
 */
static __inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
{
    struct sk_buff *last = skb_peek_tail(list);

    if (last != NULL)
        __skb_unlink(last, list);

    return last;
}
736
737 /**
738 * skb_dequeue - remove from the head of the queue
739 * @list: list to dequeue from
740 *
741 * Remove the head of the list. The list lock is taken so the function
742 * may be used safely with other locking list functions. The tail item is
743 * returned or %NULL if the list is empty.
744 */
745
746 static __inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
747 {
748 unsigned long flags;
749 struct sk_buff *result;
750
751 spin_lock_irqsave(&list->lock, flags);
752 result = __skb_dequeue_tail(list);
753 spin_unlock_irqrestore(&list->lock, flags);
754 return result;
755 }
756
757 static __inline int skb_is_nonlinear(const struct sk_buff *skb)
758 {
759 return skb->data_len;
760 }
761
762 static __inline int skb_headlen(const struct sk_buff *skb)
763 {
764 return skb->len - skb->data_len;
765 }
766
/*
 * Sanity checks that call out_of_line_bug() when the buffer has page
 * fragments (PAGE), a fragment list (FRAG) or is non-linear (LINEAR).
 */
#define SKB_PAGE_ASSERT(skb) \
    do { \
        if (skb_shinfo(skb)->nr_frags) \
            out_of_line_bug(); \
    } while (0)

#define SKB_FRAG_ASSERT(skb) \
    do { \
        if (skb_shinfo(skb)->frag_list) \
            out_of_line_bug(); \
    } while (0)

#define SKB_LINEAR_ASSERT(skb) \
    do { \
        if (skb_is_nonlinear(skb)) \
            out_of_line_bug(); \
    } while (0)
770
771 /*
772 * Add data to an sk_buff
773 */
774
775 static __inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
776 {
777 unsigned char *tmp=skb->tail;
778 SKB_LINEAR_ASSERT(skb);
779 skb->tail+=len;
780 skb->len+=len;
781 return tmp;
782 }
783
/**
 * skb_put - add data to a buffer
 * @skb: buffer to use
 * @len: amount of data to add
 *
 * The reference implementation extends the used data area of the buffer,
 * panics via skb_over_panic() if that exceeds the total buffer size, and
 * returns a pointer to the first byte of the extra data.
 *
 * In this header that implementation is compiled out (#if 0): the
 * function does not touch @skb and always returns NULL.
 */
static __inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
{
#if 0
    unsigned char *old_tail = skb->tail;

    SKB_LINEAR_ASSERT(skb);
    skb->tail += len;
    skb->len += len;
    if (skb->tail > skb->end) {
        skb_over_panic(skb, len, current_text_addr());
    }
    return old_tail;
#else
    return NULL;
#endif
}
809
810 static __inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
811 {
812 skb->data-=len;
813 skb->len+=len;
814 return skb->data;
815 }
816
/**
 * skb_push - add data to the start of a buffer
 * @skb: buffer to use
 * @len: amount of data to add
 *
 * The reference implementation extends the used data area at the buffer
 * start, panics via skb_under_panic() if that exceeds the headroom, and
 * returns a pointer to the first byte of the extra data.
 *
 * In this header that implementation is compiled out (#if 0): the
 * function does not touch @skb and always returns NULL.
 */
static __inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
{
#if 0
    skb->data -= len;
    skb->len += len;
    if (skb->data < skb->head) {
        skb_under_panic(skb, len, current_text_addr());
    }
    return skb->data;
#else
    return NULL;
#endif
}
840
841 static __inline char *__skb_pull(struct sk_buff *skb, unsigned int len)
842 {
843 skb->len-=len;
844 if (skb->len < skb->data_len)
845 out_of_line_bug();
846 return skb->data+=len;
847 }
848
849 /**
850 * skb_pull - remove data from the start of a buffer
851 * @skb: buffer to use
852 * @len: amount of data to remove
853 *
854 * This function removes data from the start of a buffer, returning
855 * the memory to the headroom. A pointer to the next data in the buffer
856 * is returned. Once the data has been pulled future pushes will overwrite
857 * the old data.
858 */
859
860 static __inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len)
861 {
862 if (len > skb->len)
863 return NULL;
864 return __skb_pull(skb,len);
865 }
866
867 extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta);
868
869 static __inline char *__pskb_pull(struct sk_buff *skb, unsigned int len)
870 {
871 if (len > skb_headlen(skb) &&
872 __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL)
873 return NULL;
874 skb->len -= len;
875 return skb->data += len;
876 }
877
878 static __inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len)
879 {
880 if (len > skb->len)
881 return NULL;
882 return __pskb_pull(skb,len);
883 }
884
885 static __inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
886 {
887 if (len <= skb_headlen(skb))
888 return 1;
889 if (len > skb->len)
890 return 0;
891 return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL);
892 }
893
894 /**
895 * skb_headroom - bytes at buffer head
896 * @skb: buffer to check
897 *
898 * Return the number of bytes of free space at the head of an &sk_buff.
899 */
900
901 static __inline int skb_headroom(const struct sk_buff *skb)
902 {
903 return skb->data-skb->head;
904 }
905
906 /**
907 * skb_tailroom - bytes at buffer end
908 * @skb: buffer to check
909 *
910 * Return the number of bytes of free space at the tail of an sk_buff
911 */
912
913 static __inline int skb_tailroom(const struct sk_buff *skb)
914 {
915 return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail;
916 }
917
918 /**
919 * skb_reserve - adjust headroom
920 * @skb: buffer to alter
921 * @len: bytes to move
922 *
923 * Increase the headroom of an empty &sk_buff by reducing the tail
924 * room. This is only allowed for an empty buffer.
925 */
926
927 static __inline void skb_reserve(struct sk_buff *skb, unsigned int len)
928 {
929 skb->data+=len;
930 skb->tail+=len;
931 }
932
933 extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
934
935 static __inline void __skb_trim(struct sk_buff *skb, unsigned int len)
936 {
937 if (!skb->data_len) {
938 skb->len = len;
939 skb->tail = skb->data+len;
940 } else {
941 ___pskb_trim(skb, len, 0);
942 }
943 }
944
945 /**
946 * skb_trim - remove end from a buffer
947 * @skb: buffer to alter
948 * @len: new length
949 *
950 * Cut the length of a buffer down by removing data from the tail. If
951 * the buffer is already under the length specified it is not modified.
952 */
953
954 static __inline void skb_trim(struct sk_buff *skb, unsigned int len)
955 {
956 if (skb->len > len) {
957 __skb_trim(skb, len);
958 }
959 }
960
961
962 static __inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
963 {
964 if (!skb->data_len) {
965 skb->len = len;
966 skb->tail = skb->data+len;
967 return 0;
968 } else {
969 return ___pskb_trim(skb, len, 1);
970 }
971 }
972
973 static __inline int pskb_trim(struct sk_buff *skb, unsigned int len)
974 {
975 if (len < skb->len)
976 return __pskb_trim(skb, len);
977 return 0;
978 }
979
980 /**
981 * skb_orphan - orphan a buffer
982 * @skb: buffer to orphan
983 *
984 * If a buffer currently has an owner then we call the owner's
985 * destructor function and make the @skb unowned. The buffer continues
986 * to exist but is no longer charged to its former owner.
987 */
988
989
990 static __inline void skb_orphan(struct sk_buff *skb)
991 {
992 if (skb->destructor)
993 skb->destructor(skb);
994 skb->destructor = NULL;
995 skb->sk = NULL;
996 }
997
/**
 * skb_queue_purge - empty a list
 * @list: list to empty
 *
 * Deletes all buffers on an &sk_buff list.  Each buffer is removed with
 * skb_dequeue(), which takes the list lock, and one reference is dropped
 * with kfree_skb().
 */
static __inline void skb_queue_purge(struct sk_buff_head *list)
{
    struct sk_buff *skb;

    for (skb = skb_dequeue(list); skb != NULL; skb = skb_dequeue(list))
        kfree_skb(skb);
}
1014
/**
 * __skb_queue_purge - empty a list
 * @list: list to empty
 *
 * Deletes all buffers on an &sk_buff list.  Each buffer is removed with
 * __skb_dequeue() and one reference is dropped with kfree_skb().  The
 * list lock is not taken; the caller must hold the relevant locks.
 */
static __inline void __skb_queue_purge(struct sk_buff_head *list)
{
    struct sk_buff *skb;

    for (skb = __skb_dequeue(list); skb != NULL; skb = __skb_dequeue(list))
        kfree_skb(skb);
}
1031
/**
 * __dev_alloc_skb - allocate an skbuff for sending
 * @length: length to allocate
 * @gfp_mask: get_free_pages mask, passed to alloc_skb
 *
 * Allocates a new &sk_buff of @length + 16 bytes and reserves those 16
 * extra bytes as headroom.  Users should allocate the headroom they
 * think they need without accounting for the built in space, which is
 * used for optimisations.
 *
 * %NULL is returned if there is no free memory.
 */
static __inline struct sk_buff *__dev_alloc_skb(unsigned int length,
                                                int gfp_mask)
{
    struct sk_buff *skb = alloc_skb(length + 16, gfp_mask);

    if (skb != NULL)
        skb_reserve(skb, 16);

    return skb;
}
1055
/**
 * dev_alloc_skb - allocate an skbuff for sending
 * @length: length to allocate
 *
 * The reference implementation forwards to __dev_alloc_skb() with
 * GFP_ATOMIC.
 *
 * In this header that implementation is compiled out (#if 0): @length is
 * ignored and the function always returns NULL.
 */
static __inline struct sk_buff *dev_alloc_skb(unsigned int length)
{
#if 0
    return __dev_alloc_skb(length, GFP_ATOMIC);
#else
    return NULL;
#endif
}
1077
/**
 * skb_cow - copy header of skb when it is required
 * @skb: buffer to cow
 * @headroom: needed headroom
 *
 * The reference implementation reallocates the data through
 * pskb_expand_head() when the skb lacks sufficient headroom or its data
 * part is shared, returning an error if the reallocation fails.
 *
 * In this header that implementation is compiled out (#if 0): the
 * function does not touch @skb and always returns 0.
 */
static __inline int
skb_cow(struct sk_buff *skb, unsigned int headroom)
{
#if 0
    int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);

    if (delta < 0)
        delta = 0;

    if (delta || skb_cloned(skb))
        return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC);
    return 0;
#else
    return 0;
#endif
}
1107
/**
 *	skb_linearize - convert paged skb to linear one
 *	@skb: buffer to linearize
 *	@gfp: allocation mode
 *
 *	Returns -ENOMEM if there is no free memory; otherwise returns zero
 *	and the old skb data is released.
 */
extern int skb_linearize(struct sk_buff *skb, int gfp);
1116
1117 static __inline void *kmap_skb_frag(const skb_frag_t *frag)
1118 {
1119 #if 0
1120 #ifdef CONFIG_HIGHMEM
1121 if (in_irq())
1122 out_of_line_bug();
1123
1124 local_bh_disable();
1125 #endif
1126 return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
1127 #else
1128 return NULL;
1129 #endif
1130 }
1131
/*
 * Stub: does nothing.  The original kunmap_atomic() call and the
 * CONFIG_HIGHMEM local_bh_enable() are preserved under "#if 0".
 */
static __inline void kunmap_skb_frag(void *vaddr)
{
#if 0
    kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
#ifdef CONFIG_HIGHMEM
    local_bh_enable();
#endif
#else
    (void)vaddr;    /* argument intentionally unused in the stub */
#endif
}
1141
/*
 * skb_queue_walk - loop header that visits every buffer linked on a queue.
 * @queue: pointer to the list head; it is cast to struct sk_buff * and
 *         used as the end marker.
 * @skb:   cursor lvalue of type struct sk_buff *.
 *
 * The loop starts at (queue)->next and follows ->next links until the
 * cursor equals the head again.  Both arguments are evaluated on every
 * iteration, so they must be free of side effects.  The cursor is fully
 * parenthesized so that an expression such as *pp expands to
 * (*pp)->next rather than *pp->next.
 */
#define skb_queue_walk(queue, skb)                          \
    for ((skb) = (queue)->next;                             \
         ((skb) != (struct sk_buff *)(queue));              \
         (skb) = (skb)->next)
1146
1147
1148 extern struct sk_buff * skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err);
1149 extern unsigned int datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait);
1150 extern int skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size);
1151 extern int skb_copy_datagram_iovec(const struct sk_buff *from, int offset, struct iovec *to,int size);
1152 extern int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump);
1153 extern int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, int hlen, struct iovec *iov);
1154 extern void skb_free_datagram(struct sock * sk, struct sk_buff *skb);
1155
1156 extern unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum);
1157 extern int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
1158 extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum);
1159 extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
1160
1161 extern void skb_init(void);
1162 extern void skb_add_mtu(int mtu);
1163
1164 #ifdef CONFIG_NETFILTER
1165 static __inline void
1166 nf_conntrack_put(struct nf_ct_info *nfct)
1167 {
1168 if (nfct && atomic_dec_and_test(&nfct->master->use))
1169 nfct->master->destroy(nfct->master);
1170 }
1171 static __inline void
1172 nf_conntrack_get(struct nf_ct_info *nfct)
1173 {
1174 if (nfct)
1175 atomic_inc(&nfct->master->use);
1176 }
1177 #endif
1178
1179
1180 #endif /* skbuff */
1181
1182
1183
1184
1185
1186 struct sock;
1187
/*
 * Minimal stand-in definition of struct sockaddr: a single int member.
 * Also reachable through the typedef name _sockaddr.
 */
typedef struct sockaddr {
    int x;
} _sockaddr;
1192
1193
1194 struct msghdr {
1195 void * msg_name; /* Socket name */
1196 int msg_namelen; /* Length of name */
1197 struct iovec * msg_iov; /* Data blocks */
1198 __kernel_size_t msg_iovlen; /* Number of blocks */
1199 void * msg_control; /* Per protocol magic (eg BSD file descriptor passing) */
1200 __kernel_size_t msg_controllen; /* Length of cmsg list */
1201 unsigned msg_flags;
1202 };
1203
1204
/*
 * IP protocol blocks we attach to sockets.
 *   socket layer -> transport layer interface.
 *   The transport -> network interface is defined by struct inet_proto.
 *
 * Every hook receives the socket it operates on as its first argument.
 */
struct proto {
    void            (*close)(struct sock *sk, long timeout);
    int             (*connect)(struct sock *sk,
                               struct sockaddr *uaddr, int addr_len);
    int             (*disconnect)(struct sock *sk, int flags);

    struct sock *   (*accept)(struct sock *sk, int flags, int *err);

    int             (*ioctl)(struct sock *sk, int cmd, unsigned long arg);
    int             (*init)(struct sock *sk);
    int             (*destroy)(struct sock *sk);
    void            (*shutdown)(struct sock *sk, int how);
    int             (*setsockopt)(struct sock *sk, int level, int optname,
                                  char *optval, int optlen);
    int             (*getsockopt)(struct sock *sk, int level, int optname,
                                  char *optval, int *option);
    int             (*sendmsg)(struct sock *sk, struct msghdr *msg, int len);
    int             (*recvmsg)(struct sock *sk, struct msghdr *msg, int len,
                               int noblock, int flags, int *addr_len);
    int             (*bind)(struct sock *sk,
                            struct sockaddr *uaddr, int addr_len);

    int             (*backlog_rcv)(struct sock *sk, struct sk_buff *skb);

    /* Keeping track of sk's, looking them up, and port selection methods. */
    void            (*hash)(struct sock *sk);
    void            (*unhash)(struct sock *sk);
    int             (*get_port)(struct sock *sk, unsigned short snum);

    char            name[32];

    /*
     * Usage counters.  The disabled original declared one slot per CPU
     * (stats[NR_CPUS]) and padded each slot with
     * u8 __pad[SMP_CACHE_BYTES - sizeof(int)]; this port uses 32
     * unpadded slots instead.
     */
    struct {
        int inuse;
    } stats[32];
};
1253
1254
1255
1256
1257
1258
1259
1260 /* This defines a selective acknowledgement block. */
1261 struct tcp_sack_block {
1262 __u32 start_seq;
1263 __u32 end_seq;
1264 };
1265
1266
1267 struct tcp_opt {
1268 int tcp_header_len; /* Bytes of tcp header to send */
1269
1270 /*
1271 * Header prediction flags
1272 * 0x5?10 << 16 + snd_wnd in net byte order
1273 */
1274 __u32 pred_flags;
1275
1276 /*
1277 * RFC793 variables by their proper names. This means you can
1278 * read the code and the spec side by side (and laugh ...)
1279 * See RFC793 and RFC1122. The RFC writes these in capitals.
1280 */
1281 __u32 rcv_nxt; /* What we want to receive next */
1282 __u32 snd_nxt; /* Next sequence we send */
1283
1284 __u32 snd_una; /* First byte we want an ack for */
1285 __u32 snd_sml; /* Last byte of the most recently transmitted small packet */
1286 __u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */
1287 __u32 lsndtime; /* timestamp of last sent data packet (for restart window) */
1288
1289 /* Delayed ACK control data */
1290 struct {
1291 __u8 pending; /* ACK is pending */
1292 __u8 quick; /* Scheduled number of quick acks */
1293 __u8 pingpong; /* The session is interactive */
1294 __u8 blocked; /* Delayed ACK was blocked by socket lock*/
1295 __u32 ato; /* Predicted tick of soft clock */
1296 unsigned long timeout; /* Currently scheduled timeout */
1297 __u32 lrcvtime; /* timestamp of last received data packet*/
1298 __u16 last_seg_size; /* Size of last incoming segment */
1299 __u16 rcv_mss; /* MSS used for delayed ACK decisions */
1300 } ack;
1301
1302 /* Data for direct copy to user */
1303 struct {
1304 //struct sk_buff_head prequeue;
1305 struct task_struct *task;
1306 struct iovec *iov;
1307 int memory;
1308 int len;
1309 } ucopy;
1310
1311 __u32 snd_wl1; /* Sequence for window update */
1312 __u32 snd_wnd; /* The window we expect to receive */
1313 __u32 max_window; /* Maximal window ever seen from peer */
1314 __u32 pmtu_cookie; /* Last pmtu seen by socket */
1315 __u16 mss_cache; /* Cached effective mss, not including SACKS */
1316 __u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
1317 __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */
1318 __u8 ca_state; /* State of fast-retransmit machine */
1319 __u8 retransmits; /* Number of unrecovered RTO timeouts. */
1320
1321 __u8 reordering; /* Packet reordering metric. */
1322 __u8 queue_shrunk; /* Write queue has been shrunk recently.*/
1323 __u8 defer_accept; /* User waits for some data after accept() */
1324
1325 /* RTT measurement */
1326 __u8 backoff; /* backoff */
1327 __u32 srtt; /* smothed round trip time << 3 */
1328 __u32 mdev; /* medium deviation */
1329 __u32 mdev_max; /* maximal mdev for the last rtt period */
1330 __u32 rttvar; /* smoothed mdev_max */
1331 __u32 rtt_seq; /* sequence number to update rttvar */
1332 __u32 rto; /* retransmit timeout */
1333
1334 __u32 packets_out; /* Packets which are "in flight" */
1335 __u32 left_out; /* Packets which leaved network */
1336 __u32 retrans_out; /* Retransmitted packets out */
1337
1338
1339 /*
1340 * Slow start and congestion control (see also Nagle, and Karn & Partridge)
1341 */
1342 __u32 snd_ssthresh; /* Slow start size threshold */
1343 __u32 snd_cwnd; /* Sending congestion window */
1344 __u16 snd_cwnd_cnt; /* Linear increase counter */
1345 __u16 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
1346 __u32 snd_cwnd_used;
1347 __u32 snd_cwnd_stamp;
1348
1349 /* Two commonly used timers in both sender and receiver paths. */
1350 unsigned long timeout;
1351 struct timer_list retransmit_timer; /* Resend (no ack) */
1352 struct timer_list delack_timer; /* Ack delay */
1353
1354 struct sk_buff_head out_of_order_queue; /* Out of order segments go here */
1355
1356 struct tcp_func *af_specific; /* Operations which are AF_INET{4,6} specific */
1357 struct sk_buff *send_head; /* Front of stuff to transmit */
1358 struct page *sndmsg_page; /* Cached page for sendmsg */
1359 u32 sndmsg_off; /* Cached offset for sendmsg */
1360
1361 __u32 rcv_wnd; /* Current receiver window */
1362 __u32 rcv_wup; /* rcv_nxt on last window update sent */
1363 __u32 write_seq; /* Tail(+1) of data held in tcp send buffer */
1364 __u32 pushed_seq; /* Last pushed seq, required to talk to windows */
1365 __u32 copied_seq; /* Head of yet unread data */
1366 /*
1367 * Options received (usually on last packet, some only on SYN packets).
1368 */
1369 char tstamp_ok, /* TIMESTAMP seen on SYN packet */
1370 wscale_ok, /* Wscale seen on SYN packet */
1371 sack_ok; /* SACK seen on SYN packet */
1372 char saw_tstamp; /* Saw TIMESTAMP on last packet */
1373 __u8 snd_wscale; /* Window scaling received from sender */
1374 __u8 rcv_wscale; /* Window scaling to send to receiver */
1375 __u8 nonagle; /* Disable Nagle algorithm? */
1376 __u8 keepalive_probes; /* num of allowed keep alive probes */
1377
1378 /* PAWS/RTTM data */
1379 __u32 rcv_tsval; /* Time stamp value */
1380 __u32 rcv_tsecr; /* Time stamp echo reply */
1381 __u32 ts_recent; /* Time stamp to echo next */
1382 long ts_recent_stamp;/* Time we stored ts_recent (for aging) */
1383
1384 /* SACKs data */
1385 __u16 user_mss; /* mss requested by user in ioctl */
1386 __u8 dsack; /* D-SACK is scheduled */
1387 __u8 eff_sacks; /* Size of SACK array to send with next packet */
1388 struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
1389 struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/
1390
1391 __u32 window_clamp; /* Maximal window to advertise */
1392 __u32 rcv_ssthresh; /* Current window clamp */
1393 __u8 probes_out; /* unanswered 0 window probes */
1394 __u8 num_sacks; /* Number of SACK blocks */
1395 __u16 advmss; /* Advertised MSS */
1396
1397 __u8 syn_retries; /* num of allowed syn retries */
1398 __u8 ecn_flags; /* ECN status bits. */
1399 __u16 prior_ssthresh; /* ssthresh saved at recovery start */
1400 __u32 lost_out; /* Lost packets */
1401 __u32 sacked_out; /* SACK'd packets */
1402 __u32 fackets_out; /* FACK'd packets */
1403 __u32 high_seq; /* snd_nxt at onset of congestion */
1404
1405 __u32 retrans_stamp; /* Timestamp of the last retransmit,
1406 * also used in SYN-SENT to remember stamp of
1407 * the first SYN. */
1408 __u32 undo_marker; /* tracking retrans started here. */
1409 int undo_retrans; /* number of undoable retransmissions. */
1410 __u32 urg_seq; /* Seq of received urgent pointer */
1411 __u16 urg_data; /* Saved octet of OOB data and control flags */
1412 __u8 pending; /* Scheduled timer event */
1413 __u8 urg_mode; /* In urgent mode */
1414 __u32 snd_up; /* Urgent pointer */
1415
1416 /* The syn_wait_lock is necessary only to avoid tcp_get_info having
1417 * to grab the main lock sock while browsing the listening hash
1418 * (otherwise it's deadlock prone).
1419 * This lock is acquired in read mode only from tcp_get_info() and
1420 * it's acquired in write mode _only_ from code that is actively
1421 * changing the syn_wait_queue. All readers that are holding
1422 * the master sock lock don't need to grab this lock in read mode
1423 * too as the syn_wait_queue writes are always protected from
1424 * the main sock lock.
1425 */
1426 rwlock_t syn_wait_lock;
1427 struct tcp_listen_opt *listen_opt;
1428
1429 /* FIFO of established children */
1430 struct open_request *accept_queue;
1431 struct open_request *accept_queue_tail;
1432
1433 int write_pending; /* A write to socket waits to start. */
1434
1435 unsigned int keepalive_time; /* time before keep alive takes place */
1436 unsigned int keepalive_intvl; /* time interval between keep alive probes */
1437 int linger2;
1438
1439 unsigned long last_synq_overflow;
1440 };
1441
1442
1443
1444
1445 /* This is the per-socket lock. The spinlock provides a synchronization
1446 * between user contexts and software interrupt processing, whereas the
1447 * mini-semaphore synchronizes multiple users amongst themselves.
1448 */
1449 typedef struct {
1450 spinlock_t slock;
1451 unsigned int users;
1452 wait_queue_head_t wq;
1453 } socket_lock_t;
1454
1455 struct sock {
1456 /* Socket demultiplex comparisons on incoming packets. */
1457 __u32 daddr; /* Foreign IPv4 addr */
1458 __u32 rcv_saddr; /* Bound local IPv4 addr */
1459 __u16 dport; /* Destination port */
1460 unsigned short num; /* Local port */
1461 int bound_dev_if; /* Bound device index if != 0 */
1462
1463 /* Main hash linkage for various protocol lookup tables. */
1464 struct sock *next;
1465 struct sock **pprev;
1466 struct sock *bind_next;
1467 struct sock **bind_pprev;
1468
1469 volatile unsigned char state, /* Connection state */
1470 zapped; /* In ax25 & ipx means not linked */
1471 __u16 sport; /* Source port */
1472
1473 unsigned short family; /* Address family */
1474 unsigned char reuse; /* SO_REUSEADDR setting */
1475 unsigned char shutdown;
1476 atomic_t refcnt; /* Reference count */
1477
1478 socket_lock_t lock; /* Synchronizer... */
1479 int rcvbuf; /* Size of receive buffer in bytes */
1480
1481 wait_queue_head_t *sleep; /* Sock wait queue */
1482 struct dst_entry *dst_cache; /* Destination cache */
1483 rwlock_t dst_lock;
1484 atomic_t rmem_alloc; /* Receive queue bytes committed */
1485 struct sk_buff_head receive_queue; /* Incoming packets */
1486 atomic_t wmem_alloc; /* Transmit queue bytes committed */
1487 struct sk_buff_head write_queue; /* Packet sending queue */
1488 atomic_t omem_alloc; /* "o" is "option" or "other" */
1489 int wmem_queued; /* Persistent queue size */
1490 int forward_alloc; /* Space allocated forward. */
1491 __u32 saddr; /* Sending source */
1492 unsigned int allocation; /* Allocation mode */
1493 int sndbuf; /* Size of send buffer in bytes */
1494 struct sock *prev;
1495
1496 /* Not all are volatile, but some are, so we might as well say they all are.
1497 * XXX Make this a flag word -DaveM
1498 */
1499 volatile char dead,
1500 done,
1501 urginline,
1502 keepopen,
1503 linger,
1504 destroy,
1505 no_check,
1506 broadcast,
1507 bsdism;
1508 unsigned char debug;
1509 unsigned char rcvtstamp;
1510 unsigned char use_write_queue;
1511 unsigned char userlocks;
1512 /* Hole of 3 bytes. Try to pack. */
1513 int route_caps;
1514 int proc;
1515 unsigned long lingertime;
1516
1517 int hashent;
1518 struct sock *pair;
1519
1520 /* The backlog queue is special, it is always used with
1521 * the per-socket spinlock held and requires low latency
1522 * access. Therefore we special case it's implementation.
1523 */
1524 struct {
1525 struct sk_buff *head;
1526 struct sk_buff *tail;
1527 } backlog;
1528
1529 rwlock_t callback_lock;
1530
1531 /* Error queue, rarely used. */
1532 struct sk_buff_head error_queue;
1533
1534 struct proto *prot;
1535
1536 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
1537 union {
1538 struct ipv6_pinfo af_inet6;
1539 } net_pinfo;
1540 #endif
1541
1542 union {
1543 struct tcp_opt af_tcp;
1544 #if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
1545 struct raw_opt tp_raw4;
1546 #endif
1547 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
1548 struct raw6_opt tp_raw;
1549 #endif /* CONFIG_IPV6 */
1550 #if defined(CONFIG_SPX) || defined (CONFIG_SPX_MODULE)
1551 struct spx_opt af_spx;
1552 #endif /* CONFIG_SPX */
1553
1554 } tp_pinfo;
1555
1556 int err, err_soft; /* Soft holds errors that don't
1557 cause failure but are the cause
1558 of a persistent failure not just
1559 'timed out' */
1560 unsigned short ack_backlog;
1561 unsigned short max_ack_backlog;
1562 __u32 priority;
1563 unsigned short type;
1564 unsigned char localroute; /* Route locally only */
1565 unsigned char protocol;
1566 // struct ucred peercred;
1567 int rcvlowat;
1568 long rcvtimeo;
1569 long sndtimeo;
1570
1571 #ifdef CONFIG_FILTER
1572 /* Socket Filtering Instructions */
1573 struct sk_filter *filter;
1574 #endif /* CONFIG_FILTER */
1575
1576 /* This is where all the private (optional) areas that don't
1577 * overlap will eventually live.
1578 */
1579 union {
1580 void *destruct_hook;
1581 // struct unix_opt af_unix;
1582 #if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
1583 struct inet_opt af_inet;
1584 #endif
1585 #if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
1586 struct atalk_sock af_at;
1587 #endif
1588 #if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
1589 struct ipx_opt af_ipx;
1590 #endif
1591 #if defined (CONFIG_DECNET) || defined(CONFIG_DECNET_MODULE)
1592 struct dn_scp dn;
1593 #endif
1594 #if defined (CONFIG_PACKET) || defined(CONFIG_PACKET_MODULE)
1595 struct packet_opt *af_packet;
1596 #endif
1597 #if defined(CONFIG_X25) || defined(CONFIG_X25_MODULE)
1598 x25_cb *x25;
1599 #endif
1600 #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
1601 ax25_cb *ax25;
1602 #endif
1603 #if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
1604 nr_cb *nr;
1605 #endif
1606 #if defined(CONFIG_ROSE) || defined(CONFIG_ROSE_MODULE)
1607 rose_cb *rose;
1608 #endif
1609 #if defined(CONFIG_PPPOE) || defined(CONFIG_PPPOE_MODULE)
1610 struct pppox_opt *pppox;
1611 #endif
1612 struct netlink_opt *af_netlink;
1613 #if defined(CONFIG_ECONET) || defined(CONFIG_ECONET_MODULE)
1614 struct econet_opt *af_econet;
1615 #endif
1616 #if defined(CONFIG_ATM) || defined(CONFIG_ATM_MODULE)
1617 struct atm_vcc *af_atm;
1618 #endif
1619 #if defined(CONFIG_IRDA) || defined(CONFIG_IRDA_MODULE)
1620 struct irda_sock *irda;
1621 #endif
1622 #if defined(CONFIG_WAN_ROUTER) || defined(CONFIG_WAN_ROUTER_MODULE)
1623 struct wanpipe_opt *af_wanpipe;
1624 #endif
1625 } protinfo;
1626
1627
1628 /* This part is used for the timeout functions. */
1629 struct timer_list timer; /* This is the sock cleanup timer. */
1630 struct timeval stamp;
1631
1632 /* Identd and reporting IO signals */
1633 struct socket *socket;
1634
1635 /* RPC layer private data */
1636 void *user_data;
1637
1638 /* Callbacks */
1639 void (*state_change)(struct sock *sk);
1640 void (*data_ready)(struct sock *sk,int bytes);
1641 void (*write_space)(struct sock *sk);
1642 void (*error_report)(struct sock *sk);
1643
1644 int (*backlog_rcv) (struct sock *sk,
1645 struct sk_buff *skb);
1646 void (*destruct)(struct sock *sk);
1647 };
1648
1649
1650
1651
1652 #if 1 /* dst (_NET_DST_H) */
1653
1654 #if 0
1655 #include <linux/config.h>
1656 #include <net/neighbour.h>
1657 #endif
1658
/*
 * Route cache debug level:
 *   0 - no debugging messages
 *   1 - rare events and bugs (default)
 *   2 - trace mode.
 * This port selects level 0.
 */
#define RT_CACHE_DEBUG      0

/* DST_GC_* tunables, each expressed as a multiple of HZ. */
#define DST_GC_MIN          (1*HZ)
#define DST_GC_INC          (5*HZ)
#define DST_GC_MAX          (120*HZ)
1669
1670 struct sk_buff;
1671
1672 struct dst_entry
1673 {
1674 struct dst_entry *next;
1675 atomic_t __refcnt; /* client references */
1676 int __use;
1677 struct net_device *dev;
1678 int obsolete;
1679 int flags;
1680 #define DST_HOST 1
1681 unsigned long lastuse;
1682 unsigned long expires;
1683
1684 unsigned mxlock;
1685 unsigned pmtu;
1686 unsigned window;
1687 unsigned rtt;
1688 unsigned rttvar;
1689 unsigned ssthresh;
1690 unsigned cwnd;
1691 unsigned advmss;
1692 unsigned reordering;
1693
1694 unsigned long rate_last; /* rate limiting for ICMP */
1695 unsigned long rate_tokens;
1696
1697 int error;
1698
1699 struct neighbour *neighbour;
1700 struct hh_cache *hh;
1701
1702 int (*input)(struct sk_buff*);
1703 int (*output)(struct sk_buff*);
1704
1705 #ifdef CONFIG_NET_CLS_ROUTE
1706 __u32 tclassid;
1707 #endif
1708
1709 struct dst_ops *ops;
1710
1711 char info[0];
1712 };
1713
1714
1715 struct dst_ops
1716 {
1717 unsigned short family;
1718 unsigned short protocol;
1719 unsigned gc_thresh;
1720
1721 int (*gc)(void);
1722 struct dst_entry * (*check)(struct dst_entry *, __u32 cookie);
1723 struct dst_entry * (*reroute)(struct dst_entry *,
1724 struct sk_buff *);
1725 void (*destroy)(struct dst_entry *);
1726 struct dst_entry * (*negative_advice)(struct dst_entry *);
1727 void (*link_failure)(struct sk_buff *);
1728 int entry_size;
1729
1730 atomic_t entries;
1731 kmem_cache_t *kmem_cachep;
1732 };
1733
1734 #ifdef __KERNEL__
1735
1736 static __inline void dst_hold(struct dst_entry * dst)
1737 {
1738 atomic_inc(&dst->__refcnt);
1739 }
1740
1741 static __inline
1742 struct dst_entry * dst_clone(struct dst_entry * dst)
1743 {
1744 if (dst)
1745 atomic_inc(&dst->__refcnt);
1746 return dst;
1747 }
1748
1749 static __inline
1750 void dst_release(struct dst_entry * dst)
1751 {
1752 if (dst)
1753 atomic_dec(&dst->__refcnt);
1754 }
1755
/* Allocation / teardown entry points; defined outside this header. */
extern void *dst_alloc(struct dst_ops *ops);
extern void  __dst_free(struct dst_entry *dst);
extern void  dst_destroy(struct dst_entry *dst);
1759
1760 static __inline
1761 void dst_free(struct dst_entry * dst)
1762 {
1763 if (dst->obsolete > 1)
1764 return;
1765 if (!atomic_read(&dst->__refcnt)) {
1766 dst_destroy(dst);
1767 return;
1768 }
1769 __dst_free(dst);
1770 }
1771
1772 static __inline void dst_confirm(struct dst_entry *dst)
1773 {
1774 if (dst)
1775 neigh_confirm(dst->neighbour);
1776 }
1777
1778 static __inline void dst_negative_advice(struct dst_entry **dst_p)
1779 {
1780 struct dst_entry * dst = *dst_p;
1781 if (dst && dst->ops->negative_advice)
1782 *dst_p = dst->ops->negative_advice(dst);
1783 }
1784
1785 static __inline void dst_link_failure(struct sk_buff *skb)
1786 {
1787 struct dst_entry * dst = skb->dst;
1788 if (dst && dst->ops && dst->ops->link_failure)
1789 dst->ops->link_failure(skb);
1790 }
1791
1792 static __inline void dst_set_expires(struct dst_entry *dst, int timeout)
1793 {
1794 unsigned long expires = jiffies + timeout;
1795
1796 if (expires == 0)
1797 expires = 1;
1798
1799 if (dst->expires == 0 || (long)(dst->expires - expires) > 0)
1800 dst->expires = expires;
1801 }
1802
1803 extern void dst_init(void);
1804
#endif /* __KERNEL__ (closes the "#ifdef __KERNEL__" above; the "#if 1" dst section is still open here) */
1806
1807
1808
1809 #if 1
1810 /* dummy types */
1811
1812
1813 #endif
1814
/* Debug switches: both are defined to 1 in this port. */
#define TCP_DEBUG           1
#define FASTRETRANS_DEBUG   1

/*
 * TCP_CLEAR_TIMERS, when defined, cancels timers that are not required.
 * It is explicitly left undefined here.
 */
#undef TCP_CLEAR_TIMERS
1820
1821 #if 0
1822 #include <linux/config.h>
1823 #include <linux/tcp.h>
1824 #include <linux/slab.h>
1825 #include <linux/cache.h>
1826 #include <net/checksum.h>
1827 #include <net/sock.h>
1828 #else
1829 #include "linux.h"
1830 #endif
1831
1832 /* This is for all connections with a full identity, no wildcards.
1833 * New scheme, half the table is for TIME_WAIT, the other half is
1834 * for the rest. I'll experiment with dynamic table growth later.
1835 */
1836 struct tcp_ehash_bucket {
1837 rwlock_t lock;
1838 struct sock *chain;
1839 } __attribute__((__aligned__(8)));
1840
/*
 * Size of the listening-socket hash, i.e. the table for all sockets
 * which possess wildcards.  Yes, really, this is all you need.
 */
#define TCP_LHTABLE_SIZE    32
1843
1844 /* There are a few simple rules, which allow for local port reuse by
1845 * an application. In essence:
1846 *
1847 * 1) Sockets bound to different interfaces may share a local port.
1848 * Failing that, goto test 2.
1849 * 2) If all sockets have sk->reuse set, and none of them are in
1850 * TCP_LISTEN state, the port may be shared.
1851 * Failing that, goto test 3.
1852 * 3) If all sockets are bound to a specific sk->rcv_saddr local
1853 * address, and none of them are the same, the port may be
1854 * shared.
1855 * Failing this, the port cannot be shared.
1856 *
1857 * The interesting point, is test #2. This is what an FTP server does
1858 * all day. To optimize this case we use a specific flag bit defined
1859 * below. As we add sockets to a bind bucket list, we perform a
1860 * check of: (newsk->reuse && (newsk->state != TCP_LISTEN))
1861 * As long as all sockets added to a bind bucket pass this test,
1862 * the flag bit will be set.
1863 * The resulting situation is that tcp_v[46]_verify_bind() can just check
1864 * for this flag bit, if it is set and the socket trying to bind has
1865 * sk->reuse set, we don't even have to walk the owners list at all,
1866 * we return that it is ok to bind this socket to the requested local port.
1867 *
1868 * Sounds like a lot of work, but it is worth it. In a more naive
1869 * implementation (ie. current FreeBSD etc.) the entire list of ports
1870 * must be walked for each data port opened by an ftp server. Needless
1871 * to say, this does not scale at all. With a couple thousand FTP
1872 * users logged onto your box, isn't it nice to know that new data
1873 * ports are created in O(1) time? I thought so. ;-) -DaveM
1874 */
/* One local port together with the list of sockets bound to it. */
struct tcp_bind_bucket {
    unsigned short          port;
    signed short            fastreuse;  /* port-reuse fast-path flag */
    struct tcp_bind_bucket  *next;
    struct sock             *owners;
    struct tcp_bind_bucket  **pprev;
};
1882
1883 struct tcp_bind_hashbucket {
1884 spinlock_t lock;
1885 struct tcp_bind_bucket *chain;
1886 };
1887
1888 extern struct tcp_hashinfo {
1889 /* This is for sockets with full identity only. Sockets here will
1890 * always be without wildcards and will have the following invariant:
1891 *
1892 * TCP_ESTABLISHED <= sk->state < TCP_CLOSE
1893 *
1894 * First half of the table is for sockets not in TIME_WAIT, second half
1895 * is for TIME_WAIT sockets only.
1896 */
1897 struct tcp_ehash_bucket *__tcp_ehash;
1898
1899 /* Ok, let's try this, I give up, we do need a local binding
1900 * TCP hash as well as the others for fast bind/connect.
1901 */
1902 struct tcp_bind_hashbucket *__tcp_bhash;
1903
1904 int __tcp_bhash_size;
1905 int __tcp_ehash_size;
1906
1907 /* All sockets in TCP_LISTEN state will be in here. This is the only
1908 * table where wildcard'd TCP sockets can exist. Hash function here
1909 * is just local port number.
1910 */
1911 struct sock *__tcp_listening_hash[TCP_LHTABLE_SIZE];
1912
1913 /* All the above members are written once at bootup and
1914 * never written again _or_ are predominantly read-access.
1915 *
1916 * Now align to a new cache line as all the following members
1917 * are often dirty.
1918 */
1919 rwlock_t __tcp_lhash_lock ____cacheline_aligned;
1920 atomic_t __tcp_lhash_users;
1921 wait_queue_head_t __tcp_lhash_wait;
1922 spinlock_t __tcp_portalloc_lock;
1923 } tcp_hashinfo;
1924
/* Short names for the members of the global tcp_hashinfo (usable as lvalues). */
#define tcp_ehash               (tcp_hashinfo.__tcp_ehash)
#define tcp_bhash               (tcp_hashinfo.__tcp_bhash)
#define tcp_ehash_size          (tcp_hashinfo.__tcp_ehash_size)
#define tcp_bhash_size          (tcp_hashinfo.__tcp_bhash_size)
#define tcp_listening_hash      (tcp_hashinfo.__tcp_listening_hash)
#define tcp_lhash_lock          (tcp_hashinfo.__tcp_lhash_lock)
#define tcp_lhash_users         (tcp_hashinfo.__tcp_lhash_users)
#define tcp_lhash_wait          (tcp_hashinfo.__tcp_lhash_wait)
#define tcp_portalloc_lock      (tcp_hashinfo.__tcp_portalloc_lock)
1934
1935 extern kmem_cache_t *tcp_bucket_cachep;
1936 extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
1937 unsigned short snum);
1938 extern void tcp_bucket_unlock(struct sock *sk);
1939 extern int tcp_port_rover;
1940 extern struct sock *tcp_v4_lookup_listener(u32 addr, unsigned short hnum, int dif);
1941
1942 /* These are AF independent. */
1943 static __inline int tcp_bhashfn(__u16 lport)
1944 {
1945 return (lport & (tcp_bhash_size - 1));
1946 }
1947
1948 /* This is a TIME_WAIT bucket. It works around the memory consumption
1949 * problems of sockets in such a state on heavily loaded servers, but
1950 * without violating the protocol specification.
1951 */
1952 struct tcp_tw_bucket {
1953 /* These _must_ match the beginning of struct sock precisely.
1954 * XXX Yes I know this is gross, but I'd have to edit every single
1955 * XXX networking file if I created a "struct sock_header". -DaveM
1956 */
1957 __u32 daddr;
1958 __u32 rcv_saddr;
1959 __u16 dport;
1960 unsigned short num;
1961 int bound_dev_if;
1962 struct sock *next;
1963 struct sock **pprev;
1964 struct sock *bind_next;
1965 struct sock **bind_pprev;
1966 unsigned char state,
1967 substate; /* "zapped" is replaced with "substate" */
1968 __u16 sport;
1969 unsigned short family;
1970 unsigned char reuse,
1971 rcv_wscale; /* It is also TW bucket specific */
1972 atomic_t refcnt;
1973
1974 /* And these are ours. */
1975 int hashent;
1976 int timeout;
1977 __u32 rcv_nxt;
1978 __u32 snd_nxt;
1979 __u32 rcv_wnd;
1980 __u32 ts_recent;
1981 long ts_recent_stamp;
1982 unsigned long ttd;
1983 struct tcp_bind_bucket *tb;
1984 struct tcp_tw_bucket *next_death;
1985 struct tcp_tw_bucket **pprev_death;
1986
1987 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1988 struct in6_addr v6_daddr;
1989 struct in6_addr v6_rcv_saddr;
1990 #endif
1991 };
1992
1993 extern kmem_cache_t *tcp_timewait_cachep;
1994
1995 static __inline void tcp_tw_put(struct tcp_tw_bucket *tw)
1996 {
1997 if (atomic_dec_and_test(&tw->refcnt)) {
1998 #ifdef INET_REFCNT_DEBUG
1999 printk(KERN_DEBUG "tw_bucket %p released\n", tw);
2000 #endif
2001 kmem_cache_free(tcp_timewait_cachep, tw);
2002 }
2003 }
2004
2005 extern atomic_t tcp_orphan_count;
2006 extern int tcp_tw_count;
2007 extern void tcp_time_wait(struct sock *sk, int state, int timeo);
2008 extern void tcp_timewait_kill(struct tcp_tw_bucket *tw);
2009 extern void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo);
2010 extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);
2011
2012
/*
 * Socket demux engine toys.
 *
 * TCP_COMBINED_PORTS packs two 16-bit ports into one __u32.  Which port
 * lands in the upper half depends on whether __BIG_ENDIAN is defined.
 */
#ifdef __BIG_ENDIAN
#  define TCP_COMBINED_PORTS(__sport, __dport) \
        (((__u32)(__sport)<<16) | (__u32)(__dport))
#else /* __LITTLE_ENDIAN */
#  define TCP_COMBINED_PORTS(__sport, __dport) \
        (((__u32)(__dport)<<16) | (__u32)(__sport))
#endif
2021
/*
 * IPv4 socket match helpers.
 *
 * TCP_V4_ADDR_COOKIE declares a __u64 named __name holding both addresses
 * on 64-bit builds and expands to nothing on 32-bit builds.  The 64-bit
 * form carries its own trailing ';', so it is used without one.
 *
 * TCP_IPV4_MATCH compares a socket against addresses, a combined port
 * word and an interface index.  The port test reads the 32 bits starting
 * at (__sk)->dport, i.e. dport together with the member that follows it.
 * On 64-bit builds the two addresses are likewise compared as one 64-bit
 * load starting at (__sk)->daddr.  A socket whose bound_dev_if is 0
 * matches any __dif.
 */
#if (BITS_PER_LONG == 64)
#  ifdef __BIG_ENDIAN
#    define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
        __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr));
#  else /* __LITTLE_ENDIAN */
#    define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
        __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr));
#  endif /* __BIG_ENDIAN */
#  define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \
        (((*((__u64 *)&((__sk)->daddr))) == (__cookie)) && \
         ((*((__u32 *)&((__sk)->dport))) == (__ports)) && \
         (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
#else /* 32-bit arch */
#  define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr)
#  define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \
        (((__sk)->daddr == (__saddr)) && \
         ((__sk)->rcv_saddr == (__daddr)) && \
         ((*((__u32 *)&((__sk)->dport))) == (__ports)) && \
         (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
#endif /* 64-bit arch */
2042
/*
 * IPv6 counterpart of TCP_IPV4_MATCH: same port and interface tests,
 * plus family == AF_INET6 and ipv6_addr_cmp() on the addresses stored in
 * net_pinfo.af_inet6 (both comparisons must return 0).
 */
#define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \
    (((*((__u32 *)&((__sk)->dport))) == (__ports)) && \
     ((__sk)->family == AF_INET6) && \
     !ipv6_addr_cmp(&(__sk)->net_pinfo.af_inet6.daddr, (__saddr)) && \
     !ipv6_addr_cmp(&(__sk)->net_pinfo.af_inet6.rcv_saddr, (__daddr)) && \
     (!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
2049
/*
 * Listening-hash function.  These can have wildcards, don't try too hard.
 *
 * The original computation, num & (TCP_LHTABLE_SIZE - 1), is disabled
 * with "#if 0"; as compiled here every port hashes to bucket 0.
 */
static __inline int tcp_lhashfn(unsigned short num)
{
    int bucket = 0;

#if 0
    bucket = num & (TCP_LHTABLE_SIZE - 1);
#endif
    return bucket;
}
2059
/*
 * Listening-hash bucket for a socket.  The original call,
 * tcp_lhashfn(sk->num), is disabled with "#if 0"; as compiled here the
 * result is always 0 and @sk is not dereferenced.
 */
static __inline int tcp_sk_listen_hashfn(struct sock *sk)
{
    int bucket = 0;

#if 0
    bucket = tcp_lhashfn(sk->num);
#endif
    return bucket;
}
2068
/* 128 bytes on top of MAX_HEADER. */
#define MAX_TCP_HEADER          (128 + MAX_HEADER)

/*
 * Never offer a window over 32767 without using window scaling. Some
 * poor stacks do signed 16bit maths!
 */
#define MAX_TCP_WINDOW          32767U

/* Minimal accepted MSS. It is (60+60+8) - (20+20). */
#define TCP_MIN_MSS             88U

/* Minimal RCV_MSS. */
#define TCP_MIN_RCVMSS          536U

/* After receiving this amount of duplicate ACKs fast retransmit starts. */
#define TCP_FASTRETRANS_THRESH  3

/* Maximal reordering. */
#define TCP_MAX_REORDERING      127

/* Maximal number of ACKs sent quickly to accelerate slow-start. */
#define TCP_MAX_QUICKACKS       16U

/* urg_data states */
#define TCP_URG_VALID           0x0100
#define TCP_URG_NOTYET          0x0200
#define TCP_URG_READ            0x0400
2096
/*
 * This is how many retries it does before it tries to figure out if the
 * gateway is down.  Minimal RFC value is 3; it corresponds to
 * ~3sec-8min depending on RTO.
 */
#define TCP_RETR1               3

/*
 * This should take at least 90 minutes to time out.
 * RFC1122 says that the limit is 100 sec.
 * 15 is ~13-30min depending on RTO.
 */
#define TCP_RETR2               15

/*
 * Number of times to retry active opening a connection:
 * ~180sec is RFC minimum.
 */
#define TCP_SYN_RETRIES         5

/*
 * Number of times to retry passive opening a connection:
 * ~180sec is RFC minimum.
 */
#define TCP_SYNACK_RETRIES      5

/*
 * Number of times to retry on an orphaned socket.
 * 7 is ~50sec-16min.
 */
#define TCP_ORPHAN_RETRIES      7
2121
2122
/*
 * How long to wait to destroy TIME-WAIT state, about 60 seconds.
 * The upstream definition, (60*HZ), is replaced here by (60*1000).
 */
#define TCP_TIMEWAIT_LEN        (60*1000)
//#define TCP_TIMEWAIT_LEN (60*HZ)

/*
 * BSD style FIN_WAIT2 deadlock breaker.  It used to be 3min, new value
 * is 60sec, to combine FIN-WAIT-2 timeout with TIME-WAIT timer.
 */
#define TCP_FIN_TIMEOUT         TCP_TIMEWAIT_LEN
2133
/* Delayed-ACK, ATO and RTO bounds, derived from HZ. */

/* Maximal time to delay before sending an ACK. */
#define TCP_DELACK_MAX          ((unsigned)(HZ/5))

#if HZ >= 100
/* Minimal time to delay before sending an ACK. */
#define TCP_DELACK_MIN          ((unsigned)(HZ/25))
#define TCP_ATO_MIN             ((unsigned)(HZ/25))
#else
/* Fixed value of 4 is used when HZ is below 100. */
#define TCP_DELACK_MIN          4U
#define TCP_ATO_MIN             4U
#endif

#define TCP_RTO_MAX             ((unsigned)(120*HZ))
#define TCP_RTO_MIN             ((unsigned)(HZ/5))

/* RFC 1122 initial RTO value. */
#define TCP_TIMEOUT_INIT        ((unsigned)(3*HZ))

/* Maximal interval between probes for local resources. */
#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U))
2149
/* Keepalive defaults. */
#define TCP_KEEPALIVE_TIME      (120*60*HZ) /* two hours */
#define TCP_KEEPALIVE_PROBES    9           /* Max of 9 keepalive probes */
#define TCP_KEEPALIVE_INTVL     (75*HZ)

/* Upper limits for the keepalive and SYN-count settings. */
#define MAX_TCP_KEEPIDLE        32767
#define MAX_TCP_KEEPINTVL       32767
#define MAX_TCP_KEEPCNT         127
#define MAX_TCP_SYNCNT          127

/* TIME_WAIT reaping mechanism. */
#define TCP_TWKILL_SLOTS        8   /* Please keep this a power of 2. */
#define TCP_TWKILL_PERIOD       (TCP_TIMEWAIT_LEN/TCP_TWKILL_SLOTS)

#define TCP_SYNQ_INTERVAL       (HZ/5)  /* Period of SYNACK timer */
#define TCP_SYNQ_HSIZE          512     /* Size of SYNACK hash table */
2165
#define TCP_PAWS_24DAYS         (60 * 60 * 24 * 24)

/*
 * Per-host timestamps are invalidated after this time.  It should be
 * equal (or greater than) TCP_TIMEWAIT_LEN to provide reliability equal
 * to one provided by timewait state.
 */
#define TCP_PAWS_MSL            60

/*
 * Replay window for per-host timestamps.  It must be less than minimal
 * timewait lifetime.
 */
#define TCP_PAWS_WINDOW         1

#define TCP_TW_RECYCLE_SLOTS_LOG    5
#define TCP_TW_RECYCLE_SLOTS        (1<<TCP_TW_RECYCLE_SLOTS_LOG)
2180
/*
 * If time > 4sec, it is "slow" path, no recycling is required,
 * so that we select tick to get range about 4 seconds.
 *
 * The HZ-dependent selection is compiled out in this port (kept under
 * #if 0 for reference); TCP_TW_RECYCLE_TICK is fixed at 0.
 */
#if 0
#if HZ <= 16 || HZ > 4096
# error Unsupported: HZ <= 16 or HZ > 4096
#elif HZ <= 32
# define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 64
# define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 128
# define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 256
# define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 512
# define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 1024
# define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 2048
# define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG)
#else
# define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG)
#endif
#else
#define TCP_TW_RECYCLE_TICK (0)
#endif
2208
/*
 * TCP option kinds
 */
#define TCPOPT_EOL              0   /* End of options */
#define TCPOPT_NOP              1   /* Padding */
#define TCPOPT_MSS              2   /* Segment size negotiating */
#define TCPOPT_WINDOW           3   /* Window scaling */
#define TCPOPT_SACK_PERM        4   /* SACK Permitted */
#define TCPOPT_SACK             5   /* SACK Block */
#define TCPOPT_TIMESTAMP        8   /* Better RTT estimations/PAWS */

/*
 * TCP option lengths
 */
#define TCPOLEN_MSS             4
#define TCPOLEN_WINDOW          3
#define TCPOLEN_SACK_PERM       2
#define TCPOLEN_TIMESTAMP       10

/* But this is what stacks really send out. */
#define TCPOLEN_TSTAMP_ALIGNED      12
#define TCPOLEN_WSCALE_ALIGNED      4
#define TCPOLEN_SACKPERM_ALIGNED    4
#define TCPOLEN_SACK_BASE           2
#define TCPOLEN_SACK_BASE_ALIGNED   4
#define TCPOLEN_SACK_PERBLOCK       8

/* Timer identifiers */
#define TCP_TIME_RETRANS        1   /* Retransmit timer */
#define TCP_TIME_DACK           2   /* Delayed ack timer */
#define TCP_TIME_PROBE0         3   /* Zero window probe timer */
#define TCP_TIME_KEEPOPEN       4   /* Keepalive timer */
2242
2243 #if 0
2244 /* sysctl variables for tcp */
2245 extern int sysctl_max_syn_backlog;
2246 extern int sysctl_tcp_timestamps;
2247 extern int sysctl_tcp_window_scaling;
2248 extern int sysctl_tcp_sack;
2249 extern int sysctl_tcp_fin_timeout;
2250 extern int sysctl_tcp_tw_recycle;
2251 extern int sysctl_tcp_keepalive_time;
2252 extern int sysctl_tcp_keepalive_probes;
2253 extern int sysctl_tcp_keepalive_intvl;
2254 extern int sysctl_tcp_syn_retries;
2255 extern int sysctl_tcp_synack_retries;
2256 extern int sysctl_tcp_retries1;
2257 extern int sysctl_tcp_retries2;
2258 extern int sysctl_tcp_orphan_retries;
2259 extern int sysctl_tcp_syncookies;
2260 extern int sysctl_tcp_retrans_collapse;
2261 extern int sysctl_tcp_stdurg;
2262 extern int sysctl_tcp_rfc1337;
2263 extern int sysctl_tcp_abort_on_overflow;
2264 extern int sysctl_tcp_max_orphans;
2265 extern int sysctl_tcp_max_tw_buckets;
2266 extern int sysctl_tcp_fack;
2267 extern int sysctl_tcp_reordering;
2268 extern int sysctl_tcp_ecn;
2269 extern int sysctl_tcp_dsack;
2270 extern int sysctl_tcp_mem[3];
2271 extern int sysctl_tcp_wmem[3];
2272 extern int sysctl_tcp_rmem[3];
2273 extern int sysctl_tcp_app_win;
2274 extern int sysctl_tcp_adv_win_scale;
2275 extern int sysctl_tcp_tw_reuse;
2276 #endif
2277
2278 extern atomic_t tcp_memory_allocated;
2279 extern atomic_t tcp_sockets_allocated;
2280 extern int tcp_memory_pressure;
2281
2282 struct open_request;
2283
/*
 * Per-address-family callbacks attached to an open_request through its
 * "class" member.
 */
struct or_calltable {
    int   family;
    int   (*rtx_syn_ack)(struct sock *sk, struct open_request *req, struct dst_entry*);
    void  (*send_ack)(struct sk_buff *skb, struct open_request *req);
    void  (*destructor)(struct open_request *req); /* called by tcp_openreq_free() */
    void  (*send_reset)(struct sk_buff *skb);
};
2291
2292 struct tcp_v4_open_req {
2293 __u32 loc_addr;
2294 __u32 rmt_addr;
2295 struct ip_options *opt;
2296 };
2297
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
/*
 * IPv6-specific part of an open_request (member v6_req of its af union).
 * Only present when IPv6 support is configured.
 */
struct tcp_v6_open_req {
    struct in6_addr  loc_addr;
    struct in6_addr  rmt_addr;
    struct sk_buff  *pktopts;
    int              iif;
};
#endif
2306
2307 /* this structure is too big */
2308 struct open_request {
2309 struct open_request *dl_next; /* Must be first member! */
2310 __u32 rcv_isn;
2311 __u32 snt_isn;
2312 __u16 rmt_port;
2313 __u16 mss;
2314 __u8 retrans;
2315 __u8 __pad;
2316 __u16 snd_wscale : 4,
2317 rcv_wscale : 4,
2318 tstamp_ok : 1,
2319 sack_ok : 1,
2320 wscale_ok : 1,
2321 ecn_ok : 1,
2322 acked : 1;
2323 /* The following two fields can be easily recomputed I think -AK */
2324 __u32 window_clamp; /* window clamp at creation time */
2325 __u32 rcv_wnd; /* rcv_wnd offered first time */
2326 __u32 ts_recent;
2327 unsigned long expires;
2328 struct or_calltable *class;
2329 struct sock *sk;
2330 union {
2331 struct tcp_v4_open_req v4_req;
2332 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
2333 struct tcp_v6_open_req v6_req;
2334 #endif
2335 } af;
2336 };
2337
2338 /* SLAB cache for open requests. */
2339 extern kmem_cache_t *tcp_openreq_cachep;
2340
2341 #define tcp_openreq_alloc() kmem_cache_alloc(tcp_openreq_cachep, SLAB_ATOMIC)
2342 #define tcp_openreq_fastfree(req) kmem_cache_free(tcp_openreq_cachep, req)
2343
2344 static __inline void tcp_openreq_free(struct open_request *req)
2345 {
2346 req->class->destructor(req);
2347 tcp_openreq_fastfree(req);
2348 }
2349
/*
 * True when the address family is IPv4.  Without IPv6 support configured
 * the argument is not evaluated and the result is always 1.
 */
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
#define TCP_INET_FAMILY(fam)    ((fam) == AF_INET)
#else
#define TCP_INET_FAMILY(fam)    1
#endif
2355
2356 /*
2357 * Pointers to address related TCP functions
2358 * (i.e. things that depend on the address family)
2359 *
2360 * BUGGG_FUTURE: all the idea behind this struct is wrong.
2361 * It mixes socket frontend with transport function.
2362 * With port sharing between IPv6/v4 it gives the only advantage,
2363 * only poor IPv6 needs to permanently recheck, that it
2364 * is still IPv6 8)8) It must be cleaned up as soon as possible.
2365 * --ANK (980802)
2366 */
2367
2368 struct tcp_func {
2369 int (*queue_xmit) (struct sk_buff *skb);
2370
2371 void (*send_check) (struct sock *sk,
2372 struct tcphdr *th,
2373 int len,
2374 struct sk_buff *skb);
2375
2376 int (*rebuild_header) (struct sock *sk);
2377
2378 int (*conn_request) (struct sock *sk,
2379 struct sk_buff *skb);
2380
2381 struct sock * (*syn_recv_sock) (struct sock *sk,
2382 struct sk_buff *skb,
2383 struct open_request *req,
2384 struct dst_entry *dst);
2385
2386 int (*remember_stamp) (struct sock *sk);
2387
2388 __u16 net_header_len;
2389
2390 int (*setsockopt) (struct sock *sk,
2391 int level,
2392 int optname,
2393 char *optval,
2394 int optlen);
2395
2396 int (*getsockopt) (struct sock *sk,
2397 int level,
2398 int optname,
2399 char *optval,
2400 int *optlen);
2401
2402
2403 void (*addr2sockaddr) (struct sock *sk,
2404 struct sockaddr *);
2405
2406 int sockaddr_len;
2407 };
2408
2409 /*
2410 * The next routines deal with comparing 32 bit unsigned ints
2411 * and worry about wraparound (automatic with unsigned arithmetic).
2412 */
2413
2414 extern __inline int before(__u32 seq1, __u32 seq2)
2415 {
2416 return (__s32)(seq1-seq2) < 0;
2417 }
2418
2419 extern __inline int after(__u32 seq1, __u32 seq2)
2420 {
2421 return (__s32)(seq2-seq1) < 0;
2422 }
2423
2424
2425 /* is s2<=s1<=s3 ? */
2426 extern __inline int between(__u32 seq1, __u32 seq2, __u32 seq3)
2427 {
2428 return seq3 - seq2 >= seq1 - seq2;
2429 }
2430
2431
2432 extern struct proto tcp_prot;
2433
2434 #ifdef ROS_STATISTICS
2435 extern struct tcp_mib tcp_statistics[NR_CPUS*2];
2436
2437 #define TCP_INC_STATS(field) SNMP_INC_STATS(tcp_statistics, field)
2438 #define TCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(tcp_statistics, field)
2439 #define TCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(tcp_statistics, field)
2440 #endif
2441
2442 extern void tcp_put_port(struct sock *sk);
2443 extern void __tcp_put_port(struct sock *sk);
2444 extern void tcp_inherit_port(struct sock *sk, struct sock *child);
2445
2446 extern void tcp_v4_err(struct sk_buff *skb, u32);
2447
2448 extern void tcp_shutdown (struct sock *sk, int how);
2449
2450 extern int tcp_v4_rcv(struct sk_buff *skb);
2451
2452 extern int tcp_v4_remember_stamp(struct sock *sk);
2453
2454 extern int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw);
2455
2456 extern int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size);
2457 extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
2458
2459 extern int tcp_ioctl(struct sock *sk,
2460 int cmd,
2461 unsigned long arg);
2462
2463 extern int tcp_rcv_state_process(struct sock *sk,
2464 struct sk_buff *skb,
2465 struct tcphdr *th,
2466 unsigned len);
2467
2468 extern int tcp_rcv_established(struct sock *sk,
2469 struct sk_buff *skb,
2470 struct tcphdr *th,
2471 unsigned len);
2472
/* Bit flags kept in tp->ack.pending. */
enum tcp_ack_state_t
{
    TCP_ACK_SCHED  = 1 << 0,    /* 1 */
    TCP_ACK_TIMER  = 1 << 1,    /* 2 */
    TCP_ACK_PUSHED = 1 << 2     /* 4 */
};
2479
2480 static __inline void tcp_schedule_ack(struct tcp_opt *tp)
2481 {
2482 tp->ack.pending |= TCP_ACK_SCHED;
2483 }
2484
2485 static __inline int tcp_ack_scheduled(struct tcp_opt *tp)
2486 {
2487 return tp->ack.pending&TCP_ACK_SCHED;
2488 }
2489
2490 static __inline void tcp_dec_quickack_mode(struct tcp_opt *tp)
2491 {
2492 if (tp->ack.quick && --tp->ack.quick == 0) {
2493 /* Leaving quickack mode we deflate ATO. */
2494 tp->ack.ato = TCP_ATO_MIN;
2495 }
2496 }
2497
2498 extern void tcp_enter_quickack_mode(struct tcp_opt *tp);
2499
2500 static __inline void tcp_delack_init(struct tcp_opt *tp)
2501 {
2502 memset(&tp->ack, 0, sizeof(tp->ack));
2503 }
2504
2505 static __inline void tcp_clear_options(struct tcp_opt *tp)
2506 {
2507 tp->tstamp_ok = tp->sack_ok = tp->wscale_ok = tp->snd_wscale = 0;
2508 }
2509
/* Result codes returned by tcp_timewait_state_process(). */
enum tcp_tw_status
{
    TCP_TW_SUCCESS = 0,
    TCP_TW_RST,         /* 1 */
    TCP_TW_ACK,         /* 2 */
    TCP_TW_SYN          /* 3 */
};
2517
2518
2519 extern enum tcp_tw_status tcp_timewait_state_process(struct tcp_tw_bucket *tw,
2520 struct sk_buff *skb,
2521 struct tcphdr *th,
2522 unsigned len);
2523
2524 extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb,
2525 struct open_request *req,
2526 struct open_request **prev);
2527 extern int tcp_child_process(struct sock *parent,
2528 struct sock *child,
2529 struct sk_buff *skb);
2530 extern void tcp_enter_loss(struct sock *sk, int how);
2531 extern void tcp_clear_retrans(struct tcp_opt *tp);
2532 extern void tcp_update_metrics(struct sock *sk);
2533
2534 extern void tcp_close(struct sock *sk,
2535 long timeout);
2536 extern struct sock * tcp_accept(struct sock *sk, int flags, int *err);
2537 extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait);
2538 extern void tcp_write_space(struct sock *sk);
2539
2540 extern int tcp_getsockopt(struct sock *sk, int level,
2541 int optname, char *optval,
2542 int *optlen);
2543 extern int tcp_setsockopt(struct sock *sk, int level,
2544 int optname, char *optval,
2545 int optlen);
2546 extern void tcp_set_keepalive(struct sock *sk, int val);
2547 extern int tcp_recvmsg(struct sock *sk,
2548 struct msghdr *msg,
2549 int len, int nonblock,
2550 int flags, int *addr_len);
2551
2552 extern int tcp_listen_start(struct sock *sk);
2553
2554 extern void tcp_parse_options(struct sk_buff *skb,
2555 struct tcp_opt *tp,
2556 int estab);
2557
2558 /*
2559 * TCP v4 functions exported for the inet6 API
2560 */
2561
2562 extern int tcp_v4_rebuild_header(struct sock *sk);
2563
2564 extern int tcp_v4_build_header(struct sock *sk,
2565 struct sk_buff *skb);
2566
2567 extern void tcp_v4_send_check(struct sock *sk,
2568 struct tcphdr *th, int len,
2569 struct sk_buff *skb);
2570
2571 extern int tcp_v4_conn_request(struct sock *sk,
2572 struct sk_buff *skb);
2573
2574 extern struct sock * tcp_create_openreq_child(struct sock *sk,
2575 struct open_request *req,
2576 struct sk_buff *skb);
2577
2578 extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk,
2579 struct sk_buff *skb,
2580 struct open_request *req,
2581 struct dst_entry *dst);
2582
2583 extern int tcp_v4_do_rcv(struct sock *sk,
2584 struct sk_buff *skb);
2585
2586 extern int tcp_v4_connect(struct sock *sk,
2587 struct sockaddr *uaddr,
2588 int addr_len);
2589
2590 extern int tcp_connect(struct sock *sk);
2591
2592 extern struct sk_buff * tcp_make_synack(struct sock *sk,
2593 struct dst_entry *dst,
2594 struct open_request *req);
2595
2596 extern int tcp_disconnect(struct sock *sk, int flags);
2597
2598 extern void tcp_unhash(struct sock *sk);
2599
2600 extern int tcp_v4_hash_connecting(struct sock *sk);
2601
2602
2603 /* From syncookies.c */
2604 extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
2605 struct ip_options *opt);
2606 extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
2607 __u16 *mss);
2608
2609 /* tcp_output.c */
2610
2611 extern int tcp_write_xmit(struct sock *, int nonagle);
2612 extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
2613 extern void tcp_xmit_retransmit_queue(struct sock *);
2614 extern void tcp_simple_retransmit(struct sock *);
2615
2616 extern void tcp_send_probe0(struct sock *);
2617 extern void tcp_send_partial(struct sock *);
2618 extern int tcp_write_wakeup(struct sock *);
2619 extern void tcp_send_fin(struct sock *sk);
2620 extern void tcp_send_active_reset(struct sock *sk, int priority);
2621 extern int tcp_send_synack(struct sock *);
2622 extern int tcp_transmit_skb(struct sock *, struct sk_buff *);
2623 extern void tcp_send_skb(struct sock *, struct sk_buff *, int force_queue, unsigned mss_now);
2624 extern void tcp_push_one(struct sock *, unsigned mss_now);
2625 extern void tcp_send_ack(struct sock *sk);
2626 extern void tcp_send_delayed_ack(struct sock *sk);
2627
2628 /* tcp_timer.c */
2629 extern void tcp_init_xmit_timers(struct sock *);
2630 extern void tcp_clear_xmit_timers(struct sock *);
2631
2632 extern void tcp_delete_keepalive_timer (struct sock *);
2633 extern void tcp_reset_keepalive_timer (struct sock *, unsigned long);
2634 extern int tcp_sync_mss(struct sock *sk, u32 pmtu);
2635
2636 extern const char timer_bug_msg[];
2637
2638 /* Read 'sendfile()'-style from a TCP socket */
2639 typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
2640 unsigned int, size_t);
2641 extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
2642 sk_read_actor_t recv_actor);
2643
/*
 * Clear one of the transmit timers selected by "what".
 *
 * The implementation is compiled out in this port (kept under #if 0 for
 * reference), so the function is currently a no-op.
 */
static __inline void tcp_clear_xmit_timer(struct sock *sk, int what)
{
#if 0
    struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

    switch (what) {
    case TCP_TIME_RETRANS:
    case TCP_TIME_PROBE0:
        tp->pending = 0;

#ifdef TCP_CLEAR_TIMERS
        if (timer_pending(&tp->retransmit_timer) &&
            del_timer(&tp->retransmit_timer))
            __sock_put(sk);
#endif
        break;
    case TCP_TIME_DACK:
        tp->ack.blocked = 0;
        tp->ack.pending = 0;

#ifdef TCP_CLEAR_TIMERS
        if (timer_pending(&tp->delack_timer) &&
            del_timer(&tp->delack_timer))
            __sock_put(sk);
#endif
        break;
    default:
        printk(timer_bug_msg);
        return;
    };
#endif
}
2676
/*
 * Reset the retransmission timer
 *
 * The implementation is compiled out in this port (kept under #if 0 for
 * reference), so the function is currently a no-op.
 */
static __inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when)
{
#if 0
    struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

    if (when > TCP_RTO_MAX) {
#ifdef TCP_DEBUG
        printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr());
#endif
        when = TCP_RTO_MAX;
    }

    switch (what) {
    case TCP_TIME_RETRANS:
    case TCP_TIME_PROBE0:
        tp->pending = what;
        tp->timeout = jiffies+when;
        if (!mod_timer(&tp->retransmit_timer, tp->timeout))
            sock_hold(sk);
        break;

    case TCP_TIME_DACK:
        tp->ack.pending |= TCP_ACK_TIMER;
        tp->ack.timeout = jiffies+when;
        if (!mod_timer(&tp->delack_timer, tp->ack.timeout))
            sock_hold(sk);
        break;

    default:
        printk(KERN_DEBUG "bug: unknown timer value\n");
    };
#endif
}
2713
/*
 * Compute the current effective MSS, taking SACKs and IP options,
 * and even PMTU discovery events into account.
 *
 * The computation is compiled out in this port (kept under #if 0 for
 * reference); the function always returns 0.
 */
static __inline unsigned int tcp_current_mss(struct sock *sk)
{
#if 0
    struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
    struct dst_entry *dst = __sk_dst_get(sk);
    int mss_now = tp->mss_cache;

    if (dst && dst->pmtu != tp->pmtu_cookie)
        mss_now = tcp_sync_mss(sk, dst->pmtu);

    if (tp->eff_sacks)
        mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
                    (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
    return mss_now;
#else
    return 0;
#endif
}
2736
/*
 * Initialize RCV_MSS value.
 * RCV_MSS is our guess about the MSS used by the peer.
 * We haven't any direct information about the MSS.
 * It's better to underestimate the RCV_MSS rather than overestimate.
 * Overestimations make us ACK less frequently than needed.
 * Underestimations are easier to detect and fix by tcp_measure_rcv_mss().
 *
 * The implementation is compiled out in this port (kept under #if 0 for
 * reference), so the function is currently a no-op.
 */
static __inline void tcp_initialize_rcv_mss(struct sock *sk)
{
#if 0
    struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
    unsigned int hint = min(tp->advmss, tp->mss_cache);

    hint = min(hint, tp->rcv_wnd/2);
    hint = min(hint, TCP_MIN_RCVMSS);
    hint = max(hint, TCP_MIN_MSS);

    tp->ack.rcv_mss = hint;
#endif
}
2758
2759 static __inline void __tcp_fast_path_on(struct tcp_opt *tp, u32 snd_wnd)
2760 {
2761 #if 0
2762 tp->pred_flags = htonl((tp->tcp_header_len << 26) |
2763 ntohl(TCP_FLAG_ACK) |
2764 snd_wnd);
2765 #endif
2766 }
2767
/*
 * Enable the fast path using the send window shifted down by snd_wscale.
 *
 * The implementation is compiled out in this port (kept under #if 0 for
 * reference), so the function is currently a no-op.
 */
static __inline void tcp_fast_path_on(struct tcp_opt *tp)
{
#if 0
    __tcp_fast_path_on(tp, tp->snd_wnd>>tp->snd_wscale);
#endif
}
2774
/*
 * Turn the fast path on when the out-of-order queue is empty, a receive
 * window is open, receive memory is below rcvbuf and no urgent data is
 * pending.
 *
 * The implementation is compiled out in this port (kept under #if 0 for
 * reference), so the function is currently a no-op.
 */
static __inline void tcp_fast_path_check(struct sock *sk, struct tcp_opt *tp)
{
#if 0
    if (skb_queue_len(&tp->out_of_order_queue) == 0 &&
        tp->rcv_wnd &&
        atomic_read(&sk->rmem_alloc) < sk->rcvbuf &&
        !tp->urg_data)
        tcp_fast_path_on(tp);
#endif
}
2785
2786 /* Compute the actual receive window we are currently advertising.
2787 * Rcv_nxt can be after the window if our peer push more data
2788 * than the offered window.
2789 */
2790 static __inline u32 tcp_receive_window(struct tcp_opt *tp)
2791 {
2792 #if 0
2793 s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;
2794
2795 if (win < 0)
2796 win = 0;
2797 return (u32) win;
2798 #else
2799 return 0;
2800 #endif
2801 }
2802
2803 /* Choose a new window, without checks for shrinking, and without
2804 * scaling applied to the result. The caller does these things
2805 * if necessary. This is a "raw" window selection.
2806 */
2807 extern u32 __tcp_select_window(struct sock *sk);
2808
/*
 * TCP timestamps are only 32 bits wide.  This causes a slight
 * complication on 64-bit systems, since we store a snapshot of jiffies
 * in the buffer control blocks below.  We decidedly use only the low
 * 32 bits of jiffies and hide the ugly cast behind the following macro.
 */
#define tcp_time_stamp      ((__u32)(jiffies))
2816
2817 /* This is what the send packet queueing engine uses to pass
2818 * TCP per-packet control information to the transmission
2819 * code. We also store the host-order sequence numbers in
2820 * here too. This is 36 bytes on 32-bit architectures,
2821 * 40 bytes on 64-bit machines, if this grows please adjust
2822 * skbuff.h:skbuff->cb[xxx] size appropriately.
2823 */
2824 struct tcp_skb_cb {
2825 union {
2826 #if 0
2827 struct inet_skb_parm h4;
2828 #endif
2829 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
2830 struct inet6_skb_parm h6;
2831 #endif
2832 } header; /* For incoming frames */
2833 __u32 seq; /* Starting sequence number */
2834 __u32 end_seq; /* SEQ + FIN + SYN + datalen */
2835 __u32 when; /* used to compute rtt's */
2836 __u8 flags; /* TCP header flags. */
2837
2838 /* NOTE: These must match up to the flags byte in a
2839 * real TCP header.
2840 */
2841 #define TCPCB_FLAG_FIN 0x01
2842 #define TCPCB_FLAG_SYN 0x02
2843 #define TCPCB_FLAG_RST 0x04
2844 #define TCPCB_FLAG_PSH 0x08
2845 #define TCPCB_FLAG_ACK 0x10
2846 #define TCPCB_FLAG_URG 0x20
2847 #define TCPCB_FLAG_ECE 0x40
2848 #define TCPCB_FLAG_CWR 0x80
2849
2850 __u8 sacked; /* State flags for SACK/FACK. */
2851 #define TCPCB_SACKED_ACKED 0x01 /* SKB ACK'd by a SACK block */
2852 #define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */
2853 #define TCPCB_LOST 0x04 /* SKB is lost */
2854 #define TCPCB_TAGBITS 0x07 /* All tag bits */
2855
2856 #define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */
2857 #define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)
2858
2859 #define TCPCB_URG 0x20 /* Urgent pointer advenced here */
2860
2861 #define TCPCB_AT_TAIL (TCPCB_URG)
2862
2863 __u16 urg_ptr; /* Valid w/URG flags is set. */
2864 __u32 ack_seq; /* Sequence number ACK'd */
2865 };
2866
2867 #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
2868
/*
 * Iterate skb over sk's write queue, starting at write_queue.next and
 * stopping at tp->send_head or when the walk gets back to the queue head
 * itself, whichever comes first.
 *
 * skb must be a modifiable lvalue of type struct sk_buff *.  Every macro
 * argument is parenthesized so that non-trivial expressions (for example
 * *pp) expand correctly.
 */
#define for_retrans_queue(skb, sk, tp) \
    for ((skb) = (sk)->write_queue.next; \
         ((skb) != (tp)->send_head) && \
         ((skb) != (struct sk_buff *)&(sk)->write_queue); \
         (skb) = (skb)->next)
2874
2875
2876 //#include <net/tcp_ecn.h>
2877
2878
/*
 * Compute minimal free write space needed to queue new packets.
 *
 * The computation (wmem_queued/2) is compiled out in this port (kept
 * under #if 0 for reference); the function always returns 0.
 */
static __inline int tcp_min_write_space(struct sock *sk)
{
#if 0
    return sk->wmem_queued/2;
#else
    return 0;
#endif
}
2890
/*
 * Write space left in the send buffer.
 *
 * The computation (sndbuf - wmem_queued) is compiled out in this port
 * (kept under #if 0 for reference); the function always returns 0.
 */
static __inline int tcp_wspace(struct sock *sk)
{
#if 0
    return sk->sndbuf - sk->wmem_queued;
#else
    return 0;
#endif
}
2899
2900
/*
 * This determines how many packets are "in the network" to the best
 * of our knowledge.  In many cases it is conservative, but where
 * detailed information is available from the receiver (via SACK
 * blocks etc.) we can make more aggressive calculations.
 *
 * Use this for decisions involving congestion control, use just
 * tp->packets_out to determine if the send queue is empty or not.
 *
 * Read this equation as:
 *
 *	"Packets sent once on transmission queue" MINUS
 *	"Packets left network, but not honestly ACKed yet" PLUS
 *	"Packets fast retransmitted"
 *
 * The computation is compiled out in this port (kept under #if 0 for
 * reference); the function always returns 0.
 */
static __inline unsigned int tcp_packets_in_flight(struct tcp_opt *tp)
{
#if 0
    return tp->packets_out - tp->left_out + tp->retrans_out;
#else
    return 0;
#endif
}
2923
2924 /* Recalculate snd_ssthresh, we want to set it to:
2925 *
2926 * one half the current congestion window, but no
2927 * less than two segments
2928 */
2929 static __inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp)
2930 {
2931 #if 0
2932 return max(tp->snd_cwnd >> 1U, 2U);
2933 #else
2934 return 0;
2935 #endif
2936 }
2937
2938 /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
2939 * The exception is rate halving phase, when cwnd is decreasing towards
2940 * ssthresh.
2941 */
2942 static __inline __u32 tcp_current_ssthresh(struct tcp_opt *tp)
2943 {
2944 #if 0
2945 if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery))
2946 return tp->snd_ssthresh;
2947 else
2948 return max(tp->snd_ssthresh,
2949 ((tp->snd_cwnd >> 1) +
2950 (tp->snd_cwnd >> 2)));
2951 #else
2952 return 0;
2953 #endif
2954 }
2955
/*
 * Clamp sacked_out to packets_out - lost_out when SACK is in use, then
 * recompute left_out as sacked_out + lost_out.
 *
 * The implementation is compiled out in this port (kept under #if 0 for
 * reference), so the function is currently a no-op.
 */
static __inline void tcp_sync_left_out(struct tcp_opt *tp)
{
#if 0
    if (tp->sack_ok && tp->sacked_out >= tp->packets_out - tp->lost_out)
        tp->sacked_out = tp->packets_out - tp->lost_out;
    tp->left_out = tp->sacked_out + tp->lost_out;
#endif
}
2964
2965 extern void tcp_cwnd_application_limited(struct sock *sk);
2966
/*
 * Congestion window validation. (RFC2861)
 *
 * The implementation is compiled out in this port (kept under #if 0 for
 * reference), so the function is currently a no-op.
 */
static __inline void tcp_cwnd_validate(struct sock *sk, struct tcp_opt *tp)
{
#if 0
    if (tp->packets_out >= tp->snd_cwnd) {
        /* Network is fed fully. */
        tp->snd_cwnd_used = 0;
        tp->snd_cwnd_stamp = tcp_time_stamp;
    } else {
        /* Network starves. */
        if (tp->packets_out > tp->snd_cwnd_used)
            tp->snd_cwnd_used = tp->packets_out;

        if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
            tcp_cwnd_application_limited(sk);
    }
#endif
}
2986
/*
 * Set slow start threshold and cwnd not falling to slow start.
 *
 * The implementation is compiled out in this port (kept under #if 0 for
 * reference), so the function is currently a no-op.
 */
static __inline void __tcp_enter_cwr(struct tcp_opt *tp)
{
#if 0
    tp->undo_marker = 0;
    tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
    tp->snd_cwnd = min(tp->snd_cwnd,
                       tcp_packets_in_flight(tp) + 1U);
    tp->snd_cwnd_cnt = 0;
    tp->high_seq = tp->snd_nxt;
    tp->snd_cwnd_stamp = tcp_time_stamp;
    TCP_ECN_queue_cwr(tp);
#endif
}
3001
/*
 * Enter the CWR congestion state unless the connection is already in
 * CWR or a later state.
 *
 * The implementation is compiled out in this port (kept under #if 0 for
 * reference), so the function is currently a no-op.
 */
static __inline void tcp_enter_cwr(struct tcp_opt *tp)
{
#if 0
    tp->prior_ssthresh = 0;
    if (tp->ca_state < TCP_CA_CWR) {
        __tcp_enter_cwr(tp);
        tp->ca_state = TCP_CA_CWR;
    }
#endif
}
3012
3013 extern __u32 tcp_init_cwnd(struct tcp_opt *tp);
3014
3015 /* Slow start with delack produces 3 packets of burst, so that
3016 * it is safe "de facto".
3017 */
3018 static __inline __u32 tcp_max_burst(struct tcp_opt *tp)
3019 {
3020 return 3;
3021 }
3022
/*
 * Minshall check: is the last small segment sent (snd_sml) still
 * unacknowledged, i.e. after snd_una and not after snd_nxt?
 *
 * The check is compiled out in this port (kept under #if 0 for
 * reference); the function always returns 0.
 *
 * Declared with __inline, the spelling used by every other inline helper
 * in this header, instead of the lone __inline__.
 */
static __inline int tcp_minshall_check(struct tcp_opt *tp)
{
#if 0
    return after(tp->snd_sml,tp->snd_una) &&
           !after(tp->snd_sml, tp->snd_nxt);
#else
    return 0;
#endif
}
3032
/*
 * Remember the end sequence of a segment shorter than mss in
 * tp->snd_sml.
 *
 * The implementation is compiled out in this port (kept under #if 0 for
 * reference), so the function is currently a no-op.
 */
static __inline void tcp_minshall_update(struct tcp_opt *tp, int mss, struct sk_buff *skb)
{
#if 0
    if (skb->len < mss)
        tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
#endif
}
3040
/*
 * Return 0, if packet can be sent now without violating Nagle's rules:
 *   1. It is full sized.
 *   2. Or it contains FIN.
 *   3. Or TCP_NODELAY was set.
 *   4. Or TCP_CORK is not set, and all sent packets are ACKed.
 *      With Minshall's modification: all sent small packets are ACKed.
 *
 * The check is compiled out in this port (kept under #if 0 for
 * reference); the function always returns 0.
 */
static __inline int
tcp_nagle_check(struct tcp_opt *tp, struct sk_buff *skb, unsigned mss_now, int nonagle)
{
#if 0
    return (skb->len < mss_now &&
            !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
            (nonagle == 2 ||
             (!nonagle &&
              tp->packets_out &&
              tcp_minshall_check(tp))));
#else
    return 0;
#endif
}
3063
/*
 * This checks if the data bearing packet SKB (usually tp->send_head)
 * should be put on the wire right now.
 *
 * The check is compiled out in this port (kept under #if 0 for
 * reference); the function always returns 0.
 */
static __inline int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb,
                                 unsigned cur_mss, int nonagle)
{
#if 0
    /*	RFC 1122 - section 4.2.3.4
     *
     *	We must queue if
     *
     *	a) The right edge of this frame exceeds the window
     *	b) There are packets in flight and we have a small segment
     *	   [SWS avoidance and Nagle algorithm]
     *	   (part of SWS is done on packetization)
     *	   Minshall version sounds: there are no _small_
     *	   segments in flight. (tcp_nagle_check)
     *	c) We have too many packets 'in flight'
     *
     *	Don't use the nagle rule for urgent data (or
     *	for the final FIN -DaveM).
     *
     *	Also, Nagle rule does not apply to frames, which
     *	sit in the middle of queue (they have no chances
     *	to get new data) and if room at tail of skb is
     *	not enough to save something seriously (<32 for now).
     */

    /* Don't be strict about the congestion window for the
     * final FIN frame.  -DaveM
     */
    return ((nonagle==1 || tp->urg_mode
             || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) &&
            ((tcp_packets_in_flight(tp) < tp->snd_cwnd) ||
             (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) &&
            !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd));
#else
    return 0;
#endif
}
3104
/*
 * Arm the zero-window probe timer when nothing is outstanding and no
 * timer is pending.
 *
 * The implementation is compiled out in this port (kept under #if 0 for
 * reference), so the function is currently a no-op.
 */
static __inline void tcp_check_probe_timer(struct sock *sk, struct tcp_opt *tp)
{
#if 0
    if (!tp->packets_out && !tp->pending)
        tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto);
#endif
}
3112
/*
 * Is skb the last buffer on sk's write queue (its next pointer is the
 * queue head)?
 *
 * The check is compiled out in this port (kept under #if 0 for
 * reference); the function always returns 0.
 */
static __inline int tcp_skb_is_last(struct sock *sk, struct sk_buff *skb)
{
#if 0
    return (skb->next == (struct sk_buff*)&sk->write_queue);
#else
    return 0;
#endif
}
3121
/*
 * Push out any pending frames which were held back due to
 * TCP_CORK or attempt at coalescing tiny packets.
 * The socket must be locked by the caller.
 *
 * The implementation is compiled out in this port (kept under #if 0 for
 * reference), so the function is currently a no-op.
 */
static __inline void __tcp_push_pending_frames(struct sock *sk,
                                               struct tcp_opt *tp,
                                               unsigned cur_mss,
                                               int nonagle)
{
#if 0
    struct sk_buff *skb = tp->send_head;

    if (skb) {
        if (!tcp_skb_is_last(sk, skb))
            nonagle = 1;
        if (!tcp_snd_test(tp, skb, cur_mss, nonagle) ||
            tcp_write_xmit(sk, nonagle))
            tcp_check_probe_timer(sk, tp);
    }
    tcp_cwnd_validate(sk, tp);
#endif
}
3144
/* Stub: does nothing in this port.
 *
 * The disabled Linux body forwards to __tcp_push_pending_frames() with
 * tcp_current_mss(sk) and tp->nonagle.
 */
static __inline void tcp_push_pending_frames(struct sock *sk,
                                             struct tcp_opt *tp)
{
#if 0
    __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk), tp->nonagle);
#endif
}
3152
/* Stub: always returns 0 in this port.
 *
 * The disabled Linux body runs tcp_snd_test() on tp->send_head (if any),
 * forcing nonagle to 1 when tcp_skb_is_last() reports true for it.
 */
static __inline int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp)
{
#if 0
    struct sk_buff *skb = tp->send_head;

    return (skb &&
            tcp_snd_test(tp, skb, tcp_current_mss(sk),
                         tcp_skb_is_last(sk, skb) ? 1 : tp->nonagle));
#else
    return 0;
#endif
}
3165
3166 static __inline void tcp_init_wl(struct tcp_opt *tp, u32 ack, u32 seq)
3167 {
3168 #if 0
3169 tp->snd_wl1 = seq;
3170 #endif
3171 }
3172
3173 static __inline void tcp_update_wl(struct tcp_opt *tp, u32 ack, u32 seq)
3174 {
3175 #if 0
3176 tp->snd_wl1 = seq;
3177 #endif
3178 }
3179
3180 extern void tcp_destroy_sock(struct sock *sk);
3181
3182
3183 /*
3184 * Calculate(/check) TCP checksum
3185 */
3186 static __inline u16 tcp_v4_check(struct tcphdr *th, int len,
3187 unsigned long saddr, unsigned long daddr,
3188 unsigned long base)
3189 {
3190 #if 0
3191 return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
3192 #else
3193 return 0;
3194 #endif
3195 }
3196
/* Stub: always returns 0 in this port.
 *
 * The disabled Linux body folds skb_checksum() over the whole buffer
 * (offset 0, skb->len bytes, seeded with skb->csum) via csum_fold().
 */
static __inline int __tcp_checksum_complete(struct sk_buff *skb)
{
#if 0
    return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
#else
    return 0;
#endif
}
3205
/* Stub: always returns 0 in this port.
 *
 * The disabled Linux body skips the software check when
 * skb->ip_summed == CHECKSUM_UNNECESSARY and otherwise returns the
 * result of __tcp_checksum_complete().
 */
static __inline int tcp_checksum_complete(struct sk_buff *skb)
{
#if 0
    return skb->ip_summed != CHECKSUM_UNNECESSARY &&
           __tcp_checksum_complete(skb);
#else
    return 0;
#endif
}
3215
/* Prequeue for VJ style copy to user, combined with checksumming. */

/* Stub: does nothing in this port.
 *
 * The disabled Linux body clears tp->ucopy (task, len, memory) and
 * initialises the tp->ucopy.prequeue list head.
 */
static __inline void tcp_prequeue_init(struct tcp_opt *tp)
{
#if 0
    tp->ucopy.task = NULL;
    tp->ucopy.len = 0;
    tp->ucopy.memory = 0;
    skb_queue_head_init(&tp->ucopy.prequeue);
#endif
}
3227
/* Packet is added to VJ-style prequeue for processing in process
 * context, if a reader task is waiting. Apparently, this exciting
 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
 * failed somewhere. Latency? Burstiness? Well, at least now we will
 * see, why it failed. 8)8) --ANK
 *
 * NOTE: is this not too big to inline?
 *
 * NOTE: stubbed in this port. The Linux body is compiled out, so the
 * function never queues anything and always returns 0.
 */
static __inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
{
#if 0
    struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

    if (tp->ucopy.task) {
        __skb_queue_tail(&tp->ucopy.prequeue, skb);
        tp->ucopy.memory += skb->truesize;
        if (tp->ucopy.memory > sk->rcvbuf) {
            struct sk_buff *skb1;

            if (sk->lock.users)
                out_of_line_bug();

            while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
                sk->backlog_rcv(sk, skb1);
                NET_INC_STATS_BH(TCPPrequeueDropped);
            }

            tp->ucopy.memory = 0;
        } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
            wake_up_interruptible(sk->sleep);
            if (!tcp_ack_scheduled(tp))
                tcp_reset_xmit_timer(sk, TCP_TIME_DACK, (3*TCP_RTO_MIN)/4);
        }
        return 1;
    }
    return 0;
#else
    return 0;
#endif
}
3268
3269
/* State tracing is forced off here: STATE_TRACE is undefined just
 * before it is tested, so the name table below is never compiled.
 */
#undef STATE_TRACE

#ifdef STATE_TRACE
/* Printable state names, indexed by state number in the
 * SOCK_DEBUG() trace of tcp_set_state().
 */
static char *statename[]={
    "Unused","Established","Syn Sent","Syn Recv",
    "Fin Wait 1","Fin Wait 2","Time Wait", "Close",
    "Close Wait","Last ACK","Listen","Closing"
};
#endif
3279
/* Stub: does nothing in this port; sk->state is NOT updated.
 *
 * The disabled Linux body maintains the TcpCurrEstab statistic,
 * unhashes the socket (and releases its port unless
 * SOCK_BINDPORT_LOCK is set) on TCP_CLOSE, and only then stores the
 * new state.
 */
static __inline void tcp_set_state(struct sock *sk, int state)
{
#if 0
    int oldstate = sk->state;

    switch (state) {
    case TCP_ESTABLISHED:
        if (oldstate != TCP_ESTABLISHED)
            TCP_INC_STATS(TcpCurrEstab);
        break;

    case TCP_CLOSE:
        sk->prot->unhash(sk);
        if (sk->prev && !(sk->userlocks&SOCK_BINDPORT_LOCK))
            tcp_put_port(sk);
        /* fall through */
    default:
        if (oldstate==TCP_ESTABLISHED)
            tcp_statistics[smp_processor_id()*2+!in_softirq()].TcpCurrEstab--;
    }

    /* Change state AFTER socket is unhashed to avoid closed
     * socket sitting in hash tables.
     */
    sk->state = state;

#ifdef STATE_TRACE
    SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n",sk, statename[oldstate],statename[state]);
#endif
#endif
}
3311
/* Stub: does nothing in this port.
 *
 * The disabled Linux body moves the socket to TCP_CLOSE, clears its
 * transmit timers, sets sk->shutdown to SHUTDOWN_MASK and then either
 * calls sk->state_change() (socket not dead) or tcp_destroy_sock().
 */
static __inline void tcp_done(struct sock *sk)
{
#if 0
    tcp_set_state(sk, TCP_CLOSE);
    tcp_clear_xmit_timers(sk);

    sk->shutdown = SHUTDOWN_MASK;

    if (!sk->dead)
        sk->state_change(sk);
    else
        tcp_destroy_sock(sk);
#endif
}
3326
/* Stub: does nothing in this port.
 *
 * The disabled Linux body zeroes tp->dsack, tp->eff_sacks and
 * tp->num_sacks.
 */
static __inline void tcp_sack_reset(struct tcp_opt *tp)
{
#if 0
    tp->dsack = 0;
    tp->eff_sacks = 0;
    tp->num_sacks = 0;
#endif
}
3335
3336 static __inline void tcp_build_and_update_options(__u32 *ptr, struct tcp_opt *tp, __u32 tstamp)
3337 {
3338 #if 0
3339 if (tp->tstamp_ok) {
3340 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
3341 (TCPOPT_NOP << 16) |
3342 (TCPOPT_TIMESTAMP << 8) |
3343 TCPOLEN_TIMESTAMP);
3344 *ptr++ = htonl(tstamp);
3345 *ptr++ = htonl(tp->ts_recent);
3346 }
3347 if (tp->eff_sacks) {
3348 struct tcp_sack_block *sp = tp->dsack ? tp->duplicate_sack : tp->selective_acks;
3349 int this_sack;
3350
3351 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
3352 (TCPOPT_NOP << 16) |
3353 (TCPOPT_SACK << 8) |
3354 (TCPOLEN_SACK_BASE +
3355 (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK)));
3356 for(this_sack = 0; this_sack < tp->eff_sacks; this_sack++) {
3357 *ptr++ = htonl(sp[this_sack].start_seq);
3358 *ptr++ = htonl(sp[this_sack].end_seq);
3359 }
3360 if (tp->dsack) {
3361 tp->dsack = 0;
3362 tp->eff_sacks--;
3363 }
3364 }
3365 #endif
3366 }
3367
3368 /* Construct a tcp options header for a SYN or SYN_ACK packet.
3369 * If this is every changed make sure to change the definition of
3370 * MAX_SYN_SIZE to match the new maximum number of options that you
3371 * can generate.
3372 */
3373 static __inline void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
3374 int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent)
3375 {
3376 #if 0
3377 /* We always get an MSS option.
3378 * The option bytes which will be seen in normal data
3379 * packets should timestamps be used, must be in the MSS
3380 * advertised. But we subtract them from tp->mss_cache so
3381 * that calculations in tcp_sendmsg are simpler etc.
3382 * So account for this fact here if necessary. If we
3383 * don't do this correctly, as a receiver we won't
3384 * recognize data packets as being full sized when we
3385 * should, and thus we won't abide by the delayed ACK
3386 * rules correctly.
3387 * SACKs don't matter, we never delay an ACK when we
3388 * have any of those going out.
3389 */
3390 *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
3391 if (ts) {
3392 if(sack)
3393 *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) |
3394 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
3395 else
3396 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
3397 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
3398 *ptr++ = htonl(tstamp); /* TSVAL */
3399 *ptr++ = htonl(ts_recent); /* TSECR */
3400 } else if(sack)
3401 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
3402 (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);
3403 if (offer_wscale)
3404 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
3405 #endif
3406 }
3407
3408 /* Determine a window scaling and initial window to offer.
3409 * Based on the assumption that the given amount of space
3410 * will be offered. Store the results in the tp structure.
3411 * NOTE: for smooth operation initial space offering should
3412 * be a multiple of mss if possible. We assume here that mss >= 1.
3413 * This MUST be enforced by all callers.
3414 */
3415 static __inline void tcp_select_initial_window(int __space, __u32 mss,
3416 __u32 *rcv_wnd,
3417 __u32 *window_clamp,
3418 int wscale_ok,
3419 __u8 *rcv_wscale)
3420 {
3421 #if 0
3422 unsigned int space = (__space < 0 ? 0 : __space);
3423
3424 /* If no clamp set the clamp to the max possible scaled window */
3425 if (*window_clamp == 0)
3426 (*window_clamp) = (65535 << 14);
3427 space = min(*window_clamp, space);
3428
3429 /* Quantize space offering to a multiple of mss if possible. */
3430 if (space > mss)
3431 space = (space / mss) * mss;
3432
3433 /* NOTE: offering an initial window larger than 32767
3434 * will break some buggy TCP stacks. We try to be nice.
3435 * If we are not window scaling, then this truncates
3436 * our initial window offering to 32k. There should also
3437 * be a sysctl option to stop being nice.
3438 */
3439 (*rcv_wnd) = min(space, MAX_TCP_WINDOW);
3440 (*rcv_wscale) = 0;
3441 if (wscale_ok) {
3442 /* See RFC1323 for an explanation of the limit to 14 */
3443 while (space > 65535 && (*rcv_wscale) < 14) {
3444 space >>= 1;
3445 (*rcv_wscale)++;
3446 }
3447 if (*rcv_wscale && sysctl_tcp_app_win && space>=mss &&
3448 space - max((space>>sysctl_tcp_app_win), mss>>*rcv_wscale) < 65536/2)
3449 (*rcv_wscale)--;
3450 }
3451
3452 /* Set initial window to value enough for senders,
3453 * following RFC1414. Senders, not following this RFC,
3454 * will be satisfied with 2.
3455 */
3456 if (mss > (1<<*rcv_wscale)) {
3457 int init_cwnd = 4;
3458 if (mss > 1460*3)
3459 init_cwnd = 2;
3460 else if (mss > 1460)
3461 init_cwnd = 3;
3462 if (*rcv_wnd > init_cwnd*mss)
3463 *rcv_wnd = init_cwnd*mss;
3464 }
3465 /* Set the clamp no higher than max representable value */
3466 (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
3467 #endif
3468 }
3469
/* Stub: always returns 0 in this port, whatever space is.
 *
 * The disabled Linux body scales space by sysctl_tcp_adv_win_scale:
 * a right shift when the sysctl is <= 0, otherwise space minus
 * (space >> sysctl).
 */
static __inline int tcp_win_from_space(int space)
{
#if 0
    return sysctl_tcp_adv_win_scale<=0 ?
           (space>>(-sysctl_tcp_adv_win_scale)) :
           space - (space>>sysctl_tcp_adv_win_scale);
#else
    return 0;
#endif
}
3480
/* Note: caller must be prepared to deal with negative returns
 *
 * NOTE: stubbed in this port; the Linux body (tcp_win_from_space() of
 * sk->rcvbuf minus sk->rmem_alloc) is compiled out and the function
 * always returns 0.
 */
static __inline int tcp_space(struct sock *sk)
{
#if 0
    return tcp_win_from_space(sk->rcvbuf - atomic_read(&sk->rmem_alloc));
#else
    return 0;
#endif
}
3490
/* Stub: always returns 0 in this port.
 *
 * The disabled Linux body returns tcp_win_from_space(sk->rcvbuf).
 */
static __inline int tcp_full_space(struct sock *sk)
{
#if 0
    return tcp_win_from_space(sk->rcvbuf);
#else
    return 0;
#endif
}
3499
/* Stub: does nothing in this port.
 *
 * The disabled Linux body decrements sk->ack_backlog.
 */
static __inline void tcp_acceptq_removed(struct sock *sk)
{
#if 0
    sk->ack_backlog--;
#endif
}
3506
/* Stub: does nothing in this port.
 *
 * The disabled Linux body increments sk->ack_backlog.
 */
static __inline void tcp_acceptq_added(struct sock *sk)
{
#if 0
    sk->ack_backlog++;
#endif
}
3513
/* Stub: always returns 0 in this port.
 *
 * The disabled Linux body returns non-zero when sk->ack_backlog
 * exceeds sk->max_ack_backlog.
 */
static __inline int tcp_acceptq_is_full(struct sock *sk)
{
#if 0
    return sk->ack_backlog > sk->max_ack_backlog;
#else
    return 0;
#endif
}
3522
/* Stub: does nothing in this port.
 *
 * The disabled Linux body attaches child to req, bumps the accept
 * backlog and appends req to the tail of the singly linked
 * tp->accept_queue (linked through dl_next).
 */
static __inline void tcp_acceptq_queue(struct sock *sk, struct open_request *req,
                                       struct sock *child)
{
#if 0
    struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

    req->sk = child;
    tcp_acceptq_added(sk);

    if (!tp->accept_queue_tail) {
        tp->accept_queue = req;
    } else {
        tp->accept_queue_tail->dl_next = req;
    }
    tp->accept_queue_tail = req;
    req->dl_next = NULL;
#endif
}
3541
3542 struct tcp_listen_opt
3543 {
3544 u8 max_qlen_log; /* log_2 of maximal queued SYNs */
3545 int qlen;
3546 int qlen_young;
3547 int clock_hand;
3548 struct open_request *syn_table[TCP_SYNQ_HSIZE];
3549 };
3550
/* Stub: does nothing in this port.
 *
 * The disabled Linux body decrements listen_opt->qlen (deleting the
 * keepalive timer when it reaches 0) and decrements qlen_young when
 * req->retrans is 0.
 */
static __inline void
tcp_synq_removed(struct sock *sk, struct open_request *req)
{
#if 0
    struct tcp_listen_opt *lopt = sk->tp_pinfo.af_tcp.listen_opt;

    if (--lopt->qlen == 0)
        tcp_delete_keepalive_timer(sk);
    if (req->retrans == 0)
        lopt->qlen_young--;
#endif
}
3563
/* Stub: does nothing in this port.
 *
 * The disabled Linux body increments listen_opt->qlen (arming the
 * keepalive timer with TCP_TIMEOUT_INIT when it was 0) and increments
 * qlen_young.
 */
static __inline void tcp_synq_added(struct sock *sk)
{
#if 0
    struct tcp_listen_opt *lopt = sk->tp_pinfo.af_tcp.listen_opt;

    if (lopt->qlen++ == 0)
        tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT);
    lopt->qlen_young++;
#endif
}
3574
/* Stub: always returns 0 in this port.
 *
 * The disabled Linux body returns listen_opt->qlen of the socket.
 */
static __inline int tcp_synq_len(struct sock *sk)
{
#if 0
    return sk->tp_pinfo.af_tcp.listen_opt->qlen;
#else
    return 0;
#endif
}
3583
/* Stub: always returns 0 in this port.
 *
 * The disabled Linux body returns listen_opt->qlen_young of the socket.
 */
static __inline int tcp_synq_young(struct sock *sk)
{
#if 0
    return sk->tp_pinfo.af_tcp.listen_opt->qlen_young;
#else
    return 0;
#endif
}
3592
/* Stub: always returns 0 in this port.
 *
 * The disabled Linux body returns tcp_synq_len(sk) shifted right by
 * listen_opt->max_qlen_log, i.e. non-zero once the queue length
 * reaches 2^max_qlen_log.
 */
static __inline int tcp_synq_is_full(struct sock *sk)
{
#if 0
    return tcp_synq_len(sk)>>sk->tp_pinfo.af_tcp.listen_opt->max_qlen_log;
#else
    return 0;
#endif
}
3601
/* Stub: does nothing in this port; *prev is not modified.
 *
 * The disabled Linux body stores req->dl_next into *prev while
 * holding tp->syn_wait_lock for writing.
 */
static __inline void tcp_synq_unlink(struct tcp_opt *tp, struct open_request *req,
                                     struct open_request **prev)
{
#if 0
    write_lock(&tp->syn_wait_lock);
    *prev = req->dl_next;
    write_unlock(&tp->syn_wait_lock);
#endif
}
3611
/* Stub: does nothing in this port; req is neither unlinked nor freed.
 *
 * The disabled Linux body calls tcp_synq_unlink(), tcp_synq_removed()
 * and tcp_openreq_free() on req, in that order.
 */
static __inline void tcp_synq_drop(struct sock *sk, struct open_request *req,
                                   struct open_request **prev)
{
#if 0
    tcp_synq_unlink(&sk->tp_pinfo.af_tcp, req, prev);
    tcp_synq_removed(sk, req);
    tcp_openreq_free(req);
#endif
}
3621
/* Stub: does nothing in this port; req is left uninitialised.
 *
 * The disabled Linux body fills req from the parsed options in tp
 * (mss clamp, timestamp/SACK/window-scale flags) and from skb (initial
 * receive sequence number, remote port).
 */
static __inline void tcp_openreq_init(struct open_request *req,
                                      struct tcp_opt *tp,
                                      struct sk_buff *skb)
{
#if 0
    req->rcv_wnd = 0;       /* So that tcp_send_synack() knows! */
    req->rcv_isn = TCP_SKB_CB(skb)->seq;
    req->mss = tp->mss_clamp;
    req->ts_recent = tp->saw_tstamp ? tp->rcv_tsval : 0;
    req->tstamp_ok = tp->tstamp_ok;
    req->sack_ok = tp->sack_ok;
    req->snd_wscale = tp->snd_wscale;
    req->wscale_ok = tp->wscale_ok;
    req->acked = 0;
    req->ecn_ok = 0;
    req->rmt_port = skb->h.th->source;
#endif
}
3640
/* PAGE_SIZE as a signed int; compared against sk->forward_alloc in the
 * reference body of tcp_mem_reclaim().
 */
#define TCP_MEM_QUANTUM ((int)PAGE_SIZE)
3642
/* Stub: does nothing in this port; skb is NOT freed.
 *
 * The disabled Linux body marks the queue as shrunk, moves
 * skb->truesize from sk->wmem_queued to sk->forward_alloc and frees
 * the buffer with __kfree_skb().
 */
static __inline void tcp_free_skb(struct sock *sk, struct sk_buff *skb)
{
#if 0
    sk->tp_pinfo.af_tcp.queue_shrunk = 1;
    sk->wmem_queued -= skb->truesize;
    sk->forward_alloc += skb->truesize;
    __kfree_skb(skb);
#endif
}
3652
/* Stub: does nothing in this port.
 *
 * The disabled Linux body moves skb->truesize from sk->forward_alloc
 * to sk->wmem_queued.
 */
static __inline void tcp_charge_skb(struct sock *sk, struct sk_buff *skb)
{
#if 0
    sk->wmem_queued += skb->truesize;
    sk->forward_alloc -= skb->truesize;
#endif
}
3660
3661 extern void __tcp_mem_reclaim(struct sock *sk);
3662 extern int tcp_mem_schedule(struct sock *sk, int size, int kind);
3663
/* Stub: does nothing in this port.
 *
 * The disabled Linux body calls __tcp_mem_reclaim() once
 * sk->forward_alloc has reached TCP_MEM_QUANTUM.
 */
static __inline void tcp_mem_reclaim(struct sock *sk)
{
#if 0
    if (sk->forward_alloc >= TCP_MEM_QUANTUM)
        __tcp_mem_reclaim(sk);
#endif
}
3671
/* Stub: does nothing in this port.
 *
 * The disabled Linux body sets the tcp_memory_pressure flag (counting
 * the transition in TCPMemoryPressures) if it is not already set.
 */
static __inline void tcp_enter_memory_pressure(void)
{
#if 0
    if (!tcp_memory_pressure) {
        NET_INC_STATS(TCPMemoryPressures);
        tcp_memory_pressure = 1;
    }
#endif
}
3681
/* Stub: does nothing in this port.
 *
 * Unless SOCK_SNDBUF_LOCK is set in sk->userlocks, the disabled Linux
 * body shrinks sk->sndbuf to at most half of sk->wmem_queued, but not
 * below SOCK_MIN_SNDBUF.
 */
static __inline void tcp_moderate_sndbuf(struct sock *sk)
{
#if 0
    if (!(sk->userlocks&SOCK_SNDBUF_LOCK)) {
        sk->sndbuf = min(sk->sndbuf, sk->wmem_queued/2);
        sk->sndbuf = max(sk->sndbuf, SOCK_MIN_SNDBUF);
    }
#endif
}
3691
/* Stub: never allocates; always returns NULL in this port.
 *
 * The disabled Linux body allocates size + MAX_TCP_HEADER bytes,
 * charges truesize (plus mem) against the socket's memory accounting
 * and reserves MAX_TCP_HEADER bytes of headroom. On allocation failure
 * it enters memory pressure and moderates the send buffer.
 */
static __inline struct sk_buff *tcp_alloc_pskb(struct sock *sk, int size, int mem, int gfp)
{
#if 0
    struct sk_buff *skb = alloc_skb(size+MAX_TCP_HEADER, gfp);

    if (skb) {
        skb->truesize += mem;
        if (sk->forward_alloc >= (int)skb->truesize ||
            tcp_mem_schedule(sk, skb->truesize, 0)) {
            skb_reserve(skb, MAX_TCP_HEADER);
            return skb;
        }
        __kfree_skb(skb);
    } else {
        tcp_enter_memory_pressure();
        tcp_moderate_sndbuf(sk);
    }
    return NULL;
#else
    return NULL;
#endif
}
3714
/* Stub: never allocates; always returns NULL in this port.
 *
 * The disabled Linux body is tcp_alloc_pskb() with mem == 0.
 */
static __inline struct sk_buff *tcp_alloc_skb(struct sock *sk, int size, int gfp)
{
#if 0
    return tcp_alloc_pskb(sk, size, 0, gfp);
#else
    return NULL;
#endif
}
3723
/* Stub: never allocates; always returns NULL in this port.
 *
 * The disabled Linux body allocates one page when the socket has (or
 * can schedule) PAGE_SIZE bytes of forward allocation; otherwise, or
 * if the page allocation fails, it enters memory pressure, moderates
 * the send buffer and returns NULL.
 */
static __inline struct page * tcp_alloc_page(struct sock *sk)
{
#if 0
    if (sk->forward_alloc >= (int)PAGE_SIZE ||
        tcp_mem_schedule(sk, PAGE_SIZE, 0)) {
        struct page *page = alloc_pages(sk->allocation, 0);
        if (page)
            return page;
    }
    tcp_enter_memory_pressure();
    tcp_moderate_sndbuf(sk);
    return NULL;
#else
    return NULL;
#endif
}
3740
/* Stub: does nothing in this port; the write queue is left untouched.
 *
 * The disabled Linux body dequeues every buffer from sk->write_queue,
 * releases each with tcp_free_skb() and then calls tcp_mem_reclaim().
 */
static __inline void tcp_writequeue_purge(struct sock *sk)
{
#if 0
    struct sk_buff *skb;

    while ((skb = __skb_dequeue(&sk->write_queue)) != NULL)
        tcp_free_skb(sk, skb);
    tcp_mem_reclaim(sk);
#endif
}
3751
3752 extern void tcp_rfree(struct sk_buff *skb);
3753
/* Stub: does nothing in this port; skb ownership is not recorded.
 *
 * The disabled Linux body points skb->sk at sk, installs tcp_rfree as
 * the destructor and moves skb->truesize from sk->forward_alloc to
 * sk->rmem_alloc.
 */
static __inline void tcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
#if 0
    skb->sk = sk;
    skb->destructor = tcp_rfree;
    atomic_add(skb->truesize, &sk->rmem_alloc);
    sk->forward_alloc -= skb->truesize;
#endif
}
3763
3764 extern void tcp_listen_wlock(void);
3765
/* - We may sleep inside this lock.
 * - If sleeping is not required (or called from BH),
 *   use plain read_(un)lock(&tcp_lhash_lock).
 *
 * NOTE: stubbed in this port; the Linux body is compiled out, so no
 * lock is taken and no counter is changed.
 */
static __inline void tcp_listen_lock(void)
{
#if 0
    /* read_lock synchronizes to candidates to writers */
    read_lock(&tcp_lhash_lock);
    atomic_inc(&tcp_lhash_users);
    read_unlock(&tcp_lhash_lock);
#endif
}
3780
/* Stub: does nothing in this port.
 *
 * The disabled Linux body decrements tcp_lhash_users and wakes
 * tcp_lhash_wait when the count reaches zero.
 */
static __inline void tcp_listen_unlock(void)
{
#if 0
    if (atomic_dec_and_test(&tcp_lhash_users))
        wake_up(&tcp_lhash_wait);
#endif
}
3788
/* Stub: always returns 0 in this port.
 *
 * The disabled Linux body returns tp->keepalive_intvl, falling back to
 * sysctl_tcp_keepalive_intvl when that is 0 (GNU "?:" extension).
 */
static __inline int keepalive_intvl_when(struct tcp_opt *tp)
{
#if 0
    return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
#else
    return 0;
#endif
}
3797
/* Stub: always returns 0 in this port.
 *
 * The disabled Linux body returns tp->keepalive_time, falling back to
 * sysctl_tcp_keepalive_time when that is 0 (GNU "?:" extension).
 */
static __inline int keepalive_time_when(struct tcp_opt *tp)
{
#if 0
    return tp->keepalive_time ? : sysctl_tcp_keepalive_time;
#else
    return 0;
#endif
}
3806
/* Stub: always returns 0 in this port.
 *
 * The disabled Linux body takes tp->linger2 (or sysctl_tcp_fin_timeout
 * when that is 0) and raises it to at least 3.5 * tp->rto, computed as
 * (rto << 2) - (rto >> 1).
 */
static __inline int tcp_fin_time(struct tcp_opt *tp)
{
#if 0
    int fin_timeout = tp->linger2 ? : sysctl_tcp_fin_timeout;

    if (fin_timeout < (tp->rto<<2) - (tp->rto>>1))
        fin_timeout = (tp->rto<<2) - (tp->rto>>1);

    return fin_timeout;
#else
    return 0;
#endif
}
3820
/* Stub: always returns 0 in this port, for any tp and rst.
 *
 * In the disabled Linux body a return of 1 is reached only when the
 * received timestamp is older than tp->ts_recent and ts_recent_stamp
 * is still within the TCP_PAWS_24DAYS (or, for RST, TCP_PAWS_MSL)
 * bound; every other path returns 0.
 */
static __inline int tcp_paws_check(struct tcp_opt *tp, int rst)
{
#if 0
    if ((s32)(tp->rcv_tsval - tp->ts_recent) >= 0)
        return 0;
    if (xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_24DAYS)
        return 0;

    /* RST segments are not recommended to carry timestamp,
       and, if they do, it is recommended to ignore PAWS because
       "their cleanup function should take precedence over timestamps."
       Certainly, it is mistake. It is necessary to understand the reasons
       of this constraint to relax it: if peer reboots, clock may go
       out-of-sync and half-open connections will not be reset.
       Actually, the problem would be not existing if all
       the implementations followed draft about maintaining clock
       via reboots. Linux-2.2 DOES NOT!

       However, we can relax time bounds for RST segments to MSL.
     */
    if (rst && xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_MSL)
        return 0;
    return 1;
#else
    return 0;
#endif
}
3848
/* No-op: expands to an empty statement and never evaluates sk. */
#define TCP_CHECK_TIMER(sk) do { } while (0)
3850
3851 #endif /* __TCPCORE_H */
3852
3853
3854 //
3855 #endif