/*
 * Copyright (c) 1982, 1986, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)in_pcb.c	8.2 (Berkeley) 1/4/94
 */
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/malloc.h>
40 #include <sys/protosw.h>
41 #include <sys/socket.h>
42 #include <sys/socketvar.h>
43 #include <sys/ioctl.h>
44 #include <sys/errno.h>
47 #include <sys/queue.h>
50 #include <net/route.h>
52 #include <netinet/in.h>
53 #include <netinet/in_systm.h>
54 #include <netinet/ip.h>
55 #include <netinet/in_pcb.h>
56 #include <netinet/in_var.h>
57 #include <netinet/ip_var.h>
59 struct in_addr zeroin_addr
;
62 in_pcballoc(so
, pcbinfo
)
64 struct inpcbinfo
*pcbinfo
;
66 register struct inpcb
*inp
;
69 MALLOC(inp
, struct inpcb
*, sizeof(*inp
), M_PCB
, M_NOWAIT
);
72 bzero((caddr_t
)inp
, sizeof(*inp
));
73 inp
->inp_pcbinfo
= pcbinfo
;
76 LIST_INSERT_HEAD(pcbinfo
->listhead
, inp
, inp_list
);
79 so
->so_pcb
= (caddr_t
)inp
;
/*
 * Body of the local bind routine.  The caller in in_pcbconnect() invokes
 * it as in_pcbbind(inp, nam) with nam possibly null.
 *
 * NOTE(review): this is a damaged listing, not compilable C.  Each
 * statement still carries the listing's own line number as a leading
 * token, and the function header, the declarations of lport, error and t,
 * most braces and several return statements are absent.  The code below
 * is left byte-for-byte as found; restore it from the project tree.
 *
 * What the visible statements show:
 *  - With no configured interface address (in_ifaddr == 0) the routine
 *    returns EADDRNOTAVAIL.
 *  - It tests whether the PCB already has a local port or a non-wildcard
 *    local address; the action taken on that test is not visible here.
 *  - wild becomes INPLOOKUP_WILDCARD when neither SO_REUSEADDR nor
 *    SO_REUSEPORT is set and the socket is not a listening
 *    connection-oriented socket.
 *  - nam is read as a struct sockaddr_in; its m_len is compared with
 *    sizeof(*sin) and the requested port is taken from sin_port.
 *  - For a multicast address, SO_REUSEADDR is treated as also granting
 *    SO_REUSEPORT.  For any other non-wildcard address, sin_port is
 *    zeroed and ifa_ifwithaddr() must find the address, else
 *    EADDRNOTAVAIL is returned.
 *  - A port below IPPORT_RESERVED is gated by suser().
 *  - in_pcblookup() is used to detect an existing binding; the
 *    accompanying trace message reads "Leaving EADDRINUSE".
 *  - When a port has to be chosen, *lastport is reset to IPPORT_RESERVED
 *    whenever it falls outside [IPPORT_RESERVED, IPPORT_USERRESERVED] and
 *    candidates are retried in a do/while around in_pcblookup().
 *  - Finally inp_lport is set to the chosen port.
 */
85 register struct inpcb
*inp
;
88 register struct socket
*so
= inp
->inp_socket
;
89 struct inpcbhead
*head
= inp
->inp_pcbinfo
->listhead
;
90 unsigned short *lastport
= &inp
->inp_pcbinfo
->lastport
;
91 struct sockaddr_in
*sin
;
92 struct proc
*p
= curproc
; /* XXX */
94 int wild
= 0, reuseport
= (so
->so_options
& SO_REUSEPORT
);
97 OS_DbgPrint(OSK_MID_TRACE
,("Called\n"));
99 if( nam
) OskitDumpBuffer( nam
->m_data
, nam
->m_len
);
102 if (in_ifaddr
== 0) {
103 OS_DbgPrint(OSK_MID_TRACE
,("Leaving EADDRNOTAVAIL\n"));
104 return (EADDRNOTAVAIL
);
107 if (inp
->inp_lport
|| inp
->inp_laddr
.s_addr
!= INADDR_ANY
)
109 if ((so
->so_options
& (SO_REUSEADDR
|SO_REUSEPORT
)) == 0 &&
110 ((so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) == 0 ||
111 (so
->so_options
& SO_ACCEPTCONN
) == 0))
112 wild
= INPLOOKUP_WILDCARD
;
114 sin
= mtod(nam
, struct sockaddr_in
*);
115 if (nam
->m_len
!= sizeof (*sin
)) {
116 OS_DbgPrint(OSK_MID_TRACE
,("Leaving EINVAL\n"));
121 * We should check the family, but old programs
122 * incorrectly fail to initialize it.
124 if (sin
->sin_family
!= AF_INET
) {
125 OS_DbgPrint(OSK_MID_TRACE
,("Leaving EAFNOSUPPORT\n"));
126 return (EAFNOSUPPORT
);
129 lport
= sin
->sin_port
;
130 if (IN_MULTICAST(ntohl(sin
->sin_addr
.s_addr
))) {
132 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
133 * allow complete duplication of binding if
134 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
135 * and a multicast address is bound on both
136 * new and duplicated sockets.
138 if (so
->so_options
& SO_REUSEADDR
)
139 reuseport
= SO_REUSEADDR
|SO_REUSEPORT
;
140 } else if (sin
->sin_addr
.s_addr
!= INADDR_ANY
) {
141 sin
->sin_port
= 0; /* yech... */
142 OS_DbgPrint(OSK_MID_TRACE
,("Calling ifwithaddr\n"));
143 if (ifa_ifwithaddr((struct sockaddr
*)sin
) == 0) {
144 OS_DbgPrint(OSK_MID_TRACE
,
145 ("Leaving EADDRNOTAVAIL\n"));
146 return (EADDRNOTAVAIL
);
148 OS_DbgPrint(OSK_MID_TRACE
,("Yep, we have that addr\n"));
154 if (ntohs(lport
) < IPPORT_RESERVED
&&
155 (error
= suser(p
->p_ucred
, &p
->p_acflag
))) {
156 OS_DbgPrint(OSK_MID_TRACE
,
157 ("Leaving EACCESS\n"));
160 t
= in_pcblookup(head
, zeroin_addr
, 0,
161 sin
->sin_addr
, lport
, wild
);
162 if (t
&& (reuseport
& t
->inp_socket
->so_options
) == 0)
164 OS_DbgPrint(OSK_MID_TRACE
,
165 ("Leaving EADDRINUSE\n"));
169 inp
->inp_laddr
= sin
->sin_addr
;
174 OS_DbgPrint(OSK_MID_TRACE
,("Finding port %d\n",
176 if (*lastport
< IPPORT_RESERVED
||
177 *lastport
> IPPORT_USERRESERVED
)
178 *lastport
= IPPORT_RESERVED
;
179 lport
= htons(*lastport
);
180 } while (in_pcblookup(head
,
181 zeroin_addr
, 0, inp
->inp_laddr
,
183 inp
->inp_lport
= lport
;
186 OS_DbgPrint(OSK_MID_TRACE
,("Returning success\n"));
/*
 *   Transform old in_pcbconnect() into an inner subroutine for new
 *   in_pcbconnect(): Do some validity-checking on the remote
 *   address (in mbuf 'nam') and then determine local host address
 *   (i.e., which interface) to use to access that remote host.
 *
 *   This preserves definition of in_pcbconnect(), while supporting a
 *   slightly different version for T/TCP.  (This is more than
 *   a bit of a kludge, but cleaning up the internal interfaces would
 *   have forced minor changes in every protocol).
 */
/*
 * in_pcbladdr(inp, nam, plocal_sin): validate the remote address held in
 * mbuf nam and choose the local interface address to use for it, handing
 * that address back through *plocal_sin.
 *
 * NOTE(review): this is a damaged listing, not compilable C.  Each
 * statement still carries the listing's own line number as a leading
 * token, and the return type, the declaration of nam, the declaration of
 * ifp, most braces and several statements are absent.  The code below is
 * left byte-for-byte as found; restore it from the project tree.
 *
 * What the visible statements show:
 *  - nam is read as a struct sockaddr_in.  Its m_len is compared with
 *    sizeof(*sin) (the result of a mismatch is not visible), a family
 *    other than AF_INET yields EAFNOSUPPORT and a zero port yields
 *    EADDRNOTAVAIL.
 *  - A destination of INADDR_ANY is rewritten to the address of the first
 *    entry of in_ifaddr.  INADDR_BROADCAST is rewritten to that entry's
 *    broadcast address when its interface has IFF_BROADCAST.
 *  - When the PCB has no local address, the cached route inp_route is
 *    consulted.  Its rtentry pointer is cleared if the cached destination
 *    differs or SO_DONTROUTE is set, and ro_dst is filled in when a route
 *    has to be acquired.  ia is taken from the route's rt_ifa unless the
 *    route's interface is a loopback.
 *  - As a fallback, ifa_ifwithdstaddr() and ifa_ifwithnet() are tried
 *    with sin_port saved in fport and restored afterwards.  Failure
 *    yields EADDRNOTAVAIL.
 *  - For a multicast destination with multicast options that name an
 *    outgoing interface, in_ifaddr is searched for an entry on that
 *    interface.  Failure yields EADDRNOTAVAIL.
 *  - *plocal_sin is set from ia->ia_ifa.ifa_addr and traced.
 */
203 in_pcbladdr(inp
, nam
, plocal_sin
)
204 register struct inpcb
*inp
;
206 struct sockaddr_in
**plocal_sin
;
208 struct in_ifaddr
*ia
;
209 struct sockaddr_in
*ifaddr
= 0;
210 register struct sockaddr_in
*sin
= mtod(nam
, struct sockaddr_in
*);
212 OS_DbgPrint(OSK_MID_TRACE
,("Called\n"));
214 if (nam
->m_len
!= sizeof (*sin
))
216 if (sin
->sin_family
!= AF_INET
)
217 return (EAFNOSUPPORT
);
218 if (sin
->sin_port
== 0)
219 return (EADDRNOTAVAIL
);
222 * If the destination address is INADDR_ANY,
223 * use the primary local address.
224 * If the supplied address is INADDR_BROADCAST,
225 * and the primary interface supports broadcast,
226 * choose the broadcast address for that interface.
228 #define satosin(sa) ((struct sockaddr_in *)(sa))
229 #define sintosa(sin) ((struct sockaddr *)(sin))
230 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa))
231 if (sin
->sin_addr
.s_addr
== INADDR_ANY
)
232 sin
->sin_addr
= IA_SIN(in_ifaddr
)->sin_addr
;
234 else if (sin
->sin_addr
.s_addr
== (u_long
)INADDR_BROADCAST
&&
235 (in_ifaddr
->ia_ifp
->if_flags
& IFF_BROADCAST
))
236 sin
->sin_addr
= satosin(&in_ifaddr
->ia_broadaddr
)->sin_addr
;
239 if (inp
->inp_laddr
.s_addr
== INADDR_ANY
) {
240 register struct route
*ro
;
242 ia
= (struct in_ifaddr
*)0;
244 * If route is known or can be allocated now,
245 * our src addr is taken from the i/f, else punt.
247 ro
= &inp
->inp_route
;
249 (satosin(&ro
->ro_dst
)->sin_addr
.s_addr
!=
250 sin
->sin_addr
.s_addr
||
251 inp
->inp_socket
->so_options
& SO_DONTROUTE
)) {
253 ro
->ro_rt
= (struct rtentry
*)0;
255 if ((inp
->inp_socket
->so_options
& SO_DONTROUTE
) == 0 && /*XXX*/
256 (ro
->ro_rt
== (struct rtentry
*)0 ||
257 ro
->ro_rt
->rt_ifp
== (struct ifnet
*)0)) {
258 /* No route yet, so try to acquire one */
259 ro
->ro_dst
.sa_family
= AF_INET
;
260 ro
->ro_dst
.sa_len
= sizeof(struct sockaddr_in
);
261 ((struct sockaddr_in
*) &ro
->ro_dst
)->sin_addr
=
266 * If we found a route, use the address
267 * corresponding to the outgoing interface
268 * unless it is the loopback (in case a route
269 * to our address on another net goes to loopback).
271 if (ro
->ro_rt
&& !(ro
->ro_rt
->rt_ifp
->if_flags
& IFF_LOOPBACK
))
272 ia
= ifatoia(ro
->ro_rt
->rt_ifa
);
274 u_short fport
= sin
->sin_port
;
277 ia
= ifatoia(ifa_ifwithdstaddr(sintosa(sin
)));
280 ia
= ifatoia(ifa_ifwithnet(sintosa(sin
)));
281 sin
->sin_port
= fport
;
285 return (EADDRNOTAVAIL
);
288 * If the destination address is multicast and an outgoing
289 * interface has been set as a multicast option, use the
290 * address of that interface as our source address.
293 if (IN_MULTICAST(ntohl(sin
->sin_addr
.s_addr
)) &&
294 inp
->inp_moptions
!= NULL
) {
295 struct ip_moptions
*imo
;
298 imo
= inp
->inp_moptions
;
299 if (imo
->imo_multicast_ifp
!= NULL
) {
300 ifp
= imo
->imo_multicast_ifp
;
301 for (ia
= in_ifaddr
; ia
; ia
= ia
->ia_next
)
302 if (ia
->ia_ifp
== ifp
)
305 return (EADDRNOTAVAIL
);
310 * Don't do pcblookup call here; return interface in plocal_sin
311 * and exit to caller, that will do the lookup.
313 *plocal_sin
= ia
->ia_ifa
.ifa_addr
;
314 OS_DbgPrint(OSK_MID_TRACE
,("plocal sin %x\n",
315 (*plocal_sin
)->sin_addr
.s_addr
));
323 * Connect from a socket to a specified address.
324 * Both address and port must be specified in argument sin.
325 * If don't have a local address for this socket yet,
329 in_pcbconnect(inp
, nam
)
330 register struct inpcb
*inp
;
333 struct sockaddr_in
*ifaddr
;
334 register struct sockaddr_in
*sin
= mtod(nam
, struct sockaddr_in
*);
338 * Call inner routine, to assign local interface address.
340 if (error
= in_pcbladdr(inp
, nam
, &ifaddr
))
343 if (in_pcblookuphash(inp
->inp_pcbinfo
, sin
->sin_addr
, sin
->sin_port
,
344 inp
->inp_laddr
.s_addr
? inp
->inp_laddr
: ifaddr
->sin_addr
,
345 inp
->inp_lport
) != NULL
)
347 if (inp
->inp_laddr
.s_addr
== INADDR_ANY
) {
348 if (inp
->inp_lport
== 0)
349 (void)in_pcbbind(inp
, (struct mbuf
*)0);
350 inp
->inp_laddr
= ifaddr
->sin_addr
;
352 inp
->inp_faddr
= sin
->sin_addr
;
353 inp
->inp_fport
= sin
->sin_port
;
359 in_pcbdisconnect(inp
)
363 inp
->inp_faddr
.s_addr
= INADDR_ANY
;
366 if (inp
->inp_socket
->so_state
& SS_NOFDREF
)
374 struct socket
*so
= inp
->inp_socket
;
379 if (inp
->inp_options
)
380 (void)m_free(inp
->inp_options
);
381 if (inp
->inp_route
.ro_rt
)
382 rtfree(inp
->inp_route
.ro_rt
);
383 ip_freemoptions(inp
->inp_moptions
);
385 LIST_REMOVE(inp
, inp_hash
);
386 LIST_REMOVE(inp
, inp_list
);
392 in_setsockaddr(inp
, nam
)
393 register struct inpcb
*inp
;
396 register struct sockaddr_in
*sin
;
398 nam
->m_len
= sizeof (*sin
);
399 sin
= mtod(nam
, struct sockaddr_in
*);
400 bzero((caddr_t
)sin
, sizeof (*sin
));
401 sin
->sin_family
= AF_INET
;
402 sin
->sin_len
= sizeof(*sin
);
403 sin
->sin_port
= inp
->inp_lport
;
404 sin
->sin_addr
= inp
->inp_laddr
;
408 in_setpeeraddr(inp
, nam
)
412 register struct sockaddr_in
*sin
;
414 nam
->m_len
= sizeof (*sin
);
415 sin
= mtod(nam
, struct sockaddr_in
*);
416 bzero((caddr_t
)sin
, sizeof (*sin
));
417 sin
->sin_family
= AF_INET
;
418 sin
->sin_len
= sizeof(*sin
);
419 sin
->sin_port
= inp
->inp_fport
;
420 sin
->sin_addr
= inp
->inp_faddr
;
424 * Pass some notification to all connections of a protocol
425 * associated with address dst. The local address and/or port numbers
426 * may be specified to limit the search. The "usual action" will be
427 * taken, depending on the ctlinput cmd. The caller must filter any
428 * cmds that are uninteresting (e.g., no error in the map).
429 * Call the protocol specific routine (if any) to report
430 * any errors for each matching socket.
432 * Must be called at splnet.
435 in_pcbnotify(head
, dst
, fport_arg
, laddr
, lport_arg
, cmd
, notify
)
436 struct inpcbhead
*head
;
437 struct sockaddr
*dst
;
438 u_int fport_arg
, lport_arg
;
439 struct in_addr laddr
;
441 void (*notify
) __P((struct inpcb
*, int));
443 register struct inpcb
*inp
, *oinp
;
444 struct in_addr faddr
;
445 u_short fport
= fport_arg
, lport
= lport_arg
;
448 if ((unsigned)cmd
> PRC_NCMDS
|| dst
->sa_family
!= AF_INET
)
450 faddr
= ((struct sockaddr_in
*)dst
)->sin_addr
;
451 if (faddr
.s_addr
== INADDR_ANY
)
455 * Redirects go to all references to the destination,
456 * and use in_rtchange to invalidate the route cache.
457 * Dead host indications: notify all references to the destination.
458 * Otherwise, if we have knowledge of the local port and address,
459 * deliver only to that socket.
461 if (PRC_IS_REDIRECT(cmd
) || cmd
== PRC_HOSTDEAD
) {
465 if (cmd
!= PRC_HOSTDEAD
)
466 notify
= in_rtchange
;
468 errno
= inetctlerrmap
[cmd
];
470 for (inp
= head
->lh_first
; inp
!= NULL
;) {
471 if (inp
->inp_faddr
.s_addr
!= faddr
.s_addr
||
472 inp
->inp_socket
== 0 ||
473 (lport
&& inp
->inp_lport
!= lport
) ||
474 (laddr
.s_addr
&& inp
->inp_laddr
.s_addr
!= laddr
.s_addr
) ||
475 (fport
&& inp
->inp_fport
!= fport
)) {
476 inp
= inp
->inp_list
.le_next
;
480 inp
= inp
->inp_list
.le_next
;
482 (*notify
)(oinp
, errno
);
488 * Check for alternatives when higher level complains
489 * about service problems. For now, invalidate cached
490 * routing information. If the route was created dynamically
491 * (by a redirect), time to try a default gateway again.
497 register struct rtentry
*rt
;
498 struct rt_addrinfo info
;
500 if ((rt
= inp
->inp_route
.ro_rt
)) {
501 inp
->inp_route
.ro_rt
= 0;
502 bzero((caddr_t
)&info
, sizeof(info
));
503 info
.rti_info
[RTAX_DST
] =
504 (struct sockaddr
*)&inp
->inp_route
.ro_dst
;
505 info
.rti_info
[RTAX_GATEWAY
] = rt
->rt_gateway
;
506 info
.rti_info
[RTAX_NETMASK
] = rt_mask(rt
);
507 rt_missmsg(RTM_LOSING
, &info
, rt
->rt_flags
, 0);
508 if (rt
->rt_flags
& RTF_DYNAMIC
)
509 (void) rtrequest(RTM_DELETE
, rt_key(rt
),
510 rt
->rt_gateway
, rt_mask(rt
), rt
->rt_flags
,
511 (struct rtentry
**)0);
514 * A new route can be allocated
515 * the next time output is attempted.
522 * After a routing change, flush old routing
523 * and allocate a (hopefully) better one.
526 in_rtchange(inp
, errno
)
527 register struct inpcb
*inp
;
530 if (inp
->inp_route
.ro_rt
) {
531 rtfree(inp
->inp_route
.ro_rt
);
532 inp
->inp_route
.ro_rt
= 0;
534 * A new route can be allocated the next time
535 * output is attempted.
541 in_pcblookup(head
, faddr
, fport_arg
, laddr
, lport_arg
, flags
)
542 struct inpcbhead
*head
;
543 struct in_addr faddr
, laddr
;
544 u_int fport_arg
, lport_arg
;
547 register struct inpcb
*inp
, *match
= NULL
;
548 int matchwild
= 3, wildcard
;
549 u_short fport
= fport_arg
, lport
= lport_arg
;
554 for (inp
= head
->lh_first
; inp
!= NULL
; inp
= inp
->inp_list
.le_next
) {
555 if (inp
->inp_lport
!= lport
)
558 if (inp
->inp_faddr
.s_addr
!= INADDR_ANY
) {
559 if (faddr
.s_addr
== INADDR_ANY
)
561 else if (inp
->inp_faddr
.s_addr
!= faddr
.s_addr
||
562 inp
->inp_fport
!= fport
)
565 if (faddr
.s_addr
!= INADDR_ANY
)
568 if (inp
->inp_laddr
.s_addr
!= INADDR_ANY
) {
569 if (laddr
.s_addr
== INADDR_ANY
)
571 else if (inp
->inp_laddr
.s_addr
!= laddr
.s_addr
)
574 if (laddr
.s_addr
!= INADDR_ANY
)
577 if (wildcard
&& (flags
& INPLOOKUP_WILDCARD
) == 0)
579 if (wildcard
< matchwild
) {
581 matchwild
= wildcard
;
582 if (matchwild
== 0) {
592 * Lookup PCB in hash list.
595 in_pcblookuphash(pcbinfo
, faddr
, fport_arg
, laddr
, lport_arg
)
596 struct inpcbinfo
*pcbinfo
;
597 struct in_addr faddr
, laddr
;
598 u_int fport_arg
, lport_arg
;
600 struct inpcbhead
*head
;
601 register struct inpcb
*inp
;
602 u_short fport
= fport_arg
, lport
= lport_arg
;
607 * First look for an exact match.
609 head
= &pcbinfo
->hashbase
[(faddr
.s_addr
+ lport
+ fport
) % pcbinfo
->hashsize
];
611 for (inp
= head
->lh_first
; inp
!= NULL
; inp
= inp
->inp_hash
.le_next
) {
612 if (inp
->inp_faddr
.s_addr
!= faddr
.s_addr
||
613 inp
->inp_fport
!= fport
||
614 inp
->inp_lport
!= lport
||
615 inp
->inp_laddr
.s_addr
!= laddr
.s_addr
)
618 * Move PCB to head of this hash chain so that it can be
619 * found more quickly in the future.
621 if (inp
!= head
->lh_first
) {
622 LIST_REMOVE(inp
, inp_hash
);
623 LIST_INSERT_HEAD(head
, inp
, inp_hash
);
632 * Insert PCB into hash chain. Must be called at splnet.
638 struct inpcbhead
*head
;
640 head
= &inp
->inp_pcbinfo
->hashbase
[(inp
->inp_faddr
.s_addr
+
641 inp
->inp_lport
+ inp
->inp_fport
) % inp
->inp_pcbinfo
->hashsize
];
643 LIST_INSERT_HEAD(head
, inp
, inp_hash
);
650 struct inpcbhead
*head
;
654 LIST_REMOVE(inp
, inp_hash
);
656 head
= &inp
->inp_pcbinfo
->hashbase
[(inp
->inp_faddr
.s_addr
+
657 inp
->inp_lport
+ inp
->inp_fport
) % inp
->inp_pcbinfo
->hashsize
];
659 LIST_INSERT_HEAD(head
, inp
, inp_hash
);