Revert r64621.
[reactos.git] / reactos / lib / drivers / lwip / src / rostcp.c
index d2f4f00..44a865a 100755 (executable)
@@ -1,4 +1,5 @@
 #include "lwip/sys.h"
+#include "lwip/netif.h"
 #include "lwip/tcpip.h"
 
 #include "rosip.h"
@@ -31,6 +32,24 @@ extern KEVENT TerminationEvent;
 extern NPAGED_LOOKASIDE_LIST MessageLookasideList;
 extern NPAGED_LOOKASIDE_LIST QueueEntryLookasideList;
 
+/* Required for ERR_T to NTSTATUS translation in receive error handling */
+NTSTATUS TCPTranslateError(const err_t err);
+
+void
+LibTCPDumpPcb(PVOID SocketContext)
+{
+    struct tcp_pcb *pcb = (struct tcp_pcb*)SocketContext;
+    unsigned int addr = ntohl(pcb->remote_ip.addr);
+
+    DbgPrint("\tState: %s\n", tcp_state_str[pcb->state]);
+    DbgPrint("\tRemote: (%d.%d.%d.%d, %d)\n",
+    (addr >> 24) & 0xFF,
+    (addr >> 16) & 0xFF,
+    (addr >> 8) & 0xFF,
+    addr & 0xFF,
+    pcb->remote_port);
+}
+
 static
 void
 LibTCPEmptyQueue(PCONNECTION_ENDPOINT Connection)
@@ -60,6 +79,7 @@ void LibTCPEnqueuePacket(PCONNECTION_ENDPOINT Connection, struct pbuf *p)
 
     qp = (PQUEUE_ENTRY)ExAllocateFromNPagedLookasideList(&QueueEntryLookasideList);
     qp->p = p;
+    qp->Offset = 0;
 
     ExInterlockedInsertTailList(&Connection->PacketQueue, &qp->ListEntry, &Connection->Lock);
 }
@@ -82,12 +102,11 @@ NTSTATUS LibTCPGetDataFromConnectionQueue(PCONNECTION_ENDPOINT Connection, PUCHA
 {
     PQUEUE_ENTRY qp;
     struct pbuf* p;
-    NTSTATUS Status = STATUS_PENDING;
-    UINT ReadLength, ExistingDataLength, SpaceLeft;
+    NTSTATUS Status;
+    UINT ReadLength, PayloadLength, Offset, Copied;
     KIRQL OldIrql;
 
     (*Received) = 0;
-    SpaceLeft = RecvLen;
 
     LockObject(Connection, &OldIrql);
 
@@ -96,56 +115,59 @@ NTSTATUS LibTCPGetDataFromConnectionQueue(PCONNECTION_ENDPOINT Connection, PUCHA
         while ((qp = LibTCPDequeuePacket(Connection)) != NULL)
         {
             p = qp->p;
-            ExistingDataLength = (*Received);
 
-            Status = STATUS_SUCCESS;
+            /* Calculate the payload length first */
+            PayloadLength = p->tot_len;
+            PayloadLength -= qp->Offset;
+            Offset = qp->Offset;
 
-            ReadLength = MIN(p->tot_len, SpaceLeft);
-            if (ReadLength != p->tot_len)
+            /* Check if we're reading the whole buffer */
+            ReadLength = MIN(PayloadLength, RecvLen);
+            ASSERT(ReadLength != 0);
+            if (ReadLength != PayloadLength)
             {
-                if (ExistingDataLength)
-                {
-                    /* The packet was too big but we used some data already so give it another shot later */
-                    InsertHeadList(&Connection->PacketQueue, &qp->ListEntry);
-                    break;
-                }
-                else
-                {
-                    /* The packet is just too big to fit fully in our buffer, even when empty so
-                     * return an informative status but still copy all the data we can fit.
-                     */
-                    Status = STATUS_BUFFER_OVERFLOW;
-                }
+                /* Save this one for later */
+                qp->Offset += ReadLength;
+                InsertHeadList(&Connection->PacketQueue, &qp->ListEntry);
+                qp = NULL;
             }
 
             UnlockObject(Connection, OldIrql);
 
-            /* Return to a lower IRQL because the receive buffer may be pageable memory */
-            for (; (*Received) < ReadLength + ExistingDataLength; (*Received) += p->len, p = p->next)
-            {
-                RtlCopyMemory(RecvBuffer + (*Received), p->payload, p->len);
-            }
+            Copied = pbuf_copy_partial(p, RecvBuffer, ReadLength, Offset);
+            ASSERT(Copied == ReadLength);
 
             LockObject(Connection, &OldIrql);
 
-            SpaceLeft -= ReadLength;
+            /* Update trackers */
+            RecvLen -= ReadLength;
+            RecvBuffer += ReadLength;
+            (*Received) += ReadLength;
+
+            if (qp != NULL)
+            {
+                /* Use this special pbuf free callback function because we're outside tcpip thread */
+                pbuf_free_callback(qp->p);
 
-            /* Use this special pbuf free callback function because we're outside tcpip thread */
-            pbuf_free_callback(qp->p);
+                ExFreeToNPagedLookasideList(&QueueEntryLookasideList, qp);
+            }
+            else
+            {
+                /* If we get here, it means we've filled the buffer */
+                ASSERT(RecvLen == 0);
+            }
 
-            ExFreeToNPagedLookasideList(&QueueEntryLookasideList, qp);
+            ASSERT((*Received) != 0);
+            Status = STATUS_SUCCESS;
 
             if (!RecvLen)
                 break;
-
-            if (Status != STATUS_SUCCESS)
-                break;
         }
     }
     else
     {
         if (Connection->ReceiveShutdown)
-            Status = STATUS_SUCCESS;
+            Status = Connection->ReceiveShutdownStatus;
         else
             Status = STATUS_PENDING;
     }
@@ -197,7 +219,6 @@ err_t
 InternalRecvEventHandler(void *arg, PTCP_PCB pcb, struct pbuf *p, const err_t err)
 {
     PCONNECTION_ENDPOINT Connection = arg;
-    u32_t len;
 
     /* Make sure the socket didn't get closed */
     if (!arg)
@@ -210,33 +231,11 @@ InternalRecvEventHandler(void *arg, PTCP_PCB pcb, struct pbuf *p, const err_t er
 
     if (p)
     {
-        len = TCPRecvEventHandler(arg, p);
-        if (len == p->tot_len)
-        {
-            tcp_recved(pcb, len);
+        LibTCPEnqueuePacket(Connection, p);
 
-            pbuf_free(p);
+        tcp_recved(pcb, p->tot_len);
 
-            return ERR_OK;
-        }
-        else if (len != 0)
-        {
-            DbgPrint("UNTESTED CASE: NOT ALL DATA TAKEN! EXTRA DATA MAY BE LOST!\n");
-
-            tcp_recved(pcb, len);
-
-            /* Possible memory leak of pbuf here? */
-
-            return ERR_OK;
-        }
-        else
-        {
-            LibTCPEnqueuePacket(Connection, p);
-
-            tcp_recved(pcb, p->tot_len);
-
-            return ERR_OK;
-        }
+        TCPRecvEventHandler(arg);
     }
     else if (err == ERR_OK)
     {
@@ -245,19 +244,37 @@ InternalRecvEventHandler(void *arg, PTCP_PCB pcb, struct pbuf *p, const err_t er
          * whole socket here (by calling tcp_close()) as that would violate TCP specs
          */
         Connection->ReceiveShutdown = TRUE;
-        TCPFinEventHandler(arg, ERR_OK);
+        Connection->ReceiveShutdownStatus = STATUS_SUCCESS;
+
+        /* If we already did a send shutdown, we're in TIME_WAIT so we can't use this PCB anymore */
+        if (Connection->SendShutdown)
+        {
+            Connection->SocketContext = NULL;
+            tcp_arg(pcb, NULL);
+        }
+
+        /* Indicate the graceful close event */
+        TCPRecvEventHandler(arg);
+
+        /* If the PCB is gone, clean up the connection */
+        if (Connection->SendShutdown)
+        {
+            TCPFinEventHandler(Connection, ERR_CLSD);
+        }
     }
 
     return ERR_OK;
 }
 
+/* This function MUST return an error value that is not ERR_ABRT or ERR_OK if the connection
+ * is not accepted to avoid leaking the new PCB */
 static
 err_t
 InternalAcceptEventHandler(void *arg, PTCP_PCB newpcb, const err_t err)
 {
     /* Make sure the socket didn't get closed */
     if (!arg)
-        return ERR_ABRT;
+        return ERR_CLSD;
 
     TCPAcceptEventHandler(arg, newpcb);
 
@@ -265,7 +282,7 @@ InternalAcceptEventHandler(void *arg, PTCP_PCB newpcb, const err_t err)
     if (newpcb->callback_arg)
         return ERR_OK;
     else
-        return ERR_ABRT;
+        return ERR_CLSD;
 }
 
 static
@@ -285,10 +302,21 @@ static
 void
 InternalErrorEventHandler(void *arg, const err_t err)
 {
+    PCONNECTION_ENDPOINT Connection = arg;
+
     /* Make sure the socket didn't get closed */
-    if (!arg) return;
+    if (!arg || Connection->SocketContext == NULL) return;
+
+    /* The PCB is dead now */
+    Connection->SocketContext = NULL;
 
-    TCPFinEventHandler(arg, err);
+    /* Give them one shot to receive the remaining data */
+    Connection->ReceiveShutdown = TRUE;
+    Connection->ReceiveShutdownStatus = TCPTranslateError(err);
+    TCPRecvEventHandler(Connection);
+
+    /* Terminate the connection */
+    TCPFinEventHandler(Connection, err);
 }
 
 static
@@ -341,6 +369,7 @@ void
 LibTCPBindCallback(void *arg)
 {
     struct lwip_callback_msg *msg = arg;
+    PTCP_PCB pcb = msg->Input.Bind.Connection->SocketContext;
 
     ASSERT(msg);
 
@@ -350,7 +379,10 @@ LibTCPBindCallback(void *arg)
         goto done;
     }
 
-    msg->Output.Bind.Error = tcp_bind((PTCP_PCB)msg->Input.Bind.Connection->SocketContext,
+    /* We're guaranteed that the local address is valid to bind at this point */
+    pcb->so_options |= SOF_REUSEADDR;
+
+    msg->Output.Bind.Error = tcp_bind(pcb,
                                       msg->Input.Bind.IpAddress,
                                       ntohs(msg->Input.Bind.Port));
 
@@ -445,6 +477,9 @@ void
 LibTCPSendCallback(void *arg)
 {
     struct lwip_callback_msg *msg = arg;
+    PTCP_PCB pcb = msg->Input.Send.Connection->SocketContext;
+    ULONG SendLength;
+    UCHAR SendFlags;
 
     ASSERT(msg);
 
@@ -460,19 +495,37 @@ LibTCPSendCallback(void *arg)
         goto done;
     }
 
-    msg->Output.Send.Error = tcp_write((PTCP_PCB)msg->Input.Send.Connection->SocketContext,
-                                       msg->Input.Send.Data,
-                                       msg->Input.Send.DataLength,
-                                       TCP_WRITE_FLAG_COPY);
-    if (msg->Output.Send.Error == ERR_MEM)
+    SendFlags = TCP_WRITE_FLAG_COPY;
+    SendLength = msg->Input.Send.DataLength;
+    if (tcp_sndbuf(pcb) == 0)
     {
         /* No buffer space so return pending */
         msg->Output.Send.Error = ERR_INPROGRESS;
+        goto done;
+    }
+    else if (tcp_sndbuf(pcb) < SendLength)
+    {
+        /* We've got some room so let's send what we can */
+        SendLength = tcp_sndbuf(pcb);
+
+        /* Don't set the push flag */
+        SendFlags |= TCP_WRITE_FLAG_MORE;
     }
-    else if (msg->Output.Send.Error == ERR_OK)
+
+    msg->Output.Send.Error = tcp_write(pcb,
+                                       msg->Input.Send.Data,
+                                       SendLength,
+                                       SendFlags);
+    if (msg->Output.Send.Error == ERR_OK)
     {
         /* Queued successfully so try to send it */
         tcp_output((PTCP_PCB)msg->Input.Send.Connection->SocketContext);
+        msg->Output.Send.Information = SendLength;
+    }
+    else if (msg->Output.Send.Error == ERR_MEM)
+    {
+        /* The queue is too long */
+        msg->Output.Send.Error = ERR_INPROGRESS;
     }
 
 done:
@@ -480,7 +533,7 @@ done:
 }
 
 err_t
-LibTCPSend(PCONNECTION_ENDPOINT Connection, void *const dataptr, const u16_t len, const int safe)
+LibTCPSend(PCONNECTION_ENDPOINT Connection, void *const dataptr, const u16_t len, u32_t *sent, const int safe)
 {
     err_t ret;
     struct lwip_callback_msg *msg;
@@ -503,6 +556,11 @@ LibTCPSend(PCONNECTION_ENDPOINT Connection, void *const dataptr, const u16_t len
         else
             ret = ERR_CLSD;
 
+        if (ret == ERR_OK)
+            *sent = msg->Output.Send.Information;
+        else
+            *sent = 0;
+
         ExFreeToNPagedLookasideList(&MessageLookasideList, msg);
 
         return ret;
@@ -583,24 +641,38 @@ LibTCPShutdownCallback(void *arg)
         goto done;
     }
 
-    if (pcb->state == CLOSE_WAIT)
-    {
-        /* This case actually results in a socket closure later (lwIP bug?) */
-        msg->Input.Shutdown.Connection->SocketContext = NULL;
+    /* LwIP makes the (questionable) assumption that SHUTDOWN_RDWR is equivalent to tcp_close().
+     * This assumption holds even if the shutdown calls are done separately (even through multiple
+     * WinSock shutdown() calls). This assumption means that lwIP has the right to deallocate our
+     * PCB without telling us if we shutdown TX and RX. To avoid these problems, we'll clear the
+     * socket context if we have called shutdown for TX and RX.
+     */
+    if (msg->Input.Shutdown.shut_rx) {
+        msg->Output.Shutdown.Error = tcp_shutdown(pcb, TRUE, FALSE);
     }
-
-    msg->Output.Shutdown.Error = tcp_shutdown(pcb, msg->Input.Shutdown.shut_rx, msg->Input.Shutdown.shut_tx);
-    if (msg->Output.Shutdown.Error)
-    {
-        msg->Input.Shutdown.Connection->SocketContext = pcb;
+    if (msg->Input.Shutdown.shut_tx) {
+        msg->Output.Shutdown.Error = tcp_shutdown(pcb, FALSE, TRUE);
     }
-    else
+
+    if (!msg->Output.Shutdown.Error)
     {
         if (msg->Input.Shutdown.shut_rx)
+        {
             msg->Input.Shutdown.Connection->ReceiveShutdown = TRUE;
+            msg->Input.Shutdown.Connection->ReceiveShutdownStatus = STATUS_FILE_CLOSED;
+        }
 
         if (msg->Input.Shutdown.shut_tx)
             msg->Input.Shutdown.Connection->SendShutdown = TRUE;
+
+        if (msg->Input.Shutdown.Connection->ReceiveShutdown &&
+            msg->Input.Shutdown.Connection->SendShutdown)
+        {
+            /* The PCB is not ours anymore */
+            msg->Input.Shutdown.Connection->SocketContext = NULL;
+            tcp_arg(pcb, NULL);
+            TCPFinEventHandler(msg->Input.Shutdown.Connection, ERR_CLSD);
+        }
     }
 
 done:
@@ -647,48 +719,41 @@ LibTCPCloseCallback(void *arg)
     /* Empty the queue even if we're already "closed" */
     LibTCPEmptyQueue(msg->Input.Close.Connection);
 
-    if (!msg->Input.Close.Connection->SocketContext)
+    /* Check if we've already been closed */
+    if (msg->Input.Close.Connection->Closing)
     {
         msg->Output.Close.Error = ERR_OK;
         goto done;
     }
 
-    /* Clear the PCB pointer */
-    msg->Input.Close.Connection->SocketContext = NULL;
+    /* Enter "closing" mode if we're doing a normal close */
+    if (msg->Input.Close.Callback)
+        msg->Input.Close.Connection->Closing = TRUE;
 
-    switch (pcb->state)
+    /* Check if the PCB was already "closed" but the client doesn't know it yet */
+    if (!msg->Input.Close.Connection->SocketContext)
     {
-        case CLOSED:
-        case LISTEN:
-        case SYN_SENT:
-           msg->Output.Close.Error = tcp_close(pcb);
-
-           if (!msg->Output.Close.Error && msg->Input.Close.Callback)
-               TCPFinEventHandler(msg->Input.Close.Connection, ERR_OK);
-           break;
+        msg->Output.Close.Error = ERR_OK;
+        goto done;
+    }
 
-        default:
-           if (msg->Input.Close.Connection->SendShutdown &&
-               msg->Input.Close.Connection->ReceiveShutdown)
-           {
-               /* Abort the connection */
-               tcp_abort(pcb);
+    /* Clear the PCB pointer and stop callbacks */
+    msg->Input.Close.Connection->SocketContext = NULL;
+    tcp_arg(pcb, NULL);
 
-               /* Aborts always succeed */
-               msg->Output.Close.Error = ERR_OK;
-           }
-           else
-           {
-               /* Start the graceful close process (or send RST for pending data) */
-               msg->Output.Close.Error = tcp_close(pcb);
-           }
-           break;
-    }
+    /* This may generate additional callbacks but we don't care,
+     * because they're too inconsistent to rely on */
+    msg->Output.Close.Error = tcp_close(pcb);
 
     if (msg->Output.Close.Error)
     {
         /* Restore the PCB pointer */
         msg->Input.Close.Connection->SocketContext = pcb;
+        msg->Input.Close.Connection->Closing = FALSE;
+    }
+    else if (msg->Input.Close.Callback)
+    {
+        TCPFinEventHandler(msg->Input.Close.Connection, ERR_CLSD);
     }
 
 done: