[NTOSKRNL] Add a raw implementation of !irpfind in kdbg
[reactos.git] / ntoskrnl / cc / copy.c
index 63ad5ae..41980b8 100644 (file)
@@ -27,6 +27,14 @@ typedef enum _CC_COPY_OPERATION
     CcOperationZero
 } CC_COPY_OPERATION;
 
+typedef enum _CC_CAN_WRITE_RETRY
+{
+    FirstTry = 0,
+    RetryAllowRemote = 253,
+    RetryForceCheckPerFile = 254,
+    RetryMasterLocked = 255,
+} CC_CAN_WRITE_RETRY;
+
 ULONG CcRosTraceLevel = 0;
 ULONG CcFastMdlReadWait;
 ULONG CcFastMdlReadNotPossible;
@@ -35,6 +43,13 @@ ULONG CcFastReadWait;
 ULONG CcFastReadNoWait;
 ULONG CcFastReadResourceMiss;
 
+/* Counters:
+ * - Number of pages flushed to the disk
+ * - Number of flush operations
+ */
+ULONG CcDataPages = 0;
+ULONG CcDataFlushes = 0;
+
 /* FUNCTIONS *****************************************************************/
 
 VOID
@@ -66,22 +81,25 @@ NTAPI
 CcReadVirtualAddress (
     PROS_VACB Vacb)
 {
-    ULONG Size, Pages;
+    ULONG Size;
     PMDL Mdl;
     NTSTATUS Status;
     IO_STATUS_BLOCK IoStatus;
     KEVENT Event;
+    ULARGE_INTEGER LargeSize;
 
-    Size = (ULONG)(Vacb->SharedCacheMap->SectionSize.QuadPart - Vacb->FileOffset.QuadPart);
-    if (Size > VACB_MAPPING_GRANULARITY)
+    LargeSize.QuadPart = Vacb->SharedCacheMap->SectionSize.QuadPart - Vacb->FileOffset.QuadPart;
+    if (LargeSize.QuadPart > VACB_MAPPING_GRANULARITY)
     {
-        Size = VACB_MAPPING_GRANULARITY;
+        LargeSize.QuadPart = VACB_MAPPING_GRANULARITY;
     }
+    Size = LargeSize.LowPart;
 
-    Pages = BYTES_TO_PAGES(Size);
-    ASSERT(Pages * PAGE_SIZE <= VACB_MAPPING_GRANULARITY);
+    Size = ROUND_TO_PAGES(Size);
+    ASSERT(Size <= VACB_MAPPING_GRANULARITY);
+    ASSERT(Size > 0);
 
-    Mdl = IoAllocateMdl(Vacb->BaseAddress, Pages * PAGE_SIZE, FALSE, FALSE, NULL);
+    Mdl = IoAllocateMdl(Vacb->BaseAddress, Size, FALSE, FALSE, NULL);
     if (!Mdl)
     {
         return STATUS_INSUFFICIENT_RESOURCES;
@@ -95,6 +113,7 @@ CcReadVirtualAddress (
     _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER)
     {
         Status = _SEH2_GetExceptionCode();
+        DPRINT1("MmProbeAndLockPages failed with: %lx for %p (%p, %p)\n", Status, Mdl, Vacb, Vacb->BaseAddress);
         KeBugCheck(CACHE_MANAGER);
     } _SEH2_END;
 
@@ -139,12 +158,14 @@ CcWriteVirtualAddress (
     NTSTATUS Status;
     IO_STATUS_BLOCK IoStatus;
     KEVENT Event;
+    ULARGE_INTEGER LargeSize;
 
-    Size = (ULONG)(Vacb->SharedCacheMap->SectionSize.QuadPart - Vacb->FileOffset.QuadPart);
-    if (Size > VACB_MAPPING_GRANULARITY)
+    LargeSize.QuadPart = Vacb->SharedCacheMap->SectionSize.QuadPart - Vacb->FileOffset.QuadPart;
+    if (LargeSize.QuadPart > VACB_MAPPING_GRANULARITY)
     {
-        Size = VACB_MAPPING_GRANULARITY;
+        LargeSize.QuadPart = VACB_MAPPING_GRANULARITY;
     }
+    Size = LargeSize.LowPart;
     //
     // Nonpaged pool PDEs in ReactOS must actually be synchronized between the
     // MmGlobalPageDirectory and the real system PDE directory. What a mess...
@@ -157,6 +178,10 @@ CcWriteVirtualAddress (
         } while (++i < (Size >> PAGE_SHIFT));
     }
 
+    Size = ROUND_TO_PAGES(Size);
+    ASSERT(Size <= VACB_MAPPING_GRANULARITY);
+    ASSERT(Size > 0);
+
     Mdl = IoAllocateMdl(Vacb->BaseAddress, Size, FALSE, FALSE, NULL);
     if (!Mdl)
     {
@@ -171,6 +196,7 @@ CcWriteVirtualAddress (
     _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER)
     {
         Status = _SEH2_GetExceptionCode();
+        DPRINT1("MmProbeAndLockPages failed with: %lx for %p (%p, %p)\n", Status, Mdl, Vacb, Vacb->BaseAddress);
         KeBugCheck(CACHE_MANAGER);
     } _SEH2_END;
 
@@ -362,8 +388,11 @@ CcCopyData (
     /* If that was a successful sync read operation, let's handle read ahead */
     if (Operation == CcOperationRead && Length == 0 && Wait)
     {
-        /* If file isn't random access, schedule next read */
-        if (!BooleanFlagOn(FileObject->Flags, FO_RANDOM_ACCESS))
+        /* If the file isn't random access and the next read may take us
+         * across a VACB boundary, schedule the next read
+         */
+        if (!BooleanFlagOn(FileObject->Flags, FO_RANDOM_ACCESS) &&
+            (CurrentOffset - 1) / VACB_MAPPING_GRANULARITY != (CurrentOffset + BytesCopied - 1) / VACB_MAPPING_GRANULARITY)
         {
             CcScheduleReadAhead(FileObject, (PLARGE_INTEGER)&FileOffset, BytesCopied);
         }
@@ -414,7 +443,7 @@ CcPostDeferredWrites(VOID)
             }
 
             /* Check we can write */
-            if (CcCanIWrite(DeferredWrite->FileObject, WrittenBytes, FALSE, TRUE))
+            if (CcCanIWrite(DeferredWrite->FileObject, WrittenBytes, FALSE, RetryForceCheckPerFile))
             {
                 /* We can, so remove it from the list and stop looking for entry */
                 RemoveEntryList(&DeferredWrite->DeferredWriteLinks);
@@ -509,6 +538,16 @@ CcPerformReadAhead(
     /* Remember it's locked */
     Locked = TRUE;
 
+    /* Don't read past the end of the file */
+    if (CurrentOffset >= SharedCacheMap->FileSize.QuadPart)
+    {
+        goto Clear;
+    }
+    if (CurrentOffset + Length > SharedCacheMap->FileSize.QuadPart)
+    {
+        Length = SharedCacheMap->FileSize.QuadPart - CurrentOffset;
+    }
+
     /* Next of the algorithm will lock like CcCopyData with the slight
      * difference that we don't copy data back to an user-backed buffer
      * We just bring data into Cc
@@ -586,10 +625,10 @@ Clear:
     {
         /* Mark read ahead as unactive */
         KeAcquireSpinLockAtDpcLevel(&PrivateCacheMap->ReadAheadSpinLock);
-        InterlockedAnd((volatile long *)&PrivateCacheMap->UlongFlags, 0xFFFEFFFF);
+        InterlockedAnd((volatile long *)&PrivateCacheMap->UlongFlags, ~PRIVATE_CACHE_MAP_READ_AHEAD_ACTIVE);
         KeReleaseSpinLockFromDpcLevel(&PrivateCacheMap->ReadAheadSpinLock);
     }
-    KeReleaseSpinLock(&PrivateCacheMap->ReadAheadSpinLock, OldIrql);
+    KeReleaseQueuedSpinLock(LockQueueMasterLock, OldIrql);
 
     /* If file was locked, release it */
     if (Locked)
@@ -614,61 +653,97 @@ CcCanIWrite (
     IN BOOLEAN Wait,
     IN BOOLEAN Retrying)
 {
+    KIRQL OldIrql;
     KEVENT WaitEvent;
+    ULONG Length, Pages;
+    BOOLEAN PerFileDefer;
     DEFERRED_WRITE Context;
     PFSRTL_COMMON_FCB_HEADER Fcb;
+    CC_CAN_WRITE_RETRY TryContext;
     PROS_SHARED_CACHE_MAP SharedCacheMap;
 
     CCTRACE(CC_API_DEBUG, "FileObject=%p BytesToWrite=%lu Wait=%d Retrying=%d\n",
         FileObject, BytesToWrite, Wait, Retrying);
 
-    /* We cannot write if dirty pages count is above threshold */
-    if (CcTotalDirtyPages > CcDirtyPageThreshold)
+    /* Write through is always OK */
+    if (BooleanFlagOn(FileObject->Flags, FO_WRITE_THROUGH))
     {
-        /* Can the caller wait till it's possible to write? */
-        goto CanIWait;
+        return TRUE;
     }
 
-    /* We cannot write if dirty pages count will bring use above
-     * XXX: Might not be accurate
-     */
-    if (CcTotalDirtyPages + (BytesToWrite / PAGE_SIZE) > CcDirtyPageThreshold)
+    TryContext = Retrying;
+    /* Always allow writes to remote files, unless this retry was posted from the deferred write queue */
+    if (IoIsFileOriginRemote(FileObject) && TryContext < RetryAllowRemote)
     {
-        /* Can the caller wait till it's possible to write? */
-        goto CanIWait;
+        return TRUE;
     }
 
-    /* Is there a limit per file object? */
-    Fcb = FileObject->FsContext;
-    SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
-    if (!BooleanFlagOn(Fcb->Flags, FSRTL_FLAG_LIMIT_MODIFIED_PAGES) ||
-        SharedCacheMap->DirtyPageThreshold == 0)
+    /* Don't exceed max tolerated size */
+    Length = MAX_ZERO_LENGTH;
+    if (BytesToWrite < MAX_ZERO_LENGTH)
     {
-        /* Nope, so that's fine, allow write operation */
-        return TRUE;
+        Length = BytesToWrite;
     }
 
-    /* Is dirty page count above local threshold? */
-    if (SharedCacheMap->DirtyPages > SharedCacheMap->DirtyPageThreshold)
+    Pages = BYTES_TO_PAGES(Length);
+
+    /* By default, assume limits per file won't be hit */
+    PerFileDefer = FALSE;
+    Fcb = FileObject->FsContext;
+    /* Do we have to check for limits per file? */
+    if (TryContext >= RetryForceCheckPerFile ||
+        BooleanFlagOn(Fcb->Flags, FSRTL_FLAG_LIMIT_MODIFIED_PAGES))
     {
-        /* Can the caller wait till it's possible to write? */
-        goto CanIWait;
+        /* If master is not locked, lock it now */
+        if (TryContext != RetryMasterLocked)
+        {
+            OldIrql = KeAcquireQueuedSpinLock(LockQueueMasterLock);
+        }
+
+        /* Let's not assume the file is cached... */
+        if (FileObject->SectionObjectPointer != NULL &&
+            FileObject->SectionObjectPointer->SharedCacheMap != NULL)
+        {
+            SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+            /* Do we have limits per file set? */
+            if (SharedCacheMap->DirtyPageThreshold != 0 &&
+                SharedCacheMap->DirtyPages != 0)
+            {
+                /* Yes, check whether they are blocking */
+                if (Pages + SharedCacheMap->DirtyPages > SharedCacheMap->DirtyPageThreshold)
+                {
+                    PerFileDefer = TRUE;
+                }
+            }
+        }
+
+        /* And don't forget to release master */
+        if (TryContext != RetryMasterLocked)
+        {
+            KeReleaseQueuedSpinLock(LockQueueMasterLock, OldIrql);
+        }
     }
 
-    /* We cannot write if dirty pages count will bring use above
-     * XXX: Might not be accurate
+    /* So, now allow write if:
+     * - Not the first try or we have no throttling yet
+     * AND:
+     * - We don't exceed threshold!
+     * - We don't exceed what Mm can allow us to use
+     *   + If we're above top, that's fine
+     *   + If we're above bottom with limited modified pages, that's fine
+     *   + Otherwise, throttle!
      */
-    if (SharedCacheMap->DirtyPages + (BytesToWrite / PAGE_SIZE) > SharedCacheMap->DirtyPageThreshold)
+    if ((TryContext != FirstTry || IsListEmpty(&CcDeferredWrites)) &&
+        CcTotalDirtyPages + Pages < CcDirtyPageThreshold &&
+        (MmAvailablePages > MmThrottleTop ||
+         (MmModifiedPageListHead.Total < 1000 && MmAvailablePages > MmThrottleBottom)) &&
+        !PerFileDefer)
     {
-        /* Can the caller wait till it's possible to write? */
-        goto CanIWait;
+        return TRUE;
     }
 
-    return TRUE;
-
-CanIWait:
-    /* If we reached that point, it means caller cannot write
-     * If he cannot wait, then fail and deny write
+    /* If the caller can wait, enter the wait loop until the write can
+     * actually be performed
      */
     if (!Wait)
     {
@@ -712,6 +787,7 @@ CanIWait:
                                     &CcDeferredWriteSpinLock);
     }
 
+    DPRINT1("Actively deferring write for: %p\n", FileObject);
     /* Now, we'll loop until our event is set. When it is set, it means that caller
      * can immediately write, and has to
      */