[NTOSKRNL] Rewrite CcCanIWrite() to make it more accurate and handle specific callers
[reactos.git] / ntoskrnl / cc / copy.c
index 4c68bf0..4089702 100644 (file)
@@ -4,7 +4,8 @@
  * FILE:            ntoskrnl/cc/copy.c
  * PURPOSE:         Implements cache managers copy interface
  *
- * PROGRAMMERS:
+ * PROGRAMMERS:     Some people?
+ *                  Pierre Schweitzer (pierre@reactos.org)
  */
 
 /* INCLUDES ******************************************************************/
@@ -26,6 +27,14 @@ typedef enum _CC_COPY_OPERATION
     CcOperationZero
 } CC_COPY_OPERATION;
 
+/* Internal retry states smuggled through the BOOLEAN 'Retrying' parameter
+ * of CcCanIWrite() so that internal callers can request specific behavior.
+ * Any non-zero value still reads as TRUE for external callers.
+ */
+typedef enum _CC_CAN_WRITE_RETRY
+{
+    FirstTry = 0,                 /* Regular first-chance call (Retrying == FALSE) */
+    RetryAllowRemote = 253,       /* Values >= this do NOT bypass throttling for remote files */
+    RetryForceCheckPerFile = 254, /* Force the per-file dirty-page threshold check (used by CcPostDeferredWrites) */
+    RetryMasterLocked = 255,      /* Caller already holds the queued master lock; don't re-acquire it */
+} CC_CAN_WRITE_RETRY;
+
 ULONG CcRosTraceLevel = 0;
 ULONG CcFastMdlReadWait;
 ULONG CcFastMdlReadNotPossible;
@@ -34,8 +43,6 @@ ULONG CcFastReadWait;
 ULONG CcFastReadNoWait;
 ULONG CcFastReadResourceMiss;
 
-extern KEVENT iLazyWriterNotify;
-
 /* FUNCTIONS *****************************************************************/
 
 VOID
@@ -141,7 +148,6 @@ CcWriteVirtualAddress (
     IO_STATUS_BLOCK IoStatus;
     KEVENT Event;
 
-    Vacb->Dirty = FALSE;
     Size = (ULONG)(Vacb->SharedCacheMap->SectionSize.QuadPart - Vacb->FileOffset.QuadPart);
     if (Size > VACB_MAPPING_GRANULARITY)
     {
@@ -192,7 +198,6 @@ CcWriteVirtualAddress (
     if (!NT_SUCCESS(Status) && (Status != STATUS_END_OF_FILE))
     {
         DPRINT1("IoPageWrite failed, Status %x\n", Status);
-        Vacb->Dirty = TRUE;
         return Status;
     }
 
@@ -251,8 +256,10 @@ CcCopyData (
     ULONG PartialLength;
     PVOID BaseAddress;
     BOOLEAN Valid;
+    PPRIVATE_CACHE_MAP PrivateCacheMap;
 
     SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+    PrivateCacheMap = FileObject->PrivateCacheMap;
     CurrentOffset = FileOffset;
     BytesCopied = 0;
 
@@ -359,11 +366,251 @@ CcCopyData (
         if (Operation != CcOperationZero)
             Buffer = (PVOID)((ULONG_PTR)Buffer + PartialLength);
     }
+
+    /* If that was a successful sync read operation, let's handle read ahead */
+    if (Operation == CcOperationRead && Length == 0 && Wait)
+    {
+        /* If file isn't random access, schedule next read */
+        if (!BooleanFlagOn(FileObject->Flags, FO_RANDOM_ACCESS))
+        {
+            CcScheduleReadAhead(FileObject, (PLARGE_INTEGER)&FileOffset, BytesCopied);
+        }
+
+        /* And update read history in private cache map */
+        PrivateCacheMap->FileOffset1.QuadPart = PrivateCacheMap->FileOffset2.QuadPart;
+        PrivateCacheMap->BeyondLastByte1.QuadPart = PrivateCacheMap->BeyondLastByte2.QuadPart;
+        PrivateCacheMap->FileOffset2.QuadPart = FileOffset;
+        PrivateCacheMap->BeyondLastByte2.QuadPart = FileOffset + BytesCopied;
+    }
+
     IoStatus->Status = STATUS_SUCCESS;
     IoStatus->Information = BytesCopied;
     return TRUE;
 }
 
+/* Drains the CcDeferredWrites list: for every queued deferred write that
+ * CcCanIWrite() now allows (forcing the per-file threshold check), either
+ * signals the waiter's event or invokes its post routine.
+ * WrittenBytes accumulates across iterations as a running budget of what
+ * has been (re)allowed so far, so later entries are checked against the
+ * total, not in isolation.
+ */
+VOID
+CcPostDeferredWrites(VOID)
+{
+    ULONG WrittenBytes;
+
+    /* We'll try to write as much as we can */
+    WrittenBytes = 0;
+    while (TRUE)
+    {
+        KIRQL OldIrql;
+        PLIST_ENTRY ListEntry;
+        PDEFERRED_WRITE DeferredWrite;
+
+        DeferredWrite = NULL;
+
+        /* Lock our deferred writes list */
+        KeAcquireSpinLock(&CcDeferredWriteSpinLock, &OldIrql);
+        for (ListEntry = CcDeferredWrites.Flink;
+             ListEntry != &CcDeferredWrites;
+             ListEntry = ListEntry->Flink)
+        {
+            /* Extract an entry */
+            DeferredWrite = CONTAINING_RECORD(ListEntry, DEFERRED_WRITE, DeferredWriteLinks);
+
+            /* Compute the modified bytes, based on what we already wrote */
+            WrittenBytes += DeferredWrite->BytesToWrite;
+            /* We overflowed, give up */
+            if (WrittenBytes < DeferredWrite->BytesToWrite)
+            {
+                DeferredWrite = NULL;
+                break;
+            }
+
+            /* Check we can write */
+            if (CcCanIWrite(DeferredWrite->FileObject, WrittenBytes, FALSE, RetryForceCheckPerFile))
+            {
+                /* We can, so remove it from the list and stop looking for entry */
+                RemoveEntryList(&DeferredWrite->DeferredWriteLinks);
+                break;
+            }
+
+            /* If we don't accept modified pages, stop here */
+            if (!DeferredWrite->LimitModifiedPages)
+            {
+                DeferredWrite = NULL;
+                break;
+            }
+
+            /* Reset count as nothing was written yet */
+            WrittenBytes -= DeferredWrite->BytesToWrite;
+            DeferredWrite = NULL;
+        }
+        KeReleaseSpinLock(&CcDeferredWriteSpinLock, OldIrql);
+
+        /* Nothing to write found, give up */
+        if (DeferredWrite == NULL)
+        {
+            break;
+        }
+
+        /* If we have an event, set it and quit.
+         * NOTE: event-carrying contexts are stack-allocated by the waiter
+         * in CcCanIWrite() and must NOT be freed here.
+         */
+        if (DeferredWrite->Event)
+        {
+            KeSetEvent(DeferredWrite->Event, IO_NO_INCREMENT, FALSE);
+        }
+        /* Otherwise, call the write routine and free the context
+         * (pool-allocated by CcDeferWrite() with the same 'CcDw' tag)
+         */
+        else
+        {
+            DeferredWrite->PostRoutine(DeferredWrite->Context1, DeferredWrite->Context2);
+            ExFreePoolWithTag(DeferredWrite, 'CcDw');
+        }
+    }
+}
+
+/* Performs the read ahead previously scheduled for the given file object:
+ * brings the range recorded in the private cache map (ReadAheadOffset[1],
+ * ReadAheadLength[1]) into the cache by requesting the covering VACBs and
+ * reading the ones that are not yet valid.
+ * On exit, clears the read-ahead-active flag and drops the file object
+ * reference taken when the read ahead was queued (see CcScheduleReadAhead).
+ */
+VOID
+CcPerformReadAhead(
+    IN PFILE_OBJECT FileObject)
+{
+    NTSTATUS Status;
+    LONGLONG CurrentOffset;
+    KIRQL OldIrql;
+    PROS_SHARED_CACHE_MAP SharedCacheMap;
+    PROS_VACB Vacb;
+    ULONG PartialLength;
+    PVOID BaseAddress;
+    BOOLEAN Valid;
+    ULONG Length;
+    PPRIVATE_CACHE_MAP PrivateCacheMap;
+    BOOLEAN Locked;
+
+    SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+
+    /* Critical:
+     * PrivateCacheMap might disappear in-between if the handle
+     * to the file is closed (private is attached to the handle not to
+     * the file), so we need to lock the master lock while we deal with
+     * it. It won't disappear without attempting to lock such lock.
+     */
+    OldIrql = KeAcquireQueuedSpinLock(LockQueueMasterLock);
+    PrivateCacheMap = FileObject->PrivateCacheMap;
+    /* If the handle was closed since the read ahead was scheduled, just quit */
+    if (PrivateCacheMap == NULL)
+    {
+        KeReleaseQueuedSpinLock(LockQueueMasterLock, OldIrql);
+        ObDereferenceObject(FileObject);
+        return;
+    }
+    /* Otherwise, extract read offset and length and release private map */
+    else
+    {
+        KeAcquireSpinLockAtDpcLevel(&PrivateCacheMap->ReadAheadSpinLock);
+        CurrentOffset = PrivateCacheMap->ReadAheadOffset[1].QuadPart;
+        Length = PrivateCacheMap->ReadAheadLength[1];
+        KeReleaseSpinLockFromDpcLevel(&PrivateCacheMap->ReadAheadSpinLock);
+    }
+    KeReleaseQueuedSpinLock(LockQueueMasterLock, OldIrql);
+
+    /* Time to go! */
+    DPRINT("Doing ReadAhead for %p\n", FileObject);
+    /* Lock the file, first */
+    if (!SharedCacheMap->Callbacks->AcquireForReadAhead(SharedCacheMap->LazyWriteContext, FALSE))
+    {
+        Locked = FALSE;
+        goto Clear;
+    }
+
+    /* Remember it's locked */
+    Locked = TRUE;
+
+    /* The rest of the algorithm will look like CcCopyData with the slight
+     * difference that we don't copy data back to a user-backed buffer
+     * We just bring data into Cc
+     */
+    PartialLength = CurrentOffset % VACB_MAPPING_GRANULARITY;
+    if (PartialLength != 0)
+    {
+        PartialLength = min(Length, VACB_MAPPING_GRANULARITY - PartialLength);
+        Status = CcRosRequestVacb(SharedCacheMap,
+                                  ROUND_DOWN(CurrentOffset,
+                                             VACB_MAPPING_GRANULARITY),
+                                  &BaseAddress,
+                                  &Valid,
+                                  &Vacb);
+        if (!NT_SUCCESS(Status))
+        {
+            DPRINT1("Failed to request VACB: %lx!\n", Status);
+            goto Clear;
+        }
+
+        if (!Valid)
+        {
+            Status = CcReadVirtualAddress(Vacb);
+            if (!NT_SUCCESS(Status))
+            {
+                CcRosReleaseVacb(SharedCacheMap, Vacb, FALSE, FALSE, FALSE);
+                DPRINT1("Failed to read data: %lx!\n", Status);
+                goto Clear;
+            }
+        }
+
+        CcRosReleaseVacb(SharedCacheMap, Vacb, TRUE, FALSE, FALSE);
+
+        Length -= PartialLength;
+        CurrentOffset += PartialLength;
+    }
+
+    while (Length > 0)
+    {
+        ASSERT(CurrentOffset % VACB_MAPPING_GRANULARITY == 0);
+        PartialLength = min(VACB_MAPPING_GRANULARITY, Length);
+        Status = CcRosRequestVacb(SharedCacheMap,
+                                  CurrentOffset,
+                                  &BaseAddress,
+                                  &Valid,
+                                  &Vacb);
+        if (!NT_SUCCESS(Status))
+        {
+            DPRINT1("Failed to request VACB: %lx!\n", Status);
+            goto Clear;
+        }
+
+        if (!Valid)
+        {
+            Status = CcReadVirtualAddress(Vacb);
+            if (!NT_SUCCESS(Status))
+            {
+                CcRosReleaseVacb(SharedCacheMap, Vacb, FALSE, FALSE, FALSE);
+                DPRINT1("Failed to read data: %lx!\n", Status);
+                goto Clear;
+            }
+        }
+
+        CcRosReleaseVacb(SharedCacheMap, Vacb, TRUE, FALSE, FALSE);
+
+        Length -= PartialLength;
+        CurrentOffset += PartialLength;
+    }
+
+Clear:
+    /* See previous comment about private cache map */
+    OldIrql = KeAcquireQueuedSpinLock(LockQueueMasterLock);
+    PrivateCacheMap = FileObject->PrivateCacheMap;
+    if (PrivateCacheMap != NULL)
+    {
+        /* Mark read ahead as inactive (clear bit 16 of UlongFlags) */
+        KeAcquireSpinLockAtDpcLevel(&PrivateCacheMap->ReadAheadSpinLock);
+        InterlockedAnd((volatile long *)&PrivateCacheMap->UlongFlags, 0xFFFEFFFF);
+        KeReleaseSpinLockFromDpcLevel(&PrivateCacheMap->ReadAheadSpinLock);
+    }
+    /* Release the queued master lock acquired above. The previous code
+     * released PrivateCacheMap->ReadAheadSpinLock with KeReleaseSpinLock
+     * here, which mismatched the acquire AND dereferenced PrivateCacheMap
+     * even when it is NULL.
+     */
+    KeReleaseQueuedSpinLock(LockQueueMasterLock, OldIrql);
+
+    /* If file was locked, release it */
+    if (Locked)
+    {
+        SharedCacheMap->Callbacks->ReleaseFromReadAhead(SharedCacheMap->LazyWriteContext);
+    }
+
+    /* And drop our extra reference (See: CcScheduleReadAhead) */
+    ObDereferenceObject(FileObject);
+
+    return;
+}
+
 /*
  * @unimplemented
  */
@@ -375,24 +622,142 @@ CcCanIWrite (
     IN BOOLEAN Wait,
     IN BOOLEAN Retrying)
 {
+    KIRQL OldIrql;
+    KEVENT WaitEvent;
+    ULONG Length, Pages;
+    BOOLEAN PerFileDefer;
+    DEFERRED_WRITE Context;
+    PFSRTL_COMMON_FCB_HEADER Fcb;
+    CC_CAN_WRITE_RETRY TryContext;
+    PROS_SHARED_CACHE_MAP SharedCacheMap;
+
     CCTRACE(CC_API_DEBUG, "FileObject=%p BytesToWrite=%lu Wait=%d Retrying=%d\n",
         FileObject, BytesToWrite, Wait, Retrying);
 
-    /* We cannot write if dirty pages count is above threshold */
-    if (CcTotalDirtyPages > CcDirtyPageThreshold)
+    /* Write through is always OK */
+    if (BooleanFlagOn(FileObject->Flags, FO_WRITE_THROUGH))
     {
-        return FALSE;
+        return TRUE;
+    }
+
+    TryContext = Retrying;
+    /* Writes to remote files are never throttled, unless this is an internal retry (posted writes must re-check) */
+    if (IoIsFileOriginRemote(FileObject) && TryContext < RetryAllowRemote)
+    {
+        return TRUE;
+    }
+
+    /* Don't exceed max tolerated size */
+    Length = MAX_ZERO_LENGTH;
+    if (BytesToWrite < MAX_ZERO_LENGTH)
+    {
+        Length = BytesToWrite;
     }
 
-    /* We cannot write if dirty pages count will bring use above
-     * XXX: Might not be accurate
+    /* Convert it to pages count */
+    Pages = (Length + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+    /* By default, assume limits per file won't be hit */
+    PerFileDefer = FALSE;
+    Fcb = FileObject->FsContext;
+    /* Do we have to check for limits per file? */
+    if (TryContext >= RetryForceCheckPerFile ||
+        BooleanFlagOn(Fcb->Flags, FSRTL_FLAG_LIMIT_MODIFIED_PAGES))
+    {
+        /* If master is not locked, lock it now */
+        if (TryContext != RetryMasterLocked)
+        {
+            OldIrql = KeAcquireQueuedSpinLock(LockQueueMasterLock);
+        }
+
+        /* Let's not assume the file is cached... */
+        if (FileObject->SectionObjectPointer != NULL &&
+            FileObject->SectionObjectPointer->SharedCacheMap != NULL)
+        {
+            SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+            /* Do we have limits per file set? */
+            if (SharedCacheMap->DirtyPageThreshold != 0 &&
+                SharedCacheMap->DirtyPages != 0)
+            {
+                /* Yes, check whether they are blocking */
+                if (Pages + SharedCacheMap->DirtyPages > SharedCacheMap->DirtyPageThreshold)
+                {
+                    PerFileDefer = TRUE;
+                }
+            }
+        }
+
+        /* And don't forget to release master */
+        if (TryContext != RetryMasterLocked)
+        {
+            KeReleaseQueuedSpinLock(LockQueueMasterLock, OldIrql);
+        }
+    }
+
+    /* So, now allow write if:
+     * - Not the first try or we have no throttling yet
+     * AND:
+     * - We don't exceed the threshold!
      */
-    if (CcTotalDirtyPages + (BytesToWrite / PAGE_SIZE) > CcDirtyPageThreshold)
+    if ((TryContext != FirstTry || IsListEmpty(&CcDeferredWrites)) &&
+        CcTotalDirtyPages + Pages < CcDirtyPageThreshold &&
+        !PerFileDefer)
+    {
+        return TRUE;
+    }
+
+    /* If we can wait, we'll start the wait loop for waiting till we can
+     * write for real
+     */
+    if (!Wait)
     {
         return FALSE;
     }
 
-    /* FIXME: Handle per-file threshold */
+    /* Otherwise, if there are no deferred writes yet, start the lazy writer */
+    if (IsListEmpty(&CcDeferredWrites))
+    {
+        KIRQL OldIrql;
+
+        OldIrql = KeAcquireQueuedSpinLock(LockQueueMasterLock);
+        CcScheduleLazyWriteScan(TRUE);
+        KeReleaseQueuedSpinLock(LockQueueMasterLock, OldIrql);
+    }
+
+    /* Initialize our wait event */
+    KeInitializeEvent(&WaitEvent, NotificationEvent, FALSE);
+
+    /* And prepare a dummy context */
+    Context.NodeTypeCode = NODE_TYPE_DEFERRED_WRITE;
+    Context.NodeByteSize = sizeof(DEFERRED_WRITE);
+    Context.FileObject = FileObject;
+    Context.BytesToWrite = BytesToWrite;
+    Context.LimitModifiedPages = BooleanFlagOn(Fcb->Flags, FSRTL_FLAG_LIMIT_MODIFIED_PAGES);
+    Context.Event = &WaitEvent;
+
+    /* And queue it */
+    if (Retrying)
+    {
+        /* To the top, if that's a retry */
+        ExInterlockedInsertHeadList(&CcDeferredWrites,
+                                    &Context.DeferredWriteLinks,
+                                    &CcDeferredWriteSpinLock);
+    }
+    else
+    {
+        /* To the bottom, if that's a first time */
+        ExInterlockedInsertTailList(&CcDeferredWrites,
+                                    &Context.DeferredWriteLinks,
+                                    &CcDeferredWriteSpinLock);
+    }
+
+    /* Now, we'll loop until our event is set. When it is set, it means that caller
+     * can immediately write, and has to
+     */
+    do
+    {
+        CcPostDeferredWrites();
+    } while (KeWaitForSingleObject(&WaitEvent, Executive, KernelMode, FALSE, &CcIdleDelay) != STATUS_SUCCESS);
 
     return TRUE;
 }
@@ -470,13 +835,15 @@ CcDeferWrite (
     IN ULONG BytesToWrite,
     IN BOOLEAN Retrying)
 {
-    PROS_DEFERRED_WRITE_CONTEXT Context;
+    KIRQL OldIrql;
+    PDEFERRED_WRITE Context;
+    PFSRTL_COMMON_FCB_HEADER Fcb;
 
     CCTRACE(CC_API_DEBUG, "FileObject=%p PostRoutine=%p Context1=%p Context2=%p BytesToWrite=%lu Retrying=%d\n",
         FileObject, PostRoutine, Context1, Context2, BytesToWrite, Retrying);
 
     /* Try to allocate a context for queueing the write operation */
-    Context = ExAllocatePoolWithTag(NonPagedPool, sizeof(ROS_DEFERRED_WRITE_CONTEXT), 'CcDw');
+    Context = ExAllocatePoolWithTag(NonPagedPool, sizeof(DEFERRED_WRITE), 'CcDw');
     /* If it failed, immediately execute the operation! */
     if (Context == NULL)
     {
@@ -484,29 +851,45 @@ CcDeferWrite (
         return;
     }
 
+    Fcb = FileObject->FsContext;
+
     /* Otherwise, initialize the context */
+    RtlZeroMemory(Context, sizeof(DEFERRED_WRITE));
+    Context->NodeTypeCode = NODE_TYPE_DEFERRED_WRITE;
+    Context->NodeByteSize = sizeof(DEFERRED_WRITE);
     Context->FileObject = FileObject;
     Context->PostRoutine = PostRoutine;
     Context->Context1 = Context1;
     Context->Context2 = Context2;
     Context->BytesToWrite = BytesToWrite;
-    Context->Retrying = Retrying;
+    Context->LimitModifiedPages = BooleanFlagOn(Fcb->Flags, FSRTL_FLAG_LIMIT_MODIFIED_PAGES);
 
     /* And queue it */
     if (Retrying)
     {
         /* To the top, if that's a retry */
         ExInterlockedInsertHeadList(&CcDeferredWrites,
-                                    &Context->CcDeferredWritesEntry,
+                                    &Context->DeferredWriteLinks,
                                     &CcDeferredWriteSpinLock);
     }
     else
     {
         /* To the bottom, if that's a first time */
         ExInterlockedInsertTailList(&CcDeferredWrites,
-                                    &Context->CcDeferredWritesEntry,
+                                    &Context->DeferredWriteLinks,
                                     &CcDeferredWriteSpinLock);
     }
+
+    /* Try to execute the posted writes */
+    CcPostDeferredWrites();
+
+    /* Schedule a lazy writer run to handle deferred writes */
+    OldIrql = KeAcquireQueuedSpinLock(LockQueueMasterLock);
+    if (!LazyWriter.ScanActive)
+    {
+        CcScheduleLazyWriteScan(FALSE);
+    }
+    KeReleaseQueuedSpinLock(LockQueueMasterLock, OldIrql);
 }
 
 /*
@@ -566,30 +949,6 @@ CcFastCopyWrite (
     ASSERT(Success == TRUE);
 }
 
-/*
- * @implemented
- */
-NTSTATUS
-NTAPI
-CcWaitForCurrentLazyWriterActivity (
-    VOID)
-{
-    NTSTATUS Status;
-
-    /* Lazy writer is done when its event is set */
-    Status = KeWaitForSingleObject(&iLazyWriterNotify,
-                                   Executive,
-                                   KernelMode,
-                                   FALSE,
-                                   NULL);
-    if (!NT_SUCCESS(Status))
-    {
-        return Status;
-    }
-
-    return STATUS_SUCCESS;
-}
-
 /*
  * @implemented
  */