[BTRFS]
author Pierre Schweitzer <pierre@reactos.org>
Sat, 29 Oct 2016 17:05:10 +0000 (17:05 +0000)
committer Pierre Schweitzer <pierre@reactos.org>
Sat, 29 Oct 2016 17:05:10 +0000 (17:05 +0000)
Sync btrfs to 0.7.

CORE-12223

svn path=/trunk/; revision=73062

25 files changed:
reactos/drivers/filesystems/btrfs/CMakeLists.txt
reactos/drivers/filesystems/btrfs/btrfs.c
reactos/drivers/filesystems/btrfs/btrfs.h
reactos/drivers/filesystems/btrfs/btrfs.rc
reactos/drivers/filesystems/btrfs/btrfs_drv.h
reactos/drivers/filesystems/btrfs/compress.c
reactos/drivers/filesystems/btrfs/crc32c.c
reactos/drivers/filesystems/btrfs/create.c
reactos/drivers/filesystems/btrfs/devctrl.c [new file with mode: 0644]
reactos/drivers/filesystems/btrfs/dirctrl.c
reactos/drivers/filesystems/btrfs/extent-tree.c
reactos/drivers/filesystems/btrfs/fileinfo.c
reactos/drivers/filesystems/btrfs/flushthread.c
reactos/drivers/filesystems/btrfs/free-space.c
reactos/drivers/filesystems/btrfs/fsctl.c
reactos/drivers/filesystems/btrfs/galois.c [new file with mode: 0644]
reactos/drivers/filesystems/btrfs/pnp.c
reactos/drivers/filesystems/btrfs/read.c
reactos/drivers/filesystems/btrfs/registry.c
reactos/drivers/filesystems/btrfs/reparse.c
reactos/drivers/filesystems/btrfs/search.c
reactos/drivers/filesystems/btrfs/security.c
reactos/drivers/filesystems/btrfs/treefuncs.c
reactos/drivers/filesystems/btrfs/worker-thread.c
reactos/drivers/filesystems/btrfs/write.c

index 730aa1f..118429e 100644 (file)
@@ -9,6 +9,7 @@ list(APPEND SOURCE
     compress.c
     crc32c.c
     create.c
+    devctrl.c
     dirctrl.c
     extent-tree.c
     fastio.c
@@ -16,6 +17,7 @@ list(APPEND SOURCE
     flushthread.c
     free-space.c
     fsctl.c
+    galois.c
     pnp.c
     read.c
     registry.c
index 2426362..6611438 100644 (file)
 #include <intrin.h>
 #endif
 #endif
+#include <ntddscsi.h>
 #include "btrfs.h"
 #ifndef __REACTOS__
 #include <winioctl.h>
 #else
 #include <rtlfuncs.h>
 #endif
-#include <mountdev.h>
+#include <ata.h>
 
 #define INCOMPAT_SUPPORTED (BTRFS_INCOMPAT_FLAGS_MIXED_BACKREF | BTRFS_INCOMPAT_FLAGS_DEFAULT_SUBVOL | BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS | \
-                            BTRFS_INCOMPAT_FLAGS_COMPRESS_LZO | BTRFS_INCOMPAT_FLAGS_BIG_METADATA | BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF | \
-                            BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA | BTRFS_INCOMPAT_FLAGS_NO_HOLES)
+                            BTRFS_INCOMPAT_FLAGS_COMPRESS_LZO | BTRFS_INCOMPAT_FLAGS_BIG_METADATA | BTRFS_INCOMPAT_FLAGS_RAID56 | \
+                            BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF | BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA | BTRFS_INCOMPAT_FLAGS_NO_HOLES)
 #define COMPAT_RO_SUPPORTED 0
 
 static WCHAR device_name[] = {'\\','B','t','r','f','s',0};
@@ -46,7 +47,7 @@ static WCHAR dosdevice_name[] = {'\\','D','o','s','D','e','v','i','c','e','s','\
 PDRIVER_OBJECT drvobj;
 PDEVICE_OBJECT devobj;
 #ifndef __REACTOS__
-BOOL have_sse42 = FALSE;
+BOOL have_sse42 = FALSE, have_sse2 = FALSE;
 #endif
 UINT64 num_reads = 0;
 LIST_ENTRY uid_map_list;
@@ -60,6 +61,8 @@ UINT32 mount_compress_type = 0;
 UINT32 mount_zlib_level = 3;
 UINT32 mount_flush_interval = 30;
 UINT32 mount_max_inline = 2048;
+UINT32 mount_raid5_recalculation = 1;
+UINT32 mount_raid6_recalculation = 1;
 BOOL log_started = FALSE;
 UNICODE_STRING log_device, log_file, registry_path;
 
@@ -243,28 +246,6 @@ UINT64 sector_align( UINT64 NumberToBeAligned, UINT64 Alignment )
     return NumberToBeAligned;
 }
 
-int keycmp(const KEY* key1, const KEY* key2) {
-    if (key1->obj_id < key2->obj_id) {
-        return -1;
-    } else if (key1->obj_id > key2->obj_id) {
-        return 1;
-    }
-    
-    if (key1->obj_type < key2->obj_type) {
-        return -1;
-    } else if (key1->obj_type > key2->obj_type) {
-        return 1;
-    }
-    
-    if (key1->offset < key2->offset) {
-        return -1;
-    } else if (key1->offset > key2->offset) {
-        return 1;
-    }
-    
-    return 0;
-}
-
 BOOL is_top_level(PIRP Irp) {
     if (!IoGetTopLevelIrp()) {
         IoSetTopLevelIrp(Irp);
@@ -320,7 +301,7 @@ static void STDCALL DriverUnload(PDRIVER_OBJECT DriverObject) {
         ExFreePool(registry_path.Buffer);
 }
 
-BOOL STDCALL get_last_inode(device_extension* Vcb, root* r, PIRP Irp) {
+static BOOL STDCALL get_last_inode(device_extension* Vcb, root* r, PIRP Irp) {
     KEY searchkey;
     traverse_ptr tp, prev_tp;
     NTSTATUS Status;
@@ -336,6 +317,12 @@ BOOL STDCALL get_last_inode(device_extension* Vcb, root* r, PIRP Irp) {
         return FALSE;
     }
     
+    if (tp.item->key.obj_type == TYPE_INODE_ITEM || (tp.item->key.obj_type == TYPE_ROOT_ITEM && !(tp.item->key.obj_id & 0x8000000000000000))) {
+        r->lastinode = tp.item->key.obj_id;
+        TRACE("last inode for tree %llx is %llx\n", r->id, r->lastinode);
+        return TRUE;
+    }
+    
     while (find_prev_item(Vcb, &tp, &prev_tp, FALSE, Irp)) {
         tp = prev_tp;
         
@@ -374,7 +361,7 @@ BOOL STDCALL get_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char*
         return FALSE;
     }
     
-    if (keycmp(&tp.item->key, &searchkey)) {
+    if (keycmp(tp.item->key, searchkey)) {
         TRACE("could not find item (%llx,%x,%llx)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
         return FALSE;
     }
@@ -426,53 +413,6 @@ BOOL STDCALL get_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char*
     return FALSE;
 }
 
-NTSTATUS add_dir_item(device_extension* Vcb, root* subvol, UINT64 inode, UINT32 crc32, DIR_ITEM* di, ULONG disize, PIRP Irp, LIST_ENTRY* rollback) {
-    KEY searchkey;
-    traverse_ptr tp;
-    UINT8* di2;
-    NTSTATUS Status;
-    
-    searchkey.obj_id = inode;
-    searchkey.obj_type = TYPE_DIR_ITEM;
-    searchkey.offset = crc32;
-    
-    Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
-    }
-    
-    if (!keycmp(&tp.item->key, &searchkey)) {
-        ULONG maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node);
-        
-        if (tp.item->size + disize > maxlen) {
-            WARN("DIR_ITEM was longer than maxlen (%u + %u > %u)\n", tp.item->size, disize, maxlen);
-            return STATUS_INTERNAL_ERROR;
-        }
-        
-        di2 = ExAllocatePoolWithTag(PagedPool, tp.item->size + disize, ALLOC_TAG);
-        if (!di2) {
-            ERR("out of memory\n");
-            return STATUS_INSUFFICIENT_RESOURCES;
-        }
-        
-        if (tp.item->size > 0)
-            RtlCopyMemory(di2, tp.item->data, tp.item->size);
-        
-        RtlCopyMemory(di2 + tp.item->size, di, disize);
-        
-        delete_tree_item(Vcb, &tp, rollback);
-        
-        insert_tree_item(Vcb, subvol, inode, TYPE_DIR_ITEM, crc32, di2, tp.item->size + disize, NULL, Irp, rollback);
-        
-        ExFreePool(di);
-    } else {
-        insert_tree_item(Vcb, subvol, inode, TYPE_DIR_ITEM, crc32, di, disize, NULL, Irp, rollback);
-    }
-    
-    return STATUS_SUCCESS;
-}
-
 static NTSTATUS STDCALL drv_close(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
     NTSTATUS Status;
     PIO_STACK_LOCATION IrpSp;
@@ -514,73 +454,6 @@ exit:
     return Status;
 }
 
-static NTSTATUS STDCALL drv_query_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
-    NTSTATUS Status;
-    BOOL top_level;
-    device_extension* Vcb = DeviceObject->DeviceExtension;
-
-    FsRtlEnterFileSystem();
-
-    top_level = is_top_level(Irp);
-    
-    if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) {
-        Status = part0_passthrough(DeviceObject, Irp);
-        goto exit;
-    }
-    
-    FIXME("STUB: query ea\n");
-    Status = STATUS_NOT_IMPLEMENTED;
-    
-    Irp->IoStatus.Status = Status;
-    Irp->IoStatus.Information = 0;
-
-    IoCompleteRequest( Irp, IO_NO_INCREMENT );
-
-exit:
-    if (top_level) 
-        IoSetTopLevelIrp(NULL);
-    
-    FsRtlExitFileSystem();
-
-    return Status;
-}
-
-static NTSTATUS STDCALL drv_set_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
-    NTSTATUS Status;
-    device_extension* Vcb = DeviceObject->DeviceExtension;
-    BOOL top_level;
-
-    FsRtlEnterFileSystem();
-
-    top_level = is_top_level(Irp);
-    
-    if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) {
-        Status = part0_passthrough(DeviceObject, Irp);
-        goto exit;
-    }
-    
-    FIXME("STUB: set ea\n");
-    Status = STATUS_NOT_IMPLEMENTED;
-    
-    if (Vcb->readonly)
-        Status = STATUS_MEDIA_WRITE_PROTECTED;
-    
-    // FIXME - return STATUS_ACCESS_DENIED if subvol readonly
-    
-    Irp->IoStatus.Status = Status;
-    Irp->IoStatus.Information = 0;
-
-    IoCompleteRequest( Irp, IO_NO_INCREMENT );
-    
-exit:
-    if (top_level) 
-        IoSetTopLevelIrp(NULL);
-    
-    FsRtlExitFileSystem();
-
-    return Status;
-}
-
 static NTSTATUS STDCALL drv_flush_buffers(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
     NTSTATUS Status;
     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation( Irp );
@@ -627,15 +500,27 @@ exit:
 }
 
 static void calculate_total_space(device_extension* Vcb, LONGLONG* totalsize, LONGLONG* freespace) {
-    UINT8 factor;
+    UINT16 nfactor, dfactor;
+    UINT64 sectors_used;
+    
+    if (Vcb->data_flags & BLOCK_FLAG_DUPLICATE || Vcb->data_flags & BLOCK_FLAG_RAID1 || Vcb->data_flags & BLOCK_FLAG_RAID10) {
+        nfactor = 1;
+        dfactor = 2;
+    } else if (Vcb->data_flags & BLOCK_FLAG_RAID5) {
+        nfactor = Vcb->superblock.num_devices - 1;
+        dfactor = Vcb->superblock.num_devices;
+    } else if (Vcb->data_flags & BLOCK_FLAG_RAID6) {
+        nfactor = Vcb->superblock.num_devices - 2;
+        dfactor = Vcb->superblock.num_devices;
+    } else {
+        nfactor = 1;
+        dfactor = 1;
+    }
     
-    if (Vcb->data_flags & BLOCK_FLAG_DUPLICATE || Vcb->data_flags & BLOCK_FLAG_RAID1 || Vcb->data_flags & BLOCK_FLAG_RAID10)
-        factor = 2;
-    else
-        factor = 1;
+    sectors_used = Vcb->superblock.bytes_used / Vcb->superblock.sector_size;
     
-    *totalsize = (Vcb->superblock.total_bytes / Vcb->superblock.sector_size) / factor;
-    *freespace = ((Vcb->superblock.total_bytes - Vcb->superblock.bytes_used) / Vcb->superblock.sector_size) / factor;
+    *totalsize = (Vcb->superblock.total_bytes / Vcb->superblock.sector_size) * nfactor / dfactor;
+    *freespace = sectors_used > *totalsize ? 0 : (*totalsize - sectors_used);
 }
 
 static NTSTATUS STDCALL drv_query_volume_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
@@ -692,7 +577,8 @@ static NTSTATUS STDCALL drv_query_volume_information(IN PDEVICE_OBJECT DeviceObj
             
             data->FileSystemAttributes = FILE_CASE_PRESERVED_NAMES | FILE_CASE_SENSITIVE_SEARCH |
                                          FILE_UNICODE_ON_DISK | FILE_NAMED_STREAMS | FILE_SUPPORTS_HARD_LINKS | FILE_PERSISTENT_ACLS |
-                                         FILE_SUPPORTS_REPARSE_POINTS | FILE_SUPPORTS_SPARSE_FILES | FILE_SUPPORTS_OBJECT_IDS;
+                                         FILE_SUPPORTS_REPARSE_POINTS | FILE_SUPPORTS_SPARSE_FILES | FILE_SUPPORTS_OBJECT_IDS |
+                                         FILE_SUPPORTS_OPEN_BY_FILE_ID | FILE_SUPPORTS_EXTENDED_ATTRIBUTES;
             if (Vcb->readonly)
                 data->FileSystemAttributes |= FILE_READ_ONLY_VOLUME;
                                          
@@ -830,6 +716,29 @@ static NTSTATUS STDCALL drv_query_volume_information(IN PDEVICE_OBJECT DeviceObj
             Status = overflow ? STATUS_BUFFER_OVERFLOW : STATUS_SUCCESS;
             break;
         }
+        
+#ifdef _MSC_VER // not in mingw yet
+        case FileFsSectorSizeInformation:
+        {
+            FILE_FS_SECTOR_SIZE_INFORMATION* data = Irp->AssociatedIrp.SystemBuffer;
+            
+            data->LogicalBytesPerSector = Vcb->superblock.sector_size;
+            data->PhysicalBytesPerSectorForAtomicity = Vcb->superblock.sector_size;
+            data->PhysicalBytesPerSectorForPerformance = Vcb->superblock.sector_size;
+            data->FileSystemEffectivePhysicalBytesPerSectorForAtomicity = Vcb->superblock.sector_size;
+            data->ByteOffsetForSectorAlignment = 0;
+            data->ByteOffsetForPartitionAlignment = 0;
+            
+            data->Flags = SSINFO_FLAGS_ALIGNED_DEVICE | SSINFO_FLAGS_PARTITION_ALIGNED_ON_DEVICE;
+            
+            if (Vcb->trim)
+                data->Flags |= SSINFO_FLAGS_TRIM_ENABLED;
+            
+            BytesCopied = sizeof(FILE_FS_SECTOR_SIZE_INFORMATION);
+  
+            break;
+        }
+#endif
 
         default:
             Status = STATUS_INVALID_PARAMETER;
@@ -902,7 +811,7 @@ static NTSTATUS STDCALL read_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, P
 //         return;
 //     }
 //     
-//     while (TRUE/*keycmp(&tp.item->key, &endkey) < 1*/) {
+//     while (TRUE/*keycmp(tp.item->key, endkey) < 1*/) {
 //         tp.item->ignore = TRUE;
 //         add_to_tree_cache(tc, tp.tree);
 //         
@@ -1035,6 +944,7 @@ NTSTATUS create_root(device_extension* Vcb, UINT64 id, root** rootptr, BOOL no_t
     
         t->new_address = 0;
         t->has_new_address = FALSE;
+        t->updated_extents = FALSE;
         t->flags = tp.tree->flags;
         
         InsertTailList(&Vcb->trees, &t->list_entry);
@@ -1179,10 +1089,103 @@ NTSTATUS create_root(device_extension* Vcb, UINT64 id, root** rootptr, BOOL no_t
 //     int3;
 // }
 
+#if 0
+void STDCALL tree_test(void* context) {
+    device_extension* Vcb = context;
+    NTSTATUS Status;
+    UINT64 id;
+    LARGE_INTEGER due_time, time;
+    KTIMER timer;
+    root* r;
+    LIST_ENTRY rollback;
+    ULONG seed;
+    
+    InitializeListHead(&rollback);
+    
+    KeInitializeTimer(&timer);
+    
+    id = InterlockedIncrement64(&Vcb->root_root->lastinode);
+    Status = create_root(Vcb, id, &r, FALSE, 0, NULL, &rollback);
+    if (!NT_SUCCESS(Status)) {
+        ERR("create_root returned %08x\n");
+        return;
+    }
+    
+    clear_rollback(Vcb, &rollback);
+    
+    due_time.QuadPart = (UINT64)1 * -10000000;
+    
+    KeQueryPerformanceCounter(&time);
+    seed = time.LowPart;
+    
+    while (TRUE) {
+        UINT32 i;
+        
+        FsRtlEnterFileSystem();
+        
+        ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
+        
+        for (i = 0; i < 100; i++) {
+            void* data;
+            ULONG datalen;
+            UINT64 objid, offset;
+            
+            objid = RtlRandomEx(&seed);
+            objid <<= 32;
+            objid |= RtlRandomEx(&seed);
+            
+            offset = RtlRandomEx(&seed);
+            offset <<= 32;
+            offset |= RtlRandomEx(&seed);
+            
+            datalen = 30;
+            data = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG);
+            
+            if (!insert_tree_item(Vcb, r, objid, 0xfd, offset, data, datalen, NULL, NULL, &rollback)) {
+                ERR("insert_tree_item failed\n");
+            }
+        }
+        
+        for (i = 0; i < 25; i++) {
+            KEY searchkey;
+            traverse_ptr tp;
+            
+            searchkey.obj_id = RtlRandomEx(&seed);
+            searchkey.obj_id <<= 32;
+            searchkey.obj_id |= RtlRandomEx(&seed);
+            
+            searchkey.obj_type = 0xfd;
+            
+            searchkey.offset = RtlRandomEx(&seed);
+            searchkey.offset <<= 32;
+            searchkey.offset |= RtlRandomEx(&seed);
+            
+            Status = find_item(Vcb, r, &tp, &searchkey, FALSE, NULL);
+            if (!NT_SUCCESS(Status)) {
+                ERR("error - find_item returned %08x\n", Status);
+            } else {
+                delete_tree_item(Vcb, &tp, &rollback);
+            }
+        }
+        
+        clear_rollback(Vcb, &rollback);
+        
+        ExReleaseResourceLite(&Vcb->tree_lock);
+        
+        FsRtlExitFileSystem();
+        
+        KeSetTimer(&timer, due_time, NULL);
+        
+        KeWaitForSingleObject(&timer, Executive, KernelMode, FALSE, NULL);
+    }
+}
+#endif
+
 static NTSTATUS STDCALL set_label(device_extension* Vcb, FILE_FS_LABEL_INFORMATION* ffli) {
     ULONG utf8len;
     NTSTATUS Status;
     USHORT vollen, i;
+//     HANDLE h;
     
     TRACE("label = %.*S\n", ffli->VolumeLabelLength / sizeof(WCHAR), ffli->VolumeLabel);
     
@@ -1232,6 +1235,8 @@ static NTSTATUS STDCALL set_label(device_extension* Vcb, FILE_FS_LABEL_INFORMATI
     
     Vcb->need_write = TRUE;
     
+//     PsCreateSystemThread(&h, 0, NULL, NULL, NULL, tree_test, Vcb);
+    
 release:  
     ExReleaseResourceLite(&Vcb->tree_lock);
 
@@ -1320,7 +1325,7 @@ NTSTATUS delete_dir_item(device_extension* Vcb, root* subvol, UINT64 parinode, U
         return Status;
     }
     
-    if (!keycmp(&searchkey, &tp.item->key)) {
+    if (!keycmp(searchkey, tp.item->key)) {
         if (tp.item->size < sizeof(DIR_ITEM)) {
             WARN("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM));
         } else {
@@ -1391,7 +1396,7 @@ NTSTATUS delete_inode_ref(device_extension* Vcb, root* subvol, UINT64 inode, UIN
         return Status;
     }
     
-    if (!keycmp(&searchkey, &tp.item->key)) {
+    if (!keycmp(searchkey, tp.item->key)) {
         if (tp.item->size < sizeof(INODE_REF)) {
             WARN("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_REF));
         } else {
@@ -1476,7 +1481,7 @@ NTSTATUS delete_inode_ref(device_extension* Vcb, root* subvol, UINT64 inode, UIN
         return Status;
     }
     
-    if (!keycmp(&searchkey, &tp.item->key)) {
+    if (!keycmp(searchkey, tp.item->key)) {
         if (tp.item->size < sizeof(INODE_EXTREF)) {
             WARN("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_EXTREF));
         } else {
@@ -1816,6 +1821,9 @@ void _free_fcb(fcb* fcb, const char* func, const char* file, unsigned int line)
     if (fcb->reparse_xattr.Buffer)
         ExFreePool(fcb->reparse_xattr.Buffer);
     
+    if (fcb->ea_xattr.Buffer)
+        ExFreePool(fcb->ea_xattr.Buffer);
+    
     if (fcb->adsdata.Buffer)
         ExFreePool(fcb->adsdata.Buffer);
     
@@ -1938,12 +1946,19 @@ static NTSTATUS STDCALL close_file(device_extension* Vcb, PFILE_OBJECT FileObjec
     fcb* fcb;
     ccb* ccb;
     file_ref* fileref = NULL;
+    LONG open_files;
     
     TRACE("FileObject = %p\n", FileObject);
     
+    open_files = InterlockedDecrement(&Vcb->open_files);
+    
     fcb = FileObject->FsContext;
     if (!fcb) {
         TRACE("FCB was NULL, returning success\n");
+        
+        if (open_files == 0 && Vcb->removing)
+            uninit(Vcb, FALSE);
+        
         return STATUS_SUCCESS;
     }
     
@@ -1968,6 +1983,11 @@ static NTSTATUS STDCALL close_file(device_extension* Vcb, PFILE_OBJECT FileObjec
     
     CcUninitializeCacheMap(FileObject, NULL, NULL);
     
+    if (open_files == 0 && Vcb->removing) {
+        uninit(Vcb, FALSE);
+        return STATUS_SUCCESS;
+    }
+    
     if (!(Vcb->Vpb->Flags & VPB_MOUNTED))
         return STATUS_SUCCESS;
     
@@ -2009,22 +2029,11 @@ void STDCALL uninit(device_extension* Vcb, BOOL flush) {
         
         free_trees(Vcb);
         
-        clear_rollback(&rollback);
+        clear_rollback(Vcb, &rollback);
 
         ExReleaseResourceLite(&Vcb->tree_lock);
     }
     
-    for (i = 0; i < Vcb->threads.num_threads; i++) {
-        Vcb->threads.threads[i].quit = TRUE;
-        KeSetEvent(&Vcb->threads.threads[i].event, 0, FALSE);
-        
-        KeWaitForSingleObject(&Vcb->threads.threads[i].finished, Executive, KernelMode, FALSE, NULL);
-        
-        ZwClose(Vcb->threads.threads[i].handle);
-    }
-    
-    ExFreePool(Vcb->threads.threads);
-    
     time.QuadPart = 0;
     KeSetTimer(&Vcb->flush_thread_timer, time, NULL); // trigger the timer early
     KeWaitForSingleObject(&Vcb->flush_thread_finished, Executive, KernelMode, FALSE, NULL);
@@ -2114,6 +2123,12 @@ void STDCALL uninit(device_extension* Vcb, BOOL flush) {
     ExDeleteResourceLite(&Vcb->checksum_lock);
     ExDeleteResourceLite(&Vcb->chunk_lock);
     
+    ExDeletePagedLookasideList(&Vcb->tree_data_lookaside);
+    ExDeletePagedLookasideList(&Vcb->traverse_ptr_lookaside);
+    ExDeletePagedLookasideList(&Vcb->rollback_item_lookaside);
+    ExDeletePagedLookasideList(&Vcb->batch_item_lookaside);
+    ExDeleteNPagedLookasideList(&Vcb->range_lock_lookaside);
+    
     ZwClose(Vcb->flush_thread_handle);
 }
 
@@ -2150,6 +2165,7 @@ NTSTATUS delete_fileref(file_ref* fileref, PFILE_OBJECT FileObject, PIRP Irp, LI
                 fileref->fcb->inode_item.transid = fileref->fcb->Vcb->superblock.generation;
                 fileref->fcb->inode_item.sequence++;
                 fileref->fcb->inode_item.st_ctime = now;
+                fileref->fcb->inode_item_changed = TRUE;
             } else {
                 fileref->fcb->deleted = TRUE;
             
@@ -2229,6 +2245,7 @@ NTSTATUS delete_fileref(file_ref* fileref, PFILE_OBJECT FileObject, PIRP Irp, LI
     fileref->parent->fcb->inode_item.st_mtime = now;
     ExReleaseResourceLite(fileref->parent->fcb->Header.Resource);
 
+    fileref->parent->fcb->inode_item_changed = TRUE;
     mark_fcb_dirty(fileref->parent->fcb);
     
     send_notification_fcb(fileref->parent, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED);
@@ -2236,9 +2253,6 @@ NTSTATUS delete_fileref(file_ref* fileref, PFILE_OBJECT FileObject, PIRP Irp, LI
     fileref->fcb->subvol->root_item.ctransid = fileref->fcb->Vcb->superblock.generation;
     fileref->fcb->subvol->root_item.ctime = now;
     
-    if (FileObject && FileObject->Flags & FO_CACHE_SUPPORTED && fileref->fcb->nonpaged->segment_object.DataSectionObject)
-        CcPurgeCacheSection(&fileref->fcb->nonpaged->segment_object, NULL, 0, FALSE);
-    
     newlength.QuadPart = 0;
     
     if (FileObject && !CcUninitializeCacheMap(FileObject, &newlength, NULL))
@@ -2284,16 +2298,18 @@ static NTSTATUS STDCALL drv_cleanup(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp)
         fileref = ccb ? ccb->fileref : NULL;
         
         TRACE("cleanup called for FileObject %p\n", FileObject);
-        TRACE("fcb %p (%S), refcount = %u, open_count = %u\n", fcb, file_desc(FileObject), fcb->refcount, fcb->open_count);
+        TRACE("fileref %p (%S), refcount = %u, open_count = %u\n", fileref, file_desc(FileObject), fileref ? fileref->refcount : 0, fileref ? fileref->open_count : 0);
         
         IoRemoveShareAccess(FileObject, &fcb->share_access);
         
         FsRtlNotifyCleanup(Vcb->NotifySync, &Vcb->DirNotifyList, ccb);    
         
-        oc = InterlockedDecrement(&fcb->open_count);
+        if (fileref) {
+            oc = InterlockedDecrement(&fileref->open_count);
 #ifdef DEBUG_FCB_REFCOUNTS
-        ERR("fcb %p: open_count now %i\n", fcb, oc);
+            ERR("fileref %p: open_count now %i\n", fileref, oc);
 #endif
+        }
         
         if (ccb && ccb->options & FILE_DELETE_ON_CLOSE && fileref)
             fileref->delete_on_close = TRUE;
@@ -2307,7 +2323,7 @@ static NTSTATUS STDCALL drv_cleanup(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp)
             FsRtlNotifyVolumeEvent(FileObject, FSRTL_VOLUME_UNLOCK);
         }
         
-        if (oc == 0) {
+        if (fileref && oc == 0) {
             if (!Vcb->removing) {
                 LIST_ENTRY rollback;
         
@@ -2318,16 +2334,21 @@ static NTSTATUS STDCALL drv_cleanup(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp)
                     
                     ExAcquireResourceSharedLite(&fcb->Vcb->tree_lock, TRUE);
                     
+                    ExAcquireResourceExclusiveLite(&fcb->Vcb->fcb_lock, TRUE);
+                    
                     Status = delete_fileref(fileref, FileObject, Irp, &rollback);
                     if (!NT_SUCCESS(Status)) {
                         ERR("delete_fileref returned %08x\n", Status);
                         do_rollback(Vcb, &rollback);
+                        ExReleaseResourceLite(&fcb->Vcb->fcb_lock);
                         ExReleaseResourceLite(&fcb->Vcb->tree_lock);
                         goto exit;
                     }
                     
+                    ExReleaseResourceLite(&fcb->Vcb->fcb_lock);
+                    
                     ExReleaseResourceLite(&fcb->Vcb->tree_lock);
-                    clear_rollback(&rollback);
+                    clear_rollback(Vcb, &rollback);
                 } else if (FileObject->Flags & FO_CACHE_SUPPORTED && fcb->nonpaged->segment_object.DataSectionObject) {
                     IO_STATUS_BLOCK iosb;
                     CcFlushCache(FileObject->SectionObjectPointer, NULL, 0, &iosb);
@@ -2543,7 +2564,7 @@ static NTSTATUS STDCALL read_superblock(device_extension* Vcb, PDEVICE_OBJECT de
     NTSTATUS Status;
     superblock* sb;
     unsigned int i, to_read;
-    UINT32 crc32;
+    UINT8 valid_superblocks;
     
     to_read = sector_align(sizeof(superblock), device->SectorSize);
     
@@ -2554,8 +2575,11 @@ static NTSTATUS STDCALL read_superblock(device_extension* Vcb, PDEVICE_OBJECT de
     }
     
     i = 0;
+    valid_superblocks = 0;
     
     while (superblock_addrs[i] > 0) {
+        UINT32 crc32;
+        
         if (i > 0 && superblock_addrs[i] + sizeof(superblock) > length)
             break;
         
@@ -2566,27 +2590,28 @@ static NTSTATUS STDCALL read_superblock(device_extension* Vcb, PDEVICE_OBJECT de
             return Status;
         }
         
-        // FIXME - check checksum before accepting?
-        
         TRACE("got superblock %u!\n", i);
-
-        if (i == 0 || sb->generation > Vcb->superblock.generation)
+        
+        crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum));
+        
+        if (crc32 != *((UINT32*)sb->checksum))
+            WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)sb->checksum));
+        else if (valid_superblocks == 0 || sb->generation > Vcb->superblock.generation) {
             RtlCopyMemory(&Vcb->superblock, sb, sizeof(superblock));
+            valid_superblocks++;
+        }
         
         i++;
     }
     
     ExFreePool(sb);
     
-    crc32 = calc_crc32c(0xffffffff, (UINT8*)&Vcb->superblock.uuid, (ULONG)sizeof(superblock) - sizeof(Vcb->superblock.checksum));
-    crc32 = ~crc32;
-    TRACE("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)Vcb->superblock.checksum));
-    
-    if (crc32 != *((UINT32*)Vcb->superblock.checksum))
-        return STATUS_INTERNAL_ERROR; // FIXME - correct error?
+    if (valid_superblocks == 0) {
+        ERR("could not find any valid superblocks\n");
+        return STATUS_INTERNAL_ERROR;
+    }
     
     TRACE("label is %s\n", Vcb->superblock.label);
-//     utf8_to_utf16(Vcb->superblock.label, Vcb->label, MAX_LABEL_SIZE * sizeof(WCHAR));
     
     return STATUS_SUCCESS;
 }
@@ -2662,6 +2687,14 @@ static NTSTATUS STDCALL add_root(device_extension* Vcb, UINT64 id, UINT64 addr,
             RtlZeroMemory(((UINT8*)&r->root_item) + tp->item->size, sizeof(ROOT_ITEM) - tp->item->size);
     }
     
+    if (!Vcb->readonly && (r->id == BTRFS_ROOT_ROOT || r->id == BTRFS_ROOT_FSTREE || (r->id >= 0x100 && !(r->id & 0xf000000000000000)))) { // FS tree root
+        // FIXME - don't call this if subvol is readonly (though we will have to if we ever toggle this flag)
+        get_last_inode(Vcb, r, NULL);
+        
+        if (r->id == BTRFS_ROOT_ROOT && r->lastinode < 0x100)
+            r->lastinode = 0x100;
+    }
+    
     InsertTailList(&Vcb->roots, &r->list_entry);
     
     switch (r->id) {
@@ -2688,6 +2721,9 @@ static NTSTATUS STDCALL add_root(device_extension* Vcb, UINT64 id, UINT64 addr,
         case BTRFS_ROOT_UUID:
             Vcb->uuid_root = r;
             break;
+            
+        case BTRFS_ROOT_DATA_RELOC:
+            Vcb->data_reloc_root = r;
     }
     
     return STATUS_SUCCESS;
@@ -2734,6 +2770,79 @@ static NTSTATUS STDCALL look_for_roots(device_extension* Vcb, PIRP Irp) {
             tp = next_tp;
     } while (b);
     
+    if (!Vcb->readonly && !Vcb->data_reloc_root) {
+        root* reloc_root;
+        INODE_ITEM* ii;
+        ULONG irlen;
+        INODE_REF* ir;
+        LARGE_INTEGER time;
+        BTRFS_TIME now;
+        LIST_ENTRY rollback;
+        
+        InitializeListHead(&rollback);
+        
+        WARN("data reloc root doesn't exist, creating it\n");
+        
+        Status = create_root(Vcb, BTRFS_ROOT_DATA_RELOC, &reloc_root, FALSE, 0, Irp, &rollback);
+        
+        if (!NT_SUCCESS(Status)) {
+            ERR("create_root returned %08x\n", Status);
+            do_rollback(Vcb, &rollback);
+            goto end;
+        }
+        
+        reloc_root->root_item.inode.generation = 1;
+        reloc_root->root_item.inode.st_size = 3;
+        reloc_root->root_item.inode.st_blocks = Vcb->superblock.node_size;
+        reloc_root->root_item.inode.st_nlink = 1;
+        reloc_root->root_item.inode.st_mode = 040755;
+        reloc_root->root_item.inode.flags = 0xffffffff80000000;
+        reloc_root->root_item.objid = SUBVOL_ROOT_INODE;
+        reloc_root->root_item.bytes_used = Vcb->superblock.node_size;
+        
+        ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
+        if (!ii) {
+            ERR("out of memory\n");
+            do_rollback(Vcb, &rollback);
+            goto end;
+        }
+        
+        KeQuerySystemTime(&time);
+        win_time_to_unix(time, &now);
+        
+        RtlZeroMemory(ii, sizeof(INODE_ITEM));
+        ii->generation = Vcb->superblock.generation;
+        ii->st_blocks = Vcb->superblock.node_size;
+        ii->st_nlink = 1;
+        ii->st_mode = 040755;
+        ii->st_atime = now;
+        ii->st_ctime = now;
+        ii->st_mtime = now;
+        
+        insert_tree_item(Vcb, reloc_root, SUBVOL_ROOT_INODE, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp, &rollback);
+
+        irlen = offsetof(INODE_REF, name[0]) + 2;
+        ir = ExAllocatePoolWithTag(PagedPool, irlen, ALLOC_TAG);
+        if (!ir) {
+            ERR("out of memory\n");
+            do_rollback(Vcb, &rollback);
+            goto end;
+        }
+        
+        ir->index = 0;
+        ir->n = 2;
+        ir->name[0] = '.';
+        ir->name[1] = '.';
+        
+        insert_tree_item(Vcb, reloc_root, SUBVOL_ROOT_INODE, TYPE_INODE_REF, SUBVOL_ROOT_INODE, ir, irlen, NULL, Irp, &rollback);
+        
+        clear_rollback(Vcb, &rollback);
+        
+        Vcb->data_reloc_root = reloc_root;
+        Vcb->need_write = TRUE;
+    }
+    
+end:
     return STATUS_SUCCESS;
 }
 
@@ -2794,6 +2903,10 @@ static NTSTATUS find_disk_holes(device_extension* Vcb, device* dev, PIRP Irp) {
         }
     }
     
+    // The Linux driver doesn't like to allocate chunks within the first megabyte of a device.
+    
+    space_list_subtract2(Vcb, &dev->space, NULL, 0, 0x100000, NULL);
+    
     return STATUS_SUCCESS;
 }
 
@@ -2837,6 +2950,9 @@ device* find_device_from_uuid(device_extension* Vcb, BTRFS_UUID* uuid) {
                 
                 Vcb->devices[Vcb->devices_loaded].devobj = DeviceObject;
                 Vcb->devices[Vcb->devices_loaded].devitem.device_uuid = *uuid;
+                Vcb->devices[Vcb->devices_loaded].seeding = v->seeding;
+                Vcb->devices[Vcb->devices_loaded].readonly = Vcb->devices[Vcb->devices_loaded].seeding;
+                Vcb->devices[Vcb->devices_loaded].removable = FALSE;
                 Vcb->devices_loaded++;
                 
                 return &Vcb->devices[Vcb->devices_loaded - 1];
@@ -2890,6 +3006,9 @@ static ULONG get_device_change_count(PDEVICE_OBJECT devobj) {
 static void init_device(device_extension* Vcb, device* dev, BOOL get_length) {
     NTSTATUS Status;
     GET_LENGTH_INFORMATION gli;
+    ULONG aptelen;
+    ATA_PASS_THROUGH_EX* apte;
+    IDENTIFY_DEVICE_DATA* idd;
     
     dev->removable = is_device_removable(dev->devobj);
     dev->change_count = dev->removable ? get_device_change_count(dev->devobj) : 0;
@@ -2903,6 +3022,59 @@ static void init_device(device_extension* Vcb, device* dev, BOOL get_length) {
         
         dev->length = gli.Length.QuadPart;
     }
+    
+    dev->ssd = FALSE;
+    dev->trim = FALSE;
+    dev->readonly = dev->seeding;
+    
+    if (!dev->readonly) {
+        Status = dev_ioctl(dev->devobj, IOCTL_DISK_IS_WRITABLE, NULL, 0,
+                        NULL, 0, TRUE, NULL);
+        if (Status == STATUS_MEDIA_WRITE_PROTECTED)
+            dev->readonly = TRUE;
+    }
+
+    aptelen = sizeof(ATA_PASS_THROUGH_EX) + 512;
+    apte = ExAllocatePoolWithTag(NonPagedPool, aptelen, ALLOC_TAG);
+    if (!apte) {
+        ERR("out of memory\n");
+        return;
+    }
+    
+    RtlZeroMemory(apte, aptelen);
+    
+    apte->Length = sizeof(ATA_PASS_THROUGH_EX);
+    apte->AtaFlags = ATA_FLAGS_DATA_IN;
+    apte->DataTransferLength = aptelen - sizeof(ATA_PASS_THROUGH_EX);
+    apte->TimeOutValue = 3;
+    apte->DataBufferOffset = apte->Length;
+    apte->CurrentTaskFile[6] = 0xec; // IDENTIFY DEVICE
+    
+    Status = dev_ioctl(dev->devobj, IOCTL_ATA_PASS_THROUGH, apte, aptelen,
+                       apte, aptelen, TRUE, NULL);
+    
+    if (!NT_SUCCESS(Status)) {
+        ERR("error calling ATA IDENTIFY DEVICE: %08x\n", Status);
+    } else {
+        idd = (IDENTIFY_DEVICE_DATA*)((UINT8*)apte + sizeof(ATA_PASS_THROUGH_EX));
+        
+        if (idd->NominalMediaRotationRate == 1) {
+            dev->ssd = TRUE;
+            TRACE("device identified as SSD\n");
+        } else if (idd->NominalMediaRotationRate == 0)
+            TRACE("no rotational speed returned, assuming not SSD\n");
+        else
+            TRACE("rotational speed of %u RPM\n", idd->NominalMediaRotationRate);
+        
+        if (idd->DataSetManagementFeature.SupportsTrim) {
+            dev->trim = TRUE;
+            Vcb->trim = TRUE;
+            TRACE("TRIM supported\n");
+        } else
+            TRACE("TRIM not supported\n");
+    }
+    
+    ExFreePool(apte);
 }
 
 static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) {
@@ -2954,9 +3126,7 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) {
                         while (le != &volumes) {
                             volume* v = CONTAINING_RECORD(le, volume, list_entry);
             
-                            if (RtlCompareMemory(&Vcb->superblock.uuid, &v->fsuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID) &&
-                                RtlCompareMemory(&di->device_uuid, &v->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)
-                            ) {
+                            if (RtlCompareMemory(&di->device_uuid, &v->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) {
                                 PFILE_OBJECT FileObject;
                                 PDEVICE_OBJECT DeviceObject;
                                 
@@ -2974,6 +3144,9 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) {
                                 Vcb->devices[Vcb->devices_loaded].devobj = DeviceObject;
                                 RtlCopyMemory(&Vcb->devices[Vcb->devices_loaded].devitem, di, min(tp.item->size, sizeof(DEV_ITEM)));
                                 init_device(Vcb, &Vcb->devices[i], FALSE);
+
+                                Vcb->devices[i].seeding = v->seeding;
+
                                 Vcb->devices[i].length = v->length;
                                 Vcb->devices_loaded++;
 
@@ -3009,6 +3182,7 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) {
                 c->used = c->oldused = 0;
                 c->cache = NULL;
                 c->created = FALSE;
+                c->readonly = FALSE;
                 
                 c->chunk_item = ExAllocatePoolWithTag(NonPagedPool, tp.item->size, ALLOC_TAG);
                 
@@ -3030,14 +3204,24 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) {
                     
                     if (!c->devices) {
                         ERR("out of memory\n");
-                        ExFreePool(c);
                         ExFreePool(c->chunk_item);
+                        ExFreePool(c);
                         return STATUS_INSUFFICIENT_RESOURCES;
                     }
                     
                     for (i = 0; i < c->chunk_item->num_stripes; i++) {
                         c->devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid);
                         TRACE("device %llu = %p\n", i, c->devices[i]);
+                        
+                        if (!c->devices[i]) {
+                            ERR("missing device\n");
+                            ExFreePool(c->chunk_item);
+                            ExFreePool(c);
+                            return STATUS_INTERNAL_ERROR;
+                        }
+                            
+                        if (c->devices[i]->readonly)
+                            c->readonly = TRUE;
                     }
                 } else
                     c->devices = NULL;
@@ -3049,6 +3233,10 @@ static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) {
                 InitializeListHead(&c->space_size);
                 InitializeListHead(&c->deleting);
                 InitializeListHead(&c->changed_extents);
+                
+                InitializeListHead(&c->range_locks);
+                KeInitializeSpinLock(&c->range_locks_spinlock);
+                KeInitializeEvent(&c->range_locks_event, NotificationEvent, FALSE);
 
                 InsertTailList(&Vcb->chunks, &c->list_entry);
                 
@@ -3109,6 +3297,42 @@ void protect_superblocks(device_extension* Vcb, chunk* c) {
                     TRACE("startoff = %llx, superblock = %llx\n", startoff + cis[j].offset, superblock_addrs[i]);
 #endif
                     
+                    space_list_subtract(Vcb, c, FALSE, c->offset + off_start, off_end - off_start, NULL);
+                }
+            }
+        } else if (ci->type & BLOCK_FLAG_RAID5) {
+            for (j = 0; j < ci->num_stripes; j++) {
+                UINT64 stripe_size = ci->size / (ci->num_stripes - 1);
+                
+                if (cis[j].offset + stripe_size > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) {
+                    TRACE("cut out superblock in chunk %llx\n", c->offset);
+                    
+                    off_start = superblock_addrs[i] - cis[j].offset;
+                    off_start -= off_start % (ci->stripe_length * (ci->num_stripes - 1));
+                    off_start *= ci->num_stripes - 1;
+
+                    off_end = off_start + (ci->stripe_length * (ci->num_stripes - 1));
+                    
+                    TRACE("cutting out %llx, size %llx\n", c->offset + off_start, off_end - off_start);
+
+                    space_list_subtract(Vcb, c, FALSE, c->offset + off_start, off_end - off_start, NULL);
+                }
+            }
+        } else if (ci->type & BLOCK_FLAG_RAID6) {
+            for (j = 0; j < ci->num_stripes; j++) {
+                UINT64 stripe_size = ci->size / (ci->num_stripes - 2);
+                
+                if (cis[j].offset + stripe_size > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) {
+                    TRACE("cut out superblock in chunk %llx\n", c->offset);
+                    
+                    off_start = superblock_addrs[i] - cis[j].offset;
+                    off_start -= off_start % (ci->stripe_length * (ci->num_stripes - 2));
+                    off_start *= ci->num_stripes - 2;
+
+                    off_end = off_start + (ci->stripe_length * (ci->num_stripes - 2));
+                    
+                    TRACE("cutting out %llx, size %llx\n", c->offset + off_start, off_end - off_start);
+
                     space_list_subtract(Vcb, c, FALSE, c->offset + off_start, off_end - off_start, NULL);
                 }
             }
@@ -3156,7 +3380,7 @@ static NTSTATUS STDCALL find_chunk_usage(device_extension* Vcb, PIRP Irp) {
             return Status;
         }
         
-        if (!keycmp(&searchkey, &tp.item->key)) {
+        if (!keycmp(searchkey, tp.item->key)) {
             if (tp.item->size >= sizeof(BLOCK_GROUP_ITEM)) {
                 bgi = (BLOCK_GROUP_ITEM*)tp.item->data;
                 
@@ -3168,13 +3392,17 @@ static NTSTATUS STDCALL find_chunk_usage(device_extension* Vcb, PIRP Irp) {
                     Vcb->extent_root->id, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
             }
         }
-        
+            
 //         if (addr >= c->offset && (addr - c->offset) < c->chunk_item->size && c->chunk_item->num_stripes > 0) {
 //             cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
 // 
 //             return (addr - c->offset) + cis->offset;
 //         }
-        
+
+        // It doesn't make a great deal of sense to load the free space cache of a
+        // readonly seeding chunk, as we'll never write to it. But btrfs check will
+        // complain if we don't write a valid cache, so we have to do it anyway...
+            
         // FIXME - make sure we free occasionally after doing one of these, or we
         // might use up a lot of memory with a big disk.
         
@@ -3182,7 +3410,7 @@ static NTSTATUS STDCALL find_chunk_usage(device_extension* Vcb, PIRP Irp) {
         if (!NT_SUCCESS(Status)) {
             ERR("load_free_space_cache returned %08x\n", Status);
             return Status;
-        }        
+        }
         
         protect_superblocks(Vcb, c);
 
@@ -3324,7 +3552,7 @@ static root* find_default_subvol(device_extension* Vcb, PIRP Irp) {
             goto end;
         }
         
-        if (keycmp(&tp.item->key, &searchkey)) {
+        if (keycmp(tp.item->key, searchkey)) {
             ERR("could not find (%llx,%x,%llx) in root tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
             goto end;
         }
@@ -3378,78 +3606,6 @@ end:
     return NULL;
 }
 
-static NTSTATUS create_worker_threads(PDEVICE_OBJECT DeviceObject) {
-    device_extension* Vcb = DeviceObject->DeviceExtension;
-    ULONG i;
-    NTSTATUS Status;
-    
-    Vcb->threads.num_threads = max(3, KeQueryActiveProcessorCount(NULL));
-    
-    Vcb->threads.threads = ExAllocatePoolWithTag(NonPagedPool, sizeof(drv_thread) * Vcb->threads.num_threads, ALLOC_TAG);
-    if (!Vcb->threads.threads) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-    
-    RtlZeroMemory(Vcb->threads.threads, sizeof(drv_thread) * Vcb->threads.num_threads);
-    
-    for (i = 0; i < Vcb->threads.num_threads; i++) {
-        Vcb->threads.threads[i].DeviceObject = DeviceObject;
-        KeInitializeEvent(&Vcb->threads.threads[i].event, SynchronizationEvent, FALSE);
-        KeInitializeEvent(&Vcb->threads.threads[i].finished, NotificationEvent, FALSE);
-        InitializeListHead(&Vcb->threads.threads[i].jobs);
-        KeInitializeSpinLock(&Vcb->threads.threads[i].spin_lock);
-        
-        Status = PsCreateSystemThread(&Vcb->threads.threads[i].handle, 0, NULL, NULL, NULL, worker_thread, &Vcb->threads.threads[i]);
-        if (!NT_SUCCESS(Status)) {
-            ULONG j;
-            
-            ERR("PsCreateSystemThread returned %08x\n", Status);
-            
-            for (j = 0; j < i; j++) {
-                Vcb->threads.threads[i].quit = TRUE;
-                KeSetEvent(&Vcb->threads.threads[i].event, 0, FALSE);
-            }
-            
-            return Status;
-        }
-    }
-    
-    Vcb->threads.pending_jobs = 0;
-    
-    return STATUS_SUCCESS;
-}
-
-BOOL add_thread_job(device_extension* Vcb, PIRP Irp) {
-    ULONG threadnum;
-    thread_job* tj;
-    
-    threadnum = InterlockedIncrement(&Vcb->threads.next_thread) % Vcb->threads.num_threads;
-    
-    if (Vcb->threads.pending_jobs >= Vcb->threads.num_threads)
-        return FALSE;
-    
-    if (Vcb->threads.threads[threadnum].quit)
-        return FALSE;
-    
-    tj = ExAllocatePoolWithTag(NonPagedPool, sizeof(thread_job), ALLOC_TAG);
-    if (!tj) {
-        Irp->IoStatus.Status = STATUS_INSUFFICIENT_RESOURCES;
-        Irp->IoStatus.Information = 0;
-        IoCompleteRequest(Irp, IO_NO_INCREMENT);
-        return FALSE;
-    }
-    
-    tj->Irp = Irp;
-    
-    InterlockedIncrement(&Vcb->threads.pending_jobs);
-    
-    ExInterlockedInsertTailList(&Vcb->threads.threads[threadnum].jobs, &tj->list_entry, &Vcb->threads.threads[threadnum].spin_lock);
-    KeSetEvent(&Vcb->threads.threads[threadnum].event, 0, FALSE);
-    
-    return TRUE;
-}
-
 static BOOL raid_generations_okay(device_extension* Vcb) {
     UINT64 i;
     
@@ -3484,11 +3640,12 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
     device_extension* Vcb = NULL;
     GET_LENGTH_INFORMATION gli;
     UINT64 i;
-    LIST_ENTRY* le;
+    LIST_ENTRY *le, batchlist;
     KEY searchkey;
     traverse_ptr tp;
     fcb* root_fcb = NULL;
     ccb* root_ccb = NULL;
+    BOOL init_lookaside = FALSE;
     
     TRACE("mount_vol called\n");
     
@@ -3635,6 +3792,9 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
     
     Vcb->devices[0].devobj = DeviceToMount;
     RtlCopyMemory(&Vcb->devices[0].devitem, &Vcb->superblock.dev_item, sizeof(DEV_ITEM));
+    
+    Vcb->devices[0].seeding = Vcb->superblock.flags & BTRFS_SUPERBLOCK_FLAGS_SEEDING ? TRUE : FALSE;
+    
     init_device(Vcb, &Vcb->devices[0], FALSE);
     Vcb->devices[0].length = gli.Length.QuadPart;
     
@@ -3643,6 +3803,9 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
     
     Vcb->devices_loaded = 1;
     
+    if (DeviceToMount->Flags & DO_SYSTEM_BOOT_PARTITION)
+        Vcb->disallow_dismount = TRUE;
+    
     TRACE("DeviceToMount = %p\n", DeviceToMount);
     TRACE("Stack->Parameters.MountVolume.Vpb = %p\n", Stack->Parameters.MountVolume.Vpb);
 
@@ -3675,17 +3838,22 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
     InitializeListHead(&Vcb->all_fcbs);
     InitializeListHead(&Vcb->dirty_fcbs);
     InitializeListHead(&Vcb->dirty_filerefs);
-    InitializeListHead(&Vcb->shared_extents);
     InitializeListHead(&Vcb->sector_checksums);
     
     KeInitializeSpinLock(&Vcb->dirty_fcbs_lock);
     KeInitializeSpinLock(&Vcb->dirty_filerefs_lock);
-    KeInitializeSpinLock(&Vcb->shared_extents_lock);
     
     InitializeListHead(&Vcb->DirNotifyList);
 
     FsRtlNotifyInitializeSync(&Vcb->NotifySync);
     
+    ExInitializePagedLookasideList(&Vcb->tree_data_lookaside, NULL, NULL, 0, sizeof(tree_data), ALLOC_TAG, 0);
+    ExInitializePagedLookasideList(&Vcb->traverse_ptr_lookaside, NULL, NULL, 0, sizeof(traverse_ptr), ALLOC_TAG, 0);
+    ExInitializePagedLookasideList(&Vcb->rollback_item_lookaside, NULL, NULL, 0, sizeof(rollback_item), ALLOC_TAG, 0);
+    ExInitializePagedLookasideList(&Vcb->batch_item_lookaside, NULL, NULL, 0, sizeof(batch_item), ALLOC_TAG, 0);
+    ExInitializeNPagedLookasideList(&Vcb->range_lock_lookaside, NULL, NULL, 0, sizeof(range_lock), ALLOC_TAG, 0);
+    init_lookaside = TRUE;
+    
     Status = load_chunk_root(Vcb, Irp);
     if (!NT_SUCCESS(Status)) {
         ERR("load_chunk_root returned %08x\n", Status);
@@ -3702,6 +3870,23 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
             goto exit;
         }
         
+        if (Vcb->devices[0].readonly && !Vcb->readonly) {
+            Vcb->readonly = TRUE;
+            
+            for (i = 0; i < Vcb->superblock.num_devices; i++) {
+                if (Vcb->devices[i].readonly && !Vcb->devices[i].seeding)
+                    break;
+                
+                if (!Vcb->devices[i].readonly) {
+                    Vcb->readonly = FALSE;
+                    break;
+                }
+            }
+            
+            if (Vcb->readonly)
+                WARN("setting volume to readonly\n");
+        }
+        
         if (!raid_generations_okay(Vcb)) {
             ERR("could not mount as generation mismatch\n");
             
@@ -3710,6 +3895,11 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
             Status = STATUS_INTERNAL_ERROR;
             goto exit;
         }
+    } else {
+        if (Vcb->devices[0].readonly) {
+            WARN("setting volume to readonly as device is readonly\n");
+            Vcb->readonly = TRUE;
+        }
     }
     
     add_root(Vcb, BTRFS_ROOT_ROOT, Vcb->superblock.root_tree_addr, NULL);
@@ -3734,16 +3924,21 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
         }
     }
     
+    InitializeListHead(&batchlist);
+    
     // We've already increased the generation by one
     if (!Vcb->readonly && Vcb->superblock.generation - 1 != Vcb->superblock.cache_generation) {
         WARN("generation was %llx, free-space cache generation was %llx; clearing cache...\n", Vcb->superblock.generation - 1, Vcb->superblock.cache_generation);
-        Status = clear_free_space_cache(Vcb, Irp);
+        Status = clear_free_space_cache(Vcb, &batchlist, Irp);
         if (!NT_SUCCESS(Status)) {
             ERR("clear_free_space_cache returned %08x\n", Status);
+            clear_batch_list(Vcb, &batchlist);
             goto exit;
         }
     }
     
+    commit_batch_list(Vcb, &batchlist, Irp, NULL);
+    
     Vcb->volume_fcb = create_fcb(NonPagedPool);
     if (!Vcb->volume_fcb) {
         ERR("out of memory\n");
@@ -3868,12 +4063,6 @@ static NTSTATUS STDCALL mount_vol(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
         goto exit;
     }
     
-    Status = create_worker_threads(NewDeviceObject);
-    if (!NT_SUCCESS(Status)) {
-        ERR("create_worker_threads returned %08x\n", Status);
-        goto exit;
-    }
-    
     Status = registry_mark_volume_mounted(&Vcb->superblock.uuid);
     if (!NT_SUCCESS(Status))
         WARN("registry_mark_volume_mounted returned %08x\n", Status);
@@ -3887,6 +4076,14 @@ exit:
 
     if (!NT_SUCCESS(Status)) {
         if (Vcb) {
+            if (init_lookaside) {
+                ExDeletePagedLookasideList(&Vcb->tree_data_lookaside);
+                ExDeletePagedLookasideList(&Vcb->traverse_ptr_lookaside);
+                ExDeletePagedLookasideList(&Vcb->rollback_item_lookaside);
+                ExDeletePagedLookasideList(&Vcb->batch_item_lookaside);
+                ExDeleteNPagedLookasideList(&Vcb->range_lock_lookaside);
+            }
+                
             if (Vcb->root_file)
                 ObDereferenceObject(Vcb->root_file);
             else if (Vcb->root_fileref)
@@ -4054,8 +4251,11 @@ static NTSTATUS STDCALL drv_file_system_control(IN PDEVICE_OBJECT DeviceObject,
             Status = verify_volume(DeviceObject);
             
             if (!NT_SUCCESS(Status) && Vcb->Vpb->Flags & VPB_MOUNTED) {
-                uninit(Vcb, FALSE);
-//                 Vcb->Vpb->Flags &= ~VPB_MOUNTED;
+                if (Vcb->open_files > 0) {
+                    Vcb->removing = TRUE;
+//                     Vcb->Vpb->Flags &= ~VPB_MOUNTED;
+                } else
+                    uninit(Vcb, FALSE);
             }
             
             break;
@@ -4119,151 +4319,6 @@ NTSTATUS part0_passthrough(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
     return Status;
 }
 
-static NTSTATUS part0_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
-    NTSTATUS Status;
-    part0_device_extension* p0de = DeviceObject->DeviceExtension;
-    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
-    
-    TRACE("control code = %x\n", IrpSp->Parameters.DeviceIoControl.IoControlCode);
-    
-    switch (IrpSp->Parameters.DeviceIoControl.IoControlCode) {
-        case IOCTL_MOUNTDEV_QUERY_UNIQUE_ID:
-        {
-            MOUNTDEV_UNIQUE_ID* mduid;
-
-            if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(MOUNTDEV_UNIQUE_ID)) {
-                Status = STATUS_BUFFER_TOO_SMALL;
-                Irp->IoStatus.Status = Status;
-                Irp->IoStatus.Information = sizeof(MOUNTDEV_UNIQUE_ID);
-                IoCompleteRequest(Irp, IO_NO_INCREMENT);
-                return Status;
-            }
-
-            mduid = Irp->AssociatedIrp.SystemBuffer;
-            mduid->UniqueIdLength = sizeof(BTRFS_UUID);
-
-            if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(MOUNTDEV_UNIQUE_ID) - 1 + mduid->UniqueIdLength) {
-                Status = STATUS_BUFFER_OVERFLOW;
-                Irp->IoStatus.Status = Status;
-                Irp->IoStatus.Information = sizeof(MOUNTDEV_UNIQUE_ID);
-                IoCompleteRequest(Irp, IO_NO_INCREMENT);
-                return Status;
-            }
-
-            RtlCopyMemory(mduid->UniqueId, &p0de->uuid, sizeof(BTRFS_UUID));
-
-            Status = STATUS_SUCCESS;
-            Irp->IoStatus.Status = Status;
-            Irp->IoStatus.Information = sizeof(MOUNTDEV_UNIQUE_ID) - 1 + mduid->UniqueIdLength;
-            IoCompleteRequest(Irp, IO_NO_INCREMENT);
-            
-            return Status;
-        }
-        
-        case IOCTL_MOUNTDEV_QUERY_DEVICE_NAME:
-        {
-            PMOUNTDEV_NAME name;
-
-            if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(MOUNTDEV_NAME)) {
-                Status = STATUS_BUFFER_TOO_SMALL;
-                Irp->IoStatus.Status = Status;
-                Irp->IoStatus.Information = sizeof(MOUNTDEV_NAME);
-                IoCompleteRequest(Irp, IO_NO_INCREMENT);
-                return Status;
-            }
-
-            name = Irp->AssociatedIrp.SystemBuffer;
-            name->NameLength = p0de->name.Length;
-
-            if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(MOUNTDEV_NAME) - 1 + name->NameLength) {
-                Status = STATUS_BUFFER_OVERFLOW;
-                Irp->IoStatus.Status = Status;
-                Irp->IoStatus.Information = sizeof(MOUNTDEV_NAME);
-                IoCompleteRequest(Irp, IO_NO_INCREMENT);
-                return Status;
-            }
-            
-            RtlCopyMemory(name->Name, p0de->name.Buffer, p0de->name.Length);
-
-            Status = STATUS_SUCCESS;
-            Irp->IoStatus.Status = Status;
-            Irp->IoStatus.Information = sizeof(MOUNTDEV_NAME) - 1 + name->NameLength;
-            IoCompleteRequest(Irp, IO_NO_INCREMENT);
-            
-            return Status;
-        }
-    }
-    
-    IoSkipCurrentIrpStackLocation(Irp);
-    
-    Status = IoCallDriver(p0de->devobj, Irp);
-    
-    TRACE("returning %08x\n", Status);
-    
-    return Status;
-}
-
-static NTSTATUS STDCALL drv_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
-    NTSTATUS Status;
-    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
-    PFILE_OBJECT FileObject = IrpSp->FileObject;
-    device_extension* Vcb = DeviceObject->DeviceExtension;
-    fcb* fcb;
-    BOOL top_level;
-
-    FsRtlEnterFileSystem();
-
-    top_level = is_top_level(Irp);
-    
-    Irp->IoStatus.Information = 0;
-    
-    if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) {
-        Status = part0_device_control(DeviceObject, Irp);
-        goto end2;
-    }
-    
-    TRACE("control code = %x\n", IrpSp->Parameters.DeviceIoControl.IoControlCode);
-    
-    if (!FileObject) {
-        ERR("FileObject was NULL\n");
-        Status = STATUS_INVALID_PARAMETER;
-        goto end;
-    }
-    
-    fcb = FileObject->FsContext;
-    
-    if (!fcb) {
-        ERR("FCB was NULL\n");
-        Status = STATUS_INVALID_PARAMETER;
-        goto end;
-    }
-    
-    if (fcb != Vcb->volume_fcb) {
-        Status = STATUS_NOT_IMPLEMENTED;
-        goto end;
-    }
-    
-    IoSkipCurrentIrpStackLocation(Irp);
-    
-    Status = IoCallDriver(Vcb->devices[0].devobj, Irp);
-    
-    goto end2;
-    
-end:
-    Irp->IoStatus.Status = Status;
-
-    if (Status != STATUS_PENDING)
-        IoCompleteRequest(Irp, IO_NO_INCREMENT);
-    
-end2:
-    if (top_level) 
-        IoSetTopLevelIrp(NULL);
-    
-    FsRtlExitFileSystem();
-
-    return Status;
-}
-
 static NTSTATUS STDCALL drv_shutdown(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
     NTSTATUS Status;
     BOOL top_level;
@@ -4325,6 +4380,79 @@ BOOL is_file_name_valid(PUNICODE_STRING us) {
     return TRUE;
 }
 
+void chunk_lock_range(device_extension* Vcb, chunk* c, UINT64 start, UINT64 length) { // take a lock on [start, start + length) of chunk c for the calling thread
+    LIST_ENTRY* le;
+    BOOL locked; // TRUE when another thread holds an overlapping range
+    range_lock* rl; // entry that will represent this lock in c->range_locks
+    // NOTE: if this allocation fails we log and return WITHOUT holding the range; the void return gives callers no way to tell
+    rl = ExAllocateFromNPagedLookasideList(&Vcb->range_lock_lookaside);
+    if (!rl) {
+        ERR("out of memory\n");
+        return;
+    }
+    // record the requested range and the thread that owns it
+    rl->start = start;
+    rl->length = length;
+    rl->thread = PsGetCurrentThread();
+    // retry until no other thread holds an overlapping range
+    while (TRUE) {
+        KIRQL irql;
+        
+        locked = FALSE;
+        // the list scan, the insert and the event reset below all happen under this spinlock
+        KeAcquireSpinLock(&c->range_locks_spinlock, &irql);
+        
+        le = c->range_locks.Flink;
+        while (le != &c->range_locks) {
+            range_lock* rl2 = CONTAINING_RECORD(le, range_lock, list_entry);
+            // conflict only if the ranges overlap AND the existing lock belongs to a different thread
+            if (rl2->start < start + length && rl2->start + rl2->length > start && rl2->thread != PsGetCurrentThread()) {
+                locked = TRUE;
+                break;
+            }
+            
+            le = le->Flink;
+        }
+        // no conflict: queue our entry and return with the range held
+        if (!locked) {
+            InsertTailList(&c->range_locks, &rl->list_entry);
+            
+            KeReleaseSpinLock(&c->range_locks_spinlock, irql);
+            return;
+        }
+        // conflict: reset the event while still holding the spinlock, before dropping it to wait
+        KeClearEvent(&c->range_locks_event);
+        
+        KeReleaseSpinLock(&c->range_locks_spinlock, irql);
+        // wait with no timeout; chunk_unlock_range sets this event, after which we rescan the list
+        KeWaitForSingleObject(&c->range_locks_event, UserRequest, KernelMode, FALSE, NULL);
+    }
+}
+
+void chunk_unlock_range(device_extension* Vcb, chunk* c, UINT64 start, UINT64 length) { // drop the first lock on chunk c whose start and length match exactly
+    KIRQL irql;
+    LIST_ENTRY* le;
+    
+    KeAcquireSpinLock(&c->range_locks_spinlock, &irql);
+    
+    le = c->range_locks.Flink;
+    while (le != &c->range_locks) {
+        range_lock* rl = CONTAINING_RECORD(le, range_lock, list_entry);
+        // match on exact start and length only; the owning thread is not compared
+        if (rl->start == start && rl->length == length) {
+            RemoveEntryList(&rl->list_entry);
+            ExFreeToNPagedLookasideList(&Vcb->range_lock_lookaside, rl);
+            break;
+        }
+        
+        le = le->Flink;
+    }
+    // the event is set whether or not a matching entry was found, so waiters in chunk_lock_range rescan
+    KeSetEvent(&c->range_locks_event, 0, FALSE);
+    
+    KeReleaseSpinLock(&c->range_locks_spinlock, irql);
+}
+
 #ifdef _DEBUG
 static void STDCALL init_serial() {
     NTSTATUS Status;
@@ -4342,15 +4470,22 @@ static void STDCALL check_cpu() {
 #ifndef _MSC_VER
     __get_cpuid(1, &cpuInfo[0], &cpuInfo[1], &cpuInfo[2], &cpuInfo[3]);
     have_sse42 = cpuInfo[2] & bit_SSE4_2;
+    have_sse2 = cpuInfo[3] & bit_SSE2;
 #else
    __cpuid(cpuInfo, 1);
    have_sse42 = cpuInfo[2] & (1 << 20);
+   have_sse2 = cpuInfo[3] & (1 << 26);
 #endif
 
     if (have_sse42)
         TRACE("SSE4.2 is supported\n");
     else
         TRACE("SSE4.2 not supported\n");
+    
+    if (have_sse2)
+        TRACE("SSE2 is supported\n");
+    else
+        TRACE("SSE2 is not supported\n");
 }
 #endif
 
index b3a2398..c5184cd 100644 (file)
@@ -46,6 +46,7 @@ static const UINT64 superblock_addrs[] = { 0x10000, 0x4000000, 0x4000000000, 0x4
 #define BTRFS_ROOT_FSTREE       5
 #define BTRFS_ROOT_CHECKSUM     7
 #define BTRFS_ROOT_UUID         9
+#define BTRFS_ROOT_DATA_RELOC   0xFFFFFFFFFFFFFFF7
 
 #define BTRFS_COMPRESSION_NONE  0
 #define BTRFS_COMPRESSION_ZLIB  1
@@ -100,6 +101,8 @@ static const UINT64 superblock_addrs[] = { 0x10000, 0x4000000, 0x4000000000, 0x4
 #define BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA    0x0100
 #define BTRFS_INCOMPAT_FLAGS_NO_HOLES           0x0200
 
+#define BTRFS_SUPERBLOCK_FLAGS_SEEDING   0x100000000
+
 #pragma pack(push, 1)
 
 typedef struct {
@@ -112,8 +115,9 @@ typedef struct {
     UINT64 offset;
 } KEY;
 
-#define HEADER_FLAG_MIXED_BACKREF   0x100000000000000
+#define HEADER_FLAG_WRITTEN         0x000000000000001
 #define HEADER_FLAG_SHARED_BACKREF  0x000000000000002
+#define HEADER_FLAG_MIXED_BACKREF   0x100000000000000
 
 typedef struct {
     UINT8 csum[32];
index 4d7d89e..467f2e8 100644 (file)
@@ -70,12 +70,12 @@ BEGIN
         BLOCK "080904b0"
         BEGIN
             VALUE "FileDescription", "WinBtrfs"
-            VALUE "FileVersion", "0.5"
+            VALUE "FileVersion", "0.7"
             VALUE "InternalName", "btrfs"
             VALUE "LegalCopyright", "Copyright (c) Mark Harmstone 2016"
             VALUE "OriginalFilename", "btrfs.sys"
             VALUE "ProductName", "WinBtrfs"
-            VALUE "ProductVersion", "0.6"
+            VALUE "ProductVersion", "0.7"
         END
     END
     BLOCK "VarFileInfo"
index 1b01d92..22ae32e 100644 (file)
 #include <stdio.h>
 #include <stdarg.h>
 #include <stddef.h>
+#include <emmintrin.h>
 #include "btrfs.h"
 
 #ifdef _DEBUG
 // #define DEBUG_FCB_REFCOUNTS
 // #define DEBUG_LONG_MESSAGES
+// #define DEBUG_FLUSH_TIMES
+// #define DEBUG_STATS
 #define DEBUG_PARANOID
 #endif
 
 #define EA_REPARSE "system.reparse"
 #define EA_REPARSE_HASH 0x786f6167
 
+#define EA_EA "user.EA"
+#define EA_EA_HASH 0x8270dd43
+
 #define MAX_EXTENT_SIZE 0x8000000 // 128 MB
 #define COMPRESSED_EXTENT_SIZE 0x20000 // 128 KB
 
 #define READ_AHEAD_GRANULARITY COMPRESSED_EXTENT_SIZE // really ought to be a multiple of COMPRESSED_EXTENT_SIZE
 
+#define IO_REPARSE_TAG_LXSS_SYMLINK 0xa000001d // undocumented?
+
 #ifdef _MSC_VER
 #define try __try
 #define except __except
@@ -98,6 +106,7 @@ typedef struct {
     UNICODE_STRING devpath;
     UINT64 length;
     UINT64 gen1, gen2;
+    BOOL seeding;
     BOOL processed;
     LIST_ENTRY list_entry;
 } volume;
@@ -147,7 +156,6 @@ typedef struct _fcb {
     FSRTL_ADVANCED_FCB_HEADER Header;
     struct _fcb_nonpaged* nonpaged;
     LONG refcount;
-    LONG open_count;
     struct _device_extension* Vcb;
     struct _root* subvol;
     UINT64 inode;
@@ -163,8 +171,11 @@ typedef struct _fcb {
     LIST_ENTRY extents;
     UINT64 last_dir_index;
     ANSI_STRING reparse_xattr;
+    ANSI_STRING ea_xattr;
+    ULONG ealen;
     LIST_ENTRY hardlinks;
     struct _file_ref* fileref;
+    BOOL inode_item_changed;
     
     BOOL index_loaded;
     LIST_ENTRY index_list;
@@ -174,6 +185,7 @@ typedef struct _fcb {
     BOOL atts_changed, atts_deleted;
     BOOL extents_changed;
     BOOL reparse_xattr_changed;
+    BOOL ea_changed;
     BOOL created;
     
     BOOL ads;
@@ -208,6 +220,7 @@ typedef struct _file_ref {
     file_ref_nonpaged* nonpaged;
     LIST_ENTRY children;
     LONG refcount;
+    LONG open_count;
     struct _file_ref* parent;
     WCHAR* debug_desc;
     
@@ -234,6 +247,12 @@ typedef struct _ccb {
     ACCESS_MASK access;
     file_ref* fileref;
     UNICODE_STRING filename;
+    ULONG ea_index;
+    BOOL case_sensitive;
+    BOOL user_set_creation_time;
+    BOOL user_set_access_time;
+    BOOL user_set_write_time;
+    BOOL user_set_change_time;
 } ccb;
 
 // typedef struct _log_to_phys {
@@ -301,6 +320,7 @@ typedef struct _tree {
     LIST_ENTRY list_entry;
     UINT64 new_address;
     BOOL has_new_address;
+    BOOL updated_extents;
     UINT64 flags;
     BOOL write;
 } tree;
@@ -312,15 +332,37 @@ typedef struct {
 
 typedef struct _root {
     UINT64 id;
+    LONGLONG lastinode; // signed so we can use InterlockedIncrement64
     tree_holder treeholder;
     root_nonpaged* nonpaged;
-    UINT64 lastinode;
     ROOT_ITEM root_item;
     UNICODE_STRING path;
     LIST_ENTRY fcbs;
     LIST_ENTRY list_entry;
 } root;
 
+enum batch_operation { // kind of work recorded in batch_item.operation
+    Batch_Insert,
+    Batch_SetXattr,
+    Batch_DirItem,
+    Batch_InodeRef,
+    Batch_InodeExtRef, // NOTE(review): per-value semantics are not visible in this header — confirm in commit_batch_list
+};
+
+typedef struct {
+    KEY key;
+    void* data;
+    UINT16 datalen; // presumably the size of data in bytes — confirm against the code that fills it
+    enum batch_operation operation;
+    LIST_ENTRY list_entry; // NOTE(review): looks like the link used for batch_root.items — confirm
+} batch_item; // device_extension declares a paged lookaside list named batch_item_lookaside
+
+typedef struct {
+    root* r;
+    LIST_ENTRY items; // presumably a list of batch_item entries for root r — confirm
+    LIST_ENTRY list_entry; // presumably links into the batchlist handed to commit_batch_list / clear_batch_list — confirm
+} batch_root;
+
 typedef struct {
     tree* tree;
     tree_data* item;
@@ -342,11 +384,22 @@ typedef struct {
     PDEVICE_OBJECT devobj;
     DEV_ITEM devitem;
     BOOL removable;
+    BOOL seeding;
+    BOOL readonly;
+    BOOL ssd;
+    BOOL trim;
     ULONG change_count;
     UINT64 length;
     LIST_ENTRY space;
 } device;
 
+typedef struct {
+    UINT64 start;
+    UINT64 length;
+    PETHREAD thread; // presumably the thread that took the lock — confirm in chunk_lock_range
+    LIST_ENTRY list_entry; // presumably linked into a chunk's range_locks list — confirm
+} range_lock; // device_extension declares a non-paged lookaside list named range_lock_lookaside
+
 typedef struct {
     CHUNK_ITEM* chunk_item;
     UINT32 size;
@@ -359,9 +412,13 @@ typedef struct {
     LIST_ENTRY space_size;
     LIST_ENTRY deleting;
     LIST_ENTRY changed_extents;
+    LIST_ENTRY range_locks;
+    KSPIN_LOCK range_locks_spinlock;
+    KEVENT range_locks_event;
     ERESOURCE lock;
     ERESOURCE changed_extents_lock;
     BOOL created;
+    BOOL readonly;
     
     LIST_ENTRY list_entry;
     LIST_ENTRY list_entry_changed;
@@ -374,30 +431,23 @@ typedef struct {
     UINT64 count;
     UINT64 old_count;
     BOOL no_csum;
+    BOOL superseded;
     LIST_ENTRY refs;
     LIST_ENTRY old_refs;
     LIST_ENTRY list_entry;
 } changed_extent;
 
 typedef struct {
-    EXTENT_DATA_REF edr;
+    UINT8 type;
+    
+    union {
+        EXTENT_DATA_REF edr;
+        SHARED_DATA_REF sdr;
+    };
+    
     LIST_ENTRY list_entry;
 } changed_extent_ref;
 
-typedef struct {
-    UINT64 address;
-    UINT64 size;
-    EXTENT_DATA_REF edr;
-    LIST_ENTRY list_entry;
-} shared_data_entry;
-
-typedef struct {
-    UINT64 address;
-    UINT64 parent;
-    LIST_ENTRY entries;
-    LIST_ENTRY list_entry;
-} shared_data;
-
 typedef struct {
     KEY key;
     void* data;
@@ -405,27 +455,6 @@ typedef struct {
     LIST_ENTRY list_entry;
 } sys_chunk;
 
-typedef struct {
-    PIRP Irp;
-    LIST_ENTRY list_entry;
-} thread_job;
-
-typedef struct {
-    PDEVICE_OBJECT DeviceObject;
-    HANDLE handle;
-    KEVENT event, finished;
-    BOOL quit;
-    LIST_ENTRY jobs;
-    KSPIN_LOCK spin_lock;
-} drv_thread;
-
-typedef struct {
-    ULONG num_threads;
-    LONG next_thread;
-    drv_thread* threads;
-    LONG pending_jobs;
-} drv_threads;
-
 typedef struct {
     BOOL ignore;
     BOOL compress;
@@ -436,16 +465,31 @@ typedef struct {
     UINT32 flush_interval;
     UINT32 max_inline;
     UINT64 subvol_id;
+    UINT32 raid5_recalculation;
+    UINT32 raid6_recalculation;
 } mount_options;
 
 #define VCB_TYPE_VOLUME     1
 #define VCB_TYPE_PARTITION0 2
 
+#ifdef DEBUG_STATS // this type only exists in builds that define DEBUG_STATS
+typedef struct {
+    UINT64 num_reads;
+    UINT64 data_read;
+    UINT64 read_total_time; // NOTE(review): time unit is not visible here — confirm where the counters are updated
+    UINT64 read_csum_time;
+    UINT64 read_disk_time;
+} debug_stats; // embedded in device_extension as the stats member, under the same #ifdef
+#endif
+
 typedef struct _device_extension {
     UINT32 type;
     mount_options options;
     PVPB Vpb;
     device* devices;
+#ifdef DEBUG_STATS
+    debug_stats stats;
+#endif
     UINT64 devices_loaded;
 //     DISK_GEOMETRY geometry;
     superblock superblock;
@@ -453,9 +497,12 @@ typedef struct _device_extension {
     BOOL readonly;
     BOOL removing;
     BOOL locked;
+    BOOL disallow_dismount;
+    BOOL trim;
     PFILE_OBJECT locked_fileobj;
     fcb* volume_fcb;
     file_ref* root_fileref;
+    LONG open_files;
     ERESOURCE DirResource;
     KSPIN_LOCK FcbListLock;
     ERESOURCE fcb_lock;
@@ -478,6 +525,7 @@ typedef struct _device_extension {
     root* checksum_root;
     root* dev_root;
     root* uuid_root;
+    root* data_reloc_root;
     BOOL log_to_phys_loaded;
     LIST_ENTRY sys_chunks;
     LIST_ENTRY chunks;
@@ -491,13 +539,15 @@ typedef struct _device_extension {
     ERESOURCE checksum_lock;
     ERESOURCE chunk_lock;
     LIST_ENTRY sector_checksums;
-    LIST_ENTRY shared_extents;
-    KSPIN_LOCK shared_extents_lock;
     HANDLE flush_thread_handle;
     KTIMER flush_thread_timer;
     KEVENT flush_thread_finished;
-    drv_threads threads;
     PFILE_OBJECT root_file;
+    PAGED_LOOKASIDE_LIST tree_data_lookaside;
+    PAGED_LOOKASIDE_LIST traverse_ptr_lookaside;
+    PAGED_LOOKASIDE_LIST rollback_item_lookaside;
+    PAGED_LOOKASIDE_LIST batch_item_lookaside;
+    NPAGED_LOOKASIDE_LIST range_lock_lookaside;
     LIST_ENTRY list_entry;
 } device_extension;
 
@@ -607,16 +657,35 @@ static __inline void get_raid0_offset(UINT64 off, UINT64 stripe_length, UINT16 n
     *stripeoff = initoff + startoff - (*stripe * stripe_length);
 }
 
+/* We only have 64 bits for a file ID, which isn't technically enough to be
+ * unique on Btrfs. We fudge it by having three bytes for the subvol and
+ * five for the inode, which should be good enough.
+ * Inodes are also 64 bits on Linux, but the Linux driver seems to get round
+ * this by tricking it into thinking subvols are separate volumes. */
+#ifdef __REACTOS__ // the two signatures differ only in where __inline is placed
+static __inline UINT64 make_file_id(root* r, UINT64 inode) {
+#else
+static UINT64 __inline make_file_id(root* r, UINT64 inode) {
+#endif
+    return (r->id << 40) | (inode & 0xffffffffff); // r->id goes into bits 40 and up, the low 40 bits of inode fill the rest
+}
+
+#define keycmp(key1, key2) /* -1, 0 or 1: compares obj_id, then obj_type, then offset; arguments are KEY values (not pointers) and are evaluated more than once */\
+    (((key1).obj_id < (key2).obj_id) ? -1 :\
+    (((key1).obj_id > (key2).obj_id) ? 1 :\
+    (((key1).obj_type < (key2).obj_type) ? -1 :\
+    (((key1).obj_type > (key2).obj_type) ? 1 :\
+    (((key1).offset < (key2).offset) ? -1 :\
+    (((key1).offset > (key2).offset) ? 1 :\
+    0))))))
+
 // in btrfs.c
 device* find_device_from_uuid(device_extension* Vcb, BTRFS_UUID* uuid);
 UINT64 sector_align( UINT64 NumberToBeAligned, UINT64 Alignment );
-int keycmp(const KEY* key1, const KEY* key2);
 ULONG STDCALL get_file_attributes(device_extension* Vcb, INODE_ITEM* ii, root* r, UINT64 inode, UINT8 type, BOOL dotfile, BOOL ignore_xa, PIRP Irp);
 BOOL STDCALL get_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char* name, UINT32 crc32, UINT8** data, UINT16* datalen, PIRP Irp);
 void _free_fcb(fcb* fcb, const char* func, const char* file, unsigned int line);
 void _free_fileref(file_ref* fr, const char* func, const char* file, unsigned int line);
-BOOL STDCALL get_last_inode(device_extension* Vcb, root* r, PIRP Irp);
-NTSTATUS add_dir_item(device_extension* Vcb, root* subvol, UINT64 inode, UINT32 crc32, DIR_ITEM* di, ULONG disize, PIRP Irp, LIST_ENTRY* rollback);
 NTSTATUS delete_dir_item(device_extension* Vcb, root* subvol, UINT64 parinode, UINT32 crc32, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback);
 NTSTATUS delete_inode_ref(device_extension* Vcb, root* subvol, UINT64 inode, UINT64 parinode, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback);
 fcb* create_fcb(POOL_TYPE pool_type);
@@ -637,6 +706,8 @@ NTSTATUS part0_passthrough(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
 void mark_fcb_dirty(fcb* fcb);
 void mark_fileref_dirty(file_ref* fileref);
 NTSTATUS delete_fileref(file_ref* fileref, PFILE_OBJECT FileObject, PIRP Irp, LIST_ENTRY* rollback);
+void chunk_lock_range(device_extension* Vcb, chunk* c, UINT64 start, UINT64 length);
+void chunk_unlock_range(device_extension* Vcb, chunk* c, UINT64 start, UINT64 length);
 
 #ifdef _MSC_VER
 #define funcname __FUNCTION__
@@ -648,12 +719,16 @@ NTSTATUS delete_fileref(file_ref* fileref, PFILE_OBJECT FileObject, PIRP Irp, LI
 #define free_fcb(fcb) _free_fcb(fcb, funcname, __FILE__, __LINE__)
 #define free_fileref(fileref) _free_fileref(fileref, funcname, __FILE__, __LINE__)
 
+extern BOOL have_sse2;
+
 extern UINT32 mount_compress;
 extern UINT32 mount_compress_force;
 extern UINT32 mount_compress_type;
 extern UINT32 mount_zlib_level;
 extern UINT32 mount_flush_interval;
 extern UINT32 mount_max_inline;
+extern UINT32 mount_raid5_recalculation;
+extern UINT32 mount_raid6_recalculation;
 
 #ifdef _DEBUG
 
@@ -732,6 +807,12 @@ enum rollback_type {
     ROLLBACK_SUBTRACT_SPACE
 };
 
+typedef struct {
+    enum rollback_type type;
+    void* ptr; // NOTE(review): what this points at presumably depends on type — confirm in do_rollback
+    LIST_ENTRY list_entry;
+} rollback_item; // add_rollback() takes the same type/ptr pair; device_extension declares rollback_item_lookaside
+
 // in treefuncs.c
 NTSTATUS STDCALL _find_item(device_extension* Vcb, root* r, traverse_ptr* tp, const KEY* searchkey, BOOL ignore, PIRP Irp, const char* func, const char* file, unsigned int line);
 BOOL STDCALL _find_next_item(device_extension* Vcb, const traverse_ptr* tp, traverse_ptr* next_tp, BOOL ignore, PIRP Irp, const char* func, const char* file, unsigned int line);
@@ -743,10 +824,12 @@ tree* STDCALL _free_tree(tree* t, const char* func, const char* file, unsigned i
 NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree** pt, tree* parent, PIRP Irp, const char* func, const char* file, unsigned int line);
 NTSTATUS STDCALL _do_load_tree(device_extension* Vcb, tree_holder* th, root* r, tree* t, tree_data* td, BOOL* loaded, PIRP Irp,
                                const char* func, const char* file, unsigned int line);
-void clear_rollback(LIST_ENTRY* rollback);
+void clear_rollback(device_extension* Vcb, LIST_ENTRY* rollback);
 void do_rollback(device_extension* Vcb, LIST_ENTRY* rollback);
 void free_trees_root(device_extension* Vcb, root* r);
-void add_rollback(LIST_ENTRY* rollback, enum rollback_type type, void* ptr);
+void add_rollback(device_extension* Vcb, LIST_ENTRY* rollback, enum rollback_type type, void* ptr);
+void commit_batch_list(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback);
+void clear_batch_list(device_extension* Vcb, LIST_ENTRY* batchlist);
 
 #define find_item(Vcb, r, tp, searchkey, ignore, Irp) _find_item(Vcb, r, tp, searchkey, ignore, Irp, funcname, __FILE__, __LINE__)
 #define find_next_item(Vcb, tp, next_tp, ignore, Irp) _find_next_item(Vcb, tp, next_tp, ignore, Irp, funcname, __FILE__, __LINE__)
@@ -764,7 +847,6 @@ void STDCALL free_cache();
 extern CACHE_MANAGER_CALLBACKS* cache_callbacks;
 
 // in write.c
-NTSTATUS STDCALL do_write(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback);
 NTSTATUS write_file(device_extension* Vcb, PIRP Irp, BOOL wait, BOOL deferred_write);
 NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void* buf, ULONG* length, BOOL paging_io, BOOL no_cache,
                      BOOL wait, BOOL deferred_write, LIST_ENTRY* rollback);
@@ -777,16 +859,14 @@ chunk* alloc_chunk(device_extension* Vcb, UINT64 flags);
 NTSTATUS STDCALL write_data(device_extension* Vcb, UINT64 address, void* data, BOOL need_free, UINT32 length, write_data_context* wtc, PIRP Irp, chunk* c);
 NTSTATUS STDCALL write_data_complete(device_extension* Vcb, UINT64 address, void* data, UINT32 length, PIRP Irp, chunk* c);
 void free_write_data_stripes(write_data_context* wtc);
-NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback);
 NTSTATUS STDCALL drv_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
-void flush_fcb(fcb* fcb, BOOL cache, PIRP Irp, LIST_ENTRY* rollback);
 BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start_data, UINT64 length, BOOL prealloc, void* data, LIST_ENTRY* changed_sector_list,
                          PIRP Irp, LIST_ENTRY* rollback, UINT8 compression, UINT64 decoded_size);
 NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback);
-NTSTATUS update_changed_extent_ref(device_extension* Vcb, chunk* c, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset,
-                                   signed long long count, BOOL no_csum, UINT64 new_size, PIRP Irp);
 NTSTATUS do_write_file(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback);
 NTSTATUS write_compressed(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback);
+BOOL find_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address);
+void get_raid56_lock_range(chunk* c, UINT64 address, UINT64 length, UINT64* lockaddr, UINT64* locklen);
 
 // in dirctrl.c
 NTSTATUS STDCALL drv_directory_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
@@ -805,23 +885,25 @@ NTSTATUS fcb_get_new_sd(fcb* fcb, file_ref* parfileref, ACCESS_STATE* as);
 // in fileinfo.c
 NTSTATUS STDCALL drv_set_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
 NTSTATUS STDCALL drv_query_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
-NTSTATUS add_inode_ref(device_extension* Vcb, root* subvol, UINT64 inode, UINT64 parinode, UINT64 index, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback);
 BOOL has_open_children(file_ref* fileref);
 NTSTATUS STDCALL stream_set_end_of_file_information(device_extension* Vcb, UINT64 end, fcb* fcb, file_ref* fileref, PFILE_OBJECT FileObject, BOOL advance_only, LIST_ENTRY* rollback);
 NTSTATUS fileref_get_filename(file_ref* fileref, PUNICODE_STRING fn, USHORT* name_offset);
 NTSTATUS open_fileref_by_inode(device_extension* Vcb, root* subvol, UINT64 inode, file_ref** pfr, PIRP Irp);
+NTSTATUS STDCALL drv_query_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
+NTSTATUS STDCALL drv_set_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
 
 // in reparse.c
-NTSTATUS get_reparse_point(PDEVICE_OBJECT DeviceObject, PFILE_OBJECT FileObject, void* buffer, DWORD buflen, DWORD* retlen);
+NTSTATUS get_reparse_point(PDEVICE_OBJECT DeviceObject, PFILE_OBJECT FileObject, void* buffer, DWORD buflen, ULONG_PTR* retlen);
 NTSTATUS set_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp);
 NTSTATUS delete_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp);
 
 // in create.c
 NTSTATUS STDCALL drv_create(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
 NTSTATUS STDCALL find_file_in_dir(device_extension* Vcb, PUNICODE_STRING filename, file_ref* fr,
-                                  root** subvol, UINT64* inode, UINT8* type, UINT64* index, PANSI_STRING utf8, PIRP Irp);
-NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnus, file_ref* related, BOOL parent, USHORT* unparsed, ULONG* fn_offset, PIRP Irp);
-NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type, PANSI_STRING utf8, fcb* parent, fcb** pfcb, PIRP Irp);
+                                  root** subvol, UINT64* inode, UINT8* type, UINT64* index, PANSI_STRING utf8, BOOL case_sensitive, PIRP Irp);
+NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnus, file_ref* related, BOOL parent, USHORT* unparsed, ULONG* fn_offset,
+                      POOL_TYPE pooltype, BOOL case_sensitive, PIRP Irp);
+NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type, PANSI_STRING utf8, fcb* parent, fcb** pfcb, POOL_TYPE pooltype, PIRP Irp);
 NTSTATUS open_fcb_stream(device_extension* Vcb, root* subvol, UINT64 inode, ANSI_STRING* xattr, UINT32 streamhash, fcb* parent, fcb** pfcb, PIRP Irp);
 void insert_fileref_child(file_ref* parent, file_ref* child, BOOL do_lock);
 NTSTATUS fcb_get_last_dir_index(fcb* fcb, UINT64* index, PIRP Irp);
@@ -833,10 +915,15 @@ void do_unlock_volume(device_extension* Vcb);
 
 // in flushthread.c
 void STDCALL flush_thread(void* context);
+NTSTATUS STDCALL do_write(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback);
+NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback);
+void flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback);
+NTSTATUS STDCALL write_data_phys(PDEVICE_OBJECT device, UINT64 address, void* data, UINT32 length);
+BOOL is_tree_unique(device_extension* Vcb, tree* t, PIRP Irp);
 
 // in read.c
 NTSTATUS STDCALL drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp);
-NTSTATUS STDCALL read_data(device_extension* Vcb, UINT64 addr, UINT32 length, UINT32* csum, BOOL is_tree, UINT8* buf, chunk** pc, PIRP Irp);
+NTSTATUS STDCALL read_data(device_extension* Vcb, UINT64 addr, UINT32 length, UINT32* csum, BOOL is_tree, UINT8* buf, chunk* c, chunk** pc, PIRP Irp);
 NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, ULONG* pbr, PIRP Irp);
 NTSTATUS do_read(PIRP Irp, BOOL wait, ULONG* bytes_read);
 
@@ -845,31 +932,40 @@ NTSTATUS STDCALL drv_pnp(PDEVICE_OBJECT DeviceObject, PIRP Irp);
 
 // in free-space.c
 NTSTATUS load_free_space_cache(device_extension* Vcb, chunk* c, PIRP Irp);
-NTSTATUS clear_free_space_cache(device_extension* Vcb, PIRP Irp);
+NTSTATUS clear_free_space_cache(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp);
 NTSTATUS allocate_cache(device_extension* Vcb, BOOL* changed, PIRP Irp, LIST_ENTRY* rollback);
 NTSTATUS update_chunk_caches(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback);
 NTSTATUS add_space_entry(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 offset, UINT64 size);
 void _space_list_add(device_extension* Vcb, chunk* c, BOOL deleting, UINT64 address, UINT64 length, LIST_ENTRY* rollback, const char* func);
-void _space_list_add2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback, const char* func);
+void _space_list_add2(device_extension* Vcb, LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback, const char* func);
 void _space_list_subtract(device_extension* Vcb, chunk* c, BOOL deleting, UINT64 address, UINT64 length, LIST_ENTRY* rollback, const char* func);
-void _space_list_subtract2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback, const char* func);
+void _space_list_subtract2(device_extension* Vcb, LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback, const char* func);
 
 #define space_list_add(Vcb, c, deleting, address, length, rollback) _space_list_add(Vcb, c, deleting, address, length, rollback, funcname)
-#define space_list_add2(list, list_size, address, length, rollback) _space_list_add2(list, list_size, address, length, NULL, rollback, funcname)
+#define space_list_add2(Vcb, list, list_size, address, length, rollback) _space_list_add2(Vcb, list, list_size, address, length, NULL, rollback, funcname)
 #define space_list_subtract(Vcb, c, deleting, address, length, rollback) _space_list_subtract(Vcb, c, deleting, address, length, rollback, funcname)
-#define space_list_subtract2(list, list_size, address, length, rollback) _space_list_subtract2(list, list_size, address, length, NULL, rollback, funcname)
+#define space_list_subtract2(Vcb, list, list_size, address, length, rollback) _space_list_subtract2(Vcb, list, list_size, address, length, NULL, rollback, funcname)
 
 // in extent-tree.c
 NTSTATUS increase_extent_refcount_data(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 inode, UINT64 offset, UINT32 refcount, PIRP Irp, LIST_ENTRY* rollback);
 NTSTATUS decrease_extent_refcount_data(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 inode, UINT64 offset, UINT32 refcount, PIRP Irp, LIST_ENTRY* rollback);
-NTSTATUS decrease_extent_refcount_shared_data(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 treeaddr, UINT64 parent, PIRP Irp, LIST_ENTRY* rollback);
-NTSTATUS decrease_extent_refcount_old(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 treeaddr, PIRP Irp, LIST_ENTRY* rollback);
+NTSTATUS decrease_extent_refcount_tree(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT8 level, PIRP Irp, LIST_ENTRY* rollback);
 void decrease_chunk_usage(chunk* c, UINT64 delta);
-NTSTATUS convert_old_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, PIRP Irp, LIST_ENTRY* rollback);
-UINT64 find_extent_data_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, PIRP Irp);
+// NTSTATUS convert_old_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, PIRP Irp, LIST_ENTRY* rollback);
+UINT64 get_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, PIRP Irp);
+BOOL is_extent_unique(device_extension* Vcb, UINT64 address, UINT64 size, PIRP Irp);
+NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT8 type, void* data, KEY* firstitem, UINT8 level, PIRP Irp, LIST_ENTRY* rollback);
+UINT64 get_extent_flags(device_extension* Vcb, UINT64 address, PIRP Irp);
+void update_extent_flags(device_extension* Vcb, UINT64 address, UINT64 flags, PIRP Irp);
+NTSTATUS update_changed_extent_ref(device_extension* Vcb, chunk* c, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset,
+                                   signed long long count, BOOL no_csum, BOOL superseded, PIRP Irp);
+void add_changed_extent_ref(chunk* c, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, UINT32 count, BOOL no_csum);
+UINT64 find_extent_shared_tree_refcount(device_extension* Vcb, UINT64 address, UINT64 parent, PIRP Irp);
+UINT64 find_extent_shared_data_refcount(device_extension* Vcb, UINT64 address, UINT64 parent, PIRP Irp);
+NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT8 type, void* data, KEY* firstitem,
+                                  UINT8 level, UINT64 parent, PIRP Irp, LIST_ENTRY* rollback);
 
 // in worker-thread.c
-void STDCALL worker_thread(void* context);
 void do_read_job(PIRP Irp);
 void do_write_job(device_extension* Vcb, PIRP Irp);
 
@@ -883,6 +979,16 @@ NTSTATUS registry_load_volume_options(device_extension* Vcb);
 NTSTATUS decompress(UINT8 type, UINT8* inbuf, UINT64 inlen, UINT8* outbuf, UINT64 outlen);
 NTSTATUS write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, BOOL* compressed, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback);
 
+// in galois.c
+void galois_double(UINT8* data, UINT32 len);
+void galois_divpower(UINT8* data, UINT8 div, UINT32 readlen);
+UINT8 gpow2(UINT8 e);
+UINT8 gmul(UINT8 a, UINT8 b);
+UINT8 gdiv(UINT8 a, UINT8 b);
+
+// in devctrl.c
+NTSTATUS STDCALL drv_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
+
 #define fast_io_possible(fcb) (!FsRtlAreThereCurrentFileLocks(&fcb->lock) && !fcb->Vcb->readonly ? FastIoIsPossible : FastIoIsQuestionable)
 
 static __inline void print_open_trees(device_extension* Vcb) {
@@ -897,17 +1003,6 @@ static __inline void print_open_trees(device_extension* Vcb) {
     }
 }
 
-static __inline void InsertAfter(LIST_ENTRY* head, LIST_ENTRY* item, LIST_ENTRY* before) {
-    item->Flink = before->Flink;
-    before->Flink = item;
-    item->Blink = before;
-
-    if (item->Flink != head)
-        item->Flink->Blink = item;
-    else
-        head->Blink = item;
-}
-
 static __inline BOOL write_fcb_compressed(fcb* fcb) {
     // make sure we don't accidentally write the cache inodes or pagefile compressed
     if (fcb->subvol->id == BTRFS_ROOT_ROOT || fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE)
@@ -925,6 +1020,34 @@ static __inline BOOL write_fcb_compressed(fcb* fcb) {
     return FALSE;
 }
 
+static __inline void do_xor(UINT8* buf1, UINT8* buf2, UINT32 len) { // XORs len bytes of buf2 into buf1, in place
+    UINT32 j;
+#ifndef __REACTOS__
+    __m128i x1, x2;
+#endif
+    
+#ifndef __REACTOS__
+    if (have_sse2 && ((uintptr_t)buf1 & 0xf) == 0 && ((uintptr_t)buf2 & 0xf) == 0) { // wide path only when have_sse2 is set and both pointers have their low four bits clear
+        while (len >= 16) { // 16 bytes per iteration
+            x1 = _mm_load_si128((__m128i*)buf1);
+            x2 = _mm_load_si128((__m128i*)buf2);
+            x1 = _mm_xor_si128(x1, x2);
+            _mm_store_si128((__m128i*)buf1, x1);
+            
+            buf1 += 16;
+            buf2 += 16;
+            len -= 16;
+        }
+    }
+#endif
+    
+    for (j = 0; j < len; j++) { // byte-at-a-time for whatever remains (the whole buffer if the block above was skipped or compiled out)
+        *buf1 ^= *buf2;
+        buf1++;
+        buf2++;
+    }
+}
+
 #ifdef DEBUG_FCB_REFCOUNTS
 #ifdef DEBUG_LONG_MESSAGES
 #define increase_fileref_refcount(fileref) {\
@@ -1009,6 +1132,18 @@ static __inline BOOL write_fcb_compressed(fcb* fcb) {
 #define S_IXOTH (S_IXGRP >> 3)
 #endif
 
+// LXSS programs can be distinguished by the fact they have a NULL PEB.
+#ifdef _AMD64_
+    static __inline BOOL called_from_lxss(void) { // (void) gives the function a real prototype; callers still write called_from_lxss()
+        UINT8* proc = (UINT8*)PsGetCurrentProcess();
+        ULONG_PTR* peb = (ULONG_PTR*)&proc[0x3f8]; // NOTE(review): hard-coded byte offset into the process object, presumably its PEB pointer on one specific Windows build — confirm
+        
+        return !*peb; // TRUE when the pointer-sized value at that offset is zero
+    }
+#else
+#define called_from_lxss() FALSE // builds without _AMD64_ always get FALSE
+#endif
+
 #if defined(__REACTOS__) && (NTDDI_VERSION < NTDDI_WIN7)
 NTSTATUS WINAPI RtlUnicodeToUTF8N(CHAR *utf8_dest, ULONG utf8_bytes_max,
                                   ULONG *utf8_bytes_written,
index 7b7a00a..67ad41b 100755 (executable)
@@ -444,21 +444,22 @@ static NTSTATUS zlib_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 en
     while (le != &fcb->Vcb->chunks) {
         c = CONTAINING_RECORD(le, chunk, list_entry);
         
-        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-        
-        if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) {
-            if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, changed_sector_list, Irp, rollback, compression, end_data - start_data)) {
-                ExReleaseResourceLite(&c->lock);
-                ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
-                
-                if (compression != BTRFS_COMPRESSION_NONE)
-                    ExFreePool(comp_data);
-                
-                return STATUS_SUCCESS;
+        if (!c->readonly) {
+            ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+            
+            if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) {
+                if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, changed_sector_list, Irp, rollback, compression, end_data - start_data)) {
+                    ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
+                    
+                    if (compression != BTRFS_COMPRESSION_NONE)
+                        ExFreePool(comp_data);
+                    
+                    return STATUS_SUCCESS;
+                }
             }
+            
+            ExReleaseResourceLite(&c->lock);
         }
-        
-        ExReleaseResourceLite(&c->lock);
 
         le = le->Flink;
     }
@@ -474,8 +475,6 @@ static NTSTATUS zlib_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 en
         
         if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) {
             if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, changed_sector_list, Irp, rollback, compression, end_data - start_data)) {
-                ExReleaseResourceLite(&c->lock);
-                
                 if (compression != BTRFS_COMPRESSION_NONE)
                     ExFreePool(comp_data);
                 
@@ -830,21 +829,22 @@ static NTSTATUS lzo_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end
     while (le != &fcb->Vcb->chunks) {
         c = CONTAINING_RECORD(le, chunk, list_entry);
         
-        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-        
-        if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) {
-            if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, changed_sector_list, Irp, rollback, compression, end_data - start_data)) {
-                ExReleaseResourceLite(&c->lock);
-                ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
-                
-                if (compression != BTRFS_COMPRESSION_NONE)
-                    ExFreePool(comp_data);
-                
-                return STATUS_SUCCESS;
+        if (!c->readonly) {
+            ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+            
+            if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) {
+                if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, changed_sector_list, Irp, rollback, compression, end_data - start_data)) {
+                    ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
+                    
+                    if (compression != BTRFS_COMPRESSION_NONE)
+                        ExFreePool(comp_data);
+                    
+                    return STATUS_SUCCESS;
+                }
             }
+            
+            ExReleaseResourceLite(&c->lock);
         }
-        
-        ExReleaseResourceLite(&c->lock);
 
         le = le->Flink;
     }
@@ -860,8 +860,6 @@ static NTSTATUS lzo_write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end
         
         if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) {
             if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, changed_sector_list, Irp, rollback, compression, end_data - start_data)) {
-                ExReleaseResourceLite(&c->lock);
-                
                 if (compression != BTRFS_COMPRESSION_NONE)
                     ExFreePool(comp_data);
                 
index 791c459..7390d66 100644 (file)
@@ -75,7 +75,7 @@ static UINT32 crc32c_hw(const void *input, int len, UINT32 crc) {
         crc = _mm_crc32_u8(crc, *buf);
     }
 
-#ifdef __x86_64__
+#ifdef _AMD64_
     CALC_CRC(_mm_crc32_u64, crc, UINT64, buf, len);
 #endif
     CALC_CRC(_mm_crc32_u32, crc, UINT32, buf, len);
index 5576833..f389dfb 100644 (file)
@@ -25,6 +25,8 @@
 
 extern PDEVICE_OBJECT devobj;
 
+static WCHAR datastring[] = L"::$DATA";
+
 static NTSTATUS find_file_dir_index(device_extension* Vcb, root* r, UINT64 inode, UINT64 parinode, PANSI_STRING utf8, UINT64* pindex, PIRP Irp) {
     KEY searchkey;
     traverse_ptr tp;
@@ -41,7 +43,7 @@ static NTSTATUS find_file_dir_index(device_extension* Vcb, root* r, UINT64 inode
         return Status;
     }
     
-    if (!keycmp(&tp.item->key, &searchkey)) {
+    if (!keycmp(tp.item->key, searchkey)) {
         INODE_REF* ir;
         ULONG len;
         
@@ -98,7 +100,7 @@ static NTSTATUS find_file_dir_index_extref(device_extension* Vcb, root* r, UINT6
         return Status;
     }
     
-    if (!keycmp(&tp.item->key, &searchkey)) {
+    if (!keycmp(tp.item->key, searchkey)) {
         INODE_EXTREF* ier;
         ULONG len;
         
@@ -155,7 +157,7 @@ static NTSTATUS find_subvol_dir_index(device_extension* Vcb, root* r, UINT64 sub
         return Status;
     }
     
-    if (keycmp(&tp.item->key, &searchkey)) {
+    if (keycmp(tp.item->key, searchkey)) {
         ERR("couldn't find (%llx,%x,%llx) in root tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
         return STATUS_INTERNAL_ERROR;
     }
@@ -197,7 +199,7 @@ static NTSTATUS load_index_list(fcb* fcb, PIRP Irp) {
         return Status;
     }
 
-    if (keycmp(&tp.item->key, &searchkey) == -1) {
+    if (keycmp(tp.item->key, searchkey) == -1) {
         if (find_next_item(fcb->Vcb, &tp, &next_tp, FALSE, Irp)) {
             tp = next_tp;
             
@@ -337,7 +339,8 @@ end:
             ExFreePool(ie);
         }
     } else
-        mark_fcb_dirty(fcb);
+        mark_fcb_dirty(fcb); // It's not necessarily dirty, but this is an easy way of making sure
+                             // the list remains in memory until the next flush.
     
     return Status;
 }
@@ -473,7 +476,8 @@ end:
 }
 
 static NTSTATUS STDCALL find_file_in_dir_with_crc32(device_extension* Vcb, PUNICODE_STRING filename, UINT32 crc32, file_ref* fr,
-                                                    root** subvol, UINT64* inode, UINT8* type, UINT64* pindex, PANSI_STRING utf8, PIRP Irp) {
+                                                    root** subvol, UINT64* inode, UINT8* type, UINT64* pindex, PANSI_STRING utf8,
+                                                    BOOL case_sensitive, PIRP Irp) {
     DIR_ITEM* di;
     KEY searchkey;
     traverse_ptr tp;
@@ -495,7 +499,7 @@ static NTSTATUS STDCALL find_file_in_dir_with_crc32(device_extension* Vcb, PUNIC
     
     TRACE("found item %llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
     
-    if (!keycmp(&searchkey, &tp.item->key)) {
+    if (!keycmp(searchkey, tp.item->key)) {
         UINT32 size = tp.item->size;
         
         // found by hash
@@ -538,7 +542,7 @@ static NTSTATUS STDCALL find_file_in_dir_with_crc32(device_extension* Vcb, PUNIC
                         us.Buffer = utf16;
                         us.Length = us.MaximumLength = (USHORT)stringlen;
                         
-                        if (FsRtlAreNamesEqual(filename, &us, TRUE, NULL)) {
+                        if (FsRtlAreNamesEqual(filename, &us, !case_sensitive, NULL)) {
                             UINT64 index;
                             
                             if (di->key.obj_type == TYPE_ROOT_ITEM) {
@@ -627,7 +631,7 @@ static NTSTATUS STDCALL find_file_in_dir_with_crc32(device_extension* Vcb, PUNIC
                                     file_ref* fr2 = CONTAINING_RECORD(le, file_ref, list_entry);
                                     
                                     if (fr2->index == index) {
-                                        if (fr2->deleted || !FsRtlAreNamesEqual(&fr2->filepart, filename, TRUE, NULL)) {
+                                        if (fr2->deleted || !FsRtlAreNamesEqual(&fr2->filepart, filename, !case_sensitive, NULL)) {
                                             goto byindex;
                                         }
                                         break;
@@ -656,6 +660,9 @@ static NTSTATUS STDCALL find_file_in_dir_with_crc32(device_extension* Vcb, PUNIC
     }
     
 byindex:
+    if (case_sensitive)
+        return STATUS_OBJECT_NAME_NOT_FOUND;
+    
     Status = find_file_in_dir_index(fr, filename, subvol, inode, type, pindex, utf8, Irp);
     if (!NT_SUCCESS(Status) && Status != STATUS_OBJECT_NAME_NOT_FOUND) {
         ERR("find_file_in_dir_index returned %08x\n", Status);
@@ -747,7 +754,8 @@ file_ref* create_fileref() {
 }
 
 NTSTATUS STDCALL find_file_in_dir(device_extension* Vcb, PUNICODE_STRING filename, file_ref* fr,
-                                  root** subvol, UINT64* inode, UINT8* type, UINT64* index, PANSI_STRING utf8, PIRP Irp) {
+                                  root** subvol, UINT64* inode, UINT8* type, UINT64* index, PANSI_STRING utf8,
+                                  BOOL case_sensitive, PIRP Irp) {
     char* fn;
     UINT32 crc32;
     ULONG utf8len;
@@ -777,7 +785,7 @@ NTSTATUS STDCALL find_file_in_dir(device_extension* Vcb, PUNICODE_STRING filenam
     crc32 = calc_crc32c(0xfffffffe, (UINT8*)fn, (ULONG)utf8len);
     TRACE("crc32c(%.*s) = %08x\n", utf8len, fn, crc32);
     
-    return find_file_in_dir_with_crc32(Vcb, filename, crc32, fr, subvol, inode, type, index, utf8, Irp);
+    return find_file_in_dir_with_crc32(Vcb, filename, crc32, fr, subvol, inode, type, index, utf8, case_sensitive, Irp);
 }
 
 static BOOL find_stream(device_extension* Vcb, fcb* fcb, PUNICODE_STRING stream, PUNICODE_STRING newstreamname, UINT32* hash, PANSI_STRING xattr, PIRP Irp) {
@@ -824,6 +832,11 @@ static BOOL find_stream(device_extension* Vcb, fcb* fcb, PUNICODE_STRING stream,
     crc32 = calc_crc32c(0xfffffffe, (UINT8*)utf8, utf8len);
     TRACE("crc32 = %08x\n", crc32);
     
+    if ((crc32 == EA_DOSATTRIB_HASH && utf8len == strlen(EA_DOSATTRIB) && RtlCompareMemory(utf8, EA_DOSATTRIB, utf8len) == utf8len) || 
+        (crc32 == EA_EA_HASH && utf8len == strlen(EA_EA) && RtlCompareMemory(utf8, EA_EA, utf8len) == utf8len)) {
+        return FALSE;
+    }
+    
     searchkey.obj_id = fcb->inode;
     searchkey.obj_type = TYPE_XATTR_ITEM;
     searchkey.offset = crc32;
@@ -834,7 +847,7 @@ static BOOL find_stream(device_extension* Vcb, fcb* fcb, PUNICODE_STRING stream,
         goto end;
     }
     
-    if (!keycmp(&tp.item->key, &searchkey)) {
+    if (!keycmp(tp.item->key, searchkey)) {
         if (tp.item->size < sizeof(DIR_ITEM)) {
             ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM));
         } else {
@@ -1091,7 +1104,7 @@ static NTSTATUS split_path(PUNICODE_STRING path, UNICODE_STRING** parts, ULONG*
 // }
 // #endif
 
-static file_ref* search_fileref_children(file_ref* dir, PUNICODE_STRING name) {
+static file_ref* search_fileref_children(file_ref* dir, PUNICODE_STRING name, BOOL case_sensitive) {
     LIST_ENTRY* le;
     file_ref *c, *deleted = NULL;
     NTSTATUS Status;
@@ -1099,13 +1112,39 @@ static file_ref* search_fileref_children(file_ref* dir, PUNICODE_STRING name) {
 #ifdef DEBUG_FCB_REFCOUNTS
     ULONG rc;
 #endif
+    
+    if (case_sensitive) {
+        le = dir->children.Flink;
+        while (le != &dir->children) {
+            c = CONTAINING_RECORD(le, file_ref, list_entry);
+            
+            if (c->refcount > 0 && c->filepart.Length == name->Length &&
+                RtlCompareMemory(c->filepart.Buffer, name->Buffer, name->Length) == name->Length) {
+                if (c->deleted) {
+                    deleted = c;
+                } else {
+#ifdef DEBUG_FCB_REFCOUNTS
+                    rc = InterlockedIncrement(&c->refcount);
+                    WARN("fileref %p: refcount now %i (%S)\n", c, rc, file_desc_fileref(c));
+#else
+                    InterlockedIncrement(&c->refcount);
+#endif
+                    return c;
+                }
+            }
+            
+            le = le->Flink;
+        }
+        
+        goto end;
+    }
 
     Status = RtlUpcaseUnicodeString(&ucus, name, TRUE);
     if (!NT_SUCCESS(Status)) {
         ERR("RtlUpcaseUnicodeString returned %08x\n", Status);
         return NULL;
     }
-    
+        
     le = dir->children.Flink;
     while (le != &dir->children) {
         c = CONTAINING_RECORD(le, file_ref, list_entry);
@@ -1130,56 +1169,23 @@ static file_ref* search_fileref_children(file_ref* dir, PUNICODE_STRING name) {
         le = le->Flink;
     }
     
+    ExFreePool(ucus.Buffer);
+    
+end:
     if (deleted)
         increase_fileref_refcount(deleted);
     
-    ExFreePool(ucus.Buffer);
-    
     return deleted;
 }
 
-static UINT64 get_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, PIRP Irp) {
-    KEY searchkey;
-    traverse_ptr tp;
-    NTSTATUS Status;
-    EXTENT_ITEM* ei;
-    
-    searchkey.obj_id = address;
-    searchkey.obj_type = TYPE_EXTENT_ITEM;
-    searchkey.offset = size;
-    
-    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return 0;
-    }
-    
-    if (keycmp(&searchkey, &tp.item->key)) {
-        ERR("couldn't find (%llx,%x,%llx) in extent tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
-        return 0;
-    }
-    
-    if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
-        EXTENT_ITEM_V0* eiv0 = (EXTENT_ITEM_V0*)tp.item->data;
-        
-        return eiv0->refcount;
-    } else if (tp.item->size < sizeof(EXTENT_ITEM)) {
-        ERR("(%llx,%x,%llx) was %x bytes, expected at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type,
-                                                                       tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
-        return 0;
-    }
-    
-    ei = (EXTENT_ITEM*)tp.item->data;
-    
-    return ei->refcount;
-}
-
-NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type, PANSI_STRING utf8, fcb* parent, fcb** pfcb, PIRP Irp) {
+NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type, PANSI_STRING utf8, fcb* parent, fcb** pfcb, POOL_TYPE pooltype, PIRP Irp) {
     KEY searchkey;
     traverse_ptr tp;
     NTSTATUS Status;
     fcb* fcb;
     BOOL b;
+    UINT8* eadata;
+    UINT16 ealen;
     
     if (!IsListEmpty(&subvol->fcbs)) {
         LIST_ENTRY* le = subvol->fcbs.Flink;
@@ -1204,7 +1210,7 @@ NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type,
         }
     }
     
-    fcb = create_fcb(PagedPool);
+    fcb = create_fcb(pooltype);
     if (!fcb) {
         ERR("out of memory\n");
         return STATUS_INSUFFICIENT_RESOURCES;
@@ -1257,13 +1263,50 @@ NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type,
     
     fcb_get_sd(fcb, parent, Irp);
     
-    if (fcb->type == BTRFS_TYPE_DIRECTORY) {
+    if (fcb->type == BTRFS_TYPE_DIRECTORY && fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT) {
         UINT8* xattrdata;
         UINT16 xattrlen;
         
         if (get_xattr(Vcb, subvol, inode, EA_REPARSE, EA_REPARSE_HASH, &xattrdata, &xattrlen, Irp)) {
             fcb->reparse_xattr.Buffer = (char*)xattrdata;
             fcb->reparse_xattr.Length = fcb->reparse_xattr.MaximumLength = xattrlen;
+        } else {
+            fcb->atts &= ~FILE_ATTRIBUTE_REPARSE_POINT;
+            
+            if (!Vcb->readonly && !(subvol->root_item.flags & BTRFS_SUBVOL_READONLY)) {
+                fcb->atts_changed = TRUE;
+                mark_fcb_dirty(fcb);
+            }
+        }
+    }
+    
+    fcb->ealen = 0;
+    
+    if (get_xattr(Vcb, subvol, inode, EA_EA, EA_EA_HASH, &eadata, &ealen, Irp)) {
+        ULONG offset;
+        
+        Status = IoCheckEaBufferValidity((FILE_FULL_EA_INFORMATION*)eadata, ealen, &offset);
+        
+        if (!NT_SUCCESS(Status)) {
+            WARN("IoCheckEaBufferValidity returned %08x (error at offset %u)\n", Status, offset);
+            ExFreePool(eadata);
+        } else {
+            FILE_FULL_EA_INFORMATION* eainfo;
+            fcb->ea_xattr.Buffer = (char*)eadata;
+            fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = ealen;
+            
+            fcb->ealen = 4;
+            
+            // calculate ealen
+            eainfo = (FILE_FULL_EA_INFORMATION*)eadata;
+            do {
+                fcb->ealen += 5 + eainfo->EaNameLength + eainfo->EaValueLength;
+                
+                if (eainfo->NextEntryOffset == 0)
+                    break;
+                
+                eainfo = (FILE_FULL_EA_INFORMATION*)(((UINT8*)eainfo) + eainfo->NextEntryOffset);
+            } while (TRUE);
         }
     }
     
@@ -1320,18 +1363,18 @@ NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type,
                     if (ed2->address == 0 && ed2->size == 0) // sparse
                         goto nextitem;
                     
-                    if (ed2->size != 0)
-                        unique = get_extent_refcount(fcb->Vcb, ed2->address, ed2->size, Irp) == 1;
+                    if (ed2->size != 0 && is_tree_unique(Vcb, tp.tree, Irp))
+                        unique = is_extent_unique(Vcb, ed2->address, ed2->size, Irp);
                 }
                 
-                ext = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG);
+                ext = ExAllocatePoolWithTag(pooltype, sizeof(extent), ALLOC_TAG);
                 if (!ext) {
                     ERR("out of memory\n");
                     free_fcb(fcb);
                     return STATUS_INSUFFICIENT_RESOURCES;
                 }
                 
-                ext->data = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
+                ext->data = ExAllocatePoolWithTag(pooltype, tp.item->size, ALLOC_TAG);
                 if (!ext->data) {
                     ERR("out of memory\n");
                     ExFreePool(ext);
@@ -1396,7 +1439,7 @@ nextitem:
                     hardlink* hl;
                     ULONG stringlen;
                     
-                    hl = ExAllocatePoolWithTag(PagedPool, sizeof(hardlink), ALLOC_TAG);
+                    hl = ExAllocatePoolWithTag(pooltype, sizeof(hardlink), ALLOC_TAG);
                     if (!hl) {
                         ERR("out of memory\n");
                         free_fcb(fcb);
@@ -1409,7 +1452,7 @@ nextitem:
                     hl->utf8.Length = hl->utf8.MaximumLength = ir->n;
                     
                     if (hl->utf8.Length > 0) {
-                        hl->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, hl->utf8.MaximumLength, ALLOC_TAG);
+                        hl->utf8.Buffer = ExAllocatePoolWithTag(pooltype, hl->utf8.MaximumLength, ALLOC_TAG);
                         RtlCopyMemory(hl->utf8.Buffer, ir->name, ir->n);
                     }
                     
@@ -1426,7 +1469,7 @@ nextitem:
                     if (stringlen == 0)
                         hl->name.Buffer = NULL;
                     else {
-                        hl->name.Buffer = ExAllocatePoolWithTag(PagedPool, hl->name.MaximumLength, ALLOC_TAG);
+                        hl->name.Buffer = ExAllocatePoolWithTag(pooltype, hl->name.MaximumLength, ALLOC_TAG);
                         
                         if (!hl->name.Buffer) {
                             ERR("out of memory\n");
@@ -1461,7 +1504,7 @@ nextitem:
                     hardlink* hl;
                     ULONG stringlen;
                     
-                    hl = ExAllocatePoolWithTag(PagedPool, sizeof(hardlink), ALLOC_TAG);
+                    hl = ExAllocatePoolWithTag(pooltype, sizeof(hardlink), ALLOC_TAG);
                     if (!hl) {
                         ERR("out of memory\n");
                         free_fcb(fcb);
@@ -1474,7 +1517,7 @@ nextitem:
                     hl->utf8.Length = hl->utf8.MaximumLength = ier->n;
                     
                     if (hl->utf8.Length > 0) {
-                        hl->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, hl->utf8.MaximumLength, ALLOC_TAG);
+                        hl->utf8.Buffer = ExAllocatePoolWithTag(pooltype, hl->utf8.MaximumLength, ALLOC_TAG);
                         RtlCopyMemory(hl->utf8.Buffer, ier->name, ier->n);
                     }
                     
@@ -1491,7 +1534,7 @@ nextitem:
                     if (stringlen == 0)
                         hl->name.Buffer = NULL;
                     else {
-                        hl->name.Buffer = ExAllocatePoolWithTag(PagedPool, hl->name.MaximumLength, ALLOC_TAG);
+                        hl->name.Buffer = ExAllocatePoolWithTag(pooltype, hl->name.MaximumLength, ALLOC_TAG);
                         
                         if (!hl->name.Buffer) {
                             ERR("out of memory\n");
@@ -1599,7 +1642,7 @@ NTSTATUS open_fcb_stream(device_extension* Vcb, root* subvol, UINT64 inode, ANSI
         return Status;
     }
     
-    if (keycmp(&tp.item->key, &searchkey)) {
+    if (keycmp(tp.item->key, searchkey)) {
         ERR("error - could not find key for xattr\n");
         free_fcb(fcb);
         return STATUS_INTERNAL_ERROR;
@@ -1664,7 +1707,8 @@ void insert_fileref_child(file_ref* parent, file_ref* child, BOOL do_lock) {
         ExReleaseResourceLite(&parent->nonpaged->children_lock);
 }
 
-NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnus, file_ref* related, BOOL parent, USHORT* unparsed, ULONG* fn_offset, PIRP Irp) {
+NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnus, file_ref* related, BOOL parent, USHORT* unparsed, ULONG* fn_offset,
+                      POOL_TYPE pooltype, BOOL case_sensitive, PIRP Irp) {
     UNICODE_STRING fnus2;
     file_ref *dir, *sf, *sf2;
     ULONG i, num_parts;
@@ -1707,7 +1751,7 @@ NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnu
         }
         
         if (fnus2.Length == sizeof(WCHAR)) {
-            if (Vcb->root_fileref->fcb->open_count == 0) { // don't allow root to be opened on unmounted FS
+            if (Vcb->root_fileref->open_count == 0) { // don't allow root to be opened on unmounted FS
                 ULONG cc;
                 IO_STATUS_BLOCK iosb;
                 
@@ -1737,6 +1781,9 @@ NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnu
     
     if (fnus->Length == 0) {
         num_parts = 0;
+    } else if (fnus->Length == wcslen(datastring) * sizeof(WCHAR) &&
+               RtlCompareMemory(fnus->Buffer, datastring, wcslen(datastring) * sizeof(WCHAR)) == wcslen(datastring) * sizeof(WCHAR)) {
+        num_parts = 0;
     } else {
         Status = split_path(&fnus2, &parts, &num_parts, &has_stream);
         if (!NT_SUCCESS(Status)) {
@@ -1763,12 +1810,10 @@ NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnu
         goto end2;
     }
     
-    ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
-    
     for (i = 0; i < num_parts; i++) {
         BOOL lastpart = (i == num_parts-1) || (i == num_parts-2 && has_stream);
         
-        sf2 = search_fileref_children(sf, &parts[i]);
+        sf2 = search_fileref_children(sf, &parts[i], case_sensitive);
         
         if (sf2 && sf2->fcb->type != BTRFS_TYPE_DIRECTORY && !lastpart) {
             WARN("passed path including file as subdirectory\n");
@@ -1778,6 +1823,13 @@ NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnu
             goto end;
         }
         
+        if (sf2 && sf2->deleted) {
+            TRACE("element in path has been deleted\n");
+            free_fileref(sf2);
+            Status = lastpart ? STATUS_OBJECT_NAME_NOT_FOUND : STATUS_OBJECT_PATH_NOT_FOUND;
+            goto end;
+        }
+        
         if (!sf2) {
             if (has_stream && i == num_parts - 1) {
                 UNICODE_STRING streamname;
@@ -1858,7 +1910,7 @@ NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnu
                 UINT8 type;
                 ANSI_STRING utf8;
                 
-                Status = find_file_in_dir(Vcb, &parts[i], sf, &subvol, &inode, &type, &index, &utf8, Irp);
+                Status = find_file_in_dir(Vcb, &parts[i], sf, &subvol, &inode, &type, &index, &utf8, case_sensitive, Irp);
                 if (Status == STATUS_OBJECT_NAME_NOT_FOUND) {
                     TRACE("could not find %.*S\n", parts[i].Length / sizeof(WCHAR), parts[i].Buffer);
 
@@ -1871,7 +1923,7 @@ NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnu
                     fcb* fcb;
                     ULONG strlen;
                     
-                    Status = open_fcb(Vcb, subvol, inode, type, &utf8, sf->fcb, &fcb, Irp);
+                    Status = open_fcb(Vcb, subvol, inode, type, &utf8, sf->fcb, &fcb, pooltype, Irp);
                     if (!NT_SUCCESS(Status)) {
                         ERR("open_fcb returned %08x\n", Status);
                         goto end;
@@ -1965,7 +2017,6 @@ NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnu
     
 end:
     free_fileref(sf);
-    ExReleaseResourceLite(&Vcb->fcb_lock);
     
 end2:
     if (parts)
@@ -2021,7 +2072,8 @@ end:
     return Status;
 }
 
-static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_STRING fpus, file_ref* parfileref, ULONG options, file_ref** pfr, LIST_ENTRY* rollback) {
+static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_STRING fpus, file_ref* parfileref, ULONG options,
+                                     FILE_FULL_EA_INFORMATION* ea, ULONG ealen, file_ref** pfr, LIST_ENTRY* rollback) {
     NTSTATUS Status;
     fcb* fcb;
     ULONG utf8len;
@@ -2078,12 +2130,10 @@ static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_S
     parfileref->fcb->inode_item.st_mtime = now;
     ExReleaseResourceLite(parfileref->fcb->Header.Resource);
     
+    parfileref->fcb->inode_item_changed = TRUE;
     mark_fcb_dirty(parfileref->fcb);
     
-    if (parfileref->fcb->subvol->lastinode == 0)
-        get_last_inode(Vcb, parfileref->fcb->subvol, Irp);
-    
-    inode = parfileref->fcb->subvol->lastinode + 1;
+    inode = InterlockedIncrement64(&parfileref->fcb->subvol->lastinode);
     
     type = options & FILE_DIRECTORY_FILE ? BTRFS_TYPE_DIRECTORY : BTRFS_TYPE_FILE;
     
@@ -2108,8 +2158,6 @@ static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_S
     if (IrpSp->Parameters.Create.FileAttributes == FILE_ATTRIBUTE_NORMAL)
         IrpSp->Parameters.Create.FileAttributes = defda;
     
-    parfileref->fcb->subvol->lastinode++;
-    
     fcb = create_fcb(pool_type);
     if (!fcb) {
         ERR("out of memory\n");
@@ -2119,8 +2167,10 @@ static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_S
 
     fcb->Vcb = Vcb;
 
-    if (IrpSp->Flags & SL_OPEN_PAGING_FILE)
+    if (IrpSp->Flags & SL_OPEN_PAGING_FILE) {
         fcb->Header.Flags2 |= FSRTL_FLAG2_IS_PAGING_FILE;
+        Vcb->disallow_dismount = TRUE;
+    }
 
     fcb->inode_item.generation = Vcb->superblock.generation;
     fcb->inode_item.transid = Vcb->superblock.generation;
@@ -2161,6 +2211,8 @@ static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_S
             fcb->inode_item.flags |= BTRFS_INODE_COMPRESS;
     }
     
+    fcb->inode_item_changed = TRUE;
+    
     fcb->Header.IsFastIoPossible = fast_io_possible(fcb);
     fcb->Header.AllocationSize.QuadPart = 0;
     fcb->Header.FileSize.QuadPart = 0;
@@ -2183,24 +2235,52 @@ static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_S
     
     if (!NT_SUCCESS(Status)) {
         ERR("fcb_get_new_sd returned %08x\n", Status);
-        
-        ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
         free_fcb(fcb);
-        ExReleaseResource(&Vcb->fcb_lock);
-        
         return Status;
     }
     
     fcb->sd_dirty = TRUE;
     
+    if (ea && ealen > 0) {
+        FILE_FULL_EA_INFORMATION* eainfo;
+        
+        fcb->ealen = 4;
+        
+        // capitalize EA names
+        eainfo = ea;
+        do {
+            STRING s;
+            
+            s.Length = s.MaximumLength = eainfo->EaNameLength;
+            s.Buffer = eainfo->EaName;
+            
+            RtlUpperString(&s, &s);
+            
+            fcb->ealen += 5 + eainfo->EaNameLength + eainfo->EaValueLength;
+            
+            if (eainfo->NextEntryOffset == 0)
+                break;
+            
+            eainfo = (FILE_FULL_EA_INFORMATION*)(((UINT8*)eainfo) + eainfo->NextEntryOffset);
+        } while (TRUE);
+        
+        fcb->ea_xattr.Buffer = ExAllocatePoolWithTag(pool_type, ealen, ALLOC_TAG);
+        if (!fcb->ea_xattr.Buffer) {
+            ERR("out of memory\n");
+            free_fcb(fcb);
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = ealen;
+        RtlCopyMemory(fcb->ea_xattr.Buffer, ea, ealen);
+        
+        fcb->ea_changed = TRUE;
+    }
+    
     hl = ExAllocatePoolWithTag(pool_type, sizeof(hardlink), ALLOC_TAG);
     if (!hl) {
         ERR("out of memory\n");
-        
-        ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
         free_fcb(fcb);
-        ExReleaseResource(&Vcb->fcb_lock);
-        
         return STATUS_INSUFFICIENT_RESOURCES;
     }
     
@@ -2213,11 +2293,7 @@ static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_S
     if (!hl->utf8.Buffer) {
         ERR("out of memory\n");
         ExFreePool(hl);
-        
-        ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
         free_fcb(fcb);
-        ExReleaseResource(&Vcb->fcb_lock);
-        
         return STATUS_INSUFFICIENT_RESOURCES;
     }
     RtlCopyMemory(hl->utf8.Buffer, utf8, utf8len);
@@ -2229,11 +2305,7 @@ static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_S
         ERR("out of memory\n");
         ExFreePool(hl->utf8.Buffer);
         ExFreePool(hl);
-        
-        ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
         free_fcb(fcb);
-        ExReleaseResource(&Vcb->fcb_lock);
-        
         return STATUS_INSUFFICIENT_RESOURCES;
     }
     
@@ -2244,11 +2316,7 @@ static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_S
     fileref = create_fileref();
     if (!fileref) {
         ERR("out of memory\n");
-        
-        ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
         free_fcb(fcb);
-        ExReleaseResource(&Vcb->fcb_lock);
-        
         return STATUS_INSUFFICIENT_RESOURCES;
     }
     
@@ -2267,11 +2335,7 @@ static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_S
         
         if (!fileref->filepart.Buffer) {
             ERR("out of memory\n");
-            
-            ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
             free_fcb(fcb);
-            ExReleaseResource(&Vcb->fcb_lock);
-            
             return STATUS_INSUFFICIENT_RESOURCES;
         }
         
@@ -2281,9 +2345,7 @@ static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_S
     Status = RtlUpcaseUnicodeString(&fileref->filepart_uc, &fileref->filepart, TRUE);
     if (!NT_SUCCESS(Status)) {
         ERR("RtlUpcaseUnicodeString returned %08x\n", Status);
-        ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
         free_fileref(fileref);
-        ExReleaseResource(&Vcb->fcb_lock);
         return Status;
     }
         
@@ -2292,9 +2354,7 @@ static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_S
         
         if (!NT_SUCCESS(Status)) {
             ERR("extend_file returned %08x\n", Status);
-            ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
             free_fileref(fileref);
-            ExReleaseResource(&Vcb->fcb_lock);
             return Status;
         }
     }
@@ -2328,10 +2388,12 @@ static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_S
 }
 
 static NTSTATUS create_stream(device_extension* Vcb, file_ref** pfileref, file_ref** pparfileref, PUNICODE_STRING fpus, PUNICODE_STRING stream,
-                              PIRP Irp, ULONG options, POOL_TYPE pool_type, LIST_ENTRY* rollback) {
+                              PIRP Irp, ULONG options, POOL_TYPE pool_type, BOOL case_sensitive, LIST_ENTRY* rollback) {
     file_ref *fileref, *newpar, *parfileref;
     fcb* fcb;
     static char xapref[] = "user.";
+    static WCHAR DOSATTRIB[] = L"DOSATTRIB";
+    static WCHAR EA[] = L"EA";
     ULONG xapreflen = strlen(xapref), overhead;
     LARGE_INTEGER time;
     BTRFS_TIME now;
@@ -2348,9 +2410,7 @@ static NTSTATUS create_stream(device_extension* Vcb, file_ref** pfileref, file_r
     
     parfileref = *pparfileref;
     
-    ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
-    Status = open_fileref(Vcb, &newpar, fpus, parfileref, FALSE, NULL, NULL, Irp);
-    ExReleaseResource(&Vcb->fcb_lock);
+    Status = open_fileref(Vcb, &newpar, fpus, parfileref, FALSE, NULL, NULL, PagedPool, case_sensitive, Irp);
     
     if (Status == STATUS_OBJECT_NAME_NOT_FOUND) {
         UNICODE_STRING fpus2;
@@ -2368,7 +2428,7 @@ static NTSTATUS create_stream(device_extension* Vcb, file_ref** pfileref, file_r
         
         RtlCopyMemory(fpus2.Buffer, fpus->Buffer, fpus2.Length);
         
-        Status = file_create2(Irp, Vcb, &fpus2, parfileref, options, &newpar, rollback);
+        Status = file_create2(Irp, Vcb, &fpus2, parfileref, options, NULL, 0, &newpar, rollback);
     
         if (!NT_SUCCESS(Status)) {
             ERR("file_create2 returned %08x\n", Status);
@@ -2383,15 +2443,13 @@ static NTSTATUS create_stream(device_extension* Vcb, file_ref** pfileref, file_r
         return Status;
     }
     
-    ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
     free_fileref(parfileref);
-    ExReleaseResource(&Vcb->fcb_lock);
     
     parfileref = newpar;
     *pparfileref = parfileref;
     
-    if (parfileref->fcb->type != BTRFS_TYPE_FILE && parfileref->fcb->type != BTRFS_TYPE_SYMLINK) {
-        WARN("parent not file or symlink\n");
+    if (parfileref->fcb->type != BTRFS_TYPE_FILE && parfileref->fcb->type != BTRFS_TYPE_SYMLINK && parfileref->fcb->type != BTRFS_TYPE_DIRECTORY) {
+        WARN("parent not file, directory, or symlink\n");
         return STATUS_INVALID_PARAMETER;
     }
     
@@ -2399,6 +2457,11 @@ static NTSTATUS create_stream(device_extension* Vcb, file_ref** pfileref, file_r
         WARN("tried to create directory as stream\n");
         return STATUS_INVALID_PARAMETER;
     }
+    
+    if ((stream->Length == wcslen(DOSATTRIB) * sizeof(WCHAR) && RtlCompareMemory(stream->Buffer, DOSATTRIB, stream->Length) == stream->Length) || 
+        (stream->Length == wcslen(EA) * sizeof(WCHAR) && RtlCompareMemory(stream->Buffer, EA, stream->Length) == stream->Length)) {
+        return STATUS_OBJECT_NAME_INVALID;
+    }
         
     fcb = create_fcb(pool_type);
     if (!fcb) {
@@ -2428,9 +2491,7 @@ static NTSTATUS create_stream(device_extension* Vcb, file_ref** pfileref, file_r
     Status = RtlUnicodeToUTF8N(NULL, 0, &utf8len, stream->Buffer, stream->Length);
     if (!NT_SUCCESS(Status)) {
         ERR("RtlUnicodeToUTF8N 1 returned %08x\n", Status);
-        ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
         free_fcb(fcb);
-        ExReleaseResource(&Vcb->fcb_lock);
         return Status;
     }
     
@@ -2439,9 +2500,7 @@ static NTSTATUS create_stream(device_extension* Vcb, file_ref** pfileref, file_r
     fcb->adsxattr.Buffer = ExAllocatePoolWithTag(pool_type, fcb->adsxattr.MaximumLength, ALLOC_TAG);
     if (!fcb->adsxattr.Buffer) {
         ERR("out of memory\n");
-        ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
         free_fcb(fcb);
-        ExReleaseResource(&Vcb->fcb_lock);
         return STATUS_INSUFFICIENT_RESOURCES;
     }
     
@@ -2450,9 +2509,7 @@ static NTSTATUS create_stream(device_extension* Vcb, file_ref** pfileref, file_r
     Status = RtlUnicodeToUTF8N(&fcb->adsxattr.Buffer[xapreflen], utf8len, &utf8len, stream->Buffer, stream->Length);
     if (!NT_SUCCESS(Status)) {
         ERR("RtlUnicodeToUTF8N 2 returned %08x\n", Status);
-        ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
         free_fcb(fcb);
-        ExReleaseResource(&Vcb->fcb_lock);
         return Status;
     }
     
@@ -2470,13 +2527,11 @@ static NTSTATUS create_stream(device_extension* Vcb, file_ref** pfileref, file_r
     Status = find_item(Vcb, parfileref->fcb->subvol, &tp, &searchkey, FALSE, Irp);
     if (!NT_SUCCESS(Status)) {
         ERR("find_item returned %08x\n", Status);
-        ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
         free_fcb(fcb);
-        ExReleaseResource(&Vcb->fcb_lock);
         return Status;
     }
     
-    if (!keycmp(&tp.item->key, &searchkey))
+    if (!keycmp(tp.item->key, searchkey))
         overhead = tp.item->size;
     else
         overhead = 0;
@@ -2485,9 +2540,7 @@ static NTSTATUS create_stream(device_extension* Vcb, file_ref** pfileref, file_r
     
     if (utf8len + xapreflen + overhead > fcb->adsmaxlen) {
         WARN("not enough room for new DIR_ITEM (%u + %u > %u)", utf8len + xapreflen, overhead, fcb->adsmaxlen);
-        ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
         free_fcb(fcb);
-        ExReleaseResource(&Vcb->fcb_lock);
         return STATUS_DISK_FULL;
     } else
         fcb->adsmaxlen -= overhead + utf8len + xapreflen;
@@ -2495,9 +2548,7 @@ static NTSTATUS create_stream(device_extension* Vcb, file_ref** pfileref, file_r
     fileref = create_fileref();
     if (!fileref) {
         ERR("out of memory\n");
-        ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
         free_fcb(fcb);
-        ExReleaseResource(&Vcb->fcb_lock);
         return STATUS_INSUFFICIENT_RESOURCES;
     }
     
@@ -2507,9 +2558,7 @@ static NTSTATUS create_stream(device_extension* Vcb, file_ref** pfileref, file_r
     fileref->filepart.Buffer = ExAllocatePoolWithTag(pool_type, fileref->filepart.MaximumLength, ALLOC_TAG);
     if (!fileref->filepart.Buffer) {
         ERR("out of memory\n");
-        ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
         free_fileref(fileref);
-        ExReleaseResource(&Vcb->fcb_lock);
         return STATUS_INSUFFICIENT_RESOURCES;
     }
     
@@ -2518,19 +2567,15 @@ static NTSTATUS create_stream(device_extension* Vcb, file_ref** pfileref, file_r
     Status = RtlUpcaseUnicodeString(&fileref->filepart_uc, &fileref->filepart, TRUE);
     if (!NT_SUCCESS(Status)) {
         ERR("RtlUpcaseUnicodeString returned %08x\n", Status);
-        ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
         free_fileref(fileref);
-        ExReleaseResource(&Vcb->fcb_lock);
         return Status;
     }
     
     mark_fcb_dirty(fcb);
     mark_fileref_dirty(fileref);
     
-    ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
     InsertTailList(&fcb->subvol->fcbs, &fcb->list_entry);
     InsertTailList(&Vcb->all_fcbs, &fcb->list_entry_all);
-    ExReleaseResource(&Vcb->fcb_lock);
     
     KeQuerySystemTime(&time);
     win_time_to_unix(time, &now);
@@ -2538,6 +2583,7 @@ static NTSTATUS create_stream(device_extension* Vcb, file_ref** pfileref, file_r
     parfileref->fcb->inode_item.transid = Vcb->superblock.generation;
     parfileref->fcb->inode_item.sequence++;
     parfileref->fcb->inode_item.st_ctime = now;
+    parfileref->fcb->inode_item_changed = TRUE;
     
     mark_fcb_dirty(parfileref->fcb);
     
@@ -2587,9 +2633,7 @@ static NTSTATUS STDCALL file_create(PIRP Irp, device_extension* Vcb, PFILE_OBJEC
     } else
         related = NULL;
     
-    ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
-    Status = open_fileref(Vcb, &parfileref, &FileObject->FileName, related, TRUE, NULL, NULL, Irp);
-    ExReleaseResource(&Vcb->fcb_lock);
+    Status = open_fileref(Vcb, &parfileref, &FileObject->FileName, related, TRUE, NULL, NULL, pool_type, IrpSp->Flags & SL_CASE_SENSITIVE, Irp);
     
     if (!NT_SUCCESS(Status))
         goto end;
@@ -2659,7 +2703,7 @@ static NTSTATUS STDCALL file_create(PIRP Irp, device_extension* Vcb, PFILE_OBJEC
     }
     
     if (stream.Length > 0) {
-        Status = create_stream(Vcb, &fileref, &parfileref, &fpus, &stream, Irp, options, pool_type, rollback);
+        Status = create_stream(Vcb, &fileref, &parfileref, &fpus, &stream, Irp, options, pool_type, IrpSp->Flags & SL_CASE_SENSITIVE, rollback);
         if (!NT_SUCCESS(Status)) {
             ERR("create_stream returned %08x\n", Status);
             goto end;
@@ -2670,7 +2714,18 @@ static NTSTATUS STDCALL file_create(PIRP Irp, device_extension* Vcb, PFILE_OBJEC
             goto end;
         }
         
-        Status = file_create2(Irp, Vcb, &fpus, parfileref, options, &fileref, rollback);
+        if (Irp->AssociatedIrp.SystemBuffer && IrpSp->Parameters.Create.EaLength > 0) {
+            ULONG offset;
+            
+            Status = IoCheckEaBufferValidity(Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.Create.EaLength, &offset);
+            if (!NT_SUCCESS(Status)) {
+                ERR("IoCheckEaBufferValidity returned %08x (error at offset %u)\n", Status, offset);
+                goto end;
+            }
+        }
+        
+        Status = file_create2(Irp, Vcb, &fpus, parfileref, options, Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.Create.EaLength,
+                              &fileref, rollback);
         
         if (!NT_SUCCESS(Status)) {
             ERR("file_create2 returned %08x\n", Status);
@@ -2687,9 +2742,7 @@ static NTSTATUS STDCALL file_create(PIRP Irp, device_extension* Vcb, PFILE_OBJEC
     if (!ccb) {
         ERR("out of memory\n");
         Status = STATUS_INSUFFICIENT_RESOURCES;
-        ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
         free_fileref(fileref);
-        ExReleaseResource(&Vcb->fcb_lock);
         goto end;
     }
     
@@ -2706,13 +2759,15 @@ static NTSTATUS STDCALL file_create(PIRP Irp, device_extension* Vcb, PFILE_OBJEC
     ccb->has_wildcard = FALSE;
     ccb->specific_file = FALSE;
     ccb->access = access_state->OriginalDesiredAccess;
+    ccb->case_sensitive = IrpSp->Flags & SL_CASE_SENSITIVE;
     
 #ifdef DEBUG_FCB_REFCOUNTS
-    oc = InterlockedIncrement(&fileref->fcb->open_count);
-    ERR("fcb %p: open_count now %i\n", fileref->fcb, oc);
+    oc = InterlockedIncrement(&fileref->open_count);
+    ERR("fileref %p: open_count now %i\n", fileref, oc);
 #else
-    InterlockedIncrement(&fileref->fcb->open_count);
+    InterlockedIncrement(&fileref->open_count);
 #endif
+    InterlockedIncrement(&Vcb->open_files);
     
     FileObject->FsContext2 = ccb;
     
@@ -2756,11 +2811,8 @@ end:
         ExFreePool(fpus.Buffer);
     
 end2:
-    if (parfileref) {
-        ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
+    if (parfileref)
         free_fileref(parfileref);
-        ExReleaseResource(&Vcb->fcb_lock);
-    }
     
     return Status;
 }
@@ -3019,6 +3071,8 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
     USHORT unparsed;
     ULONG fn_offset = 0;
     file_ref *related, *fileref;
+    POOL_TYPE pool_type = Stack->Flags & SL_OPEN_PAGING_FILE ? NonPagedPool : PagedPool;
+    ACCESS_MASK granted_access;
 #ifdef DEBUG_FCB_REFCOUNTS
     LONG oc;
 #endif
@@ -3031,7 +3085,7 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
     if (options & FILE_DIRECTORY_FILE && RequestedDisposition == FILE_SUPERSEDE) {
         WARN("error - supersede requested with FILE_DIRECTORY_FILE\n");
         Status = STATUS_INVALID_PARAMETER;
-        goto exit;
+        goto exit2;
     }
 
     FileObject = Stack->FileObject;
@@ -3081,14 +3135,16 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
     
     if (Vcb->readonly && (RequestedDisposition == FILE_SUPERSEDE || RequestedDisposition == FILE_CREATE || RequestedDisposition == FILE_OVERWRITE)) {
         Status = STATUS_MEDIA_WRITE_PROTECTED;
-        goto exit;
+        goto exit2;
     }
     
     if (Vcb->readonly && Stack->Parameters.Create.SecurityContext->DesiredAccess &
         (FILE_WRITE_DATA | FILE_APPEND_DATA | FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES | DELETE | WRITE_OWNER | WRITE_DAC)) {
         Status = STATUS_MEDIA_WRITE_PROTECTED;
-        goto exit;
+        goto exit2;
     }
+    
+    ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
 
     if (options & FILE_OPEN_BY_FILE_ID) {
         if (FileObject->FileName.Length == sizeof(UINT64) && related && RequestedDisposition == FILE_OPEN) {
@@ -3104,9 +3160,7 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
                 increase_fileref_refcount(fileref);
                 Status = STATUS_SUCCESS;
             } else {
-                ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
                 Status = open_fileref_by_inode(Vcb, related->fcb->subvol, inode, &fileref, Irp);
-                ExReleaseResource(&Vcb->fcb_lock);
             }
         } else {
             WARN("FILE_OPEN_BY_FILE_ID only supported for inodes\n");
@@ -3119,9 +3173,8 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
             goto exit;
         }
         
-        ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
-        Status = open_fileref(Vcb, &fileref, &FileObject->FileName, related, Stack->Flags & SL_OPEN_TARGET_DIRECTORY, &unparsed, &fn_offset, Irp);
-        ExReleaseResource(&Vcb->fcb_lock);
+        Status = open_fileref(Vcb, &fileref, &FileObject->FileName, related, Stack->Flags & SL_OPEN_TARGET_DIRECTORY, &unparsed, &fn_offset,
+                              pool_type, Stack->Flags & SL_CASE_SENSITIVE, Irp);
     }
     
     if (Status == STATUS_REPARSE) {
@@ -3134,9 +3187,7 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
         if (!NT_SUCCESS(Status)) {
             ERR("get_reparse_block returned %08x\n", Status);
             
-            ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
             free_fileref(fileref);
-            ExReleaseResourceLite(&Vcb->fcb_lock);
             goto exit;
         }
         
@@ -3147,16 +3198,12 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
         
         Irp->Tail.Overlay.AuxiliaryBuffer = (void*)data;
         
-        ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
         free_fileref(fileref);
-        ExReleaseResourceLite(&Vcb->fcb_lock);
         goto exit;
     }
     
     if (NT_SUCCESS(Status) && fileref->deleted) {
-        ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
         free_fileref(fileref);
-        ExReleaseResourceLite(&Vcb->fcb_lock);
         
         Status = STATUS_OBJECT_NAME_NOT_FOUND;
     }
@@ -3165,9 +3212,7 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
         if (RequestedDisposition == FILE_CREATE) {
             TRACE("file %S already exists, returning STATUS_OBJECT_NAME_COLLISION\n", file_desc_fileref(fileref));
             Status = STATUS_OBJECT_NAME_COLLISION;
-            ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
             free_fileref(fileref);
-            ExReleaseResource(&Vcb->fcb_lock);
             goto exit;
         }
     } else if (Status == STATUS_OBJECT_NAME_NOT_FOUND) {
@@ -3189,27 +3234,35 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
         if (RequestedDisposition == FILE_SUPERSEDE || RequestedDisposition == FILE_OVERWRITE || RequestedDisposition == FILE_OVERWRITE_IF) {
             if (fileref->fcb->type == BTRFS_TYPE_DIRECTORY || fileref->fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY) {
                 Status = STATUS_ACCESS_DENIED;
-                ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
                 free_fileref(fileref);
-                ExReleaseResource(&Vcb->fcb_lock);
                 goto exit;
             }
             
             if (Vcb->readonly) {
                 Status = STATUS_MEDIA_WRITE_PROTECTED;
-                ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
                 free_fileref(fileref);
-                ExReleaseResource(&Vcb->fcb_lock);
                 goto exit;
             }
         }
         
-        if (fileref->fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY && Stack->Parameters.Create.SecurityContext->DesiredAccess &
+        SeLockSubjectContext(&Stack->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext);
+        
+        if (!SeAccessCheck(fileref->fcb->ads ? fileref->parent->fcb->sd : fileref->fcb->sd,
+                           &Stack->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext,
+                           FALSE, Stack->Parameters.Create.SecurityContext->DesiredAccess, 0, NULL,
+                           IoGetFileObjectGenericMapping(), Stack->Flags & SL_FORCE_ACCESS_CHECK ? UserMode : Irp->RequestorMode,
+                           &granted_access, &Status)) {
+            SeUnlockSubjectContext(&Stack->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext);
+            WARN("SeAccessCheck failed, returning %08x\n", Status);
+            goto exit;
+        }
+        
+        SeUnlockSubjectContext(&Stack->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext);
+        
+        if (fileref->fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY && granted_access &
             (FILE_WRITE_DATA | FILE_APPEND_DATA | FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES | DELETE | WRITE_OWNER | WRITE_DAC)) {
             Status = STATUS_ACCESS_DENIED;
-            ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
             free_fileref(fileref);
-            ExReleaseResource(&Vcb->fcb_lock);
             goto exit;
         }
         
@@ -3221,14 +3274,12 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
                 WARN("could not open as deletion pending\n");
                 Status = STATUS_DELETE_PENDING;
                 
-                ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
                 free_fileref(fileref);
-                ExReleaseResourceLite(&Vcb->fcb_lock);
                 goto exit;
             }
             sf = sf->parent;
         }
-        
+
         if (fileref->fcb->atts & FILE_ATTRIBUTE_READONLY) {
             ACCESS_MASK allowed = DELETE | READ_CONTROL | WRITE_OWNER | WRITE_DAC |
                                     SYNCHRONIZE | ACCESS_SYSTEM_SECURITY | FILE_READ_DATA |
@@ -3239,11 +3290,9 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
             if (fileref->fcb->type == BTRFS_TYPE_DIRECTORY)
                 allowed |= FILE_ADD_SUBDIRECTORY | FILE_ADD_FILE | FILE_DELETE_CHILD;
             
-            if (Stack->Parameters.Create.SecurityContext->DesiredAccess & ~allowed) {
+            if (granted_access & ~allowed) {
                 Status = STATUS_ACCESS_DENIED;
-                ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
                 free_fileref(fileref);
-                ExReleaseResource(&Vcb->fcb_lock);
                 goto exit;
             }
         }
@@ -3251,9 +3300,7 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
         if (options & FILE_DELETE_ON_CLOSE && (fileref == Vcb->root_fileref || Vcb->readonly ||
             fileref->fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY || fileref->fcb->atts & FILE_ATTRIBUTE_READONLY)) {
             Status = STATUS_CANNOT_DELETE;
-            ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
             free_fileref(fileref);
-            ExReleaseResource(&Vcb->fcb_lock);
             goto exit;
         }
         
@@ -3270,10 +3317,7 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
             Status = get_reparse_block(fileref->fcb, (UINT8**)&data);
             if (!NT_SUCCESS(Status)) {
                 ERR("get_reparse_block returned %08x\n", Status);
-                
-                ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
                 free_fileref(fileref);
-                ExReleaseResourceLite(&Vcb->fcb_lock);
                 goto exit;
             }
             
@@ -3285,56 +3329,41 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
             
             Irp->Tail.Overlay.AuxiliaryBuffer = (void*)data;
             
-            ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
             free_fileref(fileref);
-            ExReleaseResourceLite(&Vcb->fcb_lock);
             goto exit;
         }
         
-        if (fileref->fcb->type == BTRFS_TYPE_DIRECTORY) {
+        if (fileref->fcb->type == BTRFS_TYPE_DIRECTORY && !fileref->fcb->ads) {
             if (options & FILE_NON_DIRECTORY_FILE && !(fileref->fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT)) {
-                ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
                 free_fileref(fileref);
-                ExReleaseResourceLite(&Vcb->fcb_lock);
-                
                 Status = STATUS_FILE_IS_A_DIRECTORY;
                 goto exit;
             }
         } else if (options & FILE_DIRECTORY_FILE) {
             TRACE("returning STATUS_NOT_A_DIRECTORY (type = %u, %S)\n", fileref->fcb->type, file_desc_fileref(fileref));
-            
-            ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
             free_fileref(fileref);
-            ExReleaseResourceLite(&Vcb->fcb_lock);
-            
             Status = STATUS_NOT_A_DIRECTORY;
             goto exit;
         }
         
-        if (fileref->fcb->open_count > 0) {
-            Status = IoCheckShareAccess(Stack->Parameters.Create.SecurityContext->DesiredAccess,
-                                        Stack->Parameters.Create.ShareAccess, FileObject, &fileref->fcb->share_access, TRUE);
+        if (fileref->open_count > 0) {
+            Status = IoCheckShareAccess(granted_access, Stack->Parameters.Create.ShareAccess, FileObject, &fileref->fcb->share_access, TRUE);
             
             if (!NT_SUCCESS(Status)) {
                 WARN("IoCheckShareAccess failed, returning %08x\n", Status);
                 
-                ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
                 free_fileref(fileref);
-                ExReleaseResourceLite(&Vcb->fcb_lock);
                 goto exit;
             }
         } else {
-            IoSetShareAccess(Stack->Parameters.Create.SecurityContext->DesiredAccess,
-                             Stack->Parameters.Create.ShareAccess, FileObject, &fileref->fcb->share_access);
+            IoSetShareAccess(granted_access, Stack->Parameters.Create.ShareAccess, FileObject, &fileref->fcb->share_access);
         }
 
-        if (Stack->Parameters.Create.SecurityContext->DesiredAccess & FILE_WRITE_DATA || options & FILE_DELETE_ON_CLOSE) {
+        if (granted_access & FILE_WRITE_DATA || options & FILE_DELETE_ON_CLOSE) {
             if (!MmFlushImageSection(&fileref->fcb->nonpaged->segment_object, MmFlushForWrite)) {
                 Status = (options & FILE_DELETE_ON_CLOSE) ? STATUS_CANNOT_DELETE : STATUS_SHARING_VIOLATION;
                 
-                ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
                 free_fileref(fileref);
-                ExReleaseResourceLite(&Vcb->fcb_lock);
                 goto exit;
             }
         }
@@ -3347,9 +3376,7 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
             if ((RequestedDisposition == FILE_OVERWRITE || RequestedDisposition == FILE_OVERWRITE_IF) && fileref->fcb->atts & FILE_ATTRIBUTE_READONLY) {
                 WARN("cannot overwrite readonly file\n");
                 Status = STATUS_ACCESS_DENIED;
-                ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
                 free_fileref(fileref);
-                ExReleaseResource(&Vcb->fcb_lock);
                 goto exit;
             }
     
@@ -3365,9 +3392,7 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
             Status = truncate_file(fileref->fcb, 0, Irp, rollback);
             if (!NT_SUCCESS(Status)) {
                 ERR("truncate_file returned %08x\n", Status);
-                ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
                 free_fileref(fileref);
-                ExReleaseResource(&Vcb->fcb_lock);
                 goto exit;
             }
             
@@ -3376,13 +3401,67 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
                 
                 if (!NT_SUCCESS(Status)) {
                     ERR("extend_file returned %08x\n", Status);
-                    ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
                     free_fileref(fileref);
-                    ExReleaseResource(&Vcb->fcb_lock);
                     goto exit;
                 }
             }
             
+            if (Irp->AssociatedIrp.SystemBuffer && Stack->Parameters.Create.EaLength > 0) {
+                ULONG offset;
+                FILE_FULL_EA_INFORMATION* eainfo;
+                
+                Status = IoCheckEaBufferValidity(Irp->AssociatedIrp.SystemBuffer, Stack->Parameters.Create.EaLength, &offset);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("IoCheckEaBufferValidity returned %08x (error at offset %u)\n", Status, offset);
+                    free_fileref(fileref);
+                    goto exit;
+                }
+                
+                fileref->fcb->ealen = 4;
+                
+                // capitalize EA name
+                eainfo = Irp->AssociatedIrp.SystemBuffer;
+                do {
+                    STRING s;
+                    
+                    s.Length = s.MaximumLength = eainfo->EaNameLength;
+                    s.Buffer = eainfo->EaName;
+                    
+                    RtlUpperString(&s, &s);
+                    
+                    fileref->fcb->ealen += 5 + eainfo->EaNameLength + eainfo->EaValueLength;
+                    
+                    if (eainfo->NextEntryOffset == 0)
+                        break;
+                    
+                    eainfo = (FILE_FULL_EA_INFORMATION*)(((UINT8*)eainfo) + eainfo->NextEntryOffset);
+                } while (TRUE);
+                
+                if (fileref->fcb->ea_xattr.Buffer)
+                    ExFreePool(fileref->fcb->ea_xattr.Buffer);
+                
+                fileref->fcb->ea_xattr.Buffer = ExAllocatePoolWithTag(pool_type, Stack->Parameters.Create.EaLength, ALLOC_TAG);
+                if (!fileref->fcb->ea_xattr.Buffer) {
+                    ERR("out of memory\n");
+                    Status = STATUS_INSUFFICIENT_RESOURCES;
+                    
+                    free_fileref(fileref);
+                    goto exit;
+                }
+                
+                fileref->fcb->ea_xattr.Length = fileref->fcb->ea_xattr.MaximumLength = Stack->Parameters.Create.EaLength;
+                RtlCopyMemory(fileref->fcb->ea_xattr.Buffer, Irp->AssociatedIrp.SystemBuffer, Stack->Parameters.Create.EaLength);
+            } else {
+                if (fileref->fcb->ea_xattr.Length > 0) {
+                    ExFreePool(fileref->fcb->ea_xattr.Buffer);
+                    fileref->fcb->ea_xattr.Buffer = NULL;
+                    fileref->fcb->ea_xattr.Length = fileref->fcb->ea_xattr.MaximumLength = 0;
+                    
+                    fileref->fcb->ea_changed = TRUE;
+                    fileref->fcb->ealen = 0;
+                }
+            }
+            
             filter = FILE_NOTIFY_CHANGE_SIZE | FILE_NOTIFY_CHANGE_LAST_WRITE;
             
             mark_fcb_dirty(fileref->fcb);
@@ -3410,11 +3489,30 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
             fileref->fcb->inode_item.sequence++;
             fileref->fcb->inode_item.st_ctime = now;
             fileref->fcb->inode_item.st_mtime = now;
+            fileref->fcb->inode_item_changed = TRUE;
 
             // FIXME - truncate streams
             // FIXME - do we need to alter parent directory's times?
             
             send_notification_fcb(fileref, filter, FILE_ACTION_MODIFIED);
+        } else {
+            if (options & FILE_NO_EA_KNOWLEDGE && fileref->fcb->ea_xattr.Length > 0) {
+                FILE_FULL_EA_INFORMATION* ffei = (FILE_FULL_EA_INFORMATION*)fileref->fcb->ea_xattr.Buffer;
+                
+                do {
+                    if (ffei->Flags & FILE_NEED_EA) {
+                        WARN("returning STATUS_ACCESS_DENIED as no EA knowledge\n");
+                        free_fileref(fileref);
+                        Status = STATUS_ACCESS_DENIED;
+                        goto exit;
+                    }
+                    
+                    if (ffei->NextEntryOffset == 0)
+                        break;
+                    
+                    ffei = (FILE_FULL_EA_INFORMATION*)(((UINT8*)ffei) + ffei->NextEntryOffset);
+                } while (TRUE);
+            }
         }
     
         FileObject->FsContext = fileref->fcb;
@@ -3422,11 +3520,7 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
         ccb = ExAllocatePoolWithTag(NonPagedPool, sizeof(*ccb), ALLOC_TAG);
         if (!ccb) {
             ERR("out of memory\n");
-            
-            ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
             free_fileref(fileref);
-            ExReleaseResourceLite(&Vcb->fcb_lock);
-
             Status = STATUS_INSUFFICIENT_RESOURCES;
             goto exit;
         }
@@ -3441,7 +3535,8 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
         RtlInitUnicodeString(&ccb->query_string, NULL);
         ccb->has_wildcard = FALSE;
         ccb->specific_file = FALSE;
-        ccb->access = Stack->Parameters.Create.SecurityContext->DesiredAccess;
+        ccb->access = granted_access;
+        ccb->case_sensitive = Stack->Flags & SL_CASE_SENSITIVE;
         
         ccb->fileref = fileref;
         
@@ -3472,12 +3567,45 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
             }
         }
         
+        // Make sure paging files don't have any extents marked as being prealloc,
+        // as this would mean we'd have to lock exclusively when writing.
+        if (Stack->Flags & SL_OPEN_PAGING_FILE) {
+            LIST_ENTRY* le;
+            BOOL changed = FALSE;
+            
+            ExAcquireResourceExclusiveLite(fileref->fcb->Header.Resource, TRUE);
+            
+            le = fileref->fcb->extents.Flink;
+            
+            while (le != &fileref->fcb->extents) {
+                extent* ext = CONTAINING_RECORD(le, extent, list_entry);
+                
+                if (ext->data->type == EXTENT_TYPE_PREALLOC) {
+                    ext->data->type = EXTENT_TYPE_REGULAR;
+                    changed = TRUE;
+                }
+                
+                le = le->Flink;
+            }
+            
+            ExReleaseResourceLite(fileref->fcb->Header.Resource);
+            
+            if (changed) {
+                fileref->fcb->extents_changed = TRUE;
+                mark_fcb_dirty(fileref->fcb);
+            }
+            
+            fileref->fcb->Header.Flags2 |= FSRTL_FLAG2_IS_PAGING_FILE;
+            Vcb->disallow_dismount = TRUE;
+        }
+        
 #ifdef DEBUG_FCB_REFCOUNTS
-        oc = InterlockedIncrement(&fileref->fcb->open_count);
-        ERR("fcb %p: open_count now %i\n", fileref->fcb, oc);
+        oc = InterlockedIncrement(&fileref->open_count);
+        ERR("fileref %p: open_count now %i\n", fileref, oc);
 #else
-        InterlockedIncrement(&fileref->fcb->open_count);
+        InterlockedIncrement(&fileref->open_count);
 #endif
+        InterlockedIncrement(&Vcb->open_files);
     } else {
         Status = file_create(Irp, DeviceObject->DeviceExtension, FileObject, &FileObject->FileName, RequestedDisposition, options, rollback);
         Irp->IoStatus.Information = NT_SUCCESS(Status) ? FILE_CREATED : 0;
@@ -3487,6 +3615,9 @@ static NTSTATUS STDCALL open_file(PDEVICE_OBJECT DeviceObject, PIRP Irp, LIST_EN
         FileObject->Flags |= FO_CACHE_SUPPORTED;
     
 exit:
+    ExReleaseResourceLite(&Vcb->fcb_lock);
+    
+exit2:
     if (NT_SUCCESS(Status)) {
         if (!FileObject->Vpb)
             FileObject->Vpb = DeviceObject->Vpb;
@@ -3501,7 +3632,7 @@ exit:
 NTSTATUS verify_vcb(device_extension* Vcb, PIRP Irp) {
     UINT64 i;
     
-    for (i = 0; i < Vcb->superblock.num_devices; i++) {
+    for (i = 0; i < Vcb->devices_loaded; i++) {
         if (Vcb->devices[i].removable) {
             NTSTATUS Status;
             ULONG cc;
@@ -3570,8 +3701,6 @@ NTSTATUS STDCALL drv_create(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
         goto exit;
     }
     
-    Vcb = DeviceObject->DeviceExtension;
-    
     Status = verify_vcb(Vcb, Irp);
     if (!NT_SUCCESS(Status)) {
         ERR("verify_vcb returned %08x\n", Status);
@@ -3646,15 +3775,17 @@ NTSTATUS STDCALL drv_create(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
             Status = STATUS_NOT_A_DIRECTORY;
             goto exit;
         }
+        
+        if (Vcb->removing) {
+            Status = STATUS_ACCESS_DENIED;
+            goto exit;
+        }
 
 #ifdef DEBUG_FCB_REFCOUNTS
         rc = InterlockedIncrement(&Vcb->volume_fcb->refcount);
-        oc = InterlockedIncrement(&Vcb->volume_fcb->open_count);
         WARN("fcb %p: refcount now %i (volume)\n", Vcb->volume_fcb, rc);
-        WARN("fcb %p: open_count now %i (volume)\n", Vcb->volume_fcb, oc);
 #else
         InterlockedIncrement(&Vcb->volume_fcb->refcount);
-        InterlockedIncrement(&Vcb->volume_fcb->open_count);
 #endif
         IrpSp->FileObject->FsContext = Vcb->volume_fcb;
         
@@ -3662,6 +3793,8 @@ NTSTATUS STDCALL drv_create(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
 
         if (!IrpSp->FileObject->Vpb)
             IrpSp->FileObject->Vpb = DeviceObject->Vpb;
+        
+        InterlockedIncrement(&Vcb->open_files);
 
         Irp->IoStatus.Information = FILE_OPENED;
         Status = STATUS_SUCCESS;
@@ -3688,7 +3821,7 @@ NTSTATUS STDCALL drv_create(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
         if (!NT_SUCCESS(Status))
             do_rollback(Vcb, &rollback);
         else
-            clear_rollback(&rollback);
+            clear_rollback(Vcb, &rollback);
         
         if (!skip_lock)
             ExReleaseResourceLite(&Vcb->tree_lock);
diff --git a/reactos/drivers/filesystems/btrfs/devctrl.c b/reactos/drivers/filesystems/btrfs/devctrl.c
new file mode 100644 (file)
index 0000000..11422e4
--- /dev/null
@@ -0,0 +1,210 @@
+/* Copyright (c) Mark Harmstone 2016
+ * 
+ * This file is part of WinBtrfs.
+ * 
+ * WinBtrfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public Licence as published by
+ * the Free Software Foundation, either version 3 of the Licence, or
+ * (at your option) any later version.
+ * 
+ * WinBtrfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public Licence for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public Licence
+ * along with WinBtrfs.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include "btrfs_drv.h"
+#ifndef __REACTOS__
+#include <winioctl.h>
+#endif
+#include <mountdev.h>
+#include <initguid.h>
+#include <diskguid.h>
+
+static NTSTATUS part0_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { // IOCTL handler for the partition-0 device object
+    NTSTATUS Status;
+    part0_device_extension* p0de = DeviceObject->DeviceExtension;
+    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
+    // Two mount-manager queries are answered and completed here; every other code is forwarded to p0de->devobj.
+    TRACE("control code = %x\n", IrpSp->Parameters.DeviceIoControl.IoControlCode);
+    
+    switch (IrpSp->Parameters.DeviceIoControl.IoControlCode) {
+        case IOCTL_MOUNTDEV_QUERY_UNIQUE_ID: // answered with p0de->uuid
+        {
+            MOUNTDEV_UNIQUE_ID* mduid;
+
+            if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(MOUNTDEV_UNIQUE_ID)) { // no room even for the fixed part
+                Status = STATUS_BUFFER_TOO_SMALL;
+                Irp->IoStatus.Status = Status;
+                Irp->IoStatus.Information = sizeof(MOUNTDEV_UNIQUE_ID);
+                IoCompleteRequest(Irp, IO_NO_INCREMENT);
+                return Status;
+            }
+
+            mduid = Irp->AssociatedIrp.SystemBuffer;
+            mduid->UniqueIdLength = sizeof(BTRFS_UUID); // written before the second size check; presumably lets the caller learn the needed size - confirm
+
+            if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(MOUNTDEV_UNIQUE_ID) - 1 + mduid->UniqueIdLength) { // fixed part fits, full ID does not
+                Status = STATUS_BUFFER_OVERFLOW;
+                Irp->IoStatus.Status = Status;
+                Irp->IoStatus.Information = sizeof(MOUNTDEV_UNIQUE_ID);
+                IoCompleteRequest(Irp, IO_NO_INCREMENT);
+                return Status;
+            }
+
+            RtlCopyMemory(mduid->UniqueId, &p0de->uuid, sizeof(BTRFS_UUID));
+
+            Status = STATUS_SUCCESS;
+            Irp->IoStatus.Status = Status;
+            Irp->IoStatus.Information = sizeof(MOUNTDEV_UNIQUE_ID) - 1 + mduid->UniqueIdLength; // "- 1" presumably discounts a one-byte placeholder array in the struct - confirm against mountdev.h
+            IoCompleteRequest(Irp, IO_NO_INCREMENT);
+            
+            return Status;
+        }
+        
+        case IOCTL_MOUNTDEV_QUERY_DEVICE_NAME: // answered with p0de->name; same two-stage size check as above
+        {
+            PMOUNTDEV_NAME name;
+
+            if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(MOUNTDEV_NAME)) {
+                Status = STATUS_BUFFER_TOO_SMALL;
+                Irp->IoStatus.Status = Status;
+                Irp->IoStatus.Information = sizeof(MOUNTDEV_NAME);
+                IoCompleteRequest(Irp, IO_NO_INCREMENT);
+                return Status;
+            }
+
+            name = Irp->AssociatedIrp.SystemBuffer;
+            name->NameLength = p0de->name.Length;
+
+            if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(MOUNTDEV_NAME) - 1 + name->NameLength) {
+                Status = STATUS_BUFFER_OVERFLOW;
+                Irp->IoStatus.Status = Status;
+                Irp->IoStatus.Information = sizeof(MOUNTDEV_NAME);
+                IoCompleteRequest(Irp, IO_NO_INCREMENT);
+                return Status;
+            }
+            
+            RtlCopyMemory(name->Name, p0de->name.Buffer, p0de->name.Length);
+
+            Status = STATUS_SUCCESS;
+            Irp->IoStatus.Status = Status;
+            Irp->IoStatus.Information = sizeof(MOUNTDEV_NAME) - 1 + name->NameLength; // NOTE(review): if the Name placeholder is wider than one byte, "- 1" over-counts - confirm
+            IoCompleteRequest(Irp, IO_NO_INCREMENT);
+            
+            return Status;
+        }
+    }
+    
+    IoSkipCurrentIrpStackLocation(Irp); // not one of the two queries above: pass the request on unchanged
+    
+    Status = IoCallDriver(p0de->devobj, Irp);
+    
+    TRACE("returning %08x\n", Status);
+    
+    return Status;
+}
+
+static NTSTATUS mountdev_query_stable_guid(device_extension* Vcb, PIRP Irp) { // reports the superblock UUID as the volume's stable GUID
+    MOUNTDEV_STABLE_GUID* msg = Irp->UserBuffer; // NOTE(review): part0_device_control reads AssociatedIrp.SystemBuffer for its mountdev IOCTLs; confirm UserBuffer is right for this one
+    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
+    
+    TRACE("IOCTL_MOUNTDEV_QUERY_STABLE_GUID\n");
+    
+    if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(MOUNTDEV_STABLE_GUID))
+        return STATUS_INVALID_PARAMETER; // Irp is not completed here; the caller does that
+
+    RtlCopyMemory(&msg->StableGuid, &Vcb->superblock.uuid, sizeof(GUID)); // copies sizeof(GUID) bytes straight out of the superblock
+    
+    Irp->IoStatus.Information = sizeof(MOUNTDEV_STABLE_GUID);
+    
+    return STATUS_SUCCESS;
+}
+
+static NTSTATUS get_partition_info_ex(device_extension* Vcb, PIRP Irp) { // asks the first device for its partition info, then overrides the partition type
+    NTSTATUS Status;
+    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
+    PARTITION_INFORMATION_EX* piex;
+    
+    TRACE("IOCTL_DISK_GET_PARTITION_INFO_EX\n");
+    // The query goes to devices[0] only; its result and IoStatus land directly in the caller's IRP.
+    Status = dev_ioctl(Vcb->devices[0].devobj, IOCTL_DISK_GET_PARTITION_INFO_EX, NULL, 0,
+                       Irp->UserBuffer, IrpSp->Parameters.DeviceIoControl.OutputBufferLength, TRUE, &Irp->IoStatus);
+    if (!NT_SUCCESS(Status))
+        return Status;
+    
+    piex = (PARTITION_INFORMATION_EX*)Irp->UserBuffer;
+    
+    if (piex->PartitionStyle == PARTITION_STYLE_MBR) { // MBR: report an IFS partition and mark it recognized
+        piex->Mbr.PartitionType = PARTITION_IFS;
+        piex->Mbr.RecognizedPartition = TRUE;
+    } else if (piex->PartitionStyle == PARTITION_STYLE_GPT) { // GPT: report the basic-data partition GUID
+        piex->Gpt.PartitionType = PARTITION_BASIC_DATA_GUID;
+    }
+    // Any other partition style is returned exactly as the device reported it.
+    return STATUS_SUCCESS;
+}
+
+static NTSTATUS is_writable(device_extension* Vcb, PIRP Irp) { // answers IOCTL_DISK_IS_WRITABLE; Irp is not read in the body
+    TRACE("IOCTL_DISK_IS_WRITABLE\n");
+    // Result depends only on the volume's readonly flag.
+    return Vcb->readonly ? STATUS_MEDIA_WRITE_PROTECTED : STATUS_SUCCESS;
+}
+
+NTSTATUS STDCALL drv_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) { // device-control dispatch routine for the driver's device objects
+    NTSTATUS Status;
+    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
+    device_extension* Vcb = DeviceObject->DeviceExtension;
+    BOOL top_level;
+
+    FsRtlEnterFileSystem();
+
+    top_level = is_top_level(Irp);
+    
+    Irp->IoStatus.Information = 0;
+    // Partition-0 devices have their own handler, which completes or forwards the IRP itself (hence end2, not end).
+    if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) {
+        Status = part0_device_control(DeviceObject, Irp);
+        goto end2;
+    }
+    
+    switch (IrpSp->Parameters.DeviceIoControl.IoControlCode) { // codes answered locally jump to end, where the IRP is completed
+        case IOCTL_MOUNTDEV_QUERY_STABLE_GUID:
+            Status = mountdev_query_stable_guid(Vcb, Irp);
+            goto end;
+            
+        case IOCTL_DISK_GET_PARTITION_INFO_EX:
+            Status = get_partition_info_ex(Vcb, Irp);
+            goto end;
+            
+        case IOCTL_DISK_IS_WRITABLE:
+            Status = is_writable(Vcb, Irp);
+            goto end;
+            
+        default:
+            TRACE("unhandled control code %x\n", IrpSp->Parameters.DeviceIoControl.IoControlCode);
+            break;
+    }
+    // Everything else is handed to the volume's first device without being completed here.
+    IoSkipCurrentIrpStackLocation(Irp);
+    
+    Status = IoCallDriver(Vcb->devices[0].devobj, Irp); // NOTE(review): Vcb is NULL-checked above but dereferenced unconditionally here and in the helpers - confirm it cannot be NULL
+    
+    goto end2;
+    
+end:
+    Irp->IoStatus.Status = Status;
+
+    if (Status != STATUS_PENDING)
+        IoCompleteRequest(Irp, IO_NO_INCREMENT);
+    
+end2:
+    if (top_level) 
+        IoSetTopLevelIrp(NULL);
+    
+    FsRtlExitFileSystem();
+
+    return Status;
+}
index 5cc54e9..b70e68f 100644 (file)
@@ -37,10 +37,12 @@ ULONG STDCALL get_reparse_tag(device_extension* Vcb, root* subvol, UINT64 inode,
     ULONG tag = 0, br;
     NTSTATUS Status;
     
-    // FIXME - will this slow things down?
-    
-    if (type == BTRFS_TYPE_SYMLINK)
-        return IO_REPARSE_TAG_SYMLINK;
+    if (type == BTRFS_TYPE_SYMLINK) {
+        if (called_from_lxss())
+            return IO_REPARSE_TAG_LXSS_SYMLINK;
+        else
+            return IO_REPARSE_TAG_SYMLINK;
+    }
     
     if (type != BTRFS_TYPE_FILE && type != BTRFS_TYPE_DIRECTORY)
         return 0;
@@ -49,7 +51,7 @@ ULONG STDCALL get_reparse_tag(device_extension* Vcb, root* subvol, UINT64 inode,
         return 0;
     
     ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
-    Status = open_fcb(Vcb, subvol, inode, type, NULL, NULL, &fcb, Irp);
+    Status = open_fcb(Vcb, subvol, inode, type, NULL, NULL, &fcb, PagedPool, Irp);
     if (!NT_SUCCESS(Status)) {
         ERR("open_fcb returned %08x\n", Status);
         ExReleaseResourceLite(&Vcb->fcb_lock);
@@ -85,6 +87,43 @@ end:
     return tag;
 }
 
+static ULONG get_ea_len(device_extension* Vcb, root* subvol, UINT64 inode, PIRP Irp) { // computes the EaSize value for an inode from its EA_EA xattr; 0 if absent or invalid
+    UINT8* eadata;
+    UINT16 len;
+    
+    if (get_xattr(Vcb, subvol, inode, EA_EA, EA_EA_HASH, &eadata, &len, Irp)) { // on success eadata is freed below with ExFreePool on both paths
+        ULONG offset;
+        NTSTATUS Status;
+        
+        Status = IoCheckEaBufferValidity((FILE_FULL_EA_INFORMATION*)eadata, len, &offset); // validated before the list is walked
+        
+        if (!NT_SUCCESS(Status)) {
+            WARN("IoCheckEaBufferValidity returned %08x (error at offset %u)\n", Status, offset);
+            ExFreePool(eadata);
+            return 0;
+        } else {
+            FILE_FULL_EA_INFORMATION* eainfo;
+            ULONG ealen;
+            
+            ealen = 4; // fixed starting value; presumably a list header in the size format Windows reports - confirm
+            eainfo = (FILE_FULL_EA_INFORMATION*)eadata;
+            do {
+                ealen += 5 + eainfo->EaNameLength + eainfo->EaValueLength; // 5 bytes of per-entry overhead plus name and value lengths
+                
+                if (eainfo->NextEntryOffset == 0) // zero offset marks the last entry
+                    break;
+                
+                eainfo = (FILE_FULL_EA_INFORMATION*)(((UINT8*)eainfo) + eainfo->NextEntryOffset);
+            } while (TRUE);
+            
+            ExFreePool(eadata);
+            
+            return ealen;
+        }
+    } else
+        return 0;
+}
+
 static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, LONG* len, PIRP Irp, dir_entry* de, root* r) {
     PIO_STACK_LOCATION IrpSp;
     UINT32 needed;
@@ -92,7 +131,7 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L
     INODE_ITEM ii;
     NTSTATUS Status;
     ULONG stringlen;
-    ULONG atts;
+    ULONG atts, ealen;
     
     IrpSp = IoGetCurrentIrpStackLocation(Irp);
     
@@ -139,6 +178,7 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L
                         if (fcb2->inode == inode && !fcb2->ads) {
                             ii = fcb2->inode_item;
                             atts = fcb2->atts;
+                            ealen = fcb2->ealen;
                             found = TRUE;
                             break;
                         }
@@ -175,12 +215,20 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L
                     if (IrpSp->Parameters.QueryDirectory.FileInformationClass == FileBothDirectoryInformation ||
                         IrpSp->Parameters.QueryDirectory.FileInformationClass == FileDirectoryInformation ||
                         IrpSp->Parameters.QueryDirectory.FileInformationClass == FileFullDirectoryInformation ||
-                        IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdBothDirectoryInformation) {
+                        IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdBothDirectoryInformation || 
+                        IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdFullDirectoryInformation) {
                         
                         BOOL dotfile = de->namelen > 1 && de->name[0] == '.';
 
                         atts = get_file_attributes(fcb->Vcb, &ii, r, inode, de->type, dotfile, FALSE, Irp);
                     }
+                    
+                    if (IrpSp->Parameters.QueryDirectory.FileInformationClass == FileBothDirectoryInformation || 
+                        IrpSp->Parameters.QueryDirectory.FileInformationClass == FileFullDirectoryInformation || 
+                        IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdBothDirectoryInformation || 
+                        IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdFullDirectoryInformation) {
+                        ealen = get_ea_len(fcb->Vcb, r, inode, Irp);
+                    }
                 }
                 
                 break;
@@ -191,6 +239,7 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L
                 r = fcb->subvol;
                 inode = fcb->inode;
                 atts = fcb->atts;
+                ealen = fcb->ealen;
                 break;
                 
             case DirEntryType_Parent:
@@ -199,6 +248,7 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L
                     r = fileref->parent->fcb->subvol;
                     inode = fileref->parent->fcb->inode;
                     atts = fileref->parent->fcb->atts;
+                    ealen = fileref->parent->fcb->ealen;
                 } else {
                     ERR("no fileref\n");
                     return STATUS_INTERNAL_ERROR;
@@ -212,6 +262,7 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L
         IrpSp->Parameters.QueryDirectory.FileInformationClass == FileDirectoryInformation ||
         IrpSp->Parameters.QueryDirectory.FileInformationClass == FileFullDirectoryInformation ||
         IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdBothDirectoryInformation ||
+        IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdFullDirectoryInformation ||
         IrpSp->Parameters.QueryDirectory.FileInformationClass == FileNamesInformation) {
         
         Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, de->name, de->namelen);
@@ -245,7 +296,7 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L
             fbdi->AllocationSize.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_blocks;
             fbdi->FileAttributes = atts;
             fbdi->FileNameLength = stringlen;
-            fbdi->EaSize = get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp);
+            fbdi->EaSize = atts & FILE_ATTRIBUTE_REPARSE_POINT ? get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp) : ealen;
             fbdi->ShortNameLength = 0;
 //             fibdi->ShortName[12];
             
@@ -320,7 +371,7 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L
             ffdi->AllocationSize.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_blocks;
             ffdi->FileAttributes = atts;
             ffdi->FileNameLength = stringlen;
-            ffdi->EaSize = get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp);
+            ffdi->EaSize = atts & FILE_ATTRIBUTE_REPARSE_POINT ? get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp) : ealen;
             
             Status = RtlUTF8ToUnicodeN(ffdi->FileName, stringlen, &stringlen, de->name, de->namelen);
 
@@ -360,10 +411,10 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L
             fibdi->AllocationSize.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_blocks;
             fibdi->FileAttributes = atts;
             fibdi->FileNameLength = stringlen;
-            fibdi->EaSize = get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp);
+            fibdi->EaSize = atts & FILE_ATTRIBUTE_REPARSE_POINT ? get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp) : ealen;
             fibdi->ShortNameLength = 0;
 //             fibdi->ShortName[12];
-            fibdi->FileId.QuadPart = inode;
+            fibdi->FileId.QuadPart = make_file_id(r, inode);
             
             Status = RtlUTF8ToUnicodeN(fibdi->FileName, stringlen, &stringlen, de->name, de->namelen);
 
@@ -378,8 +429,45 @@ static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, L
         }
 
         case FileIdFullDirectoryInformation:
-            FIXME("STUB: FileIdFullDirectoryInformation\n");
-            break;
+        {
+            FILE_ID_FULL_DIR_INFORMATION* fifdi = buf;
+            
+            TRACE("FileIdFullDirectoryInformation\n");
+            
+            needed = sizeof(FILE_ID_FULL_DIR_INFORMATION) - sizeof(WCHAR) + stringlen;
+            
+            if (needed > *len) {
+                TRACE("buffer overflow - %u > %u\n", needed, *len);
+                return STATUS_BUFFER_OVERFLOW;
+            }
+            
+//             if (!buf)
+//                 return STATUS_INVALID_POINTER;
+            
+            fifdi->NextEntryOffset = 0;
+            fifdi->FileIndex = 0;
+            fifdi->CreationTime.QuadPart = unix_time_to_win(&ii.otime);
+            fifdi->LastAccessTime.QuadPart = unix_time_to_win(&ii.st_atime);
+            fifdi->LastWriteTime.QuadPart = unix_time_to_win(&ii.st_mtime);
+            fifdi->ChangeTime.QuadPart = unix_time_to_win(&ii.st_ctime);
+            fifdi->EndOfFile.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_size;
+            fifdi->AllocationSize.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_blocks;
+            fifdi->FileAttributes = atts;
+            fifdi->FileNameLength = stringlen;
+            fifdi->EaSize = atts & FILE_ATTRIBUTE_REPARSE_POINT ? get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp) : ealen;
+            fifdi->FileId.QuadPart = make_file_id(r, inode);
+            
+            Status = RtlUTF8ToUnicodeN(fifdi->FileName, stringlen, &stringlen, de->name, de->namelen);
+
+            if (!NT_SUCCESS(Status)) {
+                ERR("RtlUTF8ToUnicodeN returned %08x\n", Status);
+                return Status;
+            }
+            
+            *len -= needed;
+            
+            return STATUS_SUCCESS;
+        }
 
         case FileNamesInformation:
         {
@@ -528,12 +616,12 @@ static NTSTATUS STDCALL next_dir_entry(file_ref* fileref, UINT64* offset, dir_en
             goto end;
         }
         
-        if (keycmp(&tp.item->key, &searchkey) == -1) {
+        if (keycmp(tp.item->key, searchkey) == -1) {
             if (find_next_item(fileref->fcb->Vcb, &tp, &next_tp, FALSE, Irp))
                 tp = next_tp;
         }
         
-        if (keycmp(&tp.item->key, &searchkey) != -1 && tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
+        if (keycmp(tp.item->key, searchkey) != -1 && tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
             do {
                 if (fr) {
                     if (fr->index <= tp.item->key.offset && !fr->deleted) {
@@ -673,7 +761,7 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
         return STATUS_INVALID_PARAMETER;
     }
     
-    if (!(ccb->access & FILE_LIST_DIRECTORY)) {
+    if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_LIST_DIRECTORY)) {
         WARN("insufficient privileges\n");
         return STATUS_ACCESS_DENIED;
     }
@@ -724,7 +812,7 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
         
         if (IrpSp->Parameters.QueryDirectory.FileName->Buffer[0] != '*') {
             specific_file = TRUE;
-            if (FsRtlDoesNameContainWildCards(IrpSp->Parameters.QueryDirectory.FileName)) {
+            if (!ccb->case_sensitive || FsRtlDoesNameContainWildCards(IrpSp->Parameters.QueryDirectory.FileName)) {
                 has_wildcard = TRUE;
                 specific_file = FALSE;
             }
@@ -796,10 +884,14 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
         UNICODE_STRING us;
         LIST_ENTRY* le;
         
-        Status = RtlUpcaseUnicodeString(&us, &ccb->query_string, TRUE);
-        if (!NT_SUCCESS(Status)) {
-            ERR("RtlUpcaseUnicodeString returned %08x\n", Status);
-            goto end;
+        us.Buffer = NULL;
+        
+        if (!ccb->case_sensitive) {
+            Status = RtlUpcaseUnicodeString(&us, &ccb->query_string, TRUE);
+            if (!NT_SUCCESS(Status)) {
+                ERR("RtlUpcaseUnicodeString returned %08x\n", Status);
+                goto end;
+            }
         }
         
         ExAcquireResourceSharedLite(&fileref->nonpaged->children_lock, TRUE);
@@ -807,11 +899,17 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
         le = fileref->children.Flink;
         while (le != &fileref->children) {
             file_ref* fr2 = CONTAINING_RECORD(le, file_ref, list_entry);
-                
-            if (!fr2->deleted && fr2->filepart_uc.Length == us.Length &&
-                RtlCompareMemory(fr2->filepart_uc.Buffer, us.Buffer, us.Length) == us.Length) {
-                found = TRUE;
             
+            if (!fr2->deleted) {
+                if (!ccb->case_sensitive && fr2->filepart_uc.Length == us.Length &&
+                    RtlCompareMemory(fr2->filepart_uc.Buffer, us.Buffer, us.Length) == us.Length)
+                    found = TRUE;
+                else if (ccb->case_sensitive && fr2->filepart.Length == ccb->query_string.Length &&
+                    RtlCompareMemory(fr2->filepart.Buffer, ccb->query_string.Buffer, ccb->query_string.Length) == ccb->query_string.Length)
+                    found = TRUE;
+            }
+                
+            if (found) {
                 if (fr2->fcb->subvol == fcb->subvol) {
                     de.key.obj_id = fr2->fcb->inode;
                     de.key.obj_type = TYPE_INODE_ITEM;
@@ -847,7 +945,7 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
             ExFreePool(us.Buffer);
         
         if (!found) {
-            Status = find_file_in_dir(fcb->Vcb, &ccb->query_string, fileref, &found_subvol, &found_inode, &found_type, &found_index, &utf8, Irp);
+            Status = find_file_in_dir(fcb->Vcb, &ccb->query_string, fileref, &found_subvol, &found_inode, &found_type, &found_index, &utf8, FALSE, Irp);
             
             if (!NT_SUCCESS(Status)) {
                 Status = STATUS_NO_SUCH_FILE;
@@ -898,7 +996,7 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
         di_uni_fn.Length = di_uni_fn.MaximumLength = stringlen;
         di_uni_fn.Buffer = uni_fn;
         
-        while (!FsRtlIsNameInExpression(&ccb->query_string, &di_uni_fn, TRUE, NULL)) {
+        while (!FsRtlIsNameInExpression(&ccb->query_string, &di_uni_fn, !ccb->case_sensitive, NULL)) {
             if (de.name_alloc)
                 ExFreePool(de.name);
             
@@ -961,6 +1059,7 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
                 case FileDirectoryInformation:
                 case FileIdBothDirectoryInformation:
                 case FileFullDirectoryInformation:
+                case FileIdFullDirectoryInformation:
                     length -= length % 8;
                     break;
                     
@@ -1011,7 +1110,7 @@ static NTSTATUS STDCALL query_directory(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
                         di_uni_fn.Buffer = uni_fn;
                     }
                     
-                    if (!has_wildcard || FsRtlIsNameInExpression(&ccb->query_string, &di_uni_fn, TRUE, NULL)) {
+                    if (!has_wildcard || FsRtlIsNameInExpression(&ccb->query_string, &di_uni_fn, !ccb->case_sensitive, NULL)) {
                         curitem = (UINT8*)buf + IrpSp->Parameters.QueryDirectory.Length - length;
                         count++;
                         
@@ -1086,7 +1185,7 @@ static NTSTATUS STDCALL notify_change_directory(device_extension* Vcb, PIRP Irp)
         return STATUS_INVALID_PARAMETER;
     }
     
-    if (!(ccb->access & FILE_LIST_DIRECTORY)) {
+    if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_LIST_DIRECTORY)) {
         WARN("insufficient privileges\n");
         return STATUS_ACCESS_DENIED;
     }
index 992b33d..8df4e57 100644 (file)
 
 #include "btrfs_drv.h"
 
+typedef struct { // one extent back-reference held in an in-memory list
+    UINT8 type; // TYPE_* value; the add_*_extent_ref helpers set it together with the matching union member
+    
+    union { // construct_extent_item passes &edr as the data pointer whatever the type is
+        EXTENT_DATA_REF edr;
+        SHARED_DATA_REF sdr;
+        TREE_BLOCK_REF tbr;
+        SHARED_BLOCK_REF sbr;
+    };
+    
+    UINT64 hash; // not set on creation; construct_extent_item fills it from get_extent_hash
+    LIST_ENTRY list_entry; // links the entry into the extent_refs list
+} extent_ref;
+
 static __inline ULONG get_extent_data_len(UINT8 type) {
     switch (type) {
         case TYPE_TREE_BLOCK_REF:
@@ -28,7 +42,8 @@ static __inline ULONG get_extent_data_len(UINT8 type) {
         case TYPE_EXTENT_REF_V0:
             return sizeof(EXTENT_REF_V0);
             
-        // FIXME - TYPE_SHARED_BLOCK_REF
+        case TYPE_SHARED_BLOCK_REF:
+            return sizeof(SHARED_BLOCK_REF);
             
         case TYPE_SHARED_DATA_REF:
             return sizeof(SHARED_DATA_REF);
@@ -55,7 +70,8 @@ static __inline UINT64 get_extent_data_refcount(UINT8 type, void* data) {
             return erv0->count;
         }
         
-        // FIXME - TYPE_SHARED_BLOCK_REF
+        case TYPE_SHARED_BLOCK_REF:
+            return 1;
         
         case TYPE_SHARED_DATA_REF:
         {
@@ -85,160 +101,477 @@ static __inline UINT64 get_extent_data_ref_hash(EXTENT_DATA_REF* edr) {
 static UINT64 get_extent_hash(UINT8 type, void* data) {
     if (type == TYPE_EXTENT_DATA_REF) {
         return get_extent_data_ref_hash((EXTENT_DATA_REF*)data);
+    } else if (type == TYPE_SHARED_BLOCK_REF) { // the three types added here use their offset field directly as the hash
+        SHARED_BLOCK_REF* sbr = (SHARED_BLOCK_REF*)data;
+        return sbr->offset;
+    } else if (type == TYPE_SHARED_DATA_REF) {
+        SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)data;
+        return sdr->offset;
+    } else if (type == TYPE_TREE_BLOCK_REF) {
+        TREE_BLOCK_REF* tbr = (TREE_BLOCK_REF*)data;
+        return tbr->offset;
     } else {
         ERR("unhandled extent type %x\n", type);
         return 0;
     }
 }
 
-static NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT8 type, void* data, KEY* firstitem, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) {
-    NTSTATUS Status;
-    KEY searchkey;
-    traverse_ptr tp;
-    ULONG datalen = get_extent_data_len(type), len, max_extent_item_size;
-    EXTENT_ITEM* ei;
-    UINT8* ptr;
-    UINT64 inline_rc, offset;
-    UINT8* data2;
-    EXTENT_ITEM* newei;
+static void free_extent_refs(LIST_ENTRY* extent_refs) { // empties the list, freeing every extent_ref; the list head itself is not freed
+    while (!IsListEmpty(extent_refs)) {
+        LIST_ENTRY* le = RemoveHeadList(extent_refs);
+        extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
+        // Each entry is unlinked first, then released.
+        ExFreePool(er);
+    }
+}
+
+static NTSTATUS add_shared_data_extent_ref(LIST_ENTRY* extent_refs, UINT64 parent, UINT32 count) { // adds count to the shared-data ref for parent, creating the entry if none exists
+    extent_ref* er2;
+    LIST_ENTRY* le;
     
-    // FIXME - handle A9s
+    if (!IsListEmpty(extent_refs)) {
+        le = extent_refs->Flink;
+        
+        while (le != extent_refs) {
+            extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
+            
+            if (er->type == TYPE_SHARED_DATA_REF && er->sdr.offset == parent) { // existing entry for this parent: merge the counts
+                er->sdr.count += count;
+                return STATUS_SUCCESS;
+            }
+            
+            le = le->Flink;
+        }
+    }
     
-    if (datalen == 0) {
-        ERR("unrecognized extent type %x\n", type);
-        return STATUS_INTERNAL_ERROR;
+    er2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent_ref), ALLOC_TAG); // no match found: append a new entry
+    if (!er2) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
     }
     
-    searchkey.obj_id = address;
-    searchkey.obj_type = TYPE_EXTENT_ITEM;
-    searchkey.offset = 0xffffffffffffffff;
+    er2->type = TYPE_SHARED_DATA_REF;
+    er2->sdr.offset = parent;
+    er2->sdr.count = count; // er2->hash is left unset here
     
-    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
-    }
+    InsertTailList(extent_refs, &er2->list_entry);
     
-    // If entry doesn't exist yet, create new inline extent item
+    return STATUS_SUCCESS;
+}
+
+static NTSTATUS add_shared_block_extent_ref(LIST_ENTRY* extent_refs, UINT64 parent) { // ensures the list holds exactly one shared-block ref for parent
+    extent_ref* er2;
+    LIST_ENTRY* le;
     
-    if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
-        ULONG eisize;
-        EXTENT_ITEM* ei;
-        BOOL is_tree = type == TYPE_TREE_BLOCK_REF;
-        UINT8* ptr;
-        
-        eisize = sizeof(EXTENT_ITEM);
-        if (is_tree) eisize += sizeof(EXTENT_ITEM2);
-        eisize += sizeof(UINT8);
-        eisize += datalen;
-        
-        ei = ExAllocatePoolWithTag(PagedPool, eisize, ALLOC_TAG);
-        if (!ei) {
-            ERR("out of memory\n");
-            return STATUS_INSUFFICIENT_RESOURCES;
-        }
-        
-        ei->refcount = get_extent_data_refcount(type, data);
-        ei->generation = Vcb->superblock.generation;
-        ei->flags = is_tree ? EXTENT_ITEM_TREE_BLOCK : EXTENT_ITEM_DATA;
-        ptr = (UINT8*)&ei[1];
+    if (!IsListEmpty(extent_refs)) {
+        le = extent_refs->Flink;
         
-        if (is_tree) {
-            EXTENT_ITEM2* ei2 = (EXTENT_ITEM2*)ptr;
-            ei2->firstitem = *firstitem;
-            ei2->level = level;
-            ptr = (UINT8*)&ei2[1];
+        while (le != extent_refs) {
+            extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
+            
+            if (er->type == TYPE_SHARED_BLOCK_REF && er->sbr.offset == parent) // already present: nothing to add (this ref type has no count)
+                return STATUS_SUCCESS;
+            
+            le = le->Flink;
         }
+    }
+    
+    er2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent_ref), ALLOC_TAG); // no match found: append a new entry
+    if (!er2) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+    
+    er2->type = TYPE_SHARED_BLOCK_REF;
+    er2->sbr.offset = parent; // er2->hash is left unset here
+    
+    InsertTailList(extent_refs, &er2->list_entry);
+    
+    return STATUS_SUCCESS;
+}
+
+static NTSTATUS add_tree_block_extent_ref(LIST_ENTRY* extent_refs, UINT64 root) { // ensures the list holds exactly one tree-block ref for root
+    extent_ref* er2;
+    LIST_ENTRY* le;
+    
+    if (!IsListEmpty(extent_refs)) {
+        le = extent_refs->Flink;
         
-        *ptr = type;
-        RtlCopyMemory(ptr + 1, data, datalen);
-        
-        if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, size, ei, eisize, NULL, Irp, rollback)) {
-            ERR("insert_tree_item failed\n");
-            return STATUS_INTERNAL_ERROR;
+        while (le != extent_refs) {
+            extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
+            
+            if (er->type == TYPE_TREE_BLOCK_REF && er->tbr.offset == root) // already present: nothing to add
+                return STATUS_SUCCESS;
+            
+            le = le->Flink;
         }
-        
-        // FIXME - add to space list?
+    }
+    
+    er2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent_ref), ALLOC_TAG); // no match found: append a new entry
+    if (!er2) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+    
+    er2->type = TYPE_TREE_BLOCK_REF;
+    er2->tbr.offset = root; // er2->hash is left unset here
+    
+    InsertTailList(extent_refs, &er2->list_entry);
+    
+    return STATUS_SUCCESS;
+}
 
+// Builds the extent item for the extent at (address, size) from the refs in extent_refs and
+// inserts it into Vcb->extent_root.
+//
+// Refs whose count is zero are unlinked from the list and freed. The remaining refs are packed
+// inline after the EXTENT_ITEM header (plus the EXTENT_ITEM2 header when flags contains
+// EXTENT_ITEM_TREE_BLOCK) until one would push the item past node_size / 4; that ref and every
+// ref after it in the list are inserted as separate items keyed (address, type, hash).
+//
+// firstitem may be NULL, in which case the key stored in EXTENT_ITEM2 is zeroed.
+//
+// Returns STATUS_SUCCESS (also when the list is empty, after a warning),
+// STATUS_INSUFFICIENT_RESOURCES if an allocation fails, or STATUS_INTERNAL_ERROR if
+// insert_tree_item fails.
+static NTSTATUS construct_extent_item(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 flags, LIST_ENTRY* extent_refs,
+                                      KEY* firstitem, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) {
+    LIST_ENTRY *le, *next_le;
+    UINT64 refcount;
+    ULONG inline_len;
+    BOOL all_inline = TRUE;
+    extent_ref* first_noninline = NULL; // only meaningful once all_inline has been cleared
+    EXTENT_ITEM* ei;
+    UINT8* siptr;
+    
+    // FIXME - write skinny extents if is tree and incompat flag set
+    
+    if (IsListEmpty(extent_refs)) {
+        WARN("no extent refs found\n");
         return STATUS_SUCCESS;
-    } else if (tp.item->key.offset != size) {
-        ERR("extent %llx exists, but with size %llx rather than %llx expected\n", tp.item->key.obj_id, tp.item->key.offset, size);
-        return STATUS_INTERNAL_ERROR;
     }
+    
+    refcount = 0;
+    inline_len = sizeof(EXTENT_ITEM);
+    
+    if (flags & EXTENT_ITEM_TREE_BLOCK)
+        inline_len += sizeof(EXTENT_ITEM2);
+    
+    // Pass 1: drop dead refs, total the refcount, hash each ref and size the inline part.
+    le = extent_refs->Flink;
+    while (le != extent_refs) {
+        extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
+        UINT64 rc;
         
-    if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
-        EXTENT_ITEM_V0* eiv0 = (EXTENT_ITEM_V0*)tp.item->data;
-        
-        TRACE("converting old-style extent at (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-        
-        ei = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM), ALLOC_TAG);
-        
-        if (!ei) {
-            ERR("out of memory\n");
-            return STATUS_INSUFFICIENT_RESOURCES;
-        }
-        
-        ei->refcount = eiv0->refcount;
-        ei->generation = Vcb->superblock.generation;
-        ei->flags = EXTENT_ITEM_DATA;
-        
-        delete_tree_item(Vcb, &tp, rollback);
+        next_le = le->Flink; // saved first because er may be unlinked and freed below
         
-        if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ei, sizeof(EXTENT_ITEM), NULL, Irp, rollback)) {
-            ERR("insert_tree_item failed\n");
-            ExFreePool(ei);
-            return STATUS_INTERNAL_ERROR;
-        }
+        rc = get_extent_data_refcount(er->type, &er->edr);
         
-        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("error - find_item returned %08x\n", Status);
-            return Status;
+        if (rc == 0) {
+            RemoveEntryList(&er->list_entry);
+            
+            ExFreePool(er);
+        } else {
+            ULONG extlen = get_extent_data_len(er->type);
+            
+            refcount += rc;
+            
+            er->hash = get_extent_hash(er->type, &er->edr);
+            
+            if (all_inline) {
+                // Each inline ref costs one type byte plus its payload.
+                if (inline_len + 1 + extlen > Vcb->superblock.node_size / 4) {
+                    all_inline = FALSE;
+                    first_noninline = er;
+                } else
+                    inline_len += extlen + 1;
+            }
         }
-    }
         
-    if (tp.item->size < sizeof(EXTENT_ITEM)) {
-        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
-        return STATUS_INTERNAL_ERROR;
+        le = next_le;
     }
     
-    ei = (EXTENT_ITEM*)tp.item->data;
+    ei = ExAllocatePoolWithTag(PagedPool, inline_len, ALLOC_TAG);
+    if (!ei) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
     
-    len = tp.item->size - sizeof(EXTENT_ITEM);
-    ptr = (UINT8*)&ei[1];
+    ei->refcount = refcount;
+    ei->generation = Vcb->superblock.generation;
+    ei->flags = flags;
     
-    if (ei->flags & EXTENT_ITEM_TREE_BLOCK) {
-        if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) {
-            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2));
-            return STATUS_INTERNAL_ERROR;
+    if (flags & EXTENT_ITEM_TREE_BLOCK) {
+        EXTENT_ITEM2* ei2 = (EXTENT_ITEM2*)&ei[1];
+        
+        if (firstitem) {
+            ei2->firstitem.obj_id = firstitem->obj_id;
+            ei2->firstitem.obj_type = firstitem->obj_type;
+            ei2->firstitem.offset = firstitem->offset;
+        } else {
+            ei2->firstitem.obj_id = 0;
+            ei2->firstitem.obj_type = 0;
+            ei2->firstitem.offset = 0;
         }
         
-        len -= sizeof(EXTENT_ITEM2);
-        ptr += sizeof(EXTENT_ITEM2);
-    }
-    
-    inline_rc = 0;
+        ei2->level = level;
+        
+        siptr = (UINT8*)&ei2[1];
+    } else
+        siptr = (UINT8*)&ei[1];
     
-    // Loop through existing inline extent entries
+    // Do we need to sort the inline extent refs? The Linux driver doesn't seem to bother.
     
-    while (len > 0) {
-        UINT8 secttype = *ptr;
-        ULONG sectlen = get_extent_data_len(secttype);
-        UINT64 sectcount = get_extent_data_refcount(secttype, ptr + sizeof(UINT8));
+    // Pass 2: write the inline refs (type byte, then payload), stopping at the first overflow ref.
+    le = extent_refs->Flink;
+    while (le != extent_refs) {
+        extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
+        ULONG extlen = get_extent_data_len(er->type);
         
-        len--;
+        if (!all_inline && er == first_noninline)
+            break;
         
-        if (sectlen > len) {
-            ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, len, sectlen);
-            return STATUS_INTERNAL_ERROR;
-        }
-
-        if (sectlen == 0) {
-            ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, secttype);
-            return STATUS_INTERNAL_ERROR;
-        }
+        *siptr = er->type;
+        siptr++;
         
-        // If inline extent already present, increase refcount and return
+        if (extlen > 0) {
+            RtlCopyMemory(siptr, &er->edr, extlen);
+            siptr += extlen;
+        }
+         
+        le = le->Flink;
+    }
+    
+    if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, size, ei, inline_len, NULL, Irp, rollback)) {
+        ERR("error - failed to insert item\n");
+        ExFreePool(ei);
+        return STATUS_INTERNAL_ERROR;
+    }
+    
+    // Pass 3: every ref from first_noninline onwards becomes its own item keyed by type and hash.
+    if (!all_inline) {
+        le = &first_noninline->list_entry;
+        
+        while (le != extent_refs) {
+            extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
+            ULONG len = get_extent_data_len(er->type);
+            UINT8* data;
+            
+            if (len > 0) {
+                data = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
+                
+                if (!data) {
+                    ERR("out of memory\n");
+                    return STATUS_INSUFFICIENT_RESOURCES;
+                }
+                
+                RtlCopyMemory(data, &er->edr, len);
+            } else
+                data = NULL;
+            
+            if (!insert_tree_item(Vcb, Vcb->extent_root, address, er->type, er->hash, data, len, NULL, Irp, rollback)) {
+                ERR("error - failed to insert item\n");
+                // Mirror the ei failure path above: the buffer was not inserted, so release it here.
+                if (data)
+                    ExFreePool(data);
+                return STATUS_INTERNAL_ERROR;
+            }
+            
+            le = le->Flink;
+        }
+    }
+    
+    return STATUS_SUCCESS;
+}
+
+static NTSTATUS convert_old_extent(device_extension* Vcb, UINT64 address, BOOL tree, KEY* firstitem, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
+    KEY searchkey;
+    traverse_ptr tp, next_tp;
+    LIST_ENTRY extent_refs; // refs gathered from the EXTENT_REF_V0 items; released at the end label
+    UINT64 size;
+    // Purpose: delete the extent item at address and its EXTENT_REF_V0 items, then re-insert them as one item built by construct_extent_item.
+    InitializeListHead(&extent_refs);
+    
+    searchkey.obj_id = address;
+    searchkey.obj_type = TYPE_EXTENT_ITEM;
+    searchkey.offset = 0xffffffffffffffff; // presumably matches the item whatever its offset; the real offset is read back below - confirm find_item semantics
+    
+    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("find_item returned %08x\n", Status);
+        return Status; // nothing has been added to extent_refs yet, so the end label is not needed
+    }
+    
+    if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
+        ERR("old-style extent %llx not found\n", address);
+        return STATUS_INTERNAL_ERROR;
+    }
+    
+    size = tp.item->key.offset; // the found item's key offset is reused as the size of the rebuilt item
+    
+    delete_tree_item(Vcb, &tp, rollback);
+    // Walk the items that follow, turning each EXTENT_REF_V0 of this address into a list entry and deleting it.
+    while (find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) {
+        tp = next_tp;
+        
+        if (tp.item->key.obj_id == address && tp.item->key.obj_type == TYPE_EXTENT_REF_V0 && tp.item->size >= sizeof(EXTENT_REF_V0)) {
+            EXTENT_REF_V0* erv0 = (EXTENT_REF_V0*)tp.item->data;
+            
+            if (tree) {
+                if (tp.item->key.offset == tp.item->key.obj_id) { // top of the tree
+                    Status = add_tree_block_extent_ref(&extent_refs, erv0->root);
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("add_tree_block_extent_ref returned %08x\n", Status);
+                        goto end;
+                    }
+                } else { // otherwise the key offset is recorded as a shared block ref
+                    Status = add_shared_block_extent_ref(&extent_refs, tp.item->key.offset);
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("add_shared_block_extent_ref returned %08x\n", Status);
+                        goto end;
+                    }
+                }
+            } else { // non-tree extent: key offset and erv0->count become a shared data ref
+                Status = add_shared_data_extent_ref(&extent_refs, tp.item->key.offset, erv0->count);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("add_shared_data_extent_ref returned %08x\n", Status);
+                    goto end;
+                }
+            }
+            
+            delete_tree_item(Vcb, &tp, rollback);
+        }
+        // Stop once the current key has an obj_id above address or an obj_type above TYPE_EXTENT_REF_V0.
+        if (tp.item->key.obj_id > address || tp.item->key.obj_type > TYPE_EXTENT_REF_V0)
+            break;
+    }
+    // Rebuild: tree extents get TREE_BLOCK | SHARED_BACKREFS flags, everything else EXTENT_ITEM_DATA.
+    Status = construct_extent_item(Vcb, address, size, tree ? (EXTENT_ITEM_TREE_BLOCK | EXTENT_ITEM_SHARED_BACKREFS) : EXTENT_ITEM_DATA,
+                                   &extent_refs, firstitem, level, Irp, rollback);
+    if (!NT_SUCCESS(Status))
+        ERR("construct_extent_item returned %08x\n", Status);
+
+end: // reached by fall-through and by every goto above; Status already holds the result to return
+    free_extent_refs(&extent_refs);
+    
+    return Status;
+}
+
+NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT8 type, void* data, KEY* firstitem, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
+    KEY searchkey;
+    traverse_ptr tp;
+    ULONG datalen = get_extent_data_len(type), len, max_extent_item_size;
+    EXTENT_ITEM* ei;
+    UINT8* ptr;
+    UINT64 inline_rc, offset;
+    UINT8* data2;
+    EXTENT_ITEM* newei;
+    BOOL skinny;
+    BOOL is_tree = type == TYPE_TREE_BLOCK_REF || type == TYPE_SHARED_BLOCK_REF;
+    
+    if (datalen == 0) {
+        ERR("unrecognized extent type %x\n", type);
+        return STATUS_INTERNAL_ERROR;
+    }
+    
+    searchkey.obj_id = address;
+    searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
+    searchkey.offset = 0xffffffffffffffff;
+    
+    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return Status;
+    }
+    
+    // If entry doesn't exist yet, create new inline extent item
+    
+    if (tp.item->key.obj_id != searchkey.obj_id || (tp.item->key.obj_type != TYPE_EXTENT_ITEM && tp.item->key.obj_type != TYPE_METADATA_ITEM)) {
+        ULONG eisize;
+        EXTENT_ITEM* ei;
+        UINT8* ptr;
+        
+        eisize = sizeof(EXTENT_ITEM);
+        if (is_tree) eisize += sizeof(EXTENT_ITEM2);
+        eisize += sizeof(UINT8);
+        eisize += datalen;
+        
+        ei = ExAllocatePoolWithTag(PagedPool, eisize, ALLOC_TAG);
+        if (!ei) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        ei->refcount = get_extent_data_refcount(type, data);
+        ei->generation = Vcb->superblock.generation;
+        ei->flags = is_tree ? EXTENT_ITEM_TREE_BLOCK : EXTENT_ITEM_DATA;
+        ptr = (UINT8*)&ei[1];
+        
+        if (is_tree && !(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
+            EXTENT_ITEM2* ei2 = (EXTENT_ITEM2*)ptr;
+            ei2->firstitem = *firstitem;
+            ei2->level = level;
+            ptr = (UINT8*)&ei2[1];
+        }
+        
+        *ptr = type;
+        RtlCopyMemory(ptr + 1, data, datalen);
+        
+        if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA && is_tree) {
+            if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, ei, eisize, NULL, Irp, rollback)) {
+                ERR("insert_tree_item failed\n");
+                return STATUS_INTERNAL_ERROR;
+            }
+        } else {
+            if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, size, ei, eisize, NULL, Irp, rollback)) {
+                ERR("insert_tree_item failed\n");
+                return STATUS_INTERNAL_ERROR;
+            }
+        }
+
+        return STATUS_SUCCESS;
+    } else if (tp.item->key.obj_id == address && tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.offset != size) {
+        ERR("extent %llx exists, but with size %llx rather than %llx expected\n", tp.item->key.obj_id, tp.item->key.offset, size);
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    skinny = tp.item->key.obj_type == TYPE_METADATA_ITEM;
+
+    if (tp.item->size == sizeof(EXTENT_ITEM_V0) && !skinny) {
+        Status = convert_old_extent(Vcb, address, is_tree, firstitem, level, Irp, rollback);
+        
+        if (!NT_SUCCESS(Status)) {
+            ERR("convert_old_extent returned %08x\n", Status);
+            return Status;
+        }
+
+        return increase_extent_refcount(Vcb, address, size, type, data, firstitem, level, Irp, rollback);
+    }
+        
+    if (tp.item->size < sizeof(EXTENT_ITEM)) {
+        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
+        return STATUS_INTERNAL_ERROR;
+    }
+    
+    ei = (EXTENT_ITEM*)tp.item->data;
+    
+    len = tp.item->size - sizeof(EXTENT_ITEM);
+    ptr = (UINT8*)&ei[1];
+    
+    if (ei->flags & EXTENT_ITEM_TREE_BLOCK && !skinny) {
+        if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) {
+            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2));
+            return STATUS_INTERNAL_ERROR;
+        }
+        
+        len -= sizeof(EXTENT_ITEM2);
+        ptr += sizeof(EXTENT_ITEM2);
+    }
+    
+    inline_rc = 0;
+    
+    // Loop through existing inline extent entries
+    
+    while (len > 0) {
+        UINT8 secttype = *ptr;
+        ULONG sectlen = get_extent_data_len(secttype);
+        UINT64 sectcount = get_extent_data_refcount(secttype, ptr + sizeof(UINT8));
+        
+        len--;
+        
+        if (sectlen > len) {
+            ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, len, sectlen);
+            return STATUS_INTERNAL_ERROR;
+        }
+
+        if (sectlen == 0) {
+            ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, secttype);
+            return STATUS_INTERNAL_ERROR;
+        }
+        
+        // If inline extent already present, increase refcount and return
         
         if (secttype == type) {
             if (type == TYPE_EXTENT_DATA_REF) {
@@ -273,8 +606,50 @@ static NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address,
                     return STATUS_SUCCESS;
                 }
             } else if (type == TYPE_TREE_BLOCK_REF) {
-                ERR("trying to increase refcount of tree extent\n");
-                return STATUS_INTERNAL_ERROR;
+                TREE_BLOCK_REF* secttbr = (TREE_BLOCK_REF*)(ptr + sizeof(UINT8));
+                TREE_BLOCK_REF* tbr = (TREE_BLOCK_REF*)data;
+                
+                if (secttbr->offset == tbr->offset) {
+                    TRACE("trying to increase refcount of non-shared tree extent\n");
+                    return STATUS_SUCCESS;
+                }
+            } else if (type == TYPE_SHARED_BLOCK_REF) {
+                SHARED_BLOCK_REF* sectsbr = (SHARED_BLOCK_REF*)(ptr + sizeof(UINT8));
+                SHARED_BLOCK_REF* sbr = (SHARED_BLOCK_REF*)data;
+                
+                if (sectsbr->offset == sbr->offset)
+                    return STATUS_SUCCESS;
+            } else if (type == TYPE_SHARED_DATA_REF) {
+                SHARED_DATA_REF* sectsdr = (SHARED_DATA_REF*)(ptr + sizeof(UINT8));
+                SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)data;
+                
+                if (sectsdr->offset == sdr->offset) {
+                    UINT32 rc = get_extent_data_refcount(type, data);
+                    SHARED_DATA_REF* sectsdr2;
+                    
+                    newei = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
+                    if (!newei) {
+                        ERR("out of memory\n");
+                        return STATUS_INSUFFICIENT_RESOURCES;
+                    }
+                    
+                    RtlCopyMemory(newei, tp.item->data, tp.item->size);
+                    
+                    newei->generation = Vcb->superblock.generation;
+                    newei->refcount += rc;
+                    
+                    sectsdr2 = (SHARED_DATA_REF*)((UINT8*)newei + ((UINT8*)sectsdr - tp.item->data));
+                    sectsdr2->count += rc;
+                    
+                    delete_tree_item(Vcb, &tp, rollback);
+                    
+                    if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp, rollback)) {
+                        ERR("insert_tree_item failed\n");
+                        return STATUS_INTERNAL_ERROR;
+                    }
+                    
+                    return STATUS_SUCCESS;
+                }
             } else {
                 ERR("unhandled extent type %x\n", type);
                 return STATUS_INTERNAL_ERROR;
@@ -296,7 +671,7 @@ static NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address,
         len = tp.item->size - sizeof(EXTENT_ITEM);
         ptr = (UINT8*)&ei[1];
         
-        if (ei->flags & EXTENT_ITEM_TREE_BLOCK) {
+        if (ei->flags & EXTENT_ITEM_TREE_BLOCK && !skinny) {
             len -= sizeof(EXTENT_ITEM2);
             ptr += sizeof(EXTENT_ITEM2);
         }
@@ -358,7 +733,7 @@ static NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address,
             return Status;
         }
         
-        if (!keycmp(&tp.item->key, &searchkey)) {
+        if (!keycmp(tp.item->key, searchkey)) {
             if (tp.item->size < datalen) {
                 ERR("(%llx,%x,%llx) was %x bytes, expecting %x\n", tp2.item->key.obj_id, tp2.item->key.obj_type, tp2.item->key.offset, tp.item->size, datalen);
                 return STATUS_INTERNAL_ERROR;
@@ -372,8 +747,14 @@ static NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address,
                 
                 edr->count += get_extent_data_refcount(type, data);
             } else if (type == TYPE_TREE_BLOCK_REF) {
-                ERR("trying to increase refcount of tree extent\n");
-                return STATUS_INTERNAL_ERROR;
+                TRACE("trying to increase refcount of non-shared tree extent\n");
+                return STATUS_SUCCESS;
+            } else if (type == TYPE_SHARED_BLOCK_REF)
+                return STATUS_SUCCESS;
+            else if (type == TYPE_SHARED_DATA_REF) {
+                SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)data2;
+                
+                sdr->count += get_extent_data_refcount(type, data);
             } else {
                 ERR("unhandled extent type %x\n", type);
                 return STATUS_INTERNAL_ERROR;
@@ -446,8 +827,8 @@ void decrease_chunk_usage(chunk* c, UINT64 delta) {
     TRACE("decreasing size of chunk %llx by %llx\n", c->offset, delta);
 }
 
-static NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT8 type, void* data, KEY* firstitem,
-                                         UINT8 level, UINT64 parent, PIRP Irp, LIST_ENTRY* rollback) {
+NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT8 type, void* data, KEY* firstitem,
+                                  UINT8 level, UINT64 parent, PIRP Irp, LIST_ENTRY* rollback) {
     KEY searchkey;
     NTSTATUS Status;
     traverse_ptr tp, tp2;
@@ -457,57 +838,53 @@ static NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address,
     UINT8* ptr;
     UINT32 rc = data ? get_extent_data_refcount(type, data) : 1;
     ULONG datalen = get_extent_data_len(type);
+    BOOL is_tree = (type == TYPE_TREE_BLOCK_REF || type == TYPE_SHARED_BLOCK_REF), skinny = FALSE;
     
-    // FIXME - handle trees
-    
-    searchkey.obj_id = address;
-    searchkey.obj_type = TYPE_EXTENT_ITEM;
-    searchkey.offset = 0xffffffffffffffff;
-    
-    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
+    if (is_tree && Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
+        searchkey.obj_id = address;
+        searchkey.obj_type = TYPE_METADATA_ITEM;
+        searchkey.offset = 0xffffffffffffffff;
+        
+        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            return Status;
+        }
+        
+        if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type)
+            skinny = TRUE;
     }
     
-    if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
-        ERR("could not find EXTENT_ITEM for address %llx\n", address);
-        return STATUS_INTERNAL_ERROR;
-    }
-    
-    if (tp.item->key.offset != size) {
-        ERR("extent %llx had length %llx, not %llx as expected\n", address, tp.item->key.offset, size);
-        return STATUS_INTERNAL_ERROR;
-    }
-    
-    if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
-        EXTENT_ITEM_V0* eiv0 = (EXTENT_ITEM_V0*)tp.item->data;
-        
-        TRACE("converting old-style extent at (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-        
-        ei = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM), ALLOC_TAG);
+    if (!skinny) {
+        searchkey.obj_id = address;
+        searchkey.obj_type = TYPE_EXTENT_ITEM;
+        searchkey.offset = 0xffffffffffffffff;
         
-        if (!ei) {
-            ERR("out of memory\n");
-            return STATUS_INSUFFICIENT_RESOURCES;
+        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            return Status;
         }
         
-        ei->refcount = eiv0->refcount;
-        ei->generation = Vcb->superblock.generation;
-        ei->flags = EXTENT_ITEM_DATA;
-        
-        delete_tree_item(Vcb, &tp, rollback);
+        if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
+            ERR("could not find EXTENT_ITEM for address %llx\n", address);
+            return STATUS_INTERNAL_ERROR;
+        }
         
-        if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ei, sizeof(EXTENT_ITEM), &tp, Irp, rollback)) {
-            ERR("insert_tree_item failed\n");
-            ExFreePool(ei);
+        if (tp.item->key.offset != size) {
+            ERR("extent %llx had length %llx, not %llx as expected\n", address, tp.item->key.offset, size);
             return STATUS_INTERNAL_ERROR;
         }
         
-        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("error - find_item returned %08x\n", Status);
-            return Status;
+        if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
+            Status = convert_old_extent(Vcb, address, is_tree, firstitem, level, Irp, rollback);
+            
+            if (!NT_SUCCESS(Status)) {
+                ERR("convert_old_extent returned %08x\n", Status);
+                return Status;
+            }
+
+            return decrease_extent_refcount(Vcb, address, size, type, data, firstitem, level, parent, Irp, rollback);
         }
     }
     
@@ -521,7 +898,7 @@ static NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address,
     len = tp.item->size - sizeof(EXTENT_ITEM);
     ptr = (UINT8*)&ei[1];
     
-    if (ei->flags & EXTENT_ITEM_TREE_BLOCK) {
+    if (ei->flags & EXTENT_ITEM_TREE_BLOCK && !skinny) {
         if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) {
             ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2));
             return STATUS_INTERNAL_ERROR;
@@ -648,6 +1025,80 @@ static NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address,
                         return STATUS_INTERNAL_ERROR;
                     }
                     
+                    return STATUS_SUCCESS;
+                }
+            } else if (type == TYPE_TREE_BLOCK_REF) {
+                TREE_BLOCK_REF* secttbr = (TREE_BLOCK_REF*)(ptr + sizeof(UINT8));
+                TREE_BLOCK_REF* tbr = (TREE_BLOCK_REF*)data;
+                ULONG neweilen;
+                EXTENT_ITEM* newei;
+                
+                if (secttbr->offset == tbr->offset) {
+                    if (ei->refcount == 1) {
+                        delete_tree_item(Vcb, &tp, rollback);
+                        return STATUS_SUCCESS;
+                    }
+
+                    neweilen = tp.item->size - sizeof(UINT8) - sectlen;
+                    
+                    newei = ExAllocatePoolWithTag(PagedPool, neweilen, ALLOC_TAG);
+                    if (!newei) {
+                        ERR("out of memory\n");
+                        return STATUS_INSUFFICIENT_RESOURCES;
+                    }
+                    
+                    RtlCopyMemory(newei, ei, ptr - tp.item->data);
+                    
+                    if (len > sectlen)
+                        RtlCopyMemory((UINT8*)newei + (ptr - tp.item->data), ptr + sectlen + sizeof(UINT8), len - sectlen);
+                    
+                    newei->generation = Vcb->superblock.generation;
+                    newei->refcount--;
+                    
+                    delete_tree_item(Vcb, &tp, rollback);
+                    
+                    if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, neweilen, NULL, Irp, rollback)) {
+                        ERR("insert_tree_item failed\n");
+                        return STATUS_INTERNAL_ERROR;
+                    }
+                    
+                    return STATUS_SUCCESS;
+                }
+            } else if (type == TYPE_SHARED_BLOCK_REF) {
+                SHARED_BLOCK_REF* sectsbr = (SHARED_BLOCK_REF*)(ptr + sizeof(UINT8));
+                SHARED_BLOCK_REF* sbr = (SHARED_BLOCK_REF*)data;
+                ULONG neweilen;
+                EXTENT_ITEM* newei;
+                
+                if (sectsbr->offset == sbr->offset) {
+                    if (ei->refcount == 1) {
+                        delete_tree_item(Vcb, &tp, rollback);
+                        return STATUS_SUCCESS;
+                    }
+                    
+                    neweilen = tp.item->size - sizeof(UINT8) - sectlen;
+                    
+                    newei = ExAllocatePoolWithTag(PagedPool, neweilen, ALLOC_TAG);
+                    if (!newei) {
+                        ERR("out of memory\n");
+                        return STATUS_INSUFFICIENT_RESOURCES;
+                    }
+                    
+                    RtlCopyMemory(newei, ei, ptr - tp.item->data);
+                    
+                    if (len > sectlen)
+                        RtlCopyMemory((UINT8*)newei + (ptr - tp.item->data), ptr + sectlen + sizeof(UINT8), len - sectlen);
+                    
+                    newei->generation = Vcb->superblock.generation;
+                    newei->refcount--;
+                    
+                    delete_tree_item(Vcb, &tp, rollback);
+                    
+                    if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, neweilen, NULL, Irp, rollback)) {
+                        ERR("insert_tree_item failed\n");
+                        return STATUS_INTERNAL_ERROR;
+                    }
+                    
                     return STATUS_SUCCESS;
                 }
             } else {
@@ -676,7 +1127,7 @@ static NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address,
         return Status;
     }
     
-    if (keycmp(&tp2.item->key, &searchkey)) {
+    if (keycmp(tp2.item->key, searchkey)) {
         ERR("(%llx,%x,%llx) not found\n", tp2.item->key.obj_id, tp2.item->key.obj_type, tp2.item->key.offset);
         return STATUS_INTERNAL_ERROR;
     }
@@ -780,6 +1231,80 @@ static NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address,
                 return STATUS_INTERNAL_ERROR;
             }
             
+            return STATUS_SUCCESS;
+        } else {
+            ERR("error - collision?\n");
+            return STATUS_INTERNAL_ERROR;
+        }
+    } else if (type == TYPE_SHARED_BLOCK_REF) {
+        SHARED_BLOCK_REF* sectsbr = (SHARED_BLOCK_REF*)tp2.item->data;
+        SHARED_BLOCK_REF* sbr = (SHARED_BLOCK_REF*)data;
+        EXTENT_ITEM* newei;
+        
+        if (sectsbr->offset == sbr->offset) {
+            if (ei->refcount == 1) {
+                delete_tree_item(Vcb, &tp, rollback);
+                delete_tree_item(Vcb, &tp2, rollback);
+                return STATUS_SUCCESS;
+            }
+            
+            delete_tree_item(Vcb, &tp2, rollback);
+            
+            newei = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
+            if (!newei) {
+                ERR("out of memory\n");
+                return STATUS_INSUFFICIENT_RESOURCES;
+            }
+            
+            RtlCopyMemory(newei, tp.item->data, tp.item->size);
+
+            newei->generation = Vcb->superblock.generation;
+            newei->refcount -= rc;
+            
+            delete_tree_item(Vcb, &tp, rollback);
+            
+            if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp, rollback)) {
+                ERR("insert_tree_item failed\n");
+                return STATUS_INTERNAL_ERROR;
+            }
+            
+            return STATUS_SUCCESS;
+        } else {
+            ERR("error - collision?\n");
+            return STATUS_INTERNAL_ERROR;
+        }
+    } else if (type == TYPE_TREE_BLOCK_REF) {
+        TREE_BLOCK_REF* secttbr = (TREE_BLOCK_REF*)tp2.item->data;
+        TREE_BLOCK_REF* tbr = (TREE_BLOCK_REF*)data;
+        EXTENT_ITEM* newei;
+        
+        if (secttbr->offset == tbr->offset) {
+            if (ei->refcount == 1) {
+                delete_tree_item(Vcb, &tp, rollback);
+                delete_tree_item(Vcb, &tp2, rollback);
+                return STATUS_SUCCESS;
+            }
+            
+            delete_tree_item(Vcb, &tp2, rollback);
+            
+            newei = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
+            if (!newei) {
+                ERR("out of memory\n");
+                return STATUS_INSUFFICIENT_RESOURCES;
+            }
+            
+            RtlCopyMemory(newei, tp.item->data, tp.item->size);
+
+            newei->generation = Vcb->superblock.generation;
+            newei->refcount -= rc;
+            
+            delete_tree_item(Vcb, &tp, rollback);
+            
+            if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp, rollback)) {
+                ERR("insert_tree_item failed\n");
+                return STATUS_INTERNAL_ERROR;
+            }
+            
             return STATUS_SUCCESS;
         } else {
             ERR("error - collision?\n");
@@ -834,336 +1359,575 @@ NTSTATUS decrease_extent_refcount_data(device_extension* Vcb, UINT64 address, UI
     return decrease_extent_refcount(Vcb, address, size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, 0, Irp, rollback);
 }
 
-NTSTATUS decrease_extent_refcount_shared_data(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 treeaddr, UINT64 parent, PIRP Irp, LIST_ENTRY* rollback) {
-    SHARED_DATA_REF sdr;
-
-    sdr.offset = treeaddr;
-    sdr.count = 1;
+NTSTATUS decrease_extent_refcount_tree(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root,
+                                       UINT8 level, PIRP Irp, LIST_ENTRY* rollback) { /*
+     * Drops a tree-block reference on the extent at (address, size).
+     * root is stored as the offset of a TREE_BLOCK_REF, which is then handed to
+     * decrease_extent_refcount() with TYPE_TREE_BLOCK_REF. The NTSTATUS of that
+     * call is returned unchanged; level, Irp and rollback are passed straight through.
+     */
+    TREE_BLOCK_REF tbr;
     
-    return decrease_extent_refcount(Vcb, address, size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, parent, Irp, rollback);
-}
-
-NTSTATUS decrease_extent_refcount_old(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 treeaddr, PIRP Irp, LIST_ENTRY* rollback) {
-    return decrease_extent_refcount(Vcb, address, size, TYPE_EXTENT_REF_V0, NULL, NULL, 0, treeaddr, Irp, rollback);
-}
-
-typedef struct {
-    UINT8 type;
-    void* data;
-    BOOL allocated;
-    UINT64 hash;
-    LIST_ENTRY list_entry;
-} extent_ref;
-
-static void free_extent_refs(LIST_ENTRY* extent_refs) {
-    while (!IsListEmpty(extent_refs)) {
-        LIST_ENTRY* le = RemoveHeadList(extent_refs);
-        extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
-        
-        if (er->allocated)
-            ExFreePool(er->data);
-        
-        ExFreePool(er);
-    }
+    tbr.offset = root;
+    // NOTE(review): the sixth argument is NULL and carries an upstream FIXME; what it should be is not visible here.
+    return decrease_extent_refcount(Vcb, address, size, TYPE_TREE_BLOCK_REF, &tbr, NULL/*FIXME*/, level, 0, Irp, rollback);
 }
 
-static NTSTATUS add_data_extent_ref(LIST_ENTRY* extent_refs, UINT64 tree_id, UINT64 obj_id, UINT64 offset) {
-    extent_ref* er2;
+static UINT64 find_extent_data_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, PIRP Irp) { /*
+     * Returns the count stored for the data reference (root, objid, offset) to the
+     * extent at address, or 0 when it cannot be found or a lookup/validation step fails.
+     * The references stored inline in the EXTENT_ITEM are scanned first; if none of
+     * them matches, a separate TYPE_EXTENT_DATA_REF item keyed by
+     * get_extent_data_ref_hash2(root, objid, offset) is looked up instead.
+     */
+    NTSTATUS Status;
+    KEY searchkey;
+    traverse_ptr tp;
     EXTENT_DATA_REF* edr;
-    LIST_ENTRY* le;
     
-    if (!IsListEmpty(extent_refs)) {
-        le = extent_refs->Flink;
+    searchkey.obj_id = address;
+    searchkey.obj_type = TYPE_EXTENT_ITEM;
+    searchkey.offset = 0xffffffffffffffff;
+    // offset is all-ones - presumably so the search lands on this address's item whatever its size; TODO confirm against find_item
+    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return 0;
+    }
+    // The item handed back is only accepted if it is an EXTENT_ITEM for this address...
+    if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
+        TRACE("could not find address %llx in extent tree\n", address);
+        return 0;
+    }
+    // ...whose key offset equals the size the caller expects
+    if (tp.item->key.offset != size) {
+        ERR("extent %llx had size %llx, not %llx as expected\n", address, tp.item->key.offset, size);
+        return 0;
+    }
+    // Items smaller than an EXTENT_ITEM skip the inline scan and fall through to the separate-item lookup
+    if (tp.item->size >= sizeof(EXTENT_ITEM)) {
+        EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
+        UINT32 len = tp.item->size - sizeof(EXTENT_ITEM);
+        UINT8* ptr = (UINT8*)&ei[1];
         
-        while (le != extent_refs) {
-            extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
+        while (len > 0) {
+            UINT8 secttype = *ptr;
+            ULONG sectlen = get_extent_data_len(secttype);
+            UINT64 sectcount = get_extent_data_refcount(secttype, ptr + sizeof(UINT8));
+            // NOTE(review): sectcount is fetched from the payload before sectlen is checked against len below - confirm this cannot read past the item
+            len--;
+            // len now counts the bytes that follow the type byte
+            if (sectlen > len) {
+                ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, len, sectlen);
+                return 0;
+            }
+
+            if (sectlen == 0) {
+                ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, secttype);
+                return 0;
+            }
             
-            if (er->type == TYPE_EXTENT_DATA_REF) {
-                edr = (EXTENT_DATA_REF*)er->data;
+            if (secttype == TYPE_EXTENT_DATA_REF) {
+                EXTENT_DATA_REF* sectedr = (EXTENT_DATA_REF*)(ptr + sizeof(UINT8));
                 
-                if (edr->root == tree_id && edr->objid == obj_id && edr->offset == offset) {
-                    edr->count++;
-                    return STATUS_SUCCESS;
-                }
+                if (sectedr->root == root && sectedr->objid == objid && sectedr->offset == offset)
+                    return sectcount;
             }
             
-            le = le->Flink;
+            len -= sectlen;
+            ptr += sizeof(UINT8) + sectlen;
         }
     }
     
-    er2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent_ref), ALLOC_TAG);
-    if (!er2) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
+    searchkey.obj_id = address;
+    searchkey.obj_type = TYPE_EXTENT_DATA_REF;
+    searchkey.offset = get_extent_data_ref_hash2(root, objid, offset);
     
-    edr = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA_REF), ALLOC_TAG);
-    if (!edr) {
-        ERR("out of memory\n");
-        ExFreePool(er2);
-        return STATUS_INSUFFICIENT_RESOURCES;
+    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return 0;
     }
     
-    edr->root = tree_id;
-    edr->objid = obj_id;
-    edr->offset = offset;
-    edr->count = 1; // FIXME - not necessarily
-    
-    er2->type = TYPE_EXTENT_DATA_REF;
-    er2->data = edr;
-    er2->allocated = TRUE;
-    
-    InsertTailList(extent_refs, &er2->list_entry);
+    if (!keycmp(searchkey, tp.item->key)) {    // only an exact key match is used
+        if (tp.item->size < sizeof(EXTENT_DATA_REF))
+            ERR("(%llx,%x,%llx) has size %u, not %u as expected\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA_REF));
+        else {    
+            edr = (EXTENT_DATA_REF*)tp.item->data;
+            
+            return edr->count;
+        }
+    }
     
-    return STATUS_SUCCESS;
+    return 0;
 }
 
-static NTSTATUS construct_extent_item(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 flags, LIST_ENTRY* extent_refs, PIRP Irp, LIST_ENTRY* rollback) {
-    LIST_ENTRY *le, *next_le;
-    UINT64 refcount;
-    ULONG inline_len;
-    BOOL all_inline = TRUE;
-    extent_ref* first_noninline;
+UINT64 get_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, PIRP Irp) { /*
+     * Returns the refcount field of the extent item stored for address, or 0 if the
+     * item cannot be found or fails validation (each failure is logged with ERR).
+     * With BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA set the search targets a METADATA_ITEM
+     * and, if one is found, its refcount is returned without comparing size.
+     * Otherwise an EXTENT_ITEM whose key offset equals size is required.
+     */
+    KEY searchkey;
+    traverse_ptr tp;
+    NTSTATUS Status;
     EXTENT_ITEM* ei;
-    UINT8* siptr;
     
-    if (IsListEmpty(extent_refs)) {
-        WARN("no extent refs found\n");
-        return STATUS_SUCCESS;
+    searchkey.obj_id = address;
+    searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
+    searchkey.offset = 0xffffffffffffffff;
+    // offset is all-ones - presumably so the search lands on this address's item whatever its key offset; TODO confirm against find_item
+    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return 0;
     }
     
-    refcount = 0;
-    inline_len = sizeof(EXTENT_ITEM);
+    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA && tp.item->key.obj_id == address &&
+        tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) {
+        ei = (EXTENT_ITEM*)tp.item->data;
     
-    le = extent_refs->Flink;
-    while (le != extent_refs) {
-        extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
-        UINT64 rc;
-        
-        next_le = le->Flink;
-        
-        rc = get_extent_data_refcount(er->type, er->data);
-        
-        if (rc == 0) {
-            if (er->allocated)
-                ExFreePool(er->data);
-            
-            RemoveEntryList(&er->list_entry);
-            
-            ExFreePool(er);
-        } else {
-            ULONG extlen = get_extent_data_len(er->type);
-            
-            refcount += rc;
-            
-            if (er->type == TYPE_EXTENT_DATA_REF)
-                er->hash = get_extent_data_ref_hash(er->data);
-            else
-                er->hash = 0;
-            
-            if (all_inline) {
-                if (inline_len + 1 + extlen > Vcb->superblock.node_size / 4) {
-                    all_inline = FALSE;
-                    first_noninline = er;
-                } else
-                    inline_len += extlen + 1;
-            }
-        }
+        return ei->refcount;
+    }
+    // No usable METADATA_ITEM: fall back to a plain EXTENT_ITEM whose key offset must equal size
+    if (tp.item->key.obj_id != address || tp.item->key.obj_type != TYPE_EXTENT_ITEM) {
+        ERR("couldn't find (%llx,%x,%llx) in extent tree\n", address, TYPE_EXTENT_ITEM, size);
+        return 0;
+    } else if (tp.item->key.offset != size) {
+        ERR("extent %llx had size %llx, not %llx as expected\n", address, tp.item->key.offset, size);
+        return 0;
+    }
+    // An item exactly the size of EXTENT_ITEM_V0 is read through that layout instead
+    if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
+        EXTENT_ITEM_V0* eiv0 = (EXTENT_ITEM_V0*)tp.item->data;
         
-        le = next_le;
+        return eiv0->refcount;
+    } else if (tp.item->size < sizeof(EXTENT_ITEM)) {
+        ERR("(%llx,%x,%llx) was %x bytes, expected at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type,
+                                                                       tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA)); // NOTE(review): the check is against sizeof(EXTENT_ITEM) but the message prints sizeof(EXTENT_DATA)
+        return 0;
     }
     
-    ei = ExAllocatePoolWithTag(PagedPool, inline_len, ALLOC_TAG);
-    if (!ei) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
+    ei = (EXTENT_ITEM*)tp.item->data;
+    
+    return ei->refcount;
+}
+
+BOOL is_extent_unique(device_extension* Vcb, UINT64 address, UINT64 size, PIRP Irp) { /*
+     * Reports whether the extent at (address, size) is referenced by a single owner.
+     * TRUE when the total refcount is 1, or when EXTENT_DATA_REF entries that all share
+     * one root and one objid add up to the total refcount (inline entries first, then
+     * separate TYPE_EXTENT_DATA_REF items). Every other outcome - lookup failure,
+     * refcount 0, an EXTENT_ITEM_V0-sized item, a malformed item, any other inline
+     * reference type, or refs left unaccounted for - yields FALSE.
+     */
+    KEY searchkey;
+    traverse_ptr tp, next_tp;
+    NTSTATUS Status;
+    UINT64 rc, rcrun, root = 0, inode = 0;
+    UINT32 len;
+    EXTENT_ITEM* ei;
+    UINT8* ptr;
+    BOOL b;
+    // Cheap answers first, from the total refcount alone
+    rc = get_extent_refcount(Vcb, address, size, Irp);
+
+    if (rc == 1)
+        return TRUE;
+    
+    if (rc == 0)
+        return FALSE;
+    // More than one ref: fetch the EXTENT_ITEM itself (exact key, checked below) to see who holds them
+    searchkey.obj_id = address;
+    searchkey.obj_type = TYPE_EXTENT_ITEM;
+    searchkey.offset = size;
+    
+    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        WARN("error - find_item returned %08x\n", Status);
+        return FALSE;
     }
     
-    ei->refcount = refcount;
-    ei->generation = Vcb->superblock.generation;
-    ei->flags = flags;
+    if (keycmp(tp.item->key, searchkey)) {
+        WARN("could not find (%llx,%x,%llx)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
+        return FALSE;
+    }
     
-    // Do we need to sort the inline extent refs? The Linux driver doesn't seem to bother.
+    if (tp.item->size == sizeof(EXTENT_ITEM_V0))
+        return FALSE;
     
-    siptr = (UINT8*)&ei[1];
-    le = extent_refs->Flink;
-    while (le != extent_refs) {
-        extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
-        ULONG extlen = get_extent_data_len(er->type);
+    if (tp.item->size < sizeof(EXTENT_ITEM)) {
+        WARN("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
+        return FALSE;
+    }
+    
+    ei = (EXTENT_ITEM*)tp.item->data;
+    // Inline entries start right after the EXTENT_ITEM header
+    len = tp.item->size - sizeof(EXTENT_ITEM);
+    ptr = (UINT8*)&ei[1];
+    // When EXTENT_ITEM_TREE_BLOCK is set an EXTENT_ITEM2 sits before the inline entries and is stepped over
+    if (ei->flags & EXTENT_ITEM_TREE_BLOCK) {
+        if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) {
+            WARN("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2));
+            return FALSE;
+        }
         
-        if (!all_inline && er == first_noninline)
-            break;
+        len -= sizeof(EXTENT_ITEM2);
+        ptr += sizeof(EXTENT_ITEM2);
+    }
+    
+    rcrun = 0;
+    
+    // Loop through inline extent entries
+    
+    while (len > 0) {
+        UINT8 secttype = *ptr;
+        ULONG sectlen = get_extent_data_len(secttype);
+        UINT64 sectcount = get_extent_data_refcount(secttype, ptr + sizeof(UINT8)); // NOTE(review): read before sectlen is checked against len below - confirm this cannot read past the item
         
-        *siptr = er->type;
-        siptr++;
+        len--;
         
-        if (extlen > 0) {
-            RtlCopyMemory(siptr, er->data, extlen);
-            siptr += extlen;
+        if (sectlen > len) {
+            WARN("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, len, sectlen);
+            return FALSE;
         }
-         
-        le = le->Flink;
+
+        if (sectlen == 0) {
+            WARN("(%llx,%x,%llx): unrecognized extent type %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, secttype);
+            return FALSE;
+        }
+        // Only EXTENT_DATA_REF entries naming one and the same root/objid keep the extent unique
+        if (secttype == TYPE_EXTENT_DATA_REF) {
+            EXTENT_DATA_REF* sectedr = (EXTENT_DATA_REF*)(ptr + sizeof(UINT8));
+            // root == 0 && inode == 0 doubles as "no owner recorded yet"
+            if (root == 0 && inode == 0) {
+                root = sectedr->root;
+                inode = sectedr->objid;
+            } else if (root != sectedr->root || inode != sectedr->objid)
+                return FALSE;
+        } else
+            return FALSE;
+        
+        len -= sectlen;
+        ptr += sizeof(UINT8) + sectlen;
+        rcrun += sectcount;
     }
     
-    if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, size, ei, inline_len, NULL, Irp, rollback)) {
-        ERR("error - failed to insert item\n");
-        ExFreePool(ei);
-        return STATUS_INTERNAL_ERROR;
-    }
+    if (rcrun == rc)
+        return TRUE;
+
+    // Loop through non-inlines if some refs still unaccounted for
     
-    if (!all_inline) {
-        le = &first_noninline->list_entry;
+    do {
+        b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp);
         
-        while (le != extent_refs) {
-            extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
+        if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_EXTENT_DATA_REF) {
+            EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)tp.item->data;
             
-            if (!insert_tree_item(Vcb, Vcb->extent_root, address, er->type, er->hash, er->data, get_extent_data_len(er->type), NULL, Irp, rollback)) {
-                ERR("error - failed to insert item\n");
-                return STATUS_INTERNAL_ERROR;
+            if (tp.item->size < sizeof(EXTENT_DATA_REF)) {
+                WARN("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
+                     tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)); // NOTE(review): the check is against sizeof(EXTENT_DATA_REF) but the message prints a different size
+                return FALSE;
             }
             
-            er->allocated = FALSE;
+            if (root == 0 && inode == 0) {
+                root = edr->root;
+                inode = edr->objid;
+            } else if (root != edr->root || inode != edr->objid)
+                return FALSE;
             
-            le = le->Flink;
+            rcrun += edr->count;
         }
-    }
+        
+        if (rcrun == rc)
+            return TRUE;
+        // Advance to the next item; stop once the walk has moved past this extent's address
+        if (b) {
+            tp = next_tp;
+            
+            if (tp.item->key.obj_id > searchkey.obj_id)
+                break;
+        }
+    } while (b);
     
-    return STATUS_SUCCESS;
+    // If we reach this point, there are still some refs unaccounted for somewhere.
+    // Return FALSE in case we mess things up elsewhere.
+    
+    return FALSE;
 }
 
-static NTSTATUS populate_extent_refs_from_tree(device_extension* Vcb, UINT64 tree_address, UINT64 extent_address, LIST_ENTRY* extent_refs) {
-    UINT8* buf;
-    tree_header* th;
+UINT64 get_extent_flags(device_extension* Vcb, UINT64 address, PIRP Irp) { /*
+     * Returns the flags field of the extent item stored for address.
+     * 0 is returned when the lookup fails, when no matching METADATA_ITEM/EXTENT_ITEM
+     * is found, when the item has the size of an EXTENT_ITEM_V0, or when it is too
+     * small to be an EXTENT_ITEM - so callers cannot tell "no flags" from "failed".
+     */
+    KEY searchkey;
+    traverse_ptr tp;
     NTSTATUS Status;
+    EXTENT_ITEM* ei;
     
-    buf = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
-    if (!buf) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-
-    Status = read_data(Vcb, tree_address, Vcb->superblock.node_size, NULL, TRUE, buf, NULL, NULL);
+    searchkey.obj_id = address;
+    searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
+    searchkey.offset = 0xffffffffffffffff;
+    // offset is all-ones - presumably so the search lands on this address's item whatever its key offset; TODO confirm against find_item
+    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
     if (!NT_SUCCESS(Status)) {
-        ERR("read_data returned %08x\n", Status);
-        ExFreePool(buf);
-        return Status;
+        ERR("error - find_item returned %08x\n", Status);
+        return 0;
     }
     
-    th = (tree_header*)buf;
-
-    if (th->level == 0) {
-        UINT32 i;
-        leaf_node* ln = (leaf_node*)&th[1];
-        
-        for (i = 0; i < th->num_items; i++) {
-            if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) && ln[i].offset + ln[i].size <= Vcb->superblock.node_size - sizeof(tree_header)) {
-                EXTENT_DATA* ed = (EXTENT_DATA*)(((UINT8*)&th[1]) + ln[i].offset);
-                
-                if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
-                    EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ed->data[0];
-                    
-                    if (ed2->address == extent_address) {
-                        Status = add_data_extent_ref(extent_refs, th->tree_id, ln[i].key.obj_id, ln[i].key.offset);
-                        if (!NT_SUCCESS(Status)) {
-                            ERR("add_data_extent_ref returned %08x\n", Status);
-                            ExFreePool(buf);
-                            return Status;
-                        }
-                    }
-                }
-            }
-        }
-    } else
-        WARN("shared data ref pointed to tree of level %x\n", th->level);
+    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA && tp.item->key.obj_id == address &&
+        tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) {
+        ei = (EXTENT_ITEM*)tp.item->data;
     
-    ExFreePool(buf);
+        return ei->flags;
+    }
     
-    return STATUS_SUCCESS;
+    if (tp.item->key.obj_id != address || tp.item->key.obj_type != TYPE_EXTENT_ITEM) {
+        ERR("couldn't find %llx in extent tree\n", address);
+        return 0;
+    }
+    // An EXTENT_ITEM_V0-sized item is reported as having no flags
+    if (tp.item->size == sizeof(EXTENT_ITEM_V0))
+        return 0;
+    else if (tp.item->size < sizeof(EXTENT_ITEM)) {
+        ERR("(%llx,%x,%llx) was %x bytes, expected at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type,
+                                                                   tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA)); // NOTE(review): the check is against sizeof(EXTENT_ITEM) but the message prints sizeof(EXTENT_DATA)
+        return 0;
+    }
+    
+    ei = (EXTENT_ITEM*)tp.item->data;
+    
+    return ei->flags;
 }
 
-NTSTATUS convert_old_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, PIRP Irp, LIST_ENTRY* rollback) {
+void update_extent_flags(device_extension* Vcb, UINT64 address, UINT64 flags, PIRP Irp) { /*
+     * Overwrites the flags field of the extent item stored for address with flags.
+     * The item is located the same way get_extent_flags() does it (METADATA_ITEM
+     * when skinny metadata is enabled, otherwise EXTENT_ITEM). On any failure, or
+     * for an EXTENT_ITEM_V0-sized item, the function returns without writing; being
+     * void, it gives the caller no indication of that.
+     */
     KEY searchkey;
-    traverse_ptr tp, next_tp;
-    BOOL b;
-    LIST_ENTRY extent_refs;
+    traverse_ptr tp;
     NTSTATUS Status;
+    EXTENT_ITEM* ei;
     
     searchkey.obj_id = address;
-    searchkey.obj_type = TYPE_EXTENT_ITEM;
-    searchkey.offset = size;
+    searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
+    searchkey.offset = 0xffffffffffffffff;
     
     Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
     if (!NT_SUCCESS(Status)) {
         ERR("error - find_item returned %08x\n", Status);
-        return Status;
+        return;
     }
     
-    if (keycmp(&tp.item->key, &searchkey)) {
-        WARN("extent item not found for address %llx, size %llx\n", address, size);
-        return STATUS_SUCCESS;
+    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA && tp.item->key.obj_id == address &&
+        tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) {
+        ei = (EXTENT_ITEM*)tp.item->data;
+        ei->flags = flags; // NOTE(review): written into tp.item->data in place; nothing here marks the item or tree as modified - confirm callers cover that
+        return;
     }
     
-    if (tp.item->size != sizeof(EXTENT_ITEM_V0)) {
-        TRACE("extent does not appear to be old - returning STATUS_SUCCESS\n");
-        return STATUS_SUCCESS;
+    if (tp.item->key.obj_id != address || tp.item->key.obj_type != TYPE_EXTENT_ITEM) {
+        ERR("couldn't find %llx in extent tree\n", address);
+        return;
     }
     
-    delete_tree_item(Vcb, &tp, rollback);
+    if (tp.item->size == sizeof(EXTENT_ITEM_V0))
+        return;
+    else if (tp.item->size < sizeof(EXTENT_ITEM)) {
+        ERR("(%llx,%x,%llx) was %x bytes, expected at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type,
+                                                                   tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA)); // NOTE(review): the check is against sizeof(EXTENT_ITEM) but the message prints sizeof(EXTENT_DATA)
+        return;
+    }
     
-    searchkey.obj_id = address;
-    searchkey.obj_type = TYPE_EXTENT_REF_V0;
-    searchkey.offset = 0;
+    ei = (EXTENT_ITEM*)tp.item->data;
+    ei->flags = flags;
+}
+
+static changed_extent* get_changed_extent_item(chunk* c, UINT64 address, UINT64 size, BOOL no_csum) { /*
+     * Finds the changed_extent for (address, size) in c->changed_extents, creating
+     * and appending a fresh one when there is none. A new entry starts with zero
+     * counts, empty refs/old_refs lists and superseded = FALSE; no_csum is only
+     * applied to a new entry, an existing match is returned untouched.
+     * Returns NULL if the allocation fails. No lock is taken here.
+     */
+    LIST_ENTRY* le;
+    changed_extent* ce;
     
-    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
+    le = c->changed_extents.Flink;
+    while (le != &c->changed_extents) {
+        ce = CONTAINING_RECORD(le, changed_extent, list_entry);
+        
+        if (ce->address == address && ce->size == size)
+            return ce;
+        
+        le = le->Flink;
     }
     
-    InitializeListHead(&extent_refs);
+    ce = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent), ALLOC_TAG);
+    if (!ce) {
+        ERR("out of memory\n");
+        return NULL;
+    }
     
-    do {
-        b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp);
+    ce->address = address;
+    ce->size = size;
+    ce->old_size = size;
+    ce->count = 0;
+    ce->old_count = 0;
+    ce->no_csum = no_csum;
+    ce->superseded = FALSE;
+    InitializeListHead(&ce->refs);
+    InitializeListHead(&ce->old_refs);
+    // Queue the new entry at the tail of the chunk's list
+    InsertTailList(&c->changed_extents, &ce->list_entry);
+    
+    return ce;
+}
+
+NTSTATUS update_changed_extent_ref(device_extension* Vcb, chunk* c, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, signed long long count,
+                                   BOOL no_csum, BOOL superseded, PIRP Irp) { /*
+     * Applies a signed refcount delta (count) for the data reference
+     * (root, objid, offset) to the pending changed_extent of (address, size) in chunk c.
+     * The whole operation runs with c->changed_extents_lock held exclusively.
+     * For an entry with no refs recorded yet, count/old_count are first seeded from
+     * the extent tree. If the reference is not in ce->refs yet, its current count is
+     * looked up with find_extent_data_refcount(); a non-zero value is remembered in
+     * ce->old_refs and the adjusted value is added to ce->refs.
+     * superseded == TRUE sets ce->superseded; FALSE leaves it as it was.
+     * Returns STATUS_SUCCESS, the find_item status, STATUS_INSUFFICIENT_RESOURCES
+     * or STATUS_INTERNAL_ERROR.
+     */
+    LIST_ENTRY* le;
+    changed_extent* ce;
+    changed_extent_ref* cer;
+    NTSTATUS Status;
+    KEY searchkey;
+    traverse_ptr tp;
+    UINT64 old_count;
+    // Held until the end label; every exit below goes through it
+    ExAcquireResourceExclusiveLite(&c->changed_extents_lock, TRUE);
+    
+    ce = get_changed_extent_item(c, address, size, no_csum);
+    
+    if (!ce) {
+        ERR("get_changed_extent_item failed\n");
+        Status = STATUS_INTERNAL_ERROR; // NOTE(review): get_changed_extent_item only returns NULL after a failed allocation, yet this reports an internal error
+        goto end;
+    }
+    // Seed the totals from the on-disk extent item the first time this extent is touched
+    if (IsListEmpty(&ce->refs) && IsListEmpty(&ce->old_refs)) { // new entry
+        searchkey.obj_id = address;
+        searchkey.obj_type = TYPE_EXTENT_ITEM;
+        searchkey.offset = 0xffffffffffffffff;
         
-        if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
-            Status = populate_extent_refs_from_tree(Vcb, tp.item->key.offset, address, &extent_refs);
-            if (!NT_SUCCESS(Status)) {
-                ERR("populate_extent_refs_from_tree returned %08x\n", Status);
-                return Status;
-            }
+        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            goto end;
+        }
+        
+        if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
+            ERR("could not find address %llx in extent tree\n", address);
+            Status = STATUS_INTERNAL_ERROR;
+            goto end;
+        }
+        
+        if (tp.item->key.offset != size) {
+            ERR("extent %llx had size %llx, not %llx as expected\n", address, tp.item->key.offset, size);
+            Status = STATUS_INTERNAL_ERROR;
+            goto end;
+        }
+        // Both item layouts carry a refcount; pick the one matching the item size
+        if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
+            EXTENT_ITEM_V0* eiv0 = (EXTENT_ITEM_V0*)tp.item->data;
             
-            delete_tree_item(Vcb, &tp, rollback);
+            ce->count = ce->old_count = eiv0->refcount;
+        } else if (tp.item->size >= sizeof(EXTENT_ITEM)) {
+            EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
+            
+            ce->count = ce->old_count = ei->refcount;
+        } else {
+            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
+            Status = STATUS_INTERNAL_ERROR;
+            goto end;
         }
+    }
+    // If this reference is already tracked, fold the delta into it and finish
+    le = ce->refs.Flink;
+    while (le != &ce->refs) {
+        cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
         
-        if (b) {
-            tp = next_tp;
+        if (cer->type == TYPE_EXTENT_DATA_REF && cer->edr.root == root && cer->edr.objid == objid && cer->edr.offset == offset) {
+            ce->count += count;
+            cer->edr.count += count;
+            Status = STATUS_SUCCESS;
             
-            if (tp.item->key.obj_id > searchkey.obj_id || tp.item->key.obj_type > searchkey.obj_type)
-                break;
+            if (superseded)
+                ce->superseded = TRUE;
+            
+            goto end;
         }
-    } while (b);
+        
+        le = le->Flink;
+    }
     
-    Status = construct_extent_item(Vcb, address, size, EXTENT_ITEM_DATA, &extent_refs, Irp, rollback);
-    if (!NT_SUCCESS(Status)) {
-        ERR("construct_extent_item returned %08x\n", Status);
-        free_extent_refs(&extent_refs);
-        return Status;
+    old_count = find_extent_data_refcount(Vcb, address, size, root, objid, offset, Irp);
+    // A non-zero existing count is remembered in old_refs
+    if (old_count > 0) {
+        cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
+    
+        if (!cer) {
+            ERR("out of memory\n");
+            Status = STATUS_INSUFFICIENT_RESOURCES;
+            goto end;
+        }
+        
+        cer->type = TYPE_EXTENT_DATA_REF;
+        cer->edr.root = root;
+        cer->edr.objid = objid;
+        cer->edr.offset = offset;
+        cer->edr.count = old_count;
+        
+        InsertTailList(&ce->old_refs, &cer->list_entry);
     }
     
-    free_extent_refs(&extent_refs);
+    cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
     
-    return STATUS_SUCCESS;
+    if (!cer) {
+        ERR("out of memory\n");
+        Status = STATUS_INSUFFICIENT_RESOURCES;
+        goto end;
+    }
+    // The new-state entry carries the existing count adjusted by the delta
+    cer->type = TYPE_EXTENT_DATA_REF;
+    cer->edr.root = root;
+    cer->edr.objid = objid;
+    cer->edr.offset = offset;
+    cer->edr.count = old_count + count;
+    
+    InsertTailList(&ce->refs, &cer->list_entry);
+    
+    ce->count += count;
+    
+    if (superseded)
+        ce->superseded = TRUE;
+    
+    Status = STATUS_SUCCESS;
+    
+end:
+    ExReleaseResourceLite(&c->changed_extents_lock);
+    
+    return Status;
+}
+
+void add_changed_extent_ref(chunk* c, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, UINT32 count, BOOL no_csum) { /*
+     * Adds count references from (root, objid, offset) to the pending changed_extent
+     * of (address, size) in chunk c, creating the changed_extent and/or the ref entry
+     * as needed. The extent tree is not consulted and old_refs is not touched.
+     * Allocation failures are logged and the update is dropped; being void, the
+     * function cannot report them.
+     * NOTE(review): unlike update_changed_extent_ref(), c->changed_extents_lock is not
+     * acquired here - presumably the caller holds it; TODO confirm.
+     */
+    changed_extent* ce;
+    changed_extent_ref* cer;
+    LIST_ENTRY* le;
+    
+    ce = get_changed_extent_item(c, address, size, no_csum);
+    
+    if (!ce) {
+        ERR("get_changed_extent_item failed\n");
+        return;
+    }
+    // An already-tracked reference just has its count bumped
+    le = ce->refs.Flink;
+    while (le != &ce->refs) {
+        cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
+        
+        if (cer->type == TYPE_EXTENT_DATA_REF && cer->edr.root == root && cer->edr.objid == objid && cer->edr.offset == offset) {
+            ce->count += count;
+            cer->edr.count += count;
+            return;
+        }
+        
+        le = le->Flink;
+    }
+    // Otherwise record a new reference starting at count
+    cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
+    
+    if (!cer) {
+        ERR("out of memory\n");
+        return;
+    }
+    
+    cer->type = TYPE_EXTENT_DATA_REF;
+    cer->edr.root = root;
+    cer->edr.objid = objid;
+    cer->edr.offset = offset;
+    cer->edr.count = count;
+    
+    InsertTailList(&ce->refs, &cer->list_entry);
+    
+    ce->count += count;
 }
 
-UINT64 find_extent_data_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, PIRP Irp) {
+UINT64 find_extent_shared_tree_refcount(device_extension* Vcb, UINT64 address, UINT64 parent, PIRP Irp) { // Returns 1 if the extent at address has a SHARED_BLOCK_REF whose offset equals parent; 0 otherwise, including on lookup or parse errors.
     NTSTATUS Status;
     KEY searchkey;
     traverse_ptr tp;
-    EXTENT_DATA_REF* edr;
-    BOOL old = FALSE;
+    UINT64 inline_rc;
+    EXTENT_ITEM* ei;
+    UINT32 len;
+    UINT8* ptr;
     
     searchkey.obj_id = address;
-    searchkey.obj_type = TYPE_EXTENT_ITEM;
+    searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM; // search for a METADATA_ITEM when the skinny-metadata incompat flag is set, otherwise an EXTENT_ITEM
     searchkey.offset = 0xffffffffffffffff;
     
     Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
@@ -1172,82 +1936,75 @@ UINT64 find_extent_data_refcount(device_extension* Vcb, UINT64 address, UINT64 s
         return 0;
     }
     
-    if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
+    if (tp.item->key.obj_id != searchkey.obj_id || (tp.item->key.obj_type != TYPE_EXTENT_ITEM && tp.item->key.obj_type != TYPE_METADATA_ITEM)) { // either item type is accepted here, whichever one was searched for
         TRACE("could not find address %llx in extent tree\n", address);
         return 0;
     }
     
-    if (tp.item->key.offset != size) {
-        ERR("extent %llx had size %llx, not %llx as expected\n", address, tp.item->key.offset, size);
+    if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.offset != Vcb->superblock.node_size) { // only EXTENT_ITEM keys have their offset checked against node_size
+        ERR("extent %llx had size %llx, not %llx as expected\n", address, tp.item->key.offset, Vcb->superblock.node_size);
         return 0;
     }
     
-    if (tp.item->size >= sizeof(EXTENT_ITEM)) {
-        EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
-        UINT32 len = tp.item->size - sizeof(EXTENT_ITEM);
-        UINT8* ptr = (UINT8*)&ei[1];
+    if (tp.item->size < sizeof(EXTENT_ITEM)) {
+        ERR("(%llx,%x,%llx): size was %u, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
+        return 0;
+    }
+    
+    ei = (EXTENT_ITEM*)tp.item->data;
+    inline_rc = 0;
+    // Inline references follow the EXTENT_ITEM header; len and ptr track the part of the item not yet parsed.
+    len = tp.item->size - sizeof(EXTENT_ITEM);
+    ptr = (UINT8*)&ei[1];
+    // An item flagged EXTENT_ITEM_TREE_BLOCK has an EXTENT_ITEM2 before its inline refs, which is stepped over here.
+    if (searchkey.obj_type == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) { // NOTE(review): tests the searched-for type rather than tp.item->key.obj_type, although either type is accepted above - confirm this is intended
+        if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) {
+            ERR("(%llx,%x,%llx): size was %u, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
+                                                                       tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2));
+            return 0;
+        }
         
-        while (len > 0) {
-            UINT8 secttype = *ptr;
-            ULONG sectlen = get_extent_data_len(secttype);
-            UINT64 sectcount = get_extent_data_refcount(secttype, ptr + sizeof(UINT8));
-            
-            len--;
-            
-            if (sectlen > len) {
-                ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, len, sectlen);
-                return 0;
-            }
+        len -= sizeof(EXTENT_ITEM2);
+        ptr += sizeof(EXTENT_ITEM2);
+    }
+    // Walk the inline refs: each is one type byte followed by a payload whose length depends on the type.
+    while (len > 0) {
+        UINT8 secttype = *ptr;
+        ULONG sectlen = get_extent_data_len(secttype);
+        UINT64 sectcount = get_extent_data_refcount(secttype, ptr + sizeof(UINT8));
+        
+        len--;
+        // len now excludes the type byte, so it can be compared directly with the payload length.
+        if (sectlen > len) {
+            ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, len, sectlen);
+            return 0;
+        }
 
-            if (sectlen == 0) {
-                ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, secttype);
-                return 0;
-            }
-            
-            if (secttype == TYPE_EXTENT_DATA_REF) {
-                EXTENT_DATA_REF* sectedr = (EXTENT_DATA_REF*)(ptr + sizeof(UINT8));
-                
-                if (sectedr->root == root && sectedr->objid == objid && sectedr->offset == offset)
-                    return sectcount;
-            } else if (secttype == TYPE_SHARED_DATA_REF) {
-                SHARED_DATA_REF* sectsdr = (SHARED_DATA_REF*)(ptr + sizeof(UINT8));
-                BOOL found = FALSE;
-                LIST_ENTRY* le;
-                
-                le = Vcb->shared_extents.Flink;
-                while (le != &Vcb->shared_extents) {
-                    shared_data* sd = CONTAINING_RECORD(le, shared_data, list_entry);
-                    
-                    if (sd->address == sectsdr->offset) {
-                        LIST_ENTRY* le2 = sd->entries.Flink;
-                        while (le2 != &sd->entries) {
-                            shared_data_entry* sde = CONTAINING_RECORD(le2, shared_data_entry, list_entry);
-                            
-                            if (sde->edr.root == root && sde->edr.objid == objid && sde->edr.offset == offset)
-                                return sde->edr.count;
-                            
-                            le2 = le2->Flink;
-                        }
-                        found = TRUE;
-                        break;
-                    }
-                    
-                    le = le->Flink;
-                }
-                
-                if (!found)
-                    WARN("shared data extents not loaded for tree at %llx\n", sectsdr->offset);        
-            }
+        if (sectlen == 0) {
+            ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, secttype);
+            return 0;
+        }
+        // A matching inline shared block ref settles the answer immediately.
+        if (secttype == TYPE_SHARED_BLOCK_REF) {
+            SHARED_BLOCK_REF* sectsbr = (SHARED_BLOCK_REF*)(ptr + sizeof(UINT8));
             
-            len -= sectlen;
-            ptr += sizeof(UINT8) + sectlen;
+            if (sectsbr->offset == parent)
+                return 1;
         }
-    } else if (tp.item->size == sizeof(EXTENT_ITEM_V0))
-        old = TRUE;
+        // Advance past this ref's payload and add its count to the inline total.
+        len -= sectlen;
+        ptr += sizeof(UINT8) + sectlen;
+        inline_rc += sectcount;
+    }
+    
+    // FIXME - what if old?
+    // If the inline counts already account for the whole refcount, no further lookup is made.
+    if (inline_rc == ei->refcount)
+        return 0;
     
     searchkey.obj_id = address;
-    searchkey.obj_type = TYPE_EXTENT_DATA_REF;
-    searchkey.offset = get_extent_data_ref_hash2(root, objid, offset);
+    searchkey.obj_type = TYPE_SHARED_BLOCK_REF; // otherwise look for a standalone SHARED_BLOCK_REF item keyed by the parent address
+    searchkey.offset = parent;
     
     Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
     if (!NT_SUCCESS(Status)) {
@@ -1255,139 +2012,102 @@ UINT64 find_extent_data_refcount(device_extension* Vcb, UINT64 address, UINT64 s
         return 0;
     }
     
-    if (!keycmp(&searchkey, &tp.item->key)) {    
-        if (tp.item->size < sizeof(EXTENT_DATA_REF))
-            ERR("(%llx,%x,%llx) has size %u, not %u as expected\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA_REF));
-        else {    
-            edr = (EXTENT_DATA_REF*)tp.item->data;
-            
-            return edr->count;
-        }
+    if (!keycmp(searchkey, tp.item->key)) {    
+        if (tp.item->size < sizeof(SHARED_BLOCK_REF))
+            ERR("(%llx,%x,%llx) has size %u, not %u as expected\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(SHARED_BLOCK_REF));
+        else
+            return 1;
+    }
+    
+    return 0;
+}
+
+UINT64 find_extent_shared_data_refcount(device_extension* Vcb, UINT64 address, UINT64 parent, PIRP Irp) { // Returns the count stored in the SHARED_DATA_REF of the extent at address whose offset equals parent; 0 if none is found or on error.
+    NTSTATUS Status;
+    KEY searchkey;
+    traverse_ptr tp;
+    UINT64 inline_rc;
+    EXTENT_ITEM* ei;
+    UINT32 len;
+    UINT8* ptr;
+    // Locate the extent's item: a METADATA_ITEM is searched for when the skinny-metadata incompat flag is set, otherwise an EXTENT_ITEM.
+    searchkey.obj_id = address;
+    searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
+    searchkey.offset = 0xffffffffffffffff;
+    
+    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return 0;
+    }
+    
+    if (tp.item->key.obj_id != searchkey.obj_id || (tp.item->key.obj_type != TYPE_EXTENT_ITEM && tp.item->key.obj_type != TYPE_METADATA_ITEM)) {
+        TRACE("could not find address %llx in extent tree\n", address);
+        return 0;
     }
-     
-    if (old) {
-        BOOL b;
+    
+    if (tp.item->size < sizeof(EXTENT_ITEM)) {
+        ERR("(%llx,%x,%llx): size was %u, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
+        return 0;
+    }
+    
+    ei = (EXTENT_ITEM*)tp.item->data;
+    inline_rc = 0;
+    // Inline references follow the EXTENT_ITEM header; len and ptr track the part of the item not yet parsed.
+    len = tp.item->size - sizeof(EXTENT_ITEM);
+    ptr = (UINT8*)&ei[1];
+    // Walk the inline refs: each is one type byte followed by a payload whose length depends on the type.
+    while (len > 0) {
+        UINT8 secttype = *ptr;
+        ULONG sectlen = get_extent_data_len(secttype);
+        UINT64 sectcount = get_extent_data_refcount(secttype, ptr + sizeof(UINT8));
         
-        searchkey.obj_id = address;
-        searchkey.obj_type = TYPE_EXTENT_REF_V0;
-        searchkey.offset = 0;
+        len--;
         
-        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("error - find_item returned %08x\n", Status);
+        if (sectlen > len) {
+            ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, len, sectlen);
             return 0;
         }
-        
-        do {
-            traverse_ptr next_tp;
-            
-            b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp);
-            
-            if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
-                if (tp.item->size >= sizeof(EXTENT_REF_V0)) {
-                    EXTENT_REF_V0* erv0 = (EXTENT_REF_V0*)tp.item->data;
-                    
-                    if (erv0->root == root && erv0->objid == objid) {
-                        LIST_ENTRY* le;
-                        BOOL found = FALSE;
-                    
-                        le = Vcb->shared_extents.Flink;
-                        while (le != &Vcb->shared_extents) {
-                            shared_data* sd = CONTAINING_RECORD(le, shared_data, list_entry);
-                            
-                            if (sd->address == tp.item->key.offset) {
-                                LIST_ENTRY* le2 = sd->entries.Flink;
-                                while (le2 != &sd->entries) {
-                                    shared_data_entry* sde = CONTAINING_RECORD(le2, shared_data_entry, list_entry);
-                                    
-                                    if (sde->edr.root == root && sde->edr.objid == objid && sde->edr.offset == offset)
-                                        return sde->edr.count;
-                                    
-                                    le2 = le2->Flink;
-                                }
-                                found = TRUE;
-                                break;
-                            }
-                            
-                            le = le->Flink;
-                        }
-                        
-                        if (!found)
-                            WARN("shared data extents not loaded for tree at %llx\n", tp.item->key.offset);
-                    }
-                } else {
-                    ERR("(%llx,%x,%llx) was %x bytes, not %x as expected\n", tp.item->key.obj_id, tp.item->key.obj_type,
-                        tp.item->key.offset, tp.item->size, sizeof(EXTENT_REF_V0));
-                }
-            }
-            
-            if (b) {
-                tp = next_tp;
-                
-                if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type))
-                    break;
-            }
-        } while (b);
-    } else {
-        BOOL b;
-        
-        searchkey.obj_id = address;
-        searchkey.obj_type = TYPE_SHARED_DATA_REF;
-        searchkey.offset = 0;
-        
-        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("error - find_item returned %08x\n", Status);
+
+        if (sectlen == 0) {
+            ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, secttype);
             return 0;
         }
         
-        do {
-            traverse_ptr next_tp;
+        if (secttype == TYPE_SHARED_DATA_REF) {
+            SHARED_DATA_REF* sectsdr = (SHARED_DATA_REF*)(ptr + sizeof(UINT8));
             
-            b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp);
-            
-            if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
-                if (tp.item->size >= sizeof(SHARED_DATA_REF)) {
-                    SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)tp.item->data;
-                    LIST_ENTRY* le;
-                    BOOL found = FALSE;
-                    
-                    le = Vcb->shared_extents.Flink;
-                    while (le != &Vcb->shared_extents) {
-                        shared_data* sd = CONTAINING_RECORD(le, shared_data, list_entry);
-                        
-                        if (sd->address == sdr->offset) {
-                            LIST_ENTRY* le2 = sd->entries.Flink;
-                            while (le2 != &sd->entries) {
-                                shared_data_entry* sde = CONTAINING_RECORD(le2, shared_data_entry, list_entry);
-                                
-                                if (sde->edr.root == root && sde->edr.objid == objid && sde->edr.offset == offset)
-                                    return sde->edr.count;
-                                
-                                le2 = le2->Flink;
-                            }
-                            found = TRUE;
-                            break;
-                        }
-                        
-                        le = le->Flink;
-                    }
-
-                    if (!found)
-                        WARN("shared data extents not loaded for tree at %llx\n", sdr->offset);
-                } else {
-                    ERR("(%llx,%x,%llx) was %x bytes, not %x as expected\n", tp.item->key.obj_id, tp.item->key.obj_type,
-                        tp.item->key.offset, tp.item->size, sizeof(SHARED_DATA_REF));
-                }
-            }
-
-            if (b) {
-                tp = next_tp;
-
-                if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type))
-                    break;
-            }
-        } while (b);
+            if (sectsdr->offset == parent)
+                return sectsdr->count;
+        }
+        // Advance past this ref's payload and add its count to the inline total.
+        len -= sectlen;
+        ptr += sizeof(UINT8) + sectlen;
+        inline_rc += sectcount;
+    }
+    
+    // FIXME - what if old?
+    // If the inline counts already account for the whole refcount, no further lookup is made.
+    if (inline_rc == ei->refcount)
+        return 0;
+    // Otherwise look for a standalone SHARED_DATA_REF item keyed by the parent address.
+    searchkey.obj_id = address;
+    searchkey.obj_type = TYPE_SHARED_DATA_REF;
+    searchkey.offset = parent;
+    
+    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return 0;
+    }
+    
+    if (!keycmp(searchkey, tp.item->key)) {    
+        if (tp.item->size < sizeof(SHARED_DATA_REF))
+            ERR("(%llx,%x,%llx) has size %u, not %u as expected\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(SHARED_DATA_REF));
+        else {
+            SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)tp.item->data;
+            return sdr->count;
+        }
     }
     
     return 0;
index 092ed28..e1e1272 100644 (file)
@@ -58,7 +58,46 @@ static NTSTATUS STDCALL set_basic_information(device_extension* Vcb, PIRP Irp, P
     // FIXME - what about subvol roots?
     
     // FIXME - link FILE_ATTRIBUTE_READONLY to st_mode
-    // FIXME - handle times == -1
+    
+    if (fbi->CreationTime.QuadPart == -1)
+        ccb->user_set_creation_time = TRUE;
+    else if (fbi->CreationTime.QuadPart != 0) {
+        win_time_to_unix(fbi->CreationTime, &fcb->inode_item.otime);
+        inode_item_changed = TRUE;
+        filter |= FILE_NOTIFY_CHANGE_CREATION;
+        
+        ccb->user_set_creation_time = TRUE;
+    }
+    
+    if (fbi->LastAccessTime.QuadPart == -1)
+        ccb->user_set_access_time = TRUE;
+    else if (fbi->LastAccessTime.QuadPart != 0) {
+        win_time_to_unix(fbi->LastAccessTime, &fcb->inode_item.st_atime);
+        inode_item_changed = TRUE;
+        filter |= FILE_NOTIFY_CHANGE_LAST_ACCESS;
+        
+        ccb->user_set_access_time = TRUE;
+    }
+    
+    if (fbi->LastWriteTime.QuadPart == -1)
+        ccb->user_set_write_time = TRUE;
+    else if (fbi->LastWriteTime.QuadPart != 0) {
+        win_time_to_unix(fbi->LastWriteTime, &fcb->inode_item.st_mtime);
+        inode_item_changed = TRUE;
+        filter |= FILE_NOTIFY_CHANGE_LAST_WRITE;
+        
+        ccb->user_set_write_time = TRUE;
+    }
+    
+    if (fbi->ChangeTime.QuadPart == -1)
+        ccb->user_set_change_time = TRUE;
+    else if (fbi->ChangeTime.QuadPart != 0) {
+        win_time_to_unix(fbi->ChangeTime, &fcb->inode_item.st_ctime);
+        inode_item_changed = TRUE;
+        // no filter for this
+        
+        ccb->user_set_change_time = TRUE;
+    }
     
     // FileAttributes == 0 means don't set - undocumented, but seen in fastfat
     if (fbi->FileAttributes != 0) {
@@ -74,6 +113,9 @@ static NTSTATUS STDCALL set_basic_information(device_extension* Vcb, PIRP Irp, P
                 
         fcb->atts_changed = TRUE;
         
+        if (fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT)
+            fbi->FileAttributes |= FILE_ATTRIBUTE_REPARSE_POINT;
+        
         if (defda == fbi->FileAttributes)
             fcb->atts_deleted = TRUE;
         
@@ -82,7 +124,9 @@ static NTSTATUS STDCALL set_basic_information(device_extension* Vcb, PIRP Irp, P
         KeQuerySystemTime(&time);
         win_time_to_unix(time, &now);
         
-        fcb->inode_item.st_ctime = now;
+        if (!ccb->user_set_change_time)
+            fcb->inode_item.st_ctime = now;
+        
         fcb->subvol->root_item.ctransid = Vcb->superblock.generation;
         fcb->subvol->root_item.ctime = now;
         
@@ -90,42 +134,11 @@ static NTSTATUS STDCALL set_basic_information(device_extension* Vcb, PIRP Irp, P
         
         filter |= FILE_NOTIFY_CHANGE_ATTRIBUTES;
     }
-    
-    if (fbi->CreationTime.QuadPart == -1) {
-        FIXME("FIXME - support CreationTime == -1\n"); // FIXME - set ccb flag
-    } else if (fbi->CreationTime.QuadPart != 0) {
-        win_time_to_unix(fbi->CreationTime, &fcb->inode_item.otime);
-        inode_item_changed = TRUE;
-        filter |= FILE_NOTIFY_CHANGE_CREATION;
-    }
-    
-    if (fbi->LastAccessTime.QuadPart == -1) {
-        FIXME("FIXME - support LastAccessTime == -1\n"); // FIXME - set ccb flag
-    } else if (fbi->LastAccessTime.QuadPart != 0) {
-        win_time_to_unix(fbi->LastAccessTime, &fcb->inode_item.st_atime);
-        inode_item_changed = TRUE;
-        filter |= FILE_NOTIFY_CHANGE_LAST_ACCESS;
-    }
-    
-    if (fbi->LastWriteTime.QuadPart == -1) {
-        FIXME("FIXME - support LastWriteTime == -1\n"); // FIXME - set ccb flag
-    } else if (fbi->LastWriteTime.QuadPart != 0) {
-        win_time_to_unix(fbi->LastWriteTime, &fcb->inode_item.st_mtime);
-        inode_item_changed = TRUE;
-        filter |= FILE_NOTIFY_CHANGE_LAST_WRITE;
-    }
-    
-    if (fbi->ChangeTime.QuadPart == -1) {
-        FIXME("FIXME - support ChangeTime == -1\n"); // FIXME - set ccb flag
-    } else if (fbi->ChangeTime.QuadPart != 0) {
-        win_time_to_unix(fbi->ChangeTime, &fcb->inode_item.st_ctime);
-        inode_item_changed = TRUE;
-        // no filter for this
-    }
 
     if (inode_item_changed) {
         fcb->inode_item.transid = Vcb->superblock.generation;
         fcb->inode_item.sequence++;
+        fcb->inode_item_changed = TRUE;
         
         mark_fcb_dirty(fcb);
     }
@@ -152,6 +165,8 @@ static NTSTATUS STDCALL set_disposition_information(device_extension* Vcb, PIRP
     if (!fileref)
         return STATUS_INVALID_PARAMETER;
     
+    ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
+    
     ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE);
     
     TRACE("changing delete_on_close to %s for %S (fcb %p)\n", fdi->DeleteFile ? "TRUE" : "FALSE", file_desc(FileObject), fcb);
@@ -194,147 +209,10 @@ static NTSTATUS STDCALL set_disposition_information(device_extension* Vcb, PIRP
     
 end:
     ExReleaseResourceLite(fcb->Header.Resource);
-
-    return Status;
-}
-
-static NTSTATUS add_inode_extref(device_extension* Vcb, root* subvol, UINT64 inode, UINT64 parinode, UINT64 index, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback) {
-    KEY searchkey;
-    traverse_ptr tp;
-    INODE_EXTREF* ier;
-    NTSTATUS Status;
-    
-    searchkey.obj_id = inode;
-    searchkey.obj_type = TYPE_INODE_EXTREF;
-    searchkey.offset = calc_crc32c((UINT32)parinode, (UINT8*)utf8->Buffer, utf8->Length);
-
-    Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
-    }
-    
-    if (!keycmp(&searchkey, &tp.item->key)) {
-        ULONG iersize = tp.item->size + sizeof(INODE_EXTREF) - 1 + utf8->Length;
-        UINT8* ier2;
-        UINT32 maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node);
-        
-        if (iersize > maxlen) {
-            ERR("item would be too long (%u > %u)\n", iersize, maxlen);
-            return STATUS_INTERNAL_ERROR;
-        }
-        
-        ier2 = ExAllocatePoolWithTag(PagedPool, iersize, ALLOC_TAG);
-        if (!ier2) {
-            ERR("out of memory\n");
-            return STATUS_INSUFFICIENT_RESOURCES;
-        }
-        
-        if (tp.item->size > 0)
-            RtlCopyMemory(ier2, tp.item->data, tp.item->size);
-        
-        ier = (INODE_EXTREF*)&ier2[tp.item->size];
-        ier->dir = parinode;
-        ier->index = index;
-        ier->n = utf8->Length;
-        RtlCopyMemory(ier->name, utf8->Buffer, utf8->Length);
-        
-        delete_tree_item(Vcb, &tp, rollback);
-        
-        if (!insert_tree_item(Vcb, subvol, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ier2, iersize, NULL, Irp, rollback)) {
-            ERR("error - failed to insert item\n");
-            return STATUS_INTERNAL_ERROR;
-        }
-    } else {
-        ier = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_EXTREF) - 1 + utf8->Length, ALLOC_TAG);
-        if (!ier) {
-            ERR("out of memory\n");
-            return STATUS_INSUFFICIENT_RESOURCES;
-        }
-
-        ier->dir = parinode;
-        ier->index = index;
-        ier->n = utf8->Length;
-        RtlCopyMemory(ier->name, utf8->Buffer, utf8->Length);
-    
-        if (!insert_tree_item(Vcb, subvol, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ier, sizeof(INODE_EXTREF) - 1 + utf8->Length, NULL, Irp, rollback)) {
-            ERR("error - failed to insert item\n");
-            return STATUS_INTERNAL_ERROR;
-        }
-    }
-    
-    return STATUS_SUCCESS;
-}
-
-NTSTATUS add_inode_ref(device_extension* Vcb, root* subvol, UINT64 inode, UINT64 parinode, UINT64 index, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback) {
-    KEY searchkey;
-    traverse_ptr tp;
-    INODE_REF* ir;
-    NTSTATUS Status;
     
-    searchkey.obj_id = inode;
-    searchkey.obj_type = TYPE_INODE_REF;
-    searchkey.offset = parinode;
-    
-    Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
-    }
-    
-    if (!keycmp(&searchkey, &tp.item->key)) {
-        ULONG irsize = tp.item->size + sizeof(INODE_REF) - 1 + utf8->Length;
-        UINT8* ir2;
-        UINT32 maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node);
-        
-        if (irsize > maxlen) {
-            if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF) {
-                TRACE("INODE_REF too long, creating INODE_EXTREF\n");
-                return add_inode_extref(Vcb, subvol, inode, parinode, index, utf8, Irp, rollback);
-            } else {
-                ERR("item would be too long (%u > %u)\n", irsize, maxlen);
-                return STATUS_INTERNAL_ERROR;
-            }
-        }
-        
-        ir2 = ExAllocatePoolWithTag(PagedPool, irsize, ALLOC_TAG);
-        if (!ir2) {
-            ERR("out of memory\n");
-            return STATUS_INSUFFICIENT_RESOURCES;
-        }
-        
-        if (tp.item->size > 0)
-            RtlCopyMemory(ir2, tp.item->data, tp.item->size);
-        
-        ir = (INODE_REF*)&ir2[tp.item->size];
-        ir->index = index;
-        ir->n = utf8->Length;
-        RtlCopyMemory(ir->name, utf8->Buffer, utf8->Length);
-        
-        delete_tree_item(Vcb, &tp, rollback);
-        
-        if (!insert_tree_item(Vcb, subvol, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ir2, irsize, NULL, Irp, rollback)) {
-            ERR("error - failed to insert item\n");
-            return STATUS_INTERNAL_ERROR;
-        }
-    } else {
-        ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + utf8->Length, ALLOC_TAG);
-        if (!ir) {
-            ERR("out of memory\n");
-            return STATUS_INSUFFICIENT_RESOURCES;
-        }
+    ExReleaseResourceLite(&Vcb->fcb_lock);
 
-        ir->index = index;
-        ir->n = utf8->Length;
-        RtlCopyMemory(ir->name, utf8->Buffer, utf8->Length);
-    
-        if (!insert_tree_item(Vcb, subvol, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ir, sizeof(INODE_REF) - 1 + ir->n, NULL, Irp, rollback)) {
-            ERR("error - failed to insert item\n");
-            return STATUS_INTERNAL_ERROR;
-        }
-    }
-    
-    return STATUS_SUCCESS;
+    return Status;
 }
 
 BOOL has_open_children(file_ref* fileref) {
@@ -346,7 +224,7 @@ BOOL has_open_children(file_ref* fileref) {
     while (le != &fileref->children) {
         file_ref* c = CONTAINING_RECORD(le, file_ref, list_entry);
         
-        if (c->fcb->open_count > 0)
+        if (c->open_count > 0)
             return TRUE;
         
         if (has_open_children(c))
@@ -425,6 +303,7 @@ static NTSTATUS duplicate_fcb(fcb* oldfcb, fcb** pfcb) {
     }
     
     RtlCopyMemory(&fcb->inode_item, &oldfcb->inode_item, sizeof(INODE_ITEM));
+    fcb->inode_item_changed = TRUE;
     
     if (oldfcb->sd && RtlLengthSecurityDescriptor(oldfcb->sd) > 0) {
         fcb->sd = ExAllocatePoolWithTag(PagedPool, RtlLengthSecurityDescriptor(oldfcb->sd), ALLOC_TAG);
@@ -564,6 +443,23 @@ static NTSTATUS duplicate_fcb(fcb* oldfcb, fcb** pfcb) {
         
         RtlCopyMemory(fcb->reparse_xattr.Buffer, oldfcb->reparse_xattr.Buffer, fcb->reparse_xattr.Length);
     }
+    
+    if (oldfcb->ea_xattr.Buffer && oldfcb->ea_xattr.Length > 0) {
+        fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = oldfcb->ea_xattr.Length;
+        
+        fcb->ea_xattr.Buffer = ExAllocatePoolWithTag(PagedPool, fcb->ea_xattr.MaximumLength, ALLOC_TAG);
+        if (!fcb->ea_xattr.Buffer) {
+            ERR("out of memory\n");
+            
+            ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
+            free_fcb(fcb);
+            ExReleaseResourceLite(&Vcb->fcb_lock);
+            
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        RtlCopyMemory(fcb->ea_xattr.Buffer, oldfcb->ea_xattr.Buffer, fcb->ea_xattr.Length);
+    }
 
 end:
     *pfcb = fcb;
@@ -598,7 +494,7 @@ static NTSTATUS add_children_to_move_list(move_entry* me, PIRP Irp) {
         
         if (!fr->deleted) {
             me2 = ExAllocatePoolWithTag(PagedPool, sizeof(move_entry), ALLOC_TAG);
-            if (!me) {
+            if (!me2) {
                 ERR("out of memory\n");
                 Status = STATUS_INSUFFICIENT_RESOURCES;
                 goto end;
@@ -651,7 +547,8 @@ static NTSTATUS add_children_to_move_list(move_entry* me, PIRP Irp) {
                 }
                 
                 if (xa->n > xapreflen && RtlCompareMemory(xa->name, xapref, xapreflen) == xapreflen &&
-                    (tp.item->key.offset != EA_DOSATTRIB_HASH || xa->n != strlen(EA_DOSATTRIB) || RtlCompareMemory(xa->name, EA_DOSATTRIB, xa->n) != xa->n)
+                    (tp.item->key.offset != EA_DOSATTRIB_HASH || xa->n != strlen(EA_DOSATTRIB) || RtlCompareMemory(xa->name, EA_DOSATTRIB, xa->n) != xa->n) &&
+                    (tp.item->key.offset != EA_EA_HASH || xa->n != strlen(EA_EA) || RtlCompareMemory(xa->name, EA_EA, xa->n) != xa->n)
                 ) {
                     BOOL found = FALSE;
                 
@@ -766,7 +663,7 @@ static NTSTATUS add_children_to_move_list(move_entry* me, PIRP Irp) {
                         insert_fileref_child(me->fileref, fr, FALSE);
 
                         me2 = ExAllocatePoolWithTag(PagedPool, sizeof(move_entry), ALLOC_TAG);
-                        if (!me) {
+                        if (!me2) {
                             ERR("out of memory\n");
                             Status = STATUS_INSUFFICIENT_RESOURCES;
                             
@@ -906,7 +803,7 @@ static NTSTATUS add_children_to_move_list(move_entry* me, PIRP Irp) {
                         }
                         
                         ExAcquireResourceExclusiveLite(&me->fileref->fcb->Vcb->fcb_lock, TRUE);
-                        Status = open_fcb(me->fileref->fcb->Vcb, subvol, inode, di->type, &utf8, me->fileref->fcb, &fcb, Irp);
+                        Status = open_fcb(me->fileref->fcb->Vcb, subvol, inode, di->type, &utf8, me->fileref->fcb, &fcb, PagedPool, Irp);
                         ExReleaseResourceLite(&me->fileref->fcb->Vcb->fcb_lock);
                         
                         if (!NT_SUCCESS(Status)) {
@@ -991,7 +888,7 @@ static NTSTATUS add_children_to_move_list(move_entry* me, PIRP Irp) {
                             fr->fcb->fileref = fr;
                         
                         me2 = ExAllocatePoolWithTag(PagedPool, sizeof(move_entry), ALLOC_TAG);
-                        if (!me) {
+                        if (!me2) {
                             ERR("out of memory\n");
                             Status = STATUS_INSUFFICIENT_RESOURCES;
                             
@@ -1110,6 +1007,7 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_
                     me->dummyfcb->atts_deleted = me->fileref->fcb->atts_deleted;
                     me->dummyfcb->extents_changed = me->fileref->fcb->extents_changed;
                     me->dummyfcb->reparse_xattr_changed = me->fileref->fcb->reparse_xattr_changed;
+                    me->dummyfcb->ea_changed = me->fileref->fcb->ea_changed;
                 }
                 
                 me->dummyfcb->created = me->fileref->fcb->created;
@@ -1119,11 +1017,8 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_
                 if (!me->fileref->fcb->ads) {
                     LIST_ENTRY* le2;
                     
-                    if (destdir->fcb->subvol->lastinode == 0)
-                        get_last_inode(destdir->fcb->Vcb, destdir->fcb->subvol, Irp);
-
                     me->fileref->fcb->subvol = destdir->fcb->subvol;
-                    me->fileref->fcb->inode = ++destdir->fcb->subvol->lastinode; // FIXME - do proper function for this
+                    me->fileref->fcb->inode = InterlockedIncrement64(&destdir->fcb->subvol->lastinode);
                     me->fileref->fcb->inode_item.st_nlink = 1;
                     
                     defda = get_file_attributes(me->fileref->fcb->Vcb, &me->fileref->fcb->inode_item, me->fileref->fcb->subvol, me->fileref->fcb->inode,
@@ -1133,6 +1028,8 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_
                     me->fileref->fcb->atts_changed = defda != me->fileref->fcb->atts;
                     me->fileref->fcb->extents_changed = !IsListEmpty(&me->fileref->fcb->extents);
                     me->fileref->fcb->reparse_xattr_changed = !!me->fileref->fcb->reparse_xattr.Buffer;
+                    me->fileref->fcb->ea_changed = !!me->fileref->fcb->ea_xattr.Buffer;
+                    me->fileref->fcb->inode_item_changed = TRUE;
                     
                     le2 = me->fileref->fcb->extents.Flink;
                     while (le2 != &me->fileref->fcb->extents) {
@@ -1149,7 +1046,7 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_
                                     ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
                                 } else {
                                     Status = update_changed_extent_ref(me->fileref->fcb->Vcb, c, ed2->address, ed2->size, me->fileref->fcb->subvol->id, me->fileref->fcb->inode,
-                                                                       ext->offset - ed2->offset, 1, me->fileref->fcb->inode_item.flags & BTRFS_INODE_NODATASUM, ed2->size, Irp);
+                                                                       ext->offset - ed2->offset, 1, me->fileref->fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
                                     
                                     if (!NT_SUCCESS(Status)) {
                                         ERR("update_changed_extent_ref returned %08x\n", Status);
@@ -1190,6 +1087,7 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_
                     ExFreePool(hl);
                 }
                 
+                me->fileref->fcb->inode_item_changed = TRUE;
                 mark_fcb_dirty(me->fileref->fcb);
                 
                 if ((!me->dummyfcb->ads && me->dummyfcb->inode_item.st_nlink > 1) || (me->dummyfcb->ads && me->parent->dummyfcb->inode_item.st_nlink > 1)) {
@@ -1211,6 +1109,7 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_
             } else {
                 ExAcquireResourceExclusiveLite(me->fileref->fcb->Header.Resource, TRUE);
                 me->fileref->fcb->inode_item.st_nlink++;
+                me->fileref->fcb->inode_item_changed = TRUE;
                 ExReleaseResourceLite(me->fileref->fcb->Header.Resource);
             }
         }
@@ -1333,6 +1232,7 @@ static NTSTATUS move_across_subvols(file_ref* fileref, file_ref* destdir, PANSI_
             me->fileref->parent->fcb->inode_item.sequence++;
             me->fileref->parent->fcb->inode_item.st_ctime = now;
             me->fileref->parent->fcb->inode_item.st_mtime = now;
+            me->fileref->parent->fcb->inode_item_changed = TRUE;
             mark_fcb_dirty(me->fileref->parent->fcb);
         }
 
@@ -1530,10 +1430,13 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp,
         
         related = relatedccb->fileref;
         increase_fileref_refcount(related);
+    } else if (fnus.Length >= sizeof(WCHAR) && fnus.Buffer[0] != '\\') {
+        related = fileref->parent;
+        increase_fileref_refcount(related);
     }
 
     ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
-    Status = open_fileref(Vcb, &oldfileref, &fnus, related, FALSE, NULL, NULL, Irp);
+    Status = open_fileref(Vcb, &oldfileref, &fnus, related, FALSE, NULL, NULL, PagedPool, ccb->case_sensitive,  Irp);
     ExReleaseResourceLite(&Vcb->fcb_lock);
 
     if (NT_SUCCESS(Status)) {
@@ -1543,7 +1446,7 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp,
             if (!IrpSp->Parameters.SetFile.ReplaceIfExists) {
                 Status = STATUS_OBJECT_NAME_COLLISION;
                 goto end;
-            } else if ((oldfileref->fcb->open_count >= 1 || has_open_children(oldfileref)) && !oldfileref->deleted) {
+            } else if ((oldfileref->open_count >= 1 || has_open_children(oldfileref)) && !oldfileref->deleted) {
                 WARN("trying to overwrite open file\n");
                 Status = STATUS_ACCESS_DENIED;
                 goto end;
@@ -1566,7 +1469,7 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp,
     
     if (!related) {
         ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
-        Status = open_fileref(Vcb, &related, &fnus, NULL, TRUE, NULL, NULL, Irp);
+        Status = open_fileref(Vcb, &related, &fnus, NULL, TRUE, NULL, NULL, PagedPool, ccb->case_sensitive, Irp);
         ExReleaseResourceLite(&Vcb->fcb_lock);
 
         if (!NT_SUCCESS(Status)) {
@@ -1669,8 +1572,11 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp,
         
         fcb->inode_item.transid = Vcb->superblock.generation;
         fcb->inode_item.sequence++;
-        fcb->inode_item.st_ctime = now;
         
+        if (!ccb->user_set_change_time)
+            fcb->inode_item.st_ctime = now;
+        
+        fcb->inode_item_changed = TRUE;
         mark_fcb_dirty(fcb);
         
         // update parent's INODE_ITEM
@@ -1683,6 +1589,7 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp,
         related->fcb->inode_item.st_ctime = now;
         related->fcb->inode_item.st_mtime = now;
         
+        related->fcb->inode_item_changed = TRUE;
         mark_fcb_dirty(related->fcb);
         send_notification_fileref(related, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED);
         
@@ -1826,8 +1733,11 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp,
     
     fcb->inode_item.transid = Vcb->superblock.generation;
     fcb->inode_item.sequence++;
-    fcb->inode_item.st_ctime = now;
     
+    if (!ccb->user_set_change_time)
+        fcb->inode_item.st_ctime = now;
+    
+    fcb->inode_item_changed = TRUE;
     mark_fcb_dirty(fcb);
     
     // update new parent's INODE_ITEM
@@ -1840,6 +1750,7 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp,
     related->fcb->inode_item.st_ctime = now;
     related->fcb->inode_item.st_mtime = now;
     
+    related->fcb->inode_item_changed = TRUE;
     mark_fcb_dirty(related->fcb);
     
     // update old parent's INODE_ITEM
@@ -1856,6 +1767,7 @@ static NTSTATUS STDCALL set_rename_information(device_extension* Vcb, PIRP Irp,
     free_fileref(fr2);
     ExReleaseResourceLite(&Vcb->fcb_lock);
     
+    fr2->parent->fcb->inode_item_changed = TRUE;
     mark_fcb_dirty(fr2->parent->fcb);
     
     send_notification_fileref(fr2, fcb->type == BTRFS_TYPE_DIRECTORY ? FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_REMOVED);
@@ -1885,7 +1797,7 @@ end:
     }
     
     if (NT_SUCCESS(Status))
-        clear_rollback(&rollback);
+        clear_rollback(Vcb, &rollback);
     else
         do_rollback(Vcb, &rollback);
 
@@ -1965,6 +1877,7 @@ NTSTATUS STDCALL stream_set_end_of_file_information(device_extension* Vcb, UINT6
     fileref->parent->fcb->inode_item.sequence++;
     fileref->parent->fcb->inode_item.st_ctime = now;
     
+    fileref->parent->fcb->inode_item_changed = TRUE;
     mark_fcb_dirty(fileref->parent->fcb);
 
     fileref->parent->fcb->subvol->root_item.ctransid = Vcb->superblock.generation;
@@ -2051,10 +1964,12 @@ static NTSTATUS STDCALL set_end_of_file_information(device_extension* Vcb, PIRP
     CcSetFileSizes(FileObject, &ccfs);
     TRACE("setting FileSize for %S to %llx\n", file_desc(FileObject), ccfs.FileSize);
     
-    KeQuerySystemTime(&time);
-    
-    win_time_to_unix(time, &fcb->inode_item.st_mtime);
+    if (!ccb->user_set_write_time) {
+        KeQuerySystemTime(&time);
+        win_time_to_unix(time, &fcb->inode_item.st_mtime);
+    }
     
+    fcb->inode_item_changed = TRUE;
     mark_fcb_dirty(fcb);
     send_notification_fcb(fileref, FILE_NOTIFY_CHANGE_LAST_WRITE | FILE_NOTIFY_CHANGE_SIZE, FILE_ACTION_MODIFIED);
 
@@ -2062,7 +1977,7 @@ static NTSTATUS STDCALL set_end_of_file_information(device_extension* Vcb, PIRP
 
 end:
     if (NT_SUCCESS(Status))
-        clear_rollback(&rollback);
+        clear_rollback(Vcb, &rollback);
     else
         do_rollback(Vcb, &rollback);
 
@@ -2200,7 +2115,7 @@ static NTSTATUS STDCALL set_link_information(device_extension* Vcb, PIRP Irp, PF
     }
 
     ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
-    Status = open_fileref(Vcb, &oldfileref, &fnus, related, FALSE, NULL, NULL, Irp);
+    Status = open_fileref(Vcb, &oldfileref, &fnus, related, FALSE, NULL, NULL, PagedPool, ccb->case_sensitive, Irp);
     ExReleaseResourceLite(&Vcb->fcb_lock);
 
     if (NT_SUCCESS(Status)) {
@@ -2210,7 +2125,7 @@ static NTSTATUS STDCALL set_link_information(device_extension* Vcb, PIRP Irp, PF
             if (!fli->ReplaceIfExists) {
                 Status = STATUS_OBJECT_NAME_COLLISION;
                 goto end;
-            } else if (oldfileref->fcb->open_count >= 1 && !oldfileref->deleted) {
+            } else if (oldfileref->open_count >= 1 && !oldfileref->deleted) {
                 WARN("trying to overwrite open file\n");
                 Status = STATUS_ACCESS_DENIED;
                 goto end;
@@ -2234,7 +2149,7 @@ static NTSTATUS STDCALL set_link_information(device_extension* Vcb, PIRP Irp, PF
     
     if (!related) {
         ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
-        Status = open_fileref(Vcb, &related, &fnus, NULL, TRUE, NULL, NULL, Irp);
+        Status = open_fileref(Vcb, &related, &fnus, NULL, TRUE, NULL, NULL, PagedPool, ccb->case_sensitive, Irp);
         ExReleaseResourceLite(&Vcb->fcb_lock);
 
         if (!NT_SUCCESS(Status)) {
@@ -2361,8 +2276,11 @@ static NTSTATUS STDCALL set_link_information(device_extension* Vcb, PIRP Irp, PF
     fcb->inode_item.transid = Vcb->superblock.generation;
     fcb->inode_item.sequence++;
     fcb->inode_item.st_nlink++;
-    fcb->inode_item.st_ctime = now;
     
+    if (!ccb->user_set_change_time)
+        fcb->inode_item.st_ctime = now;
+    
+    fcb->inode_item_changed = TRUE;
     mark_fcb_dirty(fcb);
     
     // update parent's INODE_ITEM
@@ -2374,6 +2292,7 @@ static NTSTATUS STDCALL set_link_information(device_extension* Vcb, PIRP Irp, PF
     parfcb->inode_item.sequence++;
     parfcb->inode_item.st_ctime = now;
     
+    parfcb->inode_item_changed = TRUE;
     mark_fcb_dirty(parfcb);
     
     send_notification_fileref(fr2, FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_ADDED);
@@ -2400,7 +2319,7 @@ end:
     }
     
     if (NT_SUCCESS(Status))
-        clear_rollback(&rollback);
+        clear_rollback(Vcb, &rollback);
     else
         do_rollback(Vcb, &rollback);
 
@@ -2529,13 +2448,6 @@ NTSTATUS STDCALL drv_set_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp
         {
             TRACE("FilePositionInformation\n");
             
-            if (Irp->RequestorMode == UserMode &&
-                (!(ccb->access & (FILE_READ_DATA | FILE_WRITE_DATA)) || !(ccb->options & (FILE_SYNCHRONOUS_IO_ALERT | FILE_SYNCHRONOUS_IO_NONALERT)))) {
-                WARN("insufficient privileges\n");
-                Status = STATUS_ACCESS_DENIED;
-                break;
-            }
-            
             Status = set_position_information(Vcb, Irp, IrpSp->FileObject);
             
             break;
@@ -2674,19 +2586,23 @@ static NTSTATUS STDCALL fill_in_file_standard_information(FILE_STANDARD_INFORMAT
     return STATUS_SUCCESS;
 }
 
-static NTSTATUS STDCALL fill_in_file_internal_information(FILE_INTERNAL_INFORMATION* fii, UINT64 inode, LONG* length) {
+static NTSTATUS STDCALL fill_in_file_internal_information(FILE_INTERNAL_INFORMATION* fii, fcb* fcb, LONG* length) {
     *length -= sizeof(FILE_INTERNAL_INFORMATION);
     
-    fii->IndexNumber.QuadPart = inode;
+    fii->IndexNumber.QuadPart = make_file_id(fcb->subvol, fcb->inode);
     
     return STATUS_SUCCESS;
 }  
     
-static NTSTATUS STDCALL fill_in_file_ea_information(FILE_EA_INFORMATION* eai, LONG* length) {
+static NTSTATUS STDCALL fill_in_file_ea_information(FILE_EA_INFORMATION* eai, fcb* fcb, LONG* length) {
     *length -= sizeof(FILE_EA_INFORMATION);
     
-    // FIXME - should this be the reparse tag for symlinks?
-    eai->EaSize = 0;
+    /* This value appears to be the size of the structure NTFS stores on disk, and not,
+     * as might be expected, the size of FILE_FULL_EA_INFORMATION (which is what we store).
+     * The formula is 4 bytes as a header, followed by 5 + NameLength + ValueLength for each
+     * item. */
+    
+    eai->EaSize = fcb->ealen;
     
     return STATUS_SUCCESS;
 }
@@ -2977,20 +2893,22 @@ static NTSTATUS STDCALL fill_in_file_stream_information(FILE_STREAM_INFORMATION*
         goto end;
     }
     
-    si = ExAllocatePoolWithTag(PagedPool, sizeof(stream_info), ALLOC_TAG);
-    if (!si) {
-        ERR("out of memory\n");
-        Status = STATUS_INSUFFICIENT_RESOURCES;
-        goto end;
+    if (fileref->fcb->type != BTRFS_TYPE_DIRECTORY) {
+        si = ExAllocatePoolWithTag(PagedPool, sizeof(stream_info), ALLOC_TAG);
+        if (!si) {
+            ERR("out of memory\n");
+            Status = STATUS_INSUFFICIENT_RESOURCES;
+            goto end;
+        }
+        
+        si->name.Length = si->name.MaximumLength = 0;
+        si->name.Buffer = NULL;
+        si->size = fileref->fcb->inode_item.st_size;
+        si->ignore = FALSE;
+        
+        InsertTailList(&streamlist, &si->list_entry);
     }
     
-    si->name.Length = si->name.MaximumLength = 0;
-    si->name.Buffer = NULL;
-    si->size = fileref->fcb->inode_item.st_size;
-    si->ignore = FALSE;
-    
-    InsertTailList(&streamlist, &si->list_entry);
-    
     do {
         if (tp.item->key.obj_id == fileref->fcb->inode && tp.item->key.obj_type == TYPE_XATTR_ITEM) {
             if (tp.item->size < sizeof(DIR_ITEM)) {
@@ -3007,7 +2925,9 @@ static NTSTATUS STDCALL fill_in_file_stream_information(FILE_STREAM_INFORMATION*
                     }
                     
                     if (xa->n > strlen(xapref) && RtlCompareMemory(xa->name, xapref, strlen(xapref)) == strlen(xapref) &&
-                        (tp.item->key.offset != EA_DOSATTRIB_HASH || xa->n != strlen(EA_DOSATTRIB) || RtlCompareMemory(xa->name, EA_DOSATTRIB, xa->n) != xa->n)) {
+                        (tp.item->key.offset != EA_DOSATTRIB_HASH || xa->n != strlen(EA_DOSATTRIB) || RtlCompareMemory(xa->name, EA_DOSATTRIB, xa->n) != xa->n) &&
+                        (tp.item->key.offset != EA_EA_HASH || xa->n != strlen(EA_EA) || RtlCompareMemory(xa->name, EA_EA, xa->n) != xa->n)
+                    ) {
                         Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, &xa->name[strlen(xapref)], xa->n - strlen(xapref));
                         if (!NT_SUCCESS(Status)) {
                             ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status);
@@ -3504,7 +3424,7 @@ NTSTATUS open_fileref_by_inode(device_extension* Vcb, root* subvol, UINT64 inode
     hardlink* hl;
     file_ref *parfr, *fr;
     
-    Status = open_fcb(Vcb, subvol, inode, 0, NULL, NULL, &fcb, Irp);
+    Status = open_fcb(Vcb, subvol, inode, 0, NULL, NULL, &fcb, PagedPool, Irp);
     if (!NT_SUCCESS(Status)) {
         ERR("open_fcb returned %08x\n", Status);
         return Status;
@@ -3785,7 +3705,7 @@ static NTSTATUS STDCALL query_info(device_extension* Vcb, PFILE_OBJECT FileObjec
             
             TRACE("FileAllInformation\n");
             
-            if (!(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) {
+            if (Irp->RequestorMode != KernelMode && !(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) {
                 WARN("insufficient privileges\n");
                 Status = STATUS_ACCESS_DENIED;
                 goto exit;
@@ -3809,10 +3729,10 @@ static NTSTATUS STDCALL query_info(device_extension* Vcb, PFILE_OBJECT FileObjec
                 fill_in_file_standard_information(&fai->StandardInformation, fcb, fileref, &length);
             
             if (length > 0)
-                fill_in_file_internal_information(&fai->InternalInformation, fcb->inode, &length);
+                fill_in_file_internal_information(&fai->InternalInformation, fcb, &length);
             
             if (length > 0)
-                fill_in_file_ea_information(&fai->EaInformation, &length);
+                fill_in_file_ea_information(&fai->EaInformation, fcb, &length);
             
             if (length > 0)
                 fill_in_file_access_information(&fai->AccessInformation, &length);
@@ -3840,7 +3760,7 @@ static NTSTATUS STDCALL query_info(device_extension* Vcb, PFILE_OBJECT FileObjec
             
             TRACE("FileAttributeTagInformation\n");
             
-            if (!(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) {
+            if (Irp->RequestorMode != KernelMode && !(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) {
                 WARN("insufficient privileges\n");
                 Status = STATUS_ACCESS_DENIED;
                 goto exit;
@@ -3858,7 +3778,7 @@ static NTSTATUS STDCALL query_info(device_extension* Vcb, PFILE_OBJECT FileObjec
             
             TRACE("FileBasicInformation\n");
             
-            if (!(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) {
+            if (Irp->RequestorMode != KernelMode && !(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) {
                 WARN("insufficient privileges\n");
                 Status = STATUS_ACCESS_DENIED;
                 goto exit;
@@ -3896,7 +3816,7 @@ static NTSTATUS STDCALL query_info(device_extension* Vcb, PFILE_OBJECT FileObjec
             
             TRACE("FileEaInformation\n");
             
-            Status = fill_in_file_ea_information(eai, &length);
+            Status = fill_in_file_ea_information(eai, fcb, &length);
             
             break;
         }
@@ -3907,7 +3827,7 @@ static NTSTATUS STDCALL query_info(device_extension* Vcb, PFILE_OBJECT FileObjec
             
             TRACE("FileInternalInformation\n");
             
-            Status = fill_in_file_internal_information(fii, fcb->inode, &length);
+            Status = fill_in_file_internal_information(fii, fcb, &length);
             
             break;
         }
@@ -3929,7 +3849,7 @@ static NTSTATUS STDCALL query_info(device_extension* Vcb, PFILE_OBJECT FileObjec
             
             TRACE("FileNetworkOpenInformation\n");
             
-            if (!(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) {
+            if (Irp->RequestorMode != KernelMode && !(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) {
                 WARN("insufficient privileges\n");
                 Status = STATUS_ACCESS_DENIED;
                 goto exit;
@@ -3946,12 +3866,6 @@ static NTSTATUS STDCALL query_info(device_extension* Vcb, PFILE_OBJECT FileObjec
             
             TRACE("FilePositionInformation\n");
             
-            if (!(ccb->access & (FILE_READ_DATA | FILE_WRITE_DATA)) || !(ccb->options & (FILE_SYNCHRONOUS_IO_ALERT | FILE_SYNCHRONOUS_IO_NONALERT))) {
-                WARN("insufficient privileges\n");
-                Status = STATUS_ACCESS_DENIED;
-                goto exit;
-            }
-            
             Status = fill_in_file_position_information(fpi, FileObject, &length);
             
             break;
@@ -4114,3 +4028,521 @@ exit:
     
     return Status;
 }
+
+/*
+ * drv_query_ea - handle an extended-attribute query request for a file.
+ *
+ * Copies entries from the file's cached EA list (fcb->ea_xattr, a chain of
+ * FILE_FULL_EA_INFORMATION records) into the buffer returned by
+ * map_user_buffer(Irp). Each output entry is started on a 4-byte boundary.
+ *
+ *  - If IrpSp->Parameters.QueryEa.EaList is supplied, the requested names are
+ *    upper-cased in place and every stored EA whose name matches is returned.
+ *  - Otherwise entries are returned sequentially, starting at EaIndex - 1
+ *    (SL_INDEX_SPECIFIED, 1-based), at 0 (SL_RESTART_SCAN) or at the position
+ *    remembered in ccb->ea_index.
+ *
+ * SL_RETURN_SINGLE_ENTRY limits the output to one entry in both modes.
+ *
+ * DeviceObject: device the IRP was sent to; its extension is the Vcb.
+ * Irp:          the request; completed here unless it is handed to
+ *               part0_passthrough().
+ *
+ * Returns the status stored in the IRP. IoStatus.Information is the number
+ * of bytes written on success or STATUS_BUFFER_OVERFLOW, and 0 otherwise.
+ */
+NTSTATUS STDCALL drv_query_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
+    NTSTATUS Status;
+    BOOL top_level;
+    device_extension* Vcb = DeviceObject->DeviceExtension;
+    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
+    PFILE_OBJECT FileObject = IrpSp->FileObject;
+    fcb* fcb;
+    ccb* ccb;
+    FILE_FULL_EA_INFORMATION* ffei;
+    ULONG retlen = 0;
+#ifdef __REACTOS__
+    Status = STATUS_INTERNAL_ERROR;
+#endif
+
+    TRACE("(%p, %p)\n", DeviceObject, Irp);
+
+    FsRtlEnterFileSystem();
+
+    top_level = is_top_level(Irp);
+
+    if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) {
+        // The IRP is passed on, so it must not be completed here.
+        Status = part0_passthrough(DeviceObject, Irp);
+        goto exit;
+    }
+
+    ffei = map_user_buffer(Irp);
+    if (!ffei) {
+        ERR("could not get output buffer\n");
+        Status = STATUS_INVALID_PARAMETER;
+        goto end;
+    }
+
+    if (!FileObject) {
+        ERR("no file object\n");
+        Status = STATUS_INVALID_PARAMETER;
+        goto end;
+    }
+
+    fcb = FileObject->FsContext;
+
+    if (!fcb) {
+        ERR("no fcb\n");
+        Status = STATUS_INVALID_PARAMETER;
+        goto end;
+    }
+
+    ccb = FileObject->FsContext2;
+
+    if (!ccb) {
+        ERR("no ccb\n");
+        Status = STATUS_INVALID_PARAMETER;
+        goto end;
+    }
+
+    if (Irp->RequestorMode == UserMode && !(ccb->access & (FILE_READ_EA | FILE_WRITE_EA))) {
+        WARN("insufficient privileges\n");
+        Status = STATUS_ACCESS_DENIED;
+        goto end;
+    }
+
+    // From here on every exit path must go through end2 to drop the lock.
+    ExAcquireResourceSharedLite(fcb->Header.Resource, TRUE);
+
+    if (fcb->ea_xattr.Length == 0) {
+        // Previously Status was left unset on this path.
+        Status = STATUS_NO_EAS_ON_FILE;
+        goto end2;
+    }
+
+    if (IrpSp->Parameters.QueryEa.EaList) {
+        FILE_FULL_EA_INFORMATION *ea, *out;
+        FILE_GET_EA_INFORMATION* in;
+
+        // Upper-case the requested names in place, so that the comparison
+        // below can be a plain byte-for-byte match.
+        in = IrpSp->Parameters.QueryEa.EaList;
+        do {
+            STRING s;
+
+            s.Length = s.MaximumLength = in->EaNameLength;
+            s.Buffer = in->EaName;
+
+            RtlUpperString(&s, &s);
+
+            if (in->NextEntryOffset == 0)
+                break;
+
+            in = (FILE_GET_EA_INFORMATION*)(((UINT8*)in) + in->NextEntryOffset);
+        } while (TRUE);
+
+        ea = (FILE_FULL_EA_INFORMATION*)fcb->ea_xattr.Buffer;
+        out = NULL;
+
+        // Walk the stored EAs, emitting each one that appears in the request list.
+        do {
+            BOOL found = FALSE;
+
+            in = IrpSp->Parameters.QueryEa.EaList;
+            do {
+                if (in->EaNameLength == ea->EaNameLength &&
+                    RtlCompareMemory(in->EaName, ea->EaName, in->EaNameLength) == in->EaNameLength) {
+                    found = TRUE;
+                    break;
+                }
+
+                if (in->NextEntryOffset == 0)
+                    break;
+
+                in = (FILE_GET_EA_INFORMATION*)(((UINT8*)in) + in->NextEntryOffset);
+            } while (TRUE);
+
+            if (found) {
+                UINT8 padding = retlen % 4 > 0 ? (4 - (retlen % 4)) : 0;
+                ULONG entrylen = offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + ea->EaNameLength + 1 + ea->EaValueLength;
+                // retlen never exceeds the buffer length, so this cannot wrap.
+                ULONG remaining = IrpSp->Parameters.QueryEa.Length - retlen;
+
+                // Check the padding separately: subtracting it from a smaller
+                // "remaining" would wrap around and let the copy overrun the buffer.
+                if (padding > remaining || entrylen > remaining - padding) {
+                    Status = STATUS_BUFFER_OVERFLOW;
+                    retlen = 0;
+                    goto end2;
+                }
+
+                retlen += padding;
+
+                if (out) {
+                    // Link the previous entry to the (aligned) start of this one.
+                    out->NextEntryOffset = offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + out->EaNameLength + 1 + out->EaValueLength + padding;
+                    out = (FILE_FULL_EA_INFORMATION*)(((UINT8*)out) + out->NextEntryOffset);
+                } else
+                    out = ffei;
+
+                out->NextEntryOffset = 0;
+                out->Flags = ea->Flags;
+                out->EaNameLength = ea->EaNameLength;
+                out->EaValueLength = ea->EaValueLength;
+                // Name, its terminating byte and the value are contiguous.
+                RtlCopyMemory(out->EaName, ea->EaName, ea->EaNameLength + ea->EaValueLength + 1);
+
+                retlen += entrylen;
+
+                if (IrpSp->Flags & SL_RETURN_SINGLE_ENTRY)
+                    break;
+            }
+
+            if (ea->NextEntryOffset == 0)
+                break;
+
+            ea = (FILE_FULL_EA_INFORMATION*)(((UINT8*)ea) + ea->NextEntryOffset);
+        } while (TRUE);
+    } else {
+        FILE_FULL_EA_INFORMATION *ea, *out;
+        ULONG index;
+
+        if (IrpSp->Flags & SL_INDEX_SPECIFIED) {
+            // The index is 1-based
+            if (IrpSp->Parameters.QueryEa.EaIndex == 0) {
+                Status = STATUS_NONEXISTENT_EA_ENTRY;
+                // The fcb resource is held here, so leave via end2.
+                goto end2;
+            } else
+                index = IrpSp->Parameters.QueryEa.EaIndex - 1;
+        } else if (IrpSp->Flags & SL_RESTART_SCAN)
+            index = ccb->ea_index = 0;
+        else
+            index = ccb->ea_index;
+
+        ea = (FILE_FULL_EA_INFORMATION*)fcb->ea_xattr.Buffer;
+
+        if (index > 0) {
+            ULONG i;
+
+            // Skip forward to the requested starting entry.
+            for (i = 0; i < index; i++) {
+                if (ea->NextEntryOffset == 0) { // last item
+                    // Previously Status was left unset on this path.
+                    Status = STATUS_NO_MORE_EAS;
+                    goto end2;
+                }
+
+                ea = (FILE_FULL_EA_INFORMATION*)(((UINT8*)ea) + ea->NextEntryOffset);
+            }
+        }
+
+        out = NULL;
+
+        do {
+            UINT8 padding = retlen % 4 > 0 ? (4 - (retlen % 4)) : 0;
+            ULONG entrylen = offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + ea->EaNameLength + 1 + ea->EaValueLength;
+            // retlen never exceeds the buffer length, so this cannot wrap.
+            ULONG remaining = IrpSp->Parameters.QueryEa.Length - retlen;
+
+            // See the matching check in the EaList branch for why the padding
+            // is tested on its own.
+            if (padding > remaining || entrylen > remaining - padding) {
+                Status = retlen == 0 ? STATUS_BUFFER_TOO_SMALL : STATUS_BUFFER_OVERFLOW;
+                goto end2;
+            }
+
+            retlen += padding;
+
+            if (out) {
+                out->NextEntryOffset = offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + out->EaNameLength + 1 + out->EaValueLength + padding;
+                out = (FILE_FULL_EA_INFORMATION*)(((UINT8*)out) + out->NextEntryOffset);
+            } else
+                out = ffei;
+
+            out->NextEntryOffset = 0;
+            out->Flags = ea->Flags;
+            out->EaNameLength = ea->EaNameLength;
+            out->EaValueLength = ea->EaValueLength;
+            RtlCopyMemory(out->EaName, ea->EaName, ea->EaNameLength + ea->EaValueLength + 1);
+
+            retlen += entrylen;
+
+            // Remember the scan position only for sequential (non-indexed) queries.
+            if (!(IrpSp->Flags & SL_INDEX_SPECIFIED))
+                ccb->ea_index++;
+
+            if (ea->NextEntryOffset == 0 || IrpSp->Flags & SL_RETURN_SINGLE_ENTRY)
+                break;
+
+            ea = (FILE_FULL_EA_INFORMATION*)(((UINT8*)ea) + ea->NextEntryOffset);
+        } while (TRUE);
+    }
+
+    Status = STATUS_SUCCESS;
+
+end2:
+    ExReleaseResourceLite(fcb->Header.Resource);
+
+end:
+    Irp->IoStatus.Status = Status;
+    Irp->IoStatus.Information = NT_SUCCESS(Status) || Status == STATUS_BUFFER_OVERFLOW ? retlen : 0;
+
+    IoCompleteRequest( Irp, IO_NO_INCREMENT );
+
+exit:
+    if (top_level) 
+        IoSetTopLevelIrp(NULL);
+
+    FsRtlExitFileSystem();
+
+    return Status;
+}
+
+typedef struct {               // one extended attribute in the temporary list that drv_set_ea builds
+    ANSI_STRING name;          // EA name; Buffer is set to point at an existing entry's EaName, no copy is made
+    ANSI_STRING value;         // EA value; Buffer is set to point just past the name and its terminating byte
+    UCHAR flags;               // taken from the Flags field of the FILE_FULL_EA_INFORMATION entry
+    LIST_ENTRY list_entry;     // links the item into drv_set_ea's local ealist
+} ea_item;
+
+NTSTATUS STDCALL drv_set_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
+    device_extension* Vcb = DeviceObject->DeviceExtension;
+    NTSTATUS Status;
+    BOOL top_level;
+    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
+    PFILE_OBJECT FileObject = IrpSp->FileObject;
+    fcb* fcb;
+    ccb* ccb;
+    FILE_FULL_EA_INFORMATION* ffei;
+    ULONG offset;
+    LIST_ENTRY ealist;
+    ea_item* item;
+    FILE_FULL_EA_INFORMATION* ea;
+    LIST_ENTRY* le;
+    LARGE_INTEGER time;
+    BTRFS_TIME now;
+    
+    TRACE("(%p, %p)\n", DeviceObject, Irp);
+
+    FsRtlEnterFileSystem();
+
+    top_level = is_top_level(Irp);
+    
+    if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) {
+        Status = part0_passthrough(DeviceObject, Irp);
+        goto exit;
+    }
+    
+    if (Vcb->readonly) {
+        Status = STATUS_MEDIA_WRITE_PROTECTED;
+        goto end;
+    }
+    
+    ffei = map_user_buffer(Irp);
+    if (!ffei) {
+        ERR("could not get output buffer\n");
+        Status = STATUS_INVALID_PARAMETER;
+        goto end;
+    }
+    
+    Status = IoCheckEaBufferValidity(ffei, IrpSp->Parameters.SetEa.Length, &offset);
+    if (!NT_SUCCESS(Status)) {
+        ERR("IoCheckEaBufferValidity returned %08x (error at offset %u)\n", Status, offset);
+        goto end;
+    }
+    
+    if (!FileObject) {
+        ERR("no file object\n");
+        Status = STATUS_INVALID_PARAMETER;
+        goto end;
+    }
+    
+    fcb = FileObject->FsContext;
+    
+    if (!fcb) {
+        ERR("no fcb\n");
+        Status = STATUS_INVALID_PARAMETER;
+        goto end;
+    }
+    
+    ccb = FileObject->FsContext2;
+    
+    if (!ccb) {
+        ERR("no ccb\n");
+        Status = STATUS_INVALID_PARAMETER;
+        goto end;
+    }
+    
+    if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_WRITE_EA)) {
+        WARN("insufficient privileges\n");
+        Status = STATUS_ACCESS_DENIED;
+        goto end;
+    }
+    
+    InitializeListHead(&ealist);
+    
+    ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE);
+    
+    if (fcb->ea_xattr.Length > 0) {
+        ea = (FILE_FULL_EA_INFORMATION*)fcb->ea_xattr.Buffer;
+        
+        do {
+            item = ExAllocatePoolWithTag(PagedPool, sizeof(ea_item), ALLOC_TAG);
+            if (!item) {
+                ERR("out of memory\n");
+                Status = STATUS_INSUFFICIENT_RESOURCES;
+                goto end2;
+            }
+            
+            item->name.Length = item->name.MaximumLength = ea->EaNameLength;
+            item->name.Buffer = ea->EaName;
+            
+            item->value.Length = item->value.MaximumLength = ea->EaValueLength;
+            item->value.Buffer = &ea->EaName[ea->EaNameLength + 1];
+            
+            item->flags = ea->Flags;
+            
+            InsertTailList(&ealist, &item->list_entry);
+            
+            if (ea->NextEntryOffset == 0)
+                break;
+            
+            ea = (FILE_FULL_EA_INFORMATION*)(((UINT8*)ea) + ea->NextEntryOffset);
+        } while (TRUE);
+    }
+    
+    ea = ffei;
+    
+    do {
+        STRING s;
+        BOOL found = FALSE;
+        
+        s.Length = s.MaximumLength = ea->EaNameLength;
+        s.Buffer = ea->EaName;
+        
+        RtlUpperString(&s, &s);
+        
+        le = ealist.Flink;
+        while (le != &ealist) {
+            item = CONTAINING_RECORD(le, ea_item, list_entry);
+            
+            if (item->name.Length == s.Length &&
+                RtlCompareMemory(item->name.Buffer, s.Buffer, s.Length) == s.Length) {
+                item->flags = ea->Flags;
+                item->value.Length = item->value.MaximumLength = ea->EaValueLength;
+                item->value.Buffer = &ea->EaName[ea->EaNameLength + 1];
+                found = TRUE;
+                break;
+            }
+            
+            le = le->Flink;
+        }
+        
+        if (!found) {
+            item = ExAllocatePoolWithTag(PagedPool, sizeof(ea_item), ALLOC_TAG);
+            if (!item) {
+                ERR("out of memory\n");
+                Status = STATUS_INSUFFICIENT_RESOURCES;
+                goto end2;
+            }
+            
+            item->name.Length = item->name.MaximumLength = ea->EaNameLength;
+            item->name.Buffer = ea->EaName;
+            
+            item->value.Length = item->value.MaximumLength = ea->EaValueLength;
+            item->value.Buffer = &ea->EaName[ea->EaNameLength + 1];
+            
+            item->flags = ea->Flags;
+            
+            InsertTailList(&ealist, &item->list_entry);
+        }
+        
+        if (ea->NextEntryOffset == 0)
+            break;
+        
+        ea = (FILE_FULL_EA_INFORMATION*)(((UINT8*)ea) + ea->NextEntryOffset);
+    } while (TRUE);
+    
+    // remove entries with zero-length value
+    le = ealist.Flink;
+    while (le != &ealist) {
+        LIST_ENTRY* le2 = le->Flink;
+        
+        item = CONTAINING_RECORD(le, ea_item, list_entry);
+        
+        if (item->value.Length == 0) {
+            RemoveEntryList(&item->list_entry);
+            ExFreePool(item);
+        }
+        
+        le = le2;
+    }
+    
+    if (IsListEmpty(&ealist)) {
+        fcb->ealen = 0;
+        
+        if (fcb->ea_xattr.Buffer)
+            ExFreePool(fcb->ea_xattr.Buffer);
+        
+        fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = 0;
+        fcb->ea_xattr.Buffer = NULL;
+    } else {
+        ULONG size = 0;
+        char *buf, *oldbuf;
+        
+        le = ealist.Flink;
+        while (le != &ealist) {
+            item = CONTAINING_RECORD(le, ea_item, list_entry);
+            
+            if (size % 4 > 0)
+                size += 4 - (size % 4);
+            
+            size += offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + item->name.Length + 1 + item->value.Length;
+            
+            le = le->Flink;
+        }
+        
+        buf = ExAllocatePoolWithTag(PagedPool, size, ALLOC_TAG);
+        if (!buf) {
+            ERR("out of memory\n");
+            Status = STATUS_INSUFFICIENT_RESOURCES;
+            goto end2;
+        }
+        
+        oldbuf = fcb->ea_xattr.Buffer;
+        
+        fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = size;
+        fcb->ea_xattr.Buffer = buf;
+        
+        fcb->ealen = 4;
+        ea = NULL;
+        
+        le = ealist.Flink;
+        while (le != &ealist) {
+            item = CONTAINING_RECORD(le, ea_item, list_entry);
+            
+            if (ea) {
+                ea->NextEntryOffset = offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + ea->EaNameLength + ea->EaValueLength;
+                
+                if (ea->NextEntryOffset % 4 > 0)
+                    ea->NextEntryOffset += 4 - (ea->NextEntryOffset % 4);
+                
+                ea = (FILE_FULL_EA_INFORMATION*)(((UINT8*)ea) + ea->NextEntryOffset);
+            } else
+                ea = (FILE_FULL_EA_INFORMATION*)fcb->ea_xattr.Buffer;
+            
+            ea->NextEntryOffset = 0;
+            ea->Flags = item->flags;
+            ea->EaNameLength = item->name.Length;
+            ea->EaValueLength = item->value.Length;
+            
+            RtlCopyMemory(ea->EaName, item->name.Buffer, item->name.Length);
+            ea->EaName[item->name.Length] = 0;
+            RtlCopyMemory(&ea->EaName[item->name.Length + 1], item->value.Buffer, item->value.Length);
+            
+            fcb->ealen += 5 + item->name.Length + item->value.Length;
+            
+            le = le->Flink;
+        }
+        
+        if (oldbuf)
+            ExFreePool(oldbuf);
+    }
+    
+    fcb->ea_changed = TRUE;
+    
+    KeQuerySystemTime(&time);
+    win_time_to_unix(time, &now);
+
+    fcb->inode_item.transid = Vcb->superblock.generation;
+    fcb->inode_item.sequence++;
+    
+    if (!ccb->user_set_change_time)
+        fcb->inode_item.st_ctime = now;
+    
+    fcb->inode_item_changed = TRUE;
+    mark_fcb_dirty(fcb);
+    
+    send_notification_fileref(ccb->fileref, FILE_NOTIFY_CHANGE_EA, FILE_ACTION_MODIFIED);
+    
+    Status = STATUS_SUCCESS;
+    
+end2:
+    ExReleaseResourceLite(fcb->Header.Resource);
+    
+    while (!IsListEmpty(&ealist)) {
+        le = RemoveHeadList(&ealist);
+        
+        item = CONTAINING_RECORD(le, ea_item, list_entry);
+        
+        ExFreePool(item);
+    }
+    
+end:
+    Irp->IoStatus.Status = Status;
+    Irp->IoStatus.Information = 0;
+
+    IoCompleteRequest(Irp, IO_NO_INCREMENT);
+    
+exit:
+    if (top_level) 
+        IoSetTopLevelIrp(NULL);
+    
+    FsRtlExitFileSystem();
+
+    return Status;
+}
index 48eabad..78cd92a 100644 (file)
 
 #include "btrfs_drv.h"
 
+// Bytes remaining in a 4096-byte block once a tree_header and one leaf_node
+// have been accounted for.
+// NOTE(review): presumably the cap on a single checksum item's payload - confirm against its users.
+#define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - sizeof(leaf_node))
+
+// Define DEBUG_WRITE_LOOPS to make trees_consistent() log why it reported FALSE.
+// #define DEBUG_WRITE_LOOPS
+
+// State shared between write_data_phys and write_completion: Event is set by
+// the completion routine, iosb receives a copy of the IRP's final IoStatus.
+typedef struct {
+    KEVENT Event;
+    IO_STATUS_BLOCK iosb;
+} write_context;
+
+// Item payload inserted under TYPE_EXTENT_ITEM by insert_tree_extent:
+// an EXTENT_ITEM_TREE followed by one inline ref (type byte + TREE_BLOCK_REF).
+typedef struct {
+    EXTENT_ITEM_TREE eit;
+    UINT8 type;
+    TREE_BLOCK_REF tbr;
+} EXTENT_ITEM_TREE2;
+
+// Item payload inserted under TYPE_METADATA_ITEM by insert_tree_extent_skinny:
+// a plain EXTENT_ITEM followed by one inline ref (type byte + TREE_BLOCK_REF).
+typedef struct {
+    EXTENT_ITEM ei;
+    UINT8 type;
+    TREE_BLOCK_REF tbr;
+} EXTENT_ITEM_SKINNY_METADATA;
+
+// List node carrying an address, a length and a data pointer.
+// NOTE(review): presumably one queued tree-block write; the meaning of `overlap`
+// is not visible in this part of the file - confirm against its users.
+typedef struct {
+    UINT64 address;
+    UINT32 length;
+    BOOL overlap;
+    UINT8* data;
+    LIST_ENTRY list_entry;
+} tree_write;
+
+// Forward declarations of file-local functions used before their definitions.
+static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp, LIST_ENTRY* rollback);
+
+static BOOL insert_tree_item_batch(LIST_ENTRY* batchlist, device_extension* Vcb, root* r, UINT64 objid, UINT64 objtype, UINT64 offset,
+                                   void* data, UINT16 datalen, enum batch_operation operation, PIRP Irp, LIST_ENTRY* rollback);
+
+// Completion routine installed by write_data_phys. conptr is the write_context
+// of the waiting caller: the IRP's final status block is copied into it and
+// its event is signalled.
+static NTSTATUS STDCALL write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
+    write_context* wc = (write_context*)conptr;
+
+    wc->iosb = Irp->IoStatus;
+    KeSetEvent(&wc->Event, 0, FALSE);
+
+    // Deliberately not STATUS_SUCCESS: the IRP was allocated by write_data_phys,
+    // which frees it itself once this routine has run.
+    return STATUS_MORE_PROCESSING_REQUIRED;
+}
+
+// Synchronously writes `length` bytes from `data` to `device` at byte offset
+// `address`, building the IRP by hand and waiting for write_completion to
+// signal the result.
+//
+// The buffer is handed over according to the target's I/O mode: as the system
+// buffer for DO_BUFFERED_IO, as a locked MDL for DO_DIRECT_IO, otherwise as
+// the raw user buffer.
+//
+// Returns the status of the write, or STATUS_INSUFFICIENT_RESOURCES /
+// STATUS_INTERNAL_ERROR if the request could not be built.
+NTSTATUS STDCALL write_data_phys(PDEVICE_OBJECT device, UINT64 address, void* data, UINT32 length) {
+    NTSTATUS Status;
+    LARGE_INTEGER offset;
+    PIRP Irp;
+    PIO_STACK_LOCATION IrpSp;
+    PMDL mdl = NULL; // non-NULL only if we built and locked an MDL ourselves
+    write_context* context = NULL;
+
+    TRACE("(%p, %llx, %p, %x)\n", device, address, data, length);
+
+    context = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_context), ALLOC_TAG);
+    if (!context) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    RtlZeroMemory(context, sizeof(write_context));
+
+    KeInitializeEvent(&context->Event, NotificationEvent, FALSE);
+
+    offset.QuadPart = address;
+
+    Irp = IoAllocateIrp(device->StackSize, FALSE);
+
+    if (!Irp) {
+        ERR("IoAllocateIrp failed\n");
+        Status = STATUS_INTERNAL_ERROR;
+        goto exit2;
+    }
+
+    IrpSp = IoGetNextIrpStackLocation(Irp);
+    IrpSp->MajorFunction = IRP_MJ_WRITE;
+
+    if (device->Flags & DO_BUFFERED_IO) {
+        Irp->AssociatedIrp.SystemBuffer = data;
+
+        Irp->Flags = IRP_BUFFERED_IO;
+    } else if (device->Flags & DO_DIRECT_IO) {
+        mdl = IoAllocateMdl(data, length, FALSE, FALSE, NULL);
+        if (!mdl) {
+            ERR("IoAllocateMdl failed\n");
+            // Status must be set here: it is returned via the exit path.
+            Status = STATUS_INSUFFICIENT_RESOURCES;
+            goto exit;
+        }
+
+        Irp->MdlAddress = mdl;
+
+        MmProbeAndLockPages(mdl, KernelMode, IoWriteAccess);
+    } else {
+        Irp->UserBuffer = data;
+    }
+
+    IrpSp->Parameters.Write.Length = length;
+    IrpSp->Parameters.Write.ByteOffset = offset;
+
+    Irp->UserIosb = &context->iosb;
+
+    Irp->UserEvent = &context->Event;
+
+    IoSetCompletionRoutine(Irp, write_completion, context, TRUE, TRUE, TRUE);
+
+    Status = IoCallDriver(device, Irp);
+
+    if (Status == STATUS_PENDING) {
+        KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
+        Status = context->iosb.Status;
+    }
+
+    if (!NT_SUCCESS(Status)) {
+        ERR("IoCallDriver returned %08x\n", Status);
+    }
+
+    // Tear down exactly what was set up above, rather than re-deriving it from
+    // the device flags (which would touch a NULL MDL if the buffered branch ran).
+    if (mdl) {
+        MmUnlockPages(mdl);
+        IoFreeMdl(mdl);
+    }
+
+exit:
+    IoFreeIrp(Irp);
+
+exit2:
+    if (context)
+        ExFreePool(context);
+
+    return Status;
+}
+
+// Frees every space entry queued on c->deleting, leaving that list empty.
+// Vcb is currently unused.
+static void clean_space_cache_chunk(device_extension* Vcb, chunk* c) {
+    LIST_ENTRY* head = &c->deleting;
+
+    // FIXME - loop through c->deleting and do TRIM if device supports it
+    // FIXME - also find way of doing TRIM of dropped chunks
+
+    while (!IsListEmpty(head)) {
+        LIST_ENTRY* first = RemoveHeadList(head);
+
+        ExFreePool(CONTAINING_RECORD(first, space, list_entry));
+    }
+}
+
+// Drains Vcb->chunks_changed: for each chunk on it, under the chunk's lock,
+// frees its pending-deletion space entries and unlinks it from the list.
+static void clean_space_cache(device_extension* Vcb) {
+    LIST_ENTRY* changed = &Vcb->chunks_changed;
+
+    TRACE("(%p)\n", Vcb);
+
+    for (;;) {
+        chunk* ch;
+
+        if (IsListEmpty(changed))
+            break;
+
+        ch = CONTAINING_RECORD(changed->Flink, chunk, list_entry_changed);
+
+        ExAcquireResourceExclusiveLite(&ch->lock, TRUE);
+
+        clean_space_cache_chunk(Vcb, ch);
+
+        RemoveEntryList(&ch->list_entry_changed);
+        // A NULL Flink marks the chunk as no longer on the changed list.
+        ch->list_entry_changed.Flink = NULL;
+
+        ExReleaseResourceLite(&ch->lock);
+    }
+}
+
+static BOOL trees_consistent(device_extension* Vcb, LIST_ENTRY* rollback) {
+    ULONG maxsize = Vcb->superblock.node_size - sizeof(tree_header);
+    LIST_ENTRY* le;
+    
+    le = Vcb->trees.Flink;
+    while (le != &Vcb->trees) {
+        tree* t = CONTAINING_RECORD(le, tree, list_entry);
+        
+        if (t->write) {
+            if (t->header.num_items == 0 && t->parent) {
+#ifdef DEBUG_WRITE_LOOPS
+                ERR("empty tree found, looping again\n");
+#endif
+                return FALSE;
+            }
+            
+            if (t->size > maxsize) {
+#ifdef DEBUG_WRITE_LOOPS
+                ERR("overlarge tree found (%u > %u), looping again\n", t->size, maxsize);
+#endif
+                return FALSE;
+            }
+            
+            if (!t->has_new_address) {
+#ifdef DEBUG_WRITE_LOOPS
+                ERR("tree found without new address, looping again\n");
+#endif
+                return FALSE;
+            }
+        }
+        
+        le = le->Flink;
+    }
+    
+    return TRUE;
+}
+
+// Propagates the write flag upwards through the loaded trees, one level at a
+// time starting from the leaves: every tree marked for writing marks its
+// parent as well. For a parentless tree that belongs to neither the root tree
+// nor the chunk tree, the tree's ROOT_ITEM is looked up in the root tree; if
+// the stored item is shorter than sizeof(ROOT_ITEM) it is deleted and
+// re-inserted at full length from the in-memory copy.
+//
+// Returns STATUS_SUCCESS, or an error status if the ROOT_ITEM lookup,
+// allocation or insertion fails.
+static NTSTATUS add_parents(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
+    UINT8 level;
+    LIST_ENTRY* le;
+
+    // level is a UINT8, so a "level <= 255" condition can never be false;
+    // the loop is ended explicitly below instead of letting level wrap to 0.
+    for (level = 0; ; level++) {
+        BOOL nothing_found = TRUE;
+
+        TRACE("level = %u\n", level);
+
+        le = Vcb->trees.Flink;
+        while (le != &Vcb->trees) {
+            tree* t = CONTAINING_RECORD(le, tree, list_entry);
+
+            if (t->write && t->header.level == level) {
+                TRACE("tree %p: root = %llx, level = %x, parent = %p\n", t, t->header.tree_id, t->header.level, t->parent);
+
+                nothing_found = FALSE;
+
+                if (t->parent) {
+                    if (!t->parent->write) {
+                        TRACE("adding tree %p (level %x)\n", t->parent, t->header.level);
+                    }
+
+                    t->parent->write = TRUE;
+                } else if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
+                    KEY searchkey;
+                    traverse_ptr tp;
+                    NTSTATUS Status;
+
+                    searchkey.obj_id = t->root->id;
+                    searchkey.obj_type = TYPE_ROOT_ITEM;
+                    searchkey.offset = 0xffffffffffffffff;
+
+                    Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("error - find_item returned %08x\n", Status);
+                        return Status;
+                    }
+
+                    if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
+                        ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
+                        return STATUS_INTERNAL_ERROR;
+                    }
+
+                    if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry
+                        ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
+
+                        if (!ri) {
+                            ERR("out of memory\n");
+                            return STATUS_INSUFFICIENT_RESOURCES;
+                        }
+
+                        RtlCopyMemory(ri, &t->root->root_item, sizeof(ROOT_ITEM));
+
+                        delete_tree_item(Vcb, &tp, rollback);
+
+                        if (!insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp, rollback)) {
+                            ERR("insert_tree_item failed\n");
+                            // The buffer was not taken over by the tree, so release it here
+                            // (same handling as in insert_tree_extent).
+                            ExFreePool(ri);
+                            return STATUS_INTERNAL_ERROR;
+                        }
+                    }
+                }
+            }
+
+            le = le->Flink;
+        }
+
+        if (nothing_found || level == 255)
+            break;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Marks every ancestor of t (but not t itself) for writing. Vcb is unused.
+static void add_parents_to_cache(device_extension* Vcb, tree* t) {
+    tree* ancestor;
+
+    for (ancestor = t->parent; ancestor; ancestor = ancestor->parent)
+        ancestor->write = TRUE;
+}
+
+// Records a tree block at `address` in the extent tree as a TYPE_METADATA_ITEM
+// keyed (address, level), with refcount 1 and a single inline TREE_BLOCK_REF
+// pointing at root_id. On success the block's node_size bytes are subtracted
+// from chunk c's space list and the parents of the modified extent-tree leaf
+// are marked for writing.
+//
+// Returns TRUE on success, FALSE on allocation or insertion failure.
+static BOOL insert_tree_extent_skinny(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64 address, PIRP Irp, LIST_ENTRY* rollback) {
+    traverse_ptr tp;
+    EXTENT_ITEM_SKINNY_METADATA* md = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
+
+    if (!md) {
+        ERR("out of memory\n");
+        return FALSE;
+    }
+
+    md->type = TYPE_TREE_BLOCK_REF;
+    md->tbr.offset = root_id;
+    md->ei.flags = EXTENT_ITEM_TREE_BLOCK;
+    md->ei.generation = Vcb->superblock.generation;
+    md->ei.refcount = 1;
+
+    if (insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, md, sizeof(EXTENT_ITEM_SKINNY_METADATA), &tp, Irp, rollback)) {
+        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+        space_list_subtract(Vcb, c, FALSE, address, Vcb->superblock.node_size, rollback);
+        ExReleaseResourceLite(&c->lock);
+
+        add_parents_to_cache(Vcb, tp.tree);
+
+        return TRUE;
+    }
+
+    ERR("insert_tree_item failed\n");
+    ExFreePool(md);
+
+    return FALSE;
+}
+
+// Allocates node_size bytes for a tree block inside chunk c and records the
+// allocation in the extent tree.
+//
+// With the skinny-metadata incompat flag set, the work is delegated to
+// insert_tree_extent_skinny. Otherwise a TYPE_EXTENT_ITEM keyed
+// (address, node_size) is inserted with refcount 1 and one inline
+// TREE_BLOCK_REF for root_id, the space is subtracted from the chunk's space
+// list, and the parents of the modified extent-tree leaf are marked for
+// writing.
+//
+// On success returns TRUE and stores the chosen address in *new_address;
+// returns FALSE (leaving *new_address untouched) if the chunk has no room or
+// the item cannot be allocated or inserted.
+static BOOL insert_tree_extent(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64* new_address, PIRP Irp, LIST_ENTRY* rollback) {
+    UINT64 address;
+    EXTENT_ITEM_TREE2* eit2;
+    traverse_ptr insert_tp;
+
+    // Seven conversions, seven arguments (Irp was previously missing).
+    TRACE("(%p, %x, %llx, %p, %p, %p, %p)\n", Vcb, level, root_id, c, new_address, Irp, rollback);
+
+    if (!find_address_in_chunk(Vcb, c, Vcb->superblock.node_size, &address))
+        return FALSE;
+
+    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
+        BOOL b = insert_tree_extent_skinny(Vcb, level, root_id, c, address, Irp, rollback);
+
+        if (b)
+            *new_address = address;
+
+        return b;
+    }
+
+    eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
+    if (!eit2) {
+        ERR("out of memory\n");
+        return FALSE;
+    }
+
+    // NOTE(review): eit.firstitem is not filled in here - confirm that is intended.
+    eit2->eit.extent_item.refcount = 1;
+    eit2->eit.extent_item.generation = Vcb->superblock.generation;
+    eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
+    eit2->eit.level = level;
+    eit2->type = TYPE_TREE_BLOCK_REF;
+    eit2->tbr.offset = root_id;
+
+    if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, Irp, rollback)) {
+        ERR("insert_tree_item failed\n");
+        ExFreePool(eit2);
+        return FALSE;
+    }
+
+    ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+
+    space_list_subtract(Vcb, c, FALSE, address, Vcb->superblock.node_size, rollback);
+
+    ExReleaseResourceLite(&c->lock);
+
+    add_parents_to_cache(Vcb, insert_tp.tree);
+
+    *new_address = address;
+
+    return TRUE;
+}
+
+// Finds room for a new copy of tree t and records it in t->new_address /
+// t->has_new_address.
+//
+// Candidates are tried in this order:
+//   1. the chunk holding the tree's current address, if it has one and the
+//      chunk is not read-only;
+//   2. every other writable chunk whose type equals the wanted flags and which
+//      has at least node_size bytes unused;
+//   3. a freshly allocated chunk.
+// If t->flags is 0 the wanted flags are derived from the tree's root (system
+// for the chunk root, data+metadata for mixed block groups, metadata otherwise).
+//
+// Returns STATUS_SUCCESS, or STATUS_DISK_FULL if no chunk could take the tree.
+NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
+    chunk *origchunk = NULL, *c;
+    LIST_ENTRY* le;
+    UINT64 flags = t->flags, addr;
+
+    if (flags == 0) {
+        if (t->root->id == BTRFS_ROOT_CHUNK)
+            flags = BLOCK_FLAG_SYSTEM | BLOCK_FLAG_DUPLICATE;
+        else if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS)
+            flags = BLOCK_FLAG_DATA | BLOCK_FLAG_METADATA;
+        else
+            flags = BLOCK_FLAG_METADATA | BLOCK_FLAG_DUPLICATE;
+    }
+
+    if (t->has_address) {
+        origchunk = get_chunk_from_address(Vcb, t->header.address);
+
+        // get_chunk_from_address can come back empty (reduce_tree_extent guards
+        // against that too); in that case just fall through to the chunk scan.
+        if (!origchunk) {
+            ERR("could not find chunk for address %llx\n", t->header.address);
+        } else if (!origchunk->readonly && insert_tree_extent(Vcb, t->header.level, t->root->id, origchunk, &addr, Irp, rollback)) {
+            t->new_address = addr;
+            t->has_new_address = TRUE;
+            return STATUS_SUCCESS;
+        }
+    }
+
+    ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
+
+    le = Vcb->chunks.Flink;
+    while (le != &Vcb->chunks) {
+        c = CONTAINING_RECORD(le, chunk, list_entry);
+
+        if (!c->readonly) {
+            ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+
+            // origchunk was already tried above, so skip it here.
+            if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
+                if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) {
+                    ExReleaseResourceLite(&c->lock);
+                    ExReleaseResourceLite(&Vcb->chunk_lock);
+                    t->new_address = addr;
+                    t->has_new_address = TRUE;
+                    return STATUS_SUCCESS;
+                }
+            }
+
+            ExReleaseResourceLite(&c->lock);
+        }
+
+        le = le->Flink;
+    }
+
+    // allocate new chunk if necessary
+    if ((c = alloc_chunk(Vcb, flags))) {
+        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+
+        if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
+            if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) {
+                ExReleaseResourceLite(&c->lock);
+                ExReleaseResourceLite(&Vcb->chunk_lock);
+                t->new_address = addr;
+                t->has_new_address = TRUE;
+                return STATUS_SUCCESS;
+            }
+        }
+
+        ExReleaseResourceLite(&c->lock);
+    }
+
+    ExReleaseResourceLite(&Vcb->chunk_lock);
+
+    ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size);
+
+    return STATUS_DISK_FULL;
+}
+
+// TESTING
+// static void check_tree_num_items(tree* t) {
+//     LIST_ENTRY* le2;
+//     UINT32 ni;
+//     
+//     le2 = t->itemlist.Flink;
+//     ni = 0;
+//     while (le2 != &t->itemlist) {
+//         tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
+//         if (!td->ignore)
+//             ni++;
+//         le2 = le2->Flink;
+//     }
+//     
+//     if (t->header.num_items != ni) {
+//         ERR("tree %p not okay: num_items was %x, expecting %x\n", t, ni, t->header.num_items);
+//         int3;
+//     } else {
+//         ERR("tree %p okay\n", t);
+//     }
+// }
+// 
+// static void check_trees_num_items(LIST_ENTRY* tc) {
+//     LIST_ENTRY* le = tc->Flink;
+//     while (le != tc) {
+//         tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
+//         
+//         check_tree_num_items(tc2->tree);
+//         
+//         le = le->Flink;
+//     }    
+// }
+
+// Drops one tree-block reference on the extent at `address`. The reference is
+// attributed to the tree id of t's parent if it has one, otherwise to t's own
+// tree id. If the extent's refcount was exactly 1 beforehand, node_size bytes
+// are handed back to the owning chunk (usage decreased, range added to its
+// space list).
+//
+// Returns STATUS_INTERNAL_ERROR if the refcount was already 0, the status of
+// decrease_extent_refcount_tree if that fails, otherwise STATUS_SUCCESS. A
+// missing chunk is only logged.
+static NTSTATUS reduce_tree_extent(device_extension* Vcb, UINT64 address, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
+    UINT64 refs, owner;
+    chunk* ch;
+
+    TRACE("(%p, %llx, %p)\n", Vcb, address, t);
+
+    refs = get_extent_refcount(Vcb, address, Vcb->superblock.node_size, Irp);
+    if (refs == 0) {
+        ERR("error - refcount for extent %llx was 0\n", address);
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    owner = t->parent ? t->parent->header.tree_id : t->header.tree_id;
+
+    Status = decrease_extent_refcount_tree(Vcb, address, Vcb->superblock.node_size, owner, t->header.level, Irp, rollback);
+    if (!NT_SUCCESS(Status)) {
+        ERR("decrease_extent_refcount_tree returned %08x\n", Status);
+        return Status;
+    }
+
+    // Other references remain: the space stays allocated.
+    if (refs != 1)
+        return STATUS_SUCCESS;
+
+    ch = get_chunk_from_address(Vcb, address);
+    if (!ch) {
+        ERR("could not find chunk for address %llx\n", address);
+        return STATUS_SUCCESS;
+    }
+
+    ExAcquireResourceExclusiveLite(&ch->lock, TRUE);
+    decrease_chunk_usage(ch, Vcb->superblock.node_size);
+    space_list_add(Vcb, ch, TRUE, address, Vcb->superblock.node_size, rollback);
+    ExReleaseResourceLite(&ch->lock);
+
+    return STATUS_SUCCESS;
+}
+
+// Adds an EXTENT_DATA_REF to one of ce's reference lists (old_refs when `old`
+// is set, refs otherwise). An existing entry with the same root, objid and
+// offset has its count increased by edr->count; otherwise a copy of *edr is
+// appended. The matching total (old_count or count) grows by edr->count.
+//
+// Returns STATUS_SUCCESS, or STATUS_INSUFFICIENT_RESOURCES if a new entry
+// could not be allocated (in which case nothing is changed).
+static NTSTATUS add_changed_extent_ref_edr(changed_extent* ce, EXTENT_DATA_REF* edr, BOOL old) {
+    LIST_ENTRY* head = old ? &ce->old_refs : &ce->refs;
+    LIST_ENTRY* cur;
+    changed_extent_ref* ref;
+    BOOL merged = FALSE;
+
+    for (cur = head->Flink; cur != head; cur = cur->Flink) {
+        ref = CONTAINING_RECORD(cur, changed_extent_ref, list_entry);
+
+        if (ref->type != TYPE_EXTENT_DATA_REF)
+            continue;
+
+        if (ref->edr.root == edr->root && ref->edr.objid == edr->objid && ref->edr.offset == edr->offset) {
+            ref->edr.count += edr->count;
+            merged = TRUE;
+            break;
+        }
+    }
+
+    if (!merged) {
+        ref = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
+        if (!ref) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        ref->type = TYPE_EXTENT_DATA_REF;
+        RtlCopyMemory(&ref->edr, edr, sizeof(EXTENT_DATA_REF));
+        InsertTailList(head, &ref->list_entry);
+    }
+
+    if (old)
+        ce->old_count += edr->count;
+    else
+        ce->count += edr->count;
+
+    return STATUS_SUCCESS;
+}
+
+// Adds a SHARED_DATA_REF to one of ce's reference lists (old_refs when `old`
+// is set, refs otherwise). An existing shared-data entry with the same offset
+// has its count increased by sdr->count; otherwise a copy of *sdr is appended.
+// The matching total (old_count or count) grows by sdr->count.
+//
+// Returns STATUS_SUCCESS, or STATUS_INSUFFICIENT_RESOURCES if a new entry
+// could not be allocated (in which case nothing is changed).
+static NTSTATUS add_changed_extent_ref_sdr(changed_extent* ce, SHARED_DATA_REF* sdr, BOOL old) {
+    LIST_ENTRY* head = old ? &ce->old_refs : &ce->refs;
+    LIST_ENTRY* cur;
+    changed_extent_ref* ref;
+    BOOL merged = FALSE;
+
+    for (cur = head->Flink; cur != head; cur = cur->Flink) {
+        ref = CONTAINING_RECORD(cur, changed_extent_ref, list_entry);
+
+        if (ref->type == TYPE_SHARED_DATA_REF && ref->sdr.offset == sdr->offset) {
+            ref->sdr.count += sdr->count;
+            merged = TRUE;
+            break;
+        }
+    }
+
+    if (!merged) {
+        ref = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
+        if (!ref) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        ref->type = TYPE_SHARED_DATA_REF;
+        RtlCopyMemory(&ref->sdr, sdr, sizeof(SHARED_DATA_REF));
+        InsertTailList(head, &ref->list_entry);
+    }
+
+    if (old)
+        ce->old_count += sdr->count;
+    else
+        ce->count += sdr->count;
+
+    return STATUS_SUCCESS;
+}
+
+// Searches the extent tree for an item at t's on-disk address (METADATA_ITEM
+// when skinny metadata is enabled, EXTENT_ITEM otherwise, with the largest
+// possible offset as the search key).
+//
+// Returns FALSE if the lookup fails, or if the item found sits at that address
+// and is a METADATA_ITEM or EXTENT_ITEM; returns TRUE otherwise.
+static BOOL shared_tree_is_unique(device_extension* Vcb, tree* t, PIRP Irp) {
+    traverse_ptr tp;
+    KEY searchkey;
+    NTSTATUS Status;
+
+    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)
+        searchkey.obj_type = TYPE_METADATA_ITEM;
+    else
+        searchkey.obj_type = TYPE_EXTENT_ITEM;
+
+    searchkey.obj_id = t->header.address;
+    searchkey.offset = 0xffffffffffffffff;
+
+    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return FALSE;
+    }
+
+    if (tp.item->key.obj_id != t->header.address)
+        return TRUE;
+
+    if (tp.item->key.obj_type == TYPE_METADATA_ITEM || tp.item->key.obj_type == TYPE_EXTENT_ITEM)
+        return FALSE;
+
+    return TRUE;
+}
+
+static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
+    UINT64 rc = get_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, Irp);
+    UINT64 flags = get_extent_flags(Vcb, t->header.address, Irp);
+    
+    if (rc == 0) {
+        ERR("refcount for extent %llx was 0\n", t->header.address);
+        return STATUS_INTERNAL_ERROR;
+    }
+    
+    if (flags & EXTENT_ITEM_SHARED_BACKREFS || t->header.flags & HEADER_FLAG_SHARED_BACKREF || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
+        TREE_BLOCK_REF tbr;
+        BOOL unique = rc > 1 ? FALSE : (t->parent ? shared_tree_is_unique(Vcb, t->parent, Irp) : FALSE);
+        
+        if (t->header.level == 0) {
+            LIST_ENTRY* le;
+            
+            le = t->itemlist.Flink;
+            while (le != &t->itemlist) {
+                tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
+                
+                if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
+                    EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
+                    
+                    if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
+                        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
+                            
+                        if (ed2->size > 0) {
+                            EXTENT_DATA_REF edr;
+                            changed_extent* ce = NULL;
+                            chunk* c = get_chunk_from_address(Vcb, ed2->address);
+                            
+                            if (c) {
+                                LIST_ENTRY* le2;
+                                
+                                le2 = c->changed_extents.Flink;
+                                while (le2 != &c->changed_extents) {
+                                    changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry);
+                                    
+                                    if (ce2->address == ed2->address) {
+                                        ce = ce2;
+                                        break;
+                                    }
+
+                                    le2 = le2->Flink;
+                                }
+                            }
+                                    
+                            edr.root = t->root->id;
+                            edr.objid = td->key.obj_id;
+                            edr.offset = td->key.offset - ed2->offset;
+                            edr.count = 1;
+                            
+                            if (ce) {
+                                Status = add_changed_extent_ref_edr(ce, &edr, TRUE);
+                                if (!NT_SUCCESS(Status)) {
+                                    ERR("add_changed_extent_ref_edr returned %08x\n", Status);
+                                    return Status;
+                                }
+                                
+                                Status = add_changed_extent_ref_edr(ce, &edr, FALSE);
+                                if (!NT_SUCCESS(Status)) {
+                                    ERR("add_changed_extent_ref_edr returned %08x\n", Status);
+                                    return Status;
+                                }
+                            }
+                            
+                            Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp, rollback);
+                            if (!NT_SUCCESS(Status)) {
+                                ERR("increase_extent_refcount returned %08x\n", Status);
+                                return Status;
+                            }
+                            
+                            if ((flags & EXTENT_ITEM_SHARED_BACKREFS && unique) || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
+                                UINT64 sdrrc = find_extent_shared_data_refcount(Vcb, ed2->address, t->header.address, Irp);
+
+                                if (sdrrc > 0) {
+                                    SHARED_DATA_REF sdr;
+                                    
+                                    sdr.offset = t->header.address;
+                                    sdr.count = sdrrc;
+                                    
+                                    Status = decrease_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0,
+                                                                      t->header.address, Irp, rollback);
+                                    if (!NT_SUCCESS(Status)) {
+                                        ERR("decrease_extent_refcount returned %08x\n", Status);
+                                        return Status;
+                                    }
+                                    
+                                    if (ce) {
+                                        ce->count--;
+                                        ce->old_count--;
+                                    }
+                                }
+                            }
+                            
+                            // FIXME - clear shared flag if unique?
+                        }
+                    }
+                }
+                
+                le = le->Flink;
+            }
+        } else {
+            LIST_ENTRY* le;
+            
+            le = t->itemlist.Flink;
+            while (le != &t->itemlist) {
+                tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
+                
+                if (!td->inserted) {
+                    TREE_BLOCK_REF tbr;
+                    
+                    tbr.offset = t->root->id;
+                    
+                    Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF,
+                                                      &tbr, &td->key, t->header.level - 1, Irp, rollback);
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("increase_extent_refcount returned %08x\n", Status);
+                        return Status;
+                    }
+                    
+                    if (unique || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
+                        UINT64 sbrrc = find_extent_shared_tree_refcount(Vcb, td->treeholder.address, t->header.address, Irp);
+
+                        if (sbrrc > 0) {
+                            SHARED_BLOCK_REF sbr;
+                
+                            sbr.offset = t->header.address;
+                            
+                            Status = decrease_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
+                                                              t->header.address, Irp, rollback);
+                            if (!NT_SUCCESS(Status)) {
+                                ERR("decrease_extent_refcount returned %08x\n", Status);
+                                return Status;
+                            }
+                        }
+                    }
+                            
+                    // FIXME - clear shared flag if unique?
+                }
+                
+                le = le->Flink;
+            }
+        }
+        
+        if (unique) {
+            UINT64 sbrrc = find_extent_shared_tree_refcount(Vcb, t->header.address, t->parent->header.address, Irp);
+            
+            if (sbrrc == 1) {
+                SHARED_BLOCK_REF sbr;
+                
+                sbr.offset = t->parent->header.address;
+                
+                Status = decrease_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
+                                                  t->parent->header.address, Irp, rollback);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("decrease_extent_refcount returned %08x\n", Status);
+                    return Status;
+                }
+            }
+        }
+        
+        if (t->parent)
+            tbr.offset = t->parent->header.tree_id;
+        else
+            tbr.offset = t->header.tree_id;
+        
+        Status = increase_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr,
+                                          t->parent ? &t->paritem->key : NULL, t->header.level, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("increase_extent_refcount returned %08x\n", Status);
+            return Status;
+        }
+        
+        // FIXME - clear shared flag if unique?
+        
+        t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
+    }
+    
+    Status = reduce_tree_extent(Vcb, t->header.address, t, Irp, rollback);
+    
+    if (!NT_SUCCESS(Status)) {
+        ERR("reduce_tree_extent returned %08x\n", Status);
+        return Status;
+    }
+    
+    t->has_address = FALSE;
+    
+    if (rc > 1 && !(flags & EXTENT_ITEM_SHARED_BACKREFS)) {
+        if (t->header.tree_id == t->root->id) {
+            flags |= EXTENT_ITEM_SHARED_BACKREFS;
+            update_extent_flags(Vcb, t->header.address, flags, Irp);
+        }
+        
+        if (t->header.level > 0) {
+            LIST_ENTRY* le;
+            
+            le = t->itemlist.Flink;
+            while (le != &t->itemlist) {
+                tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
+                
+                if (!td->inserted) {
+                    if (t->header.tree_id == t->root->id) {
+                        SHARED_BLOCK_REF sbr;
+                        
+                        sbr.offset = t->header.address;
+                        
+                        Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, &td->key, t->header.level - 1, Irp, rollback);
+                    } else {
+                        TREE_BLOCK_REF tbr;
+                        
+                        tbr.offset = t->root->id;
+                        
+                        Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr, &td->key, t->header.level - 1, Irp, rollback);
+                    }
+                    
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("increase_extent_refcount returned %08x\n", Status);
+                        return Status;
+                    }
+                }
+                
+                le = le->Flink;
+            }
+        } else {
+            LIST_ENTRY* le;
+            
+            le = t->itemlist.Flink;
+            while (le != &t->itemlist) {
+                tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
+                
+                if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
+                    EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
+                    
+                    if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
+                        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
+                        
+                        if (ed2->size > 0) {
+                            changed_extent* ce = NULL;
+                            chunk* c = get_chunk_from_address(Vcb, ed2->address);
+                            
+                            if (c) {
+                                LIST_ENTRY* le2;
+                                
+                                le2 = c->changed_extents.Flink;
+                                while (le2 != &c->changed_extents) {
+                                    changed_extent* ce2 = CONTAINING_RECORD(le2, changed_extent, list_entry);
+                                    
+                                    if (ce2->address == ed2->address) {
+                                        ce = ce2;
+                                        break;
+                                    }
+
+                                    le2 = le2->Flink;
+                                }
+                            }
+                            
+                            if (t->header.tree_id == t->root->id) {
+                                SHARED_DATA_REF sdr;
+                                
+                                sdr.offset = t->header.address;
+                                sdr.count = 1;
+                                
+                                if (ce) {
+                                    Status = add_changed_extent_ref_sdr(ce, &sdr, TRUE);
+                                    if (!NT_SUCCESS(Status)) {
+                                        ERR("add_changed_extent_ref_edr returned %08x\n", Status);
+                                        return Status;
+                                    }
+                                    
+                                    Status = add_changed_extent_ref_sdr(ce, &sdr, FALSE);
+                                    if (!NT_SUCCESS(Status)) {
+                                        ERR("add_changed_extent_ref_edr returned %08x\n", Status);
+                                        return Status;
+                                    }
+                                }
+                                
+                                Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, Irp, rollback);
+                            } else {
+                                EXTENT_DATA_REF edr;
+                                
+                                edr.root = t->root->id;
+                                edr.objid = td->key.obj_id;
+                                edr.offset = td->key.offset - ed2->offset;
+                                edr.count = 1;
+                                
+                                if (ce) {
+                                    Status = add_changed_extent_ref_edr(ce, &edr, TRUE);
+                                    if (!NT_SUCCESS(Status)) {
+                                        ERR("add_changed_extent_ref_edr returned %08x\n", Status);
+                                        return Status;
+                                    }
+                                    
+                                    Status = add_changed_extent_ref_edr(ce, &edr, FALSE);
+                                    if (!NT_SUCCESS(Status)) {
+                                        ERR("add_changed_extent_ref_edr returned %08x\n", Status);
+                                        return Status;
+                                    }
+                                }
+                                
+                                Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp, rollback);
+                            }
+                            
+                            if (!NT_SUCCESS(Status)) {
+                                ERR("increase_extent_refcount returned %08x\n", Status);
+                                return Status;
+                            }
+                        }
+                    }
+                }
+                
+                le = le->Flink;
+            }
+        }
+    }
+    
+    t->updated_extents = TRUE;
+    t->header.tree_id = t->root->id;
+    
+    return STATUS_SUCCESS;
+}
+
+// Gives every dirty tree (t->write set) that has no new address yet a freshly
+// allocated one, charging node_size bytes to the chunk the address falls in.
+// If anything was allocated, update_tree_extents is then run on each dirty tree
+// that still has an old address and has not been processed, working downwards
+// from the highest level seen during allocation to level 0.
+//
+// Returns STATUS_SUCCESS, the failure status of get_tree_new_address or
+// update_tree_extents, or STATUS_INTERNAL_ERROR if no chunk contains a newly
+// allocated address.
+static NTSTATUS allocate_tree_extents(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
+    LIST_ENTRY* entry;
+    NTSTATUS Status;
+    BOOL any_allocated = FALSE;
+    UINT8 highest = 0, cur;
+
+    TRACE("(%p)\n", Vcb);
+
+    // first pass: hand out new addresses
+    for (entry = Vcb->trees.Flink; entry != &Vcb->trees; entry = entry->Flink) {
+        tree* t = CONTAINING_RECORD(entry, tree, list_entry);
+        chunk* c;
+
+        if (!t->write || t->has_new_address)
+            continue;
+
+        Status = get_tree_new_address(Vcb, t, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("get_tree_new_address returned %08x\n", Status);
+            return Status;
+        }
+
+        TRACE("allocated extent %llx\n", t->new_address);
+
+        c = get_chunk_from_address(Vcb, t->new_address);
+        if (!c) {
+            ERR("could not find chunk for address %llx\n", t->new_address);
+            return STATUS_INTERNAL_ERROR;
+        }
+
+        increase_chunk_usage(c, Vcb->superblock.node_size);
+
+        any_allocated = TRUE;
+
+        if (highest < t->header.level)
+            highest = t->header.level;
+    }
+
+    if (!any_allocated)
+        return STATUS_SUCCESS;
+
+    // second pass: fix up extent references, highest level first
+    for (cur = highest; ; cur--) {
+        for (entry = Vcb->trees.Flink; entry != &Vcb->trees; entry = entry->Flink) {
+            tree* t = CONTAINING_RECORD(entry, tree, list_entry);
+
+            if (!t->write || t->updated_extents || !t->has_address || t->header.level != cur)
+                continue;
+
+            Status = update_tree_extents(Vcb, t, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("update_tree_extents returned %08x\n", Status);
+                return Status;
+            }
+        }
+
+        // cur is unsigned, so stop explicitly rather than testing cur >= 0
+        if (cur == 0)
+            break;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// For every dirty tree without a parent, records the tree's new address in its
+// root's treeholder. Unless the root is the root tree or the chunk tree, the
+// root's ROOT_ITEM (block number, level and both generation fields) is also
+// refreshed and copied over the matching item found in the root tree.
+// Finishes by calling update_chunk_caches.
+//
+// Returns STATUS_SUCCESS, the failure status of find_item or
+// update_chunk_caches, or STATUS_INTERNAL_ERROR if a ROOT_ITEM is missing.
+static NTSTATUS update_root_root(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
+    LIST_ENTRY* entry;
+    NTSTATUS Status;
+
+    TRACE("(%p)\n", Vcb);
+
+    for (entry = Vcb->trees.Flink; entry != &Vcb->trees; entry = entry->Flink) {
+        tree* t = CONTAINING_RECORD(entry, tree, list_entry);
+
+        if (!t->write || t->parent)
+            continue;
+
+        if (!(t->root == Vcb->root_root || t->root == Vcb->chunk_root)) {
+            traverse_ptr tp;
+            KEY root_key;
+
+            root_key.offset = 0xffffffffffffffff;
+            root_key.obj_type = TYPE_ROOT_ITEM;
+            root_key.obj_id = t->root->id;
+
+            Status = find_item(Vcb, Vcb->root_root, &tp, &root_key, FALSE, Irp);
+            if (!NT_SUCCESS(Status)) {
+                ERR("error - find_item returned %08x\n", Status);
+                return Status;
+            }
+
+            if (tp.item->key.obj_id != root_key.obj_id || tp.item->key.obj_type != root_key.obj_type) {
+                ERR("could not find ROOT_ITEM for tree %llx\n", root_key.obj_id);
+                int3;
+                return STATUS_INTERNAL_ERROR;
+            }
+
+            TRACE("updating the address for root %llx to %llx\n", root_key.obj_id, t->new_address);
+
+            t->root->root_item.block_number = t->new_address;
+            t->root->root_item.root_level = t->header.level;
+            t->root->root_item.generation = Vcb->superblock.generation;
+            t->root->root_item.generation2 = Vcb->superblock.generation;
+
+            // item is guaranteed to be at least sizeof(ROOT_ITEM), due to add_parents
+            RtlCopyMemory(tp.item->data, &t->root->root_item, sizeof(ROOT_ITEM));
+        }
+
+        t->root->treeholder.address = t->new_address;
+    }
+
+    Status = update_chunk_caches(Vcb, Irp, rollback);
+    if (!NT_SUCCESS(Status)) {
+        ERR("update_chunk_caches returned %08x\n", Status);
+        return Status;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+static NTSTATUS write_trees(device_extension* Vcb, PIRP Irp) {
+    UINT8 level;
+    UINT8 *data, *body;
+    UINT32 crc32;
+    NTSTATUS Status;
+    LIST_ENTRY* le;
+    write_data_context* wtc;
+    LIST_ENTRY tree_writes;
+    tree_write* tw;
+    chunk* c;
+    
+    TRACE("(%p)\n", Vcb);
+    
+    InitializeListHead(&tree_writes);
+
+    for (level = 0; level <= 255; level++) {
+        BOOL nothing_found = TRUE;
+        
+        TRACE("level = %u\n", level);
+        
+        le = Vcb->trees.Flink;
+        while (le != &Vcb->trees) {
+            tree* t = CONTAINING_RECORD(le, tree, list_entry);
+            
+            if (t->write && t->header.level == level) {
+                KEY firstitem, searchkey;
+                LIST_ENTRY* le2;
+                traverse_ptr tp;
+                EXTENT_ITEM_TREE* eit;
+                
+                if (!t->has_new_address) {
+                    ERR("error - tried to write tree with no new address\n");
+                    int3;
+                }
+                
+                le2 = t->itemlist.Flink;
+                while (le2 != &t->itemlist) {
+                    tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
+                    if (!td->ignore) {
+                        firstitem = td->key;
+                        break;
+                    }
+                    le2 = le2->Flink;
+                }
+                
+                if (t->parent) {
+                    t->paritem->key = firstitem;
+                    t->paritem->treeholder.address = t->new_address;
+                    t->paritem->treeholder.generation = Vcb->superblock.generation;
+                }
+                
+                if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
+                    searchkey.obj_id = t->new_address;
+                    searchkey.obj_type = TYPE_EXTENT_ITEM;
+                    searchkey.offset = Vcb->superblock.node_size;
+                    
+                    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("error - find_item returned %08x\n", Status);
+                        return Status;
+                    }
+                    
+                    if (keycmp(searchkey, tp.item->key)) {
+//                         traverse_ptr next_tp;
+//                         BOOL b;
+//                         tree_data* paritem;
+                        
+                        ERR("could not find %llx,%x,%llx in extent_root (found %llx,%x,%llx instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+                        
+//                         searchkey.obj_id = 0;
+//                         searchkey.obj_type = 0;
+//                         searchkey.offset = 0;
+//                         
+//                         find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
+//                         
+//                         paritem = NULL;
+//                         do {
+//                             if (tp.tree->paritem != paritem) {
+//                                 paritem = tp.tree->paritem;
+//                                 ERR("paritem: %llx,%x,%llx\n", paritem->key.obj_id, paritem->key.obj_type, paritem->key.offset);
+//                             }
+//                             
+//                             ERR("%llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+//                             
+//                             b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
+//                             if (b) {
+//                                 free_traverse_ptr(&tp);
+//                                 tp = next_tp;
+//                             }
+//                         } while (b);
+//                         
+//                         free_traverse_ptr(&tp);
+                        
+                        return STATUS_INTERNAL_ERROR;
+                    }
+                    
+                    if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
+                        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
+                        return STATUS_INTERNAL_ERROR;
+                    }
+                    
+                    eit = (EXTENT_ITEM_TREE*)tp.item->data;
+                    eit->firstitem = firstitem;
+                }
+                
+                nothing_found = FALSE;
+            }
+            
+            le = le->Flink;
+        }
+        
+        if (nothing_found)
+            break;
+    }
+    
+    TRACE("allocated tree extents\n");
+    
+    wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context), ALLOC_TAG);
+    if (!wtc) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+    
+    KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE);
+    InitializeListHead(&wtc->stripes);
+    wtc->tree = TRUE;
+    wtc->stripes_left = 0;
+    
+    le = Vcb->trees.Flink;
+    while (le != &Vcb->trees) {
+        tree* t = CONTAINING_RECORD(le, tree, list_entry);
+#ifdef DEBUG_PARANOID
+        UINT32 num_items = 0, size = 0;
+        LIST_ENTRY* le2;
+        BOOL crash = FALSE;
+#endif
+
+        if (t->write) {
+#ifdef DEBUG_PARANOID
+            le2 = t->itemlist.Flink;
+            while (le2 != &t->itemlist) {
+                tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
+                if (!td->ignore) {
+                    num_items++;
+                    
+                    if (t->header.level == 0)
+                        size += td->size;
+                }
+                le2 = le2->Flink;
+            }
+            
+            if (t->header.level == 0)
+                size += num_items * sizeof(leaf_node);
+            else
+                size += num_items * sizeof(internal_node);
+            
+            if (num_items != t->header.num_items) {
+                ERR("tree %llx, level %x: num_items was %x, expected %x\n", t->root->id, t->header.level, num_items, t->header.num_items);
+                crash = TRUE;
+            }
+            
+            if (size != t->size) {
+                ERR("tree %llx, level %x: size was %x, expected %x\n", t->root->id, t->header.level, size, t->size);
+                crash = TRUE;
+            }
+            
+            if (t->header.num_items == 0 && t->parent) {
+                ERR("tree %llx, level %x: tried to write empty tree with parent\n", t->root->id, t->header.level);
+                crash = TRUE;
+            }
+            
+            if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
+                ERR("tree %llx, level %x: tried to write overlarge tree (%x > %x)\n", t->root->id, t->header.level, t->size, Vcb->superblock.node_size - sizeof(tree_header));
+                crash = TRUE;
+            }
+            
+            if (crash) {
+                ERR("tree %p\n", t);
+                le2 = t->itemlist.Flink;
+                while (le2 != &t->itemlist) {
+                    tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
+                    if (!td->ignore) {
+                        ERR("%llx,%x,%llx inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted);
+                    }
+                    le2 = le2->Flink;
+                }
+                int3;
+            }
+#endif
+            t->header.address = t->new_address;
+            t->header.generation = Vcb->superblock.generation;
+            t->header.tree_id = t->root->id;
+            t->header.flags |= HEADER_FLAG_MIXED_BACKREF;
+            t->header.fs_uuid = Vcb->superblock.uuid;
+            t->has_address = TRUE;
+            
+            data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
+            if (!data) {
+                ERR("out of memory\n");
+                Status = STATUS_INSUFFICIENT_RESOURCES;
+                goto end;
+            }
+            
+            body = data + sizeof(tree_header);
+            
+            RtlCopyMemory(data, &t->header, sizeof(tree_header));
+            RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header));
+            
+            if (t->header.level == 0) {
+                leaf_node* itemptr = (leaf_node*)body;
+                int i = 0;
+                LIST_ENTRY* le2;
+                UINT8* dataptr = data + Vcb->superblock.node_size;
+                
+                le2 = t->itemlist.Flink;
+                while (le2 != &t->itemlist) {
+                    tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
+                    if (!td->ignore) {
+                        dataptr = dataptr - td->size;
+                        
+                        itemptr[i].key = td->key;
+                        itemptr[i].offset = (UINT8*)dataptr - (UINT8*)body;
+                        itemptr[i].size = td->size;
+                        i++;
+                        
+                        if (td->size > 0)
+                            RtlCopyMemory(dataptr, td->data, td->size);
+                    }
+                    
+                    le2 = le2->Flink;
+                }
+            } else {
+                internal_node* itemptr = (internal_node*)body;
+                int i = 0;
+                LIST_ENTRY* le2;
+                
+                le2 = t->itemlist.Flink;
+                while (le2 != &t->itemlist) {
+                    tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
+                    if (!td->ignore) {
+                        itemptr[i].key = td->key;
+                        itemptr[i].address = td->treeholder.address;
+                        itemptr[i].generation = td->treeholder.generation;
+                        i++;
+                    }
+                    
+                    le2 = le2->Flink;
+                }
+            }
+            
+            crc32 = calc_crc32c(0xffffffff, (UINT8*)&((tree_header*)data)->fs_uuid, Vcb->superblock.node_size - sizeof(((tree_header*)data)->csum));
+            crc32 = ~crc32;
+            *((UINT32*)data) = crc32;
+            TRACE("setting crc32 to %08x\n", crc32);
+            
+            tw = ExAllocatePoolWithTag(PagedPool, sizeof(tree_write), ALLOC_TAG);
+            if (!tw) {
+                ERR("out of memory\n");
+                return STATUS_INSUFFICIENT_RESOURCES;
+            }
+            
+            tw->address = t->new_address;
+            tw->length = Vcb->superblock.node_size;
+            tw->data = data;
+            tw->overlap = FALSE;
+            
+            if (IsListEmpty(&tree_writes))
+                InsertTailList(&tree_writes, &tw->list_entry);
+            else {
+                LIST_ENTRY* le2;
+                BOOL inserted = FALSE;
+                
+                le2 = tree_writes.Flink;
+                while (le2 != &tree_writes) {
+                    tree_write* tw2 = CONTAINING_RECORD(le2, tree_write, list_entry);
+                    
+                    if (tw2->address > tw->address) {
+                        InsertHeadList(le2->Blink, &tw->list_entry);
+                        inserted = TRUE;
+                        break;
+                    }
+                    
+                    le2 = le2->Flink;
+                }
+                
+                if (!inserted)
+                    InsertTailList(&tree_writes, &tw->list_entry);
+            }
+        }
+
+        le = le->Flink;
+    }
+    
+    Status = STATUS_SUCCESS;
+    
+    // merge together runs
+    c = NULL;
+    le = tree_writes.Flink;
+    while (le != &tree_writes) {
+        tw = CONTAINING_RECORD(le, tree_write, list_entry);
+        
+        if (!c || tw->address < c->offset || tw->address >= c->offset + c->chunk_item->size)
+            c = get_chunk_from_address(Vcb, tw->address);
+        else {
+            tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry);
+            
+            if (tw->address == tw2->address + tw2->length) {
+                data = ExAllocatePoolWithTag(NonPagedPool, tw2->length + tw->length, ALLOC_TAG);
+                
+                if (!data) {
+                    ERR("out of memory\n");
+                    Status = STATUS_INSUFFICIENT_RESOURCES;
+                    goto end;
+                }
+                
+                RtlCopyMemory(data, tw2->data, tw2->length);
+                RtlCopyMemory(&data[tw2->length], tw->data, tw->length);
+                
+                ExFreePool(tw2->data);
+                tw2->data = data;
+                tw2->length += tw->length;
+                
+                ExFreePool(tw->data);
+                RemoveEntryList(&tw->list_entry);
+                ExFreePool(tw);
+                
+                le = tw2->list_entry.Flink;
+                continue;
+            }
+        }
+        
+        le = le->Flink;
+    }
+    
+    // mark RAID5/6 overlaps so we can do them one by one
+    c = NULL;
+    le = tree_writes.Flink;
+    while (le != &tree_writes) {
+        tw = CONTAINING_RECORD(le, tree_write, list_entry);
+        
+        if (!c || tw->address < c->offset || tw->address >= c->offset + c->chunk_item->size)
+            c = get_chunk_from_address(Vcb, tw->address);
+        else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
+            tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry);
+            UINT64 last_stripe, this_stripe;
+            
+            last_stripe = (tw2->address + tw2->length - 1 - c->offset) / (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 1));
+            this_stripe = (tw->address - c->offset) / (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 1));
+            
+            if (last_stripe == this_stripe)
+                tw->overlap = TRUE;
+        } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
+            tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry);
+            UINT64 last_stripe, this_stripe;
+            
+            last_stripe = (tw2->address + tw2->length - 1 - c->offset) / (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 2));
+            this_stripe = (tw->address - c->offset) / (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 2));
+            
+            if (last_stripe == this_stripe)
+                tw->overlap = TRUE;
+        }
+        
+        le = le->Flink;
+    }
+    
+    le = tree_writes.Flink;
+    while (le != &tree_writes) {
+        tw = CONTAINING_RECORD(le, tree_write, list_entry);
+        
+        if (!tw->overlap) {
+            TRACE("address: %llx, size: %x, overlap = %u\n", tw->address, tw->length, tw->overlap);
+            
+            Status = write_data(Vcb, tw->address, tw->data, TRUE, tw->length, wtc, NULL, NULL);
+            if (!NT_SUCCESS(Status)) {
+                ERR("write_data returned %08x\n", Status);
+                goto end;
+            }
+        }
+        
+        le = le->Flink;
+    }
+    
+    if (wtc->stripes.Flink != &wtc->stripes) {
+        // launch writes and wait
+        le = wtc->stripes.Flink;
+        while (le != &wtc->stripes) {
+            write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
+            
+            if (stripe->status != WriteDataStatus_Ignore)
+                IoCallDriver(stripe->device->devobj, stripe->Irp);
+            
+            le = le->Flink;
+        }
+        
+        KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL);
+        
+        le = wtc->stripes.Flink;
+        while (le != &wtc->stripes) {
+            write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
+            
+            if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
+                Status = stripe->iosb.Status;
+                break;
+            }
+            
+            le = le->Flink;
+        }
+        
+        free_write_data_stripes(wtc);
+    }
+    
+    le = tree_writes.Flink;
+    while (le != &tree_writes) {
+        tw = CONTAINING_RECORD(le, tree_write, list_entry);
+        
+        if (tw->overlap) {
+            TRACE("address: %llx, size: %x, overlap = %u\n", tw->address, tw->length, tw->overlap);
+            
+            Status = write_data_complete(Vcb, tw->address, tw->data, tw->length, Irp, NULL);
+            if (!NT_SUCCESS(Status)) {
+                ERR("write_data_complete returned %08x\n", Status);
+                goto end;
+            }
+        }
+        
+        le = le->Flink;
+    }
+    
+end:
+    ExFreePool(wtc);
+    
+    while (!IsListEmpty(&tree_writes)) {
+        le = RemoveHeadList(&tree_writes);
+        tw = CONTAINING_RECORD(le, tree_write, list_entry);
+        
+        ExFreePool(tw);
+    }
+    
+    return Status;
+}
+
+// Searches the root tree for the ROOT_ITEM of root_id. Returns a pointer to the
+// item's data, or NULL if find_item fails, the item found is not the ROOT_ITEM
+// of that root, or the item is smaller than a ROOT_ITEM.
+static ROOT_ITEM* backup_sb_find_root_item(device_extension* Vcb, UINT64 root_id, PIRP Irp) {
+    KEY key;
+    traverse_ptr tp;
+
+    key.obj_id = root_id;
+    key.obj_type = TYPE_ROOT_ITEM;
+    key.offset = 0xffffffffffffffff;
+
+    if (!NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &key, FALSE, Irp)))
+        return NULL;
+
+    if (tp.item->key.obj_id != key.obj_id || tp.item->key.obj_type != key.obj_type || tp.item->size < sizeof(ROOT_ITEM))
+        return NULL;
+
+    return (ROOT_ITEM*)tp.item->data;
+}
+
+// Fills *sb from the current state of the volume: the root and chunk tree
+// locations and the byte/device totals come from the in-memory superblock,
+// while the extent, FS, device and checksum tree locations come from their
+// ROOT_ITEMs in the root tree. Any tree whose ROOT_ITEM cannot be found keeps
+// the zeroes written by the initial RtlZeroMemory.
+static void update_backup_superblock(device_extension* Vcb, superblock_backup* sb, PIRP Irp) {
+    ROOT_ITEM* ri;
+
+    RtlZeroMemory(sb, sizeof(superblock_backup));
+
+    sb->root_tree_addr = Vcb->superblock.root_tree_addr;
+    sb->root_tree_generation = Vcb->superblock.generation;
+    sb->root_level = Vcb->superblock.root_level;
+
+    sb->chunk_tree_addr = Vcb->superblock.chunk_tree_addr;
+    sb->chunk_tree_generation = Vcb->superblock.chunk_root_generation;
+    sb->chunk_root_level = Vcb->superblock.chunk_root_level;
+
+    ri = backup_sb_find_root_item(Vcb, BTRFS_ROOT_EXTENT, Irp);
+    if (ri) {
+        sb->extent_tree_addr = ri->block_number;
+        sb->extent_tree_generation = ri->generation;
+        sb->extent_root_level = ri->root_level;
+    }
+
+    ri = backup_sb_find_root_item(Vcb, BTRFS_ROOT_FSTREE, Irp);
+    if (ri) {
+        sb->fs_tree_addr = ri->block_number;
+        sb->fs_tree_generation = ri->generation;
+        sb->fs_root_level = ri->root_level;
+    }
+
+    ri = backup_sb_find_root_item(Vcb, BTRFS_ROOT_DEVTREE, Irp);
+    if (ri) {
+        sb->dev_root_addr = ri->block_number;
+        sb->dev_root_generation = ri->generation;
+        sb->dev_root_level = ri->root_level;
+    }
+
+    ri = backup_sb_find_root_item(Vcb, BTRFS_ROOT_CHECKSUM, Irp);
+    if (ri) {
+        sb->csum_root_addr = ri->block_number;
+        sb->csum_root_generation = ri->generation;
+        sb->csum_root_level = ri->root_level;
+    }
+
+    sb->total_bytes = Vcb->superblock.total_bytes;
+    sb->bytes_used = Vcb->superblock.bytes_used;
+    sb->num_devices = Vcb->superblock.num_devices;
+}
+
+// Writes the in-memory superblock to every superblock location that fits on the
+// given device.
+//
+// The device's DEV_ITEM is copied into the superblock first. For each entry of
+// superblock_addrs (the table is walked until a zero entry, or until the copy would
+// not fit within device->length) the physical address is stamped into the
+// superblock, the checksum is recomputed, and the block is written with
+// write_data_phys.
+//
+// Returns the status of the last write attempted; writing stops at the first
+// failure. If no copy could be written at all, STATUS_INTERNAL_ERROR is returned.
+static NTSTATUS STDCALL write_superblock(device_extension* Vcb, device* device) {
+    // Initialised on every platform: if the loop below never runs we must not
+    // return an indeterminate value to the caller.
+    NTSTATUS Status = STATUS_INTERNAL_ERROR;
+    unsigned int i = 0;
+    UINT32 crc32;
+
+    RtlCopyMemory(&Vcb->superblock.dev_item, &device->devitem, sizeof(DEV_ITEM));
+
+    // All the documentation says that the Linux driver only writes one superblock
+    // if it thinks a disk is an SSD, but this doesn't seem to be the case!
+
+    while (superblock_addrs[i] > 0 && device->length >= superblock_addrs[i] + sizeof(superblock)) {
+        TRACE("writing superblock %u\n", i);
+
+        Vcb->superblock.sb_phys_addr = superblock_addrs[i];
+
+        // The checksum is computed from the uuid field onwards, over the size of the
+        // superblock minus the checksum field, and stored inverted.
+        crc32 = calc_crc32c(0xffffffff, (UINT8*)&Vcb->superblock.uuid, (ULONG)sizeof(superblock) - sizeof(Vcb->superblock.checksum));
+        crc32 = ~crc32;
+        TRACE("crc32 is %08x\n", crc32);
+        RtlCopyMemory(&Vcb->superblock.checksum, &crc32, sizeof(UINT32));
+
+        Status = write_data_phys(device->devobj, superblock_addrs[i], &Vcb->superblock, sizeof(superblock));
+
+        if (!NT_SUCCESS(Status))
+            break;
+
+        i++;
+    }
+
+    if (i == 0) {
+        ERR("no superblocks written!\n");
+    }
+
+    return Status;
+}
+
+// Brings the in-memory superblock up to date and writes it to every device.
+//
+// 1. For each top-level tree (no parent) that is marked for writing, the new
+//    address and level of the root tree or chunk tree are recorded in the
+//    superblock (plus the generation, for the chunk tree).
+// 2. The backup-root slots are shifted down by one and the last slot is refilled
+//    by update_backup_superblock.
+// 3. write_superblock is called for every device that has a device object and is
+//    not read-only.
+//
+// Returns STATUS_SUCCESS, or the first failing status from write_superblock.
+static NTSTATUS write_superblocks(device_extension* Vcb, PIRP Irp) {
+    UINT64 i;
+    NTSTATUS Status;
+    LIST_ENTRY* le;
+
+    TRACE("(%p)\n", Vcb);
+
+    for (le = Vcb->trees.Flink; le != &Vcb->trees; le = le->Flink) {
+        tree* t = CONTAINING_RECORD(le, tree, list_entry);
+
+        // only top-level trees that are being rewritten are of interest
+        if (!t->write || t->parent)
+            continue;
+
+        if (t->root == Vcb->root_root) {
+            Vcb->superblock.root_tree_addr = t->new_address;
+            Vcb->superblock.root_level = t->header.level;
+        } else if (t->root == Vcb->chunk_root) {
+            Vcb->superblock.chunk_tree_addr = t->new_address;
+            Vcb->superblock.chunk_root_generation = t->header.generation;
+            Vcb->superblock.chunk_root_level = t->header.level;
+        }
+    }
+
+    // drop the oldest backup by moving every slot down one position
+    for (i = 1; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
+        RtlCopyMemory(&Vcb->superblock.backup[i - 1], &Vcb->superblock.backup[i], sizeof(superblock_backup));
+    }
+
+    update_backup_superblock(Vcb, &Vcb->superblock.backup[BTRFS_NUM_BACKUP_ROOTS - 1], Irp);
+
+    for (i = 0; i < Vcb->superblock.num_devices; i++) {
+        if (!Vcb->devices[i].devobj || Vcb->devices[i].readonly)
+            continue;
+
+        Status = write_superblock(Vcb, &Vcb->devices[i]);
+        if (!NT_SUCCESS(Status)) {
+            ERR("write_superblock returned %08x\n", Status);
+            return Status;
+        }
+    }
+
+    return STATUS_SUCCESS;
+}
+
+static NTSTATUS flush_changed_extent(device_extension* Vcb, chunk* c, changed_extent* ce, PIRP Irp, LIST_ENTRY* rollback) {
+    LIST_ENTRY *le, *le2;
+    NTSTATUS Status;
+    UINT64 old_size;
+    
+    le = ce->refs.Flink;
+    while (le != &ce->refs) {
+        changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
+        LIST_ENTRY* le3 = le->Flink;
+        UINT64 old_count = 0;
+        
+        if (cer->type == TYPE_EXTENT_DATA_REF) {
+            le2 = ce->old_refs.Flink;
+            while (le2 != &ce->old_refs) {
+                changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
+                
+                if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
+                    old_count = cer2->edr.count;
+                    
+                    RemoveEntryList(&cer2->list_entry);
+                    ExFreePool(cer2);
+                    break;
+                }
+                
+                le2 = le2->Flink;
+            }
+            
+            old_size = ce->old_count > 0 ? ce->old_size : ce->size;
+            
+            if (cer->edr.count > old_count) {
+                Status = increase_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset, cer->edr.count - old_count, Irp, rollback);
+                            
+                if (!NT_SUCCESS(Status)) {
+                    ERR("increase_extent_refcount_data returned %08x\n", Status);
+                    return Status;
+                }
+            } else if (cer->edr.count < old_count) {
+                Status = decrease_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset,
+                                                    old_count - cer->edr.count, Irp, rollback);
+                
+                if (!NT_SUCCESS(Status)) {
+                    ERR("decrease_extent_refcount_data returned %08x\n", Status);
+                    return Status;
+                }
+            }
+            
+            if (ce->size != ce->old_size && ce->old_count > 0) {
+                KEY searchkey;
+                traverse_ptr tp;
+                void* data;
+                
+                searchkey.obj_id = ce->address;
+                searchkey.obj_type = TYPE_EXTENT_ITEM;
+                searchkey.offset = ce->old_size;
+                
+                Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("error - find_item returned %08x\n", Status);
+                    return Status;
+                }
+                
+                if (keycmp(searchkey, tp.item->key)) {
+                    ERR("could not find (%llx,%x,%llx) in extent tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
+                    return STATUS_INTERNAL_ERROR;
+                }
+                
+                if (tp.item->size > 0) {
+                    data = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
+                    
+                    if (!data) {
+                        ERR("out of memory\n");
+                        return STATUS_INSUFFICIENT_RESOURCES;
+                    }
+                    
+                    RtlCopyMemory(data, tp.item->data, tp.item->size);
+                } else
+                    data = NULL;
+                
+                if (!insert_tree_item(Vcb, Vcb->extent_root, ce->address, TYPE_EXTENT_ITEM, ce->size, data, tp.item->size, NULL, Irp, rollback)) {
+                    ERR("insert_tree_item failed\n");
+                    return STATUS_INTERNAL_ERROR;
+                }
+                
+                delete_tree_item(Vcb, &tp, rollback);
+            }
+        } else if (cer->type == TYPE_SHARED_DATA_REF) {
+            le2 = ce->old_refs.Flink;
+            while (le2 != &ce->old_refs) {
+                changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
+                
+                if (cer2->type == TYPE_SHARED_DATA_REF && cer2->sdr.offset == cer->sdr.offset) {
+//                     old_count = cer2->edr.count;
+                    
+                    RemoveEntryList(&cer2->list_entry);
+                    ExFreePool(cer2);
+                    break;
+                }
+                
+                le2 = le2->Flink;
+            }            
+        }
+       
+        RemoveEntryList(&cer->list_entry);
+        ExFreePool(cer);
+        
+        le = le3;
+    }
+    
+#ifdef DEBUG_PARANOID
+    if (!IsListEmpty(&ce->old_refs))
+        WARN("old_refs not empty\n");
+#endif
+    
+    if (ce->count == 0 && !ce->superseded) {
+        if (!ce->no_csum) {
+            LIST_ENTRY changed_sector_list;
+            
+            changed_sector* sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
+            if (!sc) {
+                ERR("out of memory\n");
+                return STATUS_INSUFFICIENT_RESOURCES;
+            }
+            
+            sc->ol.key = ce->address;
+            sc->checksums = NULL;
+            sc->length = ce->size / Vcb->superblock.sector_size;
+
+            sc->deleted = TRUE;
+            
+            InitializeListHead(&changed_sector_list);
+            insert_into_ordered_list(&changed_sector_list, &sc->ol);
+            
+            ExAcquireResourceExclusiveLite(&Vcb->checksum_lock, TRUE);
+            commit_checksum_changes(Vcb, &changed_sector_list);
+            ExReleaseResourceLite(&Vcb->checksum_lock);
+        }
+        
+        decrease_chunk_usage(c, ce->size);
+        
+        space_list_add(Vcb, c, TRUE, ce->address, ce->size, rollback);
+    }
+
+    RemoveEntryList(&ce->list_entry);
+    ExFreePool(ce);
+    
+    return STATUS_SUCCESS;
+}
+
+// Applies every queued changed_sector in Vcb->sector_checksums to the checksum
+// tree, then empties and frees the queue.
+//
+// For each entry, the existing checksum items overlapping the affected range are
+// read into one contiguous array and deleted, the entry's checksums are overlaid
+// (or, for a deleted entry, the range is marked as free), and the result is
+// written back as new items of at most MAX_CSUM_SIZE bytes each. A bitmap tracks
+// which sectors in the range have a checksum: a set bit means "no checksum", a
+// cleared bit means "checksum present".
+//
+// Errors are logged and abort processing; nothing is returned to the caller. The
+// queue is always drained on exit, whether or not processing succeeded.
+static void update_checksum_tree(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
+    LIST_ENTRY* le = Vcb->sector_checksums.Flink;
+    changed_sector* cs;
+    traverse_ptr tp, next_tp;
+    KEY searchkey;
+    UINT32* data;
+    NTSTATUS Status;
+
+    if (!Vcb->checksum_root) {
+        ERR("no checksum root\n");
+        goto exit;
+    }
+
+    while (le != &Vcb->sector_checksums) {
+        UINT64 startaddr, endaddr;
+        ULONG len;
+        UINT32* checksums;
+        RTL_BITMAP bmp;
+        ULONG* bmparr;
+        ULONG runlength, index;
+
+        cs = (changed_sector*)le;
+
+        searchkey.obj_id = EXTENT_CSUM_ID;
+        searchkey.obj_type = TYPE_EXTENT_CSUM;
+        searchkey.offset = cs->ol.key;
+
+        // FIXME - create checksum_root if it doesn't exist at all
+
+        Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
+        if (Status == STATUS_NOT_FOUND) { // tree is completely empty
+            if (!cs->deleted) {
+                checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * cs->length, ALLOC_TAG);
+                if (!checksums) {
+                    ERR("out of memory\n");
+                    goto exit;
+                }
+
+                RtlCopyMemory(checksums, cs->checksums, sizeof(UINT32) * cs->length);
+
+                if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, cs->ol.key, checksums, sizeof(UINT32) * cs->length, NULL, Irp, rollback)) {
+                    ERR("insert_tree_item failed\n");
+                    ExFreePool(checksums);
+                    goto exit;
+                }
+            }
+        } else if (!NT_SUCCESS(Status)) {
+            ERR("find_item returned %08x\n", Status);
+            goto exit;
+        } else {
+            UINT32 tplen;
+
+            // FIXME - check entry is TYPE_EXTENT_CSUM?
+
+            // Extend the range backwards if the item found starts before this change
+            // and reaches it.
+            if (tp.item->key.offset < cs->ol.key && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= cs->ol.key)
+                startaddr = tp.item->key.offset;
+            else
+                startaddr = cs->ol.key;
+
+            searchkey.obj_id = EXTENT_CSUM_ID;
+            searchkey.obj_type = TYPE_EXTENT_CSUM;
+            searchkey.offset = cs->ol.key + (cs->length * Vcb->superblock.sector_size);
+
+            Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
+            if (!NT_SUCCESS(Status)) {
+                ERR("error - find_item returned %08x\n", Status);
+                goto exit;
+            }
+
+            tplen = tp.item->size / sizeof(UINT32);
+
+            // Likewise extend the range forwards to the end of the item found at the
+            // end of this change, if that item runs past it.
+            if (tp.item->key.offset + (tplen * Vcb->superblock.sector_size) >= cs->ol.key + (cs->length * Vcb->superblock.sector_size))
+                endaddr = tp.item->key.offset + (tplen * Vcb->superblock.sector_size);
+            else
+                endaddr = cs->ol.key + (cs->length * Vcb->superblock.sector_size);
+
+            TRACE("cs starts at %llx (%x sectors)\n", cs->ol.key, cs->length);
+            TRACE("startaddr = %llx\n", startaddr);
+            TRACE("endaddr = %llx\n", endaddr);
+
+            len = (endaddr - startaddr) / Vcb->superblock.sector_size;
+
+            checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * len, ALLOC_TAG);
+            if (!checksums) {
+                ERR("out of memory\n");
+                goto exit;
+            }
+
+            bmparr = ExAllocatePoolWithTag(PagedPool, sizeof(ULONG) * ((len/8)+1), ALLOC_TAG);
+            if (!bmparr) {
+                ERR("out of memory\n");
+                ExFreePool(checksums);
+                goto exit;
+            }
+
+            RtlInitializeBitMap(&bmp, bmparr, len);
+            RtlSetAllBits(&bmp);
+
+            searchkey.obj_id = EXTENT_CSUM_ID;
+            searchkey.obj_type = TYPE_EXTENT_CSUM;
+            searchkey.offset = cs->ol.key;
+
+            Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
+            if (!NT_SUCCESS(Status)) {
+                ERR("error - find_item returned %08x\n", Status);
+                // both working buffers are live at this point and must be released
+                ExFreePool(bmparr);
+                ExFreePool(checksums);
+                goto exit;
+            }
+
+            // set bit = free space, cleared bit = allocated sector
+
+            // Pull every existing item in [startaddr, endaddr) into the array and
+            // delete it from the tree.
+            while (tp.item->key.offset < endaddr) {
+                if (tp.item->key.offset >= startaddr) {
+                    if (tp.item->size > 0) {
+                        RtlCopyMemory(&checksums[(tp.item->key.offset - startaddr) / Vcb->superblock.sector_size], tp.item->data, tp.item->size);
+                        RtlClearBits(&bmp, (tp.item->key.offset - startaddr) / Vcb->superblock.sector_size, tp.item->size / sizeof(UINT32));
+                    }
+
+                    delete_tree_item(Vcb, &tp, rollback);
+                }
+
+                if (find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) {
+                    tp = next_tp;
+                } else
+                    break;
+            }
+
+            if (cs->deleted) {
+                RtlSetBits(&bmp, (cs->ol.key - startaddr) / Vcb->superblock.sector_size, cs->length);
+            } else {
+                RtlCopyMemory(&checksums[(cs->ol.key - startaddr) / Vcb->superblock.sector_size], cs->checksums, cs->length * sizeof(UINT32));
+                RtlClearBits(&bmp, (cs->ol.key - startaddr) / Vcb->superblock.sector_size, cs->length);
+            }
+
+            // Write each run of sectors that has checksums back as one or more items.
+            runlength = RtlFindFirstRunClear(&bmp, &index);
+
+            while (runlength != 0) {
+                do {
+                    ULONG rl;
+                    UINT64 off;
+
+                    // cap each item at MAX_CSUM_SIZE bytes
+                    if (runlength * sizeof(UINT32) > MAX_CSUM_SIZE)
+                        rl = MAX_CSUM_SIZE / sizeof(UINT32);
+                    else
+                        rl = runlength;
+
+                    data = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * rl, ALLOC_TAG);
+                    if (!data) {
+                        ERR("out of memory\n");
+                        ExFreePool(bmparr);
+                        ExFreePool(checksums);
+                        goto exit;
+                    }
+
+                    RtlCopyMemory(data, &checksums[index], sizeof(UINT32) * rl);
+
+                    off = startaddr + UInt32x32To64(index, Vcb->superblock.sector_size);
+
+                    if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, data, sizeof(UINT32) * rl, NULL, Irp, rollback)) {
+                        ERR("insert_tree_item failed\n");
+                        ExFreePool(data);
+                        ExFreePool(bmparr);
+                        ExFreePool(checksums);
+                        goto exit;
+                    }
+
+                    runlength -= rl;
+                    index += rl;
+                } while (runlength > 0);
+
+                runlength = RtlFindNextForwardRunClear(&bmp, index, &index);
+            }
+
+            ExFreePool(bmparr);
+            ExFreePool(checksums);
+        }
+
+        le = le->Flink;
+    }
+
+exit:
+    // Drain the queue regardless of outcome.
+    while (!IsListEmpty(&Vcb->sector_checksums)) {
+        le = RemoveHeadList(&Vcb->sector_checksums);
+        cs = (changed_sector*)le;
+
+        if (cs->checksums)
+            ExFreePool(cs->checksums);
+
+        ExFreePool(cs);
+    }
+}
+
+// Per-chunk part of update_chunk_usage. The caller must hold c->lock exclusively
+// and releases it afterwards, whatever this function returns.
+//
+// Flushes every changed_extent queued on the chunk (setting *flushed_extents to
+// TRUE after each successful flush), calls create_chunk if the chunk is marked as
+// created, and, if the chunk's usage has changed, rewrites its BLOCK_GROUP_ITEM in
+// the extent tree and adjusts the superblock's bytes_used by the difference.
+static NTSTATUS flush_chunk_usage(device_extension* Vcb, chunk* c, BOOL* flushed_extents, PIRP Irp, LIST_ENTRY* rollback) {
+    LIST_ENTRY* le;
+    KEY searchkey;
+    traverse_ptr tp;
+    BLOCK_GROUP_ITEM* bgi;
+    NTSTATUS Status;
+
+    le = c->changed_extents.Flink;
+    while (le != &c->changed_extents) {
+        LIST_ENTRY* next = le->Flink; // flush_changed_extent frees ce on success
+        changed_extent* ce = CONTAINING_RECORD(le, changed_extent, list_entry);
+
+        Status = flush_changed_extent(Vcb, c, ce, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("flush_changed_extent returned %08x\n", Status);
+            return Status;
+        }
+
+        *flushed_extents = TRUE;
+
+        le = next;
+    }
+
+    // This is usually done by update_chunks, but we have to check again in case any new chunks
+    // have been allocated since.
+    if (c->created) {
+        Status = create_chunk(Vcb, c, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("create_chunk returned %08x\n", Status);
+            return Status;
+        }
+    }
+
+    if (c->used == c->oldused)
+        return STATUS_SUCCESS;
+
+    searchkey.obj_id = c->offset;
+    searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
+    searchkey.offset = c->chunk_item->size;
+
+    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return Status;
+    }
+
+    if (keycmp(searchkey, tp.item->key)) {
+        ERR("could not find (%llx,%x,%llx) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
+        int3;
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) {
+        // sizeof yields a size_t; cast so that the argument matches %u
+        ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, (UINT32)sizeof(BLOCK_GROUP_ITEM));
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
+    if (!bgi) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    RtlCopyMemory(bgi, tp.item->data, tp.item->size);
+    bgi->used = c->used;
+
+    TRACE("adjusting usage of chunk %llx to %llx\n", c->offset, c->used);
+
+    // replace the old item with the updated copy
+    delete_tree_item(Vcb, &tp, rollback);
+
+    if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, Irp, rollback)) {
+        ERR("insert_tree_item failed\n");
+        ExFreePool(bgi);
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
+
+    Vcb->superblock.bytes_used += c->used - c->oldused;
+
+    TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
+
+    c->oldused = c->used;
+
+    return STATUS_SUCCESS;
+}
+
+// Walks every chunk of the volume, flushing its pending extent changes and writing
+// back its usage figure if that has changed.
+//
+// Vcb->chunk_lock is held shared for the duration, and each chunk's own lock is
+// held exclusively while that chunk is processed. If any extents were flushed and
+// checksum changes are queued, the checksum tree is updated afterwards under
+// Vcb->checksum_lock.
+//
+// Returns STATUS_SUCCESS, or the first error encountered; processing stops at the
+// first error and the checksum tree is not updated in that case.
+static NTSTATUS update_chunk_usage(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
+    LIST_ENTRY* le = Vcb->chunks.Flink;
+    chunk* c;
+    NTSTATUS Status;
+    BOOL flushed_extents = FALSE;
+
+    TRACE("(%p)\n", Vcb);
+
+    ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
+
+    while (le != &Vcb->chunks) {
+        c = CONTAINING_RECORD(le, chunk, list_entry);
+
+        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+        Status = flush_chunk_usage(Vcb, c, &flushed_extents, Irp, rollback);
+        ExReleaseResourceLite(&c->lock);
+
+        if (!NT_SUCCESS(Status))
+            goto end;
+
+        le = le->Flink;
+    }
+
+    if (flushed_extents) {
+        ExAcquireResourceExclusiveLite(&Vcb->checksum_lock, TRUE);
+        if (!IsListEmpty(&Vcb->sector_checksums)) {
+            update_checksum_tree(Vcb, Irp, rollback);
+        }
+        ExReleaseResourceLite(&Vcb->checksum_lock);
+    }
+
+    Status = STATUS_SUCCESS;
+
+end:
+    ExReleaseResourceLite(&Vcb->chunk_lock);
+
+    return Status;
+}
+
+// Copies the key of the first entry in t's item list into *key.
+// If the list is empty, *key is left untouched.
+static void get_first_item(tree* t, KEY* key) {
+    LIST_ENTRY* first = t->itemlist.Flink;
+
+    if (first != &t->itemlist)
+        *key = CONTAINING_RECORD(first, tree_data, list_entry)->key;
+}
+
+static NTSTATUS STDCALL split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, UINT32 numitems, UINT32 size) {
+    tree *nt, *pt;
+    tree_data* td;
+    tree_data* oldlastitem;
+//     write_tree* wt2;
+// //     tree_data *firsttd, *lasttd;
+// //     LIST_ENTRY* le;
+// #ifdef DEBUG_PARANOID
+//     KEY lastkey1, lastkey2;
+//     traverse_ptr tp, next_tp;
+//     ULONG numitems1, numitems2;
+// #endif
+    
+    TRACE("splitting tree in %llx at (%llx,%x,%llx)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset);
+    
+// #ifdef DEBUG_PARANOID
+//     lastkey1.obj_id = 0xffffffffffffffff;
+//     lastkey1.obj_type = 0xff;
+//     lastkey1.offset = 0xffffffffffffffff;
+//     
+//     if (!find_item(Vcb, t->root, &tp, &lastkey1, NULL, FALSE))
+//         ERR("error - find_item failed\n");
+//     else {
+//         lastkey1 = tp.item->key;
+//         numitems1 = 0;
+//         while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
+//             free_traverse_ptr(&tp);
+//             tp = next_tp;
+//             numitems1++;
+//         }
+//         free_traverse_ptr(&tp);
+//     }
+// #endif
+    
+    nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
+    if (!nt) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+    
+    RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header));
+    nt->header.address = 0;
+    nt->header.generation = Vcb->superblock.generation;
+    nt->header.num_items = t->header.num_items - numitems;
+    nt->header.flags = HEADER_FLAG_MIXED_BACKREF;
+    
+    nt->has_address = FALSE;
+    nt->Vcb = Vcb;
+    nt->parent = t->parent;
+    
+#ifdef DEBUG_PARANOID
+    if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
+#endif
+    
+    nt->root = t->root;
+//     nt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
+    nt->new_address = 0;
+    nt->has_new_address = FALSE;
+    nt->updated_extents = FALSE;
+    nt->flags = t->flags;
+    InitializeListHead(&nt->itemlist);
+    
+//     ExInitializeResourceLite(&nt->nonpaged->load_tree_lock);
+    
+    oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry);
+
+// //     firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
+// //     lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
+// //     
+// //     TRACE("old tree in %x was from (%x,%x,%x) to (%x,%x,%x)\n",
+// //                   (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
+// //                   (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
+// //     
+// //     le = wt->tree->itemlist.Flink;
+// //     while (le != &wt->tree->itemlist) {
+// //         td = CONTAINING_RECORD(le, tree_data, list_entry);
+// //         TRACE("old tree item was (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
+// //         le = le->Flink;
+// //     }
+    
+    nt->itemlist.Flink = &newfirstitem->list_entry;
+    nt->itemlist.Blink = t->itemlist.Blink;
+    nt->itemlist.Flink->Blink = &nt->itemlist;
+    nt->itemlist.Blink->Flink = &nt->itemlist;
+    
+    t->itemlist.Blink = &oldlastitem->list_entry;
+    t->itemlist.Blink->Flink = &t->itemlist;
+    
+// //     le = wt->tree->itemlist.Flink;
+// //     while (le != &wt->tree->itemlist) {
+// //         td = CONTAINING_RECORD(le, tree_data, list_entry);
+// //         TRACE("old tree item now (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
+// //         le = le->Flink;
+// //     }
+// //     
+// //     firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
+// //     lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
+// //     
+// //     TRACE("old tree in %x is now from (%x,%x,%x) to (%x,%x,%x)\n",
+// //                   (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
+// //                   (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
+    
+    nt->size = t->size - size;
+    t->size = size;
+    t->header.num_items = numitems;
+    nt->write = TRUE;
+    
+    InterlockedIncrement(&Vcb->open_trees);
+    InsertTailList(&Vcb->trees, &nt->list_entry);
+    
+// //     // TESTING
+// //     td = wt->tree->items;
+// //     while (td) {
+// //         if (!td->ignore) {
+// //             TRACE("old tree item: (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
+// //         }
+// //         td = td->next;
+// //     }
+    
+// //     oldlastitem->next = NULL;
+// //     wt->tree->lastitem = oldlastitem;
+    
+// //     TRACE("last item is now (%x,%x,%x)\n", (UINT32)oldlastitem->key.obj_id, oldlastitem->key.obj_type, (UINT32)oldlastitem->key.offset);
+    
+    if (nt->header.level > 0) {
+        LIST_ENTRY* le = nt->itemlist.Flink;
+        
+        while (le != &nt->itemlist) {
+            tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
+            
+            if (td2->treeholder.tree) {
+                td2->treeholder.tree->parent = nt;
+#ifdef DEBUG_PARANOID
+                if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3;
+#endif
+            }
+            
+            le = le->Flink;
+        }
+    }
+    
+    if (nt->parent) {
+        td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
+        if (!td) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+    
+        td->key = newfirstitem->key;
+        
+        InsertHeadList(&t->paritem->list_entry, &td->list_entry);
+        
+        td->ignore = FALSE;
+        td->inserted = TRUE;
+        td->treeholder.tree = nt;
+//         td->treeholder.nonpaged->status = tree_holder_loaded;
+        nt->paritem = td;
+        
+        nt->parent->header.num_items++;
+        nt->parent->size += sizeof(internal_node);
+
+        goto end;
+    }
+    
+    TRACE("adding new tree parent\n");
+    
+    if (nt->header.level == 255) {
+        ERR("cannot add parent to tree at level 255\n");
+        return STATUS_INTERNAL_ERROR;
+    }
+    
+    pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
+    if (!pt) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+    
+    RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header));
+    pt->header.address = 0;
+    pt->header.num_items = 2;
+    pt->header.level = nt->header.level + 1;
+    pt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
+    
+    pt->has_address = FALSE;
+    pt->Vcb = Vcb;
+    pt->parent = NULL;
+    pt->paritem = NULL;
+    pt->root = t->root;
+    pt->new_address = 0;
+    pt->has_new_address = FALSE;
+    pt->updated_extents = FALSE;
+//     pt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
+    pt->size = pt->header.num_items * sizeof(internal_node);
+    pt->flags = t->flags;
+    InitializeListHead(&pt->itemlist);
+    
+//     ExInitializeResourceLite(&pt->nonpaged->load_tree_lock);
+    
+    InterlockedIncrement(&Vcb->open_trees);
+    InsertTailList(&Vcb->trees, &pt->list_entry);
+    
+    td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
+    if (!td) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+    
+    get_first_item(t, &td->key);
+    td->ignore = FALSE;
+    td->inserted = FALSE;
+    td->treeholder.address = 0;
+    td->treeholder.generation = Vcb->superblock.generation;
+    td->treeholder.tree = t;
+//     td->treeholder.nonpaged->status = tree_holder_loaded;
+    InsertTailList(&pt->itemlist, &td->list_entry);
+    t->paritem = td;
+    
+    td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
+    if (!td) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+    
+    td->key = newfirstitem->key;
+    td->ignore = FALSE;
+    td->inserted = FALSE;
+    td->treeholder.address = 0;
+    td->treeholder.generation = Vcb->superblock.generation;
+    td->treeholder.tree = nt;
+//     td->treeholder.nonpaged->status = tree_holder_loaded;
+    InsertTailList(&pt->itemlist, &td->list_entry);
+    nt->paritem = td;
+    
+    pt->write = TRUE;
+
+    t->root->treeholder.tree = pt;
+    
+    t->parent = pt;
+    nt->parent = pt;
+    
+#ifdef DEBUG_PARANOID
+    if (t->parent && t->parent->header.level <= t->header.level) int3;
+    if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
+#endif
+    
+end:
+    t->root->root_item.bytes_used += Vcb->superblock.node_size;
+
+// #ifdef DEBUG_PARANOID
+//     lastkey2.obj_id = 0xffffffffffffffff;
+//     lastkey2.obj_type = 0xff;
+//     lastkey2.offset = 0xffffffffffffffff;
+//     
+//     if (!find_item(Vcb, wt->tree->root, &tp, &lastkey2, NULL, FALSE))
+//         ERR("error - find_item failed\n");
+//     else {    
+//         lastkey2 = tp.item->key;
+//         
+//         numitems2 = 0;
+//         while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
+//             free_traverse_ptr(&tp);
+//             tp = next_tp;
+//             numitems2++;
+//         }
+//         free_traverse_ptr(&tp);
+//     }
+//     
+//     ERR("lastkey1 = %llx,%x,%llx\n", lastkey1.obj_id, lastkey1.obj_type, lastkey1.offset);
+//     ERR("lastkey2 = %llx,%x,%llx\n", lastkey2.obj_id, lastkey2.obj_type, lastkey2.offset);
+//     ERR("numitems1 = %u\n", numitems1);
+//     ERR("numitems2 = %u\n", numitems2);
+// #endif
+    
+    return STATUS_SUCCESS;
+}
+
+// Walks the items of t in list order, adding up the size of the non-ignored
+// ones, and hands over to split_tree_at at the first item that would push the
+// running total past node_size minus the tree header. Returns STATUS_SUCCESS
+// without splitting when every item fits.
+static NTSTATUS STDCALL split_tree(device_extension* Vcb, tree* t) {
+    LIST_ENTRY* entry;
+    UINT32 used = 0, kept = 0;
+
+    // FIXME - naïve implementation: maximizes number of filled trees
+
+    for (entry = t->itemlist.Flink; entry != &t->itemlist; entry = entry->Flink) {
+        tree_data* item = CONTAINING_RECORD(entry, tree_data, list_entry);
+        UINT32 item_size;
+
+        if (item->ignore)
+            continue;
+
+        // level-0 items are sized as a leaf_node plus their data,
+        // anything higher as a bare internal_node
+        if (t->header.level == 0)
+            item_size = sizeof(leaf_node) + item->size;
+        else
+            item_size = sizeof(internal_node);
+
+        // FIXME - move back if previous item was deleted item with same key
+        if (used + item_size > Vcb->superblock.node_size - sizeof(tree_header))
+            return split_tree_at(Vcb, t, item, kept, used);
+
+        used += item_size;
+        kept++;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Looks up the extent-tree record for the block at t->header.address.
+// Returns TRUE only if the record is found, its refcount is at most 1 and
+// the byte following the fixed part of the item is TYPE_TREE_BLOCK_REF.
+// A failed lookup or an item that is too short yields FALSE.
+static BOOL tree_extent_is_unshared(device_extension* Vcb, tree* t, PIRP Irp) {
+    KEY searchkey;
+    traverse_ptr tp;
+    NTSTATUS Status;
+    EXTENT_ITEM* ei;
+    UINT8* reftype;
+
+    searchkey.obj_id = t->header.address;
+    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)
+        searchkey.obj_type = TYPE_METADATA_ITEM;
+    else
+        searchkey.obj_type = TYPE_EXTENT_ITEM;
+    searchkey.offset = 0xffffffffffffffff;
+
+    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return FALSE;
+    }
+
+    if (tp.item->key.obj_id != t->header.address)
+        return FALSE;
+
+    if (!(tp.item->key.obj_type == TYPE_METADATA_ITEM || tp.item->key.obj_type == TYPE_EXTENT_ITEM))
+        return FALSE;
+
+    // an EXTENT_ITEM of exactly the V0 size is rejected outright
+    if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->size == sizeof(EXTENT_ITEM_V0))
+        return FALSE;
+
+    if (tp.item->size < sizeof(EXTENT_ITEM))
+        return FALSE;
+
+    ei = (EXTENT_ITEM*)tp.item->data;
+
+    if (ei->refcount > 1)
+        return FALSE;
+
+    // the reference type byte follows the EXTENT_ITEM, and additionally an
+    // EXTENT_ITEM2 when a non-skinny item is flagged as a tree block
+    if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && (ei->flags & EXTENT_ITEM_TREE_BLOCK)) {
+        if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2))
+            return FALSE;
+
+        reftype = (UINT8*)ei + sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2);
+    } else
+        reftype = (UINT8*)ei + sizeof(EXTENT_ITEM);
+
+    if (reftype >= tp.item->data + tp.item->size || *reftype != TYPE_TREE_BLOCK_REF)
+        return FALSE;
+
+    return TRUE;
+}
+
+// Returns TRUE if t and each of its ancestors that has an on-disk address
+// pass the single-reference check above; nodes without an address are
+// skipped. t itself must not be NULL.
+BOOL is_tree_unique(device_extension* Vcb, tree* t, PIRP Irp) {
+    do {
+        if (t->has_address && !tree_extent_is_unshared(Vcb, t, Irp))
+            return FALSE;
+
+        t = t->parent;
+    } while (t);
+
+    return TRUE;
+}
+
+static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
+    LIST_ENTRY* le;
+    tree_data* nextparitem = NULL;
+    NTSTATUS Status;
+    tree *next_tree, *par;
+    BOOL loaded;
+    /* Tries to combine t with the next non-ignored sibling listed after it in
+     * t->parent. If the two fit in one node, the sibling's items are appended
+     * to t and the sibling is freed; otherwise items are moved from the head
+     * of the sibling to the tail of t to even out their sizes. Returns
+     * STATUS_SUCCESS unless do_load_tree or reduce_tree_extent fails, in
+     * which case that status is returned. t->paritem and t->parent are
+     * dereferenced without a NULL check. loaded is filled in by do_load_tree
+     * but not read afterwards. */
+    TRACE("trying to amalgamate tree in root %llx, level %x (size %u)\n", t->root->id, t->header.level, t->size);
+    
+    // FIXME - doesn't capture everything, as it doesn't ascend
+    // FIXME - write proper function and put it in treefuncs.c
+    le = t->paritem->list_entry.Flink;
+    while (le != &t->parent->itemlist) {
+        tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
+        
+        if (!td->ignore) {
+            nextparitem = td;
+            break;
+        }
+        
+        le = le->Flink;
+    }
+    // no non-ignored item follows t in its parent, so there is no sibling to use
+    if (!nextparitem)
+        return STATUS_SUCCESS;
+    
+    // FIXME - loop, and capture more than one tree if we can
+    
+    TRACE("nextparitem: key = %llx,%x,%llx\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset);
+//     nextparitem = t->paritem;
+    
+//     ExAcquireResourceExclusiveLite(&t->parent->nonpaged->load_tree_lock, TRUE);
+    
+    Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, &loaded, NULL);
+    if (!NT_SUCCESS(Status)) {
+        ERR("do_load_tree returned %08x\n", Status);
+        return Status;
+    }
+    // leave the sibling untouched unless is_tree_unique accepts it
+    if (!is_tree_unique(Vcb, nextparitem->treeholder.tree, Irp))
+        return STATUS_SUCCESS;
+    
+//     ExReleaseResourceLite(&t->parent->nonpaged->load_tree_lock);
+    
+    next_tree = nextparitem->treeholder.tree;
+    
+    if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) {
+        // merge two trees into one
+        // fold the sibling's item count and size into t
+        t->header.num_items += next_tree->header.num_items;
+        t->size += next_tree->size;
+        
+        if (next_tree->header.level > 0) {
+            le = next_tree->itemlist.Flink;
+            
+            while (le != &next_tree->itemlist) {
+                tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
+                
+                if (td2->treeholder.tree) {
+                    td2->treeholder.tree->parent = t;
+#ifdef DEBUG_PARANOID
+                    if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3;
+#endif
+                }
+                
+                le = le->Flink;
+            }
+        }
+        // append next_tree's whole item list to the tail of t's list
+        t->itemlist.Blink->Flink = next_tree->itemlist.Flink;
+        t->itemlist.Blink->Flink->Blink = t->itemlist.Blink;
+        t->itemlist.Blink = next_tree->itemlist.Blink;
+        t->itemlist.Blink->Flink = &t->itemlist;
+        
+//         // TESTING
+//         le = t->itemlist.Flink;
+//         while (le != &t->itemlist) {
+//             tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
+//             if (!td->ignore) {
+//                 ERR("key: %llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
+//             }
+//             le = le->Flink;
+//         }
+        // next_tree no longer owns any items: reset it to an empty list
+        next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist;
+        
+        next_tree->header.num_items = 0;
+        next_tree->size = 0;
+        // release the sibling's extent: its new address if it has one, else its existing address
+        if (next_tree->has_new_address) { // delete associated EXTENT_ITEM
+            Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, Irp, rollback);
+            
+            if (!NT_SUCCESS(Status)) {
+                ERR("reduce_tree_extent returned %08x\n", Status);
+                return Status;
+            }
+        } else if (next_tree->has_address) {
+            Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, Irp, rollback);
+            
+            if (!NT_SUCCESS(Status)) {
+                ERR("reduce_tree_extent returned %08x\n", Status);
+                return Status;
+            }
+        }
+        // stop counting the sibling's entry in the parent's item count and size
+        if (!nextparitem->ignore) {
+            nextparitem->ignore = TRUE;
+            next_tree->parent->header.num_items--;
+            next_tree->parent->size -= sizeof(internal_node);
+        }
+        // flag the parent and every ancestor above it for writing
+        par = next_tree->parent;
+        while (par) {
+            par->write = TRUE;
+            par = par->parent;
+        }
+        /* NOTE(review): tree_data entries elsewhere in this file are taken
+         * from Vcb->tree_data_lookaside; confirm that ExFreePool is the
+         * intended way to release this one */
+        RemoveEntryList(&nextparitem->list_entry);
+        ExFreePool(next_tree->paritem);
+        next_tree->paritem = NULL;
+        // the root now uses one node less
+        next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size;
+        
+        free_tree(next_tree);
+    } else {
+        // rebalance by moving items from second tree into first
+        ULONG avg_size = (t->size + next_tree->size) / 2;
+        KEY firstitem = {0, 0, 0};
+        BOOL changed = FALSE;
+        
+        TRACE("attempting rebalance\n");
+        /* keep taking the head item of next_tree while t is below the average
+         * size and next_tree still counts more than one item */
+        le = next_tree->itemlist.Flink;
+        while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) {
+            tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
+            ULONG size;
+            
+            if (!td->ignore) {
+                if (next_tree->header.level == 0)
+                    size = sizeof(leaf_node) + td->size;
+                else
+                    size = sizeof(internal_node);
+            } else
+                size = 0;
+            // move the item only if t stays strictly below the usable node size; otherwise stop
+            if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) {
+                RemoveEntryList(&td->list_entry);
+                InsertTailList(&t->itemlist, &td->list_entry);
+                
+                if (next_tree->header.level > 0 && td->treeholder.tree) {
+                    td->treeholder.tree->parent = t;
+#ifdef DEBUG_PARANOID
+                    if (td->treeholder.tree->parent && td->treeholder.tree->parent->header.level <= td->treeholder.tree->header.level) int3;
+#endif
+                }
+                
+                if (!td->ignore) {
+                    next_tree->size -= size;
+                    t->size += size;
+                    next_tree->header.num_items--;
+                    t->header.num_items++;
+                }
+                
+                changed = TRUE;
+            } else
+                break;
+            // the moved item has left next_tree, so continue from its new head
+            le = next_tree->itemlist.Flink;
+        }
+        // if anything moved, store next_tree's new first non-ignored key in its parent item
+        if (changed) {
+            le = next_tree->itemlist.Flink;
+            while (le != &next_tree->itemlist) {
+                tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
+                
+                if (!td->ignore) {
+                    firstitem = td->key;
+                    break;
+                }
+                
+                le = le->Flink;
+            }
+            
+    //         ERR("firstitem = %llx,%x,%llx\n", firstitem.obj_id, firstitem.obj_type, firstitem.offset);
+            
+            // FIXME - once ascension is working, make this work with parent's parent, etc.
+            if (next_tree->paritem)
+                next_tree->paritem->key = firstitem;
+            // flag next_tree and all of its ancestors for writing
+            par = next_tree;
+            while (par) {
+                par->write = TRUE;
+                par = par->parent;
+            }
+        }
+    }
+    
+    return STATUS_SUCCESS;
+}
+
+// Rewrites the extent-tree record of the tree block at `address` so that it
+// records `level`. The existing item is copied, deleted and re-inserted:
+//  - with skinny metadata the level is the key offset of a METADATA_ITEM, so
+//    the item currently keyed by t->header.level is re-inserted under `level`;
+//  - otherwise (or if no such METADATA_ITEM exists) the EXTENT_ITEM for
+//    `address` is re-inserted with its EXTENT_ITEM_TREE.level field changed.
+// Returns STATUS_SUCCESS on success, the find_item status on lookup failure,
+// STATUS_INSUFFICIENT_RESOURCES when the copy cannot be allocated, and
+// STATUS_INTERNAL_ERROR when the item is missing, too short, or cannot be
+// re-inserted.
+static NTSTATUS update_extent_level(device_extension* Vcb, UINT64 address, tree* t, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) {
+    KEY searchkey;
+    traverse_ptr tp;
+    NTSTATUS Status;
+    
+    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
+        searchkey.obj_id = address;
+        searchkey.obj_type = TYPE_METADATA_ITEM;
+        searchkey.offset = t->header.level;
+        
+        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            return Status;
+        }
+        
+        if (!keycmp(tp.item->key, searchkey)) {
+            EXTENT_ITEM_SKINNY_METADATA* eism;
+            
+            if (tp.item->size > 0) {
+                eism = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
+                
+                if (!eism) {
+                    ERR("out of memory\n");
+                    return STATUS_INSUFFICIENT_RESOURCES;
+                }
+                
+                RtlCopyMemory(eism, tp.item->data, tp.item->size);
+            } else
+                eism = NULL;
+            
+            delete_tree_item(Vcb, &tp, rollback);
+            
+            if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, tp.item->size, NULL, Irp, rollback)) {
+                ERR("insert_tree_item failed\n");
+                
+                // eism is NULL for a zero-length item, and ExFreePool must
+                // not be handed a NULL pointer
+                if (eism)
+                    ExFreePool(eism);
+                
+                return STATUS_INTERNAL_ERROR;
+            }
+            
+            return STATUS_SUCCESS;
+        }
+    }
+    
+    // no skinny item: fall back to the EXTENT_ITEM keyed by address
+    searchkey.obj_id = address;
+    searchkey.obj_type = TYPE_EXTENT_ITEM;
+    searchkey.offset = 0xffffffffffffffff;
+    
+    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return Status;
+    }
+    
+    if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
+        EXTENT_ITEM_TREE* eit;
+        
+        if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
+            // sizeof yields a size_t, which does not match %u on 64-bit builds
+            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, (ULONG)sizeof(EXTENT_ITEM_TREE));
+            return STATUS_INTERNAL_ERROR;
+        }
+        
+        eit = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
+        
+        if (!eit) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        RtlCopyMemory(eit, tp.item->data, tp.item->size);
+        
+        delete_tree_item(Vcb, &tp, rollback);
+        
+        eit->level = level;
+        
+        if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, eit, tp.item->size, NULL, Irp, rollback)) {
+            ERR("insert_tree_item failed\n");
+            ExFreePool(eit);
+            return STATUS_INTERNAL_ERROR;
+        }
+    
+        return STATUS_SUCCESS;
+    }
+    
+    ERR("could not find EXTENT_ITEM for address %llx\n", address);
+    
+    return STATUS_INTERNAL_ERROR;
+}
+
+static NTSTATUS STDCALL do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
+//     LIST_ENTRY *le, *le2;
+//     write_tree* wt;
+//     tree_data* td;
+    UINT8 level, max_level;
+    UINT32 min_size;
+    BOOL empty, done_deletions = FALSE;
+    NTSTATUS Status;
+    tree* t;
+    
+    TRACE("(%p)\n", Vcb);
+    
+    max_level = 0;
+    
+    for (level = 0; level <= 255; level++) {
+        LIST_ENTRY *le, *nextle;
+        
+        empty = TRUE;
+        
+        TRACE("doing level %u\n", level);
+        
+        le = Vcb->trees.Flink;
+    
+        while (le != &Vcb->trees) {
+            t = CONTAINING_RECORD(le, tree, list_entry);
+            
+            nextle = le->Flink;
+            
+            if (t->write && t->header.level == level) {
+                empty = FALSE;
+                
+                if (t->header.num_items == 0) {
+                    if (t->parent) {
+                        LIST_ENTRY* le2;
+                        KEY firstitem = {0xcccccccccccccccc,0xcc,0xcccccccccccccccc};
+#ifdef __REACTOS__
+                        (void)firstitem;
+#endif
+                        
+                        done_deletions = TRUE;
+            
+                        le2 = t->itemlist.Flink;
+                        while (le2 != &t->itemlist) {
+                            tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
+                            firstitem = td->key;
+                            break;
+                        }
+                        
+                        TRACE("deleting tree in root %llx (first item was %llx,%x,%llx)\n",
+                              t->root->id, firstitem.obj_id, firstitem.obj_type, firstitem.offset);
+                        
+                        t->root->root_item.bytes_used -= Vcb->superblock.node_size;
+                        
+                        if (t->has_new_address) { // delete associated EXTENT_ITEM
+                            Status = reduce_tree_extent(Vcb, t->new_address, t, Irp, rollback);
+                            
+                            if (!NT_SUCCESS(Status)) {
+                                ERR("reduce_tree_extent returned %08x\n", Status);
+                                return Status;
+                            }
+                            
+                            t->has_new_address = FALSE;
+                        } else if (t->has_address) {
+                            Status = reduce_tree_extent(Vcb,t->header.address, t, Irp, rollback);
+                            
+                            if (!NT_SUCCESS(Status)) {
+                                ERR("reduce_tree_extent returned %08x\n", Status);
+                                return Status;
+                            }
+                            
+                            t->has_address = FALSE;
+                        }
+                        
+                        if (!t->paritem->ignore) {
+                            t->paritem->ignore = TRUE;
+                            t->parent->header.num_items--;
+                            t->parent->size -= sizeof(internal_node);
+                        }
+                        
+                        RemoveEntryList(&t->paritem->list_entry);
+                        ExFreePool(t->paritem);
+                        t->paritem = NULL;
+                        
+                        free_tree(t);
+                    } else if (t->header.level != 0) {
+                        if (t->has_new_address) {
+                            Status = update_extent_level(Vcb, t->new_address, t, 0, Irp, rollback);
+                            
+                            if (!NT_SUCCESS(Status)) {
+                                ERR("update_extent_level returned %08x\n", Status);
+                                return Status;
+                            }
+                        }
+                        
+                        t->header.level = 0;
+                    }
+                } else if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
+                    TRACE("splitting overlarge tree (%x > %x)\n", t->size, Vcb->superblock.node_size - sizeof(tree_header));
+                    Status = split_tree(Vcb, t);
+
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("split_tree returned %08x\n", Status);
+                        return Status;
+                    }
+                }
+            }
+            
+            le = nextle;
+        }
+        
+        if (!empty) {
+            max_level = level;
+        } else {
+            TRACE("nothing found for level %u\n", level);
+            break;
+        }
+    }
+    
+    min_size = (Vcb->superblock.node_size - sizeof(tree_header)) / 2;
+    
+    for (level = 0; level <= max_level; level++) {
+        LIST_ENTRY* le;
+        
+        le = Vcb->trees.Flink;
+    
+        while (le != &Vcb->trees) {
+            t = CONTAINING_RECORD(le, tree, list_entry);
+            
+            if (t->write && t->header.level == level && t->header.num_items > 0 && t->parent && t->size < min_size && is_tree_unique(Vcb, t, Irp)) {
+                Status = try_tree_amalgamate(Vcb, t, Irp, rollback);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("try_tree_amalgamate returned %08x\n", Status);
+                    return Status;
+                }
+            }
+            
+            le = le->Flink;
+        }
+    }
+    
+    // simplify trees if top tree only has one entry
+    
+    if (done_deletions) {
+        for (level = max_level; level > 0; level--) {
+            LIST_ENTRY *le, *nextle;
+            
+            le = Vcb->trees.Flink;
+            while (le != &Vcb->trees) {
+                nextle = le->Flink;
+                t = CONTAINING_RECORD(le, tree, list_entry);
+                
+                if (t->write && t->header.level == level) {
+                    if (!t->parent && t->header.num_items == 1) {
+                        LIST_ENTRY* le2 = t->itemlist.Flink;
+                        tree_data* td;
+                        tree* child_tree = NULL;
+
+                        while (le2 != &t->itemlist) {
+                            td = CONTAINING_RECORD(le2, tree_data, list_entry);
+                            if (!td->ignore)
+                                break;
+                            le2 = le2->Flink;
+                        }
+                        
+                        TRACE("deleting top-level tree in root %llx with one item\n", t->root->id);
+                        
+                        if (t->has_new_address) { // delete associated EXTENT_ITEM
+                            Status = reduce_tree_extent(Vcb, t->new_address, t, Irp, rollback);
+                            
+                            if (!NT_SUCCESS(Status)) {
+                                ERR("reduce_tree_extent returned %08x\n", Status);
+                                return Status;
+                            }
+                            
+                            t->has_new_address = FALSE;
+                        } else if (t->has_address) {
+                            Status = reduce_tree_extent(Vcb,t->header.address, t, Irp, rollback);
+                            
+                            if (!NT_SUCCESS(Status)) {
+                                ERR("reduce_tree_extent returned %08x\n", Status);
+                                return Status;
+                            }
+                            
+                            t->has_address = FALSE;
+                        }
+                        
+                        if (!td->treeholder.tree) { // load first item if not already loaded
+                            KEY searchkey = {0,0,0};
+                            traverse_ptr tp;
+                            
+                            Status = find_item(Vcb, t->root, &tp, &searchkey, FALSE, Irp);
+                            if (!NT_SUCCESS(Status)) {
+                                ERR("error - find_item returned %08x\n", Status);
+                                return Status;
+                            }
+                        }
+                        
+                        child_tree = td->treeholder.tree;
+                        
+                        if (child_tree) {
+                            child_tree->parent = NULL;
+                            child_tree->paritem = NULL;
+                        }
+                        
+                        t->root->root_item.bytes_used -= Vcb->superblock.node_size;
+
+                        free_tree(t);
+                        
+                        if (child_tree)
+                            child_tree->root->treeholder.tree = child_tree;
+                    }
+                }
+                
+                le = nextle;
+            }
+        }
+    }
+    
+    return STATUS_SUCCESS;
+}
+
+// Recursively releases the extents of the subtree held by th. For level > 0
+// the tree is loaded if necessary and every non-ignored child is processed
+// first; afterwards reduce_tree_extent is called on th->address unless a
+// loaded tree reports that it has no address. Returns the first failing
+// status, or STATUS_SUCCESS.
+static NTSTATUS remove_root_extents(device_extension* Vcb, root* r, tree_holder* th, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
+
+    if (level > 0 && !th->tree) {
+        Status = load_tree(Vcb, th->address, r, &th->tree, NULL, NULL);
+
+        if (!NT_SUCCESS(Status)) {
+            ERR("load_tree(%llx) returned %08x\n", th->address, Status);
+            return Status;
+        }
+    }
+
+    // descend only when the caller's level and the tree's own header both
+    // say this is an internal node
+    if (level > 0 && th->tree->header.level > 0) {
+        LIST_ENTRY* entry;
+
+        for (entry = th->tree->itemlist.Flink; entry != &th->tree->itemlist; entry = entry->Flink) {
+            tree_data* child = CONTAINING_RECORD(entry, tree_data, list_entry);
+
+            if (child->ignore)
+                continue;
+
+            Status = remove_root_extents(Vcb, r, &child->treeholder, th->tree->header.level - 1, Irp, rollback);
+
+            if (!NT_SUCCESS(Status)) {
+                ERR("remove_root_extents returned %08x\n", Status);
+                return Status;
+            }
+        }
+    }
+
+    if (th->tree && !th->tree->has_address)
+        return STATUS_SUCCESS;
+
+    Status = reduce_tree_extent(Vcb, th->address, NULL, Irp, rollback);
+
+    if (!NT_SUCCESS(Status)) {
+        ERR("reduce_tree_extent(%llx) returned %08x\n", th->address, Status);
+        return Status;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Deletes root r: releases all of its tree extents, removes its
+// TYPE_SUBVOL_UUID entry from the uuid root (when there is a uuid root and
+// the root's UUID is not all zeroes), deletes its ROOT_ITEM from root_root,
+// and finally frees its cached trees. A missing uuid or ROOT_ITEM entry is
+// only warned about; failures of remove_root_extents or of the ROOT_ITEM
+// lookup are returned to the caller.
+static NTSTATUS drop_root(device_extension* Vcb, root* r, PIRP Irp, LIST_ENTRY* rollback) {
+    KEY searchkey;
+    traverse_ptr tp;
+    NTSTATUS Status = remove_root_extents(Vcb, r, &r->treeholder, r->root_item.root_level, Irp, rollback);
+
+    if (!NT_SUCCESS(Status)) {
+        ERR("remove_root_extents returned %08x\n", Status);
+        return Status;
+    }
+
+    // remove entry in uuid root (tree 9)
+    if (Vcb->uuid_root) {
+        // the key is the UUID itself: first 8 bytes as obj_id, last 8 as offset
+        RtlCopyMemory(&searchkey.obj_id, &r->root_item.uuid.uuid[0], sizeof(UINT64));
+        searchkey.obj_type = TYPE_SUBVOL_UUID;
+        RtlCopyMemory(&searchkey.offset, &r->root_item.uuid.uuid[sizeof(UINT64)], sizeof(UINT64));
+
+        if (!(searchkey.obj_id == 0 && searchkey.offset == 0)) {
+            Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE, Irp);
+
+            if (!NT_SUCCESS(Status))
+                WARN("find_item returned %08x\n", Status);
+            else if (keycmp(tp.item->key, searchkey))
+                WARN("could not find (%llx,%x,%llx) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
+            else
+                delete_tree_item(Vcb, &tp, rollback);
+        }
+    }
+
+    // delete ROOT_ITEM
+
+    searchkey.obj_id = r->id;
+    searchkey.obj_type = TYPE_ROOT_ITEM;
+    searchkey.offset = 0xffffffffffffffff;
+
+    Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("find_item returned %08x\n", Status);
+        return Status;
+    }
+
+    if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type)
+        WARN("could not find (%llx,%x,%llx) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
+    else
+        delete_tree_item(Vcb, &tp, rollback);
+
+    // delete items in tree cache
+
+    free_trees_root(Vcb, r);
+
+    return STATUS_SUCCESS;
+}
+
+// Calls drop_root on every root queued in Vcb->drop_roots, stopping at and
+// returning the first failure.
+static NTSTATUS drop_roots(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
+    LIST_ENTRY *cur, *next;
+
+    for (cur = Vcb->drop_roots.Flink; cur != &Vcb->drop_roots; cur = next) {
+        root* r = CONTAINING_RECORD(cur, root, list_entry);
+        NTSTATUS Status;
+
+        // successor is read before drop_root is given the chance to touch r
+        next = cur->Flink;
+
+        Status = drop_root(Vcb, r, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("drop_root(%llx) returned %08x\n", r->id, Status);
+            return Status;
+        }
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Replaces the DEV_ITEM for `device` in the chunk root with a fresh copy of
+// device->devitem: the existing item (key 1, TYPE_DEV_ITEM, dev_id) is
+// deleted and a newly allocated copy is inserted under the same key.
+// Returns the find_item status on lookup failure, STATUS_INTERNAL_ERROR if
+// the item does not exist or cannot be re-inserted,
+// STATUS_INSUFFICIENT_RESOURCES if the copy cannot be allocated, and
+// STATUS_SUCCESS otherwise.
+static NTSTATUS update_dev_item(device_extension* Vcb, device* device, PIRP Irp, LIST_ENTRY* rollback) {
+    KEY searchkey;
+    traverse_ptr tp;
+    DEV_ITEM* di;
+    NTSTATUS Status;
+    
+    searchkey.obj_id = 1;
+    searchkey.obj_type = TYPE_DEV_ITEM;
+    searchkey.offset = device->devitem.dev_id;
+    
+    Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return Status;
+    }
+    
+    if (keycmp(tp.item->key, searchkey)) {
+        ERR("error - could not find DEV_ITEM for device %llx\n", device->devitem.dev_id);
+        return STATUS_INTERNAL_ERROR;
+    }
+    
+    delete_tree_item(Vcb, &tp, rollback);
+    
+    di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
+    if (!di) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+    
+    RtlCopyMemory(di, &device->devitem, sizeof(DEV_ITEM));
+    
+    if (!insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp, rollback)) {
+        ERR("insert_tree_item failed\n");
+        // the buffer was not inserted, so it is still ours to release
+        ExFreePool(di);
+        return STATUS_INTERNAL_ERROR;
+    }
+    
+    return STATUS_SUCCESS;
+}
+
+// Rewrites Vcb->superblock.sys_chunk_array from the Vcb->sys_chunks list:
+// each entry is emitted as its KEY immediately followed by its data, packed
+// back to back from offset 0. No bounds check is performed here.
+static void regen_bootstrap(device_extension* Vcb) {
+    LIST_ENTRY* entry;
+    USHORT pos = 0;
+
+    for (entry = Vcb->sys_chunks.Flink; entry != &Vcb->sys_chunks; entry = entry->Flink) {
+        sys_chunk* sc = CONTAINING_RECORD(entry, sys_chunk, list_entry);
+
+        TRACE("%llx,%x,%llx\n", sc->key.obj_id, sc->key.obj_type, sc->key.offset);
+
+        // key first ...
+        RtlCopyMemory(&Vcb->superblock.sys_chunk_array[pos], &sc->key, sizeof(KEY));
+        pos += sizeof(KEY);
+
+        // ... then the payload that belongs to it
+        RtlCopyMemory(&Vcb->superblock.sys_chunk_array[pos], sc->data, sc->size);
+        pos += sc->size;
+    }
+}
+
+// Adds a (key, data) record to the Vcb->sys_chunks list, keeping the list
+// ordered by key, then bumps superblock.n and regenerates the superblock's
+// sys_chunk_array. The data is copied, so the caller keeps ownership of
+// `data`. Returns STATUS_INTERNAL_ERROR if the record would not fit in
+// SYS_CHUNK_ARRAY_SIZE and STATUS_INSUFFICIENT_RESOURCES on allocation
+// failure.
+static NTSTATUS add_to_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset, void* data, ULONG size) {
+    sys_chunk* entry;
+    LIST_ENTRY* pos;
+
+    if (Vcb->superblock.n + sizeof(KEY) + size > SYS_CHUNK_ARRAY_SIZE) {
+        ERR("error - bootstrap is full\n");
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    entry = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG);
+    if (!entry) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    entry->key.obj_id = obj_id;
+    entry->key.obj_type = obj_type;
+    entry->key.offset = offset;
+    entry->size = size;
+
+    entry->data = ExAllocatePoolWithTag(PagedPool, entry->size, ALLOC_TAG);
+    if (!entry->data) {
+        ERR("out of memory\n");
+        ExFreePool(entry);
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    RtlCopyMemory(entry->data, data, entry->size);
+
+    // stop at the first existing record for which keycmp reports 1 against
+    // the new key; if there is none, pos ends up at the list head
+    for (pos = Vcb->sys_chunks.Flink; pos != &Vcb->sys_chunks; pos = pos->Flink) {
+        sys_chunk* other = CONTAINING_RECORD(pos, sys_chunk, list_entry);
+
+        if (keycmp(other->key, entry->key) == 1)
+            break;
+    }
+
+    // InsertTailList on a mid-list entry links the new record just before it
+    InsertTailList(pos, &entry->list_entry);
+
+    Vcb->superblock.n += sizeof(KEY) + size;
+
+    regen_bootstrap(Vcb);
+
+    return STATUS_SUCCESS;
+}
+
+// Writes the on-disk records for a newly created chunk c:
+//  - a copy of its CHUNK_ITEM into the chunk root (and, for SYSTEM chunks,
+//    into the superblock bootstrap via add_to_bootstrap);
+//  - a BLOCK_GROUP_ITEM into the extent root;
+//  - one DEV_EXTENT per stripe into the dev root, followed by a refresh of
+//    that stripe's DEV_ITEM.
+// Clears c->created on success. Returns STATUS_INSUFFICIENT_RESOURCES on
+// allocation failure, STATUS_INTERNAL_ERROR when an item cannot be inserted
+// or the stripe geometry would cause a division by zero, or the failing
+// status of add_to_bootstrap / update_dev_item.
+static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp, LIST_ENTRY* rollback) {
+    CHUNK_ITEM* ci;
+    CHUNK_ITEM_STRIPE* cis;
+    BLOCK_GROUP_ITEM* bgi;
+    UINT16 i, factor;
+    NTSTATUS Status;
+    
+    ci = ExAllocatePoolWithTag(PagedPool, c->size, ALLOC_TAG);
+    if (!ci) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+    
+    RtlCopyMemory(ci, c->chunk_item, c->size);
+    
+    if (!insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size, NULL, Irp, rollback)) {
+        ERR("insert_tree_item failed\n");
+        ExFreePool(ci);
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    if (c->chunk_item->type & BLOCK_FLAG_SYSTEM) {
+        // add_to_bootstrap takes its own copy of the data
+        Status = add_to_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size);
+        if (!NT_SUCCESS(Status)) {
+            ERR("add_to_bootstrap returned %08x\n", Status);
+            return Status;
+        }
+    }
+
+    // add BLOCK_GROUP_ITEM to tree 2
+    
+    bgi = ExAllocatePoolWithTag(PagedPool, sizeof(BLOCK_GROUP_ITEM), ALLOC_TAG);
+    if (!bgi) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    bgi->used = c->used;
+    bgi->chunk_tree = 0x100;
+    bgi->flags = c->chunk_item->type;
+    
+    if (!insert_tree_item(Vcb, Vcb->extent_root, c->offset, TYPE_BLOCK_GROUP_ITEM, c->chunk_item->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, Irp, rollback)) {
+        ERR("insert_tree_item failed\n");
+        ExFreePool(bgi);
+        // same status as every other failed insert in this file; this is not
+        // an allocation failure of ours
+        return STATUS_INTERNAL_ERROR;
+    }
+    
+    // factor = number of stripes that hold distinct data, i.e. the divisor
+    // that turns the chunk size into the per-device extent length
+    if (c->chunk_item->type & BLOCK_FLAG_RAID0)
+        factor = c->chunk_item->num_stripes;
+    else if (c->chunk_item->type & BLOCK_FLAG_RAID10) {
+        if (c->chunk_item->sub_stripes == 0) {
+            ERR("error - RAID10 chunk has sub_stripes of 0\n");
+            return STATUS_INTERNAL_ERROR;
+        }
+        
+        factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
+    } else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
+        factor = c->chunk_item->num_stripes - 1;
+    else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
+        factor = c->chunk_item->num_stripes - 2;
+    else // SINGLE, DUPLICATE, RAID1
+        factor = 1;
+
+    // the loop below divides by factor for every stripe
+    if (factor == 0 && c->chunk_item->num_stripes > 0) {
+        ERR("error - chunk has too few stripes for its RAID type\n");
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    // add DEV_EXTENTs to tree 4
+    
+    // the stripe array sits directly behind the CHUNK_ITEM header
+    cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
+    
+    for (i = 0; i < c->chunk_item->num_stripes; i++) {
+        DEV_EXTENT* de;
+        
+        de = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_EXTENT), ALLOC_TAG);
+        if (!de) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        de->chunktree = Vcb->chunk_root->id;
+        de->objid = 0x100;
+        de->address = c->offset;
+        de->length = c->chunk_item->size / factor;
+        de->chunktree_uuid = Vcb->chunk_root->treeholder.tree->header.chunk_tree_uuid;
+
+        if (!insert_tree_item(Vcb, Vcb->dev_root, c->devices[i]->devitem.dev_id, TYPE_DEV_EXTENT, cis[i].offset, de, sizeof(DEV_EXTENT), NULL, Irp, rollback)) {
+            ERR("insert_tree_item failed\n");
+            ExFreePool(de);
+            return STATUS_INTERNAL_ERROR;
+        }
+        
+        // FIXME - no point in calling this twice for the same device
+        Status = update_dev_item(Vcb, c->devices[i], Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("update_dev_item returned %08x\n", Status);
+            return Status;
+        }
+    }
+    
+    c->created = FALSE;
+    
+    return STATUS_SUCCESS;
+}
+
+/* Removes the first entry of Vcb->sys_chunks whose key equals
+ * (obj_id, obj_type, offset): unlinks it, subtracts its footprint
+ * (sizeof(KEY) + size) from Vcb->superblock.n, frees it and calls
+ * regen_bootstrap. Does nothing if no entry matches. */
+static void remove_from_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset) {
+    LIST_ENTRY* cur;
+
+    for (cur = Vcb->sys_chunks.Flink; cur != &Vcb->sys_chunks; cur = cur->Flink) {
+        sys_chunk* entry = CONTAINING_RECORD(cur, sys_chunk, list_entry);
+
+        if (entry->key.obj_id != obj_id || entry->key.obj_type != obj_type || entry->key.offset != offset)
+            continue;
+
+        RemoveEntryList(&entry->list_entry);
+
+        Vcb->superblock.n -= sizeof(KEY) + entry->size;
+
+        ExFreePool(entry->data);
+        ExFreePool(entry);
+
+        regen_bootstrap(Vcb);
+
+        // the entry is gone, so stop here rather than follow its stale links
+        return;
+    }
+}
+
+/* Queues an xattr write for inode `inode` of `subvol` on the batch list.
+ *
+ * Builds a DIR_ITEM of type BTRFS_TYPE_EA whose trailing bytes are the name
+ * immediately followed by the value (no terminator between or after them) and
+ * queues it under key (inode, TYPE_XATTR_ITEM, crc32) with operation
+ * Batch_SetXattr.
+ *
+ * Returns STATUS_SUCCESS once the item is queued, STATUS_INSUFFICIENT_RESOURCES
+ * if the item cannot be allocated, or STATUS_INTERNAL_ERROR if
+ * insert_tree_item_batch fails, in which case the item is freed here. */
+static NTSTATUS STDCALL set_xattr(device_extension* Vcb, LIST_ENTRY* batchlist, root* subvol, UINT64 inode, char* name, UINT32 crc32,
+                                  UINT8* data, UINT16 datalen, PIRP Irp, LIST_ENTRY* rollback) {
+    ULONG xasize;
+    DIR_ITEM* xa;
+    size_t namelen = strlen(name); // measured once; used for the sizes and both copies
+    
+    TRACE("(%p, %llx, %llx, %s, %08x, %p, %u)\n", Vcb, subvol->id, inode, name, crc32, data, datalen);
+    
+    // DIR_ITEM already includes one byte of name[], hence the - 1
+    xasize = sizeof(DIR_ITEM) - 1 + (ULONG)namelen + datalen;
+    
+    xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG);
+    if (!xa) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+    
+    xa->key.obj_id = 0;
+    xa->key.obj_type = 0;
+    xa->key.offset = 0;
+    xa->transid = Vcb->superblock.generation;
+    xa->m = datalen;
+    xa->n = (UINT16)namelen;
+    xa->type = BTRFS_TYPE_EA;
+    RtlCopyMemory(xa->name, name, namelen);
+    RtlCopyMemory(xa->name + namelen, data, datalen);
+    
+    if (!insert_tree_item_batch(batchlist, Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, Batch_SetXattr, Irp, rollback)) {
+        // the batch list only records the pointer on success, so the item is still ours to free
+        ERR("insert_tree_item_batch failed\n");
+        ExFreePool(xa);
+        return STATUS_INTERNAL_ERROR;
+    }
+    
+    return STATUS_SUCCESS;
+}
+
+/* Deletes the xattr called `name` from inode `inode` of `subvol`.
+ *
+ * All xattrs whose names hash to crc32 share one XATTR_ITEM, stored as
+ * back-to-back DIR_ITEM records. The item is searched for a record with a
+ * matching name; when found the whole item is deleted and, if other records
+ * remain, re-inserted without the matching one.
+ *
+ * Returns TRUE if the xattr was removed. Returns FALSE if it does not exist,
+ * the item is malformed, memory runs out, or the re-insert fails. */
+static BOOL STDCALL delete_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char* name, UINT32 crc32, PIRP Irp, LIST_ENTRY* rollback) {
+    KEY searchkey;
+    traverse_ptr tp;
+    DIR_ITEM* xa;
+    ULONG size;
+    size_t namelen = strlen(name);
+    NTSTATUS Status;
+    
+    TRACE("(%p, %llx, %llx, %s, %08x)\n", Vcb, subvol->id, inode, name, crc32);
+    
+    searchkey.obj_id = inode;
+    searchkey.obj_type = TYPE_XATTR_ITEM;
+    searchkey.offset = crc32;
+    
+    Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return FALSE;
+    }
+    
+    if (keycmp(tp.item->key, searchkey)) { // no item exists for this hash
+        WARN("xattr %s not found\n", name);
+        
+        return FALSE;
+    }
+    
+    if (tp.item->size < sizeof(DIR_ITEM)) {
+        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, (ULONG)sizeof(DIR_ITEM));
+        
+        return FALSE;
+    }
+    
+    // size counts the bytes from the current record to the end of the item
+    size = tp.item->size;
+    xa = (DIR_ITEM*)tp.item->data;
+    
+    while (TRUE) {
+        ULONG oldxasize;
+        
+        if (size < sizeof(DIR_ITEM) || size < sizeof(DIR_ITEM) - 1 + xa->m + xa->n) {
+            ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+            
+            return FALSE;
+        }
+        
+        // DIR_ITEM already includes one byte of name[], hence the - 1
+        oldxasize = sizeof(DIR_ITEM) - 1 + xa->m + xa->n;
+        
+        if (xa->n == namelen && RtlCompareMemory(name, xa->name, xa->n) == xa->n) {
+            ULONG newsize;
+            UINT8 *newdata, *dioff;
+            
+            newsize = tp.item->size - oldxasize;
+            
+            delete_tree_item(Vcb, &tp, rollback);
+            
+            if (newsize == 0) {
+                TRACE("xattr %s deleted\n", name);
+                
+                return TRUE;
+            }
+            
+            // FIXME - deleting collisions almost certainly works, but we should test it properly anyway
+            newdata = ExAllocatePoolWithTag(PagedPool, newsize, ALLOC_TAG);
+            if (!newdata) {
+                ERR("out of memory\n");
+                return FALSE;
+            }
+            
+            // copy the records that precede the match...
+            if ((UINT8*)xa > tp.item->data) {
+                RtlCopyMemory(newdata, tp.item->data, (UINT8*)xa - tp.item->data);
+                dioff = newdata + ((UINT8*)xa - tp.item->data);
+            } else {
+                dioff = newdata;
+            }
+            
+            // ...and the records that follow it
+            if ((UINT8*)&xa->name[xa->n+xa->m] - tp.item->data < tp.item->size)
+                RtlCopyMemory(dioff, &xa->name[xa->n+xa->m], tp.item->size - ((UINT8*)&xa->name[xa->n+xa->m] - tp.item->data));
+            
+            if (!insert_tree_item(Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, newdata, newsize, NULL, Irp, rollback)) {
+                // the buffer was not inserted, so free it and report the failure
+                ERR("insert_tree_item failed\n");
+                ExFreePool(newdata);
+                return FALSE;
+            }
+            
+            return TRUE;
+        }
+        
+        // The truncation check above guarantees size >= oldxasize, so equality
+        // means this was the last record in the item.
+        if (size <= oldxasize) {
+            WARN("xattr %s not found\n", name);
+            
+            return FALSE;
+        }
+        
+        xa = (DIR_ITEM*)&xa->name[xa->m + xa->n];
+        size -= oldxasize;
+    }
+}
+
+/* Inserts an EXTENT_DATA item describing a hole of `length` bytes at file
+ * offset `start` for fcb's inode: a regular, uncompressed extent whose
+ * EXTENT_DATA2 has address and size 0 and num_bytes equal to `length`.
+ *
+ * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES if the item cannot be
+ * allocated, or STATUS_INTERNAL_ERROR if insert_tree_item fails, in which case
+ * the item is freed here. */
+static NTSTATUS insert_sparse_extent(fcb* fcb, UINT64 start, UINT64 length, PIRP Irp, LIST_ENTRY* rollback) {
+    EXTENT_DATA* ed;
+    EXTENT_DATA2* ed2;
+    
+    TRACE("((%llx, %llx), %llx, %llx)\n", fcb->subvol->id, fcb->inode, start, length);
+    
+    // EXTENT_DATA already includes one byte of data[], hence the - 1
+    ed = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
+    if (!ed) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+    
+    ed->generation = fcb->Vcb->superblock.generation;
+    ed->decoded_size = length;
+    ed->compression = BTRFS_COMPRESSION_NONE;
+    ed->encryption = BTRFS_ENCRYPTION_NONE;
+    ed->encoding = BTRFS_ENCODING_NONE;
+    ed->type = EXTENT_TYPE_REGULAR;
+    
+    ed2 = (EXTENT_DATA2*)ed->data;
+    ed2->address = 0;
+    ed2->size = 0;
+    ed2->offset = 0;
+    ed2->num_bytes = length;
+    
+    if (!insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, start, ed, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, Irp, rollback)) {
+        // the buffer was not inserted, so free it as the other insert_tree_item callers do
+        ERR("insert_tree_item failed\n");
+        ExFreePool(ed);
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+/* Queues an operation on tree `r` for later batch processing.
+ *
+ * batchlist holds one batch_root per tree; the one for `r` is created on first
+ * use. A batch_item carrying the key (objid, objtype, offset), the data
+ * pointer, its length and the operation is then linked into that root's item
+ * list: directly after the last existing item whose key compares lower, or at
+ * the front if there is none.
+ *
+ * Returns TRUE once the item is queued, FALSE if an allocation fails. The data
+ * pointer is only stored on the success path. */
+static BOOL insert_tree_item_batch(LIST_ENTRY* batchlist, device_extension* Vcb, root* r, UINT64 objid, UINT64 objtype, UINT64 offset,
+                                   void* data, UINT16 datalen, enum batch_operation operation, PIRP Irp, LIST_ENTRY* rollback) {
+    LIST_ENTRY* cur;
+    batch_root* target = NULL;
+    batch_item* item;
+
+    // find the batch_root already collecting items for this tree, if any
+    for (cur = batchlist->Flink; cur != batchlist; cur = cur->Flink) {
+        batch_root* candidate = CONTAINING_RECORD(cur, batch_root, list_entry);
+
+        if (candidate->r == r) {
+            target = candidate;
+            break;
+        }
+    }
+
+    if (target == NULL) {
+        target = ExAllocatePoolWithTag(PagedPool, sizeof(batch_root), ALLOC_TAG);
+        if (target == NULL) {
+            ERR("out of memory\n");
+            return FALSE;
+        }
+
+        target->r = r;
+        InitializeListHead(&target->items);
+        InsertTailList(batchlist, &target->list_entry);
+    }
+
+    item = ExAllocateFromPagedLookasideList(&Vcb->batch_item_lookaside);
+    if (item == NULL) {
+        ERR("out of memory\n");
+        return FALSE;
+    }
+
+    item->key.obj_id = objid;
+    item->key.obj_type = objtype;
+    item->key.offset = offset;
+    item->data = data;
+    item->datalen = datalen;
+    item->operation = operation;
+
+    // Walk backwards to the last item with a lower key. If the walk reaches the
+    // list head instead, cur is &target->items and the item goes to the front.
+    for (cur = target->items.Blink; cur != &target->items; cur = cur->Blink) {
+        batch_item* existing = CONTAINING_RECORD(cur, batch_item, list_entry);
+
+        if (keycmp(existing->key, item->key) == -1)
+            break;
+    }
+
+    // InsertHeadList links the new entry directly after cur
+    InsertHeadList(cur, &item->list_entry);
+
+    return TRUE;
+}
+
+typedef struct { // one on-disk extent referenced by a file, as tracked by rationalize_extents
+    UINT64 address; // on-disk address of the extent (EXTENT_DATA2.address)
+    UINT64 length; // size of the extent in bytes (EXTENT_DATA2.size)
+    UINT64 offset; // file offset of the start of the extent (ext->offset - ed2->offset)
+    BOOL changed; // set once a following range has been merged into this one
+    chunk* chunk; // chunk containing address; NULL until looked up
+    UINT64 skip_start; // smallest ed2->offset of any file extent using this address
+    UINT64 skip_end; // smallest (size - offset - num_bytes) of any file extent using this address
+    LIST_ENTRY list_entry; // link in rationalize_extents' local, address-sorted list
+} extent_range;
+
+static void rationalize_extents(fcb* fcb, PIRP Irp) {
+    LIST_ENTRY* le;
+    LIST_ENTRY extent_ranges;
+    extent_range* er;
+    BOOL changed = FALSE, truncating = FALSE;
+    UINT32 num_extents = 0;
+    /* Tidies the on-disk extents referenced by this file's uncompressed, unique
+     * REGULAR/PREALLOC extent items: trims an unreferenced head or tail off each
+     * extent, then merges extents that are adjacent on disk. Works on a
+     * temporary, address-sorted list of extent_range records, which is freed at
+     * the "end" label on every path. Errors are logged and abandon the work;
+     * nothing is returned to the caller. */
+    InitializeListHead(&extent_ranges);
+    // Pass 1: build one extent_range per distinct on-disk extent, sorted by address.
+    le = fcb->extents.Flink;
+    while (le != &fcb->extents) {
+        extent* ext = CONTAINING_RECORD(le, extent, list_entry);
+        
+        if ((ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) && ext->data->compression == BTRFS_COMPRESSION_NONE && ext->unique) {
+            EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data;
+            
+            if (ed2->size != 0) {
+                LIST_ENTRY* le2;
+                // look for this address in the list, stopping at the first range beyond it
+                le2 = extent_ranges.Flink;
+                while (le2 != &extent_ranges) {
+                    extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);
+                    // several file extents may use one on-disk extent: keep the smallest unused head and tail
+                    if (er2->address == ed2->address) {
+                        er2->skip_start = min(er2->skip_start, ed2->offset);
+                        er2->skip_end = min(er2->skip_end, ed2->size - ed2->offset - ed2->num_bytes);
+                        goto cont;
+                    } else if (er2->address > ed2->address)
+                        break;
+                    
+                    le2 = le2->Flink;
+                }
+                // first reference to this address: create a range for it
+                er = ExAllocatePoolWithTag(PagedPool, sizeof(extent_range), ALLOC_TAG); // FIXME - should be from lookaside?
+                if (!er) {
+                    ERR("out of memory\n");
+                    goto end;
+                }
+                
+                er->address = ed2->address;
+                er->length = ed2->size;
+                er->offset = ext->offset - ed2->offset;
+                er->changed = FALSE;
+                er->chunk = NULL;
+                er->skip_start = ed2->offset;
+                er->skip_end = ed2->size - ed2->offset - ed2->num_bytes;
+                // an unused head or tail means the trimming pass below is entered
+                if (er->skip_start != 0 || er->skip_end != 0)
+                    truncating = TRUE;
+                // le2 is the first range with a higher address (or the list head): insert just before it
+                InsertHeadList(le2->Blink, &er->list_entry);
+                num_extents++;
+            }
+        }
+        
+cont:
+        le = le->Flink;
+    }
+    // no ranges, or a single range with nothing to trim: nothing to do
+    if (num_extents == 0 || (num_extents == 1 && !truncating))
+        goto end;
+    // Pass 2: look up the chunk that each range lies in.
+    le = extent_ranges.Flink;
+    while (le != &extent_ranges) {
+        er = CONTAINING_RECORD(le, extent_range, list_entry);
+        
+        if (!er->chunk) {
+            LIST_ENTRY* le2;
+            
+            er->chunk = get_chunk_from_address(fcb->Vcb, er->address);
+            
+            if (!er->chunk) {
+                ERR("get_chunk_from_address(%llx) failed\n", er->address);
+                goto end;
+            }
+            // later ranges whose address falls inside this chunk reuse the lookup
+            le2 = le->Flink;
+            while (le2 != &extent_ranges) {
+                extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);
+                
+                if (!er2->chunk && er2->address >= er->chunk->offset && er2->address < er->chunk->offset + er->chunk->chunk_item->size)
+                    er2->chunk = er->chunk;
+                
+                le2 = le2->Flink;
+            }
+        }
+        
+        le = le->Flink;
+    }
+    
+    if (truncating) {
+        // truncate beginning or end of extent if unused
+        
+        le = extent_ranges.Flink;
+        while (le != &extent_ranges) {
+            er = CONTAINING_RECORD(le, extent_range, list_entry);
+            // head: the first skip_start bytes are referenced by none of the file's extent items
+            if (er->skip_start > 0) {
+                LIST_ENTRY* le2 = fcb->extents.Flink;
+                while (le2 != &fcb->extents) {
+                    extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
+                    
+                    if ((ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) && ext->data->compression == BTRFS_COMPRESSION_NONE && ext->unique) {
+                        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data;
+                
+                        if (ed2->size != 0 && ed2->address == er->address) {
+                            NTSTATUS Status;
+                            // count -1 against the old address/size here, count +1 against the adjusted values below
+                            Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
+                                                               -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, TRUE, Irp);
+                            if (!NT_SUCCESS(Status)) {
+                                ERR("update_changed_extent_ref returned %08x\n", Status);
+                                goto end;
+                            }
+                            // the extent now starts skip_start bytes later, so the item's offset into it shrinks to match
+                            ext->data->decoded_size -= er->skip_start;
+                            ed2->size -= er->skip_start;
+                            ed2->address += er->skip_start;
+                            ed2->offset -= er->skip_start;
+                            
+                            add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
+                                                   1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
+                        }
+                    }
+                    
+                    le2 = le2->Flink;
+                }
+                // unless the inode has NODATASUM set, submit a "deleted" changed_sector covering the trimmed head
+                if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
+                    LIST_ENTRY changed_sector_list;
+                    
+                    changed_sector* sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
+                    if (!sc) {
+                        ERR("out of memory\n");
+                        goto end;
+                    }
+                    
+                    sc->ol.key = er->address;
+                    sc->checksums = NULL;
+                    sc->length = er->skip_start / fcb->Vcb->superblock.sector_size;
+
+                    sc->deleted = TRUE;
+                    
+                    InitializeListHead(&changed_sector_list);
+                    insert_into_ordered_list(&changed_sector_list, &sc->ol);
+                    
+                    commit_checksum_changes(fcb->Vcb, &changed_sector_list);
+                }
+                
+                decrease_chunk_usage(er->chunk, er->skip_start);
+                
+                space_list_add(fcb->Vcb, er->chunk, TRUE, er->address, er->skip_start, NULL);
+                // the range itself now starts skip_start bytes later
+                er->address += er->skip_start;
+                er->length -= er->skip_start;
+            }
+            // tail: the last skip_end bytes are referenced by none of the file's extent items
+            if (er->skip_end > 0) {
+                LIST_ENTRY* le2 = fcb->extents.Flink;
+                while (le2 != &fcb->extents) {
+                    extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
+                    
+                    if ((ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) && ext->data->compression == BTRFS_COMPRESSION_NONE && ext->unique) {
+                        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data;
+                
+                        if (ed2->size != 0 && ed2->address == er->address) {
+                            NTSTATUS Status;
+                            // count -1 against the old size here, count +1 against the shortened size below
+                            Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
+                                                               -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, TRUE, Irp);
+                            if (!NT_SUCCESS(Status)) {
+                                ERR("update_changed_extent_ref returned %08x\n", Status);
+                                goto end;
+                            }
+                            // only the size shrinks; the address and the item's offset are unchanged
+                            ext->data->decoded_size -= er->skip_end;
+                            ed2->size -= er->skip_end;
+                            
+                            add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
+                                                   1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
+                        }
+                    }
+                    
+                    le2 = le2->Flink;
+                }
+                // unless the inode has NODATASUM set, submit a "deleted" changed_sector covering the trimmed tail
+                if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
+                    LIST_ENTRY changed_sector_list;
+                    
+                    changed_sector* sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
+                    if (!sc) {
+                        ERR("out of memory\n");
+                        goto end;
+                    }
+                    // the trimmed tail begins at address + length - skip_end
+                    sc->ol.key = er->address + er->length - er->skip_end;
+                    sc->checksums = NULL;
+                    sc->length = er->skip_end / fcb->Vcb->superblock.sector_size;
+
+                    sc->deleted = TRUE;
+                    
+                    InitializeListHead(&changed_sector_list);
+                    insert_into_ordered_list(&changed_sector_list, &sc->ol);
+                    
+                    commit_checksum_changes(fcb->Vcb, &changed_sector_list);
+                }
+                
+                decrease_chunk_usage(er->chunk, er->skip_end);
+                
+                space_list_add(fcb->Vcb, er->chunk, TRUE, er->address + er->length - er->skip_end, er->skip_end, NULL);
+                
+                er->length -= er->skip_end;
+            }
+            
+            le = le->Flink;
+        }
+    }
+    // merging needs at least two ranges
+    if (num_extents < 2)
+        goto end;
+    
+    // merge together adjacent extents
+    le = extent_ranges.Flink;
+    while (le != &extent_ranges) {
+        er = CONTAINING_RECORD(le, extent_range, list_entry);
+        /* The next range is folded into er when both lie in the same chunk, it
+         * starts on disk exactly where er ends, its file offset is not below
+         * er->offset + er->length, and the combined length does not exceed
+         * MAX_EXTENT_SIZE. */
+        if (le->Flink != &extent_ranges && er->length < MAX_EXTENT_SIZE) {
+            extent_range* er2 = CONTAINING_RECORD(le->Flink, extent_range, list_entry);
+            
+            if (er->chunk == er2->chunk) {
+                if (er2->address == er->address + er->length && er2->offset >= er->offset + er->length) {
+                    if (er->length + er2->length <= MAX_EXTENT_SIZE) {
+                        er->length += er2->length;
+                        er->changed = TRUE;
+                        
+                        RemoveEntryList(&er2->list_entry);
+                        ExFreePool(er2);
+                        
+                        changed = TRUE;
+                        continue; // le is not advanced: er is re-tested against its new neighbour
+//                     } else { // FIXME - make changing of beginning of offset work
+//                         er2->length = er2->address + er->length - er->address - MAX_EXTENT_SIZE;
+//                         er2->address = er->address + MAX_EXTENT_SIZE;
+//                         er->length = MAX_EXTENT_SIZE;
+                    }
+                }
+            }
+        }
+        
+        le = le->Flink;
+    }
+    
+    if (!changed)
+        goto end;
+    // Final pass: repoint each file extent that lies inside a merged range at the enlarged extent.
+    le = fcb->extents.Flink;
+    while (le != &fcb->extents) {
+        extent* ext = CONTAINING_RECORD(le, extent, list_entry);
+        
+        if ((ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) && ext->data->compression == BTRFS_COMPRESSION_NONE && ext->unique) {
+            EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data;
+            
+            if (ed2->size != 0) {
+                LIST_ENTRY* le2;
+                
+                le2 = extent_ranges.Flink;
+                while (le2 != &extent_ranges) {
+                    extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);
+                    // the extent lies wholly inside a range that merging has enlarged
+                    if (ed2->address >= er2->address && ed2->address + ed2->size <= er2->address + er2->length && er2->changed) {
+                        NTSTATUS Status;
+                        
+                        Status = update_changed_extent_ref(fcb->Vcb, er2->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
+                                                           -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, TRUE, Irp);
+                        if (!NT_SUCCESS(Status)) {
+                            ERR("update_changed_extent_ref returned %08x\n", Status);
+                            goto end;
+                        }
+                        // re-express the item relative to the start of the merged range
+                        ed2->offset += ed2->address - er2->address;
+                        ed2->address = er2->address;
+                        ed2->size = er2->length;
+                        ext->data->decoded_size = ed2->size;
+                        
+                        add_changed_extent_ref(er2->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
+                                               1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
+                        
+                        break;
+                    }
+                    
+                    le2 = le2->Flink;
+                }
+            }
+        }
+        
+        le = le->Flink;
+    }
+    // common exit: free the temporary range list
+end:
+    while (!IsListEmpty(&extent_ranges)) {
+        le = RemoveHeadList(&extent_ranges);
+        er = CONTAINING_RECORD(le, extent_range, list_entry);
+        
+        ExFreePool(er);
+    }
+}
+
+void flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
+    traverse_ptr tp;
+    KEY searchkey;
+    NTSTATUS Status;
+    INODE_ITEM* ii;
+    UINT64 ii_offset;
+#ifdef DEBUG_PARANOID
+    UINT64 old_size = 0;
+    BOOL extents_changed;
+#endif
+    
+//     ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE);
+    
+    while (!IsListEmpty(&fcb->index_list)) {
+        LIST_ENTRY* le = RemoveHeadList(&fcb->index_list);
+        index_entry* ie = CONTAINING_RECORD(le, index_entry, list_entry);
+
+        if (ie->utf8.Buffer) ExFreePool(ie->utf8.Buffer);
+        if (ie->filepart_uc.Buffer) ExFreePool(ie->filepart_uc.Buffer);
+        ExFreePool(ie);
+    }
+    
+    fcb->index_loaded = FALSE;
+    
+    if (fcb->ads) {
+        if (fcb->deleted)
+            delete_xattr(fcb->Vcb, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adshash, Irp, rollback);
+        else {
+            Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adshash, (UINT8*)fcb->adsdata.Buffer, fcb->adsdata.Length, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("set_xattr returned %08x\n", Status);
+                goto end;
+            }
+        }
+        goto end;
+    }
+    
+#ifdef DEBUG_PARANOID
+    extents_changed = fcb->extents_changed;
+#endif
+    
+    if (fcb->extents_changed) {
+        BOOL b;
+        traverse_ptr next_tp;
+        LIST_ENTRY* le;
+        BOOL prealloc = FALSE, extents_inline = FALSE;
+        UINT64 last_end;
+        
+        // delete ignored extent items
+        le = fcb->extents.Flink;
+        while (le != &fcb->extents) {
+            LIST_ENTRY* le2 = le->Flink;
+            extent* ext = CONTAINING_RECORD(le, extent, list_entry);
+            
+            if (ext->ignore) {
+                RemoveEntryList(&ext->list_entry);
+                ExFreePool(ext->data);
+                ExFreePool(ext);
+            }
+            
+            le = le2;
+        }
+        
+        if (!IsListEmpty(&fcb->extents)) {
+            rationalize_extents(fcb, Irp);
+            
+            // merge together adjacent EXTENT_DATAs pointing to same extent
+            
+            le = fcb->extents.Flink;
+            while (le != &fcb->extents) {
+                LIST_ENTRY* le2 = le->Flink;
+                extent* ext = CONTAINING_RECORD(le, extent, list_entry);
+                
+                if ((ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) && le->Flink != &fcb->extents) {
+                    extent* nextext = CONTAINING_RECORD(le->Flink, extent, list_entry);
+                        
+                    if (ext->data->type == nextext->data->type) {
+                        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data;
+                        EXTENT_DATA2* ned2 = (EXTENT_DATA2*)nextext->data->data;
+                        
+                        if (ed2->size != 0 && ed2->address == ned2->address && ed2->size == ned2->size &&
+                            nextext->offset == ext->offset + ed2->num_bytes && ned2->offset == ed2->offset + ed2->num_bytes) {
+                            chunk* c;
+                        
+                            ext->data->generation = fcb->Vcb->superblock.generation;
+                            ed2->num_bytes += ned2->num_bytes;
+                        
+                            RemoveEntryList(&nextext->list_entry);
+                            ExFreePool(nextext->data);
+                            ExFreePool(nextext);
+                        
+                            c = get_chunk_from_address(fcb->Vcb, ed2->address);
+                                
+                            if (!c) {
+                                ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
+                            } else {
+                                Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1,
+                                                                fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
+                                if (!NT_SUCCESS(Status)) {
+                                    ERR("update_changed_extent_ref returned %08x\n", Status);
+                                    goto end;
+                                }
+                            }
+                        
+                            le2 = le;
+                        }
+                    }
+                }
+                
+                le = le2;
+            }
+        }
+        
+        if (!fcb->created) {
+            // delete existing EXTENT_DATA items
+            
+            searchkey.obj_id = fcb->inode;
+            searchkey.obj_type = TYPE_EXTENT_DATA;
+            searchkey.offset = 0;
+            
+            Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
+            if (!NT_SUCCESS(Status)) {
+                ERR("error - find_item returned %08x\n", Status);
+                goto end;
+            }
+            
+            do {
+                if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type)
+                    delete_tree_item(fcb->Vcb, &tp, rollback);
+                
+                b = find_next_item(fcb->Vcb, &tp, &next_tp, FALSE, Irp);
+                
+                if (b) {
+                    tp = next_tp;
+                    
+                    if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type))
+                        break;
+                }
+            } while (b);
+        }
+        
+        if (!fcb->deleted) {
+            // add new EXTENT_DATAs
+            
+            last_end = 0;
+            
+            le = fcb->extents.Flink;
+            while (le != &fcb->extents) {
+                extent* ext = CONTAINING_RECORD(le, extent, list_entry);
+                EXTENT_DATA* ed;
+                
+                if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && ext->offset > last_end) {
+                    Status = insert_sparse_extent(fcb, last_end, ext->offset - last_end, Irp, rollback);
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("insert_sparse_extent returned %08x\n", Status);
+                        goto end;
+                    }
+                }
+                    
+                ed = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG);
+                if (!ed) {
+                    ERR("out of memory\n");
+                    Status = STATUS_INSUFFICIENT_RESOURCES;
+                    goto end;
+                }
+                
+                RtlCopyMemory(ed, ext->data, ext->datalen);
+                
+                if (!insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, ext->offset,
+                                    ed, ext->datalen, Batch_Insert, Irp, rollback)) {
+                    ERR("insert_tree_item_batch failed\n");
+                    Status = STATUS_INTERNAL_ERROR;
+                    goto end;
+                }
+                
+                if (ext->datalen >= sizeof(EXTENT_DATA) && ed->type == EXTENT_TYPE_PREALLOC)
+                    prealloc = TRUE;
+                
+                if (ext->datalen >= sizeof(EXTENT_DATA) && ed->type == EXTENT_TYPE_INLINE)
+                    extents_inline = TRUE;
+                
+                if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES)) {
+                    if (ed->type == EXTENT_TYPE_INLINE)
+                        last_end = ext->offset + ed->decoded_size;
+                    else {
+                        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
+                        
+                        last_end = ext->offset + ed2->num_bytes;
+                    }
+                }
+                
+                le = le->Flink;
+            }
+            
+            if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && !extents_inline &&
+                sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) > last_end) {
+                Status = insert_sparse_extent(fcb, last_end, sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) - last_end, Irp, rollback);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("insert_sparse_extent returned %08x\n", Status);
+                    goto end;
+                }
+            }
+            
+            // update prealloc flag in INODE_ITEM
+            
+            if (!prealloc)
+                fcb->inode_item.flags &= ~BTRFS_INODE_PREALLOC;
+            else
+                fcb->inode_item.flags |= BTRFS_INODE_PREALLOC;
+            
+            fcb->inode_item_changed = TRUE;
+        }
+        
+        fcb->extents_changed = FALSE;
+    }
+    
+    if ((!fcb->created && fcb->inode_item_changed) || cache) {
+        searchkey.obj_id = fcb->inode;
+        searchkey.obj_type = TYPE_INODE_ITEM;
+        searchkey.offset = 0xffffffffffffffff;
+        
+        Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            goto end;
+        }
+        
+        if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
+            if (cache) {
+                ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
+                if (!ii) {
+                    ERR("out of memory\n");
+                    goto end;
+                }
+                
+                RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM));
+                
+                if (!insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp, rollback)) {
+                    ERR("insert_tree_item failed\n");
+                    goto end;
+                }
+                
+                ii_offset = 0;
+            } else {
+                ERR("could not find INODE_ITEM for inode %llx in subvol %llx\n", fcb->inode, fcb->subvol->id);
+                int3;
+                goto end;
+            }
+        } else {
+#ifdef DEBUG_PARANOID
+            INODE_ITEM* ii2 = (INODE_ITEM*)tp.item->data;
+            
+            old_size = ii2->st_size;
+#endif
+            
+            ii_offset = tp.item->key.offset;
+        }
+        
+        if (!cache)
+            delete_tree_item(fcb->Vcb, &tp, rollback);
+        else {
+            searchkey.obj_id = fcb->inode;
+            searchkey.obj_type = TYPE_INODE_ITEM;
+            searchkey.offset = ii_offset;
+            
+            Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
+            if (!NT_SUCCESS(Status)) {
+                ERR("error - find_item returned %08x\n", Status);
+                goto end;
+            }
+            
+            if (keycmp(tp.item->key, searchkey)) {
+                ERR("could not find INODE_ITEM for inode %llx in subvol %llx\n", fcb->inode, fcb->subvol->id);
+                int3;
+                goto end;
+            } else
+                RtlCopyMemory(tp.item->data, &fcb->inode_item, min(tp.item->size, sizeof(INODE_ITEM)));
+        }
+    } else
+        ii_offset = 0;
+    
+#ifdef DEBUG_PARANOID
+    if (!extents_changed && fcb->type != BTRFS_TYPE_DIRECTORY && old_size != fcb->inode_item.st_size) {
+        ERR("error - size has changed but extents not marked as changed\n");
+        int3;
+    }
+#endif
+    
+    fcb->created = FALSE;
+        
+    if (fcb->deleted) {
+        traverse_ptr tp2;
+        
+        // delete XATTR_ITEMs
+        
+        searchkey.obj_id = fcb->inode;
+        searchkey.obj_type = TYPE_XATTR_ITEM;
+        searchkey.offset = 0;
+        
+        Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            goto end;
+        }
+    
+        while (find_next_item(fcb->Vcb, &tp, &tp2, FALSE, Irp)) {
+            tp = tp2;
+            
+            if (tp.item->key.obj_id == fcb->inode) {
+                // FIXME - do metadata thing here too?
+                if (tp.item->key.obj_type == TYPE_XATTR_ITEM) {
+                    delete_tree_item(fcb->Vcb, &tp, rollback);
+                    TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+                }
+            } else
+                break;
+        }
+        
+        goto end;
+    }
+    
+    if (!cache && fcb->inode_item_changed) {
+        ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
+        if (!ii) {
+            ERR("out of memory\n");
+            goto end;
+        }
+        
+        RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM));
+        
+        if (!insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, ii_offset, ii, sizeof(INODE_ITEM),
+                                    Batch_Insert, Irp, rollback)) {
+            ERR("insert_tree_item_batch failed\n");
+            goto end;
+        }
+        
+        fcb->inode_item_changed = FALSE;
+    }
+    
+    if (fcb->sd_dirty) {
+        Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_NTACL, EA_NTACL_HASH, (UINT8*)fcb->sd, RtlLengthSecurityDescriptor(fcb->sd), Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("set_xattr returned %08x\n", Status);
+        }
+        
+        fcb->sd_dirty = FALSE;
+    }
+    
+    if (fcb->atts_changed) {
+        if (!fcb->atts_deleted) {
+            UINT8 val[16], *val2;
+            ULONG atts = fcb->atts;
+            
+            TRACE("inserting new DOSATTRIB xattr\n");
+            
+            val2 = &val[sizeof(val) - 1];
+            
+            do {
+                UINT8 c = atts % 16;
+                *val2 = (c >= 0 && c <= 9) ? (c + '0') : (c - 0xa + 'a');
+                
+                val2--;
+                atts >>= 4;
+            } while (atts != 0);
+            
+            *val2 = 'x';
+            val2--;
+            *val2 = '0';
+            
+            Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_DOSATTRIB, EA_DOSATTRIB_HASH, val2, val + sizeof(val) - val2, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("set_xattr returned %08x\n", Status);
+                goto end;
+            }
+        } else
+            delete_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_DOSATTRIB, EA_DOSATTRIB_HASH, Irp, rollback);
+        
+        fcb->atts_changed = FALSE;
+        fcb->atts_deleted = FALSE;
+    }
+    
+    if (fcb->reparse_xattr_changed) {
+        if (fcb->reparse_xattr.Buffer && fcb->reparse_xattr.Length > 0) {
+            Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_REPARSE, EA_REPARSE_HASH, (UINT8*)fcb->reparse_xattr.Buffer, fcb->reparse_xattr.Length, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("set_xattr returned %08x\n", Status);
+                goto end;
+            }
+        } else
+            delete_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_REPARSE, EA_REPARSE_HASH, Irp, rollback);
+        
+        fcb->reparse_xattr_changed = FALSE;
+    }
+    
+    if (fcb->ea_changed) {
+        if (fcb->ea_xattr.Buffer && fcb->ea_xattr.Length > 0) {
+            Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_EA, EA_EA_HASH, (UINT8*)fcb->ea_xattr.Buffer, fcb->ea_xattr.Length, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("set_xattr returned %08x\n", Status);
+                goto end;
+            }
+        } else
+            delete_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_EA, EA_EA_HASH, Irp, rollback);
+        
+        fcb->ea_changed = FALSE;
+    }
+    
+end:
+    fcb->dirty = FALSE;
+    
+//     ExReleaseResourceLite(fcb->Header.Resource);
+    return;
+}
+
+// Removes chunk c from the volume and frees it.
+//
+// In order, this:
+//   1. marks c->cache as deleted, flushes and releases it, and deletes the
+//      (FREE_SPACE_CACHE_ID, 0, c->offset) item from the root tree if present;
+//   2. for every stripe, gives the stripe's space back to its device (bytes_used and
+//      the device's space list), deleting the DEV_EXTENT item when c->created is not set;
+//   3. rewrites the DEV_ITEM of each distinct device used by the chunk;
+//   4. when c->created is not set, deletes the CHUNK_ITEM and BLOCK_GROUP_ITEM;
+//   5. unlinks c from its lists and frees c together with its space lists and locks.
+//
+// Vcb       - volume the chunk belongs to
+// c         - chunk to drop; freed on success
+// batchlist - passed through to flush_fcb for the cache inode
+// Irp       - passed through to the tree functions
+// rollback  - rollback list that receives the tree and space-list changes
+//
+// Returns STATUS_SUCCESS once c has been freed; the caller must not touch c (including
+// c->lock) afterwards. On failure the function returns early with c still allocated
+// and still linked into its lists, but with whatever changes were already made.
+static NTSTATUS drop_chunk(device_extension* Vcb, chunk* c, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
+    KEY searchkey;
+    traverse_ptr tp;
+    UINT64 i, factor;
+    CHUNK_ITEM_STRIPE* cis;
+    
+    TRACE("dropping chunk %llx\n", c->offset);
+    
+    // remove free space cache
+    if (c->cache) {
+        c->cache->deleted = TRUE;
+        
+        flush_fcb(c->cache, TRUE, batchlist, Irp, rollback);
+        
+        free_fcb(c->cache);
+        
+        searchkey.obj_id = FREE_SPACE_CACHE_ID;
+        searchkey.obj_type = 0;
+        searchkey.offset = c->offset;
+        
+        Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            return Status;
+        }
+
+        if (!keycmp(tp.item->key, searchkey)) {
+            delete_tree_item(Vcb, &tp, rollback);
+        }
+    }
+    
+    // factor is the divisor used to get the per-stripe length from the chunk size
+    // for chunks that have no DEV_EXTENT items to read the length from
+    if (c->chunk_item->type & BLOCK_FLAG_RAID0)
+        factor = c->chunk_item->num_stripes;
+    else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
+        factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
+    else // SINGLE, DUPLICATE, RAID1
+        factor = 1;
+    
+    // the stripe array immediately follows the CHUNK_ITEM header
+    cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
+    for (i = 0; i < c->chunk_item->num_stripes; i++) {
+        if (!c->created) {
+            // remove DEV_EXTENTs from tree 4
+            searchkey.obj_id = cis[i].dev_id;
+            searchkey.obj_type = TYPE_DEV_EXTENT;
+            searchkey.offset = cis[i].offset;
+            
+            Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, Irp);
+            if (!NT_SUCCESS(Status)) {
+                ERR("error - find_item returned %08x\n", Status);
+                return Status;
+            }
+            
+            if (!keycmp(tp.item->key, searchkey)) {
+                delete_tree_item(Vcb, &tp, rollback);
+                
+                // the length released is the one recorded in the DEV_EXTENT itself
+                if (tp.item->size >= sizeof(DEV_EXTENT)) {
+                    DEV_EXTENT* de = (DEV_EXTENT*)tp.item->data;
+                    
+                    c->devices[i]->devitem.bytes_used -= de->length;
+                    
+                    space_list_add2(Vcb, &c->devices[i]->space, NULL, cis[i].offset, de->length, rollback);
+                }
+            } else
+                WARN("could not find (%llx,%x,%llx) in dev tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
+        } else {
+            // no DEV_EXTENT is looked up for this chunk, so derive the stripe length
+            UINT64 len = c->chunk_item->size / factor;
+            
+            c->devices[i]->devitem.bytes_used -= len;
+            space_list_add2(Vcb, &c->devices[i]->space, NULL, cis[i].offset, len, rollback);
+        }
+    }
+    
+    // modify DEV_ITEMs in chunk tree
+    for (i = 0; i < c->chunk_item->num_stripes; i++) {
+        if (c->devices[i]) {
+            UINT64 j;
+            DEV_ITEM* di;
+            
+            searchkey.obj_id = 1;
+            searchkey.obj_type = TYPE_DEV_ITEM;
+            searchkey.offset = c->devices[i]->devitem.dev_id;
+            
+            Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp);
+            if (!NT_SUCCESS(Status)) {
+                ERR("error - find_item returned %08x\n", Status);
+                return Status;
+            }
+            
+            if (keycmp(tp.item->key, searchkey)) {
+                ERR("error - could not find DEV_ITEM for device %llx\n", searchkey.offset);
+                return STATUS_INTERNAL_ERROR;
+            }
+            
+            delete_tree_item(Vcb, &tp, rollback);
+            
+            di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
+            if (!di) {
+                ERR("out of memory\n");
+                return STATUS_INSUFFICIENT_RESOURCES;
+            }
+            
+            RtlCopyMemory(di, &c->devices[i]->devitem, sizeof(DEV_ITEM));
+            
+            if (!insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, c->devices[i]->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp, rollback)) {
+                ERR("insert_tree_item failed\n");
+                // the buffer was not inserted, so it is still ours to free
+                ExFreePool(di);
+                return STATUS_INTERNAL_ERROR;
+            }
+            
+            // a device can back several stripes; clear the later duplicates so that
+            // its DEV_ITEM is only rewritten once (c->devices is freed below anyway)
+            for (j = i + 1; j < c->chunk_item->num_stripes; j++) {
+                if (c->devices[j] == c->devices[i])
+                    c->devices[j] = NULL;
+            }
+        }
+    }
+    
+    if (!c->created) {
+        // remove CHUNK_ITEM from chunk tree
+        searchkey.obj_id = 0x100;
+        searchkey.obj_type = TYPE_CHUNK_ITEM;
+        searchkey.offset = c->offset;
+        
+        Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            return Status;
+        }
+        
+        if (!keycmp(tp.item->key, searchkey))
+            delete_tree_item(Vcb, &tp, rollback);
+        else
+            WARN("could not find CHUNK_ITEM for chunk %llx\n", c->offset);
+        
+        // remove BLOCK_GROUP_ITEM from extent tree
+        searchkey.obj_id = c->offset;
+        searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
+        searchkey.offset = 0xffffffffffffffff;
+        
+        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            return Status;
+        }
+        
+        // the key offset is not known here, so only object id and type are compared
+        if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type)
+            delete_tree_item(Vcb, &tp, rollback);
+        else
+            WARN("could not find BLOCK_GROUP_ITEM for chunk %llx\n", c->offset);
+    }
+    
+    if (c->chunk_item->type & BLOCK_FLAG_SYSTEM)
+        remove_from_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset);
+    
+    RemoveEntryList(&c->list_entry);
+    
+    // a non-NULL Flink means the chunk is on the changed list as well
+    if (c->list_entry_changed.Flink)
+        RemoveEntryList(&c->list_entry_changed);
+    
+    ExFreePool(c->chunk_item);
+    ExFreePool(c->devices);
+    
+    while (!IsListEmpty(&c->space)) {
+        space* s = CONTAINING_RECORD(c->space.Flink, space, list_entry);
+        
+        RemoveEntryList(&s->list_entry);
+        ExFreePool(s);
+    }
+    
+    while (!IsListEmpty(&c->deleting)) {
+        space* s = CONTAINING_RECORD(c->deleting.Flink, space, list_entry);
+        
+        RemoveEntryList(&s->list_entry);
+        ExFreePool(s);
+    }
+    
+    ExDeleteResourceLite(&c->lock);
+    ExDeleteResourceLite(&c->changed_extents_lock);
+
+    ExFreePool(c);
+    
+    return STATUS_SUCCESS;
+}
+
+// Walks Vcb->chunks_changed and, for each chunk on it, either drops the chunk (when
+// nothing but its own free-space cache is left in it) or calls create_chunk for it
+// (when c->created is set).
+//
+// Vcb->chunk_lock is held exclusively for the whole walk, and each chunk's own lock
+// is held exclusively while that chunk is processed.
+//
+// Returns STATUS_SUCCESS, or the first failure status from drop_chunk / create_chunk.
+static NTSTATUS update_chunks(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
+    LIST_ENTRY* entry = Vcb->chunks_changed.Flink;
+    LIST_ENTRY* next;
+    
+    ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
+    
+    // FIXME - do tree chunks before data chunks
+    
+    for (; entry != &Vcb->chunks_changed; entry = next) {
+        chunk* c = CONTAINING_RECORD(entry, chunk, list_entry_changed);
+        UINT64 remaining;
+        NTSTATUS Status;
+        
+        // fetch the successor first: drop_chunk unlinks and frees c
+        next = entry->Flink;
+        
+        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+        
+        remaining = c->used;
+        
+        // subtract self-hosted cache
+        if (remaining > 0 && (c->chunk_item->type & BLOCK_FLAG_DATA) && c->cache && c->cache->inode_item.st_size == c->used) {
+            LIST_ENTRY* cle;
+            
+            for (cle = c->cache->extents.Flink; cle != &c->cache->extents; cle = cle->Flink) {
+                extent* ext = CONTAINING_RECORD(cle, extent, list_entry);
+                EXTENT_DATA* ed = ext->data;
+                EXTENT_DATA2* ed2;
+                
+                if (ext->ignore)
+                    continue;
+                
+                if (ext->datalen < sizeof(EXTENT_DATA)) {
+                    ERR("extent %llx was %u bytes, expected at least %u\n", ext->offset, ext->datalen, sizeof(EXTENT_DATA));
+                    break;
+                }
+                
+                // only regular and preallocated extents are counted
+                if (ed->type != EXTENT_TYPE_REGULAR && ed->type != EXTENT_TYPE_PREALLOC)
+                    continue;
+                
+                if (ext->datalen < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
+                    ERR("extent %llx was %u bytes, expected at least %u\n", ext->offset, ext->datalen,
+                        sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
+                    break;
+                }
+                
+                ed2 = (EXTENT_DATA2*)ed->data;
+                
+                // count the extent only if it lies entirely inside this chunk
+                if (ed2->size != 0 && ed2->address >= c->offset && ed2->address + ed2->size <= c->offset + c->chunk_item->size)
+                    remaining -= ed2->size;
+            }
+        }
+        
+        if (remaining == 0) {
+            Status = drop_chunk(Vcb, c, batchlist, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("drop_chunk returned %08x\n", Status);
+                ExReleaseResourceLite(&c->lock);
+                ExReleaseResourceLite(&Vcb->chunk_lock);
+                return Status;
+            }
+            
+            // c and its lock no longer exist, so there is nothing to release
+            continue;
+        }
+        
+        if (c->created) {
+            Status = create_chunk(Vcb, c, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("create_chunk returned %08x\n", Status);
+                ExReleaseResourceLite(&c->lock);
+                ExReleaseResourceLite(&Vcb->chunk_lock);
+                return Status;
+            }
+        }
+        
+        ExReleaseResourceLite(&c->lock);
+    }
+    
+    ExReleaseResourceLite(&Vcb->chunk_lock);
+    
+    return STATUS_SUCCESS;
+}
+
+// Removes one entry from the ROOT_REF item (parsubvolid, TYPE_ROOT_REF, subvolid) in
+// the root tree. The entry removed is the one whose dir equals parinode and whose
+// name equals utf8. The item can hold several packed entries: if the matching entry
+// is the only one the item is deleted, otherwise it is replaced by a copy without
+// that entry.
+//
+// Returns:
+//   STATUS_SUCCESS                - entry removed; also returned when the item exists
+//                                   but holds no matching entry or is truncated
+//   STATUS_NOT_FOUND              - no ROOT_REF item with that key
+//   STATUS_INTERNAL_ERROR         - item shorter than a ROOT_REF, or the replacement
+//                                   item could not be inserted
+//   STATUS_INSUFFICIENT_RESOURCES - out of memory
+//   any failure status returned by find_item
+static NTSTATUS delete_root_ref(device_extension* Vcb, UINT64 subvolid, UINT64 parsubvolid, UINT64 parinode, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback) {
+    KEY searchkey;
+    traverse_ptr tp;
+    NTSTATUS Status;
+    
+    searchkey.obj_id = parsubvolid;
+    searchkey.obj_type = TYPE_ROOT_REF;
+    searchkey.offset = subvolid;
+    
+    Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return Status;
+    }
+    
+    if (!keycmp(searchkey, tp.item->key)) {
+        if (tp.item->size < sizeof(ROOT_REF)) {
+            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF));
+            return STATUS_INTERNAL_ERROR;
+        } else {
+            ROOT_REF* rr;
+            ULONG len;
+            
+            // rr walks the packed entries; len is the number of bytes from rr to the end
+            rr = (ROOT_REF*)tp.item->data;
+            len = tp.item->size;
+            
+            do {
+                ULONG itemlen;
+                
+                if (len < sizeof(ROOT_REF) || len < sizeof(ROOT_REF) - 1 + rr->n) {
+                    ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+                    break;
+                }
+                
+                // size of this entry: the ROOT_REF header plus its name
+                itemlen = sizeof(ROOT_REF) - sizeof(char) + rr->n;
+                
+                if (rr->dir == parinode && rr->n == utf8->Length && RtlCompareMemory(rr->name, utf8->Buffer, rr->n) == rr->n) {
+                    ULONG newlen = tp.item->size - itemlen;
+                    
+                    delete_tree_item(Vcb, &tp, rollback);
+                    
+                    if (newlen == 0) {
+                        TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+                    } else {
+                        UINT8 *newrr = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *rroff;
+                        
+                        if (!newrr) {
+                            ERR("out of memory\n");
+                            return STATUS_INSUFFICIENT_RESOURCES;
+                        }
+                        
+                        TRACE("modifying (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+
+                        // copy the entries in front of the one being removed
+                        if ((UINT8*)rr > tp.item->data) {
+                            RtlCopyMemory(newrr, tp.item->data, (UINT8*)rr - tp.item->data);
+                            rroff = newrr + ((UINT8*)rr - tp.item->data);
+                        } else {
+                            rroff = newrr;
+                        }
+                        
+                        // copy the entries after it
+                        if ((UINT8*)&rr->name[rr->n] - tp.item->data < tp.item->size)
+                            RtlCopyMemory(rroff, &rr->name[rr->n], tp.item->size - ((UINT8*)&rr->name[rr->n] - tp.item->data));
+                        
+                        if (!insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newrr, newlen, NULL, Irp, rollback)) {
+                            // the old item has already been deleted, so report the failure
+                            // rather than pretending the item was rewritten
+                            ERR("error - failed to insert item\n");
+                            ExFreePool(newrr);
+                            return STATUS_INTERNAL_ERROR;
+                        }
+                    }
+                    
+                    break;
+                }
+                
+                // advance to the next packed entry, if there is one
+                if (len > itemlen) {
+                    len -= itemlen;
+                    rr = (ROOT_REF*)&rr->name[rr->n];
+                } else
+                    break;
+            } while (len > 0);
+        }
+    } else {
+        WARN("could not find ROOT_REF entry for subvol %llx in %llx\n", searchkey.offset, searchkey.obj_id);
+        return STATUS_NOT_FOUND;
+    }
+    
+    return STATUS_SUCCESS;
+}
+
+// Adds the ROOT_REF entry rr to the item (parsubvolid, TYPE_ROOT_REF, subvolid) in the
+// root tree. If the item already exists, rr is appended to a copy of its data and the
+// item is replaced; otherwise rr itself becomes the data of a new item.
+//
+// rr must be a pool allocation of at least sizeof(ROOT_REF) - 1 + rr->n bytes. This
+// function takes ownership of it on every path: it is either handed to the tree or
+// freed here, including when an error is returned. The caller must not free it.
+//
+// Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES, STATUS_INTERNAL_ERROR if the
+// item could not be inserted, or any failure status returned by find_item.
+static NTSTATUS add_root_ref(device_extension* Vcb, UINT64 subvolid, UINT64 parsubvolid, ROOT_REF* rr, PIRP Irp, LIST_ENTRY* rollback) {
+    KEY searchkey;
+    traverse_ptr tp;
+    NTSTATUS Status;
+    
+    searchkey.obj_id = parsubvolid;
+    searchkey.obj_type = TYPE_ROOT_REF;
+    searchkey.offset = subvolid;
+    
+    Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        // rr is freed on the insert-failure paths below, so the caller cannot tell
+        // whether it still owns rr after an error; free it here as well
+        ExFreePool(rr);
+        return Status;
+    }
+    
+    if (!keycmp(searchkey, tp.item->key)) {
+        // item already exists: build old data + new entry
+        ULONG rrsize = tp.item->size + sizeof(ROOT_REF) - 1 + rr->n;
+        UINT8* rr2;
+        
+        rr2 = ExAllocatePoolWithTag(PagedPool, rrsize, ALLOC_TAG);
+        if (!rr2) {
+            ERR("out of memory\n");
+            ExFreePool(rr);
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        if (tp.item->size > 0)
+            RtlCopyMemory(rr2, tp.item->data, tp.item->size);
+        
+        RtlCopyMemory(rr2 + tp.item->size, rr, sizeof(ROOT_REF) - 1 + rr->n);
+        ExFreePool(rr);
+        
+        delete_tree_item(Vcb, &tp, rollback);
+        
+        if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr2, rrsize, NULL, Irp, rollback)) {
+            ERR("error - failed to insert item\n");
+            ExFreePool(rr2);
+            return STATUS_INTERNAL_ERROR;
+        }
+    } else {
+        // no existing item: rr becomes the item data directly
+        if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr, sizeof(ROOT_REF) - 1 + rr->n, NULL, Irp, rollback)) {
+            ERR("error - failed to insert item\n");
+            ExFreePool(rr);
+            return STATUS_INTERNAL_ERROR;
+        }
+    }
+    
+    return STATUS_SUCCESS;
+}
+
+// Makes the ROOT_BACKREF item (subvolid, TYPE_ROOT_BACKREF, parsubvolid) a copy of the
+// data of the ROOT_REF item (parsubvolid, TYPE_ROOT_REF, subvolid) in the root tree.
+// Any existing ROOT_BACKREF item is deleted first; if there is no ROOT_REF item, or it
+// is empty, no new ROOT_BACKREF is inserted.
+//
+// Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES, STATUS_INTERNAL_ERROR if the
+// insert fails, or any failure status returned by find_item.
+static NTSTATUS STDCALL update_root_backref(device_extension* Vcb, UINT64 subvolid, UINT64 parsubvolid, PIRP Irp, LIST_ENTRY* rollback) {
+    KEY key;
+    traverse_ptr tp;
+    UINT8* refcopy = NULL;
+    ULONG reflen = 0;
+    NTSTATUS Status;
+    
+    // step 1: take a private copy of the forward reference's data
+    key.obj_id = parsubvolid;
+    key.obj_type = TYPE_ROOT_REF;
+    key.offset = subvolid;
+    
+    Status = find_item(Vcb, Vcb->root_root, &tp, &key, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return Status;
+    }
+    
+    if (!keycmp(tp.item->key, key) && tp.item->size > 0) {
+        reflen = tp.item->size;
+        
+        refcopy = ExAllocatePoolWithTag(PagedPool, reflen, ALLOC_TAG);
+        if (!refcopy) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        RtlCopyMemory(refcopy, tp.item->data, reflen);
+    }
+    
+    // step 2: remove the current back reference, if there is one
+    key.obj_id = subvolid;
+    key.obj_type = TYPE_ROOT_BACKREF;
+    key.offset = parsubvolid;
+    
+    Status = find_item(Vcb, Vcb->root_root, &tp, &key, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        
+        if (reflen > 0)
+            ExFreePool(refcopy);
+        
+        return Status;
+    }
+    
+    if (!keycmp(tp.item->key, key))
+        delete_tree_item(Vcb, &tp, rollback);
+    
+    // step 3: insert the copy as the new back reference
+    if (reflen == 0)
+        return STATUS_SUCCESS;
+    
+    if (!insert_tree_item(Vcb, Vcb->root_root, subvolid, TYPE_ROOT_BACKREF, parsubvolid, refcopy, reflen, NULL, Irp, rollback)) {
+        ERR("error - failed to insert item\n");
+        ExFreePool(refcopy);
+        return STATUS_INTERNAL_ERROR;
+    }
+    
+    return STATUS_SUCCESS;
+}
+
+// Finds the ROOT_ITEM for tree `root` in the root tree. If the stored item is shorter
+// than sizeof(ROOT_ITEM), it is replaced by a full-length copy whose extra bytes are
+// zeroed; otherwise tp.tree->write is set on the tree node holding it (presumably so
+// that the node gets written out - confirm against the tree-writing code).
+//
+// Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES, STATUS_INTERNAL_ERROR if the
+// ROOT_ITEM is missing or the replacement cannot be inserted, or any failure status
+// returned by find_item.
+static NTSTATUS add_root_item_to_cache(device_extension* Vcb, UINT64 root, PIRP Irp, LIST_ENTRY* rollback) {
+    KEY searchkey;
+    traverse_ptr tp;
+    NTSTATUS Status;
+    
+    // the key offset of the ROOT_ITEM is not known here, so search with the maximum
+    // offset and compare only object id and type below
+    searchkey.obj_id = root;
+    searchkey.obj_type = TYPE_ROOT_ITEM;
+    searchkey.offset = 0xffffffffffffffff;
+    
+    Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return Status;
+    }
+    
+    if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
+        ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
+        int3;
+        return STATUS_INTERNAL_ERROR;
+    }
+    
+    if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, create new entry with new bits zeroed
+        ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
+        if (!ri) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        if (tp.item->size > 0)
+            RtlCopyMemory(ri, tp.item->data, tp.item->size);
+        
+        RtlZeroMemory(((UINT8*)ri) + tp.item->size, sizeof(ROOT_ITEM) - tp.item->size);
+        
+        delete_tree_item(Vcb, &tp, rollback);
+        
+        // reinsert under the offset the old item actually had
+        if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp, rollback)) {
+            ERR("insert_tree_item failed\n");
+            // the buffer was not inserted, so it is still ours to free
+            ExFreePool(ri);
+            return STATUS_INTERNAL_ERROR;
+        }
+    } else {
+        tp.tree->write = TRUE;
+    }
+    
+    return STATUS_SUCCESS;
+}
+
+// Adds the DIR_ITEM entry di (disize bytes) under the key (inode, TYPE_DIR_ITEM, crc32)
+// in subvol. If an item with that key already exists, di is appended to a copy of its
+// data and the item is replaced; otherwise di itself becomes the data of a new item.
+//
+// Ownership of di: on success it has either been handed to the tree or freed here.
+// On every failure return this function neither frees nor inserts di.
+//
+// Returns:
+//   STATUS_SUCCESS                - entry added
+//   STATUS_INTERNAL_ERROR         - the combined item would not fit in a leaf, or the
+//                                   item could not be inserted
+//   STATUS_INSUFFICIENT_RESOURCES - out of memory
+//   any failure status returned by find_item
+static NTSTATUS add_dir_item(device_extension* Vcb, root* subvol, UINT64 inode, UINT32 crc32, DIR_ITEM* di, ULONG disize, PIRP Irp, LIST_ENTRY* rollback) {
+    KEY searchkey;
+    traverse_ptr tp;
+    UINT8* di2;
+    NTSTATUS Status;
+    
+    searchkey.obj_id = inode;
+    searchkey.obj_type = TYPE_DIR_ITEM;
+    searchkey.offset = crc32;
+    
+    Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return Status;
+    }
+    
+    if (!keycmp(tp.item->key, searchkey)) {
+        // an item with this key already exists: append the new entry to it
+        ULONG maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node);
+        ULONG newsize = tp.item->size + disize;
+        
+        if (newsize > maxlen) {
+            WARN("DIR_ITEM was longer than maxlen (%u + %u > %u)\n", tp.item->size, disize, maxlen);
+            return STATUS_INTERNAL_ERROR;
+        }
+        
+        di2 = ExAllocatePoolWithTag(PagedPool, newsize, ALLOC_TAG);
+        if (!di2) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        if (tp.item->size > 0)
+            RtlCopyMemory(di2, tp.item->data, tp.item->size);
+        
+        RtlCopyMemory(di2 + tp.item->size, di, disize);
+        
+        delete_tree_item(Vcb, &tp, rollback);
+        
+        if (!insert_tree_item(Vcb, subvol, inode, TYPE_DIR_ITEM, crc32, di2, newsize, NULL, Irp, rollback)) {
+            // don't report success for an entry that never made it into the tree
+            ERR("insert_tree_item failed\n");
+            ExFreePool(di2);
+            return STATUS_INTERNAL_ERROR;
+        }
+        
+        // the entry now lives in di2, so the caller's buffer is no longer needed
+        ExFreePool(di);
+    } else {
+        if (!insert_tree_item(Vcb, subvol, inode, TYPE_DIR_ITEM, crc32, di, disize, NULL, Irp, rollback)) {
+            ERR("insert_tree_item failed\n");
+            return STATUS_INTERNAL_ERROR;
+        }
+    }
+    
+    return STATUS_SUCCESS;
+}
+
+// Adds an INODE_EXTREF entry recording that `inode` is linked from directory `parinode`
+// at `index` under the name utf8. The item key is (inode, TYPE_INODE_EXTREF, crc32c of
+// the name seeded with the low 32 bits of parinode). If an item with that key already
+// exists the new entry is appended to a copy of it and the item is replaced; otherwise
+// a new item holding just this entry is inserted.
+//
+// Returns:
+//   STATUS_SUCCESS                - entry added
+//   STATUS_INTERNAL_ERROR         - the combined item would not fit in a leaf, or the
+//                                   item could not be inserted
+//   STATUS_INSUFFICIENT_RESOURCES - out of memory
+//   any failure status returned by find_item
+static NTSTATUS add_inode_extref(device_extension* Vcb, root* subvol, UINT64 inode, UINT64 parinode, UINT64 index, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback) {
+    KEY searchkey;
+    traverse_ptr tp;
+    INODE_EXTREF* ier;
+    NTSTATUS Status;
+    
+    searchkey.obj_id = inode;
+    searchkey.obj_type = TYPE_INODE_EXTREF;
+    searchkey.offset = calc_crc32c((UINT32)parinode, (UINT8*)utf8->Buffer, utf8->Length);
+
+    Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return Status;
+    }
+    
+    if (!keycmp(searchkey, tp.item->key)) {
+        // an item with this key already exists: append the new entry to it
+        ULONG iersize = tp.item->size + sizeof(INODE_EXTREF) - 1 + utf8->Length;
+        UINT8* ier2;
+        UINT32 maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node);
+        
+        if (iersize > maxlen) {
+            ERR("item would be too long (%u > %u)\n", iersize, maxlen);
+            return STATUS_INTERNAL_ERROR;
+        }
+        
+        ier2 = ExAllocatePoolWithTag(PagedPool, iersize, ALLOC_TAG);
+        if (!ier2) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        if (tp.item->size > 0)
+            RtlCopyMemory(ier2, tp.item->data, tp.item->size);
+        
+        // the new entry goes directly after the existing data
+        ier = (INODE_EXTREF*)&ier2[tp.item->size];
+        ier->dir = parinode;
+        ier->index = index;
+        ier->n = utf8->Length;
+        RtlCopyMemory(ier->name, utf8->Buffer, utf8->Length);
+        
+        delete_tree_item(Vcb, &tp, rollback);
+        
+        if (!insert_tree_item(Vcb, subvol, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ier2, iersize, NULL, Irp, rollback)) {
+            ERR("error - failed to insert item\n");
+            // the buffer was not inserted, so it is still ours to free
+            ExFreePool(ier2);
+            return STATUS_INTERNAL_ERROR;
+        }
+    } else {
+        ier = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_EXTREF) - 1 + utf8->Length, ALLOC_TAG);
+        if (!ier) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        ier->dir = parinode;
+        ier->index = index;
+        ier->n = utf8->Length;
+        RtlCopyMemory(ier->name, utf8->Buffer, utf8->Length);
+    
+        if (!insert_tree_item(Vcb, subvol, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ier, sizeof(INODE_EXTREF) - 1 + utf8->Length, NULL, Irp, rollback)) {
+            ERR("error - failed to insert item\n");
+            // the buffer was not inserted, so it is still ours to free
+            ExFreePool(ier);
+            return STATUS_INTERNAL_ERROR;
+        }
+    }
+    
+    return STATUS_SUCCESS;
+}
+
+// Records the name/index pair (utf8, index) in the INODE_REF item keyed
+// (inode, TYPE_INODE_REF, parinode) of subvol.
+//
+// If no such item exists yet, a new one holding just this entry is inserted.
+// Otherwise the existing item is replaced by a copy with the new entry
+// appended. If the enlarged item would no longer fit in a leaf, the entry
+// is stored through add_inode_extref instead when the volume has the
+// EXTENDED_IREF incompat flag set; without that flag the call fails with
+// STATUS_INTERNAL_ERROR.
+//
+// Returns STATUS_SUCCESS, the failing status of find_item,
+// STATUS_INSUFFICIENT_RESOURCES on allocation failure, or
+// STATUS_INTERNAL_ERROR if the tree insert fails.
+static NTSTATUS add_inode_ref(device_extension* Vcb, root* subvol, UINT64 inode, UINT64 parinode, UINT64 index, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
+    KEY searchkey;
+    traverse_ptr tp;
+    INODE_REF* newref;
+    UINT8* merged;
+    ULONG mergedsize;
+    UINT32 maxlen;
+
+    searchkey.obj_id = inode;
+    searchkey.obj_type = TYPE_INODE_REF;
+    searchkey.offset = parinode;
+
+    Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return Status;
+    }
+
+    if (keycmp(searchkey, tp.item->key)) {
+        // The search did not land on our exact key: create a fresh item
+        // containing only the new entry.
+        newref = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + utf8->Length, ALLOC_TAG);
+        if (!newref) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        newref->index = index;
+        newref->n = utf8->Length;
+        RtlCopyMemory(newref->name, utf8->Buffer, utf8->Length);
+
+        if (!insert_tree_item(Vcb, subvol, searchkey.obj_id, searchkey.obj_type, searchkey.offset, newref, sizeof(INODE_REF) - 1 + newref->n, NULL, Irp, rollback)) {
+            ERR("error - failed to insert item\n");
+            return STATUS_INTERNAL_ERROR;
+        }
+
+        return STATUS_SUCCESS;
+    }
+
+    // An item with this key already exists: build old contents + new entry.
+    mergedsize = tp.item->size + sizeof(INODE_REF) - 1 + utf8->Length;
+    maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node);
+
+    if (mergedsize > maxlen) {
+        if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF)) {
+            ERR("item would be too long (%u > %u)\n", mergedsize, maxlen);
+            return STATUS_INTERNAL_ERROR;
+        }
+
+        TRACE("INODE_REF too long, creating INODE_EXTREF\n");
+        return add_inode_extref(Vcb, subvol, inode, parinode, index, utf8, Irp, rollback);
+    }
+
+    merged = ExAllocatePoolWithTag(PagedPool, mergedsize, ALLOC_TAG);
+    if (!merged) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    if (tp.item->size > 0)
+        RtlCopyMemory(merged, tp.item->data, tp.item->size);
+
+    // The new entry goes directly after the copied old contents.
+    newref = (INODE_REF*)&merged[tp.item->size];
+    newref->index = index;
+    newref->n = utf8->Length;
+    RtlCopyMemory(newref->name, utf8->Buffer, utf8->Length);
+
+    delete_tree_item(Vcb, &tp, rollback);
+
+    if (!insert_tree_item(Vcb, subvol, searchkey.obj_id, searchkey.obj_type, searchkey.offset, merged, mergedsize, NULL, Irp, rollback)) {
+        ERR("error - failed to insert item\n");
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Writes the directory-entry changes pending on one dirty fileref.
+//
+// Depending on the fileref's state this
+//   - does nothing (created and deleted in the same transaction, or the
+//     fcb is an alternate data stream),
+//   - queues DIR_INDEX / DIR_ITEM / INODE_REF inserts on batchlist, or adds
+//     a ROOT_REF when the child lives in a different subvolume (created),
+//   - removes DIR_ITEM, INODE_REF or ROOT_REF, and DIR_INDEX (deleted),
+//   - or removes the entries under the old name and re-adds them under the
+//     current name (rename or type change).
+//
+// fileref->dirty is cleared on success. Buffers that have not yet been
+// handed to the tree/batch code are freed on the error paths. Ownership of
+// a buffer after a *failed* insert call is not visible from here, so such
+// buffers are deliberately left alone.
+//
+// Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES on allocation
+// failure, STATUS_INTERNAL_ERROR on insert/lookup failure, or the failing
+// status of one of the helper calls.
+static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
+    
+    // if fileref created and then immediately deleted, do nothing
+    if (fileref->created && fileref->deleted) {
+        fileref->dirty = FALSE;
+        return STATUS_SUCCESS;
+    }
+    
+    // nothing is written here for alternate data streams
+    if (fileref->fcb->ads) {
+        fileref->dirty = FALSE;
+        return STATUS_SUCCESS;
+    }
+    
+    if (fileref->created) {
+        ULONG disize;
+        DIR_ITEM *di, *di2;
+        UINT32 crc32;
+        
+        crc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->utf8.Buffer, fileref->utf8.Length);
+        
+        disize = sizeof(DIR_ITEM) - 1 + fileref->utf8.Length;
+        di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
+        if (!di) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
+            di->key.obj_id = fileref->fcb->inode;
+            di->key.obj_type = TYPE_INODE_ITEM;
+            di->key.offset = 0;
+        } else { // subvolume
+            di->key.obj_id = fileref->fcb->subvol->id;
+            di->key.obj_type = TYPE_ROOT_ITEM;
+            di->key.offset = 0xffffffffffffffff;
+        }
+
+        di->transid = fileref->fcb->Vcb->superblock.generation;
+        di->m = 0;
+        di->n = (UINT16)fileref->utf8.Length;
+        di->type = fileref->fcb->type;
+        RtlCopyMemory(di->name, fileref->utf8.Buffer, fileref->utf8.Length);
+        
+        // The same payload is stored twice: once as DIR_INDEX, once as DIR_ITEM.
+        di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
+        if (!di2) {
+            ERR("out of memory\n");
+            ExFreePool(di); // not handed off yet; matches the rename branch below
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        RtlCopyMemory(di2, di, disize);
+
+        if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, fileref->index,
+                                    di, disize, Batch_Insert, Irp, rollback)) {
+            ERR("insert_tree_item_batch failed\n");
+            ExFreePool(di2); // di2 has not been passed to anyone yet
+            return STATUS_INTERNAL_ERROR;
+        }
+        
+        if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32,
+                                    di2, disize, Batch_DirItem, Irp, rollback)) {
+            ERR("insert_tree_item_batch failed\n");
+            return STATUS_INTERNAL_ERROR;
+        }
+        
+        if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
+            INODE_REF* ir;
+            
+            ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->utf8.Length, ALLOC_TAG);
+            if (!ir) {
+                ERR("out of memory\n");
+                return STATUS_INSUFFICIENT_RESOURCES;
+            }
+
+            ir->index = fileref->index;
+            ir->n = fileref->utf8.Length;
+            RtlCopyMemory(ir->name, fileref->utf8.Buffer, ir->n);
+        
+            if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode,
+                                        ir, sizeof(INODE_REF) - 1 + ir->n, Batch_InodeRef, Irp, rollback)) {
+                ERR("insert_tree_item_batch failed\n");
+                return STATUS_INTERNAL_ERROR;
+            }
+        } else {
+            ULONG rrlen;
+            ROOT_REF* rr;
+
+            rrlen = sizeof(ROOT_REF) - 1 + fileref->utf8.Length;
+                
+            rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG);
+            if (!rr) {
+                ERR("out of memory\n");
+                return STATUS_INSUFFICIENT_RESOURCES;
+            }
+            
+            rr->dir = fileref->parent->fcb->inode;
+            rr->index = fileref->index;
+            rr->n = fileref->utf8.Length;
+            RtlCopyMemory(rr->name, fileref->utf8.Buffer, fileref->utf8.Length);
+            
+            Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("add_root_ref returned %08x\n", Status);
+                return Status;
+            }
+            
+            Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("update_root_backref returned %08x\n", Status);
+                return Status;
+            }
+        }
+        
+        fileref->created = FALSE;
+    } else if (fileref->deleted) {
+        UINT32 crc32;
+        KEY searchkey;
+        traverse_ptr tp;
+        ANSI_STRING* name;
+        
+        // The on-disk entries are still stored under the old name if one is recorded.
+        if (fileref->oldutf8.Buffer)
+            name = &fileref->oldutf8;
+        else
+            name = &fileref->utf8;
+
+        crc32 = calc_crc32c(0xfffffffe, (UINT8*)name->Buffer, name->Length);
+
+        TRACE("deleting %.*S\n", file_desc_fileref(fileref));
+        
+        // delete DIR_ITEM (0x54)
+        
+        Status = delete_dir_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, crc32, name, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("delete_dir_item returned %08x\n", Status);
+            return Status;
+        }
+        
+        if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
+            // delete INODE_REF (0xc)
+            
+            Status = delete_inode_ref(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, fileref->parent->fcb->inode, name, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("delete_inode_ref returned %08x\n", Status);
+                return Status;
+            }
+        } else { // subvolume
+            // NOTE(review): a delete_root_ref failure is only logged, not
+            // returned - presumably deliberate best-effort; confirm.
+            Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, name, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("delete_root_ref returned %08x\n", Status);
+            }
+            
+            Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("update_root_backref returned %08x\n", Status);
+                return Status;
+            }
+        }
+        
+        // delete DIR_INDEX (0x60)
+        
+        searchkey.obj_id = fileref->parent->fcb->inode;
+        searchkey.obj_type = TYPE_DIR_INDEX;
+        searchkey.offset = fileref->index;
+
+        Status = find_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            Status = STATUS_INTERNAL_ERROR;
+            return Status;
+        }
+        
+        if (!keycmp(searchkey, tp.item->key)) {
+            delete_tree_item(fileref->fcb->Vcb, &tp, rollback);
+            TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+        }
+        
+        if (fileref->oldutf8.Buffer) {
+            ExFreePool(fileref->oldutf8.Buffer);
+            fileref->oldutf8.Buffer = NULL;
+        }
+    } else { // rename or change type
+        PANSI_STRING oldutf8 = fileref->oldutf8.Buffer ? &fileref->oldutf8 : &fileref->utf8;
+        UINT32 crc32, oldcrc32;
+        ULONG disize;
+        DIR_ITEM *di, *di2;
+        KEY searchkey;
+        traverse_ptr tp;
+        
+        crc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->utf8.Buffer, fileref->utf8.Length);
+        
+        if (!fileref->oldutf8.Buffer)
+            oldcrc32 = crc32;
+        else
+            oldcrc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->oldutf8.Buffer, fileref->oldutf8.Length);
+
+        // delete DIR_ITEM (0x54)
+        
+        Status = delete_dir_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, oldcrc32, oldutf8, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("delete_dir_item returned %08x\n", Status);
+            return Status;
+        }
+        
+        // add DIR_ITEM (0x54)
+        
+        disize = sizeof(DIR_ITEM) - 1 + fileref->utf8.Length;
+        di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
+        if (!di) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        // di2 is the copy used for the DIR_INDEX entry at the very end of
+        // this branch; until then it is owned here and must be freed on
+        // every early return.
+        di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
+        if (!di2) {
+            ERR("out of memory\n");
+            ExFreePool(di);
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
+            di->key.obj_id = fileref->fcb->inode;
+            di->key.obj_type = TYPE_INODE_ITEM;
+            di->key.offset = 0;
+        } else { // subvolume
+            di->key.obj_id = fileref->fcb->subvol->id;
+            di->key.obj_type = TYPE_ROOT_ITEM;
+            di->key.offset = 0xffffffffffffffff;
+        }
+        
+        di->transid = fileref->fcb->Vcb->superblock.generation;
+        di->m = 0;
+        di->n = (UINT16)fileref->utf8.Length;
+        di->type = fileref->fcb->type;
+        RtlCopyMemory(di->name, fileref->utf8.Buffer, fileref->utf8.Length);
+        
+        RtlCopyMemory(di2, di, disize);
+        
+        Status = add_dir_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, crc32, di, disize, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("add_dir_item returned %08x\n", Status);
+            ExFreePool(di2);
+            return Status;
+        }
+        
+        if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
+            // delete INODE_REF (0xc)
+            
+            Status = delete_inode_ref(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, fileref->parent->fcb->inode, oldutf8, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("delete_inode_ref returned %08x\n", Status);
+                ExFreePool(di2);
+                return Status;
+            }
+            
+            // add INODE_REF (0xc)
+            
+            Status = add_inode_ref(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, fileref->parent->fcb->inode, fileref->index, &fileref->utf8, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("add_inode_ref returned %08x\n", Status);
+                ExFreePool(di2);
+                return Status;
+            }
+        } else { // subvolume
+            ULONG rrlen;
+            ROOT_REF* rr;
+            
+            // FIXME - make sure this works with duff subvols within snapshots
+            
+            // NOTE(review): a delete_root_ref failure is only logged, not
+            // returned - presumably deliberate best-effort; confirm.
+            Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, oldutf8, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("delete_root_ref returned %08x\n", Status);
+            }
+            
+            rrlen = sizeof(ROOT_REF) - 1 + fileref->utf8.Length;
+            
+            rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG);
+            if (!rr) {
+                ERR("out of memory\n");
+                ExFreePool(di2);
+                return STATUS_INSUFFICIENT_RESOURCES;
+            }
+            
+            rr->dir = fileref->parent->fcb->inode;
+            rr->index = fileref->index;
+            rr->n = fileref->utf8.Length;
+            RtlCopyMemory(rr->name, fileref->utf8.Buffer, fileref->utf8.Length);
+            
+            Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("add_root_ref returned %08x\n", Status);
+                ExFreePool(di2);
+                return Status;
+            }
+            
+            Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("update_root_backref returned %08x\n", Status);
+                ExFreePool(di2);
+                return Status;
+            }
+        }
+        
+        // delete DIR_INDEX (0x60)
+        
+        searchkey.obj_id = fileref->parent->fcb->inode;
+        searchkey.obj_type = TYPE_DIR_INDEX;
+        searchkey.offset = fileref->index;
+        
+        Status = find_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            ExFreePool(di2);
+            Status = STATUS_INTERNAL_ERROR;
+            return Status;
+        }
+        
+        if (!keycmp(searchkey, tp.item->key)) {
+            delete_tree_item(fileref->fcb->Vcb, &tp, rollback);
+            TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+        } else
+            WARN("could not find (%llx,%x,%llx) in subvol %llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, fileref->fcb->subvol->id);
+        
+        // add DIR_INDEX (0x60)
+        
+        if (!insert_tree_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, fileref->index, di2, disize, NULL, Irp, rollback)) {
+            ERR("insert_tree_item failed\n");
+            Status = STATUS_INTERNAL_ERROR;
+            return Status;
+        }
+
+        if (fileref->oldutf8.Buffer) {
+            ExFreePool(fileref->oldutf8.Buffer);
+            fileref->oldutf8.Buffer = NULL;
+        }
+    }
+
+    fileref->dirty = FALSE;
+    
+    return STATUS_SUCCESS;
+}
+
+// Commits the volume's pending changes to disk.
+//
+// In order: flushes every dirty fileref and dirty fcb (deleted alternate
+// data streams first), updates the checksum tree, drops queued roots,
+// updates changed chunks, then repeats add_parents / do_splits /
+// allocate_tree_extents / update_chunk_usage / allocate_cache until the
+// cache stops changing and the trees are consistent. Finally it updates the
+// root tree, writes the trees and the superblocks, bumps the superblock
+// generation, clears every tree's write flag and frees the dropped roots.
+//
+// Vcb      - volume to flush
+// Irp      - passed through to the helper calls (callers may pass NULL)
+// rollback - rollback list passed through to the helper calls
+//
+// Returns STATUS_SUCCESS or the first failing status of the steps after the
+// fileref/fcb flush. Return values of flush_fileref, flush_fcb and
+// commit_batch_list are not checked here.
+NTSTATUS STDCALL do_write(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
+    LIST_ENTRY *le, batchlist;
+    BOOL cache_changed = FALSE;
+#ifdef DEBUG_FLUSH_TIMES
+    UINT64 filerefs = 0, fcbs = 0;
+    LARGE_INTEGER freq, time1, time2;
+#endif
+#ifdef DEBUG_WRITE_LOOPS
+    UINT loops = 0;
+#endif
+    
+    TRACE("(%p)\n", Vcb);
+    
+    InitializeListHead(&batchlist);
+
+#ifdef DEBUG_FLUSH_TIMES
+    time1 = KeQueryPerformanceCounter(&freq);
+#endif
+    
+    while (!IsListEmpty(&Vcb->dirty_filerefs)) {
+        dirty_fileref* dirt;
+        
+        le = RemoveHeadList(&Vcb->dirty_filerefs);
+        
+        dirt = CONTAINING_RECORD(le, dirty_fileref, list_entry);
+        
+        flush_fileref(dirt->fileref, &batchlist, Irp, rollback);
+        free_fileref(dirt->fileref);
+        ExFreePool(dirt);
+
+#ifdef DEBUG_FLUSH_TIMES
+        filerefs++;
+#endif
+    }
+    
+    commit_batch_list(Vcb, &batchlist, Irp, rollback);
+    
+#ifdef DEBUG_FLUSH_TIMES
+    time2 = KeQueryPerformanceCounter(NULL);
+
+    ERR("flushed %llu filerefs in %llu (freq = %llu)\n", filerefs, time2.QuadPart - time1.QuadPart, freq.QuadPart);
+
+    time1 = KeQueryPerformanceCounter(&freq);
+#endif
+
+    // We process deleted streams first, so we don't run over our xattr
+    // limit unless we absolutely have to.
+    
+    le = Vcb->dirty_fcbs.Flink;
+    while (le != &Vcb->dirty_fcbs) {
+        dirty_fcb* dirt;
+        LIST_ENTRY* le2 = le->Flink; // saved first: le may be unlinked and freed below
+        
+        dirt = CONTAINING_RECORD(le, dirty_fcb, list_entry);
+        
+        if (dirt->fcb->deleted && dirt->fcb->ads) {
+            RemoveEntryList(le);
+            
+            flush_fcb(dirt->fcb, FALSE, &batchlist, Irp, rollback);
+            free_fcb(dirt->fcb);
+            ExFreePool(dirt);
+
+#ifdef DEBUG_FLUSH_TIMES
+            fcbs++;
+#endif
+        }
+        
+        le = le2;
+    }
+    
+    // Second pass: everything else except live fcbs belonging to the root
+    // tree, which stay on the dirty list.
+    le = Vcb->dirty_fcbs.Flink;
+    while (le != &Vcb->dirty_fcbs) {
+        dirty_fcb* dirt;
+        LIST_ENTRY* le2 = le->Flink;
+        
+        dirt = CONTAINING_RECORD(le, dirty_fcb, list_entry);
+        
+        if (dirt->fcb->subvol != Vcb->root_root || dirt->fcb->deleted) {
+            RemoveEntryList(le);
+            
+            flush_fcb(dirt->fcb, FALSE, &batchlist, Irp, rollback);
+            free_fcb(dirt->fcb);
+            ExFreePool(dirt);
+
+#ifdef DEBUG_FLUSH_TIMES
+            fcbs++;
+#endif
+        }
+        
+        le = le2;
+    }
+    
+    commit_batch_list(Vcb, &batchlist, Irp, rollback);
+    
+#ifdef DEBUG_FLUSH_TIMES
+    time2 = KeQueryPerformanceCounter(NULL);
+
+    ERR("flushed %llu fcbs in %llu (freq = %llu)\n", fcbs, time2.QuadPart - time1.QuadPart, freq.QuadPart);
+#endif
+
+    ExAcquireResourceExclusiveLite(&Vcb->checksum_lock, TRUE);
+    if (!IsListEmpty(&Vcb->sector_checksums)) {
+        update_checksum_tree(Vcb, Irp, rollback);
+    }
+    ExReleaseResourceLite(&Vcb->checksum_lock);
+    
+    if (!IsListEmpty(&Vcb->drop_roots)) {
+        Status = drop_roots(Vcb, Irp, rollback);
+        
+        if (!NT_SUCCESS(Status)) {
+            ERR("drop_roots returned %08x\n", Status);
+            return Status;
+        }
+    }
+    
+    if (!IsListEmpty(&Vcb->chunks_changed)) {
+        Status = update_chunks(Vcb, &batchlist, Irp, rollback);
+        
+        if (!NT_SUCCESS(Status)) {
+            ERR("update_chunks returned %08x\n", Status);
+            return Status;
+        }
+    }
+    
+    commit_batch_list(Vcb, &batchlist, Irp, rollback);
+    
+    // If only changing superblock, e.g. changing label, we still need to rewrite
+    // the root tree so the generations match, otherwise you won't be able to mount on Linux.
+    if (!Vcb->root_root->treeholder.tree || !Vcb->root_root->treeholder.tree->write) {
+        KEY searchkey;
+        
+        traverse_ptr tp;
+        
+        searchkey.obj_id = 0;
+        searchkey.obj_type = 0;
+        searchkey.offset = 0;
+        
+        // presumably this lookup is what loads the root tree so the flag
+        // below can be set - confirm against find_item
+        Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            return Status;
+        }
+        
+        Vcb->root_root->treeholder.tree->write = TRUE;
+    }
+    
+    // make sure we always update the extent tree
+    Status = add_root_item_to_cache(Vcb, BTRFS_ROOT_EXTENT, Irp, rollback);
+    if (!NT_SUCCESS(Status)) {
+        ERR("add_root_item_to_cache returned %08x\n", Status);
+        return Status;
+    }
+    
+    do {
+        Status = add_parents(Vcb, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("add_parents returned %08x\n", Status);
+            goto end;
+        }
+        
+        Status = do_splits(Vcb, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("do_splits returned %08x\n", Status);
+            goto end;
+        }
+        
+        Status = allocate_tree_extents(Vcb, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("allocate_tree_extents returned %08x\n", Status);
+            goto end;
+        }
+        
+        Status = update_chunk_usage(Vcb, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("update_chunk_usage returned %08x\n", Status);
+            goto end;
+        }
+        
+        Status = allocate_cache(Vcb, &cache_changed, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("allocate_cache returned %08x\n", Status);
+            goto end;
+        }
+
+#ifdef DEBUG_WRITE_LOOPS
+        loops++;
+        
+        if (cache_changed)
+            ERR("cache has changed, looping again\n");
+#endif
+    } while (cache_changed || !trees_consistent(Vcb, rollback));
+    
+#ifdef DEBUG_WRITE_LOOPS
+    ERR("%u loops\n", loops);
+#endif
+    
+    TRACE("trees consistent\n");
+    
+    Status = update_root_root(Vcb, Irp, rollback);
+    if (!NT_SUCCESS(Status)) {
+        ERR("update_root_root returned %08x\n", Status);
+        goto end;
+    }
+    
+    Status = write_trees(Vcb, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("write_trees returned %08x\n", Status);
+        goto end;
+    }
+    
+    Vcb->superblock.cache_generation = Vcb->superblock.generation;
+    
+    Status = write_superblocks(Vcb, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("write_superblocks returned %08x\n", Status);
+        goto end;
+    }
+    
+    clean_space_cache(Vcb);
+    
+    Vcb->superblock.generation++;
+    
+    Status = STATUS_SUCCESS;
+    
+    // Everything is on disk now: clear the write flag of every loaded tree.
+    le = Vcb->trees.Flink;
+    while (le != &Vcb->trees) {
+        tree* t = CONTAINING_RECORD(le, tree, list_entry);
+        
+#ifdef DEBUG_PARANOID
+        KEY searchkey;
+        traverse_ptr tp;
+        
+        searchkey.obj_id = t->header.address;
+        searchkey.obj_type = TYPE_METADATA_ITEM;
+        searchkey.offset = 0xffffffffffffffff;
+        
+        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            int3;
+        }
+        
+        if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
+            searchkey.obj_id = t->header.address;
+            searchkey.obj_type = TYPE_EXTENT_ITEM;
+            searchkey.offset = 0xffffffffffffffff;
+            
+            Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+            if (!NT_SUCCESS(Status)) {
+                ERR("error - find_item returned %08x\n", Status);
+                int3;
+            }
+            
+            if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
+                ERR("error - could not find entry in extent tree for tree at %llx\n", t->header.address);
+                int3;
+            }
+        }
+#endif
+        
+        t->write = FALSE;
+        
+        le = le->Flink;
+    }
+    
+    Vcb->need_write = FALSE;
+    
+    while (!IsListEmpty(&Vcb->drop_roots)) {
+        LIST_ENTRY* dropped = RemoveHeadList(&Vcb->drop_roots);
+        root* r = CONTAINING_RECORD(dropped, root, list_entry);
+
+        ExDeleteResourceLite(&r->nonpaged->load_tree_lock);
+        ExFreePool(r->nonpaged);
+        ExFreePool(r);
+    }
+    
+end:
+    TRACE("do_write returning %08x\n", Status);
+    
+    return Status;
+}
+
+#ifdef DEBUG_STATS
+// Logs the read statistics accumulated in Vcb->stats through ERR, then
+// zeroes the whole structure.
+static void print_stats(device_extension* Vcb) {
+    debug_stats* st = &Vcb->stats;
+
+    ERR("READ STATS:\n");
+    ERR("number of reads: %llu\n", st->num_reads);
+    ERR("data read: %llu bytes\n", st->data_read);
+    ERR("total time taken: %llu\n", st->read_total_time);
+    ERR("csum time taken: %llu\n", st->read_csum_time);
+    ERR("disk time taken: %llu\n", st->read_disk_time);
+    // "other" is whatever part of the total is neither checksum nor disk time
+    ERR("other time taken: %llu\n", st->read_total_time - st->read_csum_time - st->read_disk_time);
+
+    RtlZeroMemory(st, sizeof(debug_stats));
+}
+#endif
+
 static void do_flush(device_extension* Vcb) {
     LIST_ENTRY rollback;
     
@@ -26,12 +5658,16 @@ static void do_flush(device_extension* Vcb) {
 
     ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
 
+#ifdef DEBUG_STATS
+    print_stats(Vcb);
+#endif
+
     if (Vcb->need_write && !Vcb->readonly)
         do_write(Vcb, NULL, &rollback);
     
     free_trees(Vcb);
     
-    clear_rollback(&rollback);
+    clear_rollback(Vcb, &rollback);
 
     ExReleaseResourceLite(&Vcb->tree_lock);
 
index 6982ae0..e87408f 100644 (file)
 
 // #define DEBUG_SPACE_LISTS
 
-static NTSTATUS remove_free_space_inode(device_extension* Vcb, UINT64 inode, PIRP Irp, LIST_ENTRY* rollback) {
+static NTSTATUS remove_free_space_inode(device_extension* Vcb, UINT64 inode, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
     NTSTATUS Status;
     fcb* fcb;
     
-    Status = open_fcb(Vcb, Vcb->root_root, inode, BTRFS_TYPE_FILE, NULL, NULL, &fcb, Irp);
+    Status = open_fcb(Vcb, Vcb->root_root, inode, BTRFS_TYPE_FILE, NULL, NULL, &fcb, PagedPool, Irp);
     if (!NT_SUCCESS(Status)) {
         ERR("open_fcb returned %08x\n", Status);
         return Status;
@@ -45,14 +45,14 @@ static NTSTATUS remove_free_space_inode(device_extension* Vcb, UINT64 inode, PIR
     
     fcb->deleted = TRUE;
     
-    flush_fcb(fcb, FALSE, Irp, rollback);
+    flush_fcb(fcb, FALSE, batchlist, Irp, rollback);
     
     free_fcb(fcb);
 
     return STATUS_SUCCESS;
 }
 
-NTSTATUS clear_free_space_cache(device_extension* Vcb, PIRP Irp) {
+NTSTATUS clear_free_space_cache(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp) {
     KEY searchkey;
     traverse_ptr tp, next_tp;
     NTSTATUS Status;
@@ -86,7 +86,7 @@ NTSTATUS clear_free_space_cache(device_extension* Vcb, PIRP Irp) {
                 else {
                     LIST_ENTRY* le;
                     
-                    Status = remove_free_space_inode(Vcb, fsi->key.obj_id, Irp, &rollback);
+                    Status = remove_free_space_inode(Vcb, fsi->key.obj_id, batchlist, Irp, &rollback);
                     
                     if (!NT_SUCCESS(Status)) {
                         ERR("remove_free_space_inode for (%llx,%x,%llx) returned %08x\n", fsi->key.obj_id, fsi->key.obj_type, fsi->key.offset, Status);
@@ -118,7 +118,7 @@ NTSTATUS clear_free_space_cache(device_extension* Vcb, PIRP Irp) {
     
 end:
     if (NT_SUCCESS(Status))
-        clear_rollback(&rollback);
+        clear_rollback(Vcb, &rollback);
     else
         do_rollback(Vcb, &rollback);
     
@@ -193,7 +193,7 @@ size:
     return STATUS_SUCCESS;
 }
 
-static void load_free_space_bitmap(device_extension* Vcb, chunk* c, UINT64 offset, void* data) {
+static void load_free_space_bitmap(device_extension* Vcb, chunk* c, UINT64 offset, void* data, UINT64* total_space) {
     RTL_BITMAP bmph;
     UINT32 i, *dwords = data;
     ULONG runlength, index;
@@ -216,6 +216,7 @@ static void load_free_space_bitmap(device_extension* Vcb, chunk* c, UINT64 offse
         
         add_space_entry(&c->space, &c->space_size, addr, length);
         index += runlength;
+        *total_space += length;
        
         runlength = RtlFindNextForwardRunClear(&bmph, index, &index);
     }
@@ -245,6 +246,115 @@ static void order_space_entry(space* s, LIST_ENTRY* list_size) {
     InsertTailList(list_size, &s->list_entry_size);
 }
 
+// One entry in the temporary list built by get_superblock_size: records a
+// single stripe number that has been seen.
+typedef struct {
+    UINT64 stripe;          // stripe number (an offset divided by the chunk's stripe_length)
+    LIST_ENTRY list_entry;  // links the entry into the caller's list
+} superblock_stripe;
+
+// Adds the stripe numbers off, off + 1, ..., off + len - 1 to the list
+// `stripes`, skipping any number that is already present so that each
+// stripe appears in the list at most once.
+//
+// The function returns void, so an allocation failure cannot be reported:
+// it is logged and the remaining stripes are not added.
+static void add_superblock_stripe(LIST_ENTRY* stripes, UINT64 off, UINT64 len) {
+    UINT64 i;
+    
+    for (i = 0; i < len; i++) {
+        LIST_ENTRY* le;
+        superblock_stripe* ss;
+        BOOL found = FALSE;
+        
+        le = stripes->Flink;
+        while (le != stripes) {
+            ss = CONTAINING_RECORD(le, superblock_stripe, list_entry);
+            
+            // A bare `continue` here would re-test the same entry forever;
+            // leave the scan and skip this stripe in the outer loop instead.
+            if (ss->stripe == off + i) {
+                found = TRUE;
+                break;
+            }
+            
+            le = le->Flink;
+        }
+        
+        if (found)
+            continue;
+        
+        ss = ExAllocatePoolWithTag(PagedPool, sizeof(superblock_stripe), ALLOC_TAG);
+        if (!ss) {
+            ERR("out of memory\n");
+            return;
+        }
+        
+        ss->stripe = off + i;
+        InsertTailList(stripes, &ss->list_entry);
+    }
+}
+
+// Returns how many bytes of chunk c are taken up by superblock copies,
+// counted in whole stripes: for every address in the zero-terminated
+// superblock_addrs table and every device stripe of the chunk that passes
+// the overlap test, the affected stripe numbers are collected (without
+// duplicates) via add_superblock_stripe, and the result is the number of
+// collected stripes multiplied by the chunk's stripe_length.
+static UINT64 get_superblock_size(chunk* c) {
+    CHUNK_ITEM* ci = c->chunk_item;
+    CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; // stripe array directly follows the CHUNK_ITEM
+    UINT64 first, last, count = 0;
+    UINT16 sb, j;
+    LIST_ENTRY stripes;
+    
+    InitializeListHead(&stripes);
+    
+    for (sb = 0; superblock_addrs[sb] != 0; sb++) {
+        if (ci->type & BLOCK_FLAG_RAID0 || ci->type & BLOCK_FLAG_RAID10) {
+            for (j = 0; j < ci->num_stripes; j++) {
+                ULONG sub_stripes = max(ci->sub_stripes, 1);
+                
+                // skip device stripes that fail the overlap test
+                if (cis[j].offset + (ci->size * ci->num_stripes / sub_stripes) <= superblock_addrs[sb] || cis[j].offset > superblock_addrs[sb] + sizeof(superblock))
+                    continue;
+                
+                first = superblock_addrs[sb] - cis[j].offset;
+                first -= first % ci->stripe_length;
+                first *= ci->num_stripes / sub_stripes;
+                first += (j / sub_stripes) * ci->stripe_length;
+                
+                add_superblock_stripe(&stripes, first / ci->stripe_length, 1);
+            }
+        } else if (ci->type & BLOCK_FLAG_RAID5) {
+            for (j = 0; j < ci->num_stripes; j++) {
+                UINT64 stripe_size = ci->size / (ci->num_stripes - 1);
+                
+                if (cis[j].offset + stripe_size <= superblock_addrs[sb] || cis[j].offset > superblock_addrs[sb] + sizeof(superblock))
+                    continue;
+                
+                first = superblock_addrs[sb] - cis[j].offset;
+                first -= first % (ci->stripe_length * (ci->num_stripes - 1));
+                first *= ci->num_stripes - 1;
+                
+                last = first + (ci->stripe_length * (ci->num_stripes - 1));
+                
+                add_superblock_stripe(&stripes, first / ci->stripe_length, (last - first) / ci->stripe_length);
+            }
+        } else if (ci->type & BLOCK_FLAG_RAID6) {
+            for (j = 0; j < ci->num_stripes; j++) {
+                UINT64 stripe_size = ci->size / (ci->num_stripes - 2);
+                
+                if (cis[j].offset + stripe_size <= superblock_addrs[sb] || cis[j].offset > superblock_addrs[sb] + sizeof(superblock))
+                    continue;
+                
+                first = superblock_addrs[sb] - cis[j].offset;
+                first -= first % (ci->stripe_length * (ci->num_stripes - 2));
+                first *= ci->num_stripes - 2;
+                
+                last = first + (ci->stripe_length * (ci->num_stripes - 2));
+                
+                add_superblock_stripe(&stripes, first / ci->stripe_length, (last - first) / ci->stripe_length);
+            }
+        } else { // SINGLE, DUPLICATE, RAID1
+            for (j = 0; j < ci->num_stripes; j++) {
+                if (cis[j].offset + ci->size <= superblock_addrs[sb] || cis[j].offset > superblock_addrs[sb] + sizeof(superblock))
+                    continue;
+                
+                // round the start down and the end up to stripe boundaries
+                first = ((superblock_addrs[sb] - cis[j].offset) / ci->stripe_length) * ci->stripe_length;
+                last = sector_align(superblock_addrs[sb] - cis[j].offset + sizeof(superblock), ci->stripe_length);
+                
+                add_superblock_stripe(&stripes, first / ci->stripe_length, (last - first) / ci->stripe_length);
+            }
+        }
+    }
+    
+    // Count the collected stripes while tearing the temporary list down.
+    while (!IsListEmpty(&stripes)) {
+        LIST_ENTRY* le = RemoveHeadList(&stripes);
+        superblock_stripe* ss = CONTAINING_RECORD(le, superblock_stripe, list_entry);
+        
+        count++;
+        
+        ExFreePool(ss);
+    }
+    
+    return count * ci->stripe_length;
+}
+
 static NTSTATUS load_stored_free_space_cache(device_extension* Vcb, chunk* c, PIRP Irp) {
     KEY searchkey;
     traverse_ptr tp;
@@ -254,7 +364,7 @@ static NTSTATUS load_stored_free_space_cache(device_extension* Vcb, chunk* c, PI
     NTSTATUS Status;
     UINT32 *checksums, crc32;
     FREE_SPACE_ENTRY* fse;
-    UINT64 size, num_entries, num_bitmaps, extent_length, bmpnum, off;
+    UINT64 size, num_entries, num_bitmaps, extent_length, bmpnum, off, total_space = 0, superblock_size;
     LIST_ENTRY *le, rollback;
     
     // FIXME - does this break if Vcb->superblock.sector_size is not 4096?
@@ -272,7 +382,7 @@ static NTSTATUS load_stored_free_space_cache(device_extension* Vcb, chunk* c, PI
         return Status;
     }
     
-    if (keycmp(&tp.item->key, &searchkey)) {
+    if (keycmp(tp.item->key, searchkey)) {
         TRACE("(%llx,%x,%llx) not found\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
         return STATUS_NOT_FOUND;
     }
@@ -293,7 +403,7 @@ static NTSTATUS load_stored_free_space_cache(device_extension* Vcb, chunk* c, PI
     num_entries = fsi->num_entries;
     num_bitmaps = fsi->num_bitmaps;
     
-    Status = open_fcb(Vcb, Vcb->root_root, inode, BTRFS_TYPE_FILE, NULL, NULL, &c->cache, Irp);
+    Status = open_fcb(Vcb, Vcb->root_root, inode, BTRFS_TYPE_FILE, NULL, NULL, &c->cache, PagedPool, Irp);
     if (!NT_SUCCESS(Status)) {
         ERR("open_fcb returned %08x\n", Status);
         return STATUS_NOT_FOUND;
@@ -308,6 +418,9 @@ static NTSTATUS load_stored_free_space_cache(device_extension* Vcb, chunk* c, PI
     
     c->cache->inode_item.flags |= BTRFS_INODE_NODATACOW;
     
+    if (num_entries == 0 && num_bitmaps == 0)
+        return STATUS_SUCCESS;
+    
     size = sector_align(c->cache->inode_item.st_size, Vcb->superblock.sector_size);
     
     data = ExAllocatePoolWithTag(PagedPool, size, ALLOC_TAG);
@@ -385,6 +498,8 @@ static NTSTATUS load_stored_free_space_cache(device_extension* Vcb, chunk* c, PI
                 ExFreePool(data);
                 return Status;
             }
+            
+            total_space += fse->size;
         } else if (fse->type != FREE_SPACE_BITMAP) {
             ERR("unknown free-space type %x\n", fse->type);
         }
@@ -404,7 +519,7 @@ static NTSTATUS load_stored_free_space_cache(device_extension* Vcb, chunk* c, PI
             
             if (fse->type == FREE_SPACE_BITMAP) {
                 // FIXME - make sure we don't overflow the buffer here
-                load_free_space_bitmap(Vcb, c, fse->offset, &data[bmpnum * Vcb->superblock.sector_size]);
+                load_free_space_bitmap(Vcb, c, fse->offset, &data[bmpnum * Vcb->superblock.sector_size], &total_space);
                 bmpnum++;
             }
             
@@ -412,6 +527,14 @@ static NTSTATUS load_stored_free_space_cache(device_extension* Vcb, chunk* c, PI
         }
     }
     
+    // do sanity check
+
+    superblock_size = get_superblock_size(c);
+    if (c->chunk_item->size - c->used != total_space + superblock_size) {
+        WARN("invalidating cache for chunk %llx: space was %llx, expected %llx\n", c->offset, total_space + superblock_size, c->chunk_item->size - c->used);
+        goto clearcache;
+    }
+    
     le = c->space.Flink;
     while (le != &c->space) {
         space* s = CONTAINING_RECORD(le, space, list_entry);
@@ -453,7 +576,7 @@ clearcache:
         return Status;
     }
     
-    clear_rollback(&rollback);
+    clear_rollback(Vcb, &rollback);
     
     c->cache->deleted = TRUE;
     mark_fcb_dirty(c->cache);
@@ -573,18 +696,19 @@ static NTSTATUS insert_cache_extent(fcb* fcb, UINT64 start, UINT64 length, LIST_
     while (le != &fcb->Vcb->chunks) {
         c = CONTAINING_RECORD(le, chunk, list_entry);
         
-        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-        
-        if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
-            if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, FALSE, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, length)) {
-                ExReleaseResourceLite(&c->lock);
-                ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
-                return STATUS_SUCCESS;
+        if (!c->readonly) {
+            ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+            
+            if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
+                if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, FALSE, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, length)) {
+                    ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
+                    return STATUS_SUCCESS;
+                }
             }
+            
+            ExReleaseResourceLite(&c->lock);
         }
         
-        ExReleaseResourceLite(&c->lock);
-        
         le = le->Flink;
     }
     
@@ -598,10 +722,8 @@ static NTSTATUS insert_cache_extent(fcb* fcb, UINT64 start, UINT64 length, LIST_
         ExAcquireResourceExclusiveLite(&c->lock, TRUE);
         
         if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
-            if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, FALSE, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, length)) {
-                ExReleaseResourceLite(&c->lock);
+            if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, FALSE, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, length))
                 return STATUS_SUCCESS;
-            }
         }
         
         ExReleaseResourceLite(&c->lock);
@@ -613,7 +735,7 @@ static NTSTATUS insert_cache_extent(fcb* fcb, UINT64 start, UINT64 length, LIST_
     return STATUS_DISK_FULL;
 }
 
-static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, BOOL* changed, PIRP Irp, LIST_ENTRY* rollback) {
+static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, BOOL* changed, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
     LIST_ENTRY* le;
     NTSTATUS Status;
     UINT64 num_entries, new_cache_size, i;
@@ -664,7 +786,7 @@ static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, BOOL* chan
     
     new_cache_size = sector_align(new_cache_size, CACHE_INCREMENTS * Vcb->superblock.sector_size);
     
-    TRACE("chunk %llx: cache_size = %llx, new_cache_size = %llx\n", c->offset, c->cache->inode_item.st_size, new_cache_size);
+    TRACE("chunk %llx: cache_size = %llx, new_cache_size = %llx\n", c->offset, c->cache ? c->cache->inode_item.st_size : 0, new_cache_size);
     
     if (!c->cache) {
         FREE_SPACE_ITEM* fsi;
@@ -694,10 +816,7 @@ static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, BOOL* chan
         
         c->cache->subvol = Vcb->root_root;
         
-        if (Vcb->root_root->lastinode == 0)
-            get_last_inode(Vcb, Vcb->root_root, Irp);
-        
-        c->cache->inode = Vcb->root_root->lastinode > 0x100 ? (Vcb->root_root->lastinode + 1) : 0x101;
+        c->cache->inode = InterlockedIncrement64(&Vcb->root_root->lastinode);
         
         c->cache->type = BTRFS_TYPE_FILE;
         c->cache->created = TRUE;
@@ -725,7 +844,7 @@ static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, BOOL* chan
             return Status;
         }
         
-        if (!keycmp(&searchkey, &tp.item->key))
+        if (!keycmp(searchkey, tp.item->key))
             delete_tree_item(Vcb, &tp, rollback);
         
         fsi->key.obj_id = c->cache->inode;
@@ -751,9 +870,7 @@ static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, BOOL* chan
         
         c->cache->extents_changed = TRUE;
         
-        Vcb->root_root->lastinode = c->cache->inode;
-        
-        flush_fcb(c->cache, TRUE, Irp, rollback);
+        flush_fcb(c->cache, TRUE, batchlist, Irp, rollback);
         
         *changed = TRUE;
     } else if (new_cache_size > c->cache->inode_item.st_size) {
@@ -777,7 +894,7 @@ static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, BOOL* chan
             return Status;
         }
         
-        if (keycmp(&searchkey, &tp.item->key)) {
+        if (keycmp(searchkey, tp.item->key)) {
             ERR("could not find (%llx,%x,%llx) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
             return STATUS_INTERNAL_ERROR;
         }
@@ -802,7 +919,7 @@ static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, BOOL* chan
         c->cache->inode_item.st_size = new_cache_size;
         c->cache->inode_item.st_blocks = new_cache_size;
         
-        flush_fcb(c->cache, TRUE, Irp, rollback);
+        flush_fcb(c->cache, TRUE, batchlist, Irp, rollback);
     
         *changed = TRUE;
     } else {
@@ -821,7 +938,7 @@ static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, BOOL* chan
             return Status;
         }
         
-        if (keycmp(&searchkey, &tp.item->key)) {
+        if (keycmp(searchkey, tp.item->key)) {
             INODE_ITEM* ii;
             
             ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
@@ -852,8 +969,9 @@ static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, BOOL* chan
             return Status;
         }
         
-        if (keycmp(&searchkey, &tp.item->key)) {
+        if (keycmp(searchkey, tp.item->key)) {
             ERR("could not find (%llx,%x,%llx) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
+            int3;
             return STATUS_INTERNAL_ERROR;
         }
         
@@ -871,17 +989,19 @@ static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, BOOL* chan
 }
 
 NTSTATUS allocate_cache(device_extension* Vcb, BOOL* changed, PIRP Irp, LIST_ENTRY* rollback) {
-    LIST_ENTRY* le = Vcb->chunks_changed.Flink;
+    LIST_ENTRY *le = Vcb->chunks_changed.Flink, batchlist;
     NTSTATUS Status;
 
     *changed = FALSE;
     
+    InitializeListHead(&batchlist);
+    
     while (le != &Vcb->chunks_changed) {
         BOOL b;
         chunk* c = CONTAINING_RECORD(le, chunk, list_entry_changed);
 
         ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-        Status = allocate_cache_chunk(Vcb, c, &b, Irp, rollback);
+        Status = allocate_cache_chunk(Vcb, c, &b, &batchlist, Irp, rollback);
         ExReleaseResourceLite(&c->lock);
         
         if (b)
@@ -889,16 +1009,19 @@ NTSTATUS allocate_cache(device_extension* Vcb, BOOL* changed, PIRP Irp, LIST_ENT
         
         if (!NT_SUCCESS(Status)) {
             ERR("allocate_cache_chunk(%llx) returned %08x\n", c->offset, Status);
+            clear_batch_list(Vcb, &batchlist);
             return Status;
         }
         
         le = le->Flink;
     }
     
+    commit_batch_list(Vcb, &batchlist, Irp, rollback);
+    
     return STATUS_SUCCESS;
 }
 
-static void add_rollback_space(LIST_ENTRY* rollback, BOOL add, LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c) {
+static void add_rollback_space(device_extension* Vcb, LIST_ENTRY* rollback, BOOL add, LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c) {
     rollback_space* rs;
     
     rs = ExAllocatePoolWithTag(PagedPool, sizeof(rollback_space), ALLOC_TAG);
@@ -913,10 +1036,10 @@ static void add_rollback_space(LIST_ENTRY* rollback, BOOL add, LIST_ENTRY* list,
     rs->length = length;
     rs->chunk = c;
     
-    add_rollback(rollback, add ? ROLLBACK_ADD_SPACE : ROLLBACK_SUBTRACT_SPACE, rs);
+    add_rollback(Vcb, rollback, add ? ROLLBACK_ADD_SPACE : ROLLBACK_SUBTRACT_SPACE, rs);
 }
 
-void _space_list_add2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback, const char* func) {
+void _space_list_add2(device_extension* Vcb, LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback, const char* func) {
     LIST_ENTRY* le;
     space *s, *s2;
     
@@ -940,7 +1063,7 @@ void _space_list_add2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, U
             InsertTailList(list_size, &s->list_entry_size);
         
         if (rollback)
-            add_rollback_space(rollback, TRUE, list, list_size, address, length, c);
+            add_rollback_space(Vcb, rollback, TRUE, list, list_size, address, length, c);
         
         return;
     }
@@ -957,7 +1080,7 @@ void _space_list_add2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, U
         if (address <= s2->address && address + length >= s2->address + s2->size) {
             if (address < s2->address) {
                 if (rollback)
-                    add_rollback_space(rollback, TRUE, list, list_size, address, s2->address - address, c);
+                    add_rollback_space(Vcb, rollback, TRUE, list, list_size, address, s2->address - address, c);
                 
                 s2->size += s2->address - address;
                 s2->address = address;
@@ -982,7 +1105,7 @@ void _space_list_add2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, U
             
             if (length > s2->size) {
                 if (rollback)
-                    add_rollback_space(rollback, TRUE, list, list_size, s2->address + s2->size, address + length - s2->address - s2->size, c);
+                    add_rollback_space(Vcb, rollback, TRUE, list, list_size, s2->address + s2->size, address + length - s2->address - s2->size, c);
                 
                 s2->size = length;
                 
@@ -1014,7 +1137,7 @@ void _space_list_add2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, U
         // new entry overlaps start of old one
         if (address < s2->address && address + length >= s2->address) {
             if (rollback)
-                add_rollback_space(rollback, TRUE, list, list_size, address, s2->address - address, c);
+                add_rollback_space(Vcb, rollback, TRUE, list, list_size, address, s2->address - address, c);
             
             s2->size += s2->address - address;
             s2->address = address;
@@ -1047,7 +1170,7 @@ void _space_list_add2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, U
         // new entry overlaps end of old one
         if (address <= s2->address + s2->size && address + length > s2->address + s2->size) {
             if (rollback)
-                add_rollback_space(rollback, TRUE, list, list_size, address, s2->address + s2->size - address, c);
+                add_rollback_space(Vcb, rollback, TRUE, list, list_size, address, s2->address + s2->size - address, c);
             
             s2->size = address + length - s2->address;
             
@@ -1085,7 +1208,7 @@ void _space_list_add2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, U
             }
             
             if (rollback)
-                add_rollback_space(rollback, TRUE, list, list_size, address, length, c);
+                add_rollback_space(Vcb, rollback, TRUE, list, list_size, address, length, c);
             
             s->address = address;
             s->size = length;
@@ -1128,10 +1251,10 @@ void _space_list_add2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, U
         order_space_entry(s, list_size);
     
     if (rollback)
-        add_rollback_space(rollback, TRUE, list, list_size, address, length, c);
+        add_rollback_space(Vcb, rollback, TRUE, list, list_size, address, length, c);
 }
 
-static void space_list_merge(LIST_ENTRY* spacelist, LIST_ENTRY* spacelist_size, LIST_ENTRY* deleting) {
+static void space_list_merge(device_extension* Vcb, LIST_ENTRY* spacelist, LIST_ENTRY* spacelist_size, LIST_ENTRY* deleting) {
     LIST_ENTRY* le;
     
     if (!IsListEmpty(deleting)) {
@@ -1139,14 +1262,14 @@ static void space_list_merge(LIST_ENTRY* spacelist, LIST_ENTRY* spacelist_size,
         while (le != deleting) {
             space* s = CONTAINING_RECORD(le, space, list_entry);
             
-            space_list_add2(spacelist, spacelist_size, s->address, s->size, NULL);
+            space_list_add2(Vcb, spacelist, spacelist_size, s->address, s->size, NULL);
             
             le = le->Flink;
         }
     }
 }
 
-static NTSTATUS update_chunk_cache(device_extension* Vcb, chunk* c, BTRFS_TIME* now, PIRP Irp, LIST_ENTRY* rollback) {
+static NTSTATUS update_chunk_cache(device_extension* Vcb, chunk* c, BTRFS_TIME* now, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
     NTSTATUS Status;
     KEY searchkey;
     traverse_ptr tp;
@@ -1157,7 +1280,7 @@ static NTSTATUS update_chunk_cache(device_extension* Vcb, chunk* c, BTRFS_TIME*
     UINT32* checksums;
     LIST_ENTRY* le;
     
-    space_list_merge(&c->space, &c->space_size, &c->deleting);
+    space_list_merge(Vcb, &c->space, &c->space_size, &c->deleting);
     
     data = ExAllocatePoolWithTag(NonPagedPool, c->cache->inode_item.st_size, ALLOC_TAG);
     if (!data) {
@@ -1197,7 +1320,7 @@ static NTSTATUS update_chunk_cache(device_extension* Vcb, chunk* c, BTRFS_TIME*
     c->cache->inode_item.sequence++;
     c->cache->inode_item.st_ctime = *now;
     
-    flush_fcb(c->cache, TRUE, Irp, rollback);
+    flush_fcb(c->cache, TRUE, batchlist, Irp, rollback);
     
     // update free_space item
     
@@ -1211,7 +1334,7 @@ static NTSTATUS update_chunk_cache(device_extension* Vcb, chunk* c, BTRFS_TIME*
         return Status;
     }
     
-    if (keycmp(&searchkey, &tp.item->key)) {
+    if (keycmp(searchkey, tp.item->key)) {
         ERR("could not find (%llx,%x,%llx) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
         return STATUS_INTERNAL_ERROR;
     }
@@ -1261,7 +1384,7 @@ static NTSTATUS update_chunk_cache(device_extension* Vcb, chunk* c, BTRFS_TIME*
 }
 
 NTSTATUS update_chunk_caches(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
-    LIST_ENTRY* le = Vcb->chunks_changed.Flink;
+    LIST_ENTRY *le = Vcb->chunks_changed.Flink, batchlist;
     NTSTATUS Status;
     chunk* c;
     LARGE_INTEGER time;
@@ -1270,21 +1393,26 @@ NTSTATUS update_chunk_caches(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollba
     KeQuerySystemTime(&time);
     win_time_to_unix(time, &now);
     
+    InitializeListHead(&batchlist);
+    
     while (le != &Vcb->chunks_changed) {
         c = CONTAINING_RECORD(le, chunk, list_entry_changed);
         
         ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-        Status = update_chunk_cache(Vcb, c, &now, Irp, rollback);
+        Status = update_chunk_cache(Vcb, c, &now, &batchlist, Irp, rollback);
         ExReleaseResourceLite(&c->lock);
 
         if (!NT_SUCCESS(Status)) {
             ERR("update_chunk_cache(%llx) returned %08x\n", c->offset, Status);
+            clear_batch_list(Vcb, &batchlist);
             return Status;
         }
         
         le = le->Flink;
     }
     
+    commit_batch_list(Vcb, &batchlist, Irp, rollback);
+    
     return STATUS_SUCCESS;
 }
 
@@ -1298,10 +1426,10 @@ void _space_list_add(device_extension* Vcb, chunk* c, BOOL deleting, UINT64 addr
     if (!c->list_entry_changed.Flink)
         InsertTailList(&Vcb->chunks_changed, &c->list_entry_changed);
     
-    _space_list_add2(list, deleting ? NULL : &c->space_size, address, length, c, rollback, func);
+    _space_list_add2(Vcb, list, deleting ? NULL : &c->space_size, address, length, c, rollback, func);
 }
 
-void _space_list_subtract2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback, const char* func) {
+void _space_list_subtract2(device_extension* Vcb, LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback, const char* func) {
     LIST_ENTRY *le, *le2;
     space *s, *s2;
     
@@ -1322,7 +1450,7 @@ void _space_list_subtract2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 addre
         
         if (s2->address >= address && s2->address + s2->size <= address + length) { // remove entry entirely
             if (rollback)
-                add_rollback_space(rollback, FALSE, list, list_size, s2->address, s2->size, c);
+                add_rollback_space(Vcb, rollback, FALSE, list, list_size, s2->address, s2->size, c);
             
             RemoveEntryList(&s2->list_entry);
             
@@ -1333,7 +1461,7 @@ void _space_list_subtract2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 addre
         } else if (address + length > s2->address && address + length < s2->address + s2->size) {
             if (address > s2->address) { // cut out hole
                 if (rollback)
-                    add_rollback_space(rollback, FALSE, list, list_size, address, length, c);
+                    add_rollback_space(Vcb, rollback, FALSE, list, list_size, address, length, c);
                 
                 s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
 
@@ -1358,7 +1486,7 @@ void _space_list_subtract2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 addre
                 return;
             } else { // remove start of entry
                 if (rollback)
-                    add_rollback_space(rollback, FALSE, list, list_size, s2->address, address + length - s2->address, c);
+                    add_rollback_space(Vcb, rollback, FALSE, list, list_size, s2->address, address + length - s2->address, c);
                 
                 s2->size -= address + length - s2->address;
                 s2->address = address + length;
@@ -1370,7 +1498,7 @@ void _space_list_subtract2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 addre
             }
         } else if (address > s2->address && address < s2->address + s2->size) { // remove end of entry
             if (rollback)
-                add_rollback_space(rollback, FALSE, list, list_size, address, s2->address + s2->size - address, c);
+                add_rollback_space(Vcb, rollback, FALSE, list, list_size, address, s2->address + s2->size - address, c);
             
             s2->size = address - s2->address;
             
@@ -1392,5 +1520,5 @@ void _space_list_subtract(device_extension* Vcb, chunk* c, BOOL deleting, UINT64
     if (!c->list_entry_changed.Flink)
         InsertTailList(&Vcb->chunks_changed, &c->list_entry_changed);
     
-    _space_list_subtract2(list, deleting ? NULL : &c->space_size, address, length, c, rollback, func);
+    _space_list_subtract2(Vcb, list, deleting ? NULL : &c->space_size, address, length, c, rollback, func);
 }
index aa42414..73c9f7d 100644 (file)
@@ -31,6 +31,7 @@
 
 extern LIST_ENTRY VcbList;
 extern ERESOURCE global_loading_lock;
+extern LIST_ENTRY volumes;
 
 static NTSTATUS get_file_ids(PFILE_OBJECT FileObject, void* data, ULONG length) {
     btrfs_get_file_ids* bgfi;
@@ -92,7 +93,7 @@ static NTSTATUS snapshot_tree_copy(device_extension* Vcb, UINT64 addr, root* sub
         return STATUS_INSUFFICIENT_RESOURCES;
     }
     
-    Status = read_data(Vcb, addr, Vcb->superblock.node_size, NULL, TRUE, buf, NULL, Irp);
+    Status = read_data(Vcb, addr, Vcb->superblock.node_size, NULL, TRUE, buf, NULL, NULL, Irp);
     if (!NT_SUCCESS(Status)) {
         ERR("read_data returned %08x\n", Status);
         goto end;
@@ -131,6 +132,7 @@ static NTSTATUS snapshot_tree_copy(device_extension* Vcb, UINT64 addr, root* sub
     th->address = t.new_address;
     th->tree_id = subvol->id;
     th->generation = Vcb->superblock.generation;
+    th->fs_uuid = Vcb->superblock.uuid;
     
     if (th->level == 0) {
         UINT32 i;
@@ -140,8 +142,7 @@ static NTSTATUS snapshot_tree_copy(device_extension* Vcb, UINT64 addr, root* sub
             if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) && ln[i].offset + ln[i].size <= Vcb->superblock.node_size - sizeof(tree_header)) {
                 EXTENT_DATA* ed = (EXTENT_DATA*)(((UINT8*)&th[1]) + ln[i].offset);
                 
-                // FIXME - what are we supposed to do with prealloc here? Replace it with sparse extents, or do new preallocation?
-                if (ed->type == EXTENT_TYPE_REGULAR && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
+                if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
                     EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ed->data[0];
                     
                     if (ed2->size != 0) { // not sparse
@@ -157,19 +158,18 @@ static NTSTATUS snapshot_tree_copy(device_extension* Vcb, UINT64 addr, root* sub
         }
     } else {
         UINT32 i;
-        UINT64 newaddr;
         internal_node* in = (internal_node*)&th[1];
         
         for (i = 0; i < th->num_items; i++) {
-            Status = snapshot_tree_copy(Vcb, in[i].address, subvol, dupflags, &newaddr, Irp, rollback);
+            TREE_BLOCK_REF tbr;
             
+            tbr.offset = subvol->id;
+            
+            Status = increase_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr, NULL, th->level - 1, Irp, rollback);
             if (!NT_SUCCESS(Status)) {
-                ERR("snapshot_tree_copy returned %08x\n", Status);
+                ERR("increase_extent_refcount returned %08x\n", Status);
                 goto end;
             }
-            
-            in[i].generation = Vcb->superblock.generation;
-            in[i].address = newaddr;
         }
     }
     
@@ -285,16 +285,13 @@ static NTSTATUS do_create_snapshot(device_extension* Vcb, PFILE_OBJECT parent, f
     
     free_trees(Vcb);
     
-    clear_rollback(&rollback);
+    clear_rollback(Vcb, &rollback);
     
     InitializeListHead(&rollback);
     
     // create new root
     
-    if (Vcb->root_root->lastinode == 0)
-        get_last_inode(Vcb, Vcb->root_root, Irp);
-    
-    id = Vcb->root_root->lastinode > 0x100 ? (Vcb->root_root->lastinode + 1) : 0x101;
+    id = InterlockedIncrement64(&Vcb->root_root->lastinode);
     Status = create_root(Vcb, id, &r, TRUE, Vcb->superblock.generation, Irp, &rollback);
     
     if (!NT_SUCCESS(Status)) {
@@ -302,6 +299,8 @@ static NTSTATUS do_create_snapshot(device_extension* Vcb, PFILE_OBJECT parent, f
         goto end;
     }
     
+    r->lastinode = subvol->lastinode;
+    
     if (!Vcb->uuid_root) {
         root* uuid_root;
         
@@ -334,7 +333,7 @@ static NTSTATUS do_create_snapshot(device_extension* Vcb, PFILE_OBJECT parent, f
         RtlCopyMemory(&searchkey.offset, &r->root_item.uuid.uuid[sizeof(UINT64)], sizeof(UINT64));
         
         Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE, Irp);
-    } while (NT_SUCCESS(Status) && !keycmp(&searchkey, &tp.item->key));
+    } while (NT_SUCCESS(Status) && !keycmp(searchkey, tp.item->key));
     
     *root_num = r->id;
     
@@ -394,7 +393,6 @@ static NTSTATUS do_create_snapshot(device_extension* Vcb, PFILE_OBJECT parent, f
     }
     
     RtlCopyMemory(tp.item->data, &r->root_item, sizeof(ROOT_ITEM));
-    Vcb->root_root->lastinode = r->id;
     
     // update ROOT_ITEM of original subvol
     
@@ -433,7 +431,7 @@ static NTSTATUS do_create_snapshot(device_extension* Vcb, PFILE_OBJECT parent, f
     
     RtlCopyMemory(fr->utf8.Buffer, utf8->Buffer, utf8->Length);
     
-    Status = open_fcb(Vcb, r, r->root_item.objid, BTRFS_TYPE_DIRECTORY, utf8, fcb, &fr->fcb, Irp);
+    Status = open_fcb(Vcb, r, r->root_item.objid, BTRFS_TYPE_DIRECTORY, utf8, fcb, &fr->fcb, PagedPool, Irp);
     if (!NT_SUCCESS(Status)) {
         ERR("open_fcb returned %08x\n", Status);
         free_fileref(fr);
@@ -486,9 +484,14 @@ static NTSTATUS do_create_snapshot(device_extension* Vcb, PFILE_OBJECT parent, f
     fcb->inode_item.transid = Vcb->superblock.generation;
     fcb->inode_item.sequence++;
     fcb->inode_item.st_size += utf8->Length * 2;
-    fcb->inode_item.st_ctime = now;
-    fcb->inode_item.st_mtime = now;
     
+    if (!ccb->user_set_change_time)
+        fcb->inode_item.st_ctime = now;
+    
+    if (!ccb->user_set_write_time)
+        fcb->inode_item.st_mtime = now;
+    
+    fcb->inode_item_changed = TRUE;
     mark_fcb_dirty(fcb);
     
     fcb->subvol->root_item.ctime = now;
@@ -522,7 +525,7 @@ static NTSTATUS do_create_snapshot(device_extension* Vcb, PFILE_OBJECT parent, f
     
 end:
     if (NT_SUCCESS(Status))
-        clear_rollback(&rollback);
+        clear_rollback(Vcb, &rollback);
     else
         do_rollback(Vcb, &rollback);
 
@@ -607,7 +610,7 @@ static NTSTATUS create_snapshot(device_extension* Vcb, PFILE_OBJECT FileObject,
     ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
 
     // no need for fcb_lock as we have tree_lock exclusively
-    Status = open_fileref(fcb->Vcb, &fr2, &nameus, fileref, FALSE, NULL, NULL, Irp);
+    Status = open_fileref(fcb->Vcb, &fr2, &nameus, fileref, FALSE, NULL, NULL, PagedPool, FALSE, Irp);
     
     if (NT_SUCCESS(Status)) {
         if (!fr2->deleted) {
@@ -653,12 +656,38 @@ static NTSTATUS create_snapshot(device_extension* Vcb, PFILE_OBJECT FileObject,
         goto end;
     }
     
+    // clear unique flag on extents of open files in subvol
+    if (!IsListEmpty(&subvol_fcb->subvol->fcbs)) {
+        LIST_ENTRY* le = subvol_fcb->subvol->fcbs.Flink;
+        
+        while (le != &subvol_fcb->subvol->fcbs) {
+            struct _fcb* openfcb = CONTAINING_RECORD(le, struct _fcb, list_entry);
+            LIST_ENTRY* le2;
+            
+            ExAcquireResourceExclusiveLite(openfcb->Header.Resource, TRUE);
+            
+            le2 = openfcb->extents.Flink;
+            
+            while (le2 != &openfcb->extents) {
+                extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
+                
+                ext->unique = FALSE;
+                
+                le2 = le2->Flink;
+            }
+            
+            ExReleaseResourceLite(openfcb->Header.Resource);
+            
+            le = le->Flink;
+        }
+    }
+    
     Status = do_create_snapshot(Vcb, FileObject, subvol_fcb, &utf8, &nameus, Irp);
     
     if (NT_SUCCESS(Status)) {
         file_ref* fr;
 
-        Status = open_fileref(Vcb, &fr, &nameus, fileref, FALSE, NULL, NULL, Irp);
+        Status = open_fileref(Vcb, &fr, &nameus, fileref, FALSE, NULL, NULL, PagedPool, FALSE, Irp);
         
         if (!NT_SUCCESS(Status)) {
             ERR("open_fileref returned %08x\n", Status);
@@ -779,7 +808,7 @@ static NTSTATUS create_subvol(device_extension* Vcb, PFILE_OBJECT FileObject, WC
     InitializeListHead(&rollback);
     
     // no need for fcb_lock as we have tree_lock exclusively
-    Status = open_fileref(fcb->Vcb, &fr2, &nameus, fileref, FALSE, NULL, NULL, Irp);
+    Status = open_fileref(fcb->Vcb, &fr2, &nameus, fileref, FALSE, NULL, NULL, PagedPool, FALSE, Irp);
     
     if (NT_SUCCESS(Status)) {
         if (!fr2->deleted) {
@@ -794,12 +823,9 @@ static NTSTATUS create_subvol(device_extension* Vcb, PFILE_OBJECT FileObject, WC
         goto end;
     }
     
-    if (Vcb->root_root->lastinode == 0)
-        get_last_inode(Vcb, Vcb->root_root, Irp);
-    
     // FIXME - make sure rollback removes new roots from internal structures
     
-    id = Vcb->root_root->lastinode > 0x100 ? (Vcb->root_root->lastinode + 1) : 0x101;
+    id = InterlockedIncrement64(&Vcb->root_root->lastinode);
     Status = create_root(Vcb, id, &r, FALSE, 0, Irp, &rollback);
     
     if (!NT_SUCCESS(Status)) {
@@ -841,7 +867,7 @@ static NTSTATUS create_subvol(device_extension* Vcb, PFILE_OBJECT FileObject, WC
         RtlCopyMemory(&searchkey.offset, &r->root_item.uuid.uuid[sizeof(UINT64)], sizeof(UINT64));
         
         Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE, Irp);
-    } while (NT_SUCCESS(Status) && !keycmp(&searchkey, &tp.item->key));
+    } while (NT_SUCCESS(Status) && !keycmp(searchkey, tp.item->key));
     
     *root_num = r->id;
     
@@ -913,6 +939,7 @@ static NTSTATUS create_subvol(device_extension* Vcb, PFILE_OBJECT FileObject, WC
     }
     
     rootfcb->sd_dirty = TRUE;
+    rootfcb->inode_item_changed = TRUE;
 
     ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
     InsertTailList(&r->fcbs, &rootfcb->list_entry);
@@ -926,6 +953,8 @@ static NTSTATUS create_subvol(device_extension* Vcb, PFILE_OBJECT FileObject, WC
     
     rootfcb->created = TRUE;
     
+    r->lastinode = rootfcb->inode;
+    
     // add INODE_REF
     
     irsize = sizeof(INODE_REF) - 1 + strlen(DOTDOT);
@@ -1019,20 +1048,23 @@ static NTSTATUS create_subvol(device_extension* Vcb, PFILE_OBJECT FileObject, WC
     fcb->inode_item.transid = Vcb->superblock.generation;
     fcb->inode_item.st_size += utf8.Length * 2;
     fcb->inode_item.sequence++;
-    fcb->inode_item.st_ctime = now;
-    fcb->inode_item.st_mtime = now;
     
+    if (!ccb->user_set_change_time)
+        fcb->inode_item.st_ctime = now;
+    
+    if (!ccb->user_set_write_time)
+        fcb->inode_item.st_mtime = now;
+    
+    fcb->inode_item_changed = TRUE;
     mark_fcb_dirty(fcb);
     
-    Vcb->root_root->lastinode = id;
-
     Status = STATUS_SUCCESS;    
     
 end:
     if (!NT_SUCCESS(Status))
         do_rollback(Vcb, &rollback);
     else
-        clear_rollback(&rollback);
+        clear_rollback(Vcb, &rollback);
     
     ExReleaseResourceLite(&Vcb->tree_lock);
     
@@ -1233,8 +1265,10 @@ static NTSTATUS set_inode_info(PFILE_OBJECT FileObject, void* data, ULONG length
     if (bsii->gid_changed)
         fcb->inode_item.st_gid = bsii->st_gid;
     
-    if (bsii->flags_changed || bsii->mode_changed || bsii->uid_changed || bsii->gid_changed)
+    if (bsii->flags_changed || bsii->mode_changed || bsii->uid_changed || bsii->gid_changed) {
+        fcb->inode_item_changed = TRUE;
         mark_fcb_dirty(fcb);
+    }
     
     Status = STATUS_SUCCESS;
     
@@ -1278,7 +1312,7 @@ static NTSTATUS is_volume_mounted(device_extension* Vcb, PIRP Irp) {
     return STATUS_SUCCESS;
 }
 
-static NTSTATUS fs_get_statistics(PDEVICE_OBJECT DeviceObject, PFILE_OBJECT FileObject, void* buffer, DWORD buflen, DWORD* retlen) {
+static NTSTATUS fs_get_statistics(PDEVICE_OBJECT DeviceObject, PFILE_OBJECT FileObject, void* buffer, DWORD buflen, ULONG_PTR* retlen) {
     FILESYSTEM_STATISTICS* fss;
     
     WARN("STUB: FSCTL_FILESYSTEM_GET_STATISTICS\n");
@@ -1328,7 +1362,7 @@ static NTSTATUS set_sparse(device_extension* Vcb, PFILE_OBJECT FileObject, void*
         return STATUS_INVALID_PARAMETER;
     }
     
-    if (!(ccb->access & FILE_WRITE_ATTRIBUTES)) {
+    if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_WRITE_ATTRIBUTES)) {
         WARN("insufficient privileges\n");
         return STATUS_ACCESS_DENIED;
     }
@@ -1478,7 +1512,7 @@ static NTSTATUS set_zero_data(device_extension* Vcb, PFILE_OBJECT FileObject, vo
         return STATUS_INVALID_PARAMETER;
     }
     
-    if (!(ccb->access & FILE_WRITE_DATA)) {
+    if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_WRITE_DATA)) {
         WARN("insufficient privileges\n");
         return STATUS_ACCESS_DENIED;
     }
@@ -1591,10 +1625,15 @@ static NTSTATUS set_zero_data(device_extension* Vcb, PFILE_OBJECT FileObject, vo
     
     fcb->inode_item.transid = Vcb->superblock.generation;
     fcb->inode_item.sequence++;
-    fcb->inode_item.st_ctime = now;
-    fcb->inode_item.st_mtime = now;
+    
+    if (!ccb->user_set_change_time)
+        fcb->inode_item.st_ctime = now;
+    
+    if (!ccb->user_set_write_time)
+        fcb->inode_item.st_mtime = now;
     
     fcb->extents_changed = TRUE;
+    fcb->inode_item_changed = TRUE;
     mark_fcb_dirty(fcb);
     
     send_notification_fcb(fileref, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED);
@@ -1614,7 +1653,7 @@ end:
     if (!NT_SUCCESS(Status))
         do_rollback(Vcb, &rollback);
     else
-        clear_rollback(&rollback);
+        clear_rollback(Vcb, &rollback);
     
     ExReleaseResourceLite(fcb->Header.Resource);
     ExReleaseResourceLite(&Vcb->tree_lock);
@@ -1622,7 +1661,7 @@ end:
     return Status;
 }
 
-static NTSTATUS query_ranges(device_extension* Vcb, PFILE_OBJECT FileObject, FILE_ALLOCATED_RANGE_BUFFER* inbuf, ULONG inbuflen, void* outbuf, ULONG outbuflen, DWORD* retlen) {
+static NTSTATUS query_ranges(device_extension* Vcb, PFILE_OBJECT FileObject, FILE_ALLOCATED_RANGE_BUFFER* inbuf, ULONG inbuflen, void* outbuf, ULONG outbuflen, ULONG_PTR* retlen) {
     NTSTATUS Status;
     fcb* fcb;
     LIST_ENTRY* le;
@@ -1721,7 +1760,7 @@ end:
     return Status;
 }
 
-static NTSTATUS get_object_id(device_extension* Vcb, PFILE_OBJECT FileObject, FILE_OBJECTID_BUFFER* buf, ULONG buflen, DWORD* retlen) {
+static NTSTATUS get_object_id(device_extension* Vcb, PFILE_OBJECT FileObject, FILE_OBJECTID_BUFFER* buf, ULONG buflen, ULONG_PTR* retlen) {
     fcb* fcb;
     
     TRACE("(%p, %p, %p, %x, %p)\n", Vcb, FileObject, buf, buflen, retlen);
@@ -1787,7 +1826,7 @@ static NTSTATUS lock_volume(device_extension* Vcb, PIRP Irp) {
     
     ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
     
-    if (Vcb->root_fileref && Vcb->root_fileref->fcb && (Vcb->root_fileref->fcb->open_count > 0 || has_open_children(Vcb->root_fileref))) {
+    if (Vcb->root_fileref && Vcb->root_fileref->fcb && (Vcb->root_fileref->open_count > 0 || has_open_children(Vcb->root_fileref))) {
         Status = STATUS_ACCESS_DENIED;
         ExReleaseResourceLite(&Vcb->fcb_lock);
         goto end;
@@ -1808,7 +1847,7 @@ static NTSTATUS lock_volume(device_extension* Vcb, PIRP Irp) {
     
     free_trees(Vcb);
     
-    clear_rollback(&rollback);
+    clear_rollback(Vcb, &rollback);
     
     ExReleaseResourceLite(&Vcb->tree_lock);
     
@@ -1949,7 +1988,7 @@ static NTSTATUS invalidate_volumes(PIRP Irp) {
                 
                 free_trees(Vcb);
                 
-                clear_rollback(&rollback);
+                clear_rollback(Vcb, &rollback);
                 
                 flush_fcb_caches(Vcb);
                 
@@ -2016,6 +2055,99 @@ static NTSTATUS is_volume_dirty(device_extension* Vcb, PIRP Irp) {
     return STATUS_SUCCESS;
 }
 
+// Handles FSCTL_GET_COMPRESSION. Writes a single USHORT compression format to the
+// caller's output buffer; the value written is always COMPRESSION_FORMAT_NONE.
+// Vcb is not consulted.
+//
+// Returns STATUS_INVALID_USER_BUFFER if the IRP carries neither a system buffer nor an MDL,
+// STATUS_INSUFFICIENT_RESOURCES if the MDL cannot be mapped, STATUS_INVALID_PARAMETER if the
+// output buffer is smaller than a USHORT, and STATUS_SUCCESS otherwise (with
+// IoStatus.Information set to sizeof(USHORT)).
+static NTSTATUS get_compression(device_extension* Vcb, PIRP Irp) {
+    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
+    USHORT* out = Irp->AssociatedIrp.SystemBuffer;
+    
+    TRACE("FSCTL_GET_COMPRESSION\n");
+
+    // Prefer the system buffer; fall back to mapping the MDL if there is one.
+    if (!out) {
+        if (Irp->MdlAddress == NULL)
+            return STATUS_INVALID_USER_BUFFER;
+
+        out = MmGetSystemAddressForMdlSafe(Irp->MdlAddress, LowPagePriority);
+
+        if (!out)
+            return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    // The length check deliberately comes after buffer resolution, so a missing
+    // buffer is reported in preference to a short one.
+    if (IrpSp->Parameters.FileSystemControl.OutputBufferLength < sizeof(USHORT))
+        return STATUS_INVALID_PARAMETER;
+
+    *out = COMPRESSION_FORMAT_NONE;
+    Irp->IoStatus.Information = sizeof(USHORT);
+
+    return STATUS_SUCCESS;
+}
+
+// Walks the volumes list and, for every entry whose filesystem UUID matches this Vcb's
+// superblock UUID and whose device UUID matches one of the Vcb's devices, sets both
+// gen1 and gen2 to the superblock generation minus one.
+static void update_volumes(device_extension* Vcb) {
+    LIST_ENTRY* entry;
+    
+    for (entry = volumes.Flink; entry != &volumes; entry = entry->Flink) {
+        volume* vol = CONTAINING_RECORD(entry, volume, list_entry);
+        UINT64 dev;
+        
+        // RtlCompareMemory returns the number of matching bytes, so anything short of
+        // the full UUID size means this entry belongs to a different filesystem.
+        if (RtlCompareMemory(&Vcb->superblock.uuid, &vol->fsuuid, sizeof(BTRFS_UUID)) != sizeof(BTRFS_UUID))
+            continue;
+        
+        for (dev = 0; dev < Vcb->superblock.num_devices; dev++) {
+            if (RtlCompareMemory(&Vcb->devices[dev].devitem.device_uuid, &vol->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) {
+                vol->gen1 = vol->gen2 = Vcb->superblock.generation - 1;
+                break;
+            }
+        }
+    }
+}
+
+// Handles FSCTL_DISMOUNT_VOLUME: flushes the volume, frees its trees, marks the Vcb as
+// being removed and clears VPB_MOUNTED on its VPB.
+//
+// Returns STATUS_SUCCESS if the volume is already not mounted or once the dismount steps
+// have run, and STATUS_ACCESS_DENIED if Vcb->disallow_dismount is set.
+static NTSTATUS dismount_volume(device_extension* Vcb, PIRP Irp) {
+    NTSTATUS Status;
+    KIRQL irql;
+    LIST_ENTRY rollback;
+    
+    TRACE("FSCTL_DISMOUNT_VOLUME\n");
+    
+    // Nothing to do if the VPB is not flagged as mounted.
+    if (!(Vcb->Vpb->Flags & VPB_MOUNTED))
+        return STATUS_SUCCESS;
+    
+    if (Vcb->disallow_dismount) {
+        WARN("attempting to dismount boot volume or one containing a pagefile\n");
+        return STATUS_ACCESS_DENIED;
+    }
+    
+    InitializeListHead(&rollback);
+    
+    // A failed notification is only logged; the dismount carries on regardless.
+    Status = FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_DISMOUNT);
+    if (!NT_SUCCESS(Status)) {
+        WARN("FsRtlNotifyVolumeEvent returned %08x\n", Status);
+    }
+    
+    ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
+    
+    flush_fcb_caches(Vcb);
+    
+    // NOTE(review): the result of do_write is not checked here, so a failed write does
+    // not stop the dismount or change the returned status.
+    if (Vcb->need_write && !Vcb->readonly)
+        do_write(Vcb, Irp, &rollback);
+    
+    free_trees(Vcb);
+    
+    clear_rollback(Vcb, &rollback);
+    
+    Vcb->removing = TRUE;
+    update_volumes(Vcb);
+    
+    ExReleaseResourceLite(&Vcb->tree_lock);
+    
+    // The VPB flags are changed while holding the VPB spin lock.
+    IoAcquireVpbSpinLock(&irql);
+    Vcb->Vpb->Flags &= ~VPB_MOUNTED;
+    Vcb->Vpb->Flags |= VPB_DIRECT_WRITES_ALLOWED;
+    IoReleaseVpbSpinLock(irql);
+    
+    return STATUS_SUCCESS;
+}
+
 NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL user) {
     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
     NTSTATUS Status;
@@ -2060,8 +2192,7 @@ NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL
             break;
 
         case FSCTL_DISMOUNT_VOLUME:
-            WARN("STUB: FSCTL_DISMOUNT_VOLUME\n");
-            Status = STATUS_NOT_IMPLEMENTED;
+            Status = dismount_volume(DeviceObject->DeviceExtension, Irp);
             break;
 
         case FSCTL_IS_VOLUME_MOUNTED:
@@ -2084,8 +2215,7 @@ NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL
             break;
 
         case FSCTL_GET_COMPRESSION:
-            WARN("STUB: FSCTL_GET_COMPRESSION\n");
-            Status = STATUS_NOT_IMPLEMENTED;
+            Status = get_compression(DeviceObject->DeviceExtension, Irp);
             break;
 
         case FSCTL_SET_COMPRESSION:
diff --git a/reactos/drivers/filesystems/btrfs/galois.c b/reactos/drivers/filesystems/btrfs/galois.c
new file mode 100644 (file)
index 0000000..b8e933d
--- /dev/null
@@ -0,0 +1,149 @@
+/* Copyright (c) Mark Harmstone 2016
+ * 
+ * This file is part of WinBtrfs.
+ * 
+ * WinBtrfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public Licence as published by
+ * the Free Software Foundation, either version 3 of the Licence, or
+ * (at your option) any later version.
+ * 
+ * WinBtrfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public Licence for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public Licence
+ * along with WinBtrfs.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include "btrfs_drv.h"
+
+// Power table: glog[i] is 2 raised to the power i in the byte-wide Galois field used by
+// this file (entries double up to 0x80, then wrap to 0x1d at index 8). The table has 256
+// entries, and the last one repeats glog[0]; the lookups in this file reduce their index
+// modulo 255 or otherwise keep it below 255.
+// NOTE(review): presumably the RAID-6 field GF(2^8) with polynomial 0x11d - confirm against
+// the paper cited further down.
+static const UINT8 glog[] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
+                             0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
+                             0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
+                             0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0, 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
+                             0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
+                             0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
+                             0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
+                             0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
+                             0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
+                             0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa, 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
+                             0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
+                             0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4, 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
+                             0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e, 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
+                             0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
+                             0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
+                             0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01};
+
+// Logarithm table, the inverse of glog: for a non-zero byte x, glog[gilog[x]] == x
+// (e.g. gilog[0x02] == 1, gilog[0x04] == 2, gilog[0x08] == 3). Zero has no logarithm;
+// gilog[0] is a placeholder of 0, and the functions in this file test for zero before
+// looking a value up.
+static const UINT8 gilog[] = {0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6, 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
+                              0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81, 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
+                              0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21, 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
+                              0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9, 0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
+                              0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd, 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
+                              0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd, 0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
+                              0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e, 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
+                              0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b, 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
+                              0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d, 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
+                              0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c, 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
+                              0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd, 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61,
+                              0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e, 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2,
+                              0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76, 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6,
+                              0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa, 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a,
+                              0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51, 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
+                              0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8, 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf};
+
+// Divides each of the len bytes at data by 2^div, in place, using the log tables:
+// the byte's logarithm has div subtracted from it (wrapping within 0..254) and the
+// result is mapped back through glog. Zero bytes are left as zero.
+void galois_divpower(UINT8* data, UINT8 div, UINT32 len) {
+    UINT32 i;
+
+    for (i = 0; i < len; i++) {
+        UINT8 exponent;
+
+        if (data[i] == 0)
+            continue;
+
+        exponent = gilog[data[i]];
+
+        // Add 255 first when the subtraction would otherwise reach zero or go negative.
+        if (exponent <= div)
+            data[i] = glog[(exponent + (255 - div)) % 255];
+        else
+            data[i] = glog[(exponent - div) % 255];
+    }
+}
+
+// Returns 2 raised to the power e in the Galois field, via the glog table.
+// The exponent is reduced modulo 255 before the lookup.
+UINT8 gpow2(UINT8 e) {
+    UINT8 exponent = e % 255;
+
+    return glog[exponent];
+}
+
+// Multiplies a by b in the Galois field by adding their logarithms (modulo 255) and
+// mapping the sum back through glog. Returns 0 if either operand is 0.
+UINT8 gmul(UINT8 a, UINT8 b) {
+    if (a != 0 && b != 0)
+        return glog[(gilog[a] + gilog[b]) % 255];
+
+    return 0;
+}
+
+// Divides a by b in the Galois field by subtracting logarithms and mapping the
+// difference back through glog. Returns 0 when a is 0, and 0xff as an error marker
+// when b is 0.
+UINT8 gdiv(UINT8 a, UINT8 b) {
+    int loga, logb;
+
+    if (b == 0)
+        return 0xff; // shouldn't happen
+
+    if (a == 0)
+        return 0;
+
+    loga = gilog[a];
+    logb = gilog[b];
+
+    // When log(a) < log(b) the difference is negative, so wrap it round from 255.
+    if (loga < logb)
+        return glog[255-((logb - loga) % 255)];
+
+    return glog[(loga - logb) % 255];
+}
+
+// The code from the following functions is derived from the paper
+// "The mathematics of RAID-6", by H. Peter Anvin.
+// https://www.kernel.org/pub/linux/kernel/people/hpa/raid6.pdf
+
+// Helper for galois_double: builds a per-byte mask from a packed word. Every byte of v
+// whose top bit (0x80) is set becomes 0xff in the result; every other byte becomes 0x00.
+// The 64-bit variant is compiled when _AMD64_ is defined, the 32-bit variant otherwise.
+// The __REACTOS__ conditionals differ only in where __inline sits relative to the
+// return type.
+#ifdef _AMD64_
+#ifdef __REACTOS__
+static __inline UINT64 galois_double_mask64(UINT64 v) {
+#else
+static UINT64 __inline galois_double_mask64(UINT64 v) {
+#endif
+    // keep only the top bit of each byte
+    v &= 0x8080808080808080;
+    // per byte: 0x100 - 0x01 = 0xff where the top bit was set, 0 elsewhere
+    return (v << 1) - (v >> 7);
+}
+#else
+#ifdef __REACTOS__
+static __inline UINT32 galois_double_mask32(UINT32 v) {
+#else
+static UINT32 __inline galois_double_mask32(UINT32 v) {
+#endif
+    // keep only the top bit of each byte
+    v &= 0x80808080;
+    // per byte: 0x100 - 0x01 = 0xff where the top bit was set, 0 elsewhere
+    return (v << 1) - (v >> 7);
+}
+#endif
+
+// Doubles every byte of data[0..len) in place, treating each byte as a Galois field
+// element: the byte is shifted left by one and, if its top bit was set, XORed with 0x1d.
+// Most of the buffer is handled a machine word at a time (64-bit when _AMD64_ is
+// defined, 32-bit otherwise); the remainder is handled byte by byte.
+void galois_double(UINT8* data, UINT32 len) {
+    // FIXME - SIMD?
+    
+#ifdef _AMD64_
+    // The comparison is strict, so when len is an exact multiple of the word size the
+    // final word is left for the byte loop below.
+    for (; len > sizeof(UINT64); data += sizeof(UINT64), len -= sizeof(UINT64)) {
+        UINT64* word = (UINT64*)data;
+        UINT64 before = *word;
+        UINT64 shifted = (before << 1) & 0xfefefefefefefefe;
+        
+        *word = shifted ^ (galois_double_mask64(before) & 0x1d1d1d1d1d1d1d1d);
+    }
+#else
+    // The comparison is strict, so when len is an exact multiple of the word size the
+    // final word is left for the byte loop below.
+    for (; len > sizeof(UINT32); data += sizeof(UINT32), len -= sizeof(UINT32)) {
+        UINT32* word = (UINT32*)data;
+        UINT32 before = *word;
+        UINT32 shifted = (before << 1) & 0xfefefefe;
+        
+        *word = shifted ^ (galois_double_mask32(before) & 0x1d1d1d1d);
+    }
+#endif
+    
+    // Whatever is left over goes through one byte at a time.
+    for (; len > 0; data++, len--) {
+        UINT8 b = data[0];
+        
+        data[0] = (b << 1) ^ ((b & 0x80) ? 0x1d : 0);
+    }
+}
index c84f499..a15091e 100644 (file)
@@ -151,7 +151,7 @@ device_extension* Vcb = DeviceObject->DeviceExtension;
     
     ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
 
-    if (Vcb->root_fileref && Vcb->root_fileref->fcb && (Vcb->root_fileref->fcb->open_count > 0 || has_open_children(Vcb->root_fileref))) {
+    if (Vcb->root_fileref && Vcb->root_fileref->fcb && (Vcb->root_fileref->open_count > 0 || has_open_children(Vcb->root_fileref))) {
         Status = STATUS_ACCESS_DENIED;
         goto end;
     }
@@ -176,7 +176,7 @@ static NTSTATUS pnp_query_remove_device(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
     
     ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
 
-    if (Vcb->root_fileref && Vcb->root_fileref->fcb && (Vcb->root_fileref->fcb->open_count > 0 || has_open_children(Vcb->root_fileref))) {
+    if (Vcb->root_fileref && Vcb->root_fileref->fcb && (Vcb->root_fileref->open_count > 0 || has_open_children(Vcb->root_fileref))) {
         Status = STATUS_ACCESS_DENIED;
         goto end;
     }
@@ -196,7 +196,7 @@ static NTSTATUS pnp_query_remove_device(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
     if (Vcb->need_write && !Vcb->readonly)
         do_write(Vcb, Irp, &rollback);
     
-    clear_rollback(&rollback);
+    clear_rollback(Vcb, &rollback);
 
     ExReleaseResourceLite(&Vcb->tree_lock);
 
@@ -222,8 +222,11 @@ static NTSTATUS pnp_remove_device(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
             WARN("FsRtlNotifyVolumeEvent returned %08x\n", Status);
         }
         
-        uninit(Vcb, FALSE);
-        Vcb->Vpb->Flags &= ~VPB_MOUNTED;
+        if (Vcb->open_files > 0) {
+            Vcb->removing = TRUE;
+            Vcb->Vpb->Flags &= ~VPB_MOUNTED;
+        } else
+            uninit(Vcb, FALSE);
     }
 
     return STATUS_SUCCESS;
@@ -241,8 +244,11 @@ static NTSTATUS pnp_surprise_removal(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
     TRACE("(%p, %p)\n", DeviceObject, Irp);
     
     if (DeviceObject->Vpb->Flags & VPB_MOUNTED) {
-        uninit(Vcb, FALSE);
-        Vcb->Vpb->Flags &= ~VPB_MOUNTED;
+        if (Vcb->open_files > 0) {
+            Vcb->removing = TRUE;
+            Vcb->Vpb->Flags &= ~VPB_MOUNTED;
+        } else
+            uninit(Vcb, FALSE);
     }
 
     return STATUS_SUCCESS;
index 869f97a..694eea5 100644 (file)
@@ -33,6 +33,7 @@ typedef struct {
     struct read_data_context* context;
     UINT8* buf;
     UINT16 stripenum;
+    BOOL rewrite;
     PIRP Irp;
     IO_STATUS_BLOCK iosb;
     enum read_data_status status;
@@ -42,23 +43,29 @@ typedef struct {
     KEVENT Event;
     NTSTATUS Status;
     chunk* c;
+    UINT64 address;
     UINT32 buflen;
     UINT64 num_stripes;
     LONG stripes_left;
     UINT64 type;
     UINT32 sector_size;
-    UINT16 firstoff, startoffstripe, sectors_per_stripe;
+    UINT16 firstoff, startoffstripe, sectors_per_stripe, stripes_cancel;
     UINT32* csum;
     BOOL tree;
     read_data_stripe* stripes;
+    KSPIN_LOCK spin_lock;
 } read_data_context;
 
 static NTSTATUS STDCALL read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
     read_data_stripe* stripe = conptr;
     read_data_context* context = (read_data_context*)stripe->context;
     UINT64 i;
+    LONG stripes_left;
+    KIRQL irql;
 
-    // FIXME - we definitely need a per-stripe lock here
+    KeAcquireSpinLock(&context->spin_lock, &irql);
+    
+    stripes_left = InterlockedDecrement(&context->stripes_left);
     
     if (stripe->status == ReadDataStatus_Cancelling) {
         stripe->status = ReadDataStatus_Cancelled;
@@ -69,87 +76,51 @@ static NTSTATUS STDCALL read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP I
     
     if (NT_SUCCESS(Irp->IoStatus.Status)) {
         if (context->type == BLOCK_FLAG_DUPLICATE) {
-            if (context->tree) {
-                tree_header* th = (tree_header*)stripe->buf;
-                UINT32 crc32;
-                
-                crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum));
-                
-                if (crc32 != *((UINT32*)th->csum))
-                    stripe->status = ReadDataStatus_CRCError;
-            } else if (context->csum) {
-                for (i = 0; i < Irp->IoStatus.Information / context->sector_size; i++) {
-                    UINT32 crc32 = ~calc_crc32c(0xffffffff, stripe->buf + (i * context->sector_size), context->sector_size);
-                    
-                    if (crc32 != context->csum[i]) {
-                        stripe->status = ReadDataStatus_CRCError;
-                        goto end;
-                    }
-                }
-            }
-            
             stripe->status = ReadDataStatus_Success;
-                
-            for (i = 0; i < context->num_stripes; i++) {
-                if (context->stripes[i].status == ReadDataStatus_Pending) {
-                    context->stripes[i].status = ReadDataStatus_Cancelling;
-                    IoCancelIrp(context->stripes[i].Irp);
+
+            if (stripes_left > 0 && stripes_left == context->stripes_cancel) {
+                for (i = 0; i < context->num_stripes; i++) {
+                    if (context->stripes[i].status == ReadDataStatus_Pending) {
+                        context->stripes[i].status = ReadDataStatus_Cancelling;
+                        IoCancelIrp(context->stripes[i].Irp);
+                    }
                 }
             }
         } else if (context->type == BLOCK_FLAG_RAID0) {
-            // no point checking the checksum here, as there's nothing we can do
             stripe->status = ReadDataStatus_Success;
         } else if (context->type == BLOCK_FLAG_RAID10) {
-            if (context->csum) {
-                UINT16 start, left;
-                UINT32 j;
-                
-                if (context->startoffstripe == stripe->stripenum) {
-                    start = 0;
-                    left = context->sectors_per_stripe - context->firstoff;
-                } else {
-                    UINT16 ns;
-                    
-                    if (context->startoffstripe > stripe->stripenum) {
-                        ns = stripe->stripenum + (context->num_stripes / 2) - context->startoffstripe;
-                    } else {
-                        ns = stripe->stripenum - context->startoffstripe;
+            stripe->status = ReadDataStatus_Success;
+            
+            if (stripes_left > 0 && context->stripes_cancel != 0) {
+                for (i = 0; i < context->num_stripes; i++) {
+                    if (context->stripes[i].status == ReadDataStatus_Pending && context->stripes[i].stripenum == stripe->stripenum) {
+                        context->stripes[i].status = ReadDataStatus_Cancelling;
+                        IoCancelIrp(context->stripes[i].Irp);
+                        break;
                     }
-                    
-                    if (context->firstoff == 0)
-                        start = context->sectors_per_stripe * ns;
-                    else
-                        start = (context->sectors_per_stripe - context->firstoff) + (context->sectors_per_stripe * (ns - 1));
-                    
-                    left = context->sectors_per_stripe;
                 }
-                
-                j = start;
-                for (i = 0; i < Irp->IoStatus.Information / context->sector_size; i++) {
-                    UINT32 crc32 = ~calc_crc32c(0xffffffff, stripe->buf + (i * context->sector_size), context->sector_size);
-                    
-                    if (crc32 != context->csum[j]) {
-                        int3;
-                        stripe->status = ReadDataStatus_CRCError;
-                        goto end;
-                    }
-                    
-                    j++;
-                    left--;
-                    
-                    if (left == 0) {
-                        j += context->sectors_per_stripe;
-                        left = context->sectors_per_stripe;
+            }
+        } else if (context->type == BLOCK_FLAG_RAID5) {
+            stripe->status = ReadDataStatus_Success;
+            
+            if (stripes_left > 0 && stripes_left == context->stripes_cancel && (context->csum || context->tree)) {
+                for (i = 0; i < context->num_stripes; i++) {
+                    if (context->stripes[i].status == ReadDataStatus_Pending) {
+                        context->stripes[i].status = ReadDataStatus_Cancelling;
+                        IoCancelIrp(context->stripes[i].Irp);
+                        break;
                     }
                 }
             }
-            
+        } else if (context->type == BLOCK_FLAG_RAID6) {
             stripe->status = ReadDataStatus_Success;
-            
-            for (i = 0; i < context->num_stripes; i++) {
-                if (context->stripes[i].status == ReadDataStatus_Pending && context->stripes[i].stripenum == stripe->stripenum) {
-                    context->stripes[i].status = ReadDataStatus_Cancelling;
-                    IoCancelIrp(context->stripes[i].Irp);
+
+            if (stripes_left > 0 && stripes_left == context->stripes_cancel && (context->csum || context->tree)) {
+                for (i = 0; i < context->num_stripes; i++) {
+                    if (context->stripes[i].status == ReadDataStatus_Pending) {
+                        context->stripes[i].status = ReadDataStatus_Cancelling;
+                        IoCancelIrp(context->stripes[i].Irp);
+                    }
                 }
             }
         }
@@ -160,526 +131,2702 @@ static NTSTATUS STDCALL read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP I
     }
     
 end:
-    if (InterlockedDecrement(&context->stripes_left) == 0)
+    KeReleaseSpinLock(&context->spin_lock, irql);
+    
+    if (stripes_left == 0)
         KeSetEvent(&context->Event, 0, FALSE);
 
     return STATUS_MORE_PROCESSING_REQUIRED;
 }
 
-NTSTATUS STDCALL read_data(device_extension* Vcb, UINT64 addr, UINT32 length, UINT32* csum, BOOL is_tree, UINT8* buf, chunk** pc, PIRP Irp) {
-    CHUNK_ITEM* ci;
-    CHUNK_ITEM_STRIPE* cis;
-    read_data_context* context;
-    UINT64 i, type, offset;
-    NTSTATUS Status;
-    device** devices;
-    UINT64 *stripestart = NULL, *stripeend = NULL;
-    UINT16 startoffstripe;
+static void raid5_reconstruct(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT64 maxsize,
+                              BOOL first, UINT32 firststripesize, UINT16 missing) { // Refills the buffer of stripe `missing` at *stripeoff from the buffers of all other stripes (copy of one, do_xor of the rest), then advances *stripeoff.
+    UINT16 parity, stripe;
+    UINT32 stripelen = first ? firststripesize : ci->stripe_length; // the first row uses firststripesize, every later row a full stripe_length
+    UINT32 readlen;
     
-    Status = verify_vcb(Vcb, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("verify_vcb returned %08x\n", Status);
-        return Status;
-    }
+    TRACE("(%llx, %x, %p, %p, %llx, %llx, %u, %x, %x)\n", off, skip, context, ci, *stripeoff, maxsize, first, firststripesize, missing);
     
-    if (Vcb->log_to_phys_loaded) {
-        chunk* c = get_chunk_from_address(Vcb, addr);
+    parity = ((off / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; // parity stripe index for this row; moves on by one for every (num_stripes - 1) * stripe_length of off
+    
+    readlen = min(min(ci->stripe_length - (skip % ci->stripe_length), stripelen), maxsize - *stripeoff); // clamp to the rest of the stripe unit, to stripelen, and to what is left before maxsize
+    
+    if (missing != parity) { // nothing is rebuilt when the missing stripe is this row's parity
+        UINT16 firststripe = missing == 0 ? 1 : 0; // lowest-numbered stripe that is not the missing one; it seeds the result
         
-        if (!c) {
-            ERR("get_chunk_from_address failed\n");
-            return STATUS_INTERNAL_ERROR;
+        RtlCopyMemory(&context->stripes[missing].buf[*stripeoff], &context->stripes[firststripe].buf[*stripeoff], readlen);
+        
+        for (stripe = firststripe + 1; stripe < context->num_stripes; stripe++) { // fold in every remaining stripe, the parity stripe included
+            if (stripe != missing)
+                do_xor(&context->stripes[missing].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
+        }
+    } else
+        TRACE("parity == missing == %x, skipping\n", parity);
+    
+    *stripeoff += stripelen; // advances by stripelen (not readlen), whether or not anything was rebuilt
+}
+
+static void raid5_decode(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT8* buf,
+                         UINT32* pos, UINT32 length, UINT32 firststripesize) { // Copies one row of the per-stripe buffers into buf in stripe order, starting just after the parity stripe; advances *pos, and *stripeoff once the row is used up.
+    UINT16 parity, stripe;
+    BOOL first = *pos == 0; // nothing copied into buf yet, so this is treated as the first row
+    UINT32 stripelen = first ? firststripesize : ci->stripe_length;
+    
+    parity = ((off / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; // parity stripe index for this row
+    
+    stripe = (parity + 1) % ci->num_stripes; // the walk starts at the stripe after parity and wraps round
+    
+    while (TRUE) {
+        if (stripe == parity) { // wrapped back to parity: every other stripe of the row has been visited
+            *stripeoff += stripelen;
+            return;
         }
         
-        ci = c->chunk_item;
-        offset = c->offset;
-        devices = c->devices;
-           
-        if (pc)
-            *pc = c;
-    } else {
-        LIST_ENTRY* le = Vcb->sys_chunks.Flink;
+        if (skip >= ci->stripe_length) { // the wanted data starts beyond this stripe unit, so pass over it
+            skip -= ci->stripe_length;
+        } else {
+            UINT32 copylen = min(ci->stripe_length - skip, length - *pos); // rest of this stripe unit, capped by what is still wanted
+            
+            RtlCopyMemory(buf + *pos, &context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen); // the (stripelen - stripe_length) term is zero except on the first row, where stripelen is firststripesize
+            
+            *pos += copylen;
+            
+            if (*pos == length) // request satisfied; *stripeoff is left untouched on this exit
+                return;
+            
+            skip = 0; // later stripe units of the row are read from their start
+        }
         
-        ci = NULL;
+        stripe = (stripe + 1) % ci->num_stripes;
+    }
+}
+
+static BOOL raid5_decode_with_checksum(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT8* buf,
+                                       UINT32* pos, UINT32 length, UINT32 firststripesize, UINT32* csum, UINT32 sector_size) { // Same row walk as raid5_decode, but each copied sector is checked against csum and, on mismatch, rebuilt from the other stripes. Returns FALSE if a sector still fails after the rebuild.
+    UINT16 parity, stripe;
+    BOOL first = *pos == 0; // nothing copied into buf yet, so this is treated as the first row
+    UINT32 stripelen = first ? firststripesize : ci->stripe_length;
+    
+    parity = ((off / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; // parity stripe index for this row
+    
+    stripe = (parity + 1) % ci->num_stripes; // the walk starts at the stripe after parity and wraps round
+    
+    while (TRUE) {
+        if (stripe == parity) { // wrapped back to parity: the row is used up
+            *stripeoff += stripelen;
+            return TRUE;
+        }
         
-        while (le != &Vcb->sys_chunks) {
-            sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry);
+        if (skip >= ci->stripe_length) { // the wanted data starts beyond this stripe unit, so pass over it
+            skip -= ci->stripe_length;
+        } else {
+            UINT32 i;
+            UINT32 copylen = min(ci->stripe_length - skip, length - *pos);
             
-            if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) {
-                CHUNK_ITEM* chunk_item = sc->data;
+            RtlCopyMemory(buf + *pos, &context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen);
+            
+            for (i = 0; i < copylen / sector_size; i ++) { // only whole sectors are checked; a trailing partial sector of copylen is not
+                UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + *pos + (i * sector_size), sector_size);
                 
-                if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) {
-                    ci = chunk_item;
-                    offset = sc->key.offset;
-                    cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1];
+                if (crc32 != csum[i]) { // NOTE(review): csum is indexed by i alone, not by *pos - presumably the caller passes a pointer already offset for this stripe unit; confirm
+                    UINT16 j, firststripe = stripe == 0 ? 1 : 0; // lowest-numbered stripe other than the failing one; it seeds the rebuild
                     
-                    devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG);
-                    if (!devices) {
-                        ERR("out of memory\n");
-                        return STATUS_INSUFFICIENT_RESOURCES;
+                    RtlCopyMemory(buf + *pos + (i * sector_size),
+                                  &context->stripes[firststripe].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], sector_size);
+                    
+                    for (j = firststripe + 1; j < ci->num_stripes; j++) { // fold in every other stripe, parity included
+                        if (j != stripe) {
+                            do_xor(buf + *pos + (i * sector_size), &context->stripes[j].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], sector_size);
+                        }
                     }
                     
-                    for (i = 0; i < ci->num_stripes; i++) {
-                        devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid);
+                    crc32 = ~calc_crc32c(0xffffffff, buf + *pos + (i * sector_size), sector_size); // re-check the rebuilt sector
+                    
+                    if (crc32 != csum[i]) {
+                        ERR("unrecoverable checksum error\n");
+                        return FALSE;
                     }
                     
-                    break;
+                    RtlCopyMemory(&context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], buf + *pos + (i * sector_size), sector_size); // store the repaired sector back into the stripe buffer
+                    context->stripes[stripe].rewrite = TRUE; // flags the stripe; presumably the caller writes it back to the device - confirm
                 }
             }
             
-            le = le->Flink;
-        }
-        
-        if (!ci) {
-            ERR("could not find chunk for %llx in bootstrap\n", addr);
-            return STATUS_INTERNAL_ERROR;
+            *pos += copylen;
+            
+            if (*pos == length) // request satisfied; *stripeoff is left untouched on this exit
+                return TRUE;
+            
+            skip = 0; // later stripe units of the row are read from their start
         }
         
-        if (pc)
-            *pc = NULL;
+        stripe = (stripe + 1) % ci->num_stripes;
     }
     
-    if (ci->type & BLOCK_FLAG_DUPLICATE) {
-        type = BLOCK_FLAG_DUPLICATE;
-    } else if (ci->type & BLOCK_FLAG_RAID0) {
-        type = BLOCK_FLAG_RAID0;
-    } else if (ci->type & BLOCK_FLAG_RAID1) {
-        type = BLOCK_FLAG_DUPLICATE;
-    } else if (ci->type & BLOCK_FLAG_RAID10) {
-        type = BLOCK_FLAG_RAID10;
-    } else if (ci->type & BLOCK_FLAG_RAID5) {
-        FIXME("RAID5 not yet supported\n");
-        return STATUS_NOT_IMPLEMENTED;
-    } else if (ci->type & BLOCK_FLAG_RAID6) {
-        FIXME("RAID6 not yet supported\n");
-        return STATUS_NOT_IMPLEMENTED;
-    } else { // SINGLE
-        type = BLOCK_FLAG_DUPLICATE;
-    }
-
-    cis = (CHUNK_ITEM_STRIPE*)&ci[1];
+    return FALSE; // not reached: the loop above only exits through a return
+}
 
-    context = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_context), ALLOC_TAG);
-    if (!context) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
+static BOOL raid5_decode_with_checksum_metadata(UINT64 addr, UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT8* buf,
+                                                UINT32* pos, UINT32 length, UINT32 firststripesize, UINT32 node_size) { // Same row walk as raid5_decode, but validates the tree header at the start of buf (address and checksum) and, on mismatch, rebuilds the copied range from the other stripes. Returns FALSE if it still fails.
+    UINT16 parity, stripe;
+    BOOL first = *pos == 0; // nothing copied into buf yet, so this is treated as the first row
+    UINT32 stripelen = first ? firststripesize : ci->stripe_length;
     
-    RtlZeroMemory(context, sizeof(read_data_context));
-    KeInitializeEvent(&context->Event, NotificationEvent, FALSE);
+    parity = ((off / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes; // parity stripe index for this row
     
-    context->stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG);
-    if (!context->stripes) {
-        ERR("out of memory\n");
-        ExFreePool(context);
-        return STATUS_INSUFFICIENT_RESOURCES;
+    stripe = (parity + 1) % ci->num_stripes; // the walk starts at the stripe after parity and wraps round
+    
+    while (TRUE) {
+        if (stripe == parity) { // wrapped back to parity: the row is used up
+            *stripeoff += stripelen;
+            return TRUE;
+        }
+        
+        if (skip >= ci->stripe_length) { // the wanted data starts beyond this stripe unit, so pass over it
+            skip -= ci->stripe_length;
+        } else {
+            UINT32 copylen = min(ci->stripe_length - skip, length - *pos);
+            tree_header* th = (tree_header*)buf; // header is read from the start of buf, not from buf + *pos
+            UINT32 crc32;
+            
+            RtlCopyMemory(buf + *pos, &context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen);
+            
+            crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, node_size - sizeof(th->csum)); // NOTE(review): covers node_size bytes from the start of buf although only copylen bytes were just copied - confirm buf holds a complete node here
+            
+            if (addr != th->address || crc32 != *((UINT32*)th->csum)) { // wrong address or wrong checksum: try a rebuild
+                UINT16 j, firststripe = stripe == 0 ? 1 : 0; // lowest-numbered stripe other than the failing one; it seeds the rebuild
+                
+                RtlCopyMemory(buf + *pos, &context->stripes[firststripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen);
+                
+                for (j = firststripe + 1; j < ci->num_stripes; j++) { // fold in every other stripe, parity included
+                    if (j != stripe) {
+                        do_xor(buf + *pos, &context->stripes[j].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen);
+                    }
+                }
+                
+                crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, node_size - sizeof(th->csum)); // re-check after the rebuild
+                
+                if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
+                    ERR("unrecoverable checksum error\n");
+                    return FALSE;
+                }
+            }
+            
+            RtlCopyMemory(&context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], buf + *pos, copylen); // NOTE(review): this copy-back and the flag below run even when no repair happened, unlike raid5_decode_with_checksum - confirm intended
+            context->stripes[stripe].rewrite = TRUE;
+            
+            *pos += copylen;
+            
+            if (*pos == length) // request satisfied; *stripeoff is left untouched on this exit
+                return TRUE;
+            
+            skip = 0; // later stripe units of the row are read from their start
+        }
+        
+        stripe = (stripe + 1) % ci->num_stripes;
     }
     
-    RtlZeroMemory(context->stripes, sizeof(read_data_stripe) * ci->num_stripes);
+    return FALSE; // not reached: the loop above only exits through a return
+}
+
+static void raid6_reconstruct1(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT64 maxsize,
+                               BOOL first, UINT32 firststripesize, UINT16 missing) { // Refills the buffer of the single stripe `missing` at *stripeoff from parity1 and the remaining non-parity stripes, then advances *stripeoff. parity2 is not used.
+    UINT16 parity1, parity2, stripe;
+    UINT32 stripelen = first ? firststripesize : ci->stripe_length; // the first row uses firststripesize, every later row a full stripe_length
+    UINT32 readlen;
     
-    context->buflen = length;
-    context->num_stripes = ci->num_stripes;
-    context->stripes_left = context->num_stripes;
-    context->sector_size = Vcb->superblock.sector_size;
-    context->csum = csum;
-    context->tree = is_tree;
-    context->type = type;
+    TRACE("(%llx, %x, %p, %p, %llx, %llx, %u, %x, %x)\n", off, skip, context, ci, *stripeoff, maxsize, first, firststripesize, missing);
     
-    stripestart = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * ci->num_stripes, ALLOC_TAG);
-    if (!stripestart) {
-        ERR("out of memory\n");
-        ExFreePool(context);
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
+    parity1 = ((off / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; // first parity stripe index for this row; moves on by one for every (num_stripes - 2) * stripe_length of off
+    parity2 = (parity1 + 1) % ci->num_stripes; // second parity stripe directly follows the first
     
-    stripeend = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * ci->num_stripes, ALLOC_TAG);
-    if (!stripeend) {
-        ERR("out of memory\n");
-        ExFreePool(stripestart);
-        ExFreePool(context);
-        return STATUS_INSUFFICIENT_RESOURCES;
+    readlen = min(min(ci->stripe_length - (skip % ci->stripe_length), stripelen), maxsize - *stripeoff); // clamp to the rest of the stripe unit, to stripelen, and to what is left before maxsize
+    
+    if (missing != parity1 && missing != parity2) { // only a non-parity stripe is rebuilt
+        RtlCopyMemory(&context->stripes[missing].buf[*stripeoff], &context->stripes[parity1].buf[*stripeoff], readlen); // seed the result with parity1
+        stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1); // start at the stripe just before parity1, wrapping at 0
+        
+        do { // walk backwards over every stripe that is neither parity1 nor parity2
+            if (stripe != missing)
+                do_xor(&context->stripes[missing].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
+            
+            stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+        } while (stripe != parity2);
+    } else
+        TRACE("skipping parity stripe\n");
+    
+    *stripeoff += stripelen; // advances by stripelen (not readlen), whether or not anything was rebuilt
+}
+
+static void raid6_reconstruct2(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT64 maxsize,
+                               BOOL first, UINT32 firststripesize, UINT16 missing1, UINT16 missing2) { // Refills the buffers of two missing stripes at *stripeoff, choosing one of three methods by whether parity1, parity2 or neither is among them; always advances *stripeoff.
+    UINT16 parity1, parity2, stripe;
+    UINT32 stripelen = first ? firststripesize : ci->stripe_length; // the first row uses firststripesize, every later row a full stripe_length
+    UINT32 readlen = min(min(ci->stripe_length - (skip % ci->stripe_length), stripelen), maxsize - *stripeoff); // clamp to the rest of the stripe unit, to stripelen, and to what is left before maxsize
+    
+    TRACE("(%llx, %x, %p, %p, %llx, %llx, %u, %x, %x, %x)\n", off, skip, context, ci, *stripeoff, maxsize,
+          first, firststripesize, missing1, missing2);
+    
+    parity1 = ((off / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; // first parity stripe (p) for this row
+    parity2 = (parity1 + 1) % ci->num_stripes; // second parity stripe (q) directly follows it
+    
+    // skip if missing stripes are p and q
+    if ((parity1 == missing1 && parity2 == missing2) || (parity1 == missing2 && parity2 == missing1)) {
+        *stripeoff += stripelen;
+        return;
     }
     
-    if (type == BLOCK_FLAG_RAID0) {
-        UINT64 startoff, endoff;
-        UINT16 endoffstripe;
+    if (missing1 == parity2 || missing2 == parity2) { // reconstruct from p and data
+        UINT16 missing = missing1 == parity2 ? missing2 : missing1; // the missing stripe that is not q; only this one is rebuilt
         
-        get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe);
-        get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe);
+        RtlCopyMemory(&context->stripes[missing].buf[*stripeoff], &context->stripes[parity1].buf[*stripeoff], readlen); // seed the result with p
+        stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1); // start at the stripe just before p, wrapping at 0
         
-        for (i = 0; i < ci->num_stripes; i++) {
-            if (startoffstripe > i) {
-                stripestart[i] = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
-            } else if (startoffstripe == i) {
-                stripestart[i] = startoff;
-            } else {
-                stripestart[i] = startoff - (startoff % ci->stripe_length);
-            }
+        do { // walk backwards over every stripe that is neither p nor q
+            if (stripe != missing)
+                do_xor(&context->stripes[missing].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
             
-            if (endoffstripe > i) {
-                stripeend[i] = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
-            } else if (endoffstripe == i) {
-                stripeend[i] = endoff + 1;
-            } else {
-                stripeend[i] = endoff - (endoff % ci->stripe_length);
-            }
-        }
-    } else if (type == BLOCK_FLAG_RAID10) {
-        UINT64 startoff, endoff;
-        UINT16 endoffstripe, j;
+            stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+        } while (stripe != parity2);
+    } else if (missing1 == parity1 || missing2 == parity1) { // reconstruct from q and data
+        UINT16 missing = missing1 == parity1 ? missing2 : missing1; // the missing stripe that is not p; only this one is rebuilt
+        UINT16 i, div;
         
-        get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe);
-        get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe);
+        stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1); // start at the stripe just before p, wrapping at 0
         
-        if ((ci->num_stripes % ci->sub_stripes) != 0) {
-            ERR("chunk %llx: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes);
-            Status = STATUS_INTERNAL_ERROR;
-            goto exit;
-        }
+        i = ci->num_stripes - 3; // position counter for `stripe`; starts at num_stripes - 3 and counts down as the walk goes backwards
         
-        context->firstoff = (startoff % ci->stripe_length) / Vcb->superblock.sector_size;
-        context->startoffstripe = startoffstripe;
-        context->sectors_per_stripe = ci->stripe_length / Vcb->superblock.sector_size;
+        if (stripe == missing) { // the first stripe of the walk is the missing one: start from zeroes and remember its position
+            RtlZeroMemory(&context->stripes[missing].buf[*stripeoff], readlen);
+            div = i;
+        } else
+            RtlCopyMemory(&context->stripes[missing].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
         
-        startoffstripe *= ci->sub_stripes;
-        endoffstripe *= ci->sub_stripes;
+        stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
         
-        for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
-            if (startoffstripe > i) {
-                stripestart[i] = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
-            } else if (startoffstripe == i) {
-                stripestart[i] = startoff;
-            } else {
-                stripestart[i] = startoff - (startoff % ci->stripe_length);
-            }
+        i--;
+        do {
+            galois_double(&context->stripes[missing].buf[*stripeoff], readlen); // presumably multiplies every byte by 2 in the field used by galois.c - confirm
             
-            if (endoffstripe > i) {
-                stripeend[i] = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
-            } else if (endoffstripe == i) {
-                stripeend[i] = endoff + 1;
-            } else {
-                stripeend[i] = endoff - (endoff % ci->stripe_length);
-            }
+            if (stripe != missing)
+                do_xor(&context->stripes[missing].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
+            else
+                div = i; // remember the position of the missing stripe
             
-            for (j = 1; j < ci->sub_stripes; j++) {
-                stripestart[i+j] = stripestart[i];
-                stripeend[i+j] = stripeend[i];
-            }
+            stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+            i--;
+        } while (stripe != parity2);
+        
+        do_xor(&context->stripes[missing].buf[*stripeoff], &context->stripes[parity2].buf[*stripeoff], readlen); // finally fold in q itself
+        
+        if (div != 0) // position 0 needs no division
+            galois_divpower(&context->stripes[missing].buf[*stripeoff], div, readlen); // presumably divides every byte by 2^div in the same field - confirm
+    } else { // reconstruct from p and q
+        UINT16 x, y, i; // x and y record the walk positions of missing1 and missing2
+        UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
+        UINT32 j;
+        
+        stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1); // start at the stripe just before p, wrapping at 0
+        
+        // put qxy in missing1
+        // put pxy in missing2
+        
+        i = ci->num_stripes - 3; // position counter for `stripe`; starts at num_stripes - 3 and counts down
+        if (stripe == missing1 || stripe == missing2) { // the first stripe of the walk is one of the missing ones: both accumulators start from zeroes
+            RtlZeroMemory(&context->stripes[missing1].buf[*stripeoff], readlen);
+            RtlZeroMemory(&context->stripes[missing2].buf[*stripeoff], readlen);
+            
+            if (stripe == missing1)
+                x = i;
+            else
+                y = i;
+        } else {
+            RtlCopyMemory(&context->stripes[missing1].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
+            RtlCopyMemory(&context->stripes[missing2].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
         }
-    } else if (type == BLOCK_FLAG_DUPLICATE) {
-        for (i = 0; i < ci->num_stripes; i++) {
-            stripestart[i] = addr - offset;
-            stripeend[i] = stripestart[i] + length;
+        
+        stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+        
+        i--;
+        do {
+            galois_double(&context->stripes[missing1].buf[*stripeoff], readlen); // only the qxy accumulator (missing1) is doubled; pxy (missing2) is a plain XOR sum
+            
+            if (stripe != missing1 && stripe != missing2) {
+                do_xor(&context->stripes[missing1].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
+                do_xor(&context->stripes[missing2].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
+            } else if (stripe == missing1)
+                x = i;
+            else if (stripe == missing2)
+                y = i;
+            
+            stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+            i--;
+        } while (stripe != parity2);
+        
+        gyx = gpow2(y > x ? (y-x) : (255-x+y)); // exponent y - x, with 255 added when y <= x; presumably gpow2(n) is 2^n in the galois.c field - confirm
+        gx = gpow2(255-x); // presumably the inverse of 2^x in that field - confirm
+
+        denom = gdiv(1, gyx ^ 1);
+        a = gmul(gyx, denom);
+        b = gmul(gx, denom);
+        
+        p = &context->stripes[parity1].buf[*stripeoff];
+        q = &context->stripes[parity2].buf[*stripeoff];
+        pxy = &context->stripes[missing2].buf[*stripeoff];
+        qxy = &context->stripes[missing1].buf[*stripeoff]; 
+        
+        for (j = 0; j < readlen; j++) { // byte by byte, overwrite the qxy accumulator in missing1's buffer with the combined result
+            *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);
+            
+            p++;
+            q++;
+            pxy++;
+            qxy++;
         }
+        
+        do_xor(&context->stripes[missing2].buf[*stripeoff], &context->stripes[missing1].buf[*stripeoff], readlen); // missing2 = pxy ^ (result now in missing1) ...
+        do_xor(&context->stripes[missing2].buf[*stripeoff], &context->stripes[parity1].buf[*stripeoff], readlen); // ... ^ p
     }
     
-    // FIXME - for RAID, check beforehand whether there's enough devices to satisfy request
+    *stripeoff += stripelen; // advances by stripelen, not readlen
+}
+
+static void raid6_decode(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT8* buf,
+                         UINT32* pos, UINT32 length, UINT32 firststripesize) { // Copies one row of the per-stripe buffers into buf in stripe order, starting two stripes after parity1 and stopping at parity1; advances *pos, and *stripeoff once the row is used up.
+    UINT16 parity1, stripe;
+    BOOL first = *pos == 0; // nothing copied into buf yet, so this is treated as the first row
+    UINT32 stripelen = first ? firststripesize : ci->stripe_length;
     
-    for (i = 0; i < ci->num_stripes; i++) {
-        PIO_STACK_LOCATION IrpSp;
+    parity1 = ((off / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes; // first parity stripe index for this row
+    
+    stripe = (parity1 + 2) % ci->num_stripes; // start past both parity stripes (parity1 and parity1 + 1), wrapping round
+    
+    while (TRUE) {
+        if (stripe == parity1) { // wrapped back to parity1: the row is used up
+            *stripeoff += stripelen;
+            return;
+        }
         
-        if (!devices[i] || stripestart[i] == stripeend[i]) {
-            context->stripes[i].status = ReadDataStatus_MissingDevice;
-            context->stripes[i].buf = NULL;
-            context->stripes_left--;
+        if (skip >= ci->stripe_length) { // the wanted data starts beyond this stripe unit, so pass over it
+            skip -= ci->stripe_length;
         } else {
-            context->stripes[i].context = (struct read_data_context*)context;
-            context->stripes[i].buf = ExAllocatePoolWithTag(NonPagedPool, stripeend[i] - stripestart[i], ALLOC_TAG);
+            UINT32 copylen = min(ci->stripe_length - skip, length - *pos); // rest of this stripe unit, capped by what is still wanted
             
-            if (!context->stripes[i].buf) {
-                ERR("out of memory\n");
-                Status = STATUS_INSUFFICIENT_RESOURCES;
-                goto exit;
+            RtlCopyMemory(buf + *pos, &context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen); // the (stripelen - stripe_length) term is zero except on the first row, where stripelen is firststripesize
+            
+            *pos += copylen;
+            
+            if (*pos == length) // request satisfied; *stripeoff is left untouched on this exit
+                return;
+            
+            skip = 0; // later stripe units of the row are read from their start
+        }
+        
+        stripe = (stripe + 1) % ci->num_stripes;
+    }
+}
+
+static BOOL raid6_decode_with_checksum(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT8* buf,
+                                       UINT32* pos, UINT32 length, UINT32 firststripesize, UINT32* csum, UINT32 sector_size) {
+    UINT16 parity1, parity2, stripe;
+    BOOL first = *pos == 0;
+    UINT32 stripelen = first ? firststripesize : ci->stripe_length;
+    
+    parity1 = ((off / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
+    parity2 = (parity1 + 1) % ci->num_stripes;
+    stripe = (parity1 + 2) % ci->num_stripes;
+    
+    while (TRUE) {
+        if (stripe == parity1) {
+            *stripeoff += stripelen;
+            return TRUE;
+        }
+        
+        if (skip >= ci->stripe_length) {
+            skip -= ci->stripe_length;
+        } else {
+            UINT32 i;
+            UINT32 copylen = min(ci->stripe_length - skip, length - *pos);
+            
+            RtlCopyMemory(buf + *pos, &context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen);
+            
+            for (i = 0; i < copylen / sector_size; i ++) {
+                UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + *pos + (i * sector_size), sector_size);
+                
+                if (crc32 != csum[i]) {
+                    UINT16 j, firststripe;
+                    
+                    if (parity2 == 0 && stripe == 1)
+                        firststripe = 2;
+                    else if (parity2 == 0 || stripe == 0)
+                        firststripe = 1;
+                    else
+                        firststripe = 0;
+                    
+                    RtlCopyMemory(buf + *pos + (i * sector_size),
+                                  &context->stripes[firststripe].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], sector_size);
+                    
+                    for (j = firststripe + 1; j < ci->num_stripes; j++) {
+                        if (j != stripe && j != parity2) {
+                            do_xor(buf + *pos + (i * sector_size), &context->stripes[j].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], sector_size);
+                        }
+                    }
+                    
+                    crc32 = ~calc_crc32c(0xffffffff, buf + *pos + (i * sector_size), sector_size);
+                    
+                    if (crc32 != csum[i]) {
+                        UINT8 *parity, *buf2;
+                        UINT16 rs, div;
+                        
+                        // assume p is wrong
+                        
+                        parity = ExAllocatePoolWithTag(NonPagedPool, sector_size, ALLOC_TAG);
+                        if (!parity) {
+                            ERR("out of memory\n");
+                            return FALSE;
+                        }
+                        
+                        rs = (parity1 + ci->num_stripes - 1) % ci->num_stripes;
+                        j = ci->num_stripes - 3;
+                        
+                        if (rs == stripe) {
+                            RtlZeroMemory(parity, sector_size);
+                            div = j;
+                        } else
+                            RtlCopyMemory(parity, &context->stripes[rs].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], sector_size);
+                        
+                        rs = (rs + ci->num_stripes - 1) % ci->num_stripes;
+                        j--;
+                        while (rs != parity2) {
+                            galois_double(parity, sector_size);
+                            
+                            if (rs != stripe)
+                                do_xor(parity, &context->stripes[rs].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], sector_size);
+                            else
+                                div = j;
+            
+                            rs = (rs + ci->num_stripes - 1) % ci->num_stripes;
+                            j--;
+                        }
+                        
+                        do_xor(parity, &context->stripes[parity2].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], sector_size);
+                        
+                        if (div != 0)
+                            galois_divpower(parity, div, sector_size);
+                        
+                        crc32 = ~calc_crc32c(0xffffffff, parity, sector_size);
+                        if (crc32 == csum[i]) {
+                            RtlCopyMemory(buf + *pos + (i * sector_size), parity, sector_size);
+                            
+                            // recalculate p
+                            RtlCopyMemory(&context->stripes[parity1].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], parity, sector_size);
+                            
+                            for (j = 0; j < ci->num_stripes; j++) {
+                                if (j != stripe && j != parity1 && j != parity2) {
+                                    do_xor(&context->stripes[parity1].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)],
+                                           &context->stripes[j].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], sector_size);
+                                }
+                            }
+                            
+                            context->stripes[parity1].rewrite = TRUE;
+                            
+                            ExFreePool(parity);
+                            goto success;
+                        }
+                        
+                        // assume another of the data stripes is wrong
+                        
+                        buf2 = ExAllocatePoolWithTag(NonPagedPool, sector_size, ALLOC_TAG);
+                        if (!buf2) {
+                            ERR("out of memory\n");
+                            ExFreePool(parity);
+                            return FALSE;
+                        }
+                        
+                        j = (parity2 + 1) % ci->num_stripes;
+                        
+                        while (j != parity1) {
+                            if (j != stripe) {
+                                UINT16 curstripe, k;
+                                UINT32 bufoff = *stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size);
+                                UINT16 x, y;
+                                UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
+                            
+                                curstripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1);
+                                
+                                // put qxy in parity
+                                // put pxy in buf2
+                                
+                                k = ci->num_stripes - 3;
+                                if (curstripe == stripe || curstripe == j) {
+                                    RtlZeroMemory(parity, sector_size);
+                                    RtlZeroMemory(buf2, sector_size);
+                                    
+                                    if (curstripe == stripe)
+                                        x = k;
+                                    else
+                                        y = k;
+                                } else {
+                                    RtlCopyMemory(parity, &context->stripes[curstripe].buf[bufoff], sector_size);
+                                    RtlCopyMemory(buf2, &context->stripes[curstripe].buf[bufoff], sector_size);
+                                }
+                                
+                                curstripe = curstripe == 0 ? (ci->num_stripes - 1) : (curstripe - 1);
+                                
+                                k--;
+                                do {
+                                    galois_double(parity, sector_size);
+                                    
+                                    if (curstripe != stripe && curstripe != j) {
+                                        do_xor(parity, &context->stripes[curstripe].buf[bufoff], sector_size);
+                                        do_xor(buf2, &context->stripes[curstripe].buf[bufoff], sector_size);
+                                    } else if (curstripe == stripe)
+                                        x = k;
+                                    else if (curstripe == j)
+                                        y = k;
+                                    
+                                    curstripe = curstripe == 0 ? (ci->num_stripes - 1) : (curstripe - 1);
+                                    k--;
+                                } while (curstripe != parity2);
+                                
+                                gyx = gpow2(y > x ? (y-x) : (255-x+y));
+                                gx = gpow2(255-x);
+
+                                denom = gdiv(1, gyx ^ 1);
+                                a = gmul(gyx, denom);
+                                b = gmul(gx, denom);
+                                
+                                p = &context->stripes[parity1].buf[bufoff];
+                                q = &context->stripes[parity2].buf[bufoff];
+                                pxy = buf2;
+                                qxy = parity; 
+                                
+                                for (k = 0; k < sector_size; k++) {
+                                    *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);
+                                    
+                                    p++;
+                                    q++;
+                                    pxy++;
+                                    qxy++;
+                                }
+                                
+                                crc32 = ~calc_crc32c(0xffffffff, parity, sector_size);
+                                
+                                if (crc32 == csum[i]) {
+                                    do_xor(buf2, parity, sector_size);
+                                    do_xor(buf2, &context->stripes[parity1].buf[bufoff], sector_size);
+                                    
+                                    RtlCopyMemory(&context->stripes[j].buf[bufoff], buf2, sector_size);
+                                    context->stripes[j].rewrite = TRUE;
+                                    
+                                    RtlCopyMemory(buf + *pos + (i * sector_size), parity, sector_size);
+                                    ExFreePool(parity);
+                                    ExFreePool(buf2);
+                                    goto success;
+                                }
+                            }
+                            
+                            j = (j + 1) % ci->num_stripes;
+                        }
+                            
+                        ExFreePool(parity);
+                        ExFreePool(buf2);
+                        
+                        ERR("unrecoverable checksum error\n");
+                        return FALSE;
+                    }
+                    
+success:
+                    RtlCopyMemory(&context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], buf + *pos + (i * sector_size), sector_size);
+                    context->stripes[stripe].rewrite = TRUE;
+                }
+            }
+            
+            *pos += copylen;
+            
+            if (*pos == length)
+                return TRUE;
+            
+            skip = 0;
+        }
+        
+        stripe = (stripe + 1) % ci->num_stripes;
+    }
+}
+
+// Copies tree (metadata) data for one RAID6 stripe row out of the per-stripe read buffers in
+// context->stripes[] into buf, verifying the tree header after each copy: the node's address must
+// equal addr and the CRC32C of everything after the csum field must equal the stored checksum.
+//
+// When verification fails, the following recoveries are attempted in order:
+//   1. rebuild the stripe by XORing parity1 (P) with the other data stripes;
+//   2. assume P is wrong: rebuild the stripe from parity2 (Q) and the other data stripes,
+//      then recalculate P;
+//   3. assume one other data stripe is wrong as well: solve for both from P and Q.
+// Any stripe buffer found to be wrong is patched in memory and flagged with rewrite = TRUE.
+//
+// addr             - logical address the tree node is expected to carry in its header
+// off              - offset used to work out which stripes hold P and Q for this row
+// skip             - number of bytes of data to pass over before copying starts
+// context          - read context holding the per-stripe buffers
+// ci               - chunk item (num_stripes, stripe_length)
+// stripeoff        - in/out offset into the stripe buffers; advanced by the row's stripe length
+//                    once the walk reaches parity1
+// buf, pos, length - output buffer, in/out write position, and total number of bytes wanted
+// firststripesize  - stripe length to use for the first row (when *pos == 0 on entry)
+// node_size        - size in bytes of a tree node
+//
+// Returns TRUE when the row has been fully walked or *pos has reached length; FALSE on
+// allocation failure or if none of the recovery attempts verifies.
+static BOOL raid6_decode_with_checksum_metadata(UINT64 addr, UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT8* buf,
+                                                UINT32* pos, UINT32 length, UINT32 firststripesize, UINT32 node_size) {
+    UINT16 parity1, parity2, stripe;
+    BOOL first = *pos == 0;
+    UINT32 stripelen = first ? firststripesize : ci->stripe_length;
+
+    // P and Q rotate with the row number; the data stripes follow Q, wrapping round to P
+    parity1 = ((off / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
+    parity2 = (parity1 + 1) % ci->num_stripes;
+    stripe = (parity1 + 2) % ci->num_stripes;
+
+    while (TRUE) {
+        if (stripe == parity1) {
+            // every data stripe of this row has been visited
+            *stripeoff += stripelen;
+            return TRUE;
+        }
+
+        if (skip >= ci->stripe_length) {
+            skip -= ci->stripe_length;
+        } else {
+            UINT32 copylen = min(ci->stripe_length - skip, length - *pos);
+            // NOTE(review): th aliases the start of buf, not buf + *pos - presumably callers only
+            // read a single node starting at *pos == 0; confirm.
+            tree_header* th = (tree_header*)buf;
+            UINT32 crc32;
+
+            RtlCopyMemory(buf + *pos, &context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen);
+
+            crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, node_size - sizeof(th->csum));
+
+            if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
+                UINT16 j, firststripe;
+
+                // pick the lowest-numbered stripe that is neither the bad stripe nor Q
+                if (parity2 == 0 && stripe == 1)
+                    firststripe = 2;
+                else if (parity2 == 0 || stripe == 0)
+                    firststripe = 1;
+                else
+                    firststripe = 0;
+
+                // attempt 1: XOR of P and all the other data stripes
+                RtlCopyMemory(buf + *pos, &context->stripes[firststripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], node_size);
+
+                for (j = firststripe + 1; j < ci->num_stripes; j++) {
+                    if (j != stripe && j != parity2) {
+                        do_xor(buf + *pos, &context->stripes[j].buf[*stripeoff + skip - ci->stripe_length + stripelen], node_size);
+                    }
+                }
+
+                crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, node_size - sizeof(th->csum));
+
+                if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
+                    UINT8 *parity, *buf2;
+                    UINT16 rs, div;
+                    tree_header* th2;
+
+                    // assume p is wrong
+
+                    parity = ExAllocatePoolWithTag(NonPagedPool, node_size, ALLOC_TAG);
+                    if (!parity) {
+                        ERR("out of memory\n");
+                        return FALSE;
+                    }
+
+                    // Walk the data stripes backwards from the one before P down to the one after Q,
+                    // doubling the accumulator and XORing in each known stripe. j counts the
+                    // position down from num_stripes - 3; div records the bad stripe's position.
+                    rs = (parity1 + ci->num_stripes - 1) % ci->num_stripes;
+                    j = ci->num_stripes - 3;
+
+                    if (rs == stripe) {
+                        RtlZeroMemory(parity, node_size);
+                        div = j;
+                    } else
+                        RtlCopyMemory(parity, &context->stripes[rs].buf[*stripeoff + skip - ci->stripe_length + stripelen], node_size);
+
+                    rs = (rs + ci->num_stripes - 1) % ci->num_stripes;
+                    j--;
+                    while (rs != parity2) {
+                        galois_double(parity, node_size);
+
+                        if (rs != stripe)
+                            do_xor(parity, &context->stripes[rs].buf[*stripeoff + skip - ci->stripe_length + stripelen], node_size);
+                        else
+                            div = j;
+
+                        rs = (rs + ci->num_stripes - 1) % ci->num_stripes;
+                        j--;
+                    }
+
+                    // fold in Q, then divide out the bad stripe's coefficient
+                    do_xor(parity, &context->stripes[parity2].buf[*stripeoff + skip - ci->stripe_length + stripelen], node_size);
+
+                    if (div != 0)
+                        galois_divpower(parity, div, node_size);
+
+                    th2 = (tree_header*)parity;
+
+                    crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th2->fs_uuid, node_size - sizeof(th2->csum));
+
+                    // Accept the reconstruction only if BOTH the address and the checksum match.
+                    // (This previously read "addr != address || crc32 == csum", which accepted
+                    // any reconstruction carrying the wrong address.)
+                    if (addr == th2->address && crc32 == *((UINT32*)th2->csum)) {
+                        RtlCopyMemory(buf + *pos, parity, node_size);
+
+                        // recalculate p
+                        RtlCopyMemory(&context->stripes[parity1].buf[*stripeoff + skip - ci->stripe_length + stripelen], parity, node_size);
+
+                        for (j = 0; j < ci->num_stripes; j++) {
+                            if (j != stripe && j != parity1 && j != parity2) {
+                                do_xor(&context->stripes[parity1].buf[*stripeoff + skip - ci->stripe_length + stripelen],
+                                        &context->stripes[j].buf[*stripeoff + skip - ci->stripe_length + stripelen], node_size);
+                            }
+                        }
+
+                        context->stripes[parity1].rewrite = TRUE;
+
+                        ExFreePool(parity);
+                        goto success;
+                    }
+
+                    // assume another of the data stripes is wrong
+
+                    buf2 = ExAllocatePoolWithTag(NonPagedPool, node_size, ALLOC_TAG);
+                    if (!buf2) {
+                        ERR("out of memory\n");
+                        ExFreePool(parity);
+                        return FALSE;
+                    }
+
+                    // try every other data stripe j as the second bad stripe
+                    j = (parity2 + 1) % ci->num_stripes;
+
+                    while (j != parity1) {
+                        if (j != stripe) {
+                            UINT16 curstripe, k;
+                            UINT32 bufoff = *stripeoff + skip - ci->stripe_length + stripelen;
+                            UINT16 x, y; // positions (as counted by k) of stripe and j respectively
+                            UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
+
+                            curstripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1);
+
+                            // put qxy in parity
+                            // put pxy in buf2
+
+                            k = ci->num_stripes - 3;
+                            if (curstripe == stripe || curstripe == j) {
+                                RtlZeroMemory(parity, node_size);
+                                RtlZeroMemory(buf2, node_size);
+
+                                if (curstripe == stripe)
+                                    x = k;
+                                else
+                                    y = k;
+                            } else {
+                                RtlCopyMemory(parity, &context->stripes[curstripe].buf[bufoff], node_size);
+                                RtlCopyMemory(buf2, &context->stripes[curstripe].buf[bufoff], node_size);
+                            }
+
+                            curstripe = curstripe == 0 ? (ci->num_stripes - 1) : (curstripe - 1);
+
+                            k--;
+                            do {
+                                galois_double(parity, node_size);
+
+                                if (curstripe != stripe && curstripe != j) {
+                                    do_xor(parity, &context->stripes[curstripe].buf[bufoff], node_size);
+                                    do_xor(buf2, &context->stripes[curstripe].buf[bufoff], node_size);
+                                } else if (curstripe == stripe)
+                                    x = k;
+                                else if (curstripe == j)
+                                    y = k;
+
+                                curstripe = curstripe == 0 ? (ci->num_stripes - 1) : (curstripe - 1);
+                                k--;
+                            } while (curstripe != parity2);
+
+                            // coefficients for the two-unknown solve; gpow2/gdiv/gmul are
+                            // presumably the GF(2^8) helpers from galois.c - confirm
+                            gyx = gpow2(y > x ? (y-x) : (255-x+y));
+                            gx = gpow2(255-x);
+
+                            denom = gdiv(1, gyx ^ 1);
+                            a = gmul(gyx, denom);
+                            b = gmul(gx, denom);
+
+                            p = &context->stripes[parity1].buf[bufoff];
+                            q = &context->stripes[parity2].buf[bufoff];
+                            pxy = buf2;
+                            qxy = parity;
+
+                            // overwrite parity byte-by-byte with the candidate contents of stripe
+                            for (k = 0; k < node_size; k++) {
+                                *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);
+
+                                p++;
+                                q++;
+                                pxy++;
+                                qxy++;
+                            }
+
+                            th2 = (tree_header*)parity;
+
+                            crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th2->fs_uuid, node_size - sizeof(th2->csum));
+
+                            // as above: both the address and the checksum must match
+                            if (addr == th2->address && crc32 == *((UINT32*)th2->csum)) {
+                                // the other bad stripe is pxy ^ recovered stripe ^ P
+                                do_xor(buf2, parity, node_size);
+                                do_xor(buf2, &context->stripes[parity1].buf[bufoff], node_size);
+
+                                RtlCopyMemory(&context->stripes[j].buf[bufoff], buf2, node_size);
+                                context->stripes[j].rewrite = TRUE;
+
+                                RtlCopyMemory(buf + *pos, parity, node_size);
+                                ExFreePool(parity);
+                                ExFreePool(buf2);
+                                goto success;
+                            }
+                        }
+
+                        j = (j + 1) % ci->num_stripes;
+                    }
+
+                    ExFreePool(parity);
+                    ExFreePool(buf2);
+
+                    ERR("unrecoverable checksum error\n");
+                    return FALSE;
+                }
+
+success:
+                // write the verified data back into the bad stripe's buffer and flag it for rewrite
+                RtlCopyMemory(&context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], buf + *pos, node_size);
+                context->stripes[stripe].rewrite = TRUE;
+            }
+
+            *pos += copylen;
+
+            if (*pos == length)
+                return TRUE;
+
+            skip = 0;
+        }
+
+        stripe = (stripe + 1) % ci->num_stripes;
+    }
+}
+
+// Checks one RAID6 stripe row that has no checksums by testing whether parity1 (P) equals the XOR
+// of the data stripes, and tries to repair the in-memory stripe buffers if it does not.
+//
+// If the XOR test fails:
+//   - Q is recomputed from the data stripes; if it matches parity2, P is taken to be the bad
+//     stripe, is recalculated, and is flagged with rewrite = TRUE;
+//   - otherwise each data stripe in turn is assumed bad, rebuilt from P and the other data
+//     stripes, and accepted if the resulting Q matches parity2; the rebuilt stripe is then
+//     stored and flagged with rewrite = TRUE.
+//
+// off             - offset used to work out which stripes hold P and Q for this row
+// skip            - only its remainder modulo stripe_length is used, to bound the checked length
+// context         - read context holding the per-stripe buffers
+// ci              - chunk item (num_stripes, stripe_length)
+// stripeoff       - in/out offset into the stripe buffers; advanced by the row's stripe length
+//                   on success
+// maxsize         - upper bound on offsets into the stripe buffers
+// first           - TRUE if this is the first row, in which case firststripesize is its length
+// firststripesize - stripe length to use for the first row
+// scratch         - caller-supplied work buffer; must hold at least the checked length
+//
+// Returns STATUS_SUCCESS if the row is consistent or was repaired,
+// STATUS_INSUFFICIENT_RESOURCES on allocation failure, or STATUS_CRC_ERROR if no single-stripe
+// repair makes the row consistent.
+static NTSTATUS check_raid6_nocsum_parity(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT64 maxsize,
+                                          BOOL first, UINT32 firststripesize, UINT8* scratch) {
+    UINT16 parity1, parity2, stripe;
+    UINT32 stripelen = first ? firststripesize : ci->stripe_length;
+    UINT32 readlen, i;
+    BOOL bad = FALSE;
+
+    TRACE("(%llx, %x, %p, %p, %llx, %llx, %u, %x, %p)\n", off, skip, context, ci, *stripeoff, maxsize, first, firststripesize, scratch);
+
+    parity1 = ((off / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
+    parity2 = (parity1 + 1) % ci->num_stripes;
+
+    readlen = min(min(ci->stripe_length - (skip % ci->stripe_length), stripelen), maxsize - *stripeoff);
+
+    // XOR P with every data stripe (walking backwards from P down to the stripe after Q);
+    // the result is all zeroes if P is consistent with the data
+    RtlCopyMemory(scratch, &context->stripes[parity1].buf[*stripeoff], readlen);
+    stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1);
+
+    do {
+        do_xor(scratch, &context->stripes[stripe].buf[*stripeoff], readlen);
+
+        stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+    } while (stripe != parity2);
+
+    for (i = 0; i < readlen; i++) {
+        if (scratch[i] != 0) {
+            bad = TRUE;
+            break;
+        }
+    }
+
+    if (bad) {
+        UINT16 missing;
+        UINT8* buf2;
+
+        // assume parity is bad
+        // Recompute Q into scratch: start from the last data stripe, then double and XOR in each
+        // earlier one. A while loop (not do/while) is used so that a row with a single data stripe
+        // does not fold Q and P themselves into the result.
+        stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1);
+        RtlCopyMemory(scratch, &context->stripes[stripe].buf[*stripeoff], readlen);
+        stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+
+        while (stripe != parity2) {
+            galois_double(scratch, readlen);
+
+            do_xor(scratch, &context->stripes[stripe].buf[*stripeoff], readlen);
+
+            stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+        }
+
+        if (RtlCompareMemory(scratch, &context->stripes[parity2].buf[*stripeoff], readlen) == readlen) {
+            WARN("recovering from invalid parity stripe\n");
+
+            // recalc p
+            stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1);
+            RtlCopyMemory(&context->stripes[parity1].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
+            stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+
+            while (stripe != parity2) {
+                do_xor(&context->stripes[parity1].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
+
+                stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+            }
+
+            context->stripes[parity1].rewrite = TRUE;
+            goto end;
+        }
+
+        // assume one of the data stripes is bad
+
+        buf2 = ExAllocatePoolWithTag(NonPagedPool, readlen, ALLOC_TAG);
+        if (!buf2) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        missing = (parity2 + 1) % ci->num_stripes;
+        while (missing != parity1) {
+            // scratch = candidate contents of the missing stripe (P XOR all other data stripes)
+            RtlCopyMemory(scratch, &context->stripes[parity1].buf[*stripeoff], readlen);
+            for (i = 0; i < ci->num_stripes; i++) {
+                if (i != parity1 && i != parity2 && i != missing) {
+                    do_xor(scratch, &context->stripes[i].buf[*stripeoff], readlen);
+                }
+            }
+
+            // buf2 = Q recomputed with the candidate substituted for the missing stripe
+            stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1);
+            RtlCopyMemory(buf2, stripe == missing ? scratch : &context->stripes[stripe].buf[*stripeoff], readlen);
+            stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+
+            while (stripe != parity2) {
+                galois_double(buf2, readlen);
+
+                do_xor(buf2, stripe == missing ? scratch : &context->stripes[stripe].buf[*stripeoff], readlen);
+
+                stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+            }
+
+            if (RtlCompareMemory(buf2, &context->stripes[parity2].buf[*stripeoff], readlen) == readlen) {
+                WARN("recovering from invalid data stripe\n");
+
+                RtlCopyMemory(&context->stripes[missing].buf[*stripeoff], scratch, readlen);
+                ExFreePool(buf2);
+
+                context->stripes[missing].rewrite = TRUE;
+                goto end;
+            }
+
+            missing = (missing + 1) % ci->num_stripes;
+        }
+
+        ExFreePool(buf2);
+
+        ERR("unrecoverable checksum error\n");
+        return STATUS_CRC_ERROR;
+    }
+
+end:
+    *stripeoff += stripelen;
+
+    return STATUS_SUCCESS;
+}
+
+NTSTATUS STDCALL read_data(device_extension* Vcb, UINT64 addr, UINT32 length, UINT32* csum, BOOL is_tree, UINT8* buf, chunk* c, chunk** pc, PIRP Irp) {
+    CHUNK_ITEM* ci;
+    CHUNK_ITEM_STRIPE* cis;
+    read_data_context* context;
+    UINT64 i, type, offset;
+    NTSTATUS Status;
+    device** devices;
+    UINT64 *stripestart = NULL, *stripeend = NULL;
+    UINT32 firststripesize;
+    UINT16 startoffstripe, allowed_missing, missing_devices = 0;
+#ifdef DEBUG_STATS
+    LARGE_INTEGER time1, time2;
+#endif
+    
+    Status = verify_vcb(Vcb, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("verify_vcb returned %08x\n", Status);
+        return Status;
+    }
+    
+    if (Vcb->log_to_phys_loaded) {
+        if (!c) {
+            c = get_chunk_from_address(Vcb, addr);
+            
+            if (!c) {
+                ERR("get_chunk_from_address failed\n");
+                return STATUS_INTERNAL_ERROR;
+            }
+        }
+        
+        ci = c->chunk_item;
+        offset = c->offset;
+        devices = c->devices;
+           
+        if (pc)
+            *pc = c;
+    } else {
+        LIST_ENTRY* le = Vcb->sys_chunks.Flink;
+        
+        ci = NULL;
+        
+        while (le != &Vcb->sys_chunks) {
+            sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry);
+            
+            if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) {
+                CHUNK_ITEM* chunk_item = sc->data;
+                
+                if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) {
+                    ci = chunk_item;
+                    offset = sc->key.offset;
+                    cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1];
+                    
+                    devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG);
+                    if (!devices) {
+                        ERR("out of memory\n");
+                        return STATUS_INSUFFICIENT_RESOURCES;
+                    }
+                    
+                    for (i = 0; i < ci->num_stripes; i++) {
+                        devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid);
+                    }
+                    
+                    break;
+                }
+            }
+            
+            le = le->Flink;
+        }
+        
+        if (!ci) {
+            ERR("could not find chunk for %llx in bootstrap\n", addr);
+            return STATUS_INTERNAL_ERROR;
+        }
+        
+        if (pc)
+            *pc = NULL;
+    }
+    
+    if (ci->type & BLOCK_FLAG_DUPLICATE) {
+        type = BLOCK_FLAG_DUPLICATE;
+        allowed_missing = 0;
+    } else if (ci->type & BLOCK_FLAG_RAID0) {
+        type = BLOCK_FLAG_RAID0;
+        allowed_missing = 0;
+    } else if (ci->type & BLOCK_FLAG_RAID1) {
+        type = BLOCK_FLAG_DUPLICATE;
+        allowed_missing = 1;
+    } else if (ci->type & BLOCK_FLAG_RAID10) {
+        type = BLOCK_FLAG_RAID10;
+        allowed_missing = 1;
+    } else if (ci->type & BLOCK_FLAG_RAID5) {
+        type = BLOCK_FLAG_RAID5;
+        allowed_missing = 1;
+    } else if (ci->type & BLOCK_FLAG_RAID6) {
+        type = BLOCK_FLAG_RAID6;
+        allowed_missing = 2;
+    } else { // SINGLE
+        type = BLOCK_FLAG_DUPLICATE;
+        allowed_missing = 0;
+    }
+
+    cis = (CHUNK_ITEM_STRIPE*)&ci[1];
+
+    context = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_context), ALLOC_TAG);
+    if (!context) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+    
+    RtlZeroMemory(context, sizeof(read_data_context));
+    KeInitializeEvent(&context->Event, NotificationEvent, FALSE);
+    
+    context->stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG);
+    if (!context->stripes) {
+        ERR("out of memory\n");
+        ExFreePool(context);
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+    
+    RtlZeroMemory(context->stripes, sizeof(read_data_stripe) * ci->num_stripes);
+    
+    context->buflen = length;
+    context->num_stripes = ci->num_stripes;
+    context->stripes_left = context->num_stripes;
+    context->sector_size = Vcb->superblock.sector_size;
+    context->csum = csum;
+    context->tree = is_tree;
+    context->type = type;
+    
+    stripestart = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT64) * ci->num_stripes, ALLOC_TAG);
+    if (!stripestart) {
+        ERR("out of memory\n");
+        ExFreePool(context);
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+    
+    stripeend = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT64) * ci->num_stripes, ALLOC_TAG);
+    if (!stripeend) {
+        ERR("out of memory\n");
+        ExFreePool(stripestart);
+        ExFreePool(context);
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+    
+    if (type == BLOCK_FLAG_RAID0) {
+        UINT64 startoff, endoff;
+        UINT16 endoffstripe;
+        
+        get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe);
+        get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe);
+        
+        for (i = 0; i < ci->num_stripes; i++) {
+            if (startoffstripe > i) {
+                stripestart[i] = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
+            } else if (startoffstripe == i) {
+                stripestart[i] = startoff;
+            } else {
+                stripestart[i] = startoff - (startoff % ci->stripe_length);
+            }
+            
+            if (endoffstripe > i) {
+                stripeend[i] = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
+            } else if (endoffstripe == i) {
+                stripeend[i] = endoff + 1;
+            } else {
+                stripeend[i] = endoff - (endoff % ci->stripe_length);
+            }
+        }
+    } else if (type == BLOCK_FLAG_RAID10) {
+        UINT64 startoff, endoff;
+        UINT16 endoffstripe, j;
+        
+        get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe);
+        get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe);
+        
+        if ((ci->num_stripes % ci->sub_stripes) != 0) {
+            ERR("chunk %llx: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes);
+            Status = STATUS_INTERNAL_ERROR;
+            goto exit;
+        }
+        
+        context->firstoff = (startoff % ci->stripe_length) / Vcb->superblock.sector_size;
+        context->startoffstripe = startoffstripe;
+        context->sectors_per_stripe = ci->stripe_length / Vcb->superblock.sector_size;
+        
+        startoffstripe *= ci->sub_stripes;
+        endoffstripe *= ci->sub_stripes;
+        
+        for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
+            if (startoffstripe > i) {
+                stripestart[i] = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
+            } else if (startoffstripe == i) {
+                stripestart[i] = startoff;
+            } else {
+                stripestart[i] = startoff - (startoff % ci->stripe_length);
+            }
+            
+            if (endoffstripe > i) {
+                stripeend[i] = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
+            } else if (endoffstripe == i) {
+                stripeend[i] = endoff + 1;
+            } else {
+                stripeend[i] = endoff - (endoff % ci->stripe_length);
+            }
+            
+            for (j = 1; j < ci->sub_stripes; j++) {
+                stripestart[i+j] = stripestart[i];
+                stripeend[i+j] = stripeend[i];
+            }
+        }
+        
+        context->stripes_cancel = 1;
+    } else if (type == BLOCK_FLAG_DUPLICATE) {
+        for (i = 0; i < ci->num_stripes; i++) {
+            stripestart[i] = addr - offset;
+            stripeend[i] = stripestart[i] + length;
+        }
+        
+        context->stripes_cancel = ci->num_stripes - 1;
+    } else if (type == BLOCK_FLAG_RAID5) {
+        UINT64 startoff, endoff;
+        UINT16 endoffstripe;
+        UINT64 start = 0xffffffffffffffff, end = 0;
+        
+        get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe);
+        get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe);
+        
+        for (i = 0; i < ci->num_stripes - 1; i++) {
+            UINT64 ststart, stend;
+            
+            if (startoffstripe > i) {
+                ststart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
+            } else if (startoffstripe == i) {
+                ststart = startoff;
+            } else {
+                ststart = startoff - (startoff % ci->stripe_length);
+            }
+              
+            if (endoffstripe > i) {
+                stend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
+            } else if (endoffstripe == i) {
+                stend = endoff + 1;
+            } else {
+                stend = endoff - (endoff % ci->stripe_length);
+            }
+            
+            if (ststart != stend) {
+                if (ststart < start) {
+                    start = ststart;
+                    firststripesize = ci->stripe_length - (ststart % ci->stripe_length);
+                }
+                
+                if (stend > end)
+                    end = stend;
+            }
+        }
+        
+        for (i = 0; i < ci->num_stripes; i++) {
+            stripestart[i] = start;
+            stripeend[i] = end;
+        }
+        
+        context->stripes_cancel = Vcb->options.raid5_recalculation;
+    } else if (type == BLOCK_FLAG_RAID6) {
+        UINT64 startoff, endoff;
+        UINT16 endoffstripe;
+        UINT64 start = 0xffffffffffffffff, end = 0;
+        
+        get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe);
+        get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe);
+        
+        for (i = 0; i < ci->num_stripes - 2; i++) {
+            UINT64 ststart, stend;
+            
+            if (startoffstripe > i) {
+                ststart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
+            } else if (startoffstripe == i) {
+                ststart = startoff;
+            } else {
+                ststart = startoff - (startoff % ci->stripe_length);
+            }
+              
+            if (endoffstripe > i) {
+                stend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
+            } else if (endoffstripe == i) {
+                stend = endoff + 1;
+            } else {
+                stend = endoff - (endoff % ci->stripe_length);
+            }
+            
+            if (ststart != stend) {
+                if (ststart < start) {
+                    start = ststart;
+                    firststripesize = ci->stripe_length - (ststart % ci->stripe_length);
+                }
+                
+                if (stend > end)
+                    end = stend;
+            }
+        }
+        
+        for (i = 0; i < ci->num_stripes; i++) {
+            stripestart[i] = start;
+            stripeend[i] = end;
+        }
+        
+        context->stripes_cancel = Vcb->options.raid6_recalculation;
+    }
+    
+    KeInitializeSpinLock(&context->spin_lock);
+    
+    context->address = addr;
+    
+    for (i = 0; i < ci->num_stripes; i++) {
+        if (!devices[i] || stripestart[i] == stripeend[i]) {
+            context->stripes[i].status = ReadDataStatus_MissingDevice;
+            context->stripes[i].buf = NULL;
+            context->stripes_left--;
+            
+            if (!devices[i])
+                missing_devices++;
+        }
+    }
+      
+    if (missing_devices > allowed_missing) {
+        ERR("not enough devices to service request (%u missing)\n", missing_devices);
+        Status = STATUS_UNEXPECTED_IO_ERROR;
+        goto exit;
+    }
+    
+    for (i = 0; i < ci->num_stripes; i++) {
+        PIO_STACK_LOCATION IrpSp;
+        
+        if (devices[i] && stripestart[i] != stripeend[i]) {
+            context->stripes[i].context = (struct read_data_context*)context;
+            context->stripes[i].buf = ExAllocatePoolWithTag(NonPagedPool, stripeend[i] - stripestart[i], ALLOC_TAG);
+            
+            if (!context->stripes[i].buf) {
+                ERR("out of memory\n");
+                Status = STATUS_INSUFFICIENT_RESOURCES;
+                goto exit;
+            }
+            
+            if (type == BLOCK_FLAG_RAID10) {
+                context->stripes[i].stripenum = i / ci->sub_stripes;
+            }
+
+            if (!Irp) {
+                context->stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE);
+                
+                if (!context->stripes[i].Irp) {
+                    ERR("IoAllocateIrp failed\n");
+                    Status = STATUS_INSUFFICIENT_RESOURCES;
+                    goto exit;
+                }
+            } else {
+                context->stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize);
+                
+                if (!context->stripes[i].Irp) {
+                    ERR("IoMakeAssociatedIrp failed\n");
+                    Status = STATUS_INSUFFICIENT_RESOURCES;
+                    goto exit;
+                }
+            }
+            
+            IrpSp = IoGetNextIrpStackLocation(context->stripes[i].Irp);
+            IrpSp->MajorFunction = IRP_MJ_READ;
+            
+            if (devices[i]->devobj->Flags & DO_BUFFERED_IO) {
+                FIXME("FIXME - buffered IO\n");
+            } else if (devices[i]->devobj->Flags & DO_DIRECT_IO) {
+                context->stripes[i].Irp->MdlAddress = IoAllocateMdl(context->stripes[i].buf, stripeend[i] - stripestart[i], FALSE, FALSE, NULL);
+                if (!context->stripes[i].Irp->MdlAddress) {
+                    ERR("IoAllocateMdl failed\n");
+                    Status = STATUS_INSUFFICIENT_RESOURCES;
+                    goto exit;
+                }
+                
+                MmProbeAndLockPages(context->stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
+            } else {
+                context->stripes[i].Irp->UserBuffer = context->stripes[i].buf;
+            }
+
+            IrpSp->Parameters.Read.Length = stripeend[i] - stripestart[i];
+            IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[i] + cis[i].offset;
+            
+            context->stripes[i].Irp->UserIosb = &context->stripes[i].iosb;
+            
+            IoSetCompletionRoutine(context->stripes[i].Irp, read_data_completion, &context->stripes[i], TRUE, TRUE, TRUE);
+
+            context->stripes[i].status = ReadDataStatus_Pending;
+        }
+    }
+    
+#ifdef DEBUG_STATS
+    if (!is_tree)
+        time1 = KeQueryPerformanceCounter(NULL);
+#endif
+    
+    for (i = 0; i < ci->num_stripes; i++) {
+        if (context->stripes[i].status != ReadDataStatus_MissingDevice) {
+            IoCallDriver(devices[i]->devobj, context->stripes[i].Irp);
+        }
+    }
+
+    KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
+   
+#ifdef DEBUG_STATS
+    if (!is_tree) {
+        time2 = KeQueryPerformanceCounter(NULL);
+        
+        Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart;
+    }
+#endif
+    
+    // check if any of the devices return a "user-induced" error
+    
+    for (i = 0; i < ci->num_stripes; i++) {
+        if (context->stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context->stripes[i].iosb.Status)) {
+            if (Irp && context->stripes[i].iosb.Status == STATUS_VERIFY_REQUIRED) {
+                PDEVICE_OBJECT dev;
+                
+                dev = IoGetDeviceToVerify(Irp->Tail.Overlay.Thread);
+                IoSetDeviceToVerify(Irp->Tail.Overlay.Thread, NULL);
+                
+                if (!dev) {
+                    dev = IoGetDeviceToVerify(PsGetCurrentThread());
+                    IoSetDeviceToVerify(PsGetCurrentThread(), NULL);
+                }
+                
+                dev = Vcb->Vpb ? Vcb->Vpb->RealDevice : NULL;
+                
+                if (dev)
+                    IoVerifyVolume(dev, FALSE);
+            }
+//             IoSetHardErrorOrVerifyDevice(context->stripes[i].Irp, devices[i]->devobj);
+            
+            Status = context->stripes[i].iosb.Status;
+            goto exit;
+        }
+    }
+    
+    if (type == BLOCK_FLAG_RAID0) {
+        UINT32 pos, *stripeoff;
+        UINT8 stripe;
+        
+        for (i = 0; i < ci->num_stripes; i++) {
+            if (context->stripes[i].status == ReadDataStatus_Error) {
+                WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status); 
+                Status = context->stripes[i].iosb.Status;
+                goto exit;
+            }
+        }
+        
+        pos = 0;
+        stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG);
+        if (!stripeoff) {
+            ERR("out of memory\n");
+            Status = STATUS_INSUFFICIENT_RESOURCES;
+            goto exit;
+        }
+        
+        RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes);
+        
+        stripe = startoffstripe;
+        while (pos < length) {
+            if (pos == 0) {
+                UINT32 readlen = min(stripeend[stripe] - stripestart[stripe], ci->stripe_length - (stripestart[stripe] % ci->stripe_length));
+                
+                RtlCopyMemory(buf, context->stripes[stripe].buf, readlen);
+                stripeoff[stripe] += readlen;
+                pos += readlen;
+            } else if (length - pos < ci->stripe_length) {
+                RtlCopyMemory(buf + pos, &context->stripes[stripe].buf[stripeoff[stripe]], length - pos);
+                pos = length;
+            } else {
+                RtlCopyMemory(buf + pos, &context->stripes[stripe].buf[stripeoff[stripe]], ci->stripe_length);
+                stripeoff[stripe] += ci->stripe_length;
+                pos += ci->stripe_length;
+            }
+            
+            stripe = (stripe + 1) % ci->num_stripes;
+        }
+        
+        ExFreePool(stripeoff);
+        
+        // FIXME - handle the case where one of the stripes doesn't read everything, i.e. Irp->IoStatus.Information is short
+        
+        if (is_tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries
+            tree_header* th = (tree_header*)buf;
+            UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
+            
+            if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
+                WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
+                Status = STATUS_CRC_ERROR;
+                goto exit;
+            }
+        } else if (csum) {
+#ifdef DEBUG_STATS
+            time1 = KeQueryPerformanceCounter(NULL);
+#endif
+            for (i = 0; i < length / Vcb->superblock.sector_size; i++) {
+                UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
+                
+                if (crc32 != csum[i]) {
+                    WARN("checksum error (%08x != %08x)\n", crc32, csum[i]);
+                    Status = STATUS_CRC_ERROR;
+                    goto exit;
+                }
+            }
+#ifdef DEBUG_STATS
+            time2 = KeQueryPerformanceCounter(NULL);
+            
+            Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
+#endif
+        }
+        
+        Status = STATUS_SUCCESS;
+    } else if (type == BLOCK_FLAG_RAID10) {
+        BOOL checksum_error = FALSE;
+        UINT32 pos, *stripeoff;
+        UINT8 stripe;
+        read_data_stripe** stripes;
+
+        stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
+        if (!stripes) {
+            ERR("out of memory\n");
+            Status = STATUS_INSUFFICIENT_RESOURCES;
+            goto exit;
+        }
+        
+        RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes);
+        
+        for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
+            UINT16 j;
+            
+            for (j = 0; j < ci->sub_stripes; j++) {
+                if (context->stripes[i+j].status == ReadDataStatus_Success) {
+                    stripes[i / ci->sub_stripes] = &context->stripes[i+j];
+                    break;
+                }
+            }
+            
+            if (!stripes[i / ci->sub_stripes]) {
+                for (j = 0; j < ci->sub_stripes; j++) {
+                    if (context->stripes[i+j].status == ReadDataStatus_Error) {
+                        // both stripes must have errored if we get here
+                        WARN("stripe %llu returned error %08x\n", i+j, context->stripes[i+j].iosb.Status);
+                        Status = context->stripes[i].iosb.Status;
+                        ExFreePool(stripes);
+                        goto exit;
+                    }
+                }
+            }
+        }
+        
+        pos = 0;
+        stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
+        if (!stripeoff) {
+            ERR("out of memory\n");
+            Status = STATUS_INSUFFICIENT_RESOURCES;
+            ExFreePool(stripes);
+            goto exit;
+        }
+        
+        RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes);
+        
+        stripe = startoffstripe / ci->sub_stripes;
+        while (pos < length) {
+            if (pos == 0) {
+                UINT32 readlen = min(stripeend[stripe * ci->sub_stripes] - stripestart[stripe * ci->sub_stripes], ci->stripe_length - (stripestart[stripe * ci->sub_stripes] % ci->stripe_length));
+                
+                RtlCopyMemory(buf, stripes[stripe]->buf, readlen);
+                stripeoff[stripe] += readlen;
+                pos += readlen;
+                
+                if (context->csum) {
+#ifdef DEBUG_STATS
+                    time1 = KeQueryPerformanceCounter(NULL);
+#endif
+                    for (i = 0; i < readlen / Vcb->superblock.sector_size; i++) {
+                        UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
+                    
+                        if (crc32 != csum[i]) {
+                            checksum_error = TRUE;
+                            stripes[stripe]->status = ReadDataStatus_CRCError;
+                        }
+                    }
+#ifdef DEBUG_STATS
+                    time2 = KeQueryPerformanceCounter(NULL);
+                    
+                    Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
+#endif
+                }
+            } else if (length - pos < ci->stripe_length) {
+                RtlCopyMemory(buf + pos, &stripes[stripe]->buf[stripeoff[stripe]], length - pos);
+                
+                if (context->csum) {
+#ifdef DEBUG_STATS
+                    time1 = KeQueryPerformanceCounter(NULL);
+#endif
+                    for (i = 0; i < (length - pos) / Vcb->superblock.sector_size; i++) {
+                        UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + pos + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
+                    
+                        if (crc32 != csum[(pos / Vcb->superblock.sector_size) + i]) {
+                            checksum_error = TRUE;
+                            stripes[stripe]->status = ReadDataStatus_CRCError;
+                        }
+                    }
+#ifdef DEBUG_STATS
+                    time2 = KeQueryPerformanceCounter(NULL);
+                    
+                    Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
+#endif
+                }
+                
+                pos = length;
+            } else {
+                RtlCopyMemory(buf + pos, &stripes[stripe]->buf[stripeoff[stripe]], ci->stripe_length);
+                stripeoff[stripe] += ci->stripe_length;
+                
+                if (context->csum) {
+#ifdef DEBUG_STATS
+                    time1 = KeQueryPerformanceCounter(NULL);
+#endif
+                    for (i = 0; i < ci->stripe_length / Vcb->superblock.sector_size; i++) {
+                        UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + pos + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
+                    
+                        if (crc32 != csum[(pos / Vcb->superblock.sector_size) + i]) {
+                            checksum_error = TRUE;
+                            stripes[stripe]->status = ReadDataStatus_CRCError;
+                        }
+                    }
+#ifdef DEBUG_STATS
+                    time2 = KeQueryPerformanceCounter(NULL);
+                    
+                    Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
+#endif
+                }
+                
+                pos += ci->stripe_length;
+            }
+            
+            stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes);
+        }
+        
+        if (is_tree) {
+            tree_header* th = (tree_header*)buf;
+            UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
+            
+            if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
+                WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
+                checksum_error = TRUE;
+                stripes[startoffstripe]->status = ReadDataStatus_CRCError;
+            }
+        }
+        
+        if (checksum_error) {
+            // FIXME - update dev stats
+            
+            WARN("checksum error\n");
+            
+            context->stripes_left = 0;
+            
+            for (i = 0; i < ci->num_stripes; i++) {
+                if (context->stripes[i].status == ReadDataStatus_CRCError) {
+                    UINT16 other_stripe = (i % 1) ? (i - 1) : (i + 1);
+                    
+                    if (context->stripes[other_stripe].status == ReadDataStatus_Cancelled) {
+                        PIO_STACK_LOCATION IrpSp;
+                        
+                        // re-run Irp that we cancelled
+                        
+                        if (context->stripes[other_stripe].Irp) {
+                            if (devices[other_stripe]->devobj->Flags & DO_DIRECT_IO) {
+                                MmUnlockPages(context->stripes[other_stripe].Irp->MdlAddress);
+                                IoFreeMdl(context->stripes[other_stripe].Irp->MdlAddress);
+                            }
+                            IoFreeIrp(context->stripes[other_stripe].Irp);
+                        }
+                        
+                        if (!Irp) {
+                            context->stripes[other_stripe].Irp = IoAllocateIrp(devices[other_stripe]->devobj->StackSize, FALSE);
+                            
+                            if (!context->stripes[other_stripe].Irp) {
+                                ERR("IoAllocateIrp failed\n");
+                                Status = STATUS_INSUFFICIENT_RESOURCES;
+                                goto exit;
+                            }
+                        } else {
+                            context->stripes[other_stripe].Irp = IoMakeAssociatedIrp(Irp, devices[other_stripe]->devobj->StackSize);
+                            
+                            if (!context->stripes[other_stripe].Irp) {
+                                ERR("IoMakeAssociatedIrp failed\n");
+                                Status = STATUS_INSUFFICIENT_RESOURCES;
+                                goto exit;
+                            }
+                        }
+                        
+                        IrpSp = IoGetNextIrpStackLocation(context->stripes[other_stripe].Irp);
+                        IrpSp->MajorFunction = IRP_MJ_READ;
+                        
+                        if (devices[other_stripe]->devobj->Flags & DO_BUFFERED_IO) {
+                            FIXME("FIXME - buffered IO\n");
+                        } else if (devices[other_stripe]->devobj->Flags & DO_DIRECT_IO) {
+                            context->stripes[other_stripe].Irp->MdlAddress = IoAllocateMdl(context->stripes[other_stripe].buf, stripeend[other_stripe] - stripestart[other_stripe], FALSE, FALSE, NULL);
+                            if (!context->stripes[other_stripe].Irp->MdlAddress) {
+                                ERR("IoAllocateMdl failed\n");
+                                Status = STATUS_INSUFFICIENT_RESOURCES;
+                                goto exit;
+                            }
+                            
+                            MmProbeAndLockPages(context->stripes[other_stripe].Irp->MdlAddress, KernelMode, IoWriteAccess);
+                        } else {
+                            context->stripes[other_stripe].Irp->UserBuffer = context->stripes[other_stripe].buf;
+                        }
+
+                        IrpSp->Parameters.Read.Length = stripeend[other_stripe] - stripestart[other_stripe];
+                        IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[other_stripe] + cis[other_stripe].offset;
+                        
+                        context->stripes[other_stripe].Irp->UserIosb = &context->stripes[other_stripe].iosb;
+                        
+                        IoSetCompletionRoutine(context->stripes[other_stripe].Irp, read_data_completion, &context->stripes[other_stripe], TRUE, TRUE, TRUE);
+                        
+                        context->stripes_left++;
+                        context->stripes[other_stripe].status = ReadDataStatus_Pending;
+                    }
+                }
+            }
+            
+            if (context->stripes_left == 0) {
+                WARN("could not recover from checksum error\n");
+                ExFreePool(stripes);
+                ExFreePool(stripeoff);
+                Status = STATUS_CRC_ERROR;
+                goto exit;
+            }
+            
+            context->stripes_cancel = 0;
+            KeClearEvent(&context->Event);
+            
+#ifdef DEBUG_STATS
+            if (!is_tree)
+                time1 = KeQueryPerformanceCounter(NULL);
+#endif
+
+            for (i = 0; i < ci->num_stripes; i++) {
+                if (context->stripes[i].status == ReadDataStatus_Pending) {
+                    IoCallDriver(devices[i]->devobj, context->stripes[i].Irp);
+                }
+            }
+            
+            KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
+            
+#ifdef DEBUG_STATS
+            if (!is_tree) {
+                time2 = KeQueryPerformanceCounter(NULL);
+                
+                Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart;
+            }
+#endif
+
+            for (i = 0; i < ci->num_stripes; i++) {
+                if (context->stripes[i].status == ReadDataStatus_CRCError) {
+                    UINT16 other_stripe = (i % 1) ? (i - 1) : (i + 1);
+                    
+                    if (context->stripes[other_stripe].status != ReadDataStatus_Success) {
+                        WARN("could not recover from checksum error\n");
+                        ExFreePool(stripes);
+                        ExFreePool(stripeoff);
+                        Status = STATUS_CRC_ERROR;
+                        goto exit;
+                    }
+                }
+            }
+
+            RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes);
+        
+            pos = 0;
+            stripe = startoffstripe / ci->sub_stripes;
+            while (pos < length) {
+                if (pos == 0) {
+                    UINT32 readlen = min(stripeend[stripe * ci->sub_stripes] - stripestart[stripe * ci->sub_stripes], ci->stripe_length - (stripestart[stripe * ci->sub_stripes] % ci->stripe_length));
+                    
+                    stripeoff[stripe] += readlen;
+                    pos += readlen;
+                    
+                    if (context->csum && stripes[stripe]->status == ReadDataStatus_CRCError) {
+                        for (i = 0; i < readlen / Vcb->superblock.sector_size; i++) {
+                            UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
+                        
+                            if (crc32 != csum[i]) {
+                                UINT16 other_stripe = (stripe * ci->sub_stripes) + (context->stripes[stripe * ci->sub_stripes].status == ReadDataStatus_CRCError ? 1 : 0);
+                                UINT32 crc32b = ~calc_crc32c(0xffffffff, context->stripes[other_stripe].buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
+                                
+                                if (crc32b == csum[i]) {
+                                    RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), context->stripes[other_stripe].buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
+                                    RtlCopyMemory(stripes[stripe]->buf + (i * Vcb->superblock.sector_size), context->stripes[other_stripe].buf + (i * Vcb->superblock.sector_size),
+                                                  Vcb->superblock.sector_size);
+                                    stripes[stripe]->rewrite = TRUE;
+                                } else {
+                                    WARN("could not recover from checksum error\n");
+                                    ExFreePool(stripes);
+                                    ExFreePool(stripeoff);
+                                    Status = STATUS_CRC_ERROR;
+                                    goto exit;
+                                }
+                            }
+                        }
+                    } else if (is_tree) {
+                        UINT16 other_stripe = (stripe * ci->sub_stripes) + (context->stripes[stripe * ci->sub_stripes].status == ReadDataStatus_CRCError ? 1 : 0);
+                        tree_header* th = (tree_header*)buf;
+                        UINT32 crc32;
+                        
+                        RtlCopyMemory(buf, context->stripes[other_stripe].buf, readlen);
+                        
+                        crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
+                        
+                        if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
+                            WARN("could not recover from checksum error\n");
+                            ExFreePool(stripes);
+                            ExFreePool(stripeoff);
+                            Status = STATUS_CRC_ERROR;
+                            goto exit;
+                        }
+                        
+                        RtlCopyMemory(stripes[stripe]->buf, buf, readlen);
+                        stripes[stripe]->rewrite = TRUE;
+                    }
+                } else if (length - pos < ci->stripe_length) {
+                    if (context->csum && stripes[stripe]->status == ReadDataStatus_CRCError) {
+                        for (i = 0; i < (length - pos) / Vcb->superblock.sector_size; i++) {
+                            UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + pos + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
+                        
+                            if (crc32 != csum[(pos / Vcb->superblock.sector_size) + i]) {
+                                UINT16 other_stripe = (stripe * ci->sub_stripes) + (context->stripes[stripe * ci->sub_stripes].status == ReadDataStatus_CRCError ? 1 : 0);
+                                UINT32 crc32b = ~calc_crc32c(0xffffffff, &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)],
+                                                                Vcb->superblock.sector_size);
+                                
+                                if (crc32b == csum[i]) {
+                                    RtlCopyMemory(buf + pos + (i * Vcb->superblock.sector_size),
+                                                    &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], Vcb->superblock.sector_size);
+                                    RtlCopyMemory(&stripes[stripe]->buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)],
+                                                  &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)],
+                                                  Vcb->superblock.sector_size);
+                                    stripes[stripe]->rewrite = TRUE;
+                                } else {
+                                    WARN("could not recover from checksum error\n");
+                                    ExFreePool(stripes);
+                                    ExFreePool(stripeoff);
+                                    Status = STATUS_CRC_ERROR;
+                                    goto exit;
+                                }
+                            }
+                        }
+                    }
+                    
+                    pos = length;
+                } else {
+                    if (context->csum && stripes[stripe]->status == ReadDataStatus_CRCError) {
+                        for (i = 0; i < ci->stripe_length / Vcb->superblock.sector_size; i++) {
+                            UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + pos + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
+                        
+                            if (crc32 != csum[(pos / Vcb->superblock.sector_size) + i]) {
+                                UINT16 other_stripe = (stripe * ci->sub_stripes) + (context->stripes[stripe * ci->sub_stripes].status == ReadDataStatus_CRCError ? 1 : 0);
+                                UINT32 crc32b = ~calc_crc32c(0xffffffff, &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)],
+                                                                Vcb->superblock.sector_size);
+                                
+                                if (crc32b == csum[i]) {
+                                    RtlCopyMemory(buf + pos + (i * Vcb->superblock.sector_size),
+                                                    &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], Vcb->superblock.sector_size);
+                                    RtlCopyMemory(&stripes[stripe]->buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)],
+                                                  &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)],
+                                                  Vcb->superblock.sector_size);
+                                    stripes[stripe]->rewrite = TRUE;
+                                } else {
+                                    WARN("could not recover from checksum error\n");
+                                    ExFreePool(stripes);
+                                    ExFreePool(stripeoff);
+                                    Status = STATUS_CRC_ERROR;
+                                    goto exit;
+                                }
+                            }
+                        }
+                    }
+                    
+                    stripeoff[stripe] += ci->stripe_length;
+                    pos += ci->stripe_length;
+                }
+                
+                stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes);
+            }
+            
+            // write good data over bad
+            
+            if (!Vcb->readonly) {
+                for (i = 0; i < ci->num_stripes; i++) {
+                    if (context->stripes[i].rewrite && devices[i] && !devices[i]->readonly) {
+                        Status = write_data_phys(devices[i]->devobj, cis[i].offset + stripestart[i], context->stripes[i].buf, stripeend[i] - stripestart[i]);
+                        
+                        if (!NT_SUCCESS(Status))
+                            WARN("write_data_phys returned %08x\n", Status);
+                    }
+                }
+            }
+        }
+        
+        ExFreePool(stripes);
+        ExFreePool(stripeoff);
+        
+        // FIXME - handle the case where one of the stripes doesn't read everything, i.e. Irp->IoStatus.Information is short
+        
+        Status = STATUS_SUCCESS;
+    } else if (type == BLOCK_FLAG_DUPLICATE) {
+        BOOL checksum_error = FALSE;
+        UINT16 cancelled = 0;
+        
+        for (i = 0; i < ci->num_stripes; i++) {
+            if (context->stripes[i].status == ReadDataStatus_Success) {
+                if (context->tree) {
+                    tree_header* th = (tree_header*)context->stripes[i].buf;
+                    UINT32 crc32;
+                    
+                    crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum));
+                    
+                    if (th->address != context->address || crc32 != *((UINT32*)th->csum)) {
+                        context->stripes[i].status = ReadDataStatus_CRCError;
+                        checksum_error = TRUE;
+                    }
+                } else if (context->csum) {
+                    UINT32 j;
+                    
+#ifdef DEBUG_STATS
+                    time1 = KeQueryPerformanceCounter(NULL);
+#endif
+        
+                    for (j = 0; j < context->stripes[i].Irp->IoStatus.Information / context->sector_size; j++) {
+                        UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[i].buf + (j * context->sector_size), context->sector_size);
+                        
+                        if (crc32 != context->csum[j]) {
+                            context->stripes[i].status = ReadDataStatus_CRCError;
+                            checksum_error = TRUE;
+                            break;
+                        }
+                    }
+#ifdef DEBUG_STATS
+                    time2 = KeQueryPerformanceCounter(NULL);
+                    
+                    Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
+#endif
+                }
+            } else if (context->stripes[i].status == ReadDataStatus_Cancelled) {
+                cancelled++;
             }
+        }
+        
+        if (checksum_error) {
+            // FIXME - update dev stats
             
-            if (type == BLOCK_FLAG_RAID10) {
-                context->stripes[i].stripenum = i / ci->sub_stripes;
-            }
+            if (cancelled > 0) {
+                context->stripes_left = 0;
+                
+                for (i = 0; i < ci->num_stripes; i++) {
+                    if (context->stripes[i].status == ReadDataStatus_Cancelled) {
+                        PIO_STACK_LOCATION IrpSp;
+                        
+                        // re-run Irp that we cancelled
+                        
+                        if (context->stripes[i].Irp) {
+                            if (devices[i]->devobj->Flags & DO_DIRECT_IO) {
+                                MmUnlockPages(context->stripes[i].Irp->MdlAddress);
+                                IoFreeMdl(context->stripes[i].Irp->MdlAddress);
+                            }
+                            IoFreeIrp(context->stripes[i].Irp);
+                        }
+                        
+                        if (!Irp) {
+                            context->stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE);
+                            
+                            if (!context->stripes[i].Irp) {
+                                ERR("IoAllocateIrp failed\n");
+                                Status = STATUS_INSUFFICIENT_RESOURCES;
+                                goto exit;
+                            }
+                        } else {
+                            context->stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize);
+                            
+                            if (!context->stripes[i].Irp) {
+                                ERR("IoMakeAssociatedIrp failed\n");
+                                Status = STATUS_INSUFFICIENT_RESOURCES;
+                                goto exit;
+                            }
+                        }
+                        
+                        IrpSp = IoGetNextIrpStackLocation(context->stripes[i].Irp);
+                        IrpSp->MajorFunction = IRP_MJ_READ;
+                        
+                        if (devices[i]->devobj->Flags & DO_BUFFERED_IO) {
+                            FIXME("FIXME - buffered IO\n");
+                        } else if (devices[i]->devobj->Flags & DO_DIRECT_IO) {
+                            context->stripes[i].Irp->MdlAddress = IoAllocateMdl(context->stripes[i].buf, stripeend[i] - stripestart[i], FALSE, FALSE, NULL);
+                            if (!context->stripes[i].Irp->MdlAddress) {
+                                ERR("IoAllocateMdl failed\n");
+                                Status = STATUS_INSUFFICIENT_RESOURCES;
+                                goto exit;
+                            }
+                            
+                            MmProbeAndLockPages(context->stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
+                        } else {
+                            context->stripes[i].Irp->UserBuffer = context->stripes[i].buf;
+                        }
 
-            if (!Irp) {
-                context->stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE);
+                        IrpSp->Parameters.Read.Length = stripeend[i] - stripestart[i];
+                        IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[i] + cis[i].offset;
+                        
+                        context->stripes[i].Irp->UserIosb = &context->stripes[i].iosb;
+                        
+                        IoSetCompletionRoutine(context->stripes[i].Irp, read_data_completion, &context->stripes[i], TRUE, TRUE, TRUE);
+                        
+                        context->stripes_left++;
+                        context->stripes[i].status = ReadDataStatus_Pending;
+                    }
+                }
                 
-                if (!context->stripes[i].Irp) {
-                    ERR("IoAllocateIrp failed\n");
-                    Status = STATUS_INSUFFICIENT_RESOURCES;
-                    goto exit;
+                context->stripes_cancel = 0;
+                KeClearEvent(&context->Event);
+                
+#ifdef DEBUG_STATS
+                if (!is_tree)
+                    time1 = KeQueryPerformanceCounter(NULL);
+#endif
+
+                for (i = 0; i < ci->num_stripes; i++) {
+                    if (context->stripes[i].status == ReadDataStatus_Pending) {
+                        IoCallDriver(devices[i]->devobj, context->stripes[i].Irp);
+                    }
                 }
-            } else {
-                context->stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize);
                 
-                if (!context->stripes[i].Irp) {
-                    ERR("IoMakeAssociatedIrp failed\n");
-                    Status = STATUS_INSUFFICIENT_RESOURCES;
-                    goto exit;
+                KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
+                
+#ifdef DEBUG_STATS
+                if (!is_tree) {
+                    time2 = KeQueryPerformanceCounter(NULL);
+                    
+                    Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart;
+                }
+#endif
+                for (i = 0; i < ci->num_stripes; i++) {
+                    if (context->stripes[i].status == ReadDataStatus_Success) {
+                        if (context->tree) {
+                            tree_header* th = (tree_header*)context->stripes[i].buf;
+                            UINT32 crc32;
+                            
+                            crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum));
+                            
+                            if (th->address != context->address || crc32 != *((UINT32*)th->csum)) {
+                                context->stripes[i].status = ReadDataStatus_CRCError;
+                                checksum_error = TRUE;
+                            }
+                        } else if (context->csum) {
+                            UINT32 j;
+                            
+#ifdef DEBUG_STATS
+                            time1 = KeQueryPerformanceCounter(NULL);
+#endif
+                            for (j = 0; j < context->stripes[i].Irp->IoStatus.Information / context->sector_size; j++) {
+                                UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[i].buf + (j * context->sector_size), context->sector_size);
+                                
+                                if (crc32 != context->csum[j]) {
+                                    context->stripes[i].status = ReadDataStatus_CRCError;
+                                    checksum_error = TRUE;
+                                    break;
+                                }
+                            }
+#ifdef DEBUG_STATS
+                            time2 = KeQueryPerformanceCounter(NULL);
+                            
+                            Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
+#endif
+                        }
+                    }
                 }
             }
             
-            IrpSp = IoGetNextIrpStackLocation(context->stripes[i].Irp);
-            IrpSp->MajorFunction = IRP_MJ_READ;
-            
-            if (devices[i]->devobj->Flags & DO_BUFFERED_IO) {
-                FIXME("FIXME - buffered IO\n");
-            } else if (devices[i]->devobj->Flags & DO_DIRECT_IO) {
-                context->stripes[i].Irp->MdlAddress = IoAllocateMdl(context->stripes[i].buf, stripeend[i] - stripestart[i], FALSE, FALSE, NULL);
-                if (!context->stripes[i].Irp->MdlAddress) {
-                    ERR("IoAllocateMdl failed\n");
-                    Status = STATUS_INSUFFICIENT_RESOURCES;
-                    goto exit;
+            for (i = 0; i < ci->num_stripes; i++) {
+                if (context->stripes[i].status == ReadDataStatus_Success) {
+                    RtlCopyMemory(buf, context->stripes[i].buf, length);
+                    goto raid1write;
                 }
+            }
+            
+            if (context->tree || ci->num_stripes == 1) { // unable to recover from checksum error
+                ERR("unrecoverable checksum error at %llx\n", addr);
                 
-                MmProbeAndLockPages(context->stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
-            } else {
-                context->stripes[i].Irp->UserBuffer = context->stripes[i].buf;
+#ifdef _DEBUG
+                if (context->tree) {
+                    for (i = 0; i < ci->num_stripes; i++) {
+                        if (context->stripes[i].status == ReadDataStatus_CRCError) {
+                            tree_header* th = (tree_header*)context->stripes[i].buf;
+                            UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum));
+                            
+                            WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
+                        }
+                    }
+                }
+#endif
+                Status = STATUS_CRC_ERROR;
+                goto exit;
             }
-
-            IrpSp->Parameters.Read.Length = stripeend[i] - stripestart[i];
-            IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[i] + cis[i].offset;
             
-            context->stripes[i].Irp->UserIosb = &context->stripes[i].iosb;
+            // checksum errors on both stripes - we need to check sector by sector
             
-            IoSetCompletionRoutine(context->stripes[i].Irp, read_data_completion, &context->stripes[i], TRUE, TRUE, TRUE);
-
-            context->stripes[i].status = ReadDataStatus_Pending;
-        }
-    }
-    
-    for (i = 0; i < ci->num_stripes; i++) {
-        if (context->stripes[i].status != ReadDataStatus_MissingDevice) {
-            IoCallDriver(devices[i]->devobj, context->stripes[i].Irp);
-        }
-    }
-
-    KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
-    
-    // FIXME - if checksum error, write good data over bad
-    
-    // check if any of the devices return a "user-induced" error
-    
-    for (i = 0; i < ci->num_stripes; i++) {
-        if (context->stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context->stripes[i].iosb.Status)) {
-            if (Irp && context->stripes[i].iosb.Status == STATUS_VERIFY_REQUIRED) {
-                PDEVICE_OBJECT dev;
+            for (i = 0; i < (stripeend[0] - stripestart[0]) / context->sector_size; i++) {
+                UINT16 j;
+                BOOL success = FALSE;
                 
-                dev = IoGetDeviceToVerify(Irp->Tail.Overlay.Thread);
-                IoSetDeviceToVerify(Irp->Tail.Overlay.Thread, NULL);
+#ifdef DEBUG_STATS
+                time1 = KeQueryPerformanceCounter(NULL);
+#endif
                 
-                if (!dev) {
-                    dev = IoGetDeviceToVerify(PsGetCurrentThread());
-                    IoSetDeviceToVerify(PsGetCurrentThread(), NULL);
+                for (j = 0; j < ci->num_stripes; j++) {
+                    if (context->stripes[j].status == ReadDataStatus_CRCError) {
+                        UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[j].buf + (i * context->sector_size), context->sector_size);
+                        
+                        if (crc32 == context->csum[i]) {
+                            RtlCopyMemory(buf + (i * context->sector_size), context->stripes[j].buf + (i * context->sector_size), context->sector_size);
+                            success = TRUE;
+                            break;
+                        }
+                    }
                 }
                 
-                dev = Vcb->Vpb ? Vcb->Vpb->RealDevice : NULL;
-                
-                if (dev)
-                    IoVerifyVolume(dev, FALSE);
+#ifdef DEBUG_STATS
+                time2 = KeQueryPerformanceCounter(NULL);
+
+                Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
+#endif
+                if (!success) {
+                    ERR("unrecoverable checksum error at %llx\n", addr + (i * context->sector_size));
+                    Status = STATUS_CRC_ERROR;
+                    goto exit;
+                }
             }
-//             IoSetHardErrorOrVerifyDevice(context->stripes[i].Irp, devices[i]->devobj);
             
-            Status = context->stripes[i].iosb.Status;
+raid1write:
+            // write good data over bad
+            
+            if (!Vcb->readonly) {
+                for (i = 0; i < ci->num_stripes; i++) {
+                    if (context->stripes[i].status == ReadDataStatus_CRCError && devices[i] && !devices[i]->readonly) {
+                        Status = write_data_phys(devices[i]->devobj, cis[i].offset + stripestart[i], buf, length);
+                        
+                        if (!NT_SUCCESS(Status))
+                            WARN("write_data_phys returned %08x\n", Status);
+                    }
+                }
+            }
+            
+            Status = STATUS_SUCCESS;
             goto exit;
         }
-    }
-    
-    if (type == BLOCK_FLAG_RAID0) {
-        UINT32 pos, *stripeoff;
-        UINT8 stripe;
+        
+        // check if any of the stripes succeeded
+        
+        for (i = 0; i < ci->num_stripes; i++) {
+            if (context->stripes[i].status == ReadDataStatus_Success) {
+                RtlCopyMemory(buf, context->stripes[i].buf, length);
+                Status = STATUS_SUCCESS;
+                goto exit;
+            }
+        }
+        
+        // failing that, return the first error we encountered
         
         for (i = 0; i < ci->num_stripes; i++) {
             if (context->stripes[i].status == ReadDataStatus_Error) {
-                WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status); 
                 Status = context->stripes[i].iosb.Status;
                 goto exit;
             }
         }
         
-        pos = 0;
-        stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG);
-        if (!stripeoff) {
-            ERR("out of memory\n");
-            Status = STATUS_INSUFFICIENT_RESOURCES;
-            goto exit;
+        // if we somehow get here, return STATUS_INTERNAL_ERROR
+        
+        Status = STATUS_INTERNAL_ERROR;
+    } else if (type == BLOCK_FLAG_RAID5) {
+        UINT32 pos, skip;
+        int num_errors = 0;
+        UINT64 off, stripeoff, origoff;
+        BOOL needs_reconstruct = FALSE;
+        UINT64 reconstruct_stripe;
+        BOOL checksum_error = FALSE;
+        
+        for (i = 0; i < ci->num_stripes; i++) {
+            if (context->stripes[i].status == ReadDataStatus_Error) {
+                num_errors++;
+                if (num_errors > 1)
+                    break;
+            }
         }
         
-        RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes);
+        if (num_errors > 1) {
+            for (i = 0; i < ci->num_stripes; i++) {
+                if (context->stripes[i].status == ReadDataStatus_Error) {
+                    WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status); 
+                    Status = context->stripes[i].iosb.Status;
+                    goto exit;
+                }
+            }
+        }
+        
+        off = addr - offset;
+        off -= off % ((ci->num_stripes - 1) * ci->stripe_length);
+        skip = addr - offset - off;
+        origoff = off;
+        
+        for (i = 0; i < ci->num_stripes; i++) {
+            if (context->stripes[i].status == ReadDataStatus_Cancelled) {
+                if (needs_reconstruct) {
+                    ERR("more than one stripe needs reconstruction\n");
+                    Status = STATUS_INTERNAL_ERROR;
+                    goto exit;
+                } else {
+                    needs_reconstruct = TRUE;
+                    reconstruct_stripe = i;
+                }
+            }
+        }
+        
+        if (needs_reconstruct) {
+            TRACE("reconstructing stripe %u\n", reconstruct_stripe);
+            
+            stripeoff = 0;
+            
+            raid5_reconstruct(off, skip, context, ci, &stripeoff, stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe], TRUE, firststripesize, reconstruct_stripe);
+            
+            while (stripeoff < stripeend[0] - stripestart[0]) {
+                off += (ci->num_stripes - 1) * ci->stripe_length;
+                raid5_reconstruct(off, 0, context, ci, &stripeoff, stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe], FALSE, 0, reconstruct_stripe);
+            }
+            
+            off = addr - offset;
+            off -= off % ((ci->num_stripes - 1) * ci->stripe_length);
+        }
+        
+        pos = 0;
+        stripeoff = 0;
+        raid5_decode(off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize);
         
-        stripe = startoffstripe;
         while (pos < length) {
-            if (pos == 0) {
-                UINT32 readlen = min(stripeend[stripe] - stripestart[stripe], ci->stripe_length - (stripestart[stripe] % ci->stripe_length));
+            off += (ci->num_stripes - 1) * ci->stripe_length;
+            raid5_decode(off, 0, context, ci, &stripeoff, buf, &pos, length, 0);
+        }
+        
+        if (is_tree) {
+            tree_header* th = (tree_header*)buf;
+            UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
+            
+            if (addr != th->address || crc32 != *((UINT32*)th->csum))
+                checksum_error = TRUE;
+        } else if (csum) {
+#ifdef DEBUG_STATS
+            time1 = KeQueryPerformanceCounter(NULL);
+#endif
+            for (i = 0; i < length / Vcb->superblock.sector_size; i++) {
+                UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
                 
-                RtlCopyMemory(buf, context->stripes[stripe].buf, readlen);
-                stripeoff[stripe] += readlen;
-                pos += readlen;
-            } else if (length - pos < ci->stripe_length) {
-                RtlCopyMemory(buf + pos, &context->stripes[stripe].buf[stripeoff[stripe]], length - pos);
-                pos = length;
+                if (crc32 != csum[i]) {
+                    checksum_error = TRUE;
+                    break;
+                }
+            }
+#ifdef DEBUG_STATS
+            time2 = KeQueryPerformanceCounter(NULL);
+            
+            Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
+#endif
+        }
+        
+        if (checksum_error) {
+            if (needs_reconstruct) {
+                PIO_STACK_LOCATION IrpSp;
+                
+                // re-run Irp that we cancelled
+                
+                if (context->stripes[reconstruct_stripe].Irp) {
+                    if (devices[reconstruct_stripe]->devobj->Flags & DO_DIRECT_IO) {
+                        MmUnlockPages(context->stripes[reconstruct_stripe].Irp->MdlAddress);
+                        IoFreeMdl(context->stripes[reconstruct_stripe].Irp->MdlAddress);
+                    }
+                    IoFreeIrp(context->stripes[reconstruct_stripe].Irp);
+                }
+                
+                if (!Irp) {
+                    context->stripes[reconstruct_stripe].Irp = IoAllocateIrp(devices[reconstruct_stripe]->devobj->StackSize, FALSE);
+                    
+                    if (!context->stripes[reconstruct_stripe].Irp) {
+                        ERR("IoAllocateIrp failed\n");
+                        Status = STATUS_INSUFFICIENT_RESOURCES;
+                        goto exit;
+                    }
+                } else {
+                    context->stripes[reconstruct_stripe].Irp = IoMakeAssociatedIrp(Irp, devices[reconstruct_stripe]->devobj->StackSize);
+                    
+                    if (!context->stripes[reconstruct_stripe].Irp) {
+                        ERR("IoMakeAssociatedIrp failed\n");
+                        Status = STATUS_INSUFFICIENT_RESOURCES;
+                        goto exit;
+                    }
+                }
+                
+                IrpSp = IoGetNextIrpStackLocation(context->stripes[reconstruct_stripe].Irp);
+                IrpSp->MajorFunction = IRP_MJ_READ;
+                
+                if (devices[reconstruct_stripe]->devobj->Flags & DO_BUFFERED_IO) {
+                    FIXME("FIXME - buffered IO\n");
+                } else if (devices[reconstruct_stripe]->devobj->Flags & DO_DIRECT_IO) {
+                    context->stripes[reconstruct_stripe].Irp->MdlAddress = IoAllocateMdl(context->stripes[reconstruct_stripe].buf,
+                                                                                         stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe], FALSE, FALSE, NULL);
+                    if (!context->stripes[reconstruct_stripe].Irp->MdlAddress) {
+                        ERR("IoAllocateMdl failed\n");
+                        Status = STATUS_INSUFFICIENT_RESOURCES;
+                        goto exit;
+                    }
+                    
+                    MmProbeAndLockPages(context->stripes[reconstruct_stripe].Irp->MdlAddress, KernelMode, IoWriteAccess);
+                } else {
+                    context->stripes[reconstruct_stripe].Irp->UserBuffer = context->stripes[reconstruct_stripe].buf;
+                }
+
+                IrpSp->Parameters.Read.Length = stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe];
+                IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[reconstruct_stripe] + cis[reconstruct_stripe].offset;
+                
+                context->stripes[reconstruct_stripe].Irp->UserIosb = &context->stripes[reconstruct_stripe].iosb;
+                
+                IoSetCompletionRoutine(context->stripes[reconstruct_stripe].Irp, read_data_completion, &context->stripes[reconstruct_stripe], TRUE, TRUE, TRUE);
+
+                context->stripes[reconstruct_stripe].status = ReadDataStatus_Pending;
+                
+                context->stripes_left = 1;
+                KeClearEvent(&context->Event);
+                
+#ifdef DEBUG_STATS
+                if (!is_tree)
+                    time1 = KeQueryPerformanceCounter(NULL);
+#endif
+    
+                IoCallDriver(devices[reconstruct_stripe]->devobj, context->stripes[reconstruct_stripe].Irp);
+                
+                KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
+                
+#ifdef DEBUG_STATS
+                if (!is_tree) {
+                    time2 = KeQueryPerformanceCounter(NULL);
+                    
+                    Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart;
+                }
+#endif
+    
+                if (context->stripes[reconstruct_stripe].status != ReadDataStatus_Success) {
+                    ERR("unrecoverable checksum error\n");
+                    Status = STATUS_CRC_ERROR;
+                    goto exit;
+                }
+            }
+            
+            if (context->tree) {
+                off = origoff;
+                pos = 0;
+                stripeoff = 0;
+                if (!raid5_decode_with_checksum_metadata(addr, off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize, Vcb->superblock.node_size)) {
+                    ERR("unrecoverable metadata checksum error\n");
+                    Status = STATUS_CRC_ERROR;
+                    goto exit;
+                }
             } else {
-                RtlCopyMemory(buf + pos, &context->stripes[stripe].buf[stripeoff[stripe]], ci->stripe_length);
-                stripeoff[stripe] += ci->stripe_length;
-                pos += ci->stripe_length;
+                off = origoff;
+                pos = 0;
+                stripeoff = 0;
+                if (!raid5_decode_with_checksum(off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize, csum, Vcb->superblock.sector_size)) {
+                    Status = STATUS_CRC_ERROR;
+                    goto exit;
+                }
+                
+                while (pos < length) {
+                    off += (ci->num_stripes - 1) * ci->stripe_length;
+                    if (!raid5_decode_with_checksum(off, 0, context, ci, &stripeoff, buf, &pos, length, 0, csum, Vcb->superblock.sector_size)) {
+                        Status = STATUS_CRC_ERROR;
+                        goto exit;
+                    }
+                }
             }
             
-            stripe = (stripe + 1) % ci->num_stripes;
+            // write good data over bad
+            
+            if (!Vcb->readonly) {
+                for (i = 0; i < ci->num_stripes; i++) {
+                    if (context->stripes[i].rewrite && devices[i] && !devices[i]->readonly) {
+                        Status = write_data_phys(devices[i]->devobj, cis[i].offset + stripestart[i], context->stripes[i].buf, stripeend[i] - stripestart[i]);
+                        
+                        if (!NT_SUCCESS(Status))
+                            WARN("write_data_phys returned %08x\n", Status);
+                    }
+                }
+            }
         }
         
-        ExFreePool(stripeoff);
-        
-        // FIXME - handle the case where one of the stripes doesn't read everything, i.e. Irp->IoStatus.Information is short
-        
-        if (is_tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries
-            tree_header* th = (tree_header*)buf;
-            UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
+        if (!context->tree && !context->csum) {
+            UINT32* parity_buf;
             
-            if (crc32 != *((UINT32*)th->csum)) {
-                WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
-                Status = STATUS_CRC_ERROR;
+            // We are reading a nodatacsum extent. Even though there's no checksum, we
+            // can still identify errors by checking if the parity is consistent.
+            
+            parity_buf = ExAllocatePoolWithTag(NonPagedPool, stripeend[0] - stripestart[0], ALLOC_TAG);
+            
+            if (!parity_buf) {
+                ERR("out of memory\n");
+                Status = STATUS_INSUFFICIENT_RESOURCES;
                 goto exit;
             }
-        } else if (csum) {
-            for (i = 0; i < length / Vcb->superblock.sector_size; i++) {
-                UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
-                
-                if (crc32 != csum[i]) {
-                    WARN("checksum error (%08x != %08x)\n", crc32, csum[i]);
+            
+            RtlCopyMemory(parity_buf, context->stripes[0].buf, stripeend[0] - stripestart[0]);
+            
+            for (i = 0; i < ci->num_stripes; i++) {
+                do_xor((UINT8*)parity_buf, context->stripes[i].buf, stripeend[0] - stripestart[0]);
+            }
+            
+            for (i = 0; i < (stripeend[0] - stripestart[0]) / sizeof(UINT32); i++) {
+                if (parity_buf[i] != 0) {
+                    ERR("parity error on nodatacsum inode\n");
+                    ExFreePool(parity_buf);
                     Status = STATUS_CRC_ERROR;
                     goto exit;
                 }
             }
+            
+            ExFreePool(parity_buf);
         }
         
         Status = STATUS_SUCCESS;
-    } else if (type == BLOCK_FLAG_RAID10) {
-        UINT32 pos, *stripeoff;
-        UINT8 stripe;
-        read_data_stripe** stripes;
+    } else if (type == BLOCK_FLAG_RAID6) {
+        UINT32 pos, skip;
+        int num_errors = 0;
+        UINT64 off, stripeoff, origoff;
+        UINT8 needs_reconstruct = 0;
+        UINT16 missing1, missing2;
+        BOOL checksum_error = FALSE;
         
-        stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
-        if (!stripes) {
-            ERR("out of memory\n");
-            Status = STATUS_INSUFFICIENT_RESOURCES;
-            goto exit;
+        for (i = 0; i < ci->num_stripes; i++) {
+            if (context->stripes[i].status == ReadDataStatus_Error) {
+                num_errors++;
+                if (num_errors > 2)
+                    break;
+            }
         }
         
-        RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes);
+        if (num_errors > 2) {
+            for (i = 0; i < ci->num_stripes; i++) {
+                if (context->stripes[i].status == ReadDataStatus_Error) {
+                    WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status); 
+                    Status = context->stripes[i].iosb.Status;
+                    goto exit;
+                }
+            }
+        }
         
-        for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
-            UINT16 j;
-            
-            for (j = 0; j < ci->sub_stripes; j++) {
-                if (context->stripes[i+j].status == ReadDataStatus_Success) {
-                    stripes[i / ci->sub_stripes] = &context->stripes[i+j];
-                    break;
+        off = addr - offset;
+        off -= off % ((ci->num_stripes - 2) * ci->stripe_length);
+        skip = addr - offset - off;
+        origoff = off;
+        
+        for (i = 0; i < ci->num_stripes; i++) {
+            if (context->stripes[i].status == ReadDataStatus_Cancelled) {
+                if (needs_reconstruct == 2) {
+                    ERR("more than two stripes need reconstruction\n");
+                    Status = STATUS_INTERNAL_ERROR;
+                    goto exit;
+                } else if (needs_reconstruct == 1) {
+                    needs_reconstruct++;
+                    missing2 = i;
+                } else {
+                    needs_reconstruct++;
+                    missing1 = i;
                 }
             }
+        }
+        
+        if (needs_reconstruct > 0) {
+            stripeoff = 0;
             
-            if (!stripes[i / ci->sub_stripes]) {
-                for (j = 0; j < ci->sub_stripes; j++) {
-                    if (context->stripes[i+j].status == ReadDataStatus_CRCError) {
-                        WARN("stripe %llu had a checksum error\n", i+j);
-                        Status = STATUS_CRC_ERROR;
-                        goto exit;
-                    }
+            if (needs_reconstruct == 2) {
+                TRACE("reconstructing stripes %u and %u\n", missing1, missing2);
+            
+                raid6_reconstruct2(off, skip, context, ci, &stripeoff, stripeend[missing1] - stripestart[missing1],
+                                   TRUE, firststripesize, missing1, missing2);
+                
+                while (stripeoff < stripeend[0] - stripestart[0]) {
+                    off += (ci->num_stripes - 2) * ci->stripe_length;
+                    raid6_reconstruct2(off, 0, context, ci, &stripeoff, stripeend[missing1] - stripestart[missing1],
+                                       FALSE, 0, missing1, missing2);
                 }
+            } else {
+                TRACE("reconstructing stripe %u\n", missing1);
                 
-                for (j = 0; j < ci->sub_stripes; j++) {
-                    if (context->stripes[i+j].status == ReadDataStatus_Error) {
-                        WARN("stripe %llu returned error %08x\n", i+j, context->stripes[i+j].iosb.Status);
-                        Status = context->stripes[i].iosb.Status;
-                        goto exit;
-                    }
+                raid6_reconstruct1(off, skip, context, ci, &stripeoff, stripeend[missing1] - stripestart[missing1], TRUE, firststripesize, missing1);
+                
+                while (stripeoff < stripeend[0] - stripestart[0]) {
+                    off += (ci->num_stripes - 2) * ci->stripe_length;
+                    raid6_reconstruct1(off, 0, context, ci, &stripeoff, stripeend[missing1] - stripestart[missing1], FALSE, 0, missing1);
                 }
             }
+            
+            off = origoff;
         }
         
-        pos = 0;
-        stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
-        if (!stripeoff) {
-            ERR("out of memory\n");
-            Status = STATUS_INSUFFICIENT_RESOURCES;
-            goto exit;
-        }
-        
-        RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes);
-        
-        stripe = startoffstripe / ci->sub_stripes;
-        while (pos < length) {
-            if (pos == 0) {
-                UINT32 readlen = min(stripeend[stripe * ci->sub_stripes] - stripestart[stripe * ci->sub_stripes], ci->stripe_length - (stripestart[stripe * ci->sub_stripes] % ci->stripe_length));
+        if (!context->tree && !context->csum) {
+            UINT8* scratch;
+            
+            scratch = ExAllocatePoolWithTag(NonPagedPool, ci->stripe_length, ALLOC_TAG);
+            if (!scratch) {
+                ERR("out of memory\n");
+                Status = STATUS_INSUFFICIENT_RESOURCES;
+                goto exit;
+            }
+            
+            stripeoff = 0;
+            Status = check_raid6_nocsum_parity(off, skip, context, ci, &stripeoff, stripeend[0] - stripestart[0], TRUE, firststripesize, scratch);
+            if (!NT_SUCCESS(Status)) {
+                ERR("check_raid6_nocsum_parity returned %08x\n", Status);
+                ExFreePool(scratch);
+                goto exit;
+            }
                 
-                RtlCopyMemory(buf, stripes[stripe]->buf, readlen);
-                stripeoff[stripe] += readlen;
-                pos += readlen;
-            } else if (length - pos < ci->stripe_length) {
-                RtlCopyMemory(buf + pos, &stripes[stripe]->buf[stripeoff[stripe]], length - pos);
-                pos = length;
-            } else {
-                RtlCopyMemory(buf + pos, &stripes[stripe]->buf[stripeoff[stripe]], ci->stripe_length);
-                stripeoff[stripe] += ci->stripe_length;
-                pos += ci->stripe_length;
+            while (stripeoff < stripeend[0] - stripestart[0]) {
+                off += (ci->num_stripes - 2) * ci->stripe_length;
+                Status = check_raid6_nocsum_parity(off, 0, context, ci, &stripeoff, stripeend[0] - stripestart[0], FALSE, 0, scratch);
+                
+                if (!NT_SUCCESS(Status)) {
+                    ERR("check_raid6_nocsum_parity returned %08x\n", Status);
+                    ExFreePool(scratch);
+                    goto exit;
+                }
             }
             
-            stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes);
+            ExFreePool(scratch);
+            
+            off = origoff;
         }
         
-        ExFreePool(stripes);
-        ExFreePool(stripeoff);
+        pos = 0;
+        stripeoff = 0;
+        raid6_decode(off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize);
         
-        // FIXME - handle the case where one of the stripes doesn't read everything, i.e. Irp->IoStatus.Information is short
+        while (pos < length) {
+            off += (ci->num_stripes - 2) * ci->stripe_length;
+            raid6_decode(off, 0, context, ci, &stripeoff, buf, &pos, length, 0);
+        }
         
         if (is_tree) {
             tree_header* th = (tree_header*)buf;
             UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
             
-            if (crc32 != *((UINT32*)th->csum)) {
-                WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
-                Status = STATUS_CRC_ERROR;
-                goto exit;
+            if (addr != th->address || crc32 != *((UINT32*)th->csum))
+                checksum_error = TRUE;
+        } else if (csum) {
+#ifdef DEBUG_STATS
+            time1 = KeQueryPerformanceCounter(NULL);
+#endif
+            for (i = 0; i < length / Vcb->superblock.sector_size; i++) {
+                UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
+                
+                if (crc32 != csum[i]) {
+                    checksum_error = TRUE;
+                    break;
+                }
             }
+#ifdef DEBUG_STATS
+            time2 = KeQueryPerformanceCounter(NULL);
+            
+            Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
+#endif
         }
         
-        Status = STATUS_SUCCESS;
-    } else if (type == BLOCK_FLAG_DUPLICATE) {
-        // check if any of the stripes succeeded
-        
-        for (i = 0; i < ci->num_stripes; i++) {
-            if (context->stripes[i].status == ReadDataStatus_Success) {
-                RtlCopyMemory(buf, context->stripes[i].buf, length);
-                Status = STATUS_SUCCESS;
-                goto exit;
+        if (checksum_error) {
+            for (i = 0; i < needs_reconstruct; i++) {
+                PIO_STACK_LOCATION IrpSp;
+                UINT16 reconstruct_stripe = i == 0 ? missing1 : missing2;
+                
+                // re-run Irps that we cancelled
+                
+                if (context->stripes[reconstruct_stripe].Irp) {
+                    if (devices[reconstruct_stripe]->devobj->Flags & DO_DIRECT_IO) {
+                        MmUnlockPages(context->stripes[reconstruct_stripe].Irp->MdlAddress);
+                        IoFreeMdl(context->stripes[reconstruct_stripe].Irp->MdlAddress);
+                    }
+                    IoFreeIrp(context->stripes[reconstruct_stripe].Irp);
+                }
+                
+                if (!Irp) {
+                    context->stripes[reconstruct_stripe].Irp = IoAllocateIrp(devices[reconstruct_stripe]->devobj->StackSize, FALSE);
+                    
+                    if (!context->stripes[reconstruct_stripe].Irp) {
+                        ERR("IoAllocateIrp failed\n");
+                        Status = STATUS_INSUFFICIENT_RESOURCES;
+                        goto exit;
+                    }
+                } else {
+                    context->stripes[reconstruct_stripe].Irp = IoMakeAssociatedIrp(Irp, devices[reconstruct_stripe]->devobj->StackSize);
+                    
+                    if (!context->stripes[reconstruct_stripe].Irp) {
+                        ERR("IoMakeAssociatedIrp failed\n");
+                        Status = STATUS_INSUFFICIENT_RESOURCES;
+                        goto exit;
+                    }
+                }
+                
+                IrpSp = IoGetNextIrpStackLocation(context->stripes[reconstruct_stripe].Irp);
+                IrpSp->MajorFunction = IRP_MJ_READ;
+                
+                if (devices[reconstruct_stripe]->devobj->Flags & DO_BUFFERED_IO) {
+                    FIXME("FIXME - buffered IO\n");
+                } else if (devices[reconstruct_stripe]->devobj->Flags & DO_DIRECT_IO) {
+                    context->stripes[reconstruct_stripe].Irp->MdlAddress = IoAllocateMdl(context->stripes[reconstruct_stripe].buf,
+                                                                                         stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe], FALSE, FALSE, NULL);
+                    if (!context->stripes[reconstruct_stripe].Irp->MdlAddress) {
+                        ERR("IoAllocateMdl failed\n");
+                        Status = STATUS_INSUFFICIENT_RESOURCES;
+                        goto exit;
+                    }
+                    
+                    MmProbeAndLockPages(context->stripes[reconstruct_stripe].Irp->MdlAddress, KernelMode, IoWriteAccess);
+                } else {
+                    context->stripes[reconstruct_stripe].Irp->UserBuffer = context->stripes[reconstruct_stripe].buf;
+                }
+
+                IrpSp->Parameters.Read.Length = stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe];
+                IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[reconstruct_stripe] + cis[reconstruct_stripe].offset;
+                
+                context->stripes[reconstruct_stripe].Irp->UserIosb = &context->stripes[reconstruct_stripe].iosb;
+                
+                IoSetCompletionRoutine(context->stripes[reconstruct_stripe].Irp, read_data_completion, &context->stripes[reconstruct_stripe], TRUE, TRUE, TRUE);
+
+                context->stripes[reconstruct_stripe].status = ReadDataStatus_Pending;
             }
-        }
-        
-        // if not, see if we got a checksum error
-        
-        for (i = 0; i < ci->num_stripes; i++) {
-            if (context->stripes[i].status == ReadDataStatus_CRCError) {
-#ifdef _DEBUG
-                WARN("stripe %llu had a checksum error\n", i);
+             
+            if (needs_reconstruct > 0) {
+                context->stripes_left = needs_reconstruct;
+                KeClearEvent(&context->Event);
                 
-                if (context->tree) {
-                    tree_header* th = (tree_header*)context->stripes[i].buf;
-                    UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum));
+#ifdef DEBUG_STATS
+                if (!is_tree)
+                    time1 = KeQueryPerformanceCounter(NULL);
+#endif
+                
+                for (i = 0; i < needs_reconstruct; i++) {
+                    UINT16 reconstruct_stripe = i == 0 ? missing1 : missing2;
+                    
+                    IoCallDriver(devices[reconstruct_stripe]->devobj, context->stripes[reconstruct_stripe].Irp);
+                }
+                
+                KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
+                
+#ifdef DEBUG_STATS
+                if (!is_tree) {
+                    time2 = KeQueryPerformanceCounter(NULL);
                     
-                    WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
+                    Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart;
                 }
 #endif
+    
+                for (i = 0; i < needs_reconstruct; i++) {
+                    UINT16 reconstruct_stripe = i == 0 ? missing1 : missing2;
+                    
+                    if (context->stripes[reconstruct_stripe].status != ReadDataStatus_Success) {
+                        ERR("unrecoverable checksum error\n");
+                        Status = STATUS_CRC_ERROR;
+                        goto exit;
+                    }
+                }
+            }
+            
+            off = origoff;
+            
+            if (context->tree) {
+                pos = 0;
+                stripeoff = 0;
+                if (!raid6_decode_with_checksum_metadata(addr, off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize, Vcb->superblock.node_size)) {
+                    ERR("unrecoverable metadata checksum error\n");
+                    Status = STATUS_CRC_ERROR;
+                    goto exit;
+                }
+            } else {
+                pos = 0;
+                stripeoff = 0;
+                if (!raid6_decode_with_checksum(off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize, csum, Vcb->superblock.sector_size)) {
+                    Status = STATUS_CRC_ERROR;
+                    goto exit;
+                }
                 
-                Status = STATUS_CRC_ERROR;
-                goto exit;
+                while (pos < length) {
+                    off += (ci->num_stripes - 2) * ci->stripe_length; // RAID6 row = num_stripes - 2 data stripes (two are parity), as in the other RAID6 loops above
+                    if (!raid6_decode_with_checksum(off, 0, context, ci, &stripeoff, buf, &pos, length, 0, csum, Vcb->superblock.sector_size)) {
+                        Status = STATUS_CRC_ERROR;
+                        goto exit;
+                    }
+                }
             }
         }
         
-        // failing that, return the first error we encountered
+        // write good data over bad
         
-        for (i = 0; i < ci->num_stripes; i++) {
-            if (context->stripes[i].status == ReadDataStatus_Error) {
-                Status = context->stripes[i].iosb.Status;
-                goto exit;
+        if (!Vcb->readonly) {
+            for (i = 0; i < ci->num_stripes; i++) {
+                if (context->stripes[i].rewrite && devices[i] && !devices[i]->readonly) {
+                    Status = write_data_phys(devices[i]->devobj, cis[i].offset + stripestart[i], context->stripes[i].buf, stripeend[i] - stripestart[i]);
+                    
+                    if (!NT_SUCCESS(Status))
+                        WARN("write_data_phys returned %08x\n", Status);
+                }
             }
         }
         
-        // if we somehow get here, return STATUS_INTERNAL_ERROR
-        
-        Status = STATUS_INTERNAL_ERROR;
+        Status = STATUS_SUCCESS;
     }
 
 exit:
@@ -895,6 +3042,9 @@ NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, U
     UINT64 bytes_read = 0;
     UINT64 last_end;
     LIST_ENTRY* le;
+#ifdef DEBUG_STATS
+    LARGE_INTEGER time1, time2;
+#endif
     
     TRACE("(%p, %p, %llx, %llx, %p)\n", fcb, data, start, length, pbr);
     
@@ -906,6 +3056,10 @@ NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, U
         Status = STATUS_END_OF_FILE;
         goto exit;        
     }
+    
+#ifdef DEBUG_STATS
+    time1 = KeQueryPerformanceCounter(NULL);
+#endif
 
     le = fcb->extents.Flink;
 
@@ -973,8 +3127,10 @@ NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, U
                     UINT64 off = start + bytes_read - ext->offset;
                     UINT32 to_read, read;
                     UINT8* buf;
+                    BOOL buf_free;
                     UINT32 *csum, bumpoff = 0;
-                    UINT64 addr;
+                    UINT64 addr, lockaddr, locklen;
+                    chunk* c;
                     
                     read = len - off;
                     if (read > length) read = length;
@@ -993,12 +3149,19 @@ NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, U
                         to_read = sector_align(ed2->size, fcb->Vcb->superblock.sector_size);
                     }
                     
-                    buf = ExAllocatePoolWithTag(PagedPool, to_read, ALLOC_TAG);
-                    
-                    if (!buf) {
-                        ERR("out of memory\n");
-                        Status = STATUS_INSUFFICIENT_RESOURCES;
-                        goto exit;
+                    if (ed->compression == BTRFS_COMPRESSION_NONE && start % fcb->Vcb->superblock.sector_size == 0 &&
+                        length % fcb->Vcb->superblock.sector_size == 0) {
+                        buf = data + bytes_read;
+                        buf_free = FALSE;
+                    } else {
+                        buf = ExAllocatePoolWithTag(PagedPool, to_read, ALLOC_TAG);
+                        buf_free = TRUE;
+                        
+                        if (!buf) {
+                            ERR("out of memory\n");
+                            Status = STATUS_INSUFFICIENT_RESOURCES;
+                            goto exit;
+                        }
                     }
                     
                     if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
@@ -1006,21 +3169,51 @@ NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, U
                         
                         if (!NT_SUCCESS(Status)) {
                             ERR("load_csum returned %08x\n", Status);
-                            ExFreePool(buf);
+                            
+                            if (buf_free)
+                                ExFreePool(buf);
+                            
                             goto exit;
                         }
                     } else
                         csum = NULL;
                     
-                    Status = read_data(fcb->Vcb, addr, to_read, csum, FALSE, buf, NULL, Irp);
+                    c = get_chunk_from_address(fcb->Vcb, addr);
+                    
+                    if (!c) {
+                        ERR("get_chunk_from_address(%llx) failed\n", addr);
+                        Status = STATUS_INTERNAL_ERROR; // otherwise the stale (possibly successful) Status would be returned at exit
+                        if (buf_free)
+                            ExFreePool(buf);
+                        
+                        goto exit;
+                    }
+                    
+                    if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) {
+                        get_raid56_lock_range(c, addr, to_read, &lockaddr, &locklen);
+                        chunk_lock_range(fcb->Vcb, c, lockaddr, locklen);
+                    }
+                    
+                    
+                    Status = read_data(fcb->Vcb, addr, to_read, csum, FALSE, buf, c, NULL, Irp);
                     if (!NT_SUCCESS(Status)) {
                         ERR("read_data returned %08x\n", Status);
-                        ExFreePool(buf);
+                        
+                        if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
+                            chunk_unlock_range(fcb->Vcb, c, lockaddr, locklen);
+                        
+                        if (buf_free)
+                            ExFreePool(buf);
+                        
                         goto exit;
                     }
                     
+                    if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
+                        chunk_unlock_range(fcb->Vcb, c, lockaddr, locklen);
+                    
                     if (ed->compression == BTRFS_COMPRESSION_NONE) {
-                        RtlCopyMemory(data + bytes_read, buf + bumpoff, read);
+                        if (buf_free)
+                            RtlCopyMemory(data + bytes_read, buf + bumpoff, read);
                     } else {
                         UINT8* decomp = NULL;
                         
@@ -1048,7 +3241,8 @@ NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, U
                         ExFreePool(decomp);
                     }
                     
-                    ExFreePool(buf);
+                    if (buf_free)
+                        ExFreePool(buf);
                     
                     if (csum)
                         ExFreePool(csum);
@@ -1103,6 +3297,14 @@ nextitem:
     if (pbr)
         *pbr = bytes_read;
     
+#ifdef DEBUG_STATS
+    time2 = KeQueryPerformanceCounter(NULL);
+    
+    fcb->Vcb->stats.num_reads++;
+    fcb->Vcb->stats.data_read += bytes_read;
+    fcb->Vcb->stats.read_total_time += time2.QuadPart - time1.QuadPart;
+#endif
+    
 exit:
     return Status;
 }
@@ -1124,7 +3326,7 @@ NTSTATUS do_read(PIRP Irp, BOOL wait, ULONG* bytes_read) {
     TRACE("offset = %llx, length = %x\n", start, length);
     TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "TRUE" : "FALSE", Irp->Flags & IRP_NOCACHE ? "TRUE" : "FALSE");
 
-    if (fcb->type == BTRFS_TYPE_DIRECTORY)
+    if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY)
         return STATUS_INVALID_DEVICE_REQUEST;
     
     if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) {
@@ -1321,7 +3523,9 @@ NTSTATUS STDCALL drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
             
             tree_lock = TRUE;
         }
+    }
     
+    if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) {
         if (!ExAcquireResourceSharedLite(fcb->Header.Resource, IoIsOperationSynchronous(Irp))) {
             Status = STATUS_PENDING;
             IoMarkIrpPending(Irp);
index 1719690..f021141 100644 (file)
@@ -27,7 +27,7 @@ NTSTATUS registry_load_volume_options(device_extension* Vcb) {
     BTRFS_UUID* uuid = &Vcb->superblock.uuid;
     mount_options* options = &Vcb->options;
     UNICODE_STRING path, ignoreus, compressus, compressforceus, compresstypeus, readonlyus, zliblevelus, flushintervalus,
-                   maxinlineus, subvolidus;
+                   maxinlineus, subvolidus, raid5recalcus, raid6recalcus;
     OBJECT_ATTRIBUTES oa;
     NTSTATUS Status;
     ULONG i, j, kvfilen, index, retlen;
@@ -41,6 +41,8 @@ NTSTATUS registry_load_volume_options(device_extension* Vcb) {
     options->zlib_level = mount_zlib_level;
     options->flush_interval = mount_flush_interval;
     options->max_inline = min(mount_max_inline, Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node) - sizeof(EXTENT_DATA) + 1);
+    options->raid5_recalculation = mount_raid5_recalculation;
+    options->raid6_recalculation = mount_raid6_recalculation;
     options->subvol_id = 0;
     
     path.Length = path.MaximumLength = registry_path.Length + (37 * sizeof(WCHAR));
@@ -99,6 +101,8 @@ NTSTATUS registry_load_volume_options(device_extension* Vcb) {
     RtlInitUnicodeString(&flushintervalus, L"FlushInterval");
     RtlInitUnicodeString(&maxinlineus, L"MaxInline");
     RtlInitUnicodeString(&subvolidus, L"SubvolId");
+    RtlInitUnicodeString(&raid5recalcus, L"Raid5Recalculation");
+    RtlInitUnicodeString(&raid6recalcus, L"Raid6Recalculation");
     
     do {
         Status = ZwEnumerateValueKey(h, index, KeyValueFullInformation, kvfi, kvfilen, &retlen);
@@ -147,6 +151,14 @@ NTSTATUS registry_load_volume_options(device_extension* Vcb) {
                 UINT64* val = (UINT64*)((UINT8*)kvfi + kvfi->DataOffset);
                 
                 options->subvol_id = *val;
+            } else if (FsRtlAreNamesEqual(&raid5recalcus, &us, TRUE, NULL) && kvfi->DataOffset > 0 && kvfi->DataLength > 0 && kvfi->Type == REG_DWORD) {
+                DWORD* val = (DWORD*)((UINT8*)kvfi + kvfi->DataOffset);
+                
+                options->raid5_recalculation = *val;
+            } else if (FsRtlAreNamesEqual(&raid6recalcus, &us, TRUE, NULL) && kvfi->DataOffset > 0 && kvfi->DataLength > 0 && kvfi->Type == REG_DWORD) {
+                DWORD* val = (DWORD*)((UINT8*)kvfi + kvfi->DataOffset);
+                
+                options->raid6_recalculation = *val;
             }
         } else if (Status != STATUS_NO_MORE_ENTRIES) {
             ERR("ZwEnumerateValueKey returned %08x\n", Status);
@@ -162,6 +174,12 @@ NTSTATUS registry_load_volume_options(device_extension* Vcb) {
     
     if (options->flush_interval == 0)
         options->flush_interval = mount_flush_interval;
+    
+    if (options->raid5_recalculation > 1)
+        options->raid5_recalculation = 1;
+    
+    if (options->raid6_recalculation > 2)
+        options->raid6_recalculation = 2;
 
     Status = STATUS_SUCCESS;
     
@@ -635,10 +653,18 @@ void STDCALL read_registry(PUNICODE_STRING regpath) {
     get_registry_value(h, L"ZlibLevel", REG_DWORD, &mount_zlib_level, sizeof(mount_zlib_level));
     get_registry_value(h, L"FlushInterval", REG_DWORD, &mount_flush_interval, sizeof(mount_flush_interval));
     get_registry_value(h, L"MaxInline", REG_DWORD, &mount_max_inline, sizeof(mount_max_inline));
+    get_registry_value(h, L"Raid5Recalculation", REG_DWORD, &mount_raid5_recalculation, sizeof(mount_raid5_recalculation));
+    get_registry_value(h, L"Raid6Recalculation", REG_DWORD, &mount_raid6_recalculation, sizeof(mount_raid6_recalculation));
     
     if (mount_flush_interval == 0)
         mount_flush_interval = 1;
     
+    if (mount_raid5_recalculation > 1)
+        mount_raid5_recalculation = 1;
+    
+    if (mount_raid6_recalculation > 2)
+        mount_raid6_recalculation = 2;
+    
 #ifdef _DEBUG
     get_registry_value(h, L"DebugLogLevel", REG_DWORD, &debug_log_level, sizeof(debug_log_level));
     
index 0aa0e26..5360604 100644 (file)
@@ -17,7 +17,7 @@
 
 #include "btrfs_drv.h"
 
-NTSTATUS get_reparse_point(PDEVICE_OBJECT DeviceObject, PFILE_OBJECT FileObject, void* buffer, DWORD buflen, DWORD* retlen) {
+NTSTATUS get_reparse_point(PDEVICE_OBJECT DeviceObject, PFILE_OBJECT FileObject, void* buffer, DWORD buflen, ULONG_PTR* retlen) {
     USHORT subnamelen, printnamelen, i;
     ULONG stringlen;
     DWORD reqlen;
@@ -32,79 +32,100 @@ NTSTATUS get_reparse_point(PDEVICE_OBJECT DeviceObject, PFILE_OBJECT FileObject,
     ExAcquireResourceSharedLite(fcb->Header.Resource, TRUE);
     
     if (fcb->type == BTRFS_TYPE_SYMLINK) {
-        data = ExAllocatePoolWithTag(PagedPool, fcb->inode_item.st_size, ALLOC_TAG);
-        if (!data) {
-            ERR("out of memory\n");
-            Status = STATUS_INSUFFICIENT_RESOURCES;
-            goto end;
-        }
-        
-        TRACE("data = %p, size = %x\n", data, fcb->inode_item.st_size);
-        Status = read_file(fcb, (UINT8*)data, 0, fcb->inode_item.st_size, NULL, NULL);
-        
-        if (!NT_SUCCESS(Status)) {
-            ERR("read_file returned %08x\n", Status);
-            ExFreePool(data);
-            goto end;
-        }
-        
-        Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, data, fcb->inode_item.st_size);
-        if (!NT_SUCCESS(Status)) {
-            ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status);
-            ExFreePool(data);
-            goto end;
-        }
-        
-        subnamelen = stringlen;
-        printnamelen = stringlen;
-        
-        reqlen = offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer.PathBuffer) + subnamelen + printnamelen;
-        
-        if (buflen < reqlen) {
-            Status = STATUS_BUFFER_OVERFLOW;
-            goto end;
-        }
-        
-        rdb->ReparseTag = IO_REPARSE_TAG_SYMLINK;
-        rdb->ReparseDataLength = reqlen - offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer);
-        rdb->Reserved = 0;
-        
-        rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset = 0;
-        rdb->SymbolicLinkReparseBuffer.SubstituteNameLength = subnamelen;
-        rdb->SymbolicLinkReparseBuffer.PrintNameOffset = subnamelen;
-        rdb->SymbolicLinkReparseBuffer.PrintNameLength = printnamelen;
-        rdb->SymbolicLinkReparseBuffer.Flags = SYMLINK_FLAG_RELATIVE;
-        
-        Status = RtlUTF8ToUnicodeN(&rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)],
-                                stringlen, &stringlen, data, fcb->inode_item.st_size);
+        if (called_from_lxss()) {
+            reqlen = offsetof(REPARSE_DATA_BUFFER, GenericReparseBuffer.DataBuffer) + sizeof(UINT32);
+            
+            if (buflen < reqlen) {
+                Status = STATUS_BUFFER_OVERFLOW;
+                goto end;
+            }
+            
+            rdb->ReparseTag = IO_REPARSE_TAG_LXSS_SYMLINK;
+            rdb->ReparseDataLength = offsetof(REPARSE_DATA_BUFFER, GenericReparseBuffer.DataBuffer) + sizeof(UINT32);
+            rdb->Reserved = 0;
+            
+            *((UINT32*)rdb->GenericReparseBuffer.DataBuffer) = 1;
+            
+            *retlen = reqlen;
+        } else {
+            data = ExAllocatePoolWithTag(PagedPool, fcb->inode_item.st_size, ALLOC_TAG);
+            if (!data) {
+                ERR("out of memory\n");
+                Status = STATUS_INSUFFICIENT_RESOURCES;
+                goto end;
+            }
+            
+            TRACE("data = %p, size = %x\n", data, fcb->inode_item.st_size);
+            Status = read_file(fcb, (UINT8*)data, 0, fcb->inode_item.st_size, NULL, NULL);
+            
+            if (!NT_SUCCESS(Status)) {
+                ERR("read_file returned %08x\n", Status);
+                ExFreePool(data);
+                goto end;
+            }
+            
+            Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, data, fcb->inode_item.st_size);
+            if (!NT_SUCCESS(Status)) {
+                ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status);
+                ExFreePool(data);
+                goto end;
+            }
+            
+            subnamelen = stringlen;
+            printnamelen = stringlen;
+            
+            reqlen = offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer.PathBuffer) + subnamelen + printnamelen;
+            if (buflen < reqlen) {
+                ExFreePool(data); // like the other failure paths in this branch, release the symlink target buffer before leaving
+                Status = STATUS_BUFFER_OVERFLOW;
+                goto end;
+            }
+            
+            rdb->ReparseTag = IO_REPARSE_TAG_SYMLINK;
+            rdb->ReparseDataLength = reqlen - offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer);
+            rdb->Reserved = 0;
+            
+            rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset = 0;
+            rdb->SymbolicLinkReparseBuffer.SubstituteNameLength = subnamelen;
+            rdb->SymbolicLinkReparseBuffer.PrintNameOffset = subnamelen;
+            rdb->SymbolicLinkReparseBuffer.PrintNameLength = printnamelen;
+            rdb->SymbolicLinkReparseBuffer.Flags = SYMLINK_FLAG_RELATIVE;
+            
+            Status = RtlUTF8ToUnicodeN(&rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)],
+                                    stringlen, &stringlen, data, fcb->inode_item.st_size);
 
-        if (!NT_SUCCESS(Status)) {
-            ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status);
+            if (!NT_SUCCESS(Status)) {
+                ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status);
+                ExFreePool(data);
+                goto end;
+            }
+            
+            for (i = 0; i < stringlen / sizeof(WCHAR); i++) {
+                if (rdb->SymbolicLinkReparseBuffer.PathBuffer[(rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)) + i] == '/')
+                    rdb->SymbolicLinkReparseBuffer.PathBuffer[(rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)) + i] = '\\';
+            }
+            
+            RtlCopyMemory(&rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.PrintNameOffset / sizeof(WCHAR)],
+                        &rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)],
+                        rdb->SymbolicLinkReparseBuffer.SubstituteNameLength);
+            
+            *retlen = reqlen;
+            
             ExFreePool(data);
-            goto end;
-        }
-        
-        for (i = 0; i < stringlen / sizeof(WCHAR); i++) {
-            if (rdb->SymbolicLinkReparseBuffer.PathBuffer[(rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)) + i] == '/')
-                rdb->SymbolicLinkReparseBuffer.PathBuffer[(rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)) + i] = '\\';
         }
         
-        RtlCopyMemory(&rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.PrintNameOffset / sizeof(WCHAR)],
-                    &rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)],
-                    rdb->SymbolicLinkReparseBuffer.SubstituteNameLength);
-        
-        *retlen = reqlen;
-        
-        ExFreePool(data);
-        
         Status = STATUS_SUCCESS;
     } else if (fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT) {
         if (fcb->type == BTRFS_TYPE_FILE) {
-            Status = read_file(fcb, buffer, 0, buflen, retlen, NULL);
+            ULONG len;
+            
+            Status = read_file(fcb, buffer, 0, buflen, &len, NULL);
             
             if (!NT_SUCCESS(Status)) {
                 ERR("read_file returned %08x\n", Status);
             }
+            
+            *retlen = len;
         } else if (fcb->type == BTRFS_TYPE_DIRECTORY) {
             if (!fcb->reparse_xattr.Buffer || fcb->reparse_xattr.Length < sizeof(ULONG)) {
                 Status = STATUS_NOT_A_REPARSE_POINT;
@@ -129,7 +150,7 @@ end:
     return Status;
 }
 
-static NTSTATUS set_symlink(PIRP Irp, file_ref* fileref, REPARSE_DATA_BUFFER* rdb, ULONG buflen, LIST_ENTRY* rollback) {
+static NTSTATUS set_symlink(PIRP Irp, file_ref* fileref, ccb* ccb, REPARSE_DATA_BUFFER* rdb, ULONG buflen, BOOL write, LIST_ENTRY* rollback) {
     NTSTATUS Status;
     ULONG minlen;
     ULONG tlength;
@@ -139,70 +160,81 @@ static NTSTATUS set_symlink(PIRP Irp, file_ref* fileref, REPARSE_DATA_BUFFER* rd
     BTRFS_TIME now;
     USHORT i;
     
-    minlen = offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer.PathBuffer) + sizeof(WCHAR);
-    if (buflen < minlen) {
-        WARN("buffer was less than minimum length (%u < %u)\n", buflen, minlen);
-        return STATUS_INVALID_PARAMETER;
+    if (write) {
+        minlen = offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer.PathBuffer) + sizeof(WCHAR);
+        if (buflen < minlen) {
+            WARN("buffer was less than minimum length (%u < %u)\n", buflen, minlen);
+            return STATUS_INVALID_PARAMETER;
+        }
+        
+        subname.Buffer = &rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)];
+        subname.MaximumLength = subname.Length = rdb->SymbolicLinkReparseBuffer.SubstituteNameLength;
+        
+        TRACE("substitute name = %.*S\n", subname.Length / sizeof(WCHAR), subname.Buffer);
     }
     
-    subname.Buffer = &rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)];
-    subname.MaximumLength = subname.Length = rdb->SymbolicLinkReparseBuffer.SubstituteNameLength;
-    
-    TRACE("substitute name = %.*S\n", subname.Length / sizeof(WCHAR), subname.Buffer);
-    
     fileref->fcb->type = BTRFS_TYPE_SYMLINK;
     
     fileref->fcb->inode_item.st_mode |= __S_IFLNK;
     
-    Status = truncate_file(fileref->fcb, 0, Irp, rollback);
-    if (!NT_SUCCESS(Status)) {
-        ERR("truncate_file returned %08x\n", Status);
-        return Status;
-    }
-    
-    Status = RtlUnicodeToUTF8N(NULL, 0, (PULONG)&target.Length, subname.Buffer, subname.Length);
-    if (!NT_SUCCESS(Status)) {
-        ERR("RtlUnicodeToUTF8N 1 failed with error %08x\n", Status);
-        return Status;
-    }
-    
-    target.MaximumLength = target.Length;
-    target.Buffer = ExAllocatePoolWithTag(PagedPool, target.MaximumLength, ALLOC_TAG);
-    if (!target.Buffer) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-    
-    Status = RtlUnicodeToUTF8N(target.Buffer, target.Length, (PULONG)&target.Length, subname.Buffer, subname.Length);
-    if (!NT_SUCCESS(Status)) {
-        ERR("RtlUnicodeToUTF8N 2 failed with error %08x\n", Status);
+    if (write) {
+        Status = truncate_file(fileref->fcb, 0, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("truncate_file returned %08x\n", Status);
+            return Status;
+        }
+        
+        Status = RtlUnicodeToUTF8N(NULL, 0, (PULONG)&target.Length, subname.Buffer, subname.Length);
+        if (!NT_SUCCESS(Status)) {
+            ERR("RtlUnicodeToUTF8N 1 failed with error %08x\n", Status);
+            return Status;
+        }
+        
+        target.MaximumLength = target.Length;
+        target.Buffer = ExAllocatePoolWithTag(PagedPool, target.MaximumLength, ALLOC_TAG);
+        if (!target.Buffer) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        Status = RtlUnicodeToUTF8N(target.Buffer, target.Length, (PULONG)&target.Length, subname.Buffer, subname.Length);
+        if (!NT_SUCCESS(Status)) {
+            ERR("RtlUnicodeToUTF8N 2 failed with error %08x\n", Status);
+            ExFreePool(target.Buffer);
+            return Status;
+        }
+        
+        for (i = 0; i < target.Length; i++) {
+            if (target.Buffer[i] == '\\')
+                target.Buffer[i] = '/';
+        }
+        
+        offset.QuadPart = 0;
+        tlength = target.Length;
+        Status = write_file2(fileref->fcb->Vcb, Irp, offset, target.Buffer, &tlength, FALSE, TRUE,
+                            TRUE, FALSE, rollback);
         ExFreePool(target.Buffer);
-        return Status;
-    }
-    
-    for (i = 0; i < target.Length; i++) {
-        if (target.Buffer[i] == '\\')
-            target.Buffer[i] = '/';
-    }
-    
-    offset.QuadPart = 0;
-    tlength = target.Length;
-    Status = write_file2(fileref->fcb->Vcb, Irp, offset, target.Buffer, &tlength, FALSE, TRUE,
-                         TRUE, FALSE, rollback);
-    ExFreePool(target.Buffer);
+    } else
+        Status = STATUS_SUCCESS;
     
     KeQuerySystemTime(&time);
     win_time_to_unix(time, &now);
 
     fileref->fcb->inode_item.transid = fileref->fcb->Vcb->superblock.generation;
     fileref->fcb->inode_item.sequence++;
-    fileref->fcb->inode_item.st_ctime = now;
-    fileref->fcb->inode_item.st_mtime = now;
+    
+    if (!ccb->user_set_change_time)
+        fileref->fcb->inode_item.st_ctime = now;
+    
+    if (!ccb->user_set_write_time)
+        fileref->fcb->inode_item.st_mtime = now;
     
     fileref->fcb->subvol->root_item.ctransid = fileref->fcb->Vcb->superblock.generation;
     fileref->fcb->subvol->root_item.ctime = now;
     
+    fileref->fcb->inode_item_changed = TRUE;
     mark_fcb_dirty(fileref->fcb);
+    
     mark_fileref_dirty(fileref);
     
     return Status;
@@ -240,7 +272,7 @@ NTSTATUS set_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
     
     // It isn't documented what permissions FSCTL_SET_REPARSE_POINT needs, but CreateSymbolicLinkW
     // creates a file with FILE_WRITE_ATTRIBUTES | DELETE | SYNCHRONIZE.
-    if (!(ccb->access & FILE_WRITE_ATTRIBUTES)) {
+    if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_WRITE_ATTRIBUTES)) {
         WARN("insufficient privileges\n");
         return STATUS_ACCESS_DENIED;
     }
@@ -276,8 +308,9 @@ NTSTATUS set_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
     
     RtlCopyMemory(&tag, buffer, sizeof(ULONG));
     
-    if (fcb->type == BTRFS_TYPE_FILE && tag == IO_REPARSE_TAG_SYMLINK && rdb->SymbolicLinkReparseBuffer.Flags & SYMLINK_FLAG_RELATIVE) {
-        Status = set_symlink(Irp, fileref, rdb, buflen, &rollback);
+    if (fcb->type == BTRFS_TYPE_FILE &&
+        ((tag == IO_REPARSE_TAG_SYMLINK && rdb->SymbolicLinkReparseBuffer.Flags & SYMLINK_FLAG_RELATIVE) || tag == IO_REPARSE_TAG_LXSS_SYMLINK)) {
+        Status = set_symlink(Irp, fileref, ccb, rdb, buflen, tag == IO_REPARSE_TAG_SYMLINK, &rollback);
         fcb->atts |= FILE_ATTRIBUTE_REPARSE_POINT;
     } else {
         LARGE_INTEGER offset, time;
@@ -324,14 +357,20 @@ NTSTATUS set_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
 
         fcb->inode_item.transid = fcb->Vcb->superblock.generation;
         fcb->inode_item.sequence++;
-        fcb->inode_item.st_ctime = now;
-        fcb->inode_item.st_mtime = now;
+        
+        if (!ccb->user_set_change_time)
+            fcb->inode_item.st_ctime = now;
+        
+        if (!ccb->user_set_write_time)
+            fcb->inode_item.st_mtime = now;
+        
         fcb->atts |= FILE_ATTRIBUTE_REPARSE_POINT;
         fcb->atts_changed = TRUE;
         
         fcb->subvol->root_item.ctransid = fcb->Vcb->superblock.generation;
         fcb->subvol->root_item.ctime = now;
         
+        fcb->inode_item_changed = TRUE;
         mark_fcb_dirty(fcb);
     }
     
@@ -339,7 +378,7 @@ NTSTATUS set_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
     
 end:
     if (NT_SUCCESS(Status))
-        clear_rollback(&rollback);
+        clear_rollback(fcb->Vcb, &rollback);
     else
         do_rollback(fcb->Vcb, &rollback);
 
@@ -383,7 +422,7 @@ NTSTATUS delete_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
         return STATUS_INVALID_PARAMETER;
     }
     
-    if (!(ccb->access & FILE_WRITE_ATTRIBUTES)) {
+    if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_WRITE_ATTRIBUTES)) {
         WARN("insufficient privileges\n");
         return STATUS_ACCESS_DENIED;
     }
@@ -437,11 +476,18 @@ NTSTATUS delete_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
         fileref->fcb->inode_item.st_mode |= __S_IFREG;
         fileref->fcb->inode_item.transid = fileref->fcb->Vcb->superblock.generation;
         fileref->fcb->inode_item.sequence++;
-        fileref->fcb->inode_item.st_ctime = now;
-        fileref->fcb->inode_item.st_mtime = now;
+        
+        if (!ccb->user_set_change_time)
+            fileref->fcb->inode_item.st_ctime = now;
+        
+        if (!ccb->user_set_write_time)
+            fileref->fcb->inode_item.st_mtime = now;
+        
         fileref->fcb->atts &= ~FILE_ATTRIBUTE_REPARSE_POINT;
         
         mark_fileref_dirty(fileref);
+        
+        fileref->fcb->inode_item_changed = TRUE;
         mark_fcb_dirty(fileref->fcb);
 
         fileref->fcb->subvol->root_item.ctransid = fcb->Vcb->superblock.generation;
@@ -466,9 +512,14 @@ NTSTATUS delete_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
         
         fcb->inode_item.transid = fcb->Vcb->superblock.generation;
         fcb->inode_item.sequence++;
-        fcb->inode_item.st_ctime = now;
-        fcb->inode_item.st_mtime = now;
+        
+        if (!ccb->user_set_change_time)
+            fcb->inode_item.st_ctime = now;
+        
+        if (!ccb->user_set_write_time)
+            fcb->inode_item.st_mtime = now;
 
+        fcb->inode_item_changed = TRUE;
         mark_fcb_dirty(fcb);
 
         fcb->subvol->root_item.ctransid = fcb->Vcb->superblock.generation;
@@ -494,9 +545,14 @@ NTSTATUS delete_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
 
         fcb->inode_item.transid = fcb->Vcb->superblock.generation;
         fcb->inode_item.sequence++;
-        fcb->inode_item.st_ctime = now;
-        fcb->inode_item.st_mtime = now;
+        
+        if (!ccb->user_set_change_time)
+            fcb->inode_item.st_ctime = now;
+        
+        if (!ccb->user_set_write_time)
+            fcb->inode_item.st_mtime = now;
 
+        fcb->inode_item_changed = TRUE;
         mark_fcb_dirty(fcb);
 
         fcb->subvol->root_item.ctransid = fcb->Vcb->superblock.generation;
@@ -513,7 +569,7 @@ NTSTATUS delete_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
     
 end:
     if (NT_SUCCESS(Status))
-        clear_rollback(&rollback);
+        clear_rollback(fcb->Vcb, &rollback);
     else
         do_rollback(fcb->Vcb, &rollback);
     
index 8a7a79b..f5d2506 100644 (file)
@@ -320,6 +320,7 @@ static void STDCALL test_vol(PDRIVER_OBJECT DriverObject, PDEVICE_OBJECT mountmg
         v->length = gli.Length.QuadPart;
         v->gen1 = sb->generation;
         v->gen2 = 0;
+        v->seeding = sb->flags & BTRFS_SUPERBLOCK_FLAGS_SEEDING ? TRUE : FALSE;
         InsertTailList(volumes, &v->list_entry);
         
         i = 1;
index b2d0d78..7ad75a3 100644 (file)
@@ -860,7 +860,10 @@ static NTSTATUS STDCALL set_file_security(device_extension* Vcb, PFILE_OBJECT Fi
     win_time_to_unix(time, &now);
     
     fcb->inode_item.transid = Vcb->superblock.generation;
-    fcb->inode_item.st_ctime = now;
+    
+    if (!ccb->user_set_change_time)
+        fcb->inode_item.st_ctime = now;
+    
     fcb->inode_item.sequence++;
     
     if (flags & OWNER_SECURITY_INFORMATION) {
@@ -878,6 +881,7 @@ static NTSTATUS STDCALL set_file_security(device_extension* Vcb, PFILE_OBJECT Fi
     }
     
     fcb->sd_dirty = TRUE;
+    fcb->inode_item_changed = TRUE;
     
     fcb->subvol->root_item.ctransid = Vcb->superblock.generation;
     fcb->subvol->root_item.ctime = now;
index c9c988b..1864a52 100644 (file)
 
 // #define DEBUG_TREE_LOCKS
 
-typedef struct {
-    enum rollback_type type;
-    void* ptr;
-    LIST_ENTRY list_entry;
-} rollback_item;
-
 NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree** pt, tree* parent, PIRP Irp, const char* func, const char* file, unsigned int line) {
     UINT8* buf;
     NTSTATUS Status;
@@ -32,7 +26,6 @@ NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree**
     tree* t;
     tree_data* td;
     chunk* c;
-    shared_data* sd;
     
     TRACE("(%p, %llx)\n", Vcb, addr);
     
@@ -42,7 +35,7 @@ NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree**
         return STATUS_INSUFFICIENT_RESOURCES;
     }
     
-    Status = read_data(Vcb, addr, Vcb->superblock.node_size, NULL, TRUE, buf, &c, Irp);
+    Status = read_data(Vcb, addr, Vcb->superblock.node_size, NULL, TRUE, buf, NULL, &c, Irp);
     if (!NT_SUCCESS(Status)) {
         ERR("read_data returned 0x%08x\n", Status);
         ExFreePool(buf);
@@ -70,6 +63,7 @@ NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree**
     t->size = 0;
     t->new_address = 0;
     t->has_new_address = FALSE;
+    t->updated_extents = FALSE;
     t->write = FALSE;
     
     if (c)
@@ -82,21 +76,6 @@ NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree**
 //     t->items = ExAllocatePoolWithTag(PagedPool, num_items * sizeof(tree_data), ALLOC_TAG);
     InitializeListHead(&t->itemlist);
     
-    if (t->header.flags & HEADER_FLAG_SHARED_BACKREF || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
-        sd = ExAllocatePoolWithTag(NonPagedPool, sizeof(shared_data), ALLOC_TAG);
-        if (!sd) {
-            ERR("out of memory\n");
-            ExFreePool(buf);
-            return STATUS_INSUFFICIENT_RESOURCES;
-        }
-        
-        sd->address = addr;
-        sd->parent = parent ? parent->header.address : addr;
-        InitializeListHead(&sd->entries);
-        
-        ExInterlockedInsertTailList(&Vcb->shared_extents, &sd->list_entry, &Vcb->shared_extents_lock);
-    }
-    
     if (t->header.level == 0) { // leaf node
         leaf_node* ln = (leaf_node*)(buf + sizeof(tree_header));
         unsigned int i;
@@ -108,7 +87,7 @@ NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree**
         }
         
         for (i = 0; i < t->header.num_items; i++) {
-            td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
+            td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
             if (!td) {
                 ERR("out of memory\n");
                 ExFreePool(buf);
@@ -130,55 +109,6 @@ NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree**
             } else
                 td->data = NULL;
             
-            if ((t->header.flags & HEADER_FLAG_SHARED_BACKREF || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) &&
-                ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA)) {
-                EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
-                
-                if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
-                    EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
-                    
-                    if (ed2->size != 0) {
-                        LIST_ENTRY* le;
-                        BOOL found = FALSE;
-                        
-                        TRACE("shared extent %llx,%llx\n", ed2->address, ed2->size);
-                        
-                        le = sd->entries.Flink;
-                        while (le != &sd->entries) {
-                            shared_data_entry* sde = CONTAINING_RECORD(le, shared_data_entry, list_entry);
-                            
-                            if (sde->address == ed2->address && sde->size == ed2->size && sde->edr.root == t->header.tree_id &&
-                                sde->edr.objid == ln[i].key.obj_id && sde->edr.offset == ln[i].key.offset - ed2->offset) {
-                                sde->edr.count++;
-                                found = TRUE;
-                                break;
-                            }
-                            
-                            le = le->Flink;
-                        }
-                        
-                        if (!found) {
-                            shared_data_entry* sde = ExAllocatePoolWithTag(PagedPool, sizeof(shared_data_entry), ALLOC_TAG);
-                            
-                            if (!sde) {
-                                ERR("out of memory\n");
-                                ExFreePool(buf);
-                                return STATUS_INSUFFICIENT_RESOURCES;
-                            }
-                            
-                            sde->address = ed2->address;
-                            sde->size = ed2->size;
-                            sde->edr.root = t->header.tree_id;
-                            sde->edr.objid = ln[i].key.obj_id;
-                            sde->edr.offset = ln[i].key.offset - ed2->offset;
-                            sde->edr.count = 1;
-                            
-                            InsertTailList(&sd->entries, &sde->list_entry);
-                        }
-                    }
-                }
-            }
-            
             td->size = ln[i].size;
             td->ignore = FALSE;
             td->inserted = FALSE;
@@ -200,7 +130,7 @@ NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree**
         }
         
         for (i = 0; i < t->header.num_items; i++) {
-            td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
+            td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
             if (!td) {
                 ERR("out of memory\n");
                 ExFreePool(buf);
@@ -271,7 +201,7 @@ static tree* free_tree2(tree* t, const char* func, const char* file, unsigned in
         if (t->header.level == 0 && td->data)
             ExFreePool(td->data);
             
-        ExFreePool(td);
+        ExFreeToPagedLookasideList(&t->Vcb->tree_data_lookaside, td);
     }
     
     InterlockedDecrement(&t->Vcb->open_trees);
@@ -334,6 +264,11 @@ NTSTATUS STDCALL _do_load_tree(device_extension* Vcb, tree_holder* th, root* r,
         }
         
         th->tree->parent = t;
+        
+#ifdef DEBUG_PARANOID
+        if (t && t->header.level <= th->tree->header.level) int3;
+#endif
+        
         th->tree->paritem = td;
         
         ret = TRUE;
@@ -394,6 +329,7 @@ static NTSTATUS STDCALL find_item_in_tree(device_extension* Vcb, tree* t, traver
                                           const char* func, const char* file, unsigned int line) {
     int cmp;
     tree_data *td, *lasttd;
+    KEY key2;
     
     TRACE("(%p, %p, %p, %p, %u)\n", Vcb, t, tp, searchkey, ignore);
     
@@ -403,8 +339,10 @@ static NTSTATUS STDCALL find_item_in_tree(device_extension* Vcb, tree* t, traver
     
     if (!td) return STATUS_NOT_FOUND;
     
+    key2 = *searchkey;
+    
     do {
-        cmp = keycmp(searchkey, &td->key);
+        cmp = keycmp(key2, td->key);
 //         TRACE("(%u) comparing (%x,%x,%x) to (%x,%x,%x) - %i (ignore = %s)\n", t->header.level, (UINT32)searchkey->obj_id, searchkey->obj_type, (UINT32)searchkey->offset, (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset, cmp, td->ignore ? "TRUE" : "FALSE");
         if (cmp == 1) {
             lasttd = td;
@@ -418,7 +356,7 @@ static NTSTATUS STDCALL find_item_in_tree(device_extension* Vcb, tree* t, traver
                 td = next_item(t, td);
             
             if (td) {
-                cmp = keycmp(searchkey, &td->key);
+                cmp = keycmp(key2, td->key);
                 
                 if (cmp != 0) {
                     td = origtd;
@@ -653,7 +591,7 @@ BOOL STDCALL _find_prev_item(device_extension* Vcb, const traverse_ptr* tp, trav
     
     td = prev_item(t->parent, t->paritem);
     
-    Status = _do_load_tree(Vcb, &td->treeholder, t->parent->root, t, td, &loaded, Irp, func, file, line);
+    Status = _do_load_tree(Vcb, &td->treeholder, t->parent->root, t->parent, td, &loaded, Irp, func, file, line);
     if (!NT_SUCCESS(Status)) {
         ERR("do_load_tree returned %08x\n", Status);
         return FALSE;
@@ -754,7 +692,7 @@ void STDCALL free_trees(device_extension* Vcb) {
                     r->treeholder.tree = NULL;
                 
                 if (IsListEmpty(&Vcb->trees))
-                    goto free_shared;
+                    return;
             } else if (t->header.level > level)
                 empty = FALSE;
             
@@ -764,26 +702,9 @@ void STDCALL free_trees(device_extension* Vcb) {
         if (empty)
             break;
     }
-    
-free_shared:
-    while (!IsListEmpty(&Vcb->shared_extents)) {
-        shared_data* sd;
-        
-        le = RemoveHeadList(&Vcb->shared_extents);
-        sd = CONTAINING_RECORD(le, shared_data, list_entry);
-        
-        while (!IsListEmpty(&sd->entries)) {
-            LIST_ENTRY* le2 = RemoveHeadList(&sd->entries);
-            shared_data_entry* sde = CONTAINING_RECORD(le2, shared_data_entry, list_entry);
-            
-            ExFreePool(sde);
-        }
-        
-        ExFreePool(sd);
-    }
 }
 
-void add_rollback(LIST_ENTRY* rollback, enum rollback_type type, void* ptr) {
+void add_rollback(device_extension* Vcb, LIST_ENTRY* rollback, enum rollback_type type, void* ptr) {
     rollback_item* ri;
     
     ri = ExAllocatePoolWithTag(PagedPool, sizeof(rollback_item), ALLOC_TAG);
@@ -813,12 +734,12 @@ BOOL STDCALL insert_tree_item(device_extension* Vcb, root* r, UINT64 obj_id, UIN
     
     TRACE("(%p, %p, %llx, %x, %llx, %p, %x, %p, %p)\n", Vcb, r, obj_id, obj_type, offset, data, size, ptp, rollback);
     
-#ifdef DEBUG_PARANOID
-    if (!ExIsResourceAcquiredExclusiveLite(&Vcb->tree_lock)) {
-        ERR("ERROR - tree_lock not held exclusively\n");
-        int3;
-    }
-#endif
+// #ifdef DEBUG_PARANOID
+//     if (!ExIsResourceAcquiredExclusiveLite(&Vcb->tree_lock)) {
+//         ERR("ERROR - tree_lock not held exclusively\n");
+//         int3;
+//     }
+// #endif
     
     searchkey.obj_id = obj_id;
     searchkey.obj_type = obj_type;
@@ -858,7 +779,7 @@ BOOL STDCALL insert_tree_item(device_extension* Vcb, root* r, UINT64 obj_id, UIN
     
     if (tp.item) {
         TRACE("tp.item->key = %p\n", &tp.item->key);
-        cmp = keycmp(&searchkey, &tp.item->key);
+        cmp = keycmp(searchkey, tp.item->key);
         
         if (cmp == 0 && !tp.item->ignore) { // FIXME - look for all items of the same key to make sure none are non-ignored
             ERR("error: key (%llx,%x,%llx) already present\n", obj_id, obj_type, offset);
@@ -868,7 +789,7 @@ BOOL STDCALL insert_tree_item(device_extension* Vcb, root* r, UINT64 obj_id, UIN
     } else
         cmp = -1;
     
-    td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
+    td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
     if (!td) {
         ERR("out of memory\n");
         goto end;
@@ -897,17 +818,17 @@ BOOL STDCALL insert_tree_item(device_extension* Vcb, root* r, UINT64 obj_id, UIN
         paritem = tp.tree->paritem;
         while (paritem) {
 //             ERR("paritem = %llx,%x,%llx, tp.item->key = %llx,%x,%llx\n", paritem->key.obj_id, paritem->key.obj_type, paritem->key.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-            if (!keycmp(&paritem->key, &tp.item->key)) {
+            if (!keycmp(paritem->key, tp.item->key)) {
                 paritem->key = searchkey;
             } else
                 break;
             
             paritem = paritem->treeholder.tree->paritem;
         }
-        
-    } else {          
-        InsertAfter(&tp.tree->itemlist, &td->list_entry, &tp.item->list_entry); // FIXME - we don't need this
-    }
+    } else if (cmp == 0)
+        InsertHeadList(tp.item->list_entry.Blink, &td->list_entry); // make sure non-deleted item is before deleted ones
+    else
+        InsertHeadList(&tp.item->list_entry, &td->list_entry);
     
     tp.tree->header.num_items++;
     tp.tree->size += size + sizeof(leaf_node);
@@ -938,7 +859,7 @@ BOOL STDCALL insert_tree_item(device_extension* Vcb, root* r, UINT64 obj_id, UIN
     
     // FIXME - free this correctly
     
-    tp2 = ExAllocatePoolWithTag(PagedPool, sizeof(traverse_ptr), ALLOC_TAG);
+    tp2 = ExAllocateFromPagedLookasideList(&Vcb->traverse_ptr_lookaside);
     if (!tp2) {
         ERR("out of memory\n");
         goto end;
@@ -947,7 +868,7 @@ BOOL STDCALL insert_tree_item(device_extension* Vcb, root* r, UINT64 obj_id, UIN
     tp2->tree = tp.tree;
     tp2->item = td;
     
-    add_rollback(rollback, ROLLBACK_INSERT_ITEM, tp2);
+    add_rollback(Vcb, rollback, ROLLBACK_INSERT_ITEM, tp2);
     
     success = TRUE;
 
@@ -1011,7 +932,7 @@ void STDCALL delete_tree_item(device_extension* Vcb, traverse_ptr* tp, LIST_ENTR
         t = t->parent;
     }
     
-    tp2 = ExAllocatePoolWithTag(PagedPool, sizeof(traverse_ptr), ALLOC_TAG);
+    tp2 = ExAllocateFromPagedLookasideList(&Vcb->traverse_ptr_lookaside);
     if (!tp2) {
         ERR("out of memory\n");
         return;
@@ -1020,10 +941,10 @@ void STDCALL delete_tree_item(device_extension* Vcb, traverse_ptr* tp, LIST_ENTR
     tp2->tree = tp->tree;
     tp2->item = tp->item;
 
-    add_rollback(rollback, ROLLBACK_DELETE_ITEM, tp2);
+    add_rollback(Vcb, rollback, ROLLBACK_DELETE_ITEM, tp2);
 }
 
-void clear_rollback(LIST_ENTRY* rollback) {
+void clear_rollback(device_extension* Vcb, LIST_ENTRY* rollback) {
     rollback_item* ri;
     
     while (!IsListEmpty(rollback)) {
@@ -1033,6 +954,9 @@ void clear_rollback(LIST_ENTRY* rollback) {
         switch (ri->type) {
             case ROLLBACK_INSERT_ITEM:
             case ROLLBACK_DELETE_ITEM:
+                ExFreeToPagedLookasideList(&Vcb->traverse_ptr_lookaside, ri->ptr);
+                break;
+                
             case ROLLBACK_ADD_SPACE:
             case ROLLBACK_SUBTRACT_SPACE:
             case ROLLBACK_INSERT_EXTENT:
@@ -1071,7 +995,7 @@ void do_rollback(device_extension* Vcb, LIST_ENTRY* rollback) {
                         tp->tree->size -= sizeof(internal_node);
                 }
                 
-                ExFreePool(tp);
+                ExFreeToPagedLookasideList(&Vcb->traverse_ptr_lookaside, tp);
                 break;
             }
                 
@@ -1089,7 +1013,7 @@ void do_rollback(device_extension* Vcb, LIST_ENTRY* rollback) {
                         tp->tree->size += sizeof(internal_node);
                 }
                 
-                ExFreePool(tp);
+                ExFreeToPagedLookasideList(&Vcb->traverse_ptr_lookaside, tp);
                 break;
             }
             
@@ -1108,7 +1032,7 @@ void do_rollback(device_extension* Vcb, LIST_ENTRY* rollback) {
                         if (c) {
                             Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, re->fcb->subvol->id,
                                                                re->fcb->inode, re->ext->offset - ed2->offset, -1,
-                                                               re->fcb->inode_item.flags & BTRFS_INODE_NODATASUM, ed2->size, NULL);
+                                                               re->fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, NULL);
                             
                             if (!NT_SUCCESS(Status))
                                 ERR("update_changed_extent_ref returned %08x\n", Status);
@@ -1137,7 +1061,7 @@ void do_rollback(device_extension* Vcb, LIST_ENTRY* rollback) {
                         if (c) {
                             Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, re->fcb->subvol->id,
                                                                re->fcb->inode, re->ext->offset - ed2->offset, 1,
-                                                               re->fcb->inode_item.flags & BTRFS_INODE_NODATASUM, ed2->size, NULL);
+                                                               re->fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, NULL);
                             
                             if (!NT_SUCCESS(Status))
                                 ERR("update_changed_extent_ref returned %08x\n", Status);
@@ -1160,9 +1084,9 @@ void do_rollback(device_extension* Vcb, LIST_ENTRY* rollback) {
                     ExAcquireResourceExclusiveLite(&rs->chunk->lock, TRUE);
                 
                 if (ri->type == ROLLBACK_ADD_SPACE)
-                    space_list_subtract2(rs->list, rs->list_size, rs->address, rs->length, NULL);
+                    space_list_subtract2(Vcb, rs->list, rs->list_size, rs->address, rs->length, NULL);
                 else
-                    space_list_add2(rs->list, rs->list_size, rs->address, rs->length, NULL);
+                    space_list_add2(Vcb, rs->list, rs->list_size, rs->address, rs->length, NULL);
                 
                 if (rs->chunk) {
                     LIST_ENTRY* le2 = le->Blink;
@@ -1176,9 +1100,9 @@ void do_rollback(device_extension* Vcb, LIST_ENTRY* rollback) {
                             
                             if (rs2->chunk == rs->chunk) {
                                 if (ri2->type == ROLLBACK_ADD_SPACE)
-                                    space_list_subtract2(rs2->list, rs2->list_size, rs2->address, rs2->length, NULL);
+                                    space_list_subtract2(Vcb, rs2->list, rs2->list_size, rs2->address, rs2->length, NULL);
                                 else
-                                    space_list_add2(rs2->list, rs2->list_size, rs2->address, rs2->length, NULL);
+                                    space_list_add2(Vcb, rs2->list, rs2->list_size, rs2->address, rs2->length, NULL);
                                 
                                 ExFreePool(rs2);
                                 RemoveEntryList(&ri2->list_entry);
@@ -1201,3 +1125,486 @@ void do_rollback(device_extension* Vcb, LIST_ENTRY* rollback) {
         ExFreePool(ri);
     }
 }
+
+// Climbs from t towards the root, looking for the first level at which the
+// parent item pointing down to the current tree has a successor in its
+// parent's item list. That successor's key is stored in *tree_end and *no_end
+// is set to FALSE. If a tree without a parent is reached first, *no_end is set
+// to TRUE and *tree_end is left untouched.
+static void find_tree_end(tree* t, KEY* tree_end, BOOL* no_end) {
+    tree* cur;
+    
+    for (cur = t; cur->parent; cur = cur->parent) {
+        LIST_ENTRY* next = cur->paritem->list_entry.Flink;
+        
+        if (next != &cur->parent->itemlist) {
+            tree_data* sibling = CONTAINING_RECORD(next, tree_data, list_entry);
+            
+            *tree_end = sibling->key;
+            *no_end = FALSE;
+            return;
+        }
+    }
+    
+    // ran out of ancestors without finding a following item
+    *no_end = TRUE;
+}
+
+// Discards everything queued on batchlist: each batch_root is unlinked, its
+// batch_items are handed back to Vcb->batch_item_lookaside, and the batch_root
+// itself is freed.
+void clear_batch_list(device_extension* Vcb, LIST_ENTRY* batchlist) {
+    LIST_ENTRY* root_entry;
+    LIST_ENTRY* item_entry;
+    batch_root* root;
+    batch_item* item;
+    
+    while (!IsListEmpty(batchlist)) {
+        root_entry = RemoveHeadList(batchlist);
+        root = CONTAINING_RECORD(root_entry, batch_root, list_entry);
+        
+        for (; !IsListEmpty(&root->items); ) {
+            item_entry = RemoveHeadList(&root->items);
+            item = CONTAINING_RECORD(item_entry, batch_item, list_entry);
+            
+            ExFreeToPagedLookasideList(&Vcb->batch_item_lookaside, item);
+        }
+        
+        ExFreePool(root);
+    }
+}
+
+// Resolves a batch item (bi) whose key matches an item (td) already in tree t.
+//
+// For Batch_SetXattr, Batch_DirItem, Batch_InodeRef and Batch_InodeExtRef the
+// payload of td and the payload of bi are combined into a newly allocated
+// buffer which replaces bi->data. newtd is then pointed at that buffer and
+// linked in directly after td, and td - unless it was already ignored - is
+// flagged as ignored and removed from t's accounting (a ROLLBACK_DELETE_ITEM
+// entry is recorded when rollback is non-NULL). Any other operation is logged
+// as an error.
+//
+// listhead is the head of the batch item list that bi sits on; it is only used
+// when an over-long INODE_REF has to be requeued as an INODE_EXTREF.
+//
+// Returns TRUE when the item was dropped or deferred before newtd was touched,
+// telling the caller to skip it. Returns FALSE on the normal merge path, and
+// also when the rollback entry could not be allocated or the operation type
+// cannot be merged.
+static BOOL handle_batch_collision(device_extension* Vcb, batch_item* bi, tree* t, tree_data* td, tree_data* newtd, LIST_ENTRY* listhead, LIST_ENTRY* rollback) {
+    if (bi->operation == Batch_SetXattr || bi->operation == Batch_DirItem || bi->operation == Batch_InodeRef || bi->operation == Batch_InodeExtRef) {
+        // largest payload that fits into a node alongside its headers
+        UINT16 maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node);
+        
+        if (bi->operation == Batch_SetXattr) {
+            if (td->size < sizeof(DIR_ITEM)) {
+                ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", bi->key.obj_id, bi->key.obj_type, bi->key.offset, td->size, sizeof(DIR_ITEM));
+            } else {
+                UINT8* newdata;
+                ULONG size = td->size;
+                DIR_ITEM* newxa = (DIR_ITEM*)bi->data;
+                DIR_ITEM* xa = (DIR_ITEM*)td->data;
+                
+                // walk the packed DIR_ITEM entries of the existing item
+                while (TRUE) {
+                    ULONG oldxasize;
+                    
+                    if (size < sizeof(DIR_ITEM) || size < sizeof(DIR_ITEM) - 1 + xa->m + xa->n) {
+                        ERR("(%llx,%x,%llx) was truncated\n", bi->key.obj_id, bi->key.obj_type, bi->key.offset);
+                        break;
+                    }
+                    
+                    oldxasize = sizeof(DIR_ITEM) - 1 + xa->m + xa->n;
+                    
+                    if (xa->n == newxa->n && RtlCompareMemory(newxa->name, xa->name, xa->n) == xa->n) {
+                        UINT64 pos;
+                        
+                        // replace
+                        
+                        if (td->size + bi->datalen - oldxasize > maxlen)
+                            ERR("DIR_ITEM would be over maximum size, truncating (%u + %u - %u > %u)\n", td->size, bi->datalen, oldxasize, maxlen);
+                        
+                        newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen - oldxasize, ALLOC_TAG);
+                        if (!newdata) {
+                            ERR("out of memory\n");
+                            return TRUE;
+                        }
+                        
+                        pos = (UINT8*)xa - td->data;
+                        if (pos + oldxasize < td->size) { // copy after changed xattr
+                            RtlCopyMemory(newdata + pos + bi->datalen, td->data + pos + oldxasize, td->size - pos - oldxasize);
+                        }
+                        
+                        if (pos > 0) { // copy before changed xattr
+                            RtlCopyMemory(newdata, td->data, pos);
+                            xa = (DIR_ITEM*)(newdata + pos);
+                        } else
+                            xa = (DIR_ITEM*)newdata;
+                        
+                        RtlCopyMemory(xa, bi->data, bi->datalen);
+                        
+                        // the buffer holds the full data; only the recorded length is capped
+                        bi->datalen = min(td->size + bi->datalen - oldxasize, maxlen);
+                        
+                        ExFreePool(bi->data);
+                        bi->data = newdata;
+                        
+                        break;
+                    }
+                    
+                    if ((UINT8*)xa - (UINT8*)td->data + oldxasize >= size) {
+                        // not found, add to end of data
+                        
+                        if (td->size + bi->datalen > maxlen)
+                            ERR("DIR_ITEM would be over maximum size, truncating (%u + %u > %u)\n", td->size, bi->datalen, maxlen);
+                        
+                        newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen, ALLOC_TAG);
+                        if (!newdata) {
+                            ERR("out of memory\n");
+                            return TRUE;
+                        }
+                        
+                        RtlCopyMemory(newdata, td->data, td->size);
+                        
+                        xa = (DIR_ITEM*)((UINT8*)newdata + td->size);
+                        RtlCopyMemory(xa, bi->data, bi->datalen);
+                        
+                        bi->datalen = min(bi->datalen + td->size, maxlen);
+                        
+                        ExFreePool(bi->data);
+                        bi->data = newdata;
+
+                        break;
+                    } else {
+                        // step to the next packed entry
+                        xa = (DIR_ITEM*)&xa->name[xa->m + xa->n];
+                        size -= oldxasize;
+                    }
+                }
+            }
+        } else if (bi->operation == Batch_DirItem) {
+            UINT8* newdata;
+            
+            if (td->size + bi->datalen > maxlen) {
+                ERR("DIR_ITEM would be over maximum size (%u + %u > %u)\n", td->size, bi->datalen, maxlen);
+                return TRUE;
+            }
+            
+            newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen, ALLOC_TAG);
+            if (!newdata) {
+                ERR("out of memory\n");
+                return TRUE;
+            }
+            
+            // existing payload first, new entry appended
+            RtlCopyMemory(newdata, td->data, td->size);
+            
+            RtlCopyMemory(newdata + td->size, bi->data, bi->datalen);
+
+            bi->datalen += td->size;
+            
+            ExFreePool(bi->data);
+            bi->data = newdata;
+        } else if (bi->operation == Batch_InodeRef) {
+            UINT8* newdata;
+            
+            if (td->size + bi->datalen > maxlen) {
+                if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF) {
+                    INODE_REF* ir = (INODE_REF*)bi->data;
+                    INODE_EXTREF* ier;
+                    ULONG ierlen;
+                    batch_item* bi2;
+                    LIST_ENTRY* le;
+                    BOOL inserted = FALSE;
+                    
+                    TRACE("INODE_REF would be too long, adding INODE_EXTREF instead\n");
+
+                    ierlen = sizeof(INODE_EXTREF) - 1 + ir->n;
+                    
+                    ier = ExAllocatePoolWithTag(PagedPool, ierlen, ALLOC_TAG);
+                    if (!ier) {
+                        ERR("out of memory\n");
+                        return TRUE;
+                    }
+                    
+                    ier->dir = bi->key.offset;
+                    ier->index = ir->index;
+                    ier->n = ir->n;
+                    RtlCopyMemory(ier->name, ir->name, ier->n);
+                    
+                    bi2 = ExAllocateFromPagedLookasideList(&Vcb->batch_item_lookaside);
+                    if (!bi2) {
+                        ERR("out of memory\n");
+                        ExFreePool(ier);
+                        return TRUE;
+                    }
+                    
+                    bi2->key.obj_id = bi->key.obj_id;
+                    bi2->key.obj_type = TYPE_INODE_EXTREF;
+                    bi2->key.offset = calc_crc32c((UINT32)ier->dir, (UINT8*)ier->name, ier->n);
+                    bi2->data = ier;
+                    bi2->datalen = ierlen;
+                    bi2->operation = Batch_InodeExtRef;
+                    
+                    // queue the new item in key order among the batch items after bi
+                    le = bi->list_entry.Flink;
+                    while (le != listhead) {
+                        batch_item* bi3 = CONTAINING_RECORD(le, batch_item, list_entry);
+                        
+                        if (keycmp(bi3->key, bi2->key) != -1) {
+                            InsertHeadList(le->Blink, &bi2->list_entry);
+                            inserted = TRUE;
+                            // stop here: linking bi2 a second time would corrupt the list
+                            break;
+                        }
+                        
+                        le = le->Flink;
+                    }
+                    
+                    if (!inserted)
+                        InsertTailList(listhead, &bi2->list_entry);
+                    
+                    return TRUE;
+                } else {
+                    ERR("INODE_REF would be over maximum size (%u + %u > %u)\n", td->size, bi->datalen, maxlen);
+                    return TRUE;
+                }
+            }
+            
+            newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen, ALLOC_TAG);
+            if (!newdata) {
+                ERR("out of memory\n");
+                return TRUE;
+            }
+            
+            RtlCopyMemory(newdata, td->data, td->size);
+            
+            RtlCopyMemory(newdata + td->size, bi->data, bi->datalen);
+
+            bi->datalen += td->size;
+            
+            ExFreePool(bi->data);
+            bi->data = newdata;
+        } else if (bi->operation == Batch_InodeExtRef) {
+            UINT8* newdata;
+            
+            if (td->size + bi->datalen > maxlen) {
+                ERR("INODE_EXTREF would be over maximum size (%u + %u > %u)\n", td->size, bi->datalen, maxlen);
+                return TRUE;
+            }
+            
+            newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen, ALLOC_TAG);
+            if (!newdata) {
+                ERR("out of memory\n");
+                return TRUE;
+            }
+            
+            RtlCopyMemory(newdata, td->data, td->size);
+            
+            RtlCopyMemory(newdata + td->size, bi->data, bi->datalen);
+
+            bi->datalen += td->size;
+            
+            ExFreePool(bi->data);
+            bi->data = newdata;
+        }
+        
+        // the replacement item carries the (possibly merged) batch payload
+        newtd->data = bi->data;
+        newtd->size = bi->datalen;
+        
+        // delete old item
+        if (!td->ignore) {
+            traverse_ptr* tp2;
+            
+            td->ignore = TRUE;
+        
+            t->header.num_items--;
+            t->size -= sizeof(leaf_node) + td->size;
+            
+            if (rollback) {
+                tp2 = ExAllocateFromPagedLookasideList(&Vcb->traverse_ptr_lookaside);
+                if (!tp2) {
+                    ERR("out of memory\n");
+                    return FALSE;
+                }
+                
+                tp2->tree = t;
+                tp2->item = td;
+    
+                add_rollback(Vcb, rollback, ROLLBACK_DELETE_ITEM, tp2);
+            }
+        }
+        
+        // link the replacement in directly after the item it supersedes
+        InsertHeadList(&td->list_entry, &newtd->list_entry);
+    } else {
+        ERR("(%llx,%x,%llx) already exists\n", bi->key.obj_id, bi->key.obj_type, bi->key.offset);
+        int3;
+    }
+    
+    return FALSE;
+}
+
+// Applies every batch_item queued on br->items to the tree of root br->r.
+//
+// For each item the target tree is located with find_item. That item, and any
+// following batch items whose keys sort before the tree's end key (as reported
+// by find_tree_end), are inserted into the same tree without searching again.
+// Items whose key already exists are passed to handle_batch_collision; when it
+// asks for the item to be skipped, the tree_data allocated for it is returned
+// to the lookaside list. Afterwards the generation of the tree and each of its
+// ancestors is updated, and any ignored parent item on the way up is revived.
+//
+// On the normal exit path all batch_items are returned to
+// Vcb->batch_item_lookaside. Failures are logged and end processing early; no
+// status is returned to the caller.
+static void commit_batch_list_root(device_extension* Vcb, batch_root* br, PIRP Irp, LIST_ENTRY* rollback) {
+    LIST_ENTRY* le;
+    NTSTATUS Status;
+    
+    TRACE("root: %llx\n", br->r->id);
+    
+    le = br->items.Flink;
+    while (le != &br->items) {
+        batch_item* bi = CONTAINING_RECORD(le, batch_item, list_entry);
+        LIST_ENTRY *le2, *listhead;
+        traverse_ptr tp, *tp2;
+        KEY tree_end;
+        BOOL no_end;
+        tree_data* td;
+        int cmp;
+        tree* t;
+        BOOL ignore = FALSE;
+        
+        TRACE("(%llx,%x,%llx)\n", bi->key.obj_id, bi->key.obj_type, bi->key.offset);
+        
+        Status = find_item(Vcb, br->r, &tp, &bi->key, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) { // FIXME - handle STATUS_NOT_FOUND
+            ERR("find_item returned %08x\n", Status);
+            return;
+        }
+        
+        find_tree_end(tp.tree, &tree_end, &no_end);
+        
+        td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
+        if (!td) {
+            ERR("out of memory\n");
+            return;
+        }
+        
+        td->key = bi->key;
+        td->size = bi->datalen;
+        td->data = bi->data;
+        td->ignore = FALSE;
+        td->inserted = TRUE;
+        
+        cmp = keycmp(bi->key, tp.item->key);
+        
+        if (cmp == -1) { // very first key in root
+            tree_data* paritem;
+            
+            InsertHeadList(&tp.tree->itemlist, &td->list_entry);
+
+            paritem = tp.tree->paritem;
+            while (paritem) {
+                if (!keycmp(paritem->key, tp.item->key)) {
+                    paritem->key = bi->key;
+                } else
+                    break;
+                
+                paritem = paritem->treeholder.tree->paritem;
+            }
+        } else if (cmp == 0) { // item already exists
+            ignore = handle_batch_collision(Vcb, bi, tp.tree, tp.item, td, &br->items, rollback);
+        } else {
+            InsertHeadList(&tp.item->list_entry, &td->list_entry);
+        }
+        
+        if (!ignore) {
+            tp.tree->header.num_items++;
+            tp.tree->size += bi->datalen + sizeof(leaf_node);
+            tp.tree->write = TRUE;
+            
+            if (rollback) {
+                // FIXME - free this correctly
+                tp2 = ExAllocateFromPagedLookasideList(&Vcb->traverse_ptr_lookaside);
+                if (!tp2) {
+                    ERR("out of memory\n");
+                    return;
+                }
+                
+                tp2->tree = tp.tree;
+                tp2->item = td;
+
+                add_rollback(Vcb, rollback, ROLLBACK_INSERT_ITEM, tp2);
+            }
+            
+            listhead = &td->list_entry;
+        } else {
+            // the collision handler left td unlinked, so nothing else owns it
+            ExFreeToPagedLookasideList(&Vcb->tree_data_lookaside, td);
+            
+            listhead = &tp.item->list_entry;
+        }
+        
+        // sweep up the following batch items that belong in the same tree
+        le2 = le->Flink;
+        while (le2 != &br->items) {
+            batch_item* bi2 = CONTAINING_RECORD(le2, batch_item, list_entry);
+            
+            if (no_end || keycmp(bi2->key, tree_end) == -1) {
+                LIST_ENTRY* le3;
+                BOOL inserted = FALSE;
+                
+                ignore = FALSE;
+                
+                td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
+                if (!td) {
+                    ERR("out of memory\n");
+                    return;
+                }
+                
+                td->key = bi2->key;
+                td->size = bi2->datalen;
+                td->data = bi2->data;
+                td->ignore = FALSE;
+                td->inserted = TRUE;
+                
+                // scan forward from the last insertion point for the right slot
+                le3 = listhead;
+                while (le3 != &tp.tree->itemlist) {
+                    tree_data* td2 = CONTAINING_RECORD(le3, tree_data, list_entry);
+                    
+                    if (!td2->ignore) {
+                        cmp = keycmp(bi2->key, td2->key);
+
+                        if (cmp == 0) {
+                            ignore = handle_batch_collision(Vcb, bi2, tp.tree, td2, td, &br->items, rollback);
+                            inserted = TRUE;
+                            break;
+                        } else if (cmp == -1) {
+                            InsertHeadList(le3->Blink, &td->list_entry);
+                            inserted = TRUE;
+                            break;
+                        }
+                    }
+                    
+                    le3 = le3->Flink;
+                }
+                
+                if (!inserted)
+                    InsertTailList(&tp.tree->itemlist, &td->list_entry);
+                
+                if (!ignore) {
+                    tp.tree->header.num_items++;
+                    tp.tree->size += bi2->datalen + sizeof(leaf_node);
+                    
+                    if (rollback) {
+                        // FIXME - free this correctly
+                        tp2 = ExAllocateFromPagedLookasideList(&Vcb->traverse_ptr_lookaside);
+                        if (!tp2) {
+                            ERR("out of memory\n");
+                            return;
+                        }
+                        
+                        tp2->tree = tp.tree;
+                        tp2->item = td;
+                        
+                        add_rollback(Vcb, rollback, ROLLBACK_INSERT_ITEM, tp2);
+                    }
+                    
+                    listhead = &td->list_entry;
+                } else {
+                    // skipped by the collision handler: td was never linked in
+                    ExFreeToPagedLookasideList(&Vcb->tree_data_lookaside, td);
+                }
+                
+                // the outer loop resumes after the last item handled here
+                le = le2;
+            } else
+                break;
+            
+            le2 = le2->Flink;
+        }
+        
+        // stamp the modified tree and all its ancestors with the current generation
+        t = tp.tree;
+        while (t) {
+            if (t->paritem && t->paritem->ignore) {
+                t->paritem->ignore = FALSE;
+                t->parent->header.num_items++;
+                t->parent->size += sizeof(internal_node);
+                
+                // FIXME - do we need to add a rollback entry here?
+            }
+
+            t->header.generation = Vcb->superblock.generation;
+            t = t->parent;
+        }
+        
+        le = le->Flink;
+    }
+    
+    // FIXME - remove as we are going along
+    while (!IsListEmpty(&br->items)) {
+        LIST_ENTRY* le = RemoveHeadList(&br->items);
+        batch_item* bi = CONTAINING_RECORD(le, batch_item, list_entry);
+        
+        ExFreeToPagedLookasideList(&Vcb->batch_item_lookaside, bi);
+    }
+}
+
+// Drains batchlist from the head: each batch_root is unlinked, committed via
+// commit_batch_list_root and then freed.
+void commit_batch_list(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
+    LIST_ENTRY* entry;
+    batch_root* root;
+    
+    for (;;) {
+        if (IsListEmpty(batchlist))
+            return;
+        
+        entry = RemoveHeadList(batchlist);
+        root = CONTAINING_RECORD(entry, batch_root, list_entry);
+        
+        commit_batch_list_root(Vcb, root, Irp, rollback);
+        
+        ExFreePool(root);
+    }
+}
index 2895fb4..276e507 100644 (file)
 
 #include "btrfs_drv.h"
 
+// Context for one deferred read/write job: allocated by add_thread_job and
+// freed by do_job once the IRP has been handled.
+typedef struct {
+    device_extension* Vcb;
+    PIRP Irp;
+    WORK_QUEUE_ITEM item; // initialised with do_job as callback and queued by add_thread_job
+} job_info;
+
 void do_read_job(PIRP Irp) {
     NTSTATUS Status;
     ULONG bytes_read;
     BOOL top_level = is_top_level(Irp);
+    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
+    PFILE_OBJECT FileObject = IrpSp->FileObject;
+    fcb* fcb = FileObject->FsContext;
+    BOOL fcb_lock = FALSE;
     
     Irp->IoStatus.Information = 0;
     
+    if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) {
+        ExAcquireResourceSharedLite(fcb->Header.Resource, TRUE);
+        fcb_lock = TRUE;
+    }
+    
     Status = do_read(Irp, TRUE, &bytes_read);
+    
+    if (fcb_lock)
+        ExReleaseResourceLite(fcb->Header.Resource);
 
     Irp->IoStatus.Status = Status;
     
@@ -52,6 +70,9 @@ void do_write_job(device_extension* Vcb, PIRP Irp) {
         Status = _SEH2_GetExceptionCode();
     } _SEH2_END;
     
+    if (!NT_SUCCESS(Status))
+        ERR("write_file returned %08x\n", Status);
+    
     Irp->IoStatus.Status = Status;
 
     TRACE("wrote %u bytes\n", Irp->IoStatus.Information);
@@ -64,64 +85,73 @@ void do_write_job(device_extension* Vcb, PIRP Irp) {
     TRACE("returning %08x\n", Status);
 }
 
-static void do_job(drv_thread* thread, LIST_ENTRY* le) {
-    thread_job* tj = CONTAINING_RECORD(le, thread_job, list_entry);
-    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(tj->Irp);
+// Work-item callback. context is the job_info queued by add_thread_job; the
+// IRP it carries is dispatched to do_read_job or do_write_job according to its
+// major function, and the job_info is freed afterwards.
+#ifdef __REACTOS__
+static void NTAPI do_job(void* context) {
+#else
+static void do_job(void* context) {
+#endif
+    job_info* ji = context;
+    PIO_STACK_LOCATION IrpSp = ji->Irp ? IoGetCurrentIrpStackLocation(ji->Irp) : NULL;
+    
+    // IrpSp is NULL when the job carries no IRP: there is nothing to dispatch,
+    // and falling through would dereference the NULL pointer
+    if (!IrpSp) {
+        ExFreePool(ji);
+        return;
+    }
     
     if (IrpSp->MajorFunction == IRP_MJ_READ) {
-        do_read_job(tj->Irp);
+        do_read_job(ji->Irp);
     } else if (IrpSp->MajorFunction == IRP_MJ_WRITE) {
-        do_write_job(thread->DeviceObject->DeviceExtension, tj->Irp);
-    } else {
-        ERR("unsupported major function %x\n", IrpSp->MajorFunction);
-        tj->Irp->IoStatus.Status = STATUS_INTERNAL_ERROR;
-        tj->Irp->IoStatus.Information = 0;
-        IoCompleteRequest(tj->Irp, IO_NO_INCREMENT);
+        do_write_job(ji->Vcb, ji->Irp);
     }
     
-    ExFreePool(tj);
+    ExFreePool(ji);
 }
 
-void STDCALL worker_thread(void* context) {
-    drv_thread* thread = context;
-    KIRQL irql;
+// Queues Irp for deferred processing by do_job on the delayed work queue.
+//
+// If the IRP has no MDL yet, one is allocated for Irp->UserBuffer and its
+// pages are probed and locked first, since the buffer will be accessed from a
+// worker thread. Returns TRUE once the work item has been queued, or FALSE if
+// the job could not be set up; on failure the job_info is released and nothing
+// is queued.
+BOOL add_thread_job(device_extension* Vcb, PIRP Irp) {
+    job_info* ji;
     
-    ObReferenceObject(thread->DeviceObject);
+    ji = ExAllocatePoolWithTag(NonPagedPool, sizeof(job_info), ALLOC_TAG);
+    if (!ji) {
+        ERR("out of memory\n");
+        return FALSE;
+    }
     
-    while (TRUE) {
-        KeWaitForSingleObject(&thread->event, Executive, KernelMode, FALSE, NULL);
-        
-        FsRtlEnterFileSystem();
+    ji->Vcb = Vcb;
+    ji->Irp = Irp;
+    
+    if (!Irp->MdlAddress) {
+        PMDL Mdl;
+        LOCK_OPERATION op;
+        ULONG len;
+        PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
         
-        while (TRUE) {
-            LIST_ENTRY* le;
-            device_extension* Vcb = thread->DeviceObject->DeviceExtension;
-            
-            KeAcquireSpinLock(&thread->spin_lock, &irql);
-            
-            if (IsListEmpty(&thread->jobs)) {
-                KeReleaseSpinLock(&thread->spin_lock, irql);
-                break;
-            }
-            
-            le = thread->jobs.Flink;
-            RemoveEntryList(le);
-            
-            KeReleaseSpinLock(&thread->spin_lock, irql);
-            
-            InterlockedDecrement(&Vcb->threads.pending_jobs);
-            do_job(thread, le);
+        // a read stores into the caller's buffer, a write only loads from it
+        if (IrpSp->MajorFunction == IRP_MJ_READ) {
+            op = IoWriteAccess;
+            len = IrpSp->Parameters.Read.Length;
+        } else if (IrpSp->MajorFunction == IRP_MJ_WRITE) {
+            op = IoReadAccess;
+            len = IrpSp->Parameters.Write.Length;
+        } else {
+            ERR("unexpected major function %u\n", IrpSp->MajorFunction);
+            ExFreePool(ji);
+            return FALSE;
         }
         
-        FsRtlExitFileSystem();
+        Mdl = IoAllocateMdl(Irp->UserBuffer, len, FALSE, FALSE, Irp);
+
+        if (!Mdl) {
+            ERR("out of memory\n");
+            ExFreePool(ji);
+            return FALSE;
+        }
         
-        if (thread->quit)
-            break;
+        _SEH2_TRY {
+            MmProbeAndLockPages(Mdl, Irp->RequestorMode, op);
+        } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
+            ERR("MmProbeAndLockPages raised status %08x\n", _SEH2_GetExceptionCode());
+
+            IoFreeMdl(Mdl);
+            Irp->MdlAddress = NULL;
+            ExFreePool(ji);
+
+            _SEH2_YIELD(return FALSE);
+        } _SEH2_END;
     }
     
-    ObDereferenceObject(thread->DeviceObject);
-    
-    KeSetEvent(&thread->finished, 0, FALSE);
+    ExInitializeWorkItem(&ji->item, do_job, ji);
+    ExQueueWorkItem(&ji->item, DelayedWorkQueue);
     
-    PsTerminateSystemThread(STATUS_SUCCESS);
+    return TRUE;
 }
index 9e14b21..9fc46ee 100644 (file)
 
 #include "btrfs_drv.h"
 
-#define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - sizeof(leaf_node))
-
-// #define DEBUG_WRITE_LOOPS
-
 // BOOL did_split;
 BOOL chunk_test = FALSE;
 
 typedef struct {
-    KEVENT Event;
-    IO_STATUS_BLOCK iosb;
-} write_context;
-
-typedef struct {
-    EXTENT_ITEM ei;
-    UINT8 type;
-    EXTENT_DATA_REF edr;
-} EXTENT_ITEM_DATA_REF;
+    UINT64 start;
+    UINT64 end;
+    UINT8* data;
+    UINT32 skip_start;
+    UINT32 skip_end;
+} write_stripe;
 
 typedef struct {
-    EXTENT_ITEM_TREE eit;
-    UINT8 type;
-    TREE_BLOCK_REF tbr;
-} EXTENT_ITEM_TREE2;
+    LONG stripes_left;
+    KEVENT event;
+} read_stripe_master;
 
 typedef struct {
-    EXTENT_ITEM ei;
-    UINT8 type;
-    TREE_BLOCK_REF tbr;
-} EXTENT_ITEM_SKINNY_METADATA;
+    PIRP Irp;
+    PDEVICE_OBJECT devobj;
+    IO_STATUS_BLOCK iosb;
+    read_stripe_master* master;
+} read_stripe;
 
 // static BOOL extent_item_is_shared(EXTENT_ITEM* ei, ULONG len);
 static NTSTATUS STDCALL write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr);
-static void update_checksum_tree(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback);
 static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback);
 
-static NTSTATUS STDCALL write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
-    write_context* context = conptr;
-    
-    context->iosb = Irp->IoStatus;
-    KeSetEvent(&context->Event, 0, FALSE);
-    
-//     return STATUS_SUCCESS;
-    return STATUS_MORE_PROCESSING_REQUIRED;
-}
-
-static NTSTATUS STDCALL write_data_phys(PDEVICE_OBJECT device, UINT64 address, void* data, UINT32 length) {
-    NTSTATUS Status;
-    LARGE_INTEGER offset;
-    PIRP Irp;
-    PIO_STACK_LOCATION IrpSp;
-    write_context* context = NULL;
-    
-    TRACE("(%p, %llx, %p, %x)\n", device, address, data, length);
-    
-    context = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_context), ALLOC_TAG);
-    if (!context) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-    
-    RtlZeroMemory(context, sizeof(write_context));
-    
-    KeInitializeEvent(&context->Event, NotificationEvent, FALSE);
-    
-    offset.QuadPart = address;
-    
-//     Irp = IoBuildSynchronousFsdRequest(IRP_MJ_WRITE, Vcb->device, data, length, &offset, NULL, &context->iosb);
-    
-    Irp = IoAllocateIrp(device->StackSize, FALSE);
-    
-    if (!Irp) {
-        ERR("IoAllocateIrp failed\n");
-        Status = STATUS_INTERNAL_ERROR;
-        goto exit2;
-    }
-    
-    IrpSp = IoGetNextIrpStackLocation(Irp);
-    IrpSp->MajorFunction = IRP_MJ_WRITE;
-    
-    if (device->Flags & DO_BUFFERED_IO) {
-        Irp->AssociatedIrp.SystemBuffer = data;
-
-        Irp->Flags = IRP_BUFFERED_IO;
-    } else if (device->Flags & DO_DIRECT_IO) {
-        Irp->MdlAddress = IoAllocateMdl(data, length, FALSE, FALSE, NULL);
-        if (!Irp->MdlAddress) {
-            DbgPrint("IoAllocateMdl failed\n");
-            goto exit;
-        }
-        
-        MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoWriteAccess);
-    } else {
-        Irp->UserBuffer = data;
-    }
-
-    IrpSp->Parameters.Write.Length = length;
-    IrpSp->Parameters.Write.ByteOffset = offset;
-    
-    Irp->UserIosb = &context->iosb;
-
-    Irp->UserEvent = &context->Event;
-
-    IoSetCompletionRoutine(Irp, write_completion, context, TRUE, TRUE, TRUE);
-
-    // FIXME - support multiple devices
-    Status = IoCallDriver(device, Irp);
-    
-    if (Status == STATUS_PENDING) {
-        KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
-        Status = context->iosb.Status;
-    }
-    
-    if (!NT_SUCCESS(Status)) {
-        ERR("IoCallDriver returned %08x\n", Status);
-    }
-    
-    if (device->Flags & DO_DIRECT_IO) {
-        MmUnlockPages(Irp->MdlAddress);
-        IoFreeMdl(Irp->MdlAddress);
-    }
-    
-exit:
-    IoFreeIrp(Irp);
-    
-exit2:
-    if (context)
-        ExFreePool(context);
-    
-    return Status;
-}
-
-static NTSTATUS STDCALL write_superblock(device_extension* Vcb, device* device) {
-    NTSTATUS Status;
-    unsigned int i = 0;
-    UINT32 crc32;
-
-#ifdef __REACTOS__
-    Status = STATUS_INTERNAL_ERROR;
-#endif
-    
-    RtlCopyMemory(&Vcb->superblock.dev_item, &device->devitem, sizeof(DEV_ITEM));
-    
-    // FIXME - only write one superblock if on SSD (?)
-    while (superblock_addrs[i] > 0 && device->length >= superblock_addrs[i] + sizeof(superblock)) {
-        TRACE("writing superblock %u\n", i);
-        
-        Vcb->superblock.sb_phys_addr = superblock_addrs[i];
-        
-        crc32 = calc_crc32c(0xffffffff, (UINT8*)&Vcb->superblock.uuid, (ULONG)sizeof(superblock) - sizeof(Vcb->superblock.checksum));
-        crc32 = ~crc32;
-        TRACE("crc32 is %08x\n", crc32);
-        RtlCopyMemory(&Vcb->superblock.checksum, &crc32, sizeof(UINT32));
-        
-        Status = write_data_phys(device->devobj, superblock_addrs[i], &Vcb->superblock, sizeof(superblock));
-        
-        if (!NT_SUCCESS(Status))
-            break;
-        
-        i++;
-    }
-    
-    if (i == 0) {
-        ERR("no superblocks written!\n");
-    }
-
-    return Status;
-}
-
-static BOOL find_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address) {
+BOOL find_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address) {
     LIST_ENTRY* le;
     space* s;
     
@@ -275,145 +134,38 @@ static UINT64 find_new_chunk_address(device_extension* Vcb, UINT64 size) {
     return lastaddr;
 }
 
-static NTSTATUS update_dev_item(device_extension* Vcb, device* device, PIRP Irp, LIST_ENTRY* rollback) {
-    KEY searchkey;
-    traverse_ptr tp;
-    DEV_ITEM* di;
-    NTSTATUS Status;
-    
-    searchkey.obj_id = 1;
-    searchkey.obj_type = TYPE_DEV_ITEM;
-    searchkey.offset = device->devitem.dev_id;
-    
-    Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
-    }
-    
-    if (keycmp(&tp.item->key, &searchkey)) {
-        ERR("error - could not find DEV_ITEM for device %llx\n", device->devitem.dev_id);
-        return STATUS_INTERNAL_ERROR;
-    }
-    
-    delete_tree_item(Vcb, &tp, rollback);
-    
-    di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
-    if (!di) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-    
-    RtlCopyMemory(di, &device->devitem, sizeof(DEV_ITEM));
-    
-    if (!insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp, rollback)) {
-        ERR("insert_tree_item failed\n");
-        return STATUS_INTERNAL_ERROR;
-    }
-    
-    return STATUS_SUCCESS;
-}
-
-static void regen_bootstrap(device_extension* Vcb) {
-    sys_chunk* sc2;
-    USHORT i = 0;
-    LIST_ENTRY* le;
-    
-    i = 0;
-    le = Vcb->sys_chunks.Flink;
-    while (le != &Vcb->sys_chunks) {
-        sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
-        
-        TRACE("%llx,%x,%llx\n", sc2->key.obj_id, sc2->key.obj_type, sc2->key.offset);
-        
-        RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], &sc2->key, sizeof(KEY));
-        i += sizeof(KEY);
-        
-        RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], sc2->data, sc2->size);
-        i += sc2->size;
-        
-        le = le->Flink;
-    }
-}
-
-static NTSTATUS add_to_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset, void* data, ULONG size) {
-    sys_chunk *sc, *sc2;
-    LIST_ENTRY* le;
-    
-    if (Vcb->superblock.n + sizeof(KEY) + size > SYS_CHUNK_ARRAY_SIZE) {
-        ERR("error - bootstrap is full\n");
-        return STATUS_INTERNAL_ERROR;
-    }
-    
-    sc = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG);
-    if (!sc) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-
-    sc->key.obj_id = obj_id;
-    sc->key.obj_type = obj_type;
-    sc->key.offset = offset;
-    sc->size = size;
-    sc->data = ExAllocatePoolWithTag(PagedPool, sc->size, ALLOC_TAG);
-    if (!sc->data) {
-        ERR("out of memory\n");
-        ExFreePool(sc);
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-    
-    RtlCopyMemory(sc->data, data, sc->size);
-    
-    le = Vcb->sys_chunks.Flink;
-    while (le != &Vcb->sys_chunks) {
-        sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
-        
-        if (keycmp(&sc2->key, &sc->key) == 1)
-            break;
-        
-        le = le->Flink;
-    }
-    InsertTailList(le, &sc->list_entry);
-    
-    Vcb->superblock.n += sizeof(KEY) + size;
-    
-    regen_bootstrap(Vcb);
-    
-    return STATUS_SUCCESS;
-}
-
 static BOOL find_new_dup_stripes(device_extension* Vcb, stripe* stripes, UINT64 max_stripe_size) {
     UINT64 j, devnum, devusage = 0xffffffffffffffff;
     space *devdh1 = NULL, *devdh2 = NULL;
     
     for (j = 0; j < Vcb->superblock.num_devices; j++) {
-        UINT64 usage;
-        
-        usage = (Vcb->devices[j].devitem.bytes_used * 4096) / Vcb->devices[j].devitem.num_bytes;
-        
-        // favour devices which have been used the least
-        if (usage < devusage) {
-            if (!IsListEmpty(&Vcb->devices[j].space)) {
-                LIST_ENTRY* le;
-                space *dh1 = NULL, *dh2 = NULL;
-                
-                le = Vcb->devices[j].space.Flink;
-                while (le != &Vcb->devices[j].space) {
-                    space* dh = CONTAINING_RECORD(le, space, list_entry);
+        if (!Vcb->devices[j].readonly) {
+            UINT64 usage = (Vcb->devices[j].devitem.bytes_used * 4096) / Vcb->devices[j].devitem.num_bytes;
+            
+            // favour devices which have been used the least
+            if (usage < devusage) {
+                if (!IsListEmpty(&Vcb->devices[j].space)) {
+                    LIST_ENTRY* le;
+                    space *dh1 = NULL, *dh2 = NULL;
                     
-                    if (dh->size >= max_stripe_size && (!dh1 || dh->size < dh1->size)) {
-                        dh2 = dh1;
-                        dh1 = dh;
-                    }
+                    le = Vcb->devices[j].space.Flink;
+                    while (le != &Vcb->devices[j].space) {
+                        space* dh = CONTAINING_RECORD(le, space, list_entry);
+                        
+                        if (dh->size >= max_stripe_size && (!dh1 || dh->size < dh1->size)) {
+                            dh2 = dh1;
+                            dh1 = dh;
+                        }
 
-                    le = le->Flink;
-                }
-                
-                if (dh1 && (dh2 || dh1->size >= 2 * max_stripe_size)) {
-                    devnum = j;
-                    devusage = usage;
-                    devdh1 = dh1;
-                    devdh2 = dh2 ? dh2 : dh1;
+                        le = le->Flink;
+                    }
+                    
+                    if (dh1 && (dh2 || dh1->size >= 2 * max_stripe_size)) {
+                        devnum = j;
+                        devusage = usage;
+                        devdh1 = dh1;
+                        devdh2 = dh2 ? dh2 : dh1;
+                    }
                 }
             }
         }
@@ -438,6 +190,9 @@ static BOOL find_new_stripe(device_extension* Vcb, stripe* stripes, UINT16 i, UI
         UINT64 usage;
         BOOL skip = FALSE;
         
+        if (Vcb->devices[j].readonly)
+            continue;
+
         // skip this device if it already has a stripe
         if (i > 0) {
             for (k = 0; k < i; k++) {
@@ -544,11 +299,15 @@ chunk* alloc_chunk(device_extension* Vcb, UINT64 flags) {
         sub_stripes = 2;
         type = BLOCK_FLAG_RAID10;
     } else if (flags & BLOCK_FLAG_RAID5) {
-        FIXME("RAID5 not yet supported\n");
-        goto end;
+        min_stripes = 3;
+        max_stripes = Vcb->superblock.num_devices;
+        sub_stripes = 1;
+        type = BLOCK_FLAG_RAID5;
     } else if (flags & BLOCK_FLAG_RAID6) {
-        FIXME("RAID6 not yet supported\n");
-        goto end;
+        min_stripes = 4;
+        max_stripes = 257;
+        sub_stripes = 1;
+        type = BLOCK_FLAG_RAID6;
     } else { // SINGLE
         min_stripes = 1;
         max_stripes = 1;
@@ -615,6 +374,10 @@ chunk* alloc_chunk(device_extension* Vcb, UINT64 flags) {
         factor = num_stripes;
     else if (type == BLOCK_FLAG_RAID10)
         factor = num_stripes / sub_stripes;
+    else if (type == BLOCK_FLAG_RAID5)
+        factor = num_stripes - 1;
+    else if (type == BLOCK_FLAG_RAID6)
+        factor = num_stripes - 2;
     
     if (stripe_size * factor > max_chunk_size)
         stripe_size = max_chunk_size / factor;
@@ -663,11 +426,16 @@ chunk* alloc_chunk(device_extension* Vcb, UINT64 flags) {
     c->offset = logaddr;
     c->used = c->oldused = 0;
     c->cache = NULL;
+    c->readonly = FALSE;
     InitializeListHead(&c->space);
     InitializeListHead(&c->space_size);
     InitializeListHead(&c->deleting);
     InitializeListHead(&c->changed_extents);
     
+    InitializeListHead(&c->range_locks);
+    KeInitializeSpinLock(&c->range_locks_spinlock);
+    KeInitializeEvent(&c->range_locks_event, NotificationEvent, FALSE);
+    
     ExInitializeResourceLite(&c->lock);
     ExInitializeResourceLite(&c->changed_extents_lock);
     
@@ -687,7 +455,7 @@ chunk* alloc_chunk(device_extension* Vcb, UINT64 flags) {
     for (i = 0; i < num_stripes; i++) {
         stripes[i].device->devitem.bytes_used += stripe_size;
         
-        space_list_subtract2(&stripes[i].device->space, NULL, cis[i].offset, stripe_size, NULL);
+        space_list_subtract2(Vcb, &stripes[i].device->space, NULL, cis[i].offset, stripe_size, NULL);
     }
     
     success = TRUE;
@@ -729,4831 +497,1307 @@ end:
     return success ? c : NULL;
 }
 
-NTSTATUS STDCALL write_data(device_extension* Vcb, UINT64 address, void* data, BOOL need_free, UINT32 length, write_data_context* wtc, PIRP Irp, chunk* c) {
-    NTSTATUS Status;
+static NTSTATUS prepare_raid0_write(chunk* c, UINT64 address, void* data, UINT32 length, write_stripe* stripes) {
+    UINT64 startoff, endoff;
+    UINT16 startoffstripe, endoffstripe, stripenum;
+    UINT64 pos, *stripeoff;
     UINT32 i;
-    CHUNK_ITEM_STRIPE* cis;
-    write_data_stripe* stripe;
-    UINT64 *stripestart = NULL, *stripeend = NULL;
-    UINT8** stripedata = NULL;
-    BOOL need_free2;
-    
-    TRACE("(%p, %llx, %p, %x)\n", Vcb, address, data, length);
     
-    if (!c) {
-        c = get_chunk_from_address(Vcb, address);
-        if (!c) {
-            ERR("could not get chunk for address %llx\n", address);
-            return STATUS_INTERNAL_ERROR;
-        }
+    stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes, ALLOC_TAG);
+    if (!stripeoff) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
     }
+
+    get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
+    get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
     
-    if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
-        FIXME("RAID5 not yet supported\n");
-        return STATUS_NOT_IMPLEMENTED;
-    } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
-        FIXME("RAID6 not yet supported\n");
-        return STATUS_NOT_IMPLEMENTED;
+    for (i = 0; i < c->chunk_item->num_stripes; i++) {
+        if (startoffstripe > i) {
+            stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
+        } else if (startoffstripe == i) {
+            stripes[i].start = startoff;
+        } else {
+            stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
+        }
+        
+        if (endoffstripe > i) {
+            stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
+        } else if (endoffstripe == i) {
+            stripes[i].end = endoff + 1;
+        } else {
+            stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length);
+        }
+        
+        if (stripes[i].start != stripes[i].end) {
+            stripes[i].data = ExAllocatePoolWithTag(NonPagedPool, stripes[i].end - stripes[i].start, ALLOC_TAG);
+            
+            if (!stripes[i].data) {
+                ERR("out of memory\n");
+                ExFreePool(stripeoff);
+                return STATUS_INSUFFICIENT_RESOURCES;
+            }
+        }
     }
     
-    stripestart = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes, ALLOC_TAG);
-    if (!stripestart) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
+    pos = 0;
+    RtlZeroMemory(stripeoff, sizeof(UINT64) * c->chunk_item->num_stripes);
     
-    stripeend = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes, ALLOC_TAG);
-    if (!stripeend) {
-        ERR("out of memory\n");
-        ExFreePool(stripestart);
-        return STATUS_INSUFFICIENT_RESOURCES;
+    stripenum = startoffstripe;
+    while (pos < length) {
+        if (pos == 0) {
+            UINT32 writelen = min(stripes[stripenum].end - stripes[stripenum].start,
+                                  c->chunk_item->stripe_length - (stripes[stripenum].start % c->chunk_item->stripe_length));
+            
+            RtlCopyMemory(stripes[stripenum].data, data, writelen);
+            stripeoff[stripenum] += writelen;
+            pos += writelen;
+        } else if (length - pos < c->chunk_item->stripe_length) {
+            RtlCopyMemory(stripes[stripenum].data + stripeoff[stripenum], (UINT8*)data + pos, length - pos);
+            break;
+        } else {
+            RtlCopyMemory(stripes[stripenum].data + stripeoff[stripenum], (UINT8*)data + pos, c->chunk_item->stripe_length);
+            stripeoff[stripenum] += c->chunk_item->stripe_length;
+            pos += c->chunk_item->stripe_length;
+        }
+        
+        stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
     }
+
+    ExFreePool(stripeoff);
     
-    stripedata = ExAllocatePoolWithTag(PagedPool, sizeof(UINT8*) * c->chunk_item->num_stripes, ALLOC_TAG);
-    if (!stripedata) {
+    return STATUS_SUCCESS;
+}
+
+static NTSTATUS prepare_raid10_write(chunk* c, UINT64 address, void* data, UINT32 length, write_stripe* stripes) {
+    UINT64 startoff, endoff;
+    UINT16 startoffstripe, endoffstripe, stripenum;
+    UINT64 pos, *stripeoff;
+    UINT32 i;
+
+    stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes, ALLOC_TAG);
+    if (!stripeoff) {
         ERR("out of memory\n");
-        ExFreePool(stripeend);
-        ExFreePool(stripestart);
         return STATUS_INSUFFICIENT_RESOURCES;
     }
-    RtlZeroMemory(stripedata, sizeof(UINT8*) * c->chunk_item->num_stripes);
-    
-    cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
-    
-    if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
-        UINT64 startoff, endoff;
-        UINT16 startoffstripe, endoffstripe, stripenum;
-        UINT64 pos, *stripeoff;
+
+    get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &startoff, &startoffstripe);
+    get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &endoff, &endoffstripe);
+
+    startoffstripe *= c->chunk_item->sub_stripes;
+    endoffstripe *= c->chunk_item->sub_stripes;
+
+    for (i = 0; i < c->chunk_item->num_stripes; i += c->chunk_item->sub_stripes) {
+        UINT16 j;
         
-        stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes, ALLOC_TAG);
-        if (!stripeoff) {
-            ERR("out of memory\n");
-            ExFreePool(stripedata);
-            ExFreePool(stripeend);
-            ExFreePool(stripestart);
-            return STATUS_INSUFFICIENT_RESOURCES;
+        if (startoffstripe > i) {
+            stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
+        } else if (startoffstripe == i) {
+            stripes[i].start = startoff;
+        } else {
+            stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
         }
-
-        get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
-        get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
         
-        for (i = 0; i < c->chunk_item->num_stripes; i++) {
-            if (startoffstripe > i) {
-                stripestart[i] = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
-            } else if (startoffstripe == i) {
-                stripestart[i] = startoff;
-            } else {
-                stripestart[i] = startoff - (startoff % c->chunk_item->stripe_length);
-            }
-            
-            if (endoffstripe > i) {
-                stripeend[i] = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
-            } else if (endoffstripe == i) {
-                stripeend[i] = endoff + 1;
-            } else {
-                stripeend[i] = endoff - (endoff % c->chunk_item->stripe_length);
-            }
+        if (endoffstripe > i) {
+            stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
+        } else if (endoffstripe == i) {
+            stripes[i].end = endoff + 1;
+        } else {
+            stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length);
+        }
+        
+        if (stripes[i].start != stripes[i].end) {
+            stripes[i].data = ExAllocatePoolWithTag(NonPagedPool, stripes[i].end - stripes[i].start, ALLOC_TAG);
             
-            if (stripestart[i] != stripeend[i]) {
-                stripedata[i] = ExAllocatePoolWithTag(NonPagedPool, stripeend[i] - stripestart[i], ALLOC_TAG);
-                
-                if (!stripedata[i]) {
-                    ERR("out of memory\n");
-                    ExFreePool(stripeoff);
-                    Status = STATUS_INSUFFICIENT_RESOURCES;
-                    goto end;
-                }
+            if (!stripes[i].data) {
+                ERR("out of memory\n");
+                ExFreePool(stripeoff);
+                return STATUS_INSUFFICIENT_RESOURCES;
             }
         }
         
-        pos = 0;
-        RtlZeroMemory(stripeoff, sizeof(UINT64) * c->chunk_item->num_stripes);
-        
-        stripenum = startoffstripe;
-        while (pos < length) {
-            if (pos == 0) {
-                UINT32 writelen = min(stripeend[stripenum] - stripestart[stripenum],
-                                      c->chunk_item->stripe_length - (stripestart[stripenum] % c->chunk_item->stripe_length));
-                
-                RtlCopyMemory(stripedata[stripenum], data, writelen);
-                stripeoff[stripenum] += writelen;
-                pos += writelen;
-            } else if (length - pos < c->chunk_item->stripe_length) {
-                RtlCopyMemory(stripedata[stripenum] + stripeoff[stripenum], (UINT8*)data + pos, length - pos);
-                break;
-            } else {
-                RtlCopyMemory(stripedata[stripenum] + stripeoff[stripenum], (UINT8*)data + pos, c->chunk_item->stripe_length);
-                stripeoff[stripenum] += c->chunk_item->stripe_length;
-                pos += c->chunk_item->stripe_length;
-            }
-            
-            stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
+        for (j = 1; j < c->chunk_item->sub_stripes; j++) {
+            stripes[i+j].start = stripes[i].start;
+            stripes[i+j].end = stripes[i].end;
+            stripes[i+j].data = stripes[i].data;
         }
+    }
 
-        ExFreePool(stripeoff);
-        
-        if (need_free)
-            ExFreePool(data);
+    pos = 0;
+    RtlZeroMemory(stripeoff, sizeof(UINT64) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
 
-        need_free2 = TRUE;
-    } else if (c->chunk_item->type & BLOCK_FLAG_RAID10) {
-        UINT64 startoff, endoff;
-        UINT16 startoffstripe, endoffstripe, stripenum;
-        UINT64 pos, *stripeoff;
+    stripenum = startoffstripe / c->chunk_item->sub_stripes;
+    while (pos < length) {
+        if (pos == 0) {
+            UINT32 writelen = min(stripes[stripenum * c->chunk_item->sub_stripes].end - stripes[stripenum * c->chunk_item->sub_stripes].start,
+                                  c->chunk_item->stripe_length - (stripes[stripenum * c->chunk_item->sub_stripes].start % c->chunk_item->stripe_length));
+            
+            RtlCopyMemory(stripes[stripenum * c->chunk_item->sub_stripes].data, data, writelen);
+            stripeoff[stripenum] += writelen;
+            pos += writelen;
+        } else if (length - pos < c->chunk_item->stripe_length) {
+            RtlCopyMemory(stripes[stripenum * c->chunk_item->sub_stripes].data + stripeoff[stripenum], (UINT8*)data + pos, length - pos);
+            break;
+        } else {
+            RtlCopyMemory(stripes[stripenum * c->chunk_item->sub_stripes].data + stripeoff[stripenum], (UINT8*)data + pos, c->chunk_item->stripe_length);
+            stripeoff[stripenum] += c->chunk_item->stripe_length;
+            pos += c->chunk_item->stripe_length;
+        }
         
-        stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes, ALLOC_TAG);
-        if (!stripeoff) {
-            ERR("out of memory\n");
-            ExFreePool(stripedata);
-            ExFreePool(stripeend);
-            ExFreePool(stripestart);
-            return STATUS_INSUFFICIENT_RESOURCES;
-        }
-
-        get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &startoff, &startoffstripe);
-        get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &endoff, &endoffstripe);
-        
-        startoffstripe *= c->chunk_item->sub_stripes;
-        endoffstripe *= c->chunk_item->sub_stripes;
-        
-        for (i = 0; i < c->chunk_item->num_stripes; i += c->chunk_item->sub_stripes) {
-            UINT16 j;
-            
-            if (startoffstripe > i) {
-                stripestart[i] = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
-            } else if (startoffstripe == i) {
-                stripestart[i] = startoff;
-            } else {
-                stripestart[i] = startoff - (startoff % c->chunk_item->stripe_length);
-            }
-            
-            if (endoffstripe > i) {
-                stripeend[i] = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
-            } else if (endoffstripe == i) {
-                stripeend[i] = endoff + 1;
-            } else {
-                stripeend[i] = endoff - (endoff % c->chunk_item->stripe_length);
-            }
-            
-            if (stripestart[i] != stripeend[i]) {
-                stripedata[i] = ExAllocatePoolWithTag(NonPagedPool, stripeend[i] - stripestart[i], ALLOC_TAG);
-                
-                if (!stripedata[i]) {
-                    ERR("out of memory\n");
-                    ExFreePool(stripeoff);
-                    Status = STATUS_INSUFFICIENT_RESOURCES;
-                    goto end;
-                }
-            }
-            
-            for (j = 1; j < c->chunk_item->sub_stripes; j++) {
-                stripestart[i+j] = stripestart[i];
-                stripeend[i+j] = stripeend[i];
-                stripedata[i+j] = stripedata[i];
-            }
-        }
-        
-        pos = 0;
-        RtlZeroMemory(stripeoff, sizeof(UINT64) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
-        
-        stripenum = startoffstripe / c->chunk_item->sub_stripes;
-        while (pos < length) {
-            if (pos == 0) {
-                UINT32 writelen = min(stripeend[stripenum * c->chunk_item->sub_stripes] - stripestart[stripenum * c->chunk_item->sub_stripes],
-                                      c->chunk_item->stripe_length - (stripestart[stripenum * c->chunk_item->sub_stripes] % c->chunk_item->stripe_length));
-                
-                RtlCopyMemory(stripedata[stripenum * c->chunk_item->sub_stripes], data, writelen);
-                stripeoff[stripenum] += writelen;
-                pos += writelen;
-            } else if (length - pos < c->chunk_item->stripe_length) {
-                RtlCopyMemory(stripedata[stripenum * c->chunk_item->sub_stripes] + stripeoff[stripenum], (UINT8*)data + pos, length - pos);
-                break;
-            } else {
-                RtlCopyMemory(stripedata[stripenum * c->chunk_item->sub_stripes] + stripeoff[stripenum], (UINT8*)data + pos, c->chunk_item->stripe_length);
-                stripeoff[stripenum] += c->chunk_item->stripe_length;
-                pos += c->chunk_item->stripe_length;
-            }
-            
-            stripenum = (stripenum + 1) % (c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
-        }
-
-        ExFreePool(stripeoff);
-        
-        if (need_free)
-            ExFreePool(data);
-
-        need_free2 = TRUE;
-    } else {
-        for (i = 0; i < c->chunk_item->num_stripes; i++) {
-            stripestart[i] = address - c->offset;
-            stripeend[i] = stripestart[i] + length;
-            stripedata[i] = data;
-        }
-        need_free2 = need_free;
-    }
-
-    for (i = 0; i < c->chunk_item->num_stripes; i++) {
-        PIO_STACK_LOCATION IrpSp;
-        
-        // FIXME - handle missing devices
-        
-        stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_stripe), ALLOC_TAG);
-        if (!stripe) {
-            ERR("out of memory\n");
-            Status = STATUS_INSUFFICIENT_RESOURCES;
-            goto end;
-        }
-        
-        if (stripestart[i] == stripeend[i]) {
-            stripe->status = WriteDataStatus_Ignore;
-            stripe->Irp = NULL;
-            stripe->buf = NULL;
-        } else {
-            stripe->context = (struct _write_data_context*)wtc;
-            stripe->buf = stripedata[i];
-            stripe->need_free = need_free2;
-            stripe->device = c->devices[i];
-            RtlZeroMemory(&stripe->iosb, sizeof(IO_STATUS_BLOCK));
-            stripe->status = WriteDataStatus_Pending;
-            
-            if (!Irp) {
-                stripe->Irp = IoAllocateIrp(stripe->device->devobj->StackSize, FALSE);
-            
-                if (!stripe->Irp) {
-                    ERR("IoAllocateIrp failed\n");
-                    Status = STATUS_INTERNAL_ERROR;
-                    goto end;
-                }
-            } else {
-                stripe->Irp = IoMakeAssociatedIrp(Irp, stripe->device->devobj->StackSize);
-                
-                if (!stripe->Irp) {
-                    ERR("IoMakeAssociatedIrp failed\n");
-                    Status = STATUS_INTERNAL_ERROR;
-                    goto end;
-                }
-            }
-            
-            IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
-            IrpSp->MajorFunction = IRP_MJ_WRITE;
-            
-            if (stripe->device->devobj->Flags & DO_BUFFERED_IO) {
-                stripe->Irp->AssociatedIrp.SystemBuffer = stripedata[i];
-
-                stripe->Irp->Flags = IRP_BUFFERED_IO;
-            } else if (stripe->device->devobj->Flags & DO_DIRECT_IO) {
-                stripe->Irp->MdlAddress = IoAllocateMdl(stripedata[i], stripeend[i] - stripestart[i], FALSE, FALSE, NULL);
-                if (!stripe->Irp->MdlAddress) {
-                    ERR("IoAllocateMdl failed\n");
-                    Status = STATUS_INTERNAL_ERROR;
-                    goto end;
-                }
-                
-                MmProbeAndLockPages(stripe->Irp->MdlAddress, KernelMode, IoWriteAccess);
-            } else {
-                stripe->Irp->UserBuffer = stripedata[i];
-            }
-
-            IrpSp->Parameters.Write.Length = stripeend[i] - stripestart[i];
-            IrpSp->Parameters.Write.ByteOffset.QuadPart = stripestart[i] + cis[i].offset;
-            
-            stripe->Irp->UserIosb = &stripe->iosb;
-            wtc->stripes_left++;
-
-            IoSetCompletionRoutine(stripe->Irp, write_data_completion, stripe, TRUE, TRUE, TRUE);
-        }
-
-        InsertTailList(&wtc->stripes, &stripe->list_entry);
-    }
-    
-    Status = STATUS_SUCCESS;
-    
-end:
-
-    if (stripestart) ExFreePool(stripestart);
-    if (stripeend) ExFreePool(stripeend);
-    if (stripedata) ExFreePool(stripedata);
-    
-    if (!NT_SUCCESS(Status)) {
-        free_write_data_stripes(wtc);
-        ExFreePool(wtc);
-    }
-    
-    return Status;
-}
-
-NTSTATUS STDCALL write_data_complete(device_extension* Vcb, UINT64 address, void* data, UINT32 length, PIRP Irp, chunk* c) {
-    write_data_context* wtc;
-    NTSTATUS Status;
-    
-    wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context), ALLOC_TAG);
-    if (!wtc) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-
-    KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE);
-    InitializeListHead(&wtc->stripes);
-    wtc->tree = FALSE;
-    wtc->stripes_left = 0;
-    
-    Status = write_data(Vcb, address, data, FALSE, length, wtc, Irp, c);
-    if (!NT_SUCCESS(Status)) {
-        ERR("write_data returned %08x\n", Status);
-        free_write_data_stripes(wtc);
-        ExFreePool(wtc);
-        return Status;
-    }
-    
-    if (wtc->stripes.Flink != &wtc->stripes) {
-        // launch writes and wait
-        LIST_ENTRY* le = wtc->stripes.Flink;
-        while (le != &wtc->stripes) {
-            write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
-            
-            if (stripe->status != WriteDataStatus_Ignore)
-                IoCallDriver(stripe->device->devobj, stripe->Irp);
-            
-            le = le->Flink;
-        }
-        
-        KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL);
-        
-        le = wtc->stripes.Flink;
-        while (le != &wtc->stripes) {
-            write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
-            
-            if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
-                Status = stripe->iosb.Status;
-                break;
-            }
-            
-            le = le->Flink;
-        }
-        
-        free_write_data_stripes(wtc);
-    }
-
-    ExFreePool(wtc);
-
-    return STATUS_SUCCESS;
-}
-
-static void clean_space_cache_chunk(device_extension* Vcb, chunk* c) {
-    // FIXME - loop through c->deleting and do TRIM if device supports it
-    // FIXME - also find way of doing TRIM of dropped chunks
-    
-    while (!IsListEmpty(&c->deleting)) {
-        space* s = CONTAINING_RECORD(c->deleting.Flink, space, list_entry);
-        
-        RemoveEntryList(&s->list_entry);
-        ExFreePool(s);
-    }
-}
-
-static void clean_space_cache(device_extension* Vcb) {
-    chunk* c;
-    
-    TRACE("(%p)\n", Vcb);
-    
-    while (!IsListEmpty(&Vcb->chunks_changed)) {
-        c = CONTAINING_RECORD(Vcb->chunks_changed.Flink, chunk, list_entry_changed);
-        
-        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-        
-        clean_space_cache_chunk(Vcb, c);
-        RemoveEntryList(&c->list_entry_changed);
-        c->list_entry_changed.Flink = NULL;
-        
-        ExReleaseResourceLite(&c->lock);
-    }
-}
-
-static BOOL trees_consistent(device_extension* Vcb, LIST_ENTRY* rollback) {
-    ULONG maxsize = Vcb->superblock.node_size - sizeof(tree_header);
-    LIST_ENTRY* le;
-    
-    le = Vcb->trees.Flink;
-    while (le != &Vcb->trees) {
-        tree* t = CONTAINING_RECORD(le, tree, list_entry);
-        
-        if (t->write) {
-            if (t->header.num_items == 0 && t->parent) {
-#ifdef DEBUG_WRITE_LOOPS
-                ERR("empty tree found, looping again\n");
-#endif
-                return FALSE;
-            }
-            
-            if (t->size > maxsize) {
-#ifdef DEBUG_WRITE_LOOPS
-                ERR("overlarge tree found (%u > %u), looping again\n", t->size, maxsize);
-#endif
-                return FALSE;
-            }
-            
-            if (!t->has_new_address) {
-#ifdef DEBUG_WRITE_LOOPS
-                ERR("tree found without new address, looping again\n");
-#endif
-                return FALSE;
-            }
-        }
-        
-        le = le->Flink;
-    }
-    
-    return TRUE;
-}
-
-static NTSTATUS add_parents(device_extension* Vcb, LIST_ENTRY* rollback) {
-    UINT8 level;
-    LIST_ENTRY* le;
-    
-    for (level = 0; level <= 255; level++) {
-        BOOL nothing_found = TRUE;
-        
-        TRACE("level = %u\n", level);
-        
-        le = Vcb->trees.Flink;
-        while (le != &Vcb->trees) {
-            tree* t = CONTAINING_RECORD(le, tree, list_entry);
-            
-            if (t->write && t->header.level == level) {
-                TRACE("tree %p: root = %llx, level = %x, parent = %p\n", t, t->header.tree_id, t->header.level, t->parent);
-                
-                nothing_found = FALSE;
-                
-                if (t->parent) {
-                    if (!t->parent->write)
-                        TRACE("adding tree %p (level %x)\n", t->parent, t->header.level);
-                        
-                    t->parent->write = TRUE;
-                }
-            }
-            
-            le = le->Flink;
-        }
-        
-        if (nothing_found)
-            break;
-    }
-
-    return STATUS_SUCCESS;
-}
-
-static void add_parents_to_cache(device_extension* Vcb, tree* t) {
-    while (t->parent) {
-        t = t->parent;
-        t->write = TRUE;
-    }
-}
-
-static BOOL insert_tree_extent_skinny(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64 address, PIRP Irp, LIST_ENTRY* rollback) {
-    EXTENT_ITEM_SKINNY_METADATA* eism;
-    traverse_ptr insert_tp;
-    
-    eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
-    if (!eism) {
-        ERR("out of memory\n");
-        return FALSE;
-    }
-    
-    eism->ei.refcount = 1;
-    eism->ei.generation = Vcb->superblock.generation;
-    eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
-    eism->type = TYPE_TREE_BLOCK_REF;
-    eism->tbr.offset = root_id;
-    
-    if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, Irp, rollback)) {
-        ERR("insert_tree_item failed\n");
-        ExFreePool(eism);
-        return FALSE;
-    }
-    
-    ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-    
-    space_list_subtract(Vcb, c, FALSE, address, Vcb->superblock.node_size, rollback);
-
-    ExReleaseResourceLite(&c->lock);
-    
-    add_parents_to_cache(Vcb, insert_tp.tree);
-    
-    return TRUE;
-}
-
-static BOOL insert_tree_extent(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64* new_address, PIRP Irp, LIST_ENTRY* rollback) {
-    UINT64 address;
-    EXTENT_ITEM_TREE2* eit2;
-    traverse_ptr insert_tp;
-    
-    TRACE("(%p, %x, %llx, %p, %p, %p, %p)\n", Vcb, level, root_id, c, new_address, rollback);
-    
-    if (!find_address_in_chunk(Vcb, c, Vcb->superblock.node_size, &address))
-        return FALSE;
-    
-    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
-        BOOL b = insert_tree_extent_skinny(Vcb, level, root_id, c, address, Irp, rollback);
-        
-        if (b)
-            *new_address = address;
-        
-        return b;
-    }
-    
-    eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
-    if (!eit2) {
-        ERR("out of memory\n");
-        return FALSE;
-    }
-
-    eit2->eit.extent_item.refcount = 1;
-    eit2->eit.extent_item.generation = Vcb->superblock.generation;
-    eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
-//     eit2->eit.firstitem = wt->firstitem;
-    eit2->eit.level = level;
-    eit2->type = TYPE_TREE_BLOCK_REF;
-    eit2->tbr.offset = root_id;
-    
-// #ifdef DEBUG_PARANOID
-//     if (wt->firstitem.obj_type == 0xcc) { // TESTING
-//         ERR("error - firstitem not set (wt = %p, tree = %p, address = %x)\n", wt, wt->tree, (UINT32)address);
-//         ERR("num_items = %u, level = %u, root = %x, delete = %u\n", wt->tree->header.num_items, wt->tree->header.level, (UINT32)wt->tree->root->id, wt->delete);
-//         int3;
-//     }
-// #endif
-    
-    if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, Irp, rollback)) {
-        ERR("insert_tree_item failed\n");
-        ExFreePool(eit2);
-        return FALSE;
-    }
-    
-    ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-    
-    space_list_subtract(Vcb, c, FALSE, address, Vcb->superblock.node_size, rollback);
-    
-    ExReleaseResourceLite(&c->lock);
-
-    add_parents_to_cache(Vcb, insert_tp.tree);
-    
-    *new_address = address;
-    
-    return TRUE;
-}
-
-/*
- * Allocates a new node_size extent for tree t and records it in
- * t->new_address, setting t->has_new_address.
- *
- * Chunks are tried in this order:
- *   1. the chunk containing the tree's current address (if t->has_address);
- *   2. every other chunk in Vcb->chunks whose type equals the wanted flags
- *      and which has at least node_size bytes unused;
- *   3. a chunk newly created by alloc_chunk.
- *
- * Returns STATUS_SUCCESS as soon as insert_tree_extent succeeds, or
- * STATUS_DISK_FULL if none of the candidates could take the extent.
- */
-NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
-    chunk *origchunk = NULL, *c;
-    LIST_ENTRY* le;
-    UINT64 flags = t->flags, addr;
-
-    // a tree with no flags of its own gets a default that depends on its root
-    if (flags == 0) {
-        if (t->root->id == BTRFS_ROOT_CHUNK)
-            flags = BLOCK_FLAG_SYSTEM | BLOCK_FLAG_DUPLICATE;
-        else if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS)
-            flags = BLOCK_FLAG_DATA | BLOCK_FLAG_METADATA;
-        else
-            flags = BLOCK_FLAG_METADATA | BLOCK_FLAG_DUPLICATE;
-    }
-
-    if (t->has_address) {
-        origchunk = get_chunk_from_address(Vcb, t->header.address);
-
-        // get_chunk_from_address can come back empty (its other callers check
-        // for that), and insert_tree_extent takes the chunk's lock, so the
-        // original chunk is only tried when the lookup actually found one
-        if (!origchunk) {
-            ERR("could not find chunk for address %llx\n", t->header.address);
-        } else if (insert_tree_extent(Vcb, t->header.level, t->header.tree_id, origchunk, &addr, Irp, rollback)) {
-            t->new_address = addr;
-            t->has_new_address = TRUE;
-            return STATUS_SUCCESS;
-        }
-    }
-
-    ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
-
-    // origchunk has already been tried (or is NULL), so skip it here
-    le = Vcb->chunks.Flink;
-    while (le != &Vcb->chunks) {
-        c = CONTAINING_RECORD(le, chunk, list_entry);
-
-        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-
-        if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
-            if (insert_tree_extent(Vcb, t->header.level, t->header.tree_id, c, &addr, Irp, rollback)) {
-                ExReleaseResourceLite(&c->lock);
-                ExReleaseResourceLite(&Vcb->chunk_lock);
-                t->new_address = addr;
-                t->has_new_address = TRUE;
-                return STATUS_SUCCESS;
-            }
-        }
-
-        ExReleaseResourceLite(&c->lock);
-
-        le = le->Flink;
-    }
-
-    // allocate new chunk if necessary
-    if ((c = alloc_chunk(Vcb, flags))) {
-        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-
-        if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
-            if (insert_tree_extent(Vcb, t->header.level, t->header.tree_id, c, &addr, Irp, rollback)) {
-                ExReleaseResourceLite(&c->lock);
-                ExReleaseResourceLite(&Vcb->chunk_lock);
-                t->new_address = addr;
-                t->has_new_address = TRUE;
-                return STATUS_SUCCESS;
-            }
-        }
-
-        ExReleaseResourceLite(&c->lock);
-    }
-
-    ExReleaseResourceLite(&Vcb->chunk_lock);
-
-    ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size);
-
-    return STATUS_DISK_FULL;
-}
-
-/*
- * Skinny-metadata variant of tree extent removal: deletes the
- * TYPE_METADATA_ITEM entry for the tree block at address from the extent
- * tree and gives node_size bytes back to the chunk containing that address.
- *
- * Returns FALSE if the lookup fails, no matching item exists, or the item is
- * shorter than EXTENT_ITEM_SKINNY_METADATA.  Returns TRUE once the item has
- * been deleted; failing to find the owning chunk is only logged.
- * The t parameter is not used.
- */
-static BOOL reduce_tree_extent_skinny(device_extension* Vcb, UINT64 address, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
-    NTSTATUS Status;
-    KEY searchkey;
-    traverse_ptr tp;
-    chunk* owner;
-    BOOL matched;
-
-    // the level stored in the key offset is not known here, so search with
-    // the maximum offset and check the object id and type of what comes back
-    searchkey.obj_id = address;
-    searchkey.obj_type = TYPE_METADATA_ITEM;
-    searchkey.offset = 0xffffffffffffffff;
-
-    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return FALSE;
-    }
-
-    matched = tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type;
-    if (!matched) {
-        TRACE("could not find %llx,%x,%llx in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
-        return FALSE;
-    }
-
-    if (tp.item->size < sizeof(EXTENT_ITEM_SKINNY_METADATA)) {
-        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_SKINNY_METADATA));
-        return FALSE;
-    }
-
-    delete_tree_item(Vcb, &tp, rollback);
-
-    owner = get_chunk_from_address(Vcb, address);
-    if (!owner) {
-        ERR("could not find chunk for address %llx\n", address);
-        return TRUE;
-    }
-
-    // return the block to the chunk's accounting and free-space list
-    ExAcquireResourceExclusiveLite(&owner->lock, TRUE);
-    decrease_chunk_usage(owner, Vcb->superblock.node_size);
-    space_list_add(Vcb, owner, TRUE, address, Vcb->superblock.node_size, rollback);
-    ExReleaseResourceLite(&owner->lock);
-
-    return TRUE;
-}
-
-// TESTING
-// static void check_tree_num_items(tree* t) {
-//     LIST_ENTRY* le2;
-//     UINT32 ni;
-//     
-//     le2 = t->itemlist.Flink;
-//     ni = 0;
-//     while (le2 != &t->itemlist) {
-//         tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
-//         if (!td->ignore)
-//             ni++;
-//         le2 = le2->Flink;
-//     }
-//     
-//     if (t->header.num_items != ni) {
-//         ERR("tree %p not okay: num_items was %x, expecting %x\n", t, ni, t->header.num_items);
-//         int3;
-//     } else {
-//         ERR("tree %p okay\n", t);
-//     }
-// }
-// 
-// static void check_trees_num_items(LIST_ENTRY* tc) {
-//     LIST_ENTRY* le = tc->Flink;
-//     while (le != tc) {
-//         tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
-//         
-//         check_tree_num_items(tc2->tree);
-//         
-//         le = le->Flink;
-//     }    
-// }
-
-/*
- * Converts the old-style (V0) extent records of the child tree block
- * referenced by td into the current format.
- *
- * Looks up the block's EXTENT_REF_V0 item and its EXTENT_ITEM (keyed on
- * node_size), deletes both, and inserts a replacement with refcount 1 and a
- * TREE_BLOCK_REF pointing at t's tree id: a METADATA_ITEM when the
- * filesystem uses skinny metadata, otherwise an EXTENT_ITEM.
- *
- * td is an item of the internal tree t, so the child's level is
- * t->header.level - 1.  Failures are logged and the function returns
- * silently; a missing EXTENT_REF_V0 means there is nothing to convert.
- */
-static void convert_old_tree_extent(device_extension* Vcb, tree_data* td, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
-    KEY searchkey;
-    traverse_ptr tp, tp2, insert_tp;
-    EXTENT_REF_V0* erv0;
-    NTSTATUS Status;
-
-    TRACE("(%p, %p, %p)\n", Vcb, td, t);
-
-    searchkey.obj_id = td->treeholder.address;
-    searchkey.obj_type = TYPE_EXTENT_REF_V0;
-    searchkey.offset = 0xffffffffffffffff;
-
-    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return;
-    }
-
-    if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
-        TRACE("could not find EXTENT_REF_V0 for %llx\n", searchkey.obj_id);
-        return;
-    }
-
-    searchkey.obj_id = td->treeholder.address;
-    searchkey.obj_type = TYPE_EXTENT_ITEM;
-    searchkey.offset = Vcb->superblock.node_size;
-
-    Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return;
-    }
-
-    if (keycmp(&searchkey, &tp2.item->key)) {
-        ERR("could not find %llx,%x,%llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
-        return;
-    }
-
-    if (tp.item->size < sizeof(EXTENT_REF_V0)) {
-        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_REF_V0));
-        return;
-    }
-
-    // erv0 keeps pointing into the old item; its generation is copied into the replacement below
-    erv0 = (EXTENT_REF_V0*)tp.item->data;
-
-    delete_tree_item(Vcb, &tp, rollback);
-    delete_tree_item(Vcb, &tp2, rollback);
-
-    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
-        EXTENT_ITEM_SKINNY_METADATA* eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
-
-        if (!eism) {
-            ERR("out of memory\n");
-            return;
-        }
-
-        eism->ei.refcount = 1;
-        eism->ei.generation = erv0->gen;
-        eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
-        eism->type = TYPE_TREE_BLOCK_REF;
-        eism->tbr.offset = t->header.tree_id;
-
-        if (!insert_tree_item(Vcb, Vcb->extent_root, td->treeholder.address, TYPE_METADATA_ITEM, t->header.level -1, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, Irp, rollback)) {
-            ERR("insert_tree_item failed\n");
-            // the buffer was not taken over by the tree, so release it here
-            ExFreePool(eism);
-            return;
-        }
-    } else {
-        EXTENT_ITEM_TREE2* eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
-
-        if (!eit2) {
-            ERR("out of memory\n");
-            return;
-        }
-
-        eit2->eit.extent_item.refcount = 1;
-        eit2->eit.extent_item.generation = erv0->gen;
-        eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
-        eit2->eit.firstitem = td->key;
-        eit2->eit.level = t->header.level - 1;
-        eit2->type = TYPE_TREE_BLOCK_REF;
-        eit2->tbr.offset = t->header.tree_id;
-
-        if (!insert_tree_item(Vcb, Vcb->extent_root, td->treeholder.address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, Irp, rollback)) {
-            ERR("insert_tree_item failed\n");
-            // the buffer was not taken over by the tree, so release it here
-            ExFreePool(eit2);
-            return;
-        }
-    }
-
-    add_parents_to_cache(Vcb, insert_tp.tree);
-    add_parents_to_cache(Vcb, tp.tree);
-    add_parents_to_cache(Vcb, tp2.tree);
-}
-/*
- * Removes the extent-tree record for the tree block at address and returns
- * the space to the chunk that contains it.
- *
- * On skinny-metadata filesystems reduce_tree_extent_skinny is tried first;
- * if it returns FALSE the function falls through to the EXTENT_ITEM lookup.
- * Both EXTENT_ITEM_V0 and current EXTENT_ITEM records are accepted, but only
- * with a refcount of at most 1.
- *
- * If t is given and its header lacks HEADER_FLAG_MIXED_BACKREF, the extents
- * referenced by t's existing items are converted from the old format too.
- *
- * Returns STATUS_SUCCESS, the status of a failed find_item, or
- * STATUS_INTERNAL_ERROR when the item is missing, too short, or has a
- * refcount above 1.
- */
-static NTSTATUS reduce_tree_extent(device_extension* Vcb, UINT64 address, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
-    KEY searchkey;
-    traverse_ptr tp;
-    EXTENT_ITEM* ei;
-    EXTENT_ITEM_V0* eiv0;
-    chunk* c;
-    NTSTATUS Status;
-    
-    // FIXME - deal with refcounts > 1
-    
-    TRACE("(%p, %llx, %p)\n", Vcb, address, t);
-    
-    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
-        if (reduce_tree_extent_skinny(Vcb, address, t, Irp, rollback)) {
-            return STATUS_SUCCESS;
-        }
-    }
-    // no METADATA_ITEM was removed: look for an EXTENT_ITEM keyed on node_size instead
-    searchkey.obj_id = address;
-    searchkey.obj_type = TYPE_EXTENT_ITEM;
-    searchkey.offset = Vcb->superblock.node_size;
-    
-    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
-    }
-    
-    if (keycmp(&tp.item->key, &searchkey)) {
-        ERR("could not find %llx,%x,%llx in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
-        int3;
-        return STATUS_INTERNAL_ERROR;
-    }
-    // the item's size decides whether it is read as the V0 or the current layout
-    if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
-        eiv0 = (EXTENT_ITEM_V0*)tp.item->data;
-        
-        if (eiv0->refcount > 1) {
-            FIXME("FIXME - cannot deal with refcounts larger than 1 at present (eiv0->refcount == %llx)\n", eiv0->refcount);
-            return STATUS_INTERNAL_ERROR;
-        }
-    } else {
-        if (tp.item->size < sizeof(EXTENT_ITEM)) {
-            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
-            return STATUS_INTERNAL_ERROR;
-        }
-        
-        ei = (EXTENT_ITEM*)tp.item->data;
-        
-        if (ei->refcount > 1) {
-            FIXME("FIXME - cannot deal with refcounts larger than 1 at present (ei->refcount == %llx)\n", ei->refcount);
-            return STATUS_INTERNAL_ERROR;
-        }
-    }
-    
-    delete_tree_item(Vcb, &tp, rollback);
-    
-    // if EXTENT_ITEM_V0, delete corresponding B4 item
-    // (B4 is presumably the hex value of TYPE_EXTENT_REF_V0 - TODO confirm)
-    if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
-        traverse_ptr tp2;
-        
-        searchkey.obj_id = address;
-        searchkey.obj_type = TYPE_EXTENT_REF_V0;
-        searchkey.offset = 0xffffffffffffffff;
-        
-        Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("error - find_item returned %08x\n", Status);
-            return Status;
-        }
-        // a missing EXTENT_REF_V0 is tolerated; it is simply not deleted
-        if (tp2.item->key.obj_id == searchkey.obj_id && tp2.item->key.obj_type == searchkey.obj_type) {
-            delete_tree_item(Vcb, &tp2, rollback);
-        }
-    }
-     
-    if (t && !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
-        LIST_ENTRY* le;
-        
-        // when writing old internal trees, convert related extents
-        
-        le = t->itemlist.Flink;
-        while (le != &t->itemlist) {
-            tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
-            
-//             ERR("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
-            // only items that are neither ignored nor newly inserted are converted
-            if (!td->ignore && !td->inserted) {
-                if (t->header.level > 0) {
-                    convert_old_tree_extent(Vcb, td, t, Irp, rollback);
-                } else if (td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA)) {
-                    EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
-                    // leaf: regular and preallocated file extents carry an EXTENT_DATA2 with the data address
-                    if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
-                        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
-                        // extents with address 0 are skipped
-                        if (ed2->address != 0) {
-                            TRACE("trying to convert old data extent %llx,%llx\n", ed2->address, ed2->size);
-                            convert_old_data_extent(Vcb, ed2->address, ed2->size, Irp, rollback);
-                        }
-                    }
-                }
-            }
-
-            le = le->Flink;
-        }
-    }
-
-    c = get_chunk_from_address(Vcb, address);
-    
-    if (c) {
-        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-        // the deleted item's key offset is the extent length being released
-        decrease_chunk_usage(c, tp.item->key.offset);
-        
-        space_list_add(Vcb, c, TRUE, address, tp.item->key.offset, rollback);
-        
-        ExReleaseResourceLite(&c->lock);
-    } else
-        ERR("could not find chunk for address %llx\n", address);
-    
-    return STATUS_SUCCESS;
-}
-
-/*
- * Gives every tree in Vcb->trees that is flagged for writing and has no new
- * address yet a freshly allocated extent.
- *
- * For each such tree: get_tree_new_address picks the new location, the old
- * extent (if the tree had an address) is released with reduce_tree_extent,
- * and the chunk holding the new address has its usage raised by node_size.
- *
- * Returns STATUS_SUCCESS, the failing status of either helper, or
- * STATUS_INTERNAL_ERROR if no chunk contains the new address.
- */
-static NTSTATUS allocate_tree_extents(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
-    LIST_ENTRY* entry;
-
-    TRACE("(%p)\n", Vcb);
-
-    for (entry = Vcb->trees.Flink; entry != &Vcb->trees; entry = entry->Flink) {
-        tree* t = CONTAINING_RECORD(entry, tree, list_entry);
-        chunk* dest;
-        NTSTATUS Status;
-
-        // nothing to do for clean trees or ones that were already relocated
-        if (!t->write || t->has_new_address)
-            continue;
-
-        Status = get_tree_new_address(Vcb, t, Irp, rollback);
-        if (!NT_SUCCESS(Status)) {
-            ERR("get_tree_new_address returned %08x\n", Status);
-            return Status;
-        }
-
-        TRACE("allocated extent %llx\n", t->new_address);
-
-        if (t->has_address) {
-            Status = reduce_tree_extent(Vcb, t->header.address, t, Irp, rollback);
-            if (!NT_SUCCESS(Status)) {
-                ERR("reduce_tree_extent returned %08x\n", Status);
-                return Status;
-            }
-        }
-
-        dest = get_chunk_from_address(Vcb, t->new_address);
-        if (!dest) {
-            ERR("could not find chunk for address %llx\n", t->new_address);
-            return STATUS_INTERNAL_ERROR;
-        }
-
-        increase_chunk_usage(dest, Vcb->superblock.node_size);
-    }
-
-    return STATUS_SUCCESS;
-}
-
-/*
- * Propagates the new addresses of rewritten root nodes into the root tree.
- *
- * For every tree in Vcb->trees that is flagged for writing and has no parent,
- * the owning root's root_item is updated with the new block number, level and
- * the superblock generation.  Unless the root is root_root or chunk_root, the
- * matching ROOT_ITEM in root_root is then overwritten in place, or deleted
- * and re-inserted at full size if the stored item is shorter than ROOT_ITEM.
- * The root's treeholder address is updated in every case.
- *
- * Finishes by calling update_chunk_caches.
- *
- * Returns STATUS_SUCCESS, a failing status from find_item or
- * update_chunk_caches, STATUS_INSUFFICIENT_RESOURCES on allocation failure,
- * or STATUS_INTERNAL_ERROR if the ROOT_ITEM is missing or cannot be inserted.
- */
-static NTSTATUS update_root_root(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
-    LIST_ENTRY* le;
-    NTSTATUS Status;
-
-    TRACE("(%p)\n", Vcb);
-
-    le = Vcb->trees.Flink;
-    while (le != &Vcb->trees) {
-        tree* t = CONTAINING_RECORD(le, tree, list_entry);
-
-        if (t->write && !t->parent) {
-            if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
-                KEY searchkey;
-                traverse_ptr tp;
-
-                searchkey.obj_id = t->root->id;
-                searchkey.obj_type = TYPE_ROOT_ITEM;
-                searchkey.offset = 0xffffffffffffffff;
-
-                Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
-                if (!NT_SUCCESS(Status)) {
-                    ERR("error - find_item returned %08x\n", Status);
-                    return Status;
-                }
-
-                if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
-                    ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
-                    int3;
-                    return STATUS_INTERNAL_ERROR;
-                }
-
-                TRACE("updating the address for root %llx to %llx\n", searchkey.obj_id, t->new_address);
-
-                t->root->root_item.block_number = t->new_address;
-                t->root->root_item.root_level = t->header.level;
-                t->root->root_item.generation = Vcb->superblock.generation;
-                t->root->root_item.generation2 = Vcb->superblock.generation;
-
-                if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry
-                    ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
-
-                    if (!ri) {
-                        ERR("out of memory\n");
-                        return STATUS_INSUFFICIENT_RESOURCES;
-                    }
-
-                    RtlCopyMemory(ri, &t->root->root_item, sizeof(ROOT_ITEM));
-
-                    delete_tree_item(Vcb, &tp, rollback);
-
-                    if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, 0, ri, sizeof(ROOT_ITEM), NULL, Irp, rollback)) {
-                        ERR("insert_tree_item failed\n");
-                        // the copy was not taken over by the tree, so release it here
-                        ExFreePool(ri);
-                        return STATUS_INTERNAL_ERROR;
-                    }
-                } else
-                    RtlCopyMemory(tp.item->data, &t->root->root_item, sizeof(ROOT_ITEM));
-            }
-
-            t->root->treeholder.address = t->new_address;
-        }
-
-        le = le->Flink;
-    }
-
-    Status = update_chunk_caches(Vcb, Irp, rollback);
-    if (!NT_SUCCESS(Status)) {
-        ERR("update_chunk_caches returned %08x\n", Status);
-        return Status;
-    }
-
-    return STATUS_SUCCESS;
-}
-/*
- * I/O completion routine for a single write stripe; conptr is the
- * write_data_stripe the IRP belongs to.
- *
- * Records the IRP's final status in the stripe.  If the write failed, every
- * sibling stripe still marked Pending is switched to Cancelling and its IRP
- * cancelled.  A stripe that arrives here while Cancelling is just marked
- * Cancelled.  The context's event is signalled when the last outstanding
- * stripe has completed.
- *
- * Always returns STATUS_MORE_PROCESSING_REQUIRED, so the I/O manager stops
- * completion processing and leaves the IRP to this driver.
- * DeviceObject is not used.
- */
-static NTSTATUS STDCALL write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
-    write_data_stripe* stripe = conptr;
-    write_data_context* context = (write_data_context*)stripe->context;
-    LIST_ENTRY* le;
-    
-    // FIXME - we need a lock here
-    // (stripe->status is read and written below without any synchronisation)
-    if (stripe->status == WriteDataStatus_Cancelling) {
-        stripe->status = WriteDataStatus_Cancelled;
-        goto end;
-    }
-    
-    stripe->iosb = Irp->IoStatus;
-    
-    if (NT_SUCCESS(Irp->IoStatus.Status)) {
-        stripe->status = WriteDataStatus_Success;
-    } else {
-        le = context->stripes.Flink;
-        
-        stripe->status = WriteDataStatus_Error;
-        // one failed stripe cancels every stripe that is still pending
-        while (le != &context->stripes) {
-            write_data_stripe* s2 = CONTAINING_RECORD(le, write_data_stripe, list_entry);
-            
-            if (s2->status == WriteDataStatus_Pending) {
-                s2->status = WriteDataStatus_Cancelling;
-                IoCancelIrp(s2->Irp);
-            }
-            
-            le = le->Flink;
-        }
-    }
-    
-end:
-    if (InterlockedDecrement(&context->stripes_left) == 0)
-        KeSetEvent(&context->Event, 0, FALSE);
-
-    return STATUS_MORE_PROCESSING_REQUIRED;
-}
-
-/*
- * Releases everything hanging off the stripe list of a write context.
- *
- * First pass: for stripes that have an IRP and whose device uses direct I/O,
- * unlock and free the IRP's MDL.
- * Second pass: free each stripe's buffer if the stripe owns it (need_free),
- * clear the same pointer in any later stripe so it is not freed twice, then
- * free the stripe itself.
- *
- * The list head in wtc is not reinitialised, and wtc itself is not freed.
- */
-void free_write_data_stripes(write_data_context* wtc) {
-    LIST_ENTRY* const head = &wtc->stripes;
-    LIST_ENTRY *cur, *following, *scan;
-
-    for (cur = head->Flink; cur != head; cur = cur->Flink) {
-        write_data_stripe* stripe = CONTAINING_RECORD(cur, write_data_stripe, list_entry);
-
-        if (stripe->Irp && (stripe->device->devobj->Flags & DO_DIRECT_IO)) {
-            MmUnlockPages(stripe->Irp->MdlAddress);
-            IoFreeMdl(stripe->Irp->MdlAddress);
-        }
-    }
-
-    for (cur = head->Flink; cur != head; cur = following) {
-        write_data_stripe* stripe = CONTAINING_RECORD(cur, write_data_stripe, list_entry);
-
-        // remember the successor now: the stripe is freed below
-        following = cur->Flink;
-
-        if (stripe->buf && stripe->need_free) {
-            ExFreePool(stripe->buf);
-
-            // several stripes may share one buffer; only the pointer value is compared
-            for (scan = cur->Flink; scan != head; scan = scan->Flink) {
-                write_data_stripe* s2 = CONTAINING_RECORD(scan, write_data_stripe, list_entry);
-
-                if (s2->buf == stripe->buf)
-                    s2->buf = NULL;
-            }
-        }
-
-        ExFreePool(stripe);
-    }
-}
-
-static NTSTATUS write_trees(device_extension* Vcb, PIRP Irp) {
-    UINT8 level;
-    UINT8 *data, *body;
-    UINT32 crc32;
-    NTSTATUS Status;
-    LIST_ENTRY* le;
-    write_data_context* wtc;
-    
-    TRACE("(%p)\n", Vcb);
-    
-    for (level = 0; level <= 255; level++) {
-        BOOL nothing_found = TRUE;
-        
-        TRACE("level = %u\n", level);
-        
-        le = Vcb->trees.Flink;
-        while (le != &Vcb->trees) {
-            tree* t = CONTAINING_RECORD(le, tree, list_entry);
-            
-            if (t->write && t->header.level == level) {
-                KEY firstitem, searchkey;
-                LIST_ENTRY* le2;
-                traverse_ptr tp;
-                EXTENT_ITEM_TREE* eit;
-                
-                if (!t->has_new_address) {
-                    ERR("error - tried to write tree with no new address\n");
-                    int3;
-                }
-                
-                le2 = t->itemlist.Flink;
-                while (le2 != &t->itemlist) {
-                    tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
-                    if (!td->ignore) {
-                        firstitem = td->key;
-                        break;
-                    }
-                    le2 = le2->Flink;
-                }
-                
-                if (t->parent) {
-                    t->paritem->key = firstitem;
-                    t->paritem->treeholder.address = t->new_address;
-                    t->paritem->treeholder.generation = Vcb->superblock.generation;
-                }
-                
-                if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
-                    searchkey.obj_id = t->new_address;
-                    searchkey.obj_type = TYPE_EXTENT_ITEM;
-                    searchkey.offset = Vcb->superblock.node_size;
-                    
-                    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-                    if (!NT_SUCCESS(Status)) {
-                        ERR("error - find_item returned %08x\n", Status);
-                        return Status;
-                    }
-                    
-                    if (keycmp(&searchkey, &tp.item->key)) {
-//                         traverse_ptr next_tp;
-//                         BOOL b;
-//                         tree_data* paritem;
-                        
-                        ERR("could not find %llx,%x,%llx in extent_root (found %llx,%x,%llx instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-                        
-//                         searchkey.obj_id = 0;
-//                         searchkey.obj_type = 0;
-//                         searchkey.offset = 0;
-//                         
-//                         find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
-//                         
-//                         paritem = NULL;
-//                         do {
-//                             if (tp.tree->paritem != paritem) {
-//                                 paritem = tp.tree->paritem;
-//                                 ERR("paritem: %llx,%x,%llx\n", paritem->key.obj_id, paritem->key.obj_type, paritem->key.offset);
-//                             }
-//                             
-//                             ERR("%llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-//                             
-//                             b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
-//                             if (b) {
-//                                 free_traverse_ptr(&tp);
-//                                 tp = next_tp;
-//                             }
-//                         } while (b);
-//                         
-//                         free_traverse_ptr(&tp);
-                        
-                        return STATUS_INTERNAL_ERROR;
-                    }
-                    
-                    if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
-                        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
-                        return STATUS_INTERNAL_ERROR;
-                    }
-                    
-                    eit = (EXTENT_ITEM_TREE*)tp.item->data;
-                    eit->firstitem = firstitem;
-                }
-                
-                nothing_found = FALSE;
-            }
-            
-            le = le->Flink;
-        }
-        
-        if (nothing_found)
-            break;
-    }
-    
-    TRACE("allocated tree extents\n");
-    
-    wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context), ALLOC_TAG);
-    if (!wtc) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-    
-    KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE);
-    InitializeListHead(&wtc->stripes);
-    wtc->tree = TRUE;
-    wtc->stripes_left = 0;
-    
-    le = Vcb->trees.Flink;
-    while (le != &Vcb->trees) {
-        tree* t = CONTAINING_RECORD(le, tree, list_entry);
-#ifdef DEBUG_PARANOID
-        UINT32 num_items = 0, size = 0;
-        LIST_ENTRY* le2;
-        BOOL crash = FALSE;
-#endif
-
-        if (t->write) {
-#ifdef DEBUG_PARANOID
-            le2 = t->itemlist.Flink;
-            while (le2 != &t->itemlist) {
-                tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
-                if (!td->ignore) {
-                    num_items++;
-                    
-                    if (t->header.level == 0)
-                        size += td->size;
-                }
-                le2 = le2->Flink;
-            }
-            
-            if (t->header.level == 0)
-                size += num_items * sizeof(leaf_node);
-            else
-                size += num_items * sizeof(internal_node);
-            
-            if (num_items != t->header.num_items) {
-                ERR("tree %llx, level %x: num_items was %x, expected %x\n", t->root->id, t->header.level, num_items, t->header.num_items);
-                crash = TRUE;
-            }
-            
-            if (size != t->size) {
-                ERR("tree %llx, level %x: size was %x, expected %x\n", t->root->id, t->header.level, size, t->size);
-                crash = TRUE;
-            }
-            
-            if (t->header.num_items == 0 && t->parent) {
-                ERR("tree %llx, level %x: tried to write empty tree with parent\n", t->root->id, t->header.level);
-                crash = TRUE;
-            }
-            
-            if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
-                ERR("tree %llx, level %x: tried to write overlarge tree (%x > %x)\n", t->root->id, t->header.level, t->size, Vcb->superblock.node_size - sizeof(tree_header));
-                crash = TRUE;
-            }
-            
-            if (crash) {
-                ERR("tree %p\n", t);
-                le2 = t->itemlist.Flink;
-                while (le2 != &t->itemlist) {
-                    tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
-                    if (!td->ignore) {
-                        ERR("%llx,%x,%llx inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted);
-                    }
-                    le2 = le2->Flink;
-                }
-                int3;
-            }
-#endif
-            t->header.address = t->new_address;
-            t->header.generation = Vcb->superblock.generation;
-            t->header.flags |= HEADER_FLAG_MIXED_BACKREF;
-            t->has_address = TRUE;
-            
-            data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
-            if (!data) {
-                ERR("out of memory\n");
-                Status = STATUS_INSUFFICIENT_RESOURCES;
-                goto end;
-            }
-            
-            body = data + sizeof(tree_header);
-            
-            RtlCopyMemory(data, &t->header, sizeof(tree_header));
-            RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header));
-            
-            if (t->header.level == 0) {
-                leaf_node* itemptr = (leaf_node*)body;
-                int i = 0;
-                LIST_ENTRY* le2;
-                UINT8* dataptr = data + Vcb->superblock.node_size;
-                
-                le2 = t->itemlist.Flink;
-                while (le2 != &t->itemlist) {
-                    tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
-                    if (!td->ignore) {
-                        dataptr = dataptr - td->size;
-                        
-                        itemptr[i].key = td->key;
-                        itemptr[i].offset = (UINT8*)dataptr - (UINT8*)body;
-                        itemptr[i].size = td->size;
-                        i++;
-                        
-                        if (td->size > 0)
-                            RtlCopyMemory(dataptr, td->data, td->size);
-                    }
-                    
-                    le2 = le2->Flink;
-                }
-            } else {
-                internal_node* itemptr = (internal_node*)body;
-                int i = 0;
-                LIST_ENTRY* le2;
-                
-                le2 = t->itemlist.Flink;
-                while (le2 != &t->itemlist) {
-                    tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
-                    if (!td->ignore) {
-                        itemptr[i].key = td->key;
-                        itemptr[i].address = td->treeholder.address;
-                        itemptr[i].generation = td->treeholder.generation;
-                        i++;
-                    }
-                    
-                    le2 = le2->Flink;
-                }
-            }
-            
-            crc32 = calc_crc32c(0xffffffff, (UINT8*)&((tree_header*)data)->fs_uuid, Vcb->superblock.node_size - sizeof(((tree_header*)data)->csum));
-            crc32 = ~crc32;
-            *((UINT32*)data) = crc32;
-            TRACE("setting crc32 to %08x\n", crc32);
-            
-            Status = write_data(Vcb, t->new_address, data, TRUE, Vcb->superblock.node_size, wtc, NULL, NULL);
-            if (!NT_SUCCESS(Status)) {
-                ERR("write_data returned %08x\n", Status);
-                goto end;
-            }
-        }
-
-        le = le->Flink;
-    }
-    
-    Status = STATUS_SUCCESS;
-    
-    if (wtc->stripes.Flink != &wtc->stripes) {
-        // launch writes and wait
-        le = wtc->stripes.Flink;
-        while (le != &wtc->stripes) {
-            write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
-            
-            if (stripe->status != WriteDataStatus_Ignore)
-                IoCallDriver(stripe->device->devobj, stripe->Irp);
-            
-            le = le->Flink;
-        }
-        
-        KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL);
-        
-        le = wtc->stripes.Flink;
-        while (le != &wtc->stripes) {
-            write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
-            
-            if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
-                Status = stripe->iosb.Status;
-                break;
-            }
-            
-            le = le->Flink;
-        }
-        
-        free_write_data_stripes(wtc);
-    }
-    
-end:
-    ExFreePool(wtc);
-    
-    return Status;
-}
-
-/*
- * Fills in one superblock_backup slot from the current state of the volume.
- *
- * The slot is zeroed first.  Root-tree and chunk-tree fields come straight
- * from Vcb->superblock.  The extent, fs, dev and checksum tree fields are
- * read from the corresponding ROOT_ITEMs in root_root; a root whose item
- * cannot be found, or is shorter than ROOT_ITEM, leaves its fields at zero.
- * The byte and device totals are copied last.
- */
-static void update_backup_superblock(device_extension* Vcb, superblock_backup* sb, PIRP Irp) {
-    const UINT64 root_ids[] = { BTRFS_ROOT_EXTENT, BTRFS_ROOT_FSTREE, BTRFS_ROOT_DEVTREE, BTRFS_ROOT_CHECKSUM };
-    KEY searchkey;
-    traverse_ptr tp;
-    unsigned int i;
-
-    RtlZeroMemory(sb, sizeof(superblock_backup));
-
-    sb->root_tree_addr = Vcb->superblock.root_tree_addr;
-    sb->root_tree_generation = Vcb->superblock.generation;
-    sb->root_level = Vcb->superblock.root_level;
-
-    sb->chunk_tree_addr = Vcb->superblock.chunk_tree_addr;
-    sb->chunk_tree_generation = Vcb->superblock.chunk_root_generation;
-    sb->chunk_root_level = Vcb->superblock.chunk_root_level;
-
-    // only the object id changes between lookups
-    searchkey.obj_type = TYPE_ROOT_ITEM;
-    searchkey.offset = 0xffffffffffffffff;
-
-    for (i = 0; i < sizeof(root_ids) / sizeof(root_ids[0]); i++) {
-        ROOT_ITEM* ri;
-
-        searchkey.obj_id = root_ids[i];
-
-        if (!NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp)))
-            continue;
-
-        if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type || tp.item->size < sizeof(ROOT_ITEM))
-            continue;
-
-        ri = (ROOT_ITEM*)tp.item->data;
-
-        switch (i) {
-            case 0: // BTRFS_ROOT_EXTENT
-                sb->extent_tree_addr = ri->block_number;
-                sb->extent_tree_generation = ri->generation;
-                sb->extent_root_level = ri->root_level;
-                break;
-
-            case 1: // BTRFS_ROOT_FSTREE
-                sb->fs_tree_addr = ri->block_number;
-                sb->fs_tree_generation = ri->generation;
-                sb->fs_root_level = ri->root_level;
-                break;
-
-            case 2: // BTRFS_ROOT_DEVTREE
-                sb->dev_root_addr = ri->block_number;
-                sb->dev_root_generation = ri->generation;
-                sb->dev_root_level = ri->root_level;
-                break;
-
-            default: // BTRFS_ROOT_CHECKSUM
-                sb->csum_root_addr = ri->block_number;
-                sb->csum_root_generation = ri->generation;
-                sb->csum_root_level = ri->root_level;
-                break;
-        }
-    }
-
-    sb->total_bytes = Vcb->superblock.total_bytes;
-    sb->bytes_used = Vcb->superblock.bytes_used;
-    sb->num_devices = Vcb->superblock.num_devices;
-}
-
-static NTSTATUS write_superblocks(device_extension* Vcb, PIRP Irp) {
-    UINT64 i;
-    NTSTATUS Status;
-    LIST_ENTRY* le;
-    
-    TRACE("(%p)\n", Vcb);
-    
-    le = Vcb->trees.Flink;
-    while (le != &Vcb->trees) {
-        tree* t = CONTAINING_RECORD(le, tree, list_entry);
-        
-        if (t->write && !t->parent) {
-            if (t->root == Vcb->root_root) {
-                Vcb->superblock.root_tree_addr = t->new_address;
-                Vcb->superblock.root_level = t->header.level;
-            } else if (t->root == Vcb->chunk_root) {
-                Vcb->superblock.chunk_tree_addr = t->new_address;
-                Vcb->superblock.chunk_root_generation = t->header.generation;
-                Vcb->superblock.chunk_root_level = t->header.level;
-            }
-        }
-        
-        le = le->Flink;
-    }
-    
-    for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS - 1; i++) {
-        RtlCopyMemory(&Vcb->superblock.backup[i], &Vcb->superblock.backup[i+1], sizeof(superblock_backup));
-    }
-    
-    update_backup_superblock(Vcb, &Vcb->superblock.backup[BTRFS_NUM_BACKUP_ROOTS - 1], Irp);
-    
-    for (i = 0; i < Vcb->superblock.num_devices; i++) {
-        if (Vcb->devices[i].devobj) {
-            Status = write_superblock(Vcb, &Vcb->devices[i]);
-            if (!NT_SUCCESS(Status)) {
-                ERR("write_superblock returned %08x\n", Status);
-                return Status;
-            }
-        }
-    }
-    
-    return STATUS_SUCCESS;
-}
-
-static NTSTATUS flush_changed_extent(device_extension* Vcb, chunk* c, changed_extent* ce, PIRP Irp, LIST_ENTRY* rollback) {
-    LIST_ENTRY *le, *le2;
-    NTSTATUS Status;
-    UINT64 old_size;
-    
-    le = ce->refs.Flink;
-    while (le != &ce->refs) {
-        changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
-        LIST_ENTRY* le3 = le->Flink;
-        UINT64 old_count = 0;
-        
-        le2 = ce->old_refs.Flink;
-        while (le2 != &ce->old_refs) {
-            changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
-            
-            if (cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
-                old_count = cer2->edr.count;
-                
-                RemoveEntryList(&cer2->list_entry);
-                ExFreePool(cer2);
-                break;
-            }
-            
-            le2 = le2->Flink;
-        }
-        
-        old_size = ce->old_count > 0 ? ce->old_size : ce->size;
-        
-        if (cer->edr.count > old_count) {
-            Status = increase_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset, cer->edr.count - old_count, Irp, rollback);
-                        
-            if (!NT_SUCCESS(Status)) {
-                ERR("increase_extent_refcount_data returned %08x\n", Status);
-                return Status;
-            }
-        } else if (cer->edr.count < old_count) {
-            Status = decrease_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset,
-                                                   old_count - cer->edr.count, Irp, rollback);
-            
-            if (!NT_SUCCESS(Status)) {
-                ERR("decrease_extent_refcount_data returned %08x\n", Status);
-                return Status;
-            }
-        }
-        
-        if (ce->size != ce->old_size && ce->old_count > 0) {
-            KEY searchkey;
-            traverse_ptr tp;
-            void* data;
-            
-            searchkey.obj_id = ce->address;
-            searchkey.obj_type = TYPE_EXTENT_ITEM;
-            searchkey.offset = ce->old_size;
-            
-            Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-            if (!NT_SUCCESS(Status)) {
-                ERR("error - find_item returned %08x\n", Status);
-                return Status;
-            }
-            
-            if (keycmp(&searchkey, &tp.item->key)) {
-                ERR("could not find (%llx,%x,%llx) in extent tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
-                return STATUS_INTERNAL_ERROR;
-            }
-            
-            if (tp.item->size > 0) {
-                data = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
-                
-                if (!data) {
-                    ERR("out of memory\n");
-                    return STATUS_INSUFFICIENT_RESOURCES;
-                }
-                
-                RtlCopyMemory(data, tp.item->data, tp.item->size);
-            } else
-                data = NULL;
-            
-            if (!insert_tree_item(Vcb, Vcb->extent_root, ce->address, TYPE_EXTENT_ITEM, ce->size, data, tp.item->size, NULL, Irp, rollback)) {
-                ERR("insert_tree_item failed\n");
-                return STATUS_INTERNAL_ERROR;
-            }
-            
-            delete_tree_item(Vcb, &tp, rollback);
-        }
-       
-        RemoveEntryList(&cer->list_entry);
-        ExFreePool(cer);
-        
-        le = le3;
-    }
-    
-#ifdef DEBUG_PARANOID
-    if (!IsListEmpty(&ce->old_refs))
-        WARN("old_refs not empty\n");
-#endif
-    
-    if (ce->count == 0) {
-        if (!ce->no_csum) {
-            LIST_ENTRY changed_sector_list;
-            
-            changed_sector* sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
-            if (!sc) {
-                ERR("out of memory\n");
-                return STATUS_INSUFFICIENT_RESOURCES;
-            }
-            
-            sc->ol.key = ce->address;
-            sc->checksums = NULL;
-            sc->length = ce->size / Vcb->superblock.sector_size;
-
-            sc->deleted = TRUE;
-            
-            InitializeListHead(&changed_sector_list);
-            insert_into_ordered_list(&changed_sector_list, &sc->ol);
-            
-            ExAcquireResourceExclusiveLite(&Vcb->checksum_lock, TRUE);
-            commit_checksum_changes(Vcb, &changed_sector_list);
-            ExReleaseResourceLite(&Vcb->checksum_lock);
-        }
-        
-        decrease_chunk_usage(c, ce->size);
-        
-        space_list_add(Vcb, c, TRUE, ce->address, ce->size, rollback);
-    }
-
-    RemoveEntryList(&ce->list_entry);
-    ExFreePool(ce);
-    
-    return STATUS_SUCCESS;
-}
-
-static NTSTATUS update_chunk_usage(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
-    LIST_ENTRY *le = Vcb->chunks.Flink, *le2;
-    chunk* c;
-    KEY searchkey;
-    traverse_ptr tp;
-    BLOCK_GROUP_ITEM* bgi;
-    NTSTATUS Status;
-    BOOL flushed_extents = FALSE;
-    
-    TRACE("(%p)\n", Vcb);
-    
-    ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
-    
-    while (le != &Vcb->chunks) {
-        c = CONTAINING_RECORD(le, chunk, list_entry);
-        
-        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-        
-        le2 = c->changed_extents.Flink;
-        while (le2 != &c->changed_extents) {
-            LIST_ENTRY* le3 = le2->Flink;
-            changed_extent* ce = CONTAINING_RECORD(le2, changed_extent, list_entry);
-            
-            Status = flush_changed_extent(Vcb, c, ce, Irp, rollback);
-            if (!NT_SUCCESS(Status)) {
-                ERR("flush_changed_extent returned %08x\n", Status);
-                ExReleaseResourceLite(&c->lock);
-                goto end;
-            }
-            
-            flushed_extents = TRUE;
-            
-            le2 = le3;
-        }
-        
-        if (c->used != c->oldused) {
-            searchkey.obj_id = c->offset;
-            searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
-            searchkey.offset = c->chunk_item->size;
-            
-            Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-            if (!NT_SUCCESS(Status)) {
-                ERR("error - find_item returned %08x\n", Status);
-                ExReleaseResourceLite(&c->lock);
-                goto end;
-            }
-            
-            if (keycmp(&searchkey, &tp.item->key)) {
-                ERR("could not find (%llx,%x,%llx) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
-                int3;
-                Status = STATUS_INTERNAL_ERROR;
-                ExReleaseResourceLite(&c->lock);
-                goto end;
-            }
-            
-            if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) {
-                ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
-                Status = STATUS_INTERNAL_ERROR;
-                ExReleaseResourceLite(&c->lock);
-                goto end;
-            }
-            
-            bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
-            if (!bgi) {
-                ERR("out of memory\n");
-                Status = STATUS_INSUFFICIENT_RESOURCES;
-                ExReleaseResourceLite(&c->lock);
-                goto end;
-            }
-    
-            RtlCopyMemory(bgi, tp.item->data, tp.item->size);
-            bgi->used = c->used;
-            
-            TRACE("adjusting usage of chunk %llx to %llx\n", c->offset, c->used);
-            
-            delete_tree_item(Vcb, &tp, rollback);
-            
-            if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, Irp, rollback)) {
-                ERR("insert_tree_item failed\n");
-                ExFreePool(bgi);
-                Status = STATUS_INTERNAL_ERROR;
-                ExReleaseResourceLite(&c->lock);
-                goto end;
-            }
-            
-            TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
-            TRACE("chunk_item type = %llx\n", c->chunk_item->type);
-            
-            if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
-                Vcb->superblock.bytes_used += c->used - c->oldused;
-            } else if (c->chunk_item->type & BLOCK_FLAG_RAID1 || c->chunk_item->type & BLOCK_FLAG_DUPLICATE || c->chunk_item->type & BLOCK_FLAG_RAID10) {
-                Vcb->superblock.bytes_used += 2 * (c->used - c->oldused);
-            } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
-                FIXME("RAID5 not yet supported\n");
-                ExFreePool(bgi);
-                Status = STATUS_INTERNAL_ERROR;
-                ExReleaseResourceLite(&c->lock);
-                goto end;
-            } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
-                FIXME("RAID6 not yet supported\n");
-                ExFreePool(bgi);
-                Status = STATUS_INTERNAL_ERROR;
-                ExReleaseResourceLite(&c->lock);
-                goto end;
-            } else { // SINGLE
-                Vcb->superblock.bytes_used += c->used - c->oldused;
-            }
-            
-            TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
-            
-            c->oldused = c->used;
-        }
-        
-        ExReleaseResourceLite(&c->lock);
-        
-        le = le->Flink;
-    }
-    
-    if (flushed_extents) {
-        ExAcquireResourceExclusiveLite(&Vcb->checksum_lock, TRUE);
-        if (!IsListEmpty(&Vcb->sector_checksums)) {
-            update_checksum_tree(Vcb, Irp, rollback);
-        }
-        ExReleaseResourceLite(&Vcb->checksum_lock);
-    }
-    
-    Status = STATUS_SUCCESS;
-    
-end:
-    ExReleaseResourceLite(&Vcb->chunk_lock);
-    
-    return Status;
-}
-
-static void get_first_item(tree* t, KEY* key) {
-    LIST_ENTRY* le;
-    
-    le = t->itemlist.Flink;
-    while (le != &t->itemlist) {
-        tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
-
-        *key = td->key;
-        return;
-    }
-}
-
-static NTSTATUS STDCALL split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, UINT32 numitems, UINT32 size) {
-    tree *nt, *pt;
-    tree_data* td;
-    tree_data* oldlastitem;
-//     write_tree* wt2;
-// //     tree_data *firsttd, *lasttd;
-// //     LIST_ENTRY* le;
-// #ifdef DEBUG_PARANOID
-//     KEY lastkey1, lastkey2;
-//     traverse_ptr tp, next_tp;
-//     ULONG numitems1, numitems2;
-// #endif
-    
-    TRACE("splitting tree in %llx at (%llx,%x,%llx)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset);
-    
-// #ifdef DEBUG_PARANOID
-//     lastkey1.obj_id = 0xffffffffffffffff;
-//     lastkey1.obj_type = 0xff;
-//     lastkey1.offset = 0xffffffffffffffff;
-//     
-//     if (!find_item(Vcb, t->root, &tp, &lastkey1, NULL, FALSE))
-//         ERR("error - find_item failed\n");
-//     else {
-//         lastkey1 = tp.item->key;
-//         numitems1 = 0;
-//         while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
-//             free_traverse_ptr(&tp);
-//             tp = next_tp;
-//             numitems1++;
-//         }
-//         free_traverse_ptr(&tp);
-//     }
-// #endif
-    
-    nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
-    if (!nt) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-    
-    RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header));
-    nt->header.address = 0;
-    nt->header.generation = Vcb->superblock.generation;
-    nt->header.num_items = t->header.num_items - numitems;
-    nt->header.flags = HEADER_FLAG_MIXED_BACKREF;
-    
-    nt->has_address = FALSE;
-    nt->Vcb = Vcb;
-    nt->parent = t->parent;
-    nt->root = t->root;
-//     nt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
-    nt->new_address = 0;
-    nt->has_new_address = FALSE;
-    nt->flags = t->flags;
-    InitializeListHead(&nt->itemlist);
-    
-//     ExInitializeResourceLite(&nt->nonpaged->load_tree_lock);
-    
-    oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry);
-
-// //     firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
-// //     lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
-// //     
-// //     TRACE("old tree in %x was from (%x,%x,%x) to (%x,%x,%x)\n",
-// //                   (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
-// //                   (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
-// //     
-// //     le = wt->tree->itemlist.Flink;
-// //     while (le != &wt->tree->itemlist) {
-// //         td = CONTAINING_RECORD(le, tree_data, list_entry);
-// //         TRACE("old tree item was (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
-// //         le = le->Flink;
-// //     }
-    
-    nt->itemlist.Flink = &newfirstitem->list_entry;
-    nt->itemlist.Blink = t->itemlist.Blink;
-    nt->itemlist.Flink->Blink = &nt->itemlist;
-    nt->itemlist.Blink->Flink = &nt->itemlist;
-    
-    t->itemlist.Blink = &oldlastitem->list_entry;
-    t->itemlist.Blink->Flink = &t->itemlist;
-    
-// //     le = wt->tree->itemlist.Flink;
-// //     while (le != &wt->tree->itemlist) {
-// //         td = CONTAINING_RECORD(le, tree_data, list_entry);
-// //         TRACE("old tree item now (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
-// //         le = le->Flink;
-// //     }
-// //     
-// //     firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
-// //     lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
-// //     
-// //     TRACE("old tree in %x is now from (%x,%x,%x) to (%x,%x,%x)\n",
-// //                   (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
-// //                   (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
-    
-    nt->size = t->size - size;
-    t->size = size;
-    t->header.num_items = numitems;
-    nt->write = TRUE;
-    
-    InterlockedIncrement(&Vcb->open_trees);
-    InsertTailList(&Vcb->trees, &nt->list_entry);
-    
-// //     // TESTING
-// //     td = wt->tree->items;
-// //     while (td) {
-// //         if (!td->ignore) {
-// //             TRACE("old tree item: (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
-// //         }
-// //         td = td->next;
-// //     }
-    
-// //     oldlastitem->next = NULL;
-// //     wt->tree->lastitem = oldlastitem;
-    
-// //     TRACE("last item is now (%x,%x,%x)\n", (UINT32)oldlastitem->key.obj_id, oldlastitem->key.obj_type, (UINT32)oldlastitem->key.offset);
-    
-    if (nt->header.level > 0) {
-        LIST_ENTRY* le = nt->itemlist.Flink;
-        
-        while (le != &nt->itemlist) {
-            tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
-            
-            if (td2->treeholder.tree)
-                td2->treeholder.tree->parent = nt;
-            
-            le = le->Flink;
-        }
-    }
-    
-    if (nt->parent) {
-        td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
-        if (!td) {
-            ERR("out of memory\n");
-            return STATUS_INSUFFICIENT_RESOURCES;
-        }
-    
-        td->key = newfirstitem->key;
-        
-        InsertHeadList(&t->paritem->list_entry, &td->list_entry);
-        
-        td->ignore = FALSE;
-        td->inserted = TRUE;
-        td->treeholder.tree = nt;
-//         td->treeholder.nonpaged->status = tree_holder_loaded;
-        nt->paritem = td;
-        
-        nt->parent->header.num_items++;
-        nt->parent->size += sizeof(internal_node);
-
-        goto end;
-    }
-    
-    TRACE("adding new tree parent\n");
-    
-    if (nt->header.level == 255) {
-        ERR("cannot add parent to tree at level 255\n");
-        return STATUS_INTERNAL_ERROR;
-    }
-    
-    pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
-    if (!pt) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-    
-    RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header));
-    pt->header.address = 0;
-    pt->header.num_items = 2;
-    pt->header.level = nt->header.level + 1;
-    pt->header.flags = HEADER_FLAG_MIXED_BACKREF;
-    
-    pt->has_address = FALSE;
-    pt->Vcb = Vcb;
-    pt->parent = NULL;
-    pt->paritem = NULL;
-    pt->root = t->root;
-    pt->new_address = 0;
-    pt->has_new_address = FALSE;
-//     pt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
-    pt->size = pt->header.num_items * sizeof(internal_node);
-    pt->flags = t->flags;
-    InitializeListHead(&pt->itemlist);
-    
-//     ExInitializeResourceLite(&pt->nonpaged->load_tree_lock);
-    
-    InterlockedIncrement(&Vcb->open_trees);
-    InsertTailList(&Vcb->trees, &pt->list_entry);
-    
-    td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
-    if (!td) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-    
-    get_first_item(t, &td->key);
-    td->ignore = FALSE;
-    td->inserted = FALSE;
-    td->treeholder.address = 0;
-    td->treeholder.generation = Vcb->superblock.generation;
-    td->treeholder.tree = t;
-//     td->treeholder.nonpaged->status = tree_holder_loaded;
-    InsertTailList(&pt->itemlist, &td->list_entry);
-    t->paritem = td;
-    
-    td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
-    if (!td) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-    
-    td->key = newfirstitem->key;
-    td->ignore = FALSE;
-    td->inserted = FALSE;
-    td->treeholder.address = 0;
-    td->treeholder.generation = Vcb->superblock.generation;
-    td->treeholder.tree = nt;
-//     td->treeholder.nonpaged->status = tree_holder_loaded;
-    InsertTailList(&pt->itemlist, &td->list_entry);
-    nt->paritem = td;
-    
-    pt->write = TRUE;
-
-    t->root->treeholder.tree = pt;
-    
-    t->parent = pt;
-    nt->parent = pt;
-    
-end:
-    t->root->root_item.bytes_used += Vcb->superblock.node_size;
-
-// #ifdef DEBUG_PARANOID
-//     lastkey2.obj_id = 0xffffffffffffffff;
-//     lastkey2.obj_type = 0xff;
-//     lastkey2.offset = 0xffffffffffffffff;
-//     
-//     if (!find_item(Vcb, wt->tree->root, &tp, &lastkey2, NULL, FALSE))
-//         ERR("error - find_item failed\n");
-//     else {    
-//         lastkey2 = tp.item->key;
-//         
-//         numitems2 = 0;
-//         while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
-//             free_traverse_ptr(&tp);
-//             tp = next_tp;
-//             numitems2++;
-//         }
-//         free_traverse_ptr(&tp);
-//     }
-//     
-//     ERR("lastkey1 = %llx,%x,%llx\n", lastkey1.obj_id, lastkey1.obj_type, lastkey1.offset);
-//     ERR("lastkey2 = %llx,%x,%llx\n", lastkey2.obj_id, lastkey2.obj_type, lastkey2.offset);
-//     ERR("numitems1 = %u\n", numitems1);
-//     ERR("numitems2 = %u\n", numitems2);
-// #endif
-    
-    return STATUS_SUCCESS;
-}
-
-static NTSTATUS STDCALL split_tree(device_extension* Vcb, tree* t) {
-    LIST_ENTRY* le;
-    UINT32 size, ds, numitems;
-    
-    size = 0;
-    numitems = 0;
-    
-    // FIXME - naïve implementation: maximizes number of filled trees
-    
-    le = t->itemlist.Flink;
-    while (le != &t->itemlist) {
-        tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
-        
-        if (!td->ignore) {
-            if (t->header.level == 0)
-                ds = sizeof(leaf_node) + td->size;
-            else
-                ds = sizeof(internal_node);
-            
-            // FIXME - move back if previous item was deleted item with same key
-            if (size + ds > Vcb->superblock.node_size - sizeof(tree_header))
-                return split_tree_at(Vcb, t, td, numitems, size);
-
-            size += ds;
-            numitems++;
-        }
-        
-        le = le->Flink;
-    }
-    
-    return STATUS_SUCCESS;
-}
-
-static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
-    LIST_ENTRY* le;
-    tree_data* nextparitem = NULL;
-    NTSTATUS Status;
-    tree *next_tree, *par;
-    BOOL loaded;
-    
-    TRACE("trying to amalgamate tree in root %llx, level %x (size %u)\n", t->root->id, t->header.level, t->size);
-    
-    // FIXME - doesn't capture everything, as it doesn't ascend
-    // FIXME - write proper function and put it in treefuncs.c
-    le = t->paritem->list_entry.Flink;
-    while (le != &t->parent->itemlist) {
-        tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
-        
-        if (!td->ignore) {
-            nextparitem = td;
-            break;
-        }
-        
-        le = le->Flink;
-    }
-    
-    if (!nextparitem)
-        return STATUS_SUCCESS;
-    
-    // FIXME - loop, and capture more than one tree if we can
-    
-    TRACE("nextparitem: key = %llx,%x,%llx\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset);
-//     nextparitem = t->paritem;
-    
-//     ExAcquireResourceExclusiveLite(&t->parent->nonpaged->load_tree_lock, TRUE);
-    
-    Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, &loaded, NULL);
-    if (!NT_SUCCESS(Status)) {
-        ERR("do_load_tree returned %08x\n", Status);
-        return Status;
-    }
-    
-//     ExReleaseResourceLite(&t->parent->nonpaged->load_tree_lock);
-    
-    next_tree = nextparitem->treeholder.tree;
-    
-    if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) {
-        // merge two trees into one
-        
-        t->header.num_items += next_tree->header.num_items;
-        t->size += next_tree->size;
-        
-        if (next_tree->header.level > 0) {
-            le = next_tree->itemlist.Flink;
-            
-            while (le != &next_tree->itemlist) {
-                tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
-                
-                if (td2->treeholder.tree)
-                    td2->treeholder.tree->parent = t;
-                
-                le = le->Flink;
-            }
-        }
-        
-        t->itemlist.Blink->Flink = next_tree->itemlist.Flink;
-        t->itemlist.Blink->Flink->Blink = t->itemlist.Blink;
-        t->itemlist.Blink = next_tree->itemlist.Blink;
-        t->itemlist.Blink->Flink = &t->itemlist;
-        
-//         // TESTING
-//         le = t->itemlist.Flink;
-//         while (le != &t->itemlist) {
-//             tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
-//             if (!td->ignore) {
-//                 ERR("key: %llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
-//             }
-//             le = le->Flink;
-//         }
-        
-        next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist;
-        
-        next_tree->header.num_items = 0;
-        next_tree->size = 0;
-        
-        if (next_tree->has_new_address) { // delete associated EXTENT_ITEM
-            Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, Irp, rollback);
-            
-            if (!NT_SUCCESS(Status)) {
-                ERR("reduce_tree_extent returned %08x\n", Status);
-                return Status;
-            }
-        } else if (next_tree->has_address) {
-            Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, Irp, rollback);
-            
-            if (!NT_SUCCESS(Status)) {
-                ERR("reduce_tree_extent returned %08x\n", Status);
-                return Status;
-            }
-        }
-        
-        if (!nextparitem->ignore) {
-            nextparitem->ignore = TRUE;
-            next_tree->parent->header.num_items--;
-            next_tree->parent->size -= sizeof(internal_node);
-        }
-        
-        par = next_tree->parent;
-        while (par) {
-            par->write = TRUE;
-            par = par->parent;
-        }
-        
-        RemoveEntryList(&nextparitem->list_entry);
-        ExFreePool(next_tree->paritem);
-        next_tree->paritem = NULL;
-        
-        next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size;
-        
-        free_tree(next_tree);
-    } else {
-        // rebalance by moving items from second tree into first
-        ULONG avg_size = (t->size + next_tree->size) / 2;
-        KEY firstitem = {0, 0, 0};
-        
-        TRACE("attempting rebalance\n");
-        
-        le = next_tree->itemlist.Flink;
-        while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) {
-            tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
-            ULONG size;
-            
-            if (!td->ignore) {
-                if (next_tree->header.level == 0)
-                    size = sizeof(leaf_node) + td->size;
-                else
-                    size = sizeof(internal_node);
-            } else
-                size = 0;
-            
-            if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) {
-                RemoveEntryList(&td->list_entry);
-                InsertTailList(&t->itemlist, &td->list_entry);
-                
-                if (next_tree->header.level > 0 && td->treeholder.tree)
-                    td->treeholder.tree->parent = t;
-                
-                if (!td->ignore) {
-                    next_tree->size -= size;
-                    t->size += size;
-                    next_tree->header.num_items--;
-                    t->header.num_items++;
-                }
-            } else
-                break;
-            
-            le = next_tree->itemlist.Flink;
-        }
-        
-        le = next_tree->itemlist.Flink;
-        while (le != &next_tree->itemlist) {
-            tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
-            
-            if (!td->ignore) {
-                firstitem = td->key;
-                break;
-            }
-            
-            le = le->Flink;
-        }
-        
-//         ERR("firstitem = %llx,%x,%llx\n", firstitem.obj_id, firstitem.obj_type, firstitem.offset);
-        
-        // FIXME - once ascension is working, make this work with parent's parent, etc.
-        if (next_tree->paritem)
-            next_tree->paritem->key = firstitem;
-        
-        par = next_tree;
-        while (par) {
-            par->write = TRUE;
-            par = par->parent;
-        }
-    }
-    
-    return STATUS_SUCCESS;
-}
-
-static NTSTATUS update_extent_level(device_extension* Vcb, UINT64 address, tree* t, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) {
-    KEY searchkey;
-    traverse_ptr tp;
-    NTSTATUS Status;
-    
-    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
-        searchkey.obj_id = address;
-        searchkey.obj_type = TYPE_METADATA_ITEM;
-        searchkey.offset = t->header.level;
-        
-        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("error - find_item returned %08x\n", Status);
-            return Status;
-        }
-        
-        if (!keycmp(&tp.item->key, &searchkey)) {
-            EXTENT_ITEM_SKINNY_METADATA* eism;
-            
-            if (tp.item->size > 0) {
-                eism = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
-                
-                if (!eism) {
-                    ERR("out of memory\n");
-                    return STATUS_INSUFFICIENT_RESOURCES;
-                }
-                
-                RtlCopyMemory(eism, tp.item->data, tp.item->size);
-            } else
-                eism = NULL;
-            
-            delete_tree_item(Vcb, &tp, rollback);
-            
-            if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, tp.item->size, NULL, Irp, rollback)) {
-                ERR("insert_tree_item failed\n");
-                ExFreePool(eism);
-                return STATUS_INTERNAL_ERROR;
-            }
-            
-            return STATUS_SUCCESS;
-        }
-    }
-    
-    searchkey.obj_id = address;
-    searchkey.obj_type = TYPE_EXTENT_ITEM;
-    searchkey.offset = 0xffffffffffffffff;
-    
-    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
-    }
-    
-    if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
-        EXTENT_ITEM_TREE* eit;
-        
-        if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
-            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
-            return STATUS_INTERNAL_ERROR;
-        }
-        
-        eit = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
-                
-        if (!eit) {
-            ERR("out of memory\n");
-            return STATUS_INSUFFICIENT_RESOURCES;
-        }
-        
-        RtlCopyMemory(eit, tp.item->data, tp.item->size);
-        
-        delete_tree_item(Vcb, &tp, rollback);
-        
-        eit->level = level;
-        
-        if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, eit, tp.item->size, NULL, Irp, rollback)) {
-            ERR("insert_tree_item failed\n");
-            ExFreePool(eit);
-            return STATUS_INTERNAL_ERROR;
-        }
-    
-        return STATUS_SUCCESS;
-    }
-    
-    ERR("could not find EXTENT_ITEM for address %llx\n", address);
-    
-    return STATUS_INTERNAL_ERROR;
-}
-
-static NTSTATUS STDCALL do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
-//     LIST_ENTRY *le, *le2;
-//     write_tree* wt;
-//     tree_data* td;
-    UINT8 level, max_level;
-    UINT32 min_size;
-    BOOL empty, done_deletions = FALSE;
-    NTSTATUS Status;
-    tree* t;
-    
-    TRACE("(%p)\n", Vcb);
-    
-    max_level = 0;
-    
-    for (level = 0; level <= 255; level++) {
-        LIST_ENTRY *le, *nextle;
-        
-        empty = TRUE;
-        
-        TRACE("doing level %u\n", level);
-        
-        le = Vcb->trees.Flink;
-    
-        while (le != &Vcb->trees) {
-            t = CONTAINING_RECORD(le, tree, list_entry);
-            
-            nextle = le->Flink;
-            
-            if (t->write && t->header.level == level) {
-                empty = FALSE;
-                
-                if (t->header.num_items == 0) {
-                    if (t->parent) {
-                        LIST_ENTRY* le2;
-                        KEY firstitem = {0xcccccccccccccccc,0xcc,0xcccccccccccccccc};
-#ifdef __REACTOS__
-                        (void)firstitem;
-#endif
-                        
-                        done_deletions = TRUE;
-            
-                        le2 = t->itemlist.Flink;
-                        while (le2 != &t->itemlist) {
-                            tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
-                            firstitem = td->key;
-                            break;
-                        }
-                        
-                        TRACE("deleting tree in root %llx (first item was %llx,%x,%llx)\n",
-                              t->root->id, firstitem.obj_id, firstitem.obj_type, firstitem.offset);
-                        
-                        t->root->root_item.bytes_used -= Vcb->superblock.node_size;
-                        
-                        if (t->has_new_address) { // delete associated EXTENT_ITEM
-                            Status = reduce_tree_extent(Vcb, t->new_address, t, Irp, rollback);
-                            
-                            if (!NT_SUCCESS(Status)) {
-                                ERR("reduce_tree_extent returned %08x\n", Status);
-                                return Status;
-                            }
-                            
-                            t->has_new_address = FALSE;
-                        } else if (t->has_address) {
-                            Status = reduce_tree_extent(Vcb,t->header.address, t, Irp, rollback);
-                            
-                            if (!NT_SUCCESS(Status)) {
-                                ERR("reduce_tree_extent returned %08x\n", Status);
-                                return Status;
-                            }
-                            
-                            t->has_address = FALSE;
-                        }
-                        
-                        if (!t->paritem->ignore) {
-                            t->paritem->ignore = TRUE;
-                            t->parent->header.num_items--;
-                            t->parent->size -= sizeof(internal_node);
-                        }
-                        
-                        RemoveEntryList(&t->paritem->list_entry);
-                        ExFreePool(t->paritem);
-                        t->paritem = NULL;
-                        
-                        free_tree(t);
-                    } else if (t->header.level != 0) {
-                        if (t->has_new_address) {
-                            Status = update_extent_level(Vcb, t->new_address, t, 0, Irp, rollback);
-                            
-                            if (!NT_SUCCESS(Status)) {
-                                ERR("update_extent_level returned %08x\n", Status);
-                                return Status;
-                            }
-                        }
-                        
-                        t->header.level = 0;
-                    }
-                } else if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
-                    TRACE("splitting overlarge tree (%x > %x)\n", t->size, Vcb->superblock.node_size - sizeof(tree_header));
-                    Status = split_tree(Vcb, t);
-
-                    if (!NT_SUCCESS(Status)) {
-                        ERR("split_tree returned %08x\n", Status);
-                        return Status;
-                    }
-                }
-            }
-            
-            le = nextle;
-        }
-        
-        if (!empty) {
-            max_level = level;
-        } else {
-            TRACE("nothing found for level %u\n", level);
-            break;
-        }
-    }
-    
-    min_size = (Vcb->superblock.node_size - sizeof(tree_header)) / 2;
-    
-    for (level = 0; level <= max_level; level++) {
-        LIST_ENTRY* le;
-        
-        le = Vcb->trees.Flink;
-    
-        while (le != &Vcb->trees) {
-            t = CONTAINING_RECORD(le, tree, list_entry);
-            
-            if (t->write && t->header.level == level && t->header.num_items > 0 && t->parent && t->size < min_size) {
-                Status = try_tree_amalgamate(Vcb, t, Irp, rollback);
-                if (!NT_SUCCESS(Status)) {
-                    ERR("try_tree_amalgamate returned %08x\n", Status);
-                    return Status;
-                }
-            }
-            
-            le = le->Flink;
-        }
-    }
-    
-    // simplify trees if top tree only has one entry
-    
-    if (done_deletions) {
-        for (level = max_level; level > 0; level--) {
-            LIST_ENTRY *le, *nextle;
-            
-            le = Vcb->trees.Flink;
-            while (le != &Vcb->trees) {
-                nextle = le->Flink;
-                t = CONTAINING_RECORD(le, tree, list_entry);
-                
-                if (t->write && t->header.level == level) {
-                    if (!t->parent && t->header.num_items == 1) {
-                        LIST_ENTRY* le2 = t->itemlist.Flink;
-                        tree_data* td;
-                        tree* child_tree = NULL;
-
-                        while (le2 != &t->itemlist) {
-                            td = CONTAINING_RECORD(le2, tree_data, list_entry);
-                            if (!td->ignore)
-                                break;
-                            le2 = le2->Flink;
-                        }
-                        
-                        TRACE("deleting top-level tree in root %llx with one item\n", t->root->id);
-                        
-                        if (t->has_new_address) { // delete associated EXTENT_ITEM
-                            Status = reduce_tree_extent(Vcb, t->new_address, t, Irp, rollback);
-                            
-                            if (!NT_SUCCESS(Status)) {
-                                ERR("reduce_tree_extent returned %08x\n", Status);
-                                return Status;
-                            }
-                            
-                            t->has_new_address = FALSE;
-                        } else if (t->has_address) {
-                            Status = reduce_tree_extent(Vcb,t->header.address, t, Irp, rollback);
-                            
-                            if (!NT_SUCCESS(Status)) {
-                                ERR("reduce_tree_extent returned %08x\n", Status);
-                                return Status;
-                            }
-                            
-                            t->has_address = FALSE;
-                        }
-                        
-                        if (!td->treeholder.tree) { // load first item if not already loaded
-                            KEY searchkey = {0,0,0};
-                            traverse_ptr tp;
-                            
-                            Status = find_item(Vcb, t->root, &tp, &searchkey, FALSE, Irp);
-                            if (!NT_SUCCESS(Status)) {
-                                ERR("error - find_item returned %08x\n", Status);
-                                return Status;
-                            }
-                        }
-                        
-                        child_tree = td->treeholder.tree;
-                        
-                        if (child_tree) {
-                            child_tree->parent = NULL;
-                            child_tree->paritem = NULL;
-                        }
-                        
-                        t->root->root_item.bytes_used -= Vcb->superblock.node_size;
-
-                        free_tree(t);
-                        
-                        if (child_tree)
-                            child_tree->root->treeholder.tree = child_tree;
-                    }
-                }
-                
-                le = nextle;
-            }
-        }
-    }
-    
-    return STATUS_SUCCESS;
-}
-
-static NTSTATUS remove_root_extents(device_extension* Vcb, root* r, tree_holder* th, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) {
-    NTSTATUS Status;
-    
-    if (level > 0) {
-        if (!th->tree) {
-            Status = load_tree(Vcb, th->address, r, &th->tree, NULL, NULL);
-            
-            if (!NT_SUCCESS(Status)) {
-                ERR("load_tree(%llx) returned %08x\n", th->address, Status);
-                return Status;
-            }
-        }
-        
-        if (th->tree->header.level > 0) {
-            LIST_ENTRY* le = th->tree->itemlist.Flink;
-            
-            while (le != &th->tree->itemlist) {
-                tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
-                
-                if (!td->ignore) {
-                    Status = remove_root_extents(Vcb, r, &td->treeholder, th->tree->header.level - 1, Irp, rollback);
-                    
-                    if (!NT_SUCCESS(Status)) {
-                        ERR("remove_root_extents returned %08x\n", Status);
-                        return Status;
-                    }
-                }
-                
-                le = le->Flink;
-            }
-        }
-    }
-    
-    if (!th->tree || th->tree->has_address) {
-        Status = reduce_tree_extent(Vcb, th->address, NULL, Irp, rollback);
-        
-        if (!NT_SUCCESS(Status)) {
-            ERR("reduce_tree_extent(%llx) returned %08x\n", th->address, Status);
-            return Status;
-        }
-    }
-    
-    return STATUS_SUCCESS;
-}
-
-static NTSTATUS drop_root(device_extension* Vcb, root* r, PIRP Irp, LIST_ENTRY* rollback) {
-    NTSTATUS Status;
-    KEY searchkey;
-    traverse_ptr tp;
-    
-    Status = remove_root_extents(Vcb, r, &r->treeholder, r->root_item.root_level, Irp, rollback);
-    if (!NT_SUCCESS(Status)) {
-        ERR("remove_root_extents returned %08x\n", Status);
-        return Status;
-    }
-    
-    // remove entry in uuid root (tree 9)
-    if (Vcb->uuid_root) {
-        RtlCopyMemory(&searchkey.obj_id, &r->root_item.uuid.uuid[0], sizeof(UINT64));
-        searchkey.obj_type = TYPE_SUBVOL_UUID;
-        RtlCopyMemory(&searchkey.offset, &r->root_item.uuid.uuid[sizeof(UINT64)], sizeof(UINT64));
-        
-        if (searchkey.obj_id != 0 || searchkey.offset != 0) {
-            Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE, Irp);
-            if (!NT_SUCCESS(Status)) {
-                WARN("find_item returned %08x\n", Status);
-            } else {
-                if (!keycmp(&tp.item->key, &searchkey))
-                    delete_tree_item(Vcb, &tp, rollback);
-                else
-                    WARN("could not find (%llx,%x,%llx) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
-            }
-        }
-    }
-    
-    // delete ROOT_ITEM
-    
-    searchkey.obj_id = r->id;
-    searchkey.obj_type = TYPE_ROOT_ITEM;
-    searchkey.offset = 0xffffffffffffffff;
-    
-    Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("find_item returned %08x\n", Status);
-        return Status;
-    }
-    
-    if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type)
-        delete_tree_item(Vcb, &tp, rollback);
-    else
-        WARN("could not find (%llx,%x,%llx) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
-    
-    // delete items in tree cache
-    
-    free_trees_root(Vcb, r);
-    
-    return STATUS_SUCCESS;
-}
-
-static NTSTATUS drop_roots(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
-    LIST_ENTRY *le = Vcb->drop_roots.Flink, *le2;
-    NTSTATUS Status;
-    
-    while (le != &Vcb->drop_roots) {
-        root* r = CONTAINING_RECORD(le, root, list_entry);
-        
-        le2 = le->Flink;
-        
-        Status = drop_root(Vcb, r, Irp, rollback);
-        if (!NT_SUCCESS(Status)) {
-            ERR("drop_root(%llx) returned %08x\n", r->id, Status);
-            return Status;
-        }
-        
-        le = le2;
-    }
-    
-    return STATUS_SUCCESS;
-}
-
-static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp, LIST_ENTRY* rollback) {
-    CHUNK_ITEM* ci;
-    CHUNK_ITEM_STRIPE* cis;
-    BLOCK_GROUP_ITEM* bgi;
-    UINT16 i, factor;
-    NTSTATUS Status;
-    
-    ci = ExAllocatePoolWithTag(PagedPool, c->size, ALLOC_TAG);
-    if (!ci) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-    
-    RtlCopyMemory(ci, c->chunk_item, c->size);
-    
-    if (!insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size, NULL, Irp, rollback)) {
-        ERR("insert_tree_item failed\n");
-        ExFreePool(ci);
-        return STATUS_INTERNAL_ERROR;
-    }
-
-    if (c->chunk_item->type & BLOCK_FLAG_SYSTEM) {
-        Status = add_to_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size);
-        if (!NT_SUCCESS(Status)) {
-            ERR("add_to_bootstrap returned %08x\n", Status);
-            return Status;
-        }
-    }
-
-    // add BLOCK_GROUP_ITEM to tree 2
-    
-    bgi = ExAllocatePoolWithTag(PagedPool, sizeof(BLOCK_GROUP_ITEM), ALLOC_TAG);
-    if (!bgi) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-
-    bgi->used = c->used;
-    bgi->chunk_tree = 0x100;
-    bgi->flags = c->chunk_item->type;
-    
-    if (!insert_tree_item(Vcb, Vcb->extent_root, c->offset, TYPE_BLOCK_GROUP_ITEM, c->chunk_item->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, Irp, rollback)) {
-        ERR("insert_tree_item failed\n");
-        ExFreePool(bgi);
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-    
-    if (c->chunk_item->type & BLOCK_FLAG_RAID0)
-        factor = c->chunk_item->num_stripes;
-    else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
-        factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
-    else // SINGLE, DUPLICATE, RAID1
-        factor = 1;
-
-    // add DEV_EXTENTs to tree 4
-    
-    cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
-    
-    for (i = 0; i < c->chunk_item->num_stripes; i++) {
-        DEV_EXTENT* de;
-        
-        de = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_EXTENT), ALLOC_TAG);
-        if (!de) {
-            ERR("out of memory\n");
-            return STATUS_INSUFFICIENT_RESOURCES;
-        }
-        
-        de->chunktree = Vcb->chunk_root->id;
-        de->objid = 0x100;
-        de->address = c->offset;
-        de->length = c->chunk_item->size / factor;
-        de->chunktree_uuid = Vcb->chunk_root->treeholder.tree->header.chunk_tree_uuid;
-
-        if (!insert_tree_item(Vcb, Vcb->dev_root, c->devices[i]->devitem.dev_id, TYPE_DEV_EXTENT, cis[i].offset, de, sizeof(DEV_EXTENT), NULL, Irp, rollback)) {
-            ERR("insert_tree_item failed\n");
-            ExFreePool(de);
-            return STATUS_INTERNAL_ERROR;
-        }
-        
-        // FIXME - no point in calling this twice for the same device
-        Status = update_dev_item(Vcb, c->devices[i], Irp, rollback);
-        if (!NT_SUCCESS(Status)) {
-            ERR("update_dev_item returned %08x\n", Status);
-            return Status;
-        }
-    }
-    
-    c->created = FALSE;
-    
-    return STATUS_SUCCESS;
-}
-
-static void remove_from_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset) {
-    sys_chunk* sc2;
-    LIST_ENTRY* le;
-
-    le = Vcb->sys_chunks.Flink;
-    while (le != &Vcb->sys_chunks) {
-        sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
-        
-        if (sc2->key.obj_id == obj_id && sc2->key.obj_type == obj_type && sc2->key.offset == offset) {
-            RemoveEntryList(&sc2->list_entry);
-            
-            Vcb->superblock.n -= sizeof(KEY) + sc2->size;
-            
-            ExFreePool(sc2->data);
-            ExFreePool(sc2);
-            regen_bootstrap(Vcb);
-            return;
-        }
-        
-        le = le->Flink;
-    }
-}
-
-static NTSTATUS drop_chunk(device_extension* Vcb, chunk* c, PIRP Irp, LIST_ENTRY* rollback) {
-    NTSTATUS Status;
-    KEY searchkey;
-    traverse_ptr tp;
-    UINT64 i, factor;
-    CHUNK_ITEM_STRIPE* cis;
-    
-    TRACE("dropping chunk %llx\n", c->offset);
-    
-    // remove free space cache
-    if (c->cache) {
-        c->cache->deleted = TRUE;
-        
-        flush_fcb(c->cache, TRUE, Irp, rollback);
-        
-        free_fcb(c->cache);
-        
-        searchkey.obj_id = FREE_SPACE_CACHE_ID;
-        searchkey.obj_type = 0;
-        searchkey.offset = c->offset;
-        
-        Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("error - find_item returned %08x\n", Status);
-            return Status;
-        }
-
-        if (!keycmp(&tp.item->key, &searchkey)) {
-            delete_tree_item(Vcb, &tp, rollback);
-        }
-    }
-    
-    if (c->chunk_item->type & BLOCK_FLAG_RAID0)
-        factor = c->chunk_item->num_stripes;
-    else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
-        factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
-    else // SINGLE, DUPLICATE, RAID1
-        factor = 1;
-    
-    cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
-    for (i = 0; i < c->chunk_item->num_stripes; i++) {
-        if (!c->created) {
-            // remove DEV_EXTENTs from tree 4
-            searchkey.obj_id = cis[i].dev_id;
-            searchkey.obj_type = TYPE_DEV_EXTENT;
-            searchkey.offset = cis[i].offset;
-            
-            Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, Irp);
-            if (!NT_SUCCESS(Status)) {
-                ERR("error - find_item returned %08x\n", Status);
-                return Status;
-            }
-            
-            if (!keycmp(&tp.item->key, &searchkey)) {
-                delete_tree_item(Vcb, &tp, rollback);
-                
-                if (tp.item->size >= sizeof(DEV_EXTENT)) {
-                    DEV_EXTENT* de = (DEV_EXTENT*)tp.item->data;
-                    
-                    c->devices[i]->devitem.bytes_used -= de->length;
-                    
-                    space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, de->length, rollback);
-                }
-            } else
-                WARN("could not find (%llx,%x,%llx) in dev tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
-        } else {
-            UINT64 len = c->chunk_item->size / factor;
-            
-            c->devices[i]->devitem.bytes_used -= len;
-            space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, len, rollback);
-        }
-    }
-    
-    // modify DEV_ITEMs in chunk tree
-    for (i = 0; i < c->chunk_item->num_stripes; i++) {
-        if (c->devices[i]) {
-            UINT64 j;
-            DEV_ITEM* di;
-            
-            searchkey.obj_id = 1;
-            searchkey.obj_type = TYPE_DEV_ITEM;
-            searchkey.offset = c->devices[i]->devitem.dev_id;
-            
-            Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp);
-            if (!NT_SUCCESS(Status)) {
-                ERR("error - find_item returned %08x\n", Status);
-                return Status;
-            }
-            
-            if (keycmp(&tp.item->key, &searchkey)) {
-                ERR("error - could not find DEV_ITEM for device %llx\n", searchkey.offset);
-                return STATUS_INTERNAL_ERROR;
-            }
-            
-            delete_tree_item(Vcb, &tp, rollback);
-            
-            di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
-            if (!di) {
-                ERR("out of memory\n");
-                return STATUS_INSUFFICIENT_RESOURCES;
-            }
-            
-            RtlCopyMemory(di, &c->devices[i]->devitem, sizeof(DEV_ITEM));
-            
-            if (!insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, c->devices[i]->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp, rollback)) {
-                ERR("insert_tree_item failed\n");
-                return STATUS_INTERNAL_ERROR;
-            }
-            
-            for (j = i + 1; j < c->chunk_item->num_stripes; j++) {
-                if (c->devices[j] == c->devices[i])
-                    c->devices[j] = NULL;
-            }
-        }
-    }
-    
-    if (!c->created) {
-        // remove CHUNK_ITEM from chunk tree
-        searchkey.obj_id = 0x100;
-        searchkey.obj_type = TYPE_CHUNK_ITEM;
-        searchkey.offset = c->offset;
-        
-        Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("error - find_item returned %08x\n", Status);
-            return Status;
-        }
-        
-        if (!keycmp(&tp.item->key, &searchkey))
-            delete_tree_item(Vcb, &tp, rollback);
-        else
-            WARN("could not find CHUNK_ITEM for chunk %llx\n", c->offset);
-        
-        // remove BLOCK_GROUP_ITEM from extent tree
-        searchkey.obj_id = c->offset;
-        searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
-        searchkey.offset = 0xffffffffffffffff;
-        
-        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("error - find_item returned %08x\n", Status);
-            return Status;
-        }
-        
-        if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type)
-            delete_tree_item(Vcb, &tp, rollback);
-        else
-            WARN("could not find BLOCK_GROUP_ITEM for chunk %llx\n", c->offset);
-    }
-    
-    if (c->chunk_item->type & BLOCK_FLAG_SYSTEM)
-        remove_from_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset);
-    
-    RemoveEntryList(&c->list_entry);
-    
-    if (c->list_entry_changed.Flink)
-        RemoveEntryList(&c->list_entry_changed);
-    
-    ExFreePool(c->chunk_item);
-    ExFreePool(c->devices);
-    
-    while (!IsListEmpty(&c->space)) {
-        space* s = CONTAINING_RECORD(c->space.Flink, space, list_entry);
-        
-        RemoveEntryList(&s->list_entry);
-        ExFreePool(s);
-    }
-    
-    while (!IsListEmpty(&c->deleting)) {
-        space* s = CONTAINING_RECORD(c->deleting.Flink, space, list_entry);
-        
-        RemoveEntryList(&s->list_entry);
-        ExFreePool(s);
-    }
-    
-    ExDeleteResourceLite(&c->lock);
-    ExDeleteResourceLite(&c->changed_extents_lock);
-
-    ExFreePool(c);
-    
-    return STATUS_SUCCESS;
-}
-
-static NTSTATUS update_chunks(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
-    LIST_ENTRY *le = Vcb->chunks_changed.Flink, *le2;
-    NTSTATUS Status;
-    UINT64 used_minus_cache;
-    
-    ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
-    
-    // FIXME - do tree chunks before data chunks
-    
-    while (le != &Vcb->chunks_changed) {
-        chunk* c = CONTAINING_RECORD(le, chunk, list_entry_changed);
-        
-        le2 = le->Flink;
-        
-        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-        
-        used_minus_cache = c->used;
-        
-        // subtract self-hosted cache
-        if (used_minus_cache > 0 && c->chunk_item->type & BLOCK_FLAG_DATA && c->cache && c->cache->inode_item.st_size == c->used) {
-            LIST_ENTRY* le3;
-            
-            le3 = c->cache->extents.Flink;
-            while (le3 != &c->cache->extents) {
-                extent* ext = CONTAINING_RECORD(le3, extent, list_entry);
-                EXTENT_DATA* ed = ext->data;
-                
-                if (!ext->ignore) {
-                    if (ext->datalen < sizeof(EXTENT_DATA)) {
-                        ERR("extent %llx was %u bytes, expected at least %u\n", ext->offset, ext->datalen, sizeof(EXTENT_DATA));
-                        break;
-                    }
-                    
-                    if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
-                        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
-                        
-                        if (ext->datalen < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
-                            ERR("extent %llx was %u bytes, expected at least %u\n", ext->offset, ext->datalen,
-                                sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
-                            break;
-                        }
-                        
-                        if (ed2->size != 0 && ed2->address >= c->offset && ed2->address + ed2->size <= c->offset + c->chunk_item->size)
-                            used_minus_cache -= ed2->size;
-                    }
-                }
-                
-                le3 = le3->Flink;
-            }
-        }
-        
-        if (used_minus_cache == 0) {
-            Status = drop_chunk(Vcb, c, Irp, rollback);
-            if (!NT_SUCCESS(Status)) {
-                ERR("drop_chunk returned %08x\n", Status);
-                ExReleaseResourceLite(&c->lock);
-                ExReleaseResourceLite(&Vcb->chunk_lock);
-                return Status;
-            }
-        } else if (c->created) {
-            Status = create_chunk(Vcb, c, Irp, rollback);
-            if (!NT_SUCCESS(Status)) {
-                ERR("create_chunk returned %08x\n", Status);
-                ExReleaseResourceLite(&c->lock);
-                ExReleaseResourceLite(&Vcb->chunk_lock);
-                return Status;
-            }
-        }
-        
-        if (used_minus_cache > 0)
-            ExReleaseResourceLite(&c->lock);
-
-        le = le2;
-    }
-    
-    ExReleaseResourceLite(&Vcb->chunk_lock);
-    
-    return STATUS_SUCCESS;
-}
-
-static NTSTATUS STDCALL set_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char* name, UINT32 crc32, UINT8* data, UINT16 datalen, PIRP Irp, LIST_ENTRY* rollback) {
-    KEY searchkey;
-    traverse_ptr tp;
-    ULONG xasize, maxlen;
-    DIR_ITEM* xa;
-    NTSTATUS Status;
-    
-    TRACE("(%p, %llx, %llx, %s, %08x, %p, %u)\n", Vcb, subvol->id, inode, name, crc32, data, datalen);
-    
-    searchkey.obj_id = inode;
-    searchkey.obj_type = TYPE_XATTR_ITEM;
-    searchkey.offset = crc32;
-    
-    Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
-    }
-    
-    xasize = sizeof(DIR_ITEM) - 1 + (ULONG)strlen(name) + datalen;
-    maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node);
-    
-    if (!keycmp(&tp.item->key, &searchkey)) { // key exists
-        UINT8* newdata;
-        ULONG size = tp.item->size;
-        
-        xa = (DIR_ITEM*)tp.item->data;
-        
-        if (tp.item->size < sizeof(DIR_ITEM)) {
-            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM));
-        } else {
-            while (TRUE) {
-                ULONG oldxasize;
-                
-                if (size < sizeof(DIR_ITEM) || size < sizeof(DIR_ITEM) - 1 + xa->m + xa->n) {
-                    ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-                    break;
-                }
-                
-                oldxasize = sizeof(DIR_ITEM) - 1 + xa->m + xa->n;
-                
-                if (xa->n == strlen(name) && RtlCompareMemory(name, xa->name, xa->n) == xa->n) {
-                    UINT64 pos;
-                    
-                    // replace
-                    
-                    if (tp.item->size + xasize - oldxasize > maxlen) {
-                        ERR("DIR_ITEM would be over maximum size (%u + %u - %u > %u)\n", tp.item->size, xasize, oldxasize, maxlen);
-                        return STATUS_INTERNAL_ERROR;
-                    }
-                    
-                    newdata = ExAllocatePoolWithTag(PagedPool, tp.item->size + xasize - oldxasize, ALLOC_TAG);
-                    if (!newdata) {
-                        ERR("out of memory\n");
-                        return STATUS_INSUFFICIENT_RESOURCES;
-                    }
-                    
-                    pos = (UINT8*)xa - tp.item->data;
-                    if (pos + oldxasize < tp.item->size) { // copy after changed xattr
-                        RtlCopyMemory(newdata + pos + xasize, tp.item->data + pos + oldxasize, tp.item->size - pos - oldxasize);
-                    }
-                    
-                    if (pos > 0) { // copy before changed xattr
-                        RtlCopyMemory(newdata, tp.item->data, pos);
-                        xa = (DIR_ITEM*)(newdata + pos);
-                    } else
-                        xa = (DIR_ITEM*)newdata;
-                    
-                    xa->key.obj_id = 0;
-                    xa->key.obj_type = 0;
-                    xa->key.offset = 0;
-                    xa->transid = Vcb->superblock.generation;
-                    xa->m = datalen;
-                    xa->n = (UINT16)strlen(name);
-                    xa->type = BTRFS_TYPE_EA;
-                    RtlCopyMemory(xa->name, name, strlen(name));
-                    RtlCopyMemory(xa->name + strlen(name), data, datalen);
-                    
-                    delete_tree_item(Vcb, &tp, rollback);
-                    insert_tree_item(Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, newdata, tp.item->size + xasize - oldxasize, NULL, Irp, rollback);
-                    
-                    break;
-                }
-                
-                if ((UINT8*)xa - (UINT8*)tp.item->data + oldxasize >= size) {
-                    // not found, add to end of data
-                    
-                    if (tp.item->size + xasize > maxlen) {
-                        ERR("DIR_ITEM would be over maximum size (%u + %u > %u)\n", tp.item->size, xasize, maxlen);
-                        return STATUS_INTERNAL_ERROR;
-                    }
-                    
-                    newdata = ExAllocatePoolWithTag(PagedPool, tp.item->size + xasize, ALLOC_TAG);
-                    if (!newdata) {
-                        ERR("out of memory\n");
-                        return STATUS_INSUFFICIENT_RESOURCES;
-                    }
-                    
-                    RtlCopyMemory(newdata, tp.item->data, tp.item->size);
-                    
-                    xa = (DIR_ITEM*)((UINT8*)newdata + tp.item->size);
-                    xa->key.obj_id = 0;
-                    xa->key.obj_type = 0;
-                    xa->key.offset = 0;
-                    xa->transid = Vcb->superblock.generation;
-                    xa->m = datalen;
-                    xa->n = (UINT16)strlen(name);
-                    xa->type = BTRFS_TYPE_EA;
-                    RtlCopyMemory(xa->name, name, strlen(name));
-                    RtlCopyMemory(xa->name + strlen(name), data, datalen);
-                    
-                    delete_tree_item(Vcb, &tp, rollback);
-                    insert_tree_item(Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, newdata, tp.item->size + xasize, NULL, Irp, rollback);
-                    
-                    break;
-                } else {
-                    xa = (DIR_ITEM*)&xa->name[xa->m + xa->n];
-                    size -= oldxasize;
-                }
-            }
-        }
-    } else {
-        if (xasize > maxlen) {
-            ERR("DIR_ITEM would be over maximum size (%u > %u)\n", xasize, maxlen);
-            return STATUS_INTERNAL_ERROR;
-        }
-        
-        xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG);
-        if (!xa) {
-            ERR("out of memory\n");
-            return STATUS_INSUFFICIENT_RESOURCES;
-        }
-        
-        xa->key.obj_id = 0;
-        xa->key.obj_type = 0;
-        xa->key.offset = 0;
-        xa->transid = Vcb->superblock.generation;
-        xa->m = datalen;
-        xa->n = (UINT16)strlen(name);
-        xa->type = BTRFS_TYPE_EA;
-        RtlCopyMemory(xa->name, name, strlen(name));
-        RtlCopyMemory(xa->name + strlen(name), data, datalen);
-        
-        insert_tree_item(Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, NULL, Irp, rollback);
-    }
-    
-    return STATUS_SUCCESS;
-}
-
-static BOOL STDCALL delete_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char* name, UINT32 crc32, PIRP Irp, LIST_ENTRY* rollback) { // Removes the entry called name from the item keyed (inode, TYPE_XATTR_ITEM, crc32) in subvol. Returns TRUE if an entry was removed, FALSE on lookup failure, malformed item, allocation failure or when no entry matches.
-    KEY searchkey;
-    traverse_ptr tp;
-    DIR_ITEM* xa;
-    NTSTATUS Status;
-    
-    TRACE("(%p, %llx, %llx, %s, %08x)\n", Vcb, subvol->id, inode, name, crc32);
-    
-    searchkey.obj_id = inode;
-    searchkey.obj_type = TYPE_XATTR_ITEM;
-    searchkey.offset = crc32;
-    
-    Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp); // look up the item whose key offset is crc32
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return FALSE;
-    }
-    
-    if (!keycmp(&tp.item->key, &searchkey)) { // key exists
-        ULONG size = tp.item->size; // bytes of the item that have not been walked yet
-        
-        if (tp.item->size < sizeof(DIR_ITEM)) {
-            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM));
-            
-            return FALSE;
-        } else {
-            xa = (DIR_ITEM*)tp.item->data; // first entry packed into the item
-            
-            while (TRUE) {
-                ULONG oldxasize;
-                
-                if (size < sizeof(DIR_ITEM) || size < sizeof(DIR_ITEM) - 1 + xa->m + xa->n) { // the remaining bytes must hold a header plus this entry's m + n payload bytes
-                    ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-                        
-                    return FALSE;
-                }
-                
-                oldxasize = sizeof(DIR_ITEM) - 1 + xa->m + xa->n; // total size of the current entry
-                
-                if (xa->n == strlen(name) && RtlCompareMemory(name, xa->name, xa->n) == xa->n) { // same length and same bytes: this is the entry to remove
-                    ULONG newsize;
-                    UINT8 *newdata, *dioff;
-                    
-                    newsize = tp.item->size - (sizeof(DIR_ITEM) - 1 + xa->n + xa->m); // item size once this entry is taken out
-                    
-                    delete_tree_item(Vcb, &tp, rollback); // the whole item is dropped; surviving entries are re-inserted below. NOTE(review): tp.item->data is still read afterwards - confirm it stays valid after this call
-                    
-                    if (newsize == 0) { // it was the only entry in the item
-                        TRACE("xattr %s deleted\n", name);
-                        
-                        return TRUE;
-                    }
-
-                    // FIXME - deleting collisions almost certainly works, but we should test it properly anyway
-                    newdata = ExAllocatePoolWithTag(PagedPool, newsize, ALLOC_TAG);
-                    if (!newdata) {
-                        ERR("out of memory\n");
-                        return FALSE;
-                    }
-
-                    if ((UINT8*)xa > tp.item->data) { // keep the entries that sit in front of the removed one
-                        RtlCopyMemory(newdata, tp.item->data, (UINT8*)xa - tp.item->data);
-                        dioff = newdata + ((UINT8*)xa - tp.item->data);
-                    } else {
-                        dioff = newdata;
-                    }
-                    
-                    if ((UINT8*)&xa->name[xa->n+xa->m] - tp.item->data < tp.item->size) // keep the entries that sit behind the removed one
-                        RtlCopyMemory(dioff, &xa->name[xa->n+xa->m], tp.item->size - ((UINT8*)&xa->name[xa->n+xa->m] - tp.item->data));
-                    
-                    insert_tree_item(Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, newdata, newsize, NULL, Irp, rollback); // NOTE(review): the result is not checked here although other callers test it - confirm whether a failure needs handling
-                    
-                        
-                    return TRUE;
-                }
-                
-                if (xa->m + xa->n >= size) { // FIXME - test this works. NOTE(review): the truncation check above already guarantees sizeof(DIR_ITEM) - 1 + m + n <= size, so confirm this end-of-item test can actually fire
-                    WARN("xattr %s not found\n", name);
-
-                    return FALSE;
-                } else {
-                    xa = (DIR_ITEM*)&xa->name[xa->m + xa->n]; // step to the next packed entry
-                    size -= oldxasize;
-                }
-            }
-        }
-    } else { // the key found by find_item differs from searchkey
-        WARN("xattr %s not found\n", name);
-        
-        return FALSE;
-    }
-}
-
-static NTSTATUS insert_sparse_extent(fcb* fcb, UINT64 start, UINT64 length, PIRP Irp, LIST_ENTRY* rollback) { // Inserts an EXTENT_DATA item keyed (fcb->inode, TYPE_EXTENT_DATA, start) that covers length bytes with address and size 0. Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES or STATUS_INTERNAL_ERROR.
-    EXTENT_DATA* ed;
-    EXTENT_DATA2* ed2;
-    
-    TRACE("((%llx, %llx), %llx, %llx)\n", fcb->subvol->id, fcb->inode, start, length);
-    
-    ed = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG); // EXTENT_DATA header with an EXTENT_DATA2 stored in its data field
-    if (!ed) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-    
-    ed->generation = fcb->Vcb->superblock.generation;
-    ed->decoded_size = length;
-    ed->compression = BTRFS_COMPRESSION_NONE;
-    ed->encryption = BTRFS_ENCRYPTION_NONE;
-    ed->encoding = BTRFS_ENCODING_NONE;
-    ed->type = EXTENT_TYPE_REGULAR;
-    
-    ed2 = (EXTENT_DATA2*)ed->data;
-    ed2->address = 0; // address and size are both 0 - presumably this is how a hole is marked; confirm against the on-disk format
-    ed2->size = 0;
-    ed2->offset = 0;
-    ed2->num_bytes = length; // the range [start, start + length) is covered by this item
-    
-    if (!insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, start, ed, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, Irp, rollback)) {
-        ERR("insert_tree_item failed\n");
-        return STATUS_INTERNAL_ERROR; // NOTE(review): ed is not freed on this path - confirm whether insert_tree_item releases it on failure
-    }
-
-    return STATUS_SUCCESS;
-}
-
-void flush_fcb(fcb* fcb, BOOL cache, PIRP Irp, LIST_ENTRY* rollback) {
-    traverse_ptr tp;
-    KEY searchkey;
-    NTSTATUS Status;
-    INODE_ITEM* ii;
-    UINT64 ii_offset;
-#ifdef DEBUG_PARANOID
-    UINT64 old_size = 0;
-    BOOL extents_changed;
-#endif
-    
-//     ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE);
-    
-    while (!IsListEmpty(&fcb->index_list)) {
-        LIST_ENTRY* le = RemoveHeadList(&fcb->index_list);
-        index_entry* ie = CONTAINING_RECORD(le, index_entry, list_entry);
-
-        if (ie->utf8.Buffer) ExFreePool(ie->utf8.Buffer);
-        if (ie->filepart_uc.Buffer) ExFreePool(ie->filepart_uc.Buffer);
-        ExFreePool(ie);
-    }
-    
-    fcb->index_loaded = FALSE;
-    
-    if (fcb->ads) {
-        if (fcb->deleted)
-            delete_xattr(fcb->Vcb, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adshash, Irp, rollback);
-        else {
-            Status = set_xattr(fcb->Vcb, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adshash, (UINT8*)fcb->adsdata.Buffer, fcb->adsdata.Length, Irp, rollback);
-            if (!NT_SUCCESS(Status)) {
-                ERR("set_xattr returned %08x\n", Status);
-                goto end;
-            }
-        }
-        goto end;
-    }
-    
-#ifdef DEBUG_PARANOID
-    extents_changed = fcb->extents_changed;
-#endif
-    
-    if (fcb->extents_changed) {
-        BOOL b;
-        traverse_ptr next_tp;
-        LIST_ENTRY* le;
-        BOOL prealloc = FALSE, extents_inline = FALSE;
-        UINT64 last_end;
-        
-        // delete ignored extent items
-        le = fcb->extents.Flink;
-        while (le != &fcb->extents) {
-            LIST_ENTRY* le2 = le->Flink;
-            extent* ext = CONTAINING_RECORD(le, extent, list_entry);
-            
-            if (ext->ignore) {
-                RemoveEntryList(&ext->list_entry);
-                ExFreePool(ext->data);
-                ExFreePool(ext);
-            }
-            
-            le = le2;
-        }
-        
-        le = fcb->extents.Flink;
-        while (le != &fcb->extents) {
-            LIST_ENTRY* le2 = le->Flink;
-            extent* ext = CONTAINING_RECORD(le, extent, list_entry);
-            
-            if ((ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) && le->Flink != &fcb->extents) {
-                extent* nextext = CONTAINING_RECORD(le->Flink, extent, list_entry);
-                    
-                if (ext->data->type == nextext->data->type) {
-                    EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data;
-                    EXTENT_DATA2* ned2 = (EXTENT_DATA2*)nextext->data->data;
-                    
-                    if (ed2->size != 0 && ed2->address == ned2->address && ed2->size == ned2->size &&
-                        nextext->offset == ext->offset + ed2->num_bytes && ned2->offset == ed2->offset + ed2->num_bytes) {
-                        chunk* c;
-                    
-                        ext->data->generation = fcb->Vcb->superblock.generation;
-                        ed2->num_bytes += ned2->num_bytes;
-                    
-                        RemoveEntryList(&nextext->list_entry);
-                        ExFreePool(nextext->data);
-                        ExFreePool(nextext);
-                    
-                        c = get_chunk_from_address(fcb->Vcb, ed2->address);
-                            
-                        if (!c) {
-                            ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
-                        } else {
-                            Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1,
-                                                               fcb->inode_item.flags & BTRFS_INODE_NODATASUM, ed2->size, Irp);
-                            if (!NT_SUCCESS(Status)) {
-                                ERR("update_changed_extent_ref returned %08x\n", Status);
-                                goto end;
-                            }
-                        }
-                    
-                        le2 = le;
-                    }
-                }
-            }
-            
-            le = le2;
-        }
-        
-        // delete existing EXTENT_DATA items
-        
-        searchkey.obj_id = fcb->inode;
-        searchkey.obj_type = TYPE_EXTENT_DATA;
-        searchkey.offset = 0;
-        
-        Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("error - find_item returned %08x\n", Status);
-            goto end;
-        }
-        
-        do {
-            if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type)
-                delete_tree_item(fcb->Vcb, &tp, rollback);
-            
-            b = find_next_item(fcb->Vcb, &tp, &next_tp, FALSE, Irp);
-            
-            if (b) {
-                tp = next_tp;
-                
-                if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type))
-                    break;
-            }
-        } while (b);
-        
-        if (!fcb->deleted) {
-            // add new EXTENT_DATAs
-            
-            last_end = 0;
-            
-            le = fcb->extents.Flink;
-            while (le != &fcb->extents) {
-                extent* ext = CONTAINING_RECORD(le, extent, list_entry);
-                EXTENT_DATA* ed;
-                
-                if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && ext->offset > last_end) {
-                    Status = insert_sparse_extent(fcb, last_end, ext->offset - last_end, Irp, rollback);
-                    if (!NT_SUCCESS(Status)) {
-                        ERR("insert_sparse_extent returned %08x\n", Status);
-                        goto end;
-                    }
-                }
-                    
-                ed = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG);
-                if (!ed) {
-                    ERR("out of memory\n");
-                    Status = STATUS_INSUFFICIENT_RESOURCES;
-                    goto end;
-                }
-                
-                RtlCopyMemory(ed, ext->data, ext->datalen);
-                
-                if (!insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, ext->offset, ed, ext->datalen, NULL, Irp, rollback)) {
-                    ERR("insert_tree_item failed\n");
-                    goto end;
-                }
-                
-                if (ext->datalen >= sizeof(EXTENT_DATA) && ed->type == EXTENT_TYPE_PREALLOC)
-                    prealloc = TRUE;
-                
-                if (ext->datalen >= sizeof(EXTENT_DATA) && ed->type == EXTENT_TYPE_INLINE)
-                    extents_inline = TRUE;
-                
-                if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES)) {
-                    if (ed->type == EXTENT_TYPE_INLINE)
-                        last_end = ext->offset + ed->decoded_size;
-                    else {
-                        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
-                        
-                        last_end = ext->offset + ed2->num_bytes;
-                    }
-                }
-                
-                le = le->Flink;
-            }
-            
-            if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && !extents_inline &&
-                sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) > last_end) {
-                Status = insert_sparse_extent(fcb, last_end, sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) - last_end, Irp, rollback);
-                if (!NT_SUCCESS(Status)) {
-                    ERR("insert_sparse_extent returned %08x\n", Status);
-                    goto end;
-                }
-            }
-            
-            // update prealloc flag in INODE_ITEM
-            
-            if (!prealloc)
-                fcb->inode_item.flags &= ~BTRFS_INODE_PREALLOC;
-            else
-                fcb->inode_item.flags |= BTRFS_INODE_PREALLOC;
-        }
-        
-        fcb->extents_changed = FALSE;
+        stripenum = (stripenum + 1) % (c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
     }
+
+    ExFreePool(stripeoff);
     
-    if (!fcb->created || cache) {
-        searchkey.obj_id = fcb->inode;
-        searchkey.obj_type = TYPE_INODE_ITEM;
-        searchkey.offset = 0xffffffffffffffff;
-        
-        Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("error - find_item returned %08x\n", Status);
-            goto end;
-        }
-        
-        if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
-            if (cache) {
-                ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
-                if (!ii) {
-                    ERR("out of memory\n");
-                    goto end;
-                }
-                
-                RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM));
-                
-                if (!insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp, rollback)) {
-                    ERR("insert_tree_item failed\n");
-                    goto end;
-                }
-                
-                ii_offset = 0;
-            } else {
-                ERR("could not find INODE_ITEM for inode %llx in subvol %llx\n", fcb->inode, fcb->subvol->id);
-                goto end;
-            }
-        } else {
-#ifdef DEBUG_PARANOID
-            INODE_ITEM* ii2 = (INODE_ITEM*)tp.item->data;
-            
-            old_size = ii2->st_size;
-#endif
-            
-            ii_offset = tp.item->key.offset;
-        }
-        
-        if (!cache)
-            delete_tree_item(fcb->Vcb, &tp, rollback);
-        else {
-            searchkey.obj_id = fcb->inode;
-            searchkey.obj_type = TYPE_INODE_ITEM;
-            searchkey.offset = ii_offset;
-            
-            Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
-            if (!NT_SUCCESS(Status)) {
-                ERR("error - find_item returned %08x\n", Status);
-                goto end;
-            }
-            
-            if (keycmp(&tp.item->key, &searchkey)) {
-                ERR("could not find INODE_ITEM for inode %llx in subvol %llx\n", fcb->inode, fcb->subvol->id);
-                goto end;
-            } else
-                RtlCopyMemory(tp.item->data, &fcb->inode_item, min(tp.item->size, sizeof(INODE_ITEM)));
-        }
-    } else
-        ii_offset = 0;
+    return STATUS_SUCCESS;
+}
+
+static NTSTATUS STDCALL read_stripe_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID ptr) { // Completion routine for a stripe read: records the IRP's status in the read_stripe passed as ptr and signals the master's event once no stripes are left.
+    read_stripe* stripe = ptr;
+    read_stripe_master* master = stripe->master;
+    ULONG stripes_left = InterlockedDecrement(&master->stripes_left); // atomically count this stripe as finished. NOTE(review): this happens before iosb is stored below - confirm a waiter cannot be woken by another stripe before this one's iosb is written
     
-#ifdef DEBUG_PARANOID
-    if (!extents_changed && fcb->type != BTRFS_TYPE_DIRECTORY && old_size != fcb->inode_item.st_size) {
-        ERR("error - size has changed but extents not marked as changed\n");
-        int3;
-    }
-#endif
+    stripe->iosb = Irp->IoStatus; // keep the final status of this IRP with its stripe
     
-    fcb->created = FALSE;
-        
-    if (fcb->deleted) {
-        traverse_ptr tp2;
-        
-        // delete XATTR_ITEMs
-        
-        searchkey.obj_id = fcb->inode;
-        searchkey.obj_type = TYPE_XATTR_ITEM;
-        searchkey.offset = 0;
-        
-        Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("error - find_item returned %08x\n", Status);
-            goto end;
-        }
+    if (stripes_left == 0) // this was the last outstanding stripe
+        KeSetEvent(&master->event, 0, FALSE);
     
-        while (find_next_item(fcb->Vcb, &tp, &tp2, FALSE, Irp)) {
-            tp = tp2;
-            
-            if (tp.item->key.obj_id == fcb->inode) {
-                // FIXME - do metadata thing here too?
-                if (tp.item->key.obj_type == TYPE_XATTR_ITEM) {
-                    delete_tree_item(fcb->Vcb, &tp, rollback);
-                    TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-                }
-            } else
-                break;
-        }
-        
-        goto end;
-    }
+    return STATUS_MORE_PROCESSING_REQUIRED; // per the WDK completion-routine contract this stops further completion processing of the IRP - presumably the issuer cleans the IRP up itself; confirm against the caller
+}
+
+static NTSTATUS make_read_irp(PIRP old_irp, read_stripe* stripe, UINT64 offset, void* data, UINT32 length) { // Builds an IRP_MJ_READ IRP for stripe->devobj that reads length bytes at offset into data and stores it in stripe->Irp; nothing in this function sends the IRP. Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES or STATUS_INTERNAL_ERROR.
+    PIO_STACK_LOCATION IrpSp;
+    PIRP Irp;
     
-    if (!cache) {
-        ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
-        if (!ii) {
-            ERR("out of memory\n");
-            goto end;
-        }
-        
-        RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM));
+    if (!old_irp) { // no parent IRP supplied: allocate an independent IRP
+        Irp = IoAllocateIrp(stripe->devobj->StackSize, FALSE);
         
-        if (!insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, ii_offset, ii, sizeof(INODE_ITEM), NULL, Irp, rollback)) {
-            ERR("insert_tree_item failed\n");
-            goto end;
-        }
-    }
-    
-    if (fcb->sd_dirty) {
-        Status = set_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_NTACL, EA_NTACL_HASH, (UINT8*)fcb->sd, RtlLengthSecurityDescriptor(fcb->sd), Irp, rollback);
-        if (!NT_SUCCESS(Status)) {
-            ERR("set_xattr returned %08x\n", Status);
+        if (!Irp) {
+            ERR("IoAllocateIrp failed\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
         }
+    } else { // otherwise create the new IRP as an associated IRP of old_irp
+        Irp = IoMakeAssociatedIrp(old_irp, stripe->devobj->StackSize);
         
-        fcb->sd_dirty = FALSE;
+        if (!Irp) {
+            ERR("IoMakeAssociatedIrp failed\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
     }
     
-    if (fcb->atts_changed) {
-        if (!fcb->atts_deleted) {
-            char val[64];
-            
-            TRACE("inserting new DOSATTRIB xattr\n");
-            sprintf(val, "0x%lx", fcb->atts);
-        
-            Status = set_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_DOSATTRIB, EA_DOSATTRIB_HASH, (UINT8*)val, strlen(val), Irp, rollback);
-            if (!NT_SUCCESS(Status)) {
-                ERR("set_xattr returned %08x\n", Status);
-                goto end;
-            }
-        } else
-            delete_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_DOSATTRIB, EA_DOSATTRIB_HASH, Irp, rollback);
-        
-        fcb->atts_changed = FALSE;
-        fcb->atts_deleted = FALSE;
-    }
+    IrpSp = IoGetNextIrpStackLocation(Irp); // stack location that the target device will see
+    IrpSp->MajorFunction = IRP_MJ_READ;
     
-    if (fcb->reparse_xattr_changed) {
-        if (fcb->reparse_xattr.Buffer && fcb->reparse_xattr.Length > 0) {
-            Status = set_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_REPARSE, EA_REPARSE_HASH, (UINT8*)fcb->reparse_xattr.Buffer, fcb->reparse_xattr.Length, Irp, rollback);
-            if (!NT_SUCCESS(Status)) {
-                ERR("set_xattr returned %08x\n", Status);
-                goto end;
-            }
-        } else
-            delete_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_REPARSE, EA_REPARSE_HASH, Irp, rollback);
+    if (stripe->devobj->Flags & DO_BUFFERED_IO) { // buffered I/O is not implemented: free the IRP and fail
+        FIXME("FIXME - buffered IO\n");
+        IoFreeIrp(Irp);
+        return STATUS_INTERNAL_ERROR;
+    } else if (stripe->devobj->Flags & DO_DIRECT_IO) { // direct I/O: describe data with an MDL
+        Irp->MdlAddress = IoAllocateMdl(data, length, FALSE, FALSE, NULL);
+        if (!Irp->MdlAddress) {
+            ERR("IoAllocateMdl failed\n");
+            IoFreeIrp(Irp);
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
         
-        fcb->reparse_xattr_changed = FALSE;
+        MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoWriteAccess); // NOTE(review): the WDK documents that this routine raises an exception on failure and should be called inside try/except; no guard is visible here - confirm
+    } else { // neither flag set: hand the buffer over through UserBuffer
+        Irp->UserBuffer = data;
     }
-    
-end:
-    fcb->dirty = FALSE;
-    
-//     ExReleaseResourceLite(fcb->Header.Resource);
-    return;
-}
 
-static NTSTATUS delete_root_ref(device_extension* Vcb, UINT64 subvolid, UINT64 parsubvolid, UINT64 parinode, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback) {
-    KEY searchkey;
-    traverse_ptr tp;
-    NTSTATUS Status;
+    IrpSp->Parameters.Read.Length = length;
+    IrpSp->Parameters.Read.ByteOffset.QuadPart = offset;
     
-    searchkey.obj_id = parsubvolid;
-    searchkey.obj_type = TYPE_ROOT_REF;
-    searchkey.offset = subvolid;
+    Irp->UserIosb = &stripe->iosb; // status block belonging to this stripe
     
-    Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
-    }
+    IoSetCompletionRoutine(Irp, read_stripe_completion, stripe, TRUE, TRUE, TRUE); // stripe is the completion context; the three TRUE flags request the callback on success, error and cancel
     
-    if (!keycmp(&searchkey, &tp.item->key)) {
-        if (tp.item->size < sizeof(ROOT_REF)) {
-            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF));
-            return STATUS_INTERNAL_ERROR;
-        } else {
-            ROOT_REF* rr;
-            ULONG len;
-            
-            rr = (ROOT_REF*)tp.item->data;
-            len = tp.item->size;
-            
-            do {
-                ULONG itemlen;
-                
-                if (len < sizeof(ROOT_REF) || len < sizeof(ROOT_REF) - 1 + rr->n) {
-                    ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-                    break;
-                }
-                
-                itemlen = sizeof(ROOT_REF) - sizeof(char) + rr->n;
-                
-                if (rr->dir == parinode && rr->n == utf8->Length && RtlCompareMemory(rr->name, utf8->Buffer, rr->n) == rr->n) {
-                    ULONG newlen = tp.item->size - itemlen;
-                    
-                    delete_tree_item(Vcb, &tp, rollback);
-                    
-                    if (newlen == 0) {
-                        TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-                    } else {
-                        UINT8 *newrr = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *rroff;
-                        
-                        if (!newrr) {
-                            ERR("out of memory\n");
-                            return STATUS_INSUFFICIENT_RESOURCES;
-                        }
-                        
-                        TRACE("modifying (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-
-                        if ((UINT8*)rr > tp.item->data) {
-                            RtlCopyMemory(newrr, tp.item->data, (UINT8*)rr - tp.item->data);
-                            rroff = newrr + ((UINT8*)rr - tp.item->data);
-                        } else {
-                            rroff = newrr;
-                        }
-                        
-                        if ((UINT8*)&rr->name[rr->n] - tp.item->data < tp.item->size)
-                            RtlCopyMemory(rroff, &rr->name[rr->n], tp.item->size - ((UINT8*)&rr->name[rr->n] - tp.item->data));
-                        
-                        insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newrr, newlen, NULL, Irp, rollback);
-                    }
-                    
-                    break;
-                }
-                
-                if (len > itemlen) {
-                    len -= itemlen;
-                    rr = (ROOT_REF*)&rr->name[rr->n];
-                } else
-                    break;
-            } while (len > 0);
-        }
-    } else {
-        WARN("could not find ROOT_REF entry for subvol %llx in %llx\n", searchkey.offset, searchkey.obj_id);
-        return STATUS_NOT_FOUND;
-    }
+    stripe->Irp = Irp; // hand the prepared IRP back through the stripe
     
     return STATUS_SUCCESS;
 }
 
-static NTSTATUS add_root_ref(device_extension* Vcb, UINT64 subvolid, UINT64 parsubvolid, ROOT_REF* rr, PIRP Irp, LIST_ENTRY* rollback) {
-    KEY searchkey;
-    traverse_ptr tp;
-    NTSTATUS Status;
-    
-    searchkey.obj_id = parsubvolid;
-    searchkey.obj_type = TYPE_ROOT_REF;
-    searchkey.offset = subvolid;
+static NTSTATUS prepare_raid5_write(PIRP Irp, chunk* c, UINT64 address, void* data, UINT32 length, write_stripe* stripes) {
+    UINT64 startoff, endoff;
+    UINT16 startoffstripe, endoffstripe, stripenum, parity, logstripe;
+    UINT64 start = 0xffffffffffffffff, end = 0;
+    UINT64 pos, stripepos;
+    UINT32 firststripesize, laststripesize;
+    UINT32 i;
+    UINT8* data2 = (UINT8*)data;
+    UINT32 num_reads;
+    BOOL same_stripe = FALSE, multiple_stripes;
     
-    Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
-    }
+    get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes - 1, &startoff, &startoffstripe);
+    get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes - 1, &endoff, &endoffstripe);
     
-    if (!keycmp(&searchkey, &tp.item->key)) {
-        ULONG rrsize = tp.item->size + sizeof(ROOT_REF) - 1 + rr->n;
-        UINT8* rr2;
+    for (i = 0; i < c->chunk_item->num_stripes - 1; i++) {
+        UINT64 ststart, stend;
         
-        rr2 = ExAllocatePoolWithTag(PagedPool, rrsize, ALLOC_TAG);
-        if (!rr2) {
-            ERR("out of memory\n");
-            return STATUS_INSUFFICIENT_RESOURCES;
+        if (startoffstripe > i) {
+            ststart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
+        } else if (startoffstripe == i) {
+            ststart = startoff;
+        } else {
+            ststart = startoff - (startoff % c->chunk_item->stripe_length);
         }
-        
-        if (tp.item->size > 0)
-            RtlCopyMemory(rr2, tp.item->data, tp.item->size);
-        
-        RtlCopyMemory(rr2 + tp.item->size, rr, sizeof(ROOT_REF) - 1 + rr->n);
-        ExFreePool(rr);
-        
-        delete_tree_item(Vcb, &tp, rollback);
-        
-        if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr2, rrsize, NULL, Irp, rollback)) {
-            ERR("error - failed to insert item\n");
-            ExFreePool(rr2);
-            return STATUS_INTERNAL_ERROR;
+
+        if (endoffstripe > i) {
+            stend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
+        } else if (endoffstripe == i) {
+            stend = endoff + 1;
+        } else {
+            stend = endoff - (endoff % c->chunk_item->stripe_length);
         }
-    } else {
-        if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr, sizeof(ROOT_REF) - 1 + rr->n, NULL, Irp, rollback)) {
-            ERR("error - failed to insert item\n");
-            ExFreePool(rr);
-            return STATUS_INTERNAL_ERROR;
+
+        if (ststart != stend) {
+            stripes[i].start = ststart;
+            stripes[i].end = stend;
+            
+            if (ststart < start) {
+                start = ststart;
+                firststripesize = c->chunk_item->stripe_length - (ststart % c->chunk_item->stripe_length);
+            }
+
+            if (stend > end) {
+                end = stend;
+                laststripesize = stend % c->chunk_item->stripe_length;
+                if (laststripesize == 0)
+                    laststripesize = c->chunk_item->stripe_length;
+            }
         }
     }
     
-    return STATUS_SUCCESS;
-}
-
-static NTSTATUS STDCALL update_root_backref(device_extension* Vcb, UINT64 subvolid, UINT64 parsubvolid, PIRP Irp, LIST_ENTRY* rollback) {
-    KEY searchkey;
-    traverse_ptr tp;
-    UINT8* data;
-    ULONG datalen;
-    NTSTATUS Status;
-    
-    searchkey.obj_id = parsubvolid;
-    searchkey.obj_type = TYPE_ROOT_REF;
-    searchkey.offset = subvolid;
-    
-    Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
+    if (start == end) {
+        ERR("error: start == end (%llx)\n", start);
+        return STATUS_INTERNAL_ERROR;
     }
     
-    if (!keycmp(&tp.item->key, &searchkey) && tp.item->size > 0) {
-        datalen = tp.item->size;
-        
-        data = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG);
-        if (!data) {
+    if (startoffstripe == endoffstripe && start / c->chunk_item->stripe_length == end / c->chunk_item->stripe_length) {
+        firststripesize = end - start;
+        laststripesize = firststripesize;
+    }
+
+    for (i = 0; i < c->chunk_item->num_stripes; i++) {
+        stripes[i].data = ExAllocatePoolWithTag(NonPagedPool, end - start, ALLOC_TAG);
+        if (!stripes[i].data) {
             ERR("out of memory\n");
             return STATUS_INSUFFICIENT_RESOURCES;
         }
         
-        RtlCopyMemory(data, tp.item->data, datalen);
-    } else {
-        datalen = 0;
-    }
-    
-    searchkey.obj_id = subvolid;
-    searchkey.obj_type = TYPE_ROOT_BACKREF;
-    searchkey.offset = parsubvolid;
-    
-    Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        
-        if (datalen > 0)
-            ExFreePool(data);
-        
-        return Status;
-    }
-    
-    if (!keycmp(&tp.item->key, &searchkey))
-        delete_tree_item(Vcb, &tp, rollback);
-    
-    if (datalen > 0) {
-        if (!insert_tree_item(Vcb, Vcb->root_root, subvolid, TYPE_ROOT_BACKREF, parsubvolid, data, datalen, NULL, Irp, rollback)) {
-            ERR("error - failed to insert item\n");
-            ExFreePool(data);
-            return STATUS_INTERNAL_ERROR;
+        if (i < c->chunk_item->num_stripes - 1) {
+            if (stripes[i].start == 0 && stripes[i].end == 0)
+                stripes[i].start = stripes[i].end = start;
         }
     }
     
-    return STATUS_SUCCESS;
-}
-
-static NTSTATUS flush_fileref(file_ref* fileref, PIRP Irp, LIST_ENTRY* rollback) {
-    NTSTATUS Status;
-    
-    // if fileref created and then immediately deleted, do nothing
-    if (fileref->created && fileref->deleted) {
-        fileref->dirty = FALSE;
-        return STATUS_SUCCESS;
-    }
+    num_reads = 0;
+    multiple_stripes = (end - 1) / c->chunk_item->stripe_length != start / c->chunk_item->stripe_length;
     
-    if (fileref->fcb->ads) {
-        fileref->dirty = FALSE;
-        return STATUS_SUCCESS;
+    for (i = 0; i < c->chunk_item->num_stripes - 1; i++) {
+        if (stripes[i].start == stripes[i].end) {
+            num_reads++;
+            
+            if (multiple_stripes)
+                num_reads++;
+        } else {
+            if (stripes[i].start > start)
+                num_reads++;
+            
+            if (stripes[i].end < end)
+                num_reads++;
+        }
     }
     
-    if (fileref->created) {
-        ULONG disize;
-        DIR_ITEM *di, *di2;
-        UINT32 crc32;
-        
-        crc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->utf8.Buffer, fileref->utf8.Length);
+    if (num_reads > 0) {
+        UINT32 j;
+        read_stripe_master* master;
+        read_stripe* read_stripes;
+        CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
+        NTSTATUS Status;
         
-        disize = sizeof(DIR_ITEM) - 1 + fileref->utf8.Length;
-        di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
-        if (!di) {
+        master = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_stripe_master), ALLOC_TAG);
+        if (!master) {
             ERR("out of memory\n");
             return STATUS_INSUFFICIENT_RESOURCES;
         }
         
-        if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
-            di->key.obj_id = fileref->fcb->inode;
-            di->key.obj_type = TYPE_INODE_ITEM;
-            di->key.offset = 0;
-        } else { // subvolume
-            di->key.obj_id = fileref->fcb->subvol->id;
-            di->key.obj_type = TYPE_ROOT_ITEM;
-            di->key.offset = 0xffffffffffffffff;
-        }
-
-        di->transid = fileref->fcb->Vcb->superblock.generation;
-        di->m = 0;
-        di->n = (UINT16)fileref->utf8.Length;
-        di->type = fileref->fcb->type;
-        RtlCopyMemory(di->name, fileref->utf8.Buffer, fileref->utf8.Length);
-        
-        di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
-        if (!di2) {
+        read_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_stripe) * num_reads, ALLOC_TAG);
+        if (!read_stripes) {
             ERR("out of memory\n");
+            ExFreePool(master);
             return STATUS_INSUFFICIENT_RESOURCES;
         }
         
-        RtlCopyMemory(di2, di, disize);
-              
-        if (!insert_tree_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, fileref->index, di, disize, NULL, Irp, rollback)) {
-            ERR("insert_tree_item failed\n");
-            Status = STATUS_INTERNAL_ERROR;
-            return Status;
-        }
-        
-        Status = add_dir_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, crc32, di2, disize, Irp, rollback);
-        if (!NT_SUCCESS(Status)) {
-            ERR("add_dir_item returned %08x\n", Status);
-            return Status;
-        }
+        parity = (((address - c->offset) / ((c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
+        stripenum = (parity + 1) % c->chunk_item->num_stripes;
         
-        if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
-            Status = add_inode_ref(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, fileref->parent->fcb->inode, fileref->index, &fileref->utf8, Irp, rollback);
-            if (!NT_SUCCESS(Status)) {
-                ERR("add_inode_ref returned %08x\n", Status);
-                return Status;
-            }
-        } else {
-            ULONG rrlen;
-            ROOT_REF* rr;
-
-            rrlen = sizeof(ROOT_REF) - 1 + fileref->utf8.Length;
+        j = 0;
+        for (i = 0; i < c->chunk_item->num_stripes - 1; i++) {
+            if (stripes[i].start > start || stripes[i].start == stripes[i].end) {
+                ULONG readlen;
                 
-            rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG);
-            if (!rr) {
-                ERR("out of memory\n");
-                return STATUS_INSUFFICIENT_RESOURCES;
+                read_stripes[j].Irp = NULL;
+                read_stripes[j].devobj = c->devices[stripenum]->devobj;
+                read_stripes[j].master = master;
+                
+                if (stripes[i].start != stripes[i].end)
+                    readlen = stripes[i].start - start;
+                else
+                    readlen = firststripesize;
+                
+                Status = make_read_irp(Irp, &read_stripes[j], start + cis[stripenum].offset, stripes[stripenum].data, readlen);
+                
+                if (!NT_SUCCESS(Status)) {
+                    ERR("make_read_irp returned %08x\n", Status);
+                    j++;
+                    goto readend;
+                }
+                
+                stripes[stripenum].skip_start = readlen;
+                
+                j++;
+                if (j == num_reads) break;
             }
             
-            rr->dir = fileref->parent->fcb->inode;
-            rr->index = fileref->index;
-            rr->n = fileref->utf8.Length;
-            RtlCopyMemory(rr->name, fileref->utf8.Buffer, fileref->utf8.Length);
+            stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
+        }
+        
+        if (j < num_reads) {
+            parity = (((address + length - 1 - c->offset) / ((c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
+            stripenum = (parity + 1) % c->chunk_item->num_stripes;
             
-            Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp, rollback);
-            if (!NT_SUCCESS(Status)) {
-                ERR("add_root_ref returned %08x\n", Status);
-                return Status;
+            for (i = 0; i < c->chunk_item->num_stripes - 1; i++) {
+                if ((stripes[i].start != stripes[i].end && stripes[i].end < end) || (stripes[i].start == stripes[i].end && multiple_stripes)) {
+                    read_stripes[j].Irp = NULL;
+                    read_stripes[j].devobj = c->devices[stripenum]->devobj;
+                    read_stripes[j].master = master;
+                
+                    if (stripes[i].start == stripes[i].end) {
+                        Status = make_read_irp(Irp, &read_stripes[j], start + firststripesize + cis[stripenum].offset, &stripes[stripenum].data[firststripesize], laststripesize);
+                        stripes[stripenum].skip_end = laststripesize;
+                    } else {
+                        Status = make_read_irp(Irp, &read_stripes[j], stripes[i].end + cis[stripenum].offset, &stripes[stripenum].data[stripes[i].end - start], end - stripes[i].end);
+                        stripes[stripenum].skip_end = end - stripes[i].end;
+                    }
+                    
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("make_read_irp returned %08x\n", Status);
+                        j++;
+                        goto readend;
+                    }
+                    
+                    j++;
+                    if (j == num_reads) break;
+                }
+                
+                stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
             }
-            
-            Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp, rollback);
+        }
+        
+        master->stripes_left = j;
+        KeInitializeEvent(&master->event, NotificationEvent, FALSE);
+        
+        for (i = 0; i < j; i++) {
+            Status = IoCallDriver(read_stripes[i].devobj, read_stripes[i].Irp);
             if (!NT_SUCCESS(Status)) {
-                ERR("update_root_backref returned %08x\n", Status);
-                return Status;
+                ERR("IoCallDriver returned %08x\n", Status);
+                goto readend;
             }
         }
         
-        fileref->created = FALSE;
-    } else if (fileref->deleted) {
-        UINT32 crc32;
-        KEY searchkey;
-        traverse_ptr tp;
-        ANSI_STRING* name;
+        KeWaitForSingleObject(&master->event, Executive, KernelMode, FALSE, NULL);
         
-        if (fileref->oldutf8.Buffer)
-            name = &fileref->oldutf8;
-        else
-            name = &fileref->utf8;
-
-        crc32 = calc_crc32c(0xfffffffe, (UINT8*)name->Buffer, name->Length);
+        for (i = 0; i < j; i++) {
+            if (!NT_SUCCESS(read_stripes[i].iosb.Status)) {
+                Status = read_stripes[i].iosb.Status;
+                goto readend;
+            }
+        }
+        
+        Status = STATUS_SUCCESS;
 
-        TRACE("deleting %.*S\n", file_desc_fileref(fileref));
+readend:
+        for (i = 0; i < j; i++) {
+            if (read_stripes[i].Irp) {
+                if (read_stripes[i].devobj->Flags & DO_DIRECT_IO) {
+                    MmUnlockPages(read_stripes[i].Irp->MdlAddress);
+                    IoFreeMdl(read_stripes[i].Irp->MdlAddress);
+                }
+                
+                IoFreeIrp(read_stripes[i].Irp); // FIXME - what if IoCallDriver fails and other Irps are still running?
+            }
+        }
         
-        // delete DIR_ITEM (0x54)
+        ExFreePool(read_stripes);
+        ExFreePool(master);
         
-        Status = delete_dir_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, crc32, name, Irp, rollback);
-        if (!NT_SUCCESS(Status)) {
-            ERR("delete_dir_item returned %08x\n", Status);
+        if (!NT_SUCCESS(Status))
             return Status;
-        }
+    }
+    
+    pos = 0;
+    
+    parity = (((address - c->offset) / ((c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
+    stripepos = 0;
+    
+    if ((address - c->offset) % (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 1)) > 0) {
+        UINT16 firstdata;
+        BOOL first = TRUE;
         
-        if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
-            // delete INODE_REF (0xc)
+        stripenum = (parity + 1) % c->chunk_item->num_stripes;
+        
+        for (logstripe = 0; logstripe < c->chunk_item->num_stripes - 1; logstripe++) {
+            ULONG copylen;
             
-            Status = delete_inode_ref(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, fileref->parent->fcb->inode, name, Irp, rollback);
-            if (!NT_SUCCESS(Status)) {
-                ERR("delete_inode_ref returned %08x\n", Status);
-                return Status;
-            }
-        } else { // subvolume
-            Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, name, Irp, rollback);
-            if (!NT_SUCCESS(Status)) {
-                ERR("delete_root_ref returned %08x\n", Status);
-            }
+            if (pos >= length)
+                break;
             
-            Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp, rollback);
-            if (!NT_SUCCESS(Status)) {
-                ERR("update_root_backref returned %08x\n", Status);
-                return Status;
+            if (stripes[logstripe].start < start + firststripesize && stripes[logstripe].start != stripes[logstripe].end) {
+                copylen = min(start + firststripesize - stripes[logstripe].start, length - pos);
+                
+                if (!first && copylen < c->chunk_item->stripe_length) {
+                    same_stripe = TRUE;
+                    break;
+                }
+
+                RtlCopyMemory(&stripes[stripenum].data[firststripesize - copylen], &data2[pos], copylen);
+                
+                pos += copylen;
+                first = FALSE;
             }
+            
+            stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
         }
         
-        // delete DIR_INDEX (0x60)
+        firstdata = parity == 0 ? 1 : 0;
         
-        searchkey.obj_id = fileref->parent->fcb->inode;
-        searchkey.obj_type = TYPE_DIR_INDEX;
-        searchkey.offset = fileref->index;
-
-        Status = find_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, &tp, &searchkey, FALSE, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("error - find_item returned %08x\n", Status);
-            Status = STATUS_INTERNAL_ERROR;
-            return Status;
-        }
+        RtlCopyMemory(stripes[parity].data, stripes[firstdata].data, firststripesize);
         
-        if (!keycmp(&searchkey, &tp.item->key)) {
-            delete_tree_item(fileref->fcb->Vcb, &tp, rollback);
-            TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+        for (i = firstdata + 1; i < c->chunk_item->num_stripes; i++) {
+            if (i != parity)
+                do_xor(&stripes[parity].data[0], &stripes[i].data[0], firststripesize);
         }
         
-        if (fileref->oldutf8.Buffer) {
-            ExFreePool(fileref->oldutf8.Buffer);
-            fileref->oldutf8.Buffer = NULL;
+        if (!same_stripe) {
+            stripepos = firststripesize;
+            parity = (parity + 1) % c->chunk_item->num_stripes;
         }
-    } else { // rename or change type
-        PANSI_STRING oldutf8 = fileref->oldutf8.Buffer ? &fileref->oldutf8 : &fileref->utf8;
-        UINT32 crc32, oldcrc32;
-        ULONG disize;
-        DIR_ITEM *di, *di2;
-        KEY searchkey;
-        traverse_ptr tp;
-        
-        crc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->utf8.Buffer, fileref->utf8.Length);
-        
-        if (!fileref->oldutf8.Buffer)
-            oldcrc32 = crc32;
-        else
-            oldcrc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->oldutf8.Buffer, fileref->oldutf8.Length);
-
-        // delete DIR_ITEM (0x54)
+    }
+    
+    while (length >= pos + c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 1)) {
+        UINT16 firstdata;
         
-        Status = delete_dir_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, oldcrc32, oldutf8, Irp, rollback);
-        if (!NT_SUCCESS(Status)) {
-            ERR("delete_dir_item returned %08x\n", Status);
-            return Status;
+        stripenum = (parity + 1) % c->chunk_item->num_stripes;
+        
+        for (i = 0; i < c->chunk_item->num_stripes - 1; i++) {
+            RtlCopyMemory(&stripes[stripenum].data[stripepos], &data2[pos], c->chunk_item->stripe_length);
+            
+            pos += c->chunk_item->stripe_length;
+            stripenum = (stripenum +1) % c->chunk_item->num_stripes;
         }
         
-        // add DIR_ITEM (0x54)
+        firstdata = parity == 0 ? 1 : 0;
         
-        disize = sizeof(DIR_ITEM) - 1 + fileref->utf8.Length;
-        di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
-        if (!di) {
-            ERR("out of memory\n");
-            return STATUS_INSUFFICIENT_RESOURCES;
-        }
+        RtlCopyMemory(&stripes[parity].data[stripepos], &stripes[firstdata].data[stripepos], c->chunk_item->stripe_length);
         
-        di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
-        if (!di2) {
-            ERR("out of memory\n");
-            ExFreePool(di);
-            return STATUS_INSUFFICIENT_RESOURCES;
+        for (i = firstdata + 1; i < c->chunk_item->num_stripes; i++) {
+            if (i != parity)
+                do_xor(&stripes[parity].data[stripepos], &stripes[i].data[stripepos], c->chunk_item->stripe_length);
         }
         
-        if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
-            di->key.obj_id = fileref->fcb->inode;
-            di->key.obj_type = TYPE_INODE_ITEM;
-            di->key.offset = 0;
-        } else { // subvolume
-            di->key.obj_id = fileref->fcb->subvol->id;
-            di->key.obj_type = TYPE_ROOT_ITEM;
-            di->key.offset = 0xffffffffffffffff;
+        parity = (parity + 1) % c->chunk_item->num_stripes;
+        stripepos += c->chunk_item->stripe_length;
+    }
+    
+    if (pos < length) {
+        UINT16 firstdata;
+        
+        if (!same_stripe) {
+            stripenum = (parity + 1) % c->chunk_item->num_stripes;
+            i = 0;
+        } else
+            i = logstripe;
+        
+        while (pos < length) {
+            ULONG copylen;
+            
+            copylen = min(stripes[i].end - start - stripepos, length - pos);
+
+            RtlCopyMemory(&stripes[stripenum].data[stripepos], &data2[pos], copylen);
+            
+            pos += copylen;
+            stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
+            i++;
         }
         
-        di->transid = fileref->fcb->Vcb->superblock.generation;
-        di->m = 0;
-        di->n = (UINT16)fileref->utf8.Length;
-        di->type = fileref->fcb->type;
-        RtlCopyMemory(di->name, fileref->utf8.Buffer, fileref->utf8.Length);
+        firstdata = parity == 0 ? 1 : 0;
         
-        RtlCopyMemory(di2, di, disize);
+        RtlCopyMemory(&stripes[parity].data[stripepos], &stripes[firstdata].data[stripepos], laststripesize);
         
-        Status = add_dir_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, crc32, di, disize, Irp, rollback);
-        if (!NT_SUCCESS(Status)) {
-            ERR("add_dir_item returned %08x\n", Status);
-            return Status;
+        for (i = firstdata + 1; i < c->chunk_item->num_stripes; i++) {
+            if (i != parity)
+                do_xor(&stripes[parity].data[stripepos], &stripes[i].data[stripepos], laststripesize);
         }
+    }
+    
+    for (i = 0; i < c->chunk_item->num_stripes; i++) {
+        stripes[i].start = start;
+        stripes[i].end = end;
+    }
+    
+    return STATUS_SUCCESS;
+}
+
+static NTSTATUS prepare_raid6_write(PIRP Irp, chunk* c, UINT64 address, void* data, UINT32 length, write_stripe* stripes) {
+    UINT64 startoff, endoff;
+    UINT16 startoffstripe, endoffstripe, stripenum, parity1, parity2, logstripe;
+    UINT64 start = 0xffffffffffffffff, end = 0;
+    UINT64 pos, stripepos;
+    UINT32 firststripesize, laststripesize;
+    UINT32 i;
+    UINT8* data2 = (UINT8*)data;
+    UINT32 num_reads;
+    BOOL same_stripe = FALSE, multiple_stripes;
+    
+    get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes - 2, &startoff, &startoffstripe);
+    get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes - 2, &endoff, &endoffstripe);
+    
+    for (i = 0; i < c->chunk_item->num_stripes - 2; i++) {
+        UINT64 ststart, stend;
         
-        if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
-            // delete INODE_REF (0xc)
-            
-            Status = delete_inode_ref(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, fileref->parent->fcb->inode, oldutf8, Irp, rollback);
-            if (!NT_SUCCESS(Status)) {
-                ERR("delete_inode_ref returned %08x\n", Status);
-                return Status;
-            }
-            
-            // add INODE_REF (0xc)
+        if (startoffstripe > i) {
+            ststart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
+        } else if (startoffstripe == i) {
+            ststart = startoff;
+        } else {
+            ststart = startoff - (startoff % c->chunk_item->stripe_length);
+        }
+
+        if (endoffstripe > i) {
+            stend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
+        } else if (endoffstripe == i) {
+            stend = endoff + 1;
+        } else {
+            stend = endoff - (endoff % c->chunk_item->stripe_length);
+        }
+
+        if (ststart != stend) {
+            stripes[i].start = ststart;
+            stripes[i].end = stend;
             
-            Status = add_inode_ref(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, fileref->parent->fcb->inode, fileref->index, &fileref->utf8, Irp, rollback);
-            if (!NT_SUCCESS(Status)) {
-                ERR("add_inode_ref returned %08x\n", Status);
-                return Status;
+            if (ststart < start) {
+                start = ststart;
+                firststripesize = c->chunk_item->stripe_length - (ststart % c->chunk_item->stripe_length);
             }
-        } else { // subvolume
-            ULONG rrlen;
-            ROOT_REF* rr;
-            
-            // FIXME - make sure this works with duff subvols within snapshots
-            
-            Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, oldutf8, Irp, rollback);
-            if (!NT_SUCCESS(Status)) {
-                ERR("delete_root_ref returned %08x\n", Status);
+
+            if (stend > end) {
+                end = stend;
+                laststripesize = stend % c->chunk_item->stripe_length;
+                if (laststripesize == 0)
+                    laststripesize = c->chunk_item->stripe_length;
             }
+        }
+    }
+    
+    if (start == end) {
+        ERR("error: start == end (%llx)\n", start);
+        return STATUS_INTERNAL_ERROR;
+    }
+    
+    if (startoffstripe == endoffstripe && start / c->chunk_item->stripe_length == end / c->chunk_item->stripe_length) {
+        firststripesize = end - start;
+        laststripesize = firststripesize;
+    }
+
+    for (i = 0; i < c->chunk_item->num_stripes; i++) {
+        stripes[i].data = ExAllocatePoolWithTag(NonPagedPool, end - start, ALLOC_TAG);
+        if (!stripes[i].data) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        if (i < c->chunk_item->num_stripes - 2) {
+            if (stripes[i].start == 0 && stripes[i].end == 0)
+                stripes[i].start = stripes[i].end = start;
+        }
+    }
+    
+    num_reads = 0;
+    multiple_stripes = (end - 1) / c->chunk_item->stripe_length != start / c->chunk_item->stripe_length;
+    
+    for (i = 0; i < c->chunk_item->num_stripes - 2; i++) {
+        if (stripes[i].start == stripes[i].end) {
+            num_reads++;
             
-            rrlen = sizeof(ROOT_REF) - 1 + fileref->utf8.Length;
+            if (multiple_stripes)
+                num_reads++;
+        } else {
+            if (stripes[i].start > start)
+                num_reads++;
             
-            rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG);
-            if (!rr) {
-                ERR("out of memory\n");
-                return STATUS_INSUFFICIENT_RESOURCES;
+            if (stripes[i].end < end)
+                num_reads++;
+        }
+    }
+    
+    if (num_reads > 0) {
+        UINT32 j;
+        read_stripe_master* master;
+        read_stripe* read_stripes;
+        CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
+        NTSTATUS Status;
+        
+        master = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_stripe_master), ALLOC_TAG);
+        if (!master) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        read_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_stripe) * num_reads, ALLOC_TAG);
+        if (!read_stripes) {
+            ERR("out of memory\n");
+            ExFreePool(master);
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+        
+        parity1 = (((address - c->offset) / ((c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes;
+        stripenum = (parity1 + 2) % c->chunk_item->num_stripes;
+        
+        j = 0;
+        for (i = 0; i < c->chunk_item->num_stripes - 2; i++) {
+            if (stripes[i].start > start || stripes[i].start == stripes[i].end) {
+                ULONG readlen;
+                
+                read_stripes[j].Irp = NULL;
+                read_stripes[j].devobj = c->devices[stripenum]->devobj;
+                read_stripes[j].master = master;
+                
+                if (stripes[i].start != stripes[i].end)
+                    readlen = stripes[i].start - start;
+                else
+                    readlen = firststripesize;
+                
+                Status = make_read_irp(Irp, &read_stripes[j], start + cis[stripenum].offset, stripes[stripenum].data, readlen);
+                
+                if (!NT_SUCCESS(Status)) {
+                    ERR("make_read_irp returned %08x\n", Status);
+                    j++;
+                    goto readend;
+                }
+                
+                stripes[stripenum].skip_start = readlen;
+                
+                j++;
+                if (j == num_reads) break;
             }
             
-            rr->dir = fileref->parent->fcb->inode;
-            rr->index = fileref->index;
-            rr->n = fileref->utf8.Length;
-            RtlCopyMemory(rr->name, fileref->utf8.Buffer, fileref->utf8.Length);
+            stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
+        }
+        
+        if (j < num_reads) {
+            parity1 = (((address + length - 1 - c->offset) / ((c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes;
+            stripenum = (parity1 + 2) % c->chunk_item->num_stripes;
             
-            Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp, rollback);
-            if (!NT_SUCCESS(Status)) {
-                ERR("add_root_ref returned %08x\n", Status);
-                return Status;
+            for (i = 0; i < c->chunk_item->num_stripes - 2; i++) {
+                if ((stripes[i].start != stripes[i].end && stripes[i].end < end) || (stripes[i].start == stripes[i].end && multiple_stripes)) {
+                    read_stripes[j].Irp = NULL;
+                    read_stripes[j].devobj = c->devices[stripenum]->devobj;
+                    read_stripes[j].master = master;
+                
+                    if (stripes[i].start == stripes[i].end) {
+                        Status = make_read_irp(Irp, &read_stripes[j], start + firststripesize + cis[stripenum].offset, &stripes[stripenum].data[firststripesize], laststripesize);
+                        stripes[stripenum].skip_end = laststripesize;
+                    } else {
+                        Status = make_read_irp(Irp, &read_stripes[j], stripes[i].end + cis[stripenum].offset, &stripes[stripenum].data[stripes[i].end - start], end - stripes[i].end);
+                        stripes[stripenum].skip_end = end - stripes[i].end;
+                    }
+                    
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("make_read_irp returned %08x\n", Status);
+                        j++;
+                        goto readend;
+                    }
+                    
+                    j++;
+                    if (j == num_reads) break;
+                }
+                
+                stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
             }
-            
-            Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp, rollback);
+        }
+        
+        master->stripes_left = j;
+        KeInitializeEvent(&master->event, NotificationEvent, FALSE);
+        
+        for (i = 0; i < j; i++) {
+            Status = IoCallDriver(read_stripes[i].devobj, read_stripes[i].Irp);
             if (!NT_SUCCESS(Status)) {
-                ERR("update_root_backref returned %08x\n", Status);
-                return Status;
+                ERR("IoCallDriver returned %08x\n", Status);
+                goto readend;
             }
         }
         
-        // delete DIR_INDEX (0x60)
+        KeWaitForSingleObject(&master->event, Executive, KernelMode, FALSE, NULL);
         
-        searchkey.obj_id = fileref->parent->fcb->inode;
-        searchkey.obj_type = TYPE_DIR_INDEX;
-        searchkey.offset = fileref->index;
-        
-        Status = find_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, &tp, &searchkey, FALSE, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("error - find_item returned %08x\n", Status);
-            Status = STATUS_INTERNAL_ERROR;
-            return Status;
+        for (i = 0; i < j; i++) {
+            if (!NT_SUCCESS(read_stripes[i].iosb.Status)) {
+                Status = read_stripes[i].iosb.Status;
+                goto readend;
+            }
         }
         
-        if (!keycmp(&searchkey, &tp.item->key)) {
-            delete_tree_item(fileref->fcb->Vcb, &tp, rollback);
-            TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-        } else
-            WARN("could not find (%llx,%x,%llx) in subvol %llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, fileref->fcb->subvol->id);
+        Status = STATUS_SUCCESS;
+
+readend:
+        for (i = 0; i < j; i++) {
+            if (read_stripes[i].Irp) {
+                if (read_stripes[i].devobj->Flags & DO_DIRECT_IO) {
+                    MmUnlockPages(read_stripes[i].Irp->MdlAddress);
+                    IoFreeMdl(read_stripes[i].Irp->MdlAddress);
+                }
+                
+                IoFreeIrp(read_stripes[i].Irp); // FIXME - what if IoCallDriver fails and other Irps are still running?
+            }
+        }
         
-        // add DIR_INDEX (0x60)
+        ExFreePool(read_stripes);
+        ExFreePool(master);
         
-        if (!insert_tree_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, fileref->index, di2, disize, NULL, Irp, rollback)) {
-            ERR("insert_tree_item failed\n");
-            Status = STATUS_INTERNAL_ERROR;
+        if (!NT_SUCCESS(Status))
             return Status;
-        }
-
-        if (fileref->oldutf8.Buffer) {
-            ExFreePool(fileref->oldutf8.Buffer);
-            fileref->oldutf8.Buffer = NULL;
-        }
     }
-
-    fileref->dirty = FALSE;
     
-    return STATUS_SUCCESS;
-}
-
-static void convert_shared_data_refs(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
-    LIST_ENTRY* le;
-    NTSTATUS Status;
+    pos = 0;
+    
+    parity1 = (((address - c->offset) / ((c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes;
+    parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
+    stripepos = 0;
     
-    le = Vcb->trees.Flink;
-    while (le != &Vcb->trees) {
-        tree* t = CONTAINING_RECORD(le, tree, list_entry);
+    if ((address - c->offset) % (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 2)) > 0) {
+        BOOL first = TRUE;
         
-        if (t->write && t->header.level == 0 &&
-            (t->header.flags & HEADER_FLAG_SHARED_BACKREF || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF))) {
-            LIST_ENTRY* le2;
-            BOOL old = !(t->header.flags & HEADER_FLAG_MIXED_BACKREF);
+        stripenum = (parity2 + 1) % c->chunk_item->num_stripes;
+        
+        for (logstripe = 0; logstripe < c->chunk_item->num_stripes - 2; logstripe++) {
+            ULONG copylen;
+            
+            if (pos >= length)
+                break;
             
-            le2 = Vcb->shared_extents.Flink;
-            while (le2 != &Vcb->shared_extents) {
-                shared_data* sd = CONTAINING_RECORD(le2, shared_data, list_entry);
+            if (stripes[logstripe].start < start + firststripesize && stripes[logstripe].start != stripes[logstripe].end) {
+                copylen = min(start + firststripesize - stripes[logstripe].start, length - pos);
                 
-                if (sd->address == t->header.address) {
-                    LIST_ENTRY* le3 = sd->entries.Flink;
-                    while (le3 != &sd->entries) {
-                        shared_data_entry* sde = CONTAINING_RECORD(le3, shared_data_entry, list_entry);
-                        
-                        TRACE("tree %llx; root %llx, objid %llx, offset %llx, count %x\n",
-                              t->header.address, sde->edr.root, sde->edr.objid, sde->edr.offset, sde->edr.count);
-                        
-                        Status = increase_extent_refcount_data(Vcb, sde->address, sde->size, sde->edr.root, sde->edr.objid, sde->edr.offset, sde->edr.count, Irp, rollback);
-                        
-                        if (!NT_SUCCESS(Status))
-                            WARN("increase_extent_refcount_data returned %08x\n", Status);
-                        
-                        if (old) {
-                            Status = decrease_extent_refcount_old(Vcb, sde->address, sde->size, sd->address, Irp, rollback);
-                            
-                            if (!NT_SUCCESS(Status))
-                                WARN("decrease_extent_refcount_old returned %08x\n", Status);
-                        } else {
-                            Status = decrease_extent_refcount_shared_data(Vcb, sde->address, sde->size, sd->address, sd->parent, Irp, rollback);
-                            
-                            if (!NT_SUCCESS(Status))
-                                WARN("decrease_extent_refcount_shared_data returned %08x\n", Status);
-                        }
-                        
-                        le3 = le3->Flink;
-                    }
+                if (!first && copylen < c->chunk_item->stripe_length) {
+                    same_stripe = TRUE;
                     break;
                 }
+
+                RtlCopyMemory(&stripes[stripenum].data[firststripesize - copylen], &data2[pos], copylen);
                 
-                le2 = le2->Flink;
+                pos += copylen;
+                first = FALSE;
             }
             
-            t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
-            t->header.flags |= HEADER_FLAG_MIXED_BACKREF;
+            stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
         }
         
-        le = le->Flink;
-    }
-}
-
-static NTSTATUS add_root_item_to_cache(device_extension* Vcb, UINT64 root, PIRP Irp, LIST_ENTRY* rollback) {
-    KEY searchkey;
-    traverse_ptr tp;
-    NTSTATUS Status;
-    
-    searchkey.obj_id = root;
-    searchkey.obj_type = TYPE_ROOT_ITEM;
-    searchkey.offset = 0xffffffffffffffff;
-    
-    Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
-    }
-    
-    if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
-        ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
-        int3;
-        return STATUS_INTERNAL_ERROR;
+        i = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
+        RtlCopyMemory(stripes[parity1].data, stripes[i].data, firststripesize);
+        RtlCopyMemory(stripes[parity2].data, stripes[i].data, firststripesize);
+        i = i == 0 ? (c->chunk_item->num_stripes - 1) : (i - 1);
+        
+        do {
+            do_xor(stripes[parity1].data, stripes[i].data, firststripesize);
+            
+            galois_double(stripes[parity2].data, firststripesize);
+            do_xor(stripes[parity2].data, stripes[i].data, firststripesize);
+            
+            i = i == 0 ? (c->chunk_item->num_stripes - 1) : (i - 1);
+        } while (i != parity2);
+        
+        if (!same_stripe) {
+            stripepos = firststripesize;
+            parity1 = parity2;
+            parity2 = (parity2 + 1) % c->chunk_item->num_stripes;
+        }
     }
     
-    if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, create new entry with new bits zeroed
-        ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
-        if (!ri) {
-            ERR("out of memory\n");
-            return STATUS_INSUFFICIENT_RESOURCES;
-        }
+    while (length >= pos + c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 2)) {
+        stripenum = (parity2 + 1) % c->chunk_item->num_stripes;
         
-        if (tp.item->size > 0)
-            RtlCopyMemory(ri, tp.item->data, tp.item->size);
-        
-        RtlZeroMemory(((UINT8*)ri) + tp.item->size, sizeof(ROOT_ITEM) - tp.item->size);
+        for (i = 0; i < c->chunk_item->num_stripes - 2; i++) {
+            RtlCopyMemory(&stripes[stripenum].data[stripepos], &data2[pos], c->chunk_item->stripe_length);
+            
+            pos += c->chunk_item->stripe_length;
+            stripenum = (stripenum +1) % c->chunk_item->num_stripes;
+        }
         
-        delete_tree_item(Vcb, &tp, rollback);
+        i = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
+        RtlCopyMemory(&stripes[parity1].data[stripepos], &stripes[i].data[stripepos], c->chunk_item->stripe_length);
+        RtlCopyMemory(&stripes[parity2].data[stripepos], &stripes[i].data[stripepos], c->chunk_item->stripe_length);
+        i = i == 0 ? (c->chunk_item->num_stripes - 1) : (i - 1);
+
+        do {
+            do_xor(&stripes[parity1].data[stripepos], &stripes[i].data[stripepos], c->chunk_item->stripe_length);
+            
+            galois_double(&stripes[parity2].data[stripepos], c->chunk_item->stripe_length);
+            do_xor(&stripes[parity2].data[stripepos], &stripes[i].data[stripepos], c->chunk_item->stripe_length);
+            
+            i = i == 0 ? (c->chunk_item->num_stripes - 1) : (i - 1);
+        } while (i != parity2);
         
-        if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp, rollback)) {
-            ERR("insert_tree_item failed\n");
-            return STATUS_INTERNAL_ERROR;
-        }
-    } else {
-        tp.tree->write = TRUE;
+        parity1 = parity2;
+        parity2 = (parity2 + 1) % c->chunk_item->num_stripes;
+        stripepos += c->chunk_item->stripe_length;
     }
     
-    return STATUS_SUCCESS;
-}
-
-static NTSTATUS add_root_items_to_cache(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
-    LIST_ENTRY* le;
-    NTSTATUS Status;
-    
-    le = Vcb->trees.Flink;
-    while (le != &Vcb->trees) {
-        tree* t = CONTAINING_RECORD(le, tree, list_entry);
+    if (pos < length) {
+        if (!same_stripe) {
+            stripenum = (parity2 + 1) % c->chunk_item->num_stripes;
+            i = 0;
+        } else
+            i = logstripe;
         
-        if (t->write && t->root != Vcb->chunk_root && t->root != Vcb->root_root) {
-            Status = add_root_item_to_cache(Vcb, t->root->id, Irp, rollback);
-            if (!NT_SUCCESS(Status)) {
-                ERR("add_root_item_to_cache returned %08x\n", Status);
-                return Status;
-            }
+        while (pos < length) {
+            ULONG copylen;
+            
+            copylen = min(stripes[i].end - start - stripepos, length - pos);
+
+            RtlCopyMemory(&stripes[stripenum].data[stripepos], &data2[pos], copylen);
+            
+            pos += copylen;
+            stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
+            i++;
         }
         
-        le = le->Flink;
+        i = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
+        RtlCopyMemory(&stripes[parity1].data[stripepos], &stripes[i].data[stripepos], laststripesize);
+        RtlCopyMemory(&stripes[parity2].data[stripepos], &stripes[i].data[stripepos], laststripesize);
+        i = i == 0 ? (c->chunk_item->num_stripes - 1) : (i - 1);
+
+        do {
+            do_xor(&stripes[parity1].data[stripepos], &stripes[i].data[stripepos], laststripesize);
+            
+            galois_double(&stripes[parity2].data[stripepos], laststripesize);
+            do_xor(&stripes[parity2].data[stripepos], &stripes[i].data[stripepos], laststripesize);
+            
+            i = i == 0 ? (c->chunk_item->num_stripes - 1) : (i - 1);
+        } while (i != parity2);
     }
     
-    // make sure we always update the extent tree
-    Status = add_root_item_to_cache(Vcb, BTRFS_ROOT_EXTENT, Irp, rollback);
-    if (!NT_SUCCESS(Status)) {
-        ERR("add_root_item_to_cache returned %08x\n", Status);
-        return Status;
+    for (i = 0; i < c->chunk_item->num_stripes; i++) {
+        stripes[i].start = start;
+        stripes[i].end = end;
     }
     
     return STATUS_SUCCESS;
 }
 
-NTSTATUS STDCALL do_write(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
+// Prepares (but does not send) the device writes for `length` bytes of `data` at
+// logical address `address`.
+//
+// One write_data_stripe is queued on wtc->stripes for every stripe of the chunk.
+// Stripes with nothing to write are queued with WriteDataStatus_Ignore and no IRP;
+// for the others an IRP_MJ_WRITE IRP is built (associated with `Irp` when one is
+// supplied), wtc->stripes_left is incremented and write_data_completion is set as
+// the completion routine. Sending the IRPs is left to the caller.
+//
+//   c         - chunk containing `address`, or NULL to look it up here.
+//   need_free - if TRUE, `data` is released with ExFreePool once the RAID0/10/5/6
+//               layout has been prepared; for the other layouts the flag is simply
+//               recorded on each queued stripe together with `data` itself.
+//   wtc       - caller-owned context. On failure the queued stripes are released
+//               here, but `wtc` itself is never freed by this function.
+//
+// Returns STATUS_SUCCESS, or the failing status of the step that went wrong.
+NTSTATUS STDCALL write_data(device_extension* Vcb, UINT64 address, void* data, BOOL need_free, UINT32 length, write_data_context* wtc, PIRP Irp, chunk* c) {
     NTSTATUS Status;
-    LIST_ENTRY* le;
-    BOOL cache_changed = FALSE;
-    
-#ifdef DEBUG_WRITE_LOOPS
-    UINT loops = 0;
-#endif
-    
-    TRACE("(%p)\n", Vcb);
-    
-    while (!IsListEmpty(&Vcb->dirty_filerefs)) {
-        dirty_fileref* dirt;
-        
-        le = RemoveHeadList(&Vcb->dirty_filerefs);
-        
-        dirt = CONTAINING_RECORD(le, dirty_fileref, list_entry);
-        
-        flush_fileref(dirt->fileref, Irp, rollback);
-        free_fileref(dirt->fileref);
-        ExFreePool(dirt);
-    }
+    UINT32 i;
+    CHUNK_ITEM_STRIPE* cis;
+    write_data_stripe* stripe;
+    write_stripe* stripes = NULL;
+    BOOL need_free2;
     
-    // We process deleted streams first, so we don't run over our xattr
-    // limit unless we absolutely have to.
+    TRACE("(%p, %llx, %p, %x)\n", Vcb, address, data, length);
     
-    le = Vcb->dirty_fcbs.Flink;
-    while (le != &Vcb->dirty_fcbs) {
-        dirty_fcb* dirt;
-        LIST_ENTRY* le2 = le->Flink;
-        
-        dirt = CONTAINING_RECORD(le, dirty_fcb, list_entry);
-        
-        if (dirt->fcb->deleted && dirt->fcb->ads) {
-            RemoveEntryList(le);
-            
-            flush_fcb(dirt->fcb, FALSE, Irp, rollback);
-            free_fcb(dirt->fcb);
-            ExFreePool(dirt);
+    if (!c) {
+        c = get_chunk_from_address(Vcb, address);
+        if (!c) {
+            ERR("could not get chunk for address %llx\n", address);
+            return STATUS_INTERNAL_ERROR;
         }
-        
-        le = le2;
     }
     
-    le = Vcb->dirty_fcbs.Flink;
-    while (le != &Vcb->dirty_fcbs) {
-        dirty_fcb* dirt;
-        LIST_ENTRY* le2 = le->Flink;
-        
-        dirt = CONTAINING_RECORD(le, dirty_fcb, list_entry);
-        
-        if (dirt->fcb->subvol != Vcb->root_root || dirt->fcb->deleted) {
-            RemoveEntryList(le);
-            
-            flush_fcb(dirt->fcb, FALSE, Irp, rollback);
-            free_fcb(dirt->fcb);
-            ExFreePool(dirt);
-        }
-        
-        le = le2;
+    // scratch description of what goes to each device; released again at "end"
+    stripes = ExAllocatePoolWithTag(PagedPool, sizeof(write_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
+    if (!stripes) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
     }
     
-    convert_shared_data_refs(Vcb, Irp, rollback);
+    RtlZeroMemory(stripes, sizeof(write_stripe) * c->chunk_item->num_stripes);
     
-    ExAcquireResourceExclusiveLite(&Vcb->checksum_lock, TRUE);
-    if (!IsListEmpty(&Vcb->sector_checksums)) {
-        update_checksum_tree(Vcb, Irp, rollback);
-    }
-    ExReleaseResourceLite(&Vcb->checksum_lock);
+    // the per-stripe entries are read from directly behind the CHUNK_ITEM header
+    cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
     
-    if (!IsListEmpty(&Vcb->drop_roots)) {
-        Status = drop_roots(Vcb, Irp, rollback);
-        
+    if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
+        Status = prepare_raid0_write(c, address, data, length, stripes);
         if (!NT_SUCCESS(Status)) {
-            ERR("drop_roots returned %08x\n", Status);
+            ERR("prepare_raid0_write returned %08x\n", Status);
+            ExFreePool(stripes);
             return Status;
         }
-    }
-    
-    if (!IsListEmpty(&Vcb->chunks_changed)) {
-        Status = update_chunks(Vcb, Irp, rollback);
         
+        if (need_free)
+            ExFreePool(data);
+
+        need_free2 = TRUE;
+    } else if (c->chunk_item->type & BLOCK_FLAG_RAID10) {
+        Status = prepare_raid10_write(c, address, data, length, stripes);
         if (!NT_SUCCESS(Status)) {
-            ERR("update_chunks returned %08x\n", Status);
+            ERR("prepare_raid10_write returned %08x\n", Status);
+            ExFreePool(stripes);
             return Status;
         }
-    }
-    
-    // If only changing superblock, e.g. changing label, we still need to rewrite
-    // the root tree so the generations match, otherwise you won't be able to mount on Linux.
-    if (!Vcb->root_root->treeholder.tree || !Vcb->root_root->treeholder.tree->write) {
-        KEY searchkey;
-        
-        traverse_ptr tp;
-        
-        searchkey.obj_id = 0;
-        searchkey.obj_type = 0;
-        searchkey.offset = 0;
         
-        Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
+        if (need_free)
+            ExFreePool(data);
+
+        need_free2 = TRUE;
+    } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
+        Status = prepare_raid5_write(Irp, c, address, data, length, stripes);
         if (!NT_SUCCESS(Status)) {
-            ERR("error - find_item returned %08x\n", Status);
+            ERR("prepare_raid5_write returned %08x\n", Status);
+            ExFreePool(stripes);
             return Status;
         }
         
-        Vcb->root_root->treeholder.tree->write = TRUE;
-    }
-    
-    Status = add_root_items_to_cache(Vcb, Irp, rollback);
-    if (!NT_SUCCESS(Status)) {
-        ERR("add_root_items_to_cache returned %08x\n", Status);
-        return Status;
-    }
-    
-    do {
-        Status = add_parents(Vcb, rollback);
+        if (need_free)
+            ExFreePool(data);
+
+        need_free2 = TRUE;
+    } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
+        Status = prepare_raid6_write(Irp, c, address, data, length, stripes);
         if (!NT_SUCCESS(Status)) {
-            ERR("add_parents returned %08x\n", Status);
-            goto end;
+            ERR("prepare_raid6_write returned %08x\n", Status);
+            ExFreePool(stripes);
+            return Status;
         }
         
-        Status = do_splits(Vcb, Irp, rollback);
-        if (!NT_SUCCESS(Status)) {
-            ERR("do_splits returned %08x\n", Status);
-            goto end;
+        if (need_free)
+            ExFreePool(data);
+
+        need_free2 = TRUE;
+    } else {  // write same data to every location - SINGLE, DUP, RAID1
+        for (i = 0; i < c->chunk_item->num_stripes; i++) {
+            stripes[i].start = address - c->offset;
+            stripes[i].end = stripes[i].start + length;
+            stripes[i].data = data;
         }
+        need_free2 = need_free;
+    }
+
+    for (i = 0; i < c->chunk_item->num_stripes; i++) {
+        PIO_STACK_LOCATION IrpSp;
         
-        Status = allocate_tree_extents(Vcb, Irp, rollback);
-        if (!NT_SUCCESS(Status)) {
-            ERR("add_parents returned %08x\n", Status);
-            goto end;
-        }
+        // FIXME - handle missing devices
         
-        Status = update_chunk_usage(Vcb, Irp, rollback);
-        if (!NT_SUCCESS(Status)) {
-            ERR("update_chunk_usage returned %08x\n", Status);
+        stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_stripe), ALLOC_TAG);
+        if (!stripe) {
+            ERR("out of memory\n");
+            Status = STATUS_INSUFFICIENT_RESOURCES;
             goto end;
         }
         
-        Status = allocate_cache(Vcb, &cache_changed, Irp, rollback);
-        if (!NT_SUCCESS(Status)) {
-            ERR("allocate_cache returned %08x\n", Status);
-            goto end;
+        // NOTE(review): until InsertTailList below, `stripe` is not reachable from
+        // wtc->stripes, so every error exit in between has to release it itself.
+        // The data buffers of stripes that never get queued are still not released
+        // on those exits - confirm who is meant to own them.
+        if (stripes[i].start + stripes[i].skip_start == stripes[i].end - stripes[i].skip_end || stripes[i].start == stripes[i].end) {
+            // nothing to write to this device: queue a placeholder carrying only buf/need_free
+            stripe->status = WriteDataStatus_Ignore;
+            stripe->Irp = NULL;
+            stripe->buf = stripes[i].data;
+            stripe->need_free = need_free2;
+        } else {
+            stripe->context = (struct _write_data_context*)wtc;
+            stripe->buf = stripes[i].data;
+            stripe->need_free = need_free2;
+            stripe->device = c->devices[i];
+            RtlZeroMemory(&stripe->iosb, sizeof(IO_STATUS_BLOCK));
+            stripe->status = WriteDataStatus_Pending;
+            
+            if (!Irp) {
+                stripe->Irp = IoAllocateIrp(stripe->device->devobj->StackSize, FALSE);
+            
+                if (!stripe->Irp) {
+                    ERR("IoAllocateIrp failed\n");
+                    ExFreePool(stripe);
+                    Status = STATUS_INTERNAL_ERROR;
+                    goto end;
+                }
+            } else {
+                stripe->Irp = IoMakeAssociatedIrp(Irp, stripe->device->devobj->StackSize);
+                
+                if (!stripe->Irp) {
+                    ERR("IoMakeAssociatedIrp failed\n");
+                    ExFreePool(stripe);
+                    Status = STATUS_INTERNAL_ERROR;
+                    goto end;
+                }
+            }
+            
+            IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
+            IrpSp->MajorFunction = IRP_MJ_WRITE;
+            
+            // hand the buffer over in whichever form the target device asks for
+            if (stripe->device->devobj->Flags & DO_BUFFERED_IO) {
+                stripe->Irp->AssociatedIrp.SystemBuffer = stripes[i].data + stripes[i].skip_start;
+
+                stripe->Irp->Flags = IRP_BUFFERED_IO;
+            } else if (stripe->device->devobj->Flags & DO_DIRECT_IO) {
+                stripe->Irp->MdlAddress = IoAllocateMdl(stripes[i].data + stripes[i].skip_start,
+                                                        stripes[i].end - stripes[i].start - stripes[i].skip_start - stripes[i].skip_end, FALSE, FALSE, NULL);
+                if (!stripe->Irp->MdlAddress) {
+                    ERR("IoAllocateMdl failed\n");
+                    IoFreeIrp(stripe->Irp);
+                    ExFreePool(stripe);
+                    Status = STATUS_INTERNAL_ERROR;
+                    goto end;
+                }
+                
+                MmProbeAndLockPages(stripe->Irp->MdlAddress, KernelMode, IoWriteAccess);
+            } else {
+                stripe->Irp->UserBuffer = stripes[i].data + stripes[i].skip_start;
+            }
+            
+#ifdef DEBUG_PARANOID
+            if (stripes[i].end < stripes[i].start + stripes[i].skip_start + stripes[i].skip_end) {
+                ERR("trying to write stripe with negative length (%llx < %llx + %x + %x)\n",
+                    stripes[i].end, stripes[i].start, stripes[i].skip_start, stripes[i].skip_end);
+                int3;
+            }
+#endif
+
+            // device offset = offset within the stripe + where the stripe starts on the device
+            IrpSp->Parameters.Write.Length = stripes[i].end - stripes[i].start - stripes[i].skip_start - stripes[i].skip_end;
+            IrpSp->Parameters.Write.ByteOffset.QuadPart = stripes[i].start + cis[i].offset + stripes[i].skip_start;
+            
+            stripe->Irp->UserIosb = &stripe->iosb;
+            wtc->stripes_left++;
+
+            IoSetCompletionRoutine(stripe->Irp, write_data_completion, stripe, TRUE, TRUE, TRUE);
         }
 
-#ifdef DEBUG_WRITE_LOOPS
-        loops++;
-        
-        if (cache_changed)
-            ERR("cache has changed, looping again\n");
-#endif        
-    } while (cache_changed || !trees_consistent(Vcb, rollback));
+        InsertTailList(&wtc->stripes, &stripe->list_entry);
+    }
     
-#ifdef DEBUG_WRITE_LOOPS
-    ERR("%u loops\n", loops);
-#endif
+    Status = STATUS_SUCCESS;
     
-    TRACE("trees consistent\n");
+end:
+
+    if (stripes) ExFreePool(stripes);
     
-    Status = update_root_root(Vcb, Irp, rollback);
     if (!NT_SUCCESS(Status)) {
-        ERR("update_root_root returned %08x\n", Status);
-        goto end;
+        free_write_data_stripes(wtc);
+        // wtc belongs to the caller (the early returns above never free it, and
+        // write_data_complete releases it itself after a failure), so it must not
+        // be freed here. Leave the list empty so that a caller which also calls
+        // free_write_data_stripes does not walk entries that are already gone.
+        InitializeListHead(&wtc->stripes);
     }
     
-    Status = write_trees(Vcb, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("write_trees returned %08x\n", Status);
-        goto end;
-    }
+    return Status;
+}
+
+// Works out which logical range of chunk `c` has to be locked for a RAID5/6 write
+// of `length` bytes at `address`, returning it through *lockaddr and *locklen.
+//
+// The first and last byte of the request are mapped with get_raid0_offset over the
+// data stripes only; the lowest start and highest end found across those stripes
+// are then converted back into a logical start address and a length.
+void get_raid56_lock_range(chunk* c, UINT64 address, UINT64 length, UINT64* lockaddr, UINT64* locklen) {
+    UINT64 first_off, last_off;
+    UINT16 first_stripe, last_stripe, data_stripes;
+    UINT64 lo = 0xffffffffffffffff, hi = 0, log_end, hi_rem;
+    UINT16 idx;
     
-    Vcb->superblock.cache_generation = Vcb->superblock.generation;
+    // one stripe is taken off for RAID5, two for anything else
+    data_stripes = c->chunk_item->num_stripes - ((c->chunk_item->type & BLOCK_FLAG_RAID5) ? 1 : 2);
     
-    Status = write_superblocks(Vcb, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("write_superblocks returned %08x\n", Status);
-        goto end;
+    // position of the first and of the last byte of the request
+    get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, data_stripes, &first_off, &first_stripe);
+    get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, data_stripes, &last_off, &last_stripe);
+
+    for (idx = 0; idx < data_stripes; idx++) {
+        UINT64 from, to;
+        
+        // stripes before the starting one begin at the next stripe boundary,
+        // stripes after it at the current one
+        from = first_stripe > idx ? first_off - (first_off % c->chunk_item->stripe_length) + c->chunk_item->stripe_length
+             : first_stripe == idx ? first_off
+             : first_off - (first_off % c->chunk_item->stripe_length);
+
+        // same idea for the end; last_off is inclusive, hence the + 1
+        to = last_stripe > idx ? last_off - (last_off % c->chunk_item->stripe_length) + c->chunk_item->stripe_length
+           : last_stripe == idx ? last_off + 1
+           : last_off - (last_off % c->chunk_item->stripe_length);
+
+        // stripes with an empty range do not take part in the min/max
+        if (from == to)
+            continue;
+
+        if (from < lo)
+            lo = from;
+
+        if (to > hi)
+            hi = to;
     }
     
-    clean_space_cache(Vcb);
+    *lockaddr = c->offset + ((lo / c->chunk_item->stripe_length) * c->chunk_item->stripe_length * data_stripes) +
+                lo % c->chunk_item->stripe_length;
+               
+    hi_rem = hi % c->chunk_item->stripe_length;
+    
+    log_end = c->offset + ((hi / c->chunk_item->stripe_length) * c->chunk_item->stripe_length * data_stripes);
+    log_end += c->chunk_item->stripe_length * (data_stripes - 1);
+    log_end += hi_rem == 0 ? c->chunk_item->stripe_length : hi_rem;
+    *locklen = log_end - *lockaddr;
+}
+
+// Writes `length` bytes from `data` to logical address `address` and does not
+// return until the device writes have been waited for.
+//
+// A write_data_context is allocated, write_data() queues the per-stripe requests
+// on it, every request that is not marked WriteDataStatus_Ignore is passed to
+// IoCallDriver, and the context's event is then waited on. For RAID5/RAID6 chunks
+// the range from get_raid56_lock_range is locked for the duration.
+//
+//   c - chunk containing `address`, or NULL to look it up here.
+//
+// Returns the failure from setup, otherwise the status of the first stripe whose
+// write failed, otherwise the success status from write_data.
+NTSTATUS STDCALL write_data_complete(device_extension* Vcb, UINT64 address, void* data, UINT32 length, PIRP Irp, chunk* c) {
+    write_data_context* wtc;
+    NTSTATUS Status;
+    UINT64 lockaddr, locklen;
+// #ifdef DEBUG_PARANOID
+//     UINT8* buf2;
+// #endif
     
-    Vcb->superblock.generation++;
+    wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context), ALLOC_TAG);
+    if (!wtc) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE);
+    InitializeListHead(&wtc->stripes);
+    wtc->tree = FALSE;
+    wtc->stripes_left = 0;
     
-    Status = STATUS_SUCCESS;
+    if (!c) {
+        c = get_chunk_from_address(Vcb, address);
+        if (!c) {
+            ERR("could not get chunk for address %llx\n", address);
+            ExFreePool(wtc); // nothing has been queued yet, only the context needs releasing
+            return STATUS_INTERNAL_ERROR;
+        }
+    }
     
-    le = Vcb->trees.Flink;
-    while (le != &Vcb->trees) {
-        tree* t = CONTAINING_RECORD(le, tree, list_entry);
-        
-#ifdef DEBUG_PARANOID
-        KEY searchkey;
-        traverse_ptr tp;
+    if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) {
+        get_raid56_lock_range(c, address, length, &lockaddr, &locklen);
+        chunk_lock_range(Vcb, c, lockaddr, locklen);
+    }
+    
+    Status = write_data(Vcb, address, data, FALSE, length, wtc, Irp, c);
+    if (!NT_SUCCESS(Status)) {
+        ERR("write_data returned %08x\n", Status);
         
-        searchkey.obj_id = t->header.address;
-        searchkey.obj_type = TYPE_METADATA_ITEM;
-        searchkey.offset = 0xffffffffffffffff;
+        if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
+            chunk_unlock_range(Vcb, c, lockaddr, locklen);
         
-        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("error - find_item returned %08x\n", Status);
-            int3;
+        // NOTE(review): this assumes write_data has not already released wtc on
+        // its own error paths - confirm against write_data.
+        free_write_data_stripes(wtc);
+        ExFreePool(wtc);
+        return Status;
+    }
+    
+    if (wtc->stripes.Flink != &wtc->stripes) {
+        // launch writes and wait
+        BOOL launched = FALSE;
+        LIST_ENTRY* le = wtc->stripes.Flink;
+        while (le != &wtc->stripes) {
+            write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
+            
+            if (stripe->status != WriteDataStatus_Ignore) {
+                IoCallDriver(stripe->device->devobj, stripe->Irp);
+                launched = TRUE;
+            }
+            
+            le = le->Flink;
         }
         
-        if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
-            searchkey.obj_id = t->header.address;
-            searchkey.obj_type = TYPE_EXTENT_ITEM;
-            searchkey.offset = 0xffffffffffffffff;
+        // If every queued stripe was an "ignore" placeholder, no IRP is in flight;
+        // presumably nothing would ever signal the event, so do not block on it.
+        if (launched)
+            KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL);
+        
+        // report the first stripe whose write failed
+        le = wtc->stripes.Flink;
+        while (le != &wtc->stripes) {
+            write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
             
-            Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-            if (!NT_SUCCESS(Status)) {
-                ERR("error - find_item returned %08x\n", Status);
-                int3;
+            if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
+                Status = stripe->iosb.Status;
+                break;
             }
             
-            if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
-                ERR("error - could not find entry in extent tree for tree at %llx\n", t->header.address);
-                int3;
-            }
+            le = le->Flink;
         }
-#endif
         
-        t->write = FALSE;
-        
-        le = le->Flink;
+        free_write_data_stripes(wtc);
     }
     
-    Vcb->need_write = FALSE;
-    
-    while (!IsListEmpty(&Vcb->drop_roots)) {
-        LIST_ENTRY* le = RemoveHeadList(&Vcb->drop_roots);
-        root* r = CONTAINING_RECORD(le, root, list_entry);
+    if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
+        chunk_unlock_range(Vcb, c, lockaddr, locklen);
 
-        ExDeleteResourceLite(&r->nonpaged->load_tree_lock);
-        ExFreePool(r->nonpaged);
-        ExFreePool(r);
-    }
-    
-end:
-    TRACE("do_write returning %08x\n", Status);
-    
-    return Status;
-}
+    ExFreePool(wtc);
 
-static __inline BOOL entry_in_ordered_list(LIST_ENTRY* list, UINT64 value) {
-    LIST_ENTRY* le = list->Flink;
-    ordered_list* ol;
-    
-    while (le != list) {
-        ol = (ordered_list*)le;
-        
-        if (ol->key > value)
-            return FALSE;
-        else if (ol->key == value)
-            return TRUE;
-        
-        le = le->Flink;
-    }
-    
-    return FALSE;
-}
+// #ifdef DEBUG_PARANOID
+//     buf2 = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
+//     Status = read_data(Vcb, address, length, NULL, FALSE, buf2, NULL, Irp);
+//     
+//     if (!NT_SUCCESS(Status) || RtlCompareMemory(buf2, data, length) != length)
+//         int3;
+//     
+//     ExFreePool(buf2);
+// #endif
 
-static changed_extent* get_changed_extent_item(chunk* c, UINT64 address, UINT64 size, BOOL no_csum) {
-    LIST_ENTRY* le;
-    changed_extent* ce;
-    
-    le = c->changed_extents.Flink;
-    while (le != &c->changed_extents) {
-        ce = CONTAINING_RECORD(le, changed_extent, list_entry);
-        
-        if (ce->address == address && ce->size == size)
-            return ce;
-        
-        le = le->Flink;
-    }
-    
-    ce = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent), ALLOC_TAG);
-    if (!ce) {
-        ERR("out of memory\n");
-        return NULL;
-    }
-    
-    ce->address = address;
-    ce->size = size;
-    ce->old_size = size;
-    ce->count = 0;
-    ce->old_count = 0;
-    ce->no_csum = no_csum;
-    InitializeListHead(&ce->refs);
-    InitializeListHead(&ce->old_refs);
-    
-    InsertTailList(&c->changed_extents, &ce->list_entry);
-    
-    return ce;
+    // Status is still write_data's success value unless a stripe reported a failure above
+    return Status;
 }
 
-NTSTATUS update_changed_extent_ref(device_extension* Vcb, chunk* c, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, signed long long count,
-                                   BOOL no_csum, UINT64 new_size, PIRP Irp) {
+static NTSTATUS STDCALL write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { // per-stripe write completion callback; conptr is that stripe's write_data_stripe
+    write_data_stripe* stripe = conptr;
+    write_data_context* context = (write_data_context*)stripe->context; // context holding the stripe list, the outstanding-stripe counter and the event
     LIST_ENTRY* le;
-    changed_extent* ce;
-    changed_extent_ref* cer;
-    NTSTATUS Status;
-    KEY searchkey;
-    traverse_ptr tp;
-    UINT64 old_count;
     
-    ExAcquireResourceExclusiveLite(&c->changed_extents_lock, TRUE);
-    
-    ce = get_changed_extent_item(c, address, size, no_csum);
+    // FIXME - we need a lock here
     
-    if (!ce) {
-        ERR("get_changed_extent_item failed\n");
-        Status = STATUS_INTERNAL_ERROR;
+    if (stripe->status == WriteDataStatus_Cancelling) { // this stripe was marked Cancelling by the error path below (run for a failed sibling)
+        stripe->status = WriteDataStatus_Cancelled; // record the cancellation; iosb is deliberately not captured on this path
         goto end;
     }
     
-    if (IsListEmpty(&ce->refs) && IsListEmpty(&ce->old_refs)) { // new entry
-        searchkey.obj_id = address;
-        searchkey.obj_type = TYPE_EXTENT_ITEM;
-        searchkey.offset = 0xffffffffffffffff;
-        
-        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("error - find_item returned %08x\n", Status);
-            goto end;
-        }
-        
-        if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
-            ERR("could not find address %llx in extent tree\n", address);
-            Status = STATUS_INTERNAL_ERROR;
-            goto end;
-        }
+    stripe->iosb = Irp->IoStatus; // keep the IRP's final status block with the stripe
+    
+    if (NT_SUCCESS(Irp->IoStatus.Status)) {
+        stripe->status = WriteDataStatus_Success;
+    } else {
+        le = context->stripes.Flink; // on failure, walk every stripe of the same context
         
-        if (tp.item->key.offset != size) {
-            ERR("extent %llx had size %llx, not %llx as expected\n", address, tp.item->key.offset, size);
-            Status = STATUS_INTERNAL_ERROR;
-            goto end;
-        }
+        stripe->status = WriteDataStatus_Error; // set before the walk, so this stripe is not Pending when the loop reaches it
         
-        if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
-            EXTENT_ITEM_V0* eiv0 = (EXTENT_ITEM_V0*)tp.item->data;
+        while (le != &context->stripes) {
+            write_data_stripe* s2 = CONTAINING_RECORD(le, write_data_stripe, list_entry);
             
-            ce->count = ce->old_count = eiv0->refcount;
-        } else if (tp.item->size >= sizeof(EXTENT_ITEM)) {
-            EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
+            if (s2->status == WriteDataStatus_Pending) { // only stripes still marked Pending are cancelled
+                s2->status = WriteDataStatus_Cancelling; // marked first so that stripe's own completion takes the Cancelling branch above
+                IoCancelIrp(s2->Irp);
+            }
             
-            ce->count = ce->old_count = ei->refcount;
-        } else {
-            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
-            Status = STATUS_INTERNAL_ERROR;
-            goto end;
+            le = le->Flink;
         }
     }
     
-    ce->size = new_size;
+end:
+    if (InterlockedDecrement(&context->stripes_left) == 0) // every path counts this stripe as finished; the last one signals the event
+        KeSetEvent(&context->Event, 0, FALSE);
+
+    return STATUS_MORE_PROCESSING_REQUIRED; // NOTE(review): presumably keeps the IRP alive for the issuer to inspect and release - confirm against WDK docs
+}
+
+void free_write_data_stripes(write_data_context* wtc) { // releases MDLs, owned buffers and the stripe records of wtc->stripes; wtc itself is not freed here
+    LIST_ENTRY *le, *le2, *nextle;
     
-    le = ce->refs.Flink;
-    while (le != &ce->refs) {
-        cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
+    le = wtc->stripes.Flink; // pass 1: MDL cleanup only, no stripe is freed yet
+    while (le != &wtc->stripes) {
+        write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
         
-        if (cer->edr.root == root && cer->edr.objid == objid && cer->edr.offset == offset) {
-            ce->count += count;
-            cer->edr.count += count;
-            Status = STATUS_SUCCESS;
-            goto end;
+        if (stripe->Irp) { // stripes without an IRP have nothing to unlock
+            if (stripe->device->devobj->Flags & DO_DIRECT_IO) { // the MDL is touched only when the target device object has DO_DIRECT_IO set
+                MmUnlockPages(stripe->Irp->MdlAddress);
+                IoFreeMdl(stripe->Irp->MdlAddress); // NOTE(review): the IRP itself is not freed in this function - confirm where that happens
+            }
         }
         
         le = le->Flink;
     }
     
-    old_count = find_extent_data_refcount(Vcb, address, size, root, objid, offset, Irp);
-    
-    if (old_count > 0) {
-        cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
-    
-        if (!cer) {
-            ERR("out of memory\n");
-            Status = STATUS_INSUFFICIENT_RESOURCES;
-            goto end;
+    le = wtc->stripes.Flink; // pass 2: free buffers and the stripe records themselves
+    while (le != &wtc->stripes) {
+        write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
+        
+        nextle = le->Flink; // saved now because this stripe is freed before the loop advances
+
+        if (stripe->buf && stripe->need_free) { // only buffers flagged need_free are released here
+            ExFreePool(stripe->buf);
+            
+            le2 = le->Flink; // scan the remaining stripes for the same buffer pointer
+            while (le2 != &wtc->stripes) {
+                write_data_stripe* s2 = CONTAINING_RECORD(le2, write_data_stripe, list_entry);
+                
+                if (s2->buf == stripe->buf) // pointer-value comparison only; the freed memory is not dereferenced
+                    s2->buf = NULL; // cleared so the outer loop does not free the shared buffer a second time
+                
+                le2 = le2->Flink;
+            }
+            
         }
         
-        cer->edr.root = root;
-        cer->edr.objid = objid;
-        cer->edr.offset = offset;
-        cer->edr.count = old_count;
+        ExFreePool(stripe); // freed without unlinking, so wtc->stripes is left pointing at freed entries when this returns
         
-        InsertTailList(&ce->old_refs, &cer->list_entry);
-    }
-    
-    cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
-    
-    if (!cer) {
-        ERR("out of memory\n");
-        Status = STATUS_INSUFFICIENT_RESOURCES;
-        goto end;
+        le = nextle;
     }
-    
-    cer->edr.root = root;
-    cer->edr.objid = objid;
-    cer->edr.offset = offset;
-    cer->edr.count = old_count + count;
-    
-    InsertTailList(&ce->refs, &cer->list_entry);
-    
-    ce->count += count;
-    
-    Status = STATUS_SUCCESS;
-    
-end:
-    ExReleaseResourceLite(&c->changed_extents_lock);
-    
-    return Status;
 }
 
 NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 end_data, PIRP Irp, LIST_ENTRY* rollback) {
@@ -5594,6 +1838,7 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT
                         remove_fcb_extent(fcb, ext, rollback);
                         
                         fcb->inode_item.st_blocks -= len;
+                        fcb->inode_item_changed = TRUE;
                     } else if (start_data <= ext->offset && end_data < ext->offset + len) { // remove beginning
                         EXTENT_DATA* ned;
                         UINT64 size;
@@ -5635,6 +1880,7 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT
                         remove_fcb_extent(fcb, ext, rollback);
                         
                         fcb->inode_item.st_blocks -= end_data - ext->offset;
+                        fcb->inode_item_changed = TRUE;
                     } else if (start_data > ext->offset && end_data >= ext->offset + len) { // remove end
                         EXTENT_DATA* ned;
                         UINT64 size;
@@ -5676,6 +1922,7 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT
                         remove_fcb_extent(fcb, ext, rollback);
                         
                         fcb->inode_item.st_blocks -= ext->offset + len - start_data;
+                        fcb->inode_item_changed = TRUE;
                     } else if (start_data > ext->offset && end_data < ext->offset + len) { // remove middle
                         EXTENT_DATA *ned1, *ned2;
                         UINT64 size;
@@ -5710,7 +1957,7 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT
                         newext1->offset = ext->offset;
                         newext1->data = ned1;
                         newext1->datalen = sizeof(EXTENT_DATA) - 1 + size;
-                        newext1->unique = FALSE;
+                        newext1->unique = ext->unique;
                         newext1->ignore = FALSE;
                         
                         size = ext->offset + len - end_data;
@@ -5746,7 +1993,7 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT
                         newext2->offset = end_data;
                         newext2->data = ned2;
                         newext2->datalen = sizeof(EXTENT_DATA) - 1 + size;
-                        newext2->unique = FALSE;
+                        newext2->unique = ext->unique;
                         newext2->ignore = FALSE;
                         
                         InsertHeadList(&ext->list_entry, &newext1->list_entry);
@@ -5755,13 +2002,15 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT
                         remove_fcb_extent(fcb, ext, rollback);
                         
                         fcb->inode_item.st_blocks -= end_data - start_data;
+                        fcb->inode_item_changed = TRUE;
                     }
                 } else if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
                     if (start_data <= ext->offset && end_data >= ext->offset + len) { // remove all
-                        if (ed2->address != 0) {
+                        if (ed2->size != 0) {
                             chunk* c;
                             
                             fcb->inode_item.st_blocks -= len;
+                            fcb->inode_item_changed = TRUE;
                             
                             c = get_chunk_from_address(Vcb, ed2->address);
                             
@@ -5769,7 +2018,7 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT
                                 ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
                             } else {
                                 Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1,
-                                                                   fcb->inode_item.flags & BTRFS_INODE_NODATASUM, ed2->size, Irp);
+                                                                   fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
                                 if (!NT_SUCCESS(Status)) {
                                     ERR("update_changed_extent_ref returned %08x\n", Status);
                                     goto end;
@@ -5783,8 +2032,10 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT
                         EXTENT_DATA2* ned2;
                         extent* newext;
                         
-                        if (ed2->address != 0)
+                        if (ed2->size != 0) {
                             fcb->inode_item.st_blocks -= end_data - ext->offset;
+                            fcb->inode_item_changed = TRUE;
+                        }
                         
                         ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
                         if (!ned) {
@@ -5811,7 +2062,7 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT
                         ned->type = ed->type;
                         ned2->address = ed2->address;
                         ned2->size = ed2->size;
-                        ned2->offset = ed2->address == 0 ? 0 : (ed2->offset + (end_data - ext->offset));
+                        ned2->offset = ed2->offset + (end_data - ext->offset);
                         ned2->num_bytes = ed2->num_bytes - (end_data - ext->offset);
 
                         newext->offset = end_data;
@@ -5827,8 +2078,10 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT
                         EXTENT_DATA2* ned2;
                         extent* newext;
                         
-                        if (ed2->address != 0)
+                        if (ed2->size != 0) {
                             fcb->inode_item.st_blocks -= ext->offset + len - start_data;
+                            fcb->inode_item_changed = TRUE;
+                        }
                         
                         ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
                         if (!ned) {
@@ -5855,7 +2108,7 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT
                         ned->type = ed->type;
                         ned2->address = ed2->address;
                         ned2->size = ed2->size;
-                        ned2->offset = ed2->address == 0 ? 0 : ed2->offset;
+                        ned2->offset = ed2->offset;
                         ned2->num_bytes = start_data - ext->offset;
 
                         newext->offset = ext->offset;
@@ -5871,10 +2124,11 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT
                         EXTENT_DATA2 *neda2, *nedb2;
                         extent *newext1, *newext2;
                         
-                        if (ed2->address != 0) {
+                        if (ed2->size != 0) {
                             chunk* c;
                             
                             fcb->inode_item.st_blocks -= end_data - start_data;
+                            fcb->inode_item_changed = TRUE;
                             
                             c = get_chunk_from_address(Vcb, ed2->address);
                             
@@ -5882,7 +2136,7 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT
                                 ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
                             } else {
                                 Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
-                                                                   fcb->inode_item.flags & BTRFS_INODE_NODATASUM, ed2->size, Irp);
+                                                                   fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
                                 if (!NT_SUCCESS(Status)) {
                                     ERR("update_changed_extent_ref returned %08x\n", Status);
                                     goto end;
@@ -5934,7 +2188,7 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT
                         neda->type = ed->type;
                         neda2->address = ed2->address;
                         neda2->size = ed2->size;
-                        neda2->offset = ed2->address == 0 ? 0 : ed2->offset;
+                        neda2->offset = ed2->offset;
                         neda2->num_bytes = start_data - ext->offset;
 
                         nedb2 = (EXTENT_DATA2*)&nedb->data[0];
@@ -5947,19 +2201,19 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT
                         nedb->type = ed->type;
                         nedb2->address = ed2->address;
                         nedb2->size = ed2->size;
-                        nedb2->offset = ed2->address == 0 ? 0 : (ed2->offset + (end_data - ext->offset));
+                        nedb2->offset = ed2->offset + (end_data - ext->offset);
                         nedb2->num_bytes = ext->offset + len - end_data;
                         
                         newext1->offset = ext->offset;
                         newext1->data = neda;
                         newext1->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
-                        newext1->unique = FALSE;
+                        newext1->unique = ext->unique;
                         newext1->ignore = FALSE;
                         
                         newext2->offset = end_data;
                         newext2->data = nedb;
                         newext2->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
-                        newext2->unique = FALSE;
+                        newext2->unique = ext->unique;
                         newext2->ignore = FALSE;
                         
                         InsertHeadList(&ext->list_entry, &newext1->list_entry);
@@ -5974,8 +2228,6 @@ NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT
         le = le2;
     }
     
-    // FIXME - do bitmap analysis of changed extents, and free what we can
-    
     Status = STATUS_SUCCESS;
 
 end:
@@ -6036,7 +2288,7 @@ static void add_insert_extent_rollback(LIST_ENTRY* rollback, fcb* fcb, extent* e
     re->fcb = fcb;
     re->ext = ext;
     
-    add_rollback(rollback, ROLLBACK_INSERT_EXTENT, re);
+    add_rollback(fcb->Vcb, rollback, ROLLBACK_INSERT_EXTENT, re);
 }
 
 static BOOL add_extent_to_fcb(fcb* fcb, UINT64 offset, EXTENT_DATA* ed, ULONG edsize, BOOL unique, LIST_ENTRY* rollback) {
@@ -6092,50 +2344,8 @@ static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback) {
         re->fcb = fcb;
         re->ext = ext;
         
-        add_rollback(rollback, ROLLBACK_DELETE_EXTENT, re);
-    }
-}
-
-static void add_changed_extent_ref(chunk* c, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, UINT32 count, BOOL no_csum) {
-    changed_extent* ce;
-    changed_extent_ref* cer;
-    LIST_ENTRY* le;
-    
-    ce = get_changed_extent_item(c, address, size, no_csum);
-    
-    if (!ce) {
-        ERR("get_changed_extent_item failed\n");
-        return;
-    }
-    
-    le = ce->refs.Flink;
-    while (le != &ce->refs) {
-        cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
-        
-        if (cer->edr.root == root && cer->edr.objid == objid && cer->edr.offset == offset) {
-            ce->count += count;
-            cer->edr.count += count;
-            return;
-        }
-        
-        le = le->Flink;
-    }
-    
-    cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
-    
-    if (!cer) {
-        ERR("out of memory\n");
-        return;
+        add_rollback(fcb->Vcb, rollback, ROLLBACK_DELETE_EXTENT, re);
     }
-    
-    cer->edr.root = root;
-    cer->edr.objid = objid;
-    cer->edr.offset = offset;
-    cer->edr.count = count;
-    
-    InsertTailList(&ce->refs, &cer->list_entry);
-    
-    ce->count += count;
 }
 
 BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start_data, UINT64 length, BOOL prealloc, void* data,
@@ -6169,14 +2379,6 @@ BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start
 //     }
 // #endif
     
-    if (data) {
-        Status = do_write_data(Vcb, address, data, length, changed_sector_list, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("do_write_data returned %08x\n", Status);
-            return FALSE;
-        }
-    }
-    
     // add extent data to inode
     ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
     if (!ed) {
@@ -6199,132 +2401,33 @@ BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start
     
     if (!add_extent_to_fcb(fcb, start_data, ed, edsize, TRUE, rollback)) {
         ERR("add_extent_to_fcb failed\n");
-        ExFreePool(ed);
-        return FALSE;
-    }
-    
-    increase_chunk_usage(c, length);
-    space_list_subtract(Vcb, c, FALSE, address, length, rollback);
-    
-    fcb->inode_item.st_blocks += decoded_size;
-    
-    fcb->extents_changed = TRUE;
-    mark_fcb_dirty(fcb);
-    
-    ExAcquireResourceExclusiveLite(&c->changed_extents_lock, TRUE);
-    
-    add_changed_extent_ref(c, address, length, fcb->subvol->id, fcb->inode, start_data, 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
-    
-    ExReleaseResourceLite(&c->changed_extents_lock);
-
-    return TRUE;
-}
-
-static BOOL extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data,
-                        LIST_ENTRY* changed_sector_list, extent* ext, chunk* c, PIRP Irp, LIST_ENTRY* rollback) {
-    EXTENT_DATA* ed;
-    EXTENT_DATA2 *ed2, *ed2orig;
-    extent* newext;
-    UINT64 addr, origsize;
-    NTSTATUS Status;
-    LIST_ENTRY* le;
-    
-    TRACE("(%p, (%llx, %llx), %llx, %llx, %p, %p, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data,
-                                                              length, data, changed_sector_list, ext, c, rollback);
-    
-    ed2orig = (EXTENT_DATA2*)ext->data->data;
-    
-    origsize = ed2orig->size;
-    addr = ed2orig->address + ed2orig->size;
-    
-    Status = write_data_complete(Vcb, addr, data, length, Irp, c);
-    if (!NT_SUCCESS(Status)) {
-        ERR("write_data returned %08x\n", Status);
-        return FALSE;
-    }
-    
-    le = fcb->extents.Flink;
-    while (le != &fcb->extents) {
-        extent* ext2 = CONTAINING_RECORD(le, extent, list_entry);
-            
-        if (!ext2->ignore && (ext2->data->type == EXTENT_TYPE_REGULAR || ext2->data->type == EXTENT_TYPE_PREALLOC)) {
-            EXTENT_DATA2* ed2b = (EXTENT_DATA2*)ext2->data->data;
-            
-            if (ed2b->address == ed2orig->address) {
-                ed2b->size = origsize + length;
-                ext2->data->decoded_size = origsize + length;
-            }
-        }
-                
-        le = le->Flink;
-    }
-    
-    ed = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG);
-    if (!ed) {
-        ERR("out of memory\n");
-        return FALSE;
-    }
-    
-    newext = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG);
-    if (!newext) {
-        ERR("out of memory\n");
-        ExFreePool(ed);
-        return FALSE;
-    }
-    
-    RtlCopyMemory(ed, ext->data, ext->datalen);
-
-    ed2 = (EXTENT_DATA2*)ed->data;
-    ed2->offset = ed2orig->offset + ed2orig->num_bytes;
-    ed2->num_bytes = length;
-    
-    RtlCopyMemory(newext, ext, sizeof(extent));
-    newext->offset = ext->offset + ed2orig->num_bytes;
-    newext->data = ed;
-    
-    InsertHeadList(&ext->list_entry, &newext->list_entry);
-    
-    add_insert_extent_rollback(rollback, fcb, newext);
-    
-    Status = update_changed_extent_ref(Vcb, c, ed2orig->address, origsize, fcb->subvol->id, fcb->inode, newext->offset - ed2->offset,
-                                       1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, ed2->size, Irp);
-
-    if (!NT_SUCCESS(Status)) {
-        ERR("update_changed_extent_ref returned %08x\n", Status);
-        return FALSE;
-    }
-    
-    if (changed_sector_list) {
-        int i;
-        changed_sector* sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
-        if (!sc) {
-            ERR("out of memory\n");
-            return FALSE;
-        }
-        
-        sc->ol.key = addr;
-        sc->length = length / Vcb->superblock.sector_size;
-        sc->deleted = FALSE;
-        
-        sc->checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sc->length, ALLOC_TAG);
-        if (!sc->checksums) {
-            ERR("out of memory\n");
-            ExFreePool(sc);
-            return FALSE;
-        }
-        
-        for (i = 0; i < sc->length; i++) {
-            sc->checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
-        }
-        insert_into_ordered_list(changed_sector_list, &sc->ol);
+        ExFreePool(ed);
+        return FALSE;
     }
     
     increase_chunk_usage(c, length);
-      
-    space_list_subtract(Vcb, c, FALSE, addr, length, NULL); // no rollback as we don't reverse extending the extent
-     
-    fcb->inode_item.st_blocks += length;
+    space_list_subtract(Vcb, c, FALSE, address, length, rollback);
+    
+    fcb->inode_item.st_blocks += decoded_size;
+    
+    fcb->extents_changed = TRUE;
+    fcb->inode_item_changed = TRUE;
+    mark_fcb_dirty(fcb);
+    
+    ExAcquireResourceExclusiveLite(&c->changed_extents_lock, TRUE);
+    
+    add_changed_extent_ref(c, address, length, fcb->subvol->id, fcb->inode, start_data, 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
+    
+    ExReleaseResourceLite(&c->changed_extents_lock);
     
+    ExReleaseResourceLite(&c->lock);
+      
+    if (data) {
+        Status = do_write_data(Vcb, address, data, length, changed_sector_list, Irp);
+        if (!NT_SUCCESS(Status))
+            ERR("do_write_data returned %08x\n", Status);
+    }
+
     return TRUE;
 }
 
@@ -6359,11 +2462,6 @@ static BOOL try_extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data,
     if (!ext)
         return FALSE;
 
-    if (!ext->unique) {
-        TRACE("extent was not unique\n");
-        return FALSE;
-    }
-    
     ed = ext->data;
     
     if (ext->datalen < sizeof(EXTENT_DATA)) {
@@ -6371,8 +2469,8 @@ static BOOL try_extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data,
         return FALSE;
     }
     
-    if (ed->type != EXTENT_TYPE_REGULAR) {
-        TRACE("not extending extent which is not EXTENT_TYPE_REGULAR\n");
+    if (ed->type != EXTENT_TYPE_REGULAR && ed->type != EXTENT_TYPE_PREALLOC) {
+        TRACE("not extending extent which is not regular or prealloc\n");
         return FALSE;
     }
     
@@ -6388,31 +2486,6 @@ static BOOL try_extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data,
         return FALSE;
     }
     
-    if (ed->compression != BTRFS_COMPRESSION_NONE) {
-        TRACE("not extending a compressed extent\n");
-        return FALSE;
-    }
-    
-    if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
-        WARN("encryption not supported\n");
-        return FALSE;
-    }
-    
-    if (ed->encoding != BTRFS_ENCODING_NONE) {
-        WARN("other encodings not supported\n");
-        return FALSE;
-    }
-    
-    if (ed2->size - ed2->offset != ed2->num_bytes) {
-        TRACE("last EXTENT_DATA does not run all the way to the end of the extent\n");
-        return FALSE;
-    }
-    
-    if (ed2->size >= MAX_EXTENT_SIZE) {
-        TRACE("extent size was too large to extend (%llx >= %llx)\n", ed2->size, (UINT64)MAX_EXTENT_SIZE);
-        return FALSE;
-    }
-    
     c = get_chunk_from_address(Vcb, ed2->address);
     
     ExAcquireResourceExclusiveLite(&c->lock, TRUE);
@@ -6422,14 +2495,14 @@ static BOOL try_extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data,
         s = CONTAINING_RECORD(le, space, list_entry);
         
         if (s->address == ed2->address + ed2->size) {
-            UINT64 newlen = min(min(s->size, length), MAX_EXTENT_SIZE - ed2->size);
+            UINT64 newlen = min(min(s->size, length), MAX_EXTENT_SIZE);
             
-            success = extend_data(Vcb, fcb, start_data, newlen, data, changed_sector_list, ext, c, Irp, rollback);
+            success = insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, changed_sector_list, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen);
             
             if (success)
                 *written += newlen;
             
-            break;
+            return success;
         } else if (s->address > ed2->address + ed2->size)
             break;
         
@@ -6438,7 +2511,7 @@ static BOOL try_extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data,
     
     ExReleaseResourceLite(&c->lock);
     
-    return success;
+    return FALSE;
 }
 
 static NTSTATUS insert_prealloc_extent(fcb* fcb, UINT64 start, UINT64 length, LIST_ENTRY* rollback) {
@@ -6466,17 +2539,18 @@ static NTSTATUS insert_prealloc_extent(fcb* fcb, UINT64 start, UINT64 length, LI
         while (le != &fcb->Vcb->chunks) {
             c = CONTAINING_RECORD(le, chunk, list_entry);
             
-            ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-            
-            if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) {
-                if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen)) {
-                    ExReleaseResourceLite(&c->lock);
-                    ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
-                    goto cont;
+            if (!c->readonly) {
+                ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+                
+                if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) {
+                    if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen)) {
+                        ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
+                        goto cont;
+                    }
                 }
+                
+                ExReleaseResourceLite(&c->lock);
             }
-            
-            ExReleaseResourceLite(&c->lock);
 
             le = le->Flink;
         }
@@ -6491,10 +2565,8 @@ static NTSTATUS insert_prealloc_extent(fcb* fcb, UINT64 start, UINT64 length, LI
             ExAcquireResourceExclusiveLite(&c->lock, TRUE);
             
             if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) {
-                if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen)) {
-                    ExReleaseResourceLite(&c->lock);
+                if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen))
                     goto cont;
-                }
             }
             
             ExReleaseResourceLite(&c->lock);
@@ -6532,8 +2604,6 @@ NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT6
     
     TRACE("(%p, (%llx, %llx), %llx, %llx, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data, changed_sector_list);
     
-    // FIXME - split data up if not enough space for just one extent
-    
     if (start_data > 0) {
         try_extend_data(Vcb, fcb, start_data, length, data, changed_sector_list, Irp, &written, rollback);
         
@@ -6561,14 +2631,14 @@ NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT6
         while (le != &Vcb->chunks) {
             c = CONTAINING_RECORD(le, chunk, list_entry);
             
-            ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-            
-            if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen) {
-                if (insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, changed_sector_list, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen)) {
+            if (!c->readonly) {
+                ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+                
+                if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen &&
+                    insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, changed_sector_list, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen)) {
                     written += newlen;
                     
                     if (written == orig_length) {
-                        ExReleaseResourceLite(&c->lock);
                         ExReleaseResourceLite(&Vcb->chunk_lock);
                         return STATUS_SUCCESS;
                     } else {
@@ -6578,10 +2648,9 @@ NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT6
                         data = &((UINT8*)data)[newlen];
                         break;
                     }
-                }
+                } else
+                    ExReleaseResourceLite(&c->lock);
             }
-            
-            ExReleaseResourceLite(&c->lock);
 
             le = le->Flink;
         }
@@ -6599,23 +2668,20 @@ NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT6
             
             ExAcquireResourceExclusiveLite(&c->lock, TRUE);
             
-            if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen) {
-                if (insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, changed_sector_list, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen)) {
-                    written += newlen;
-                    
-                    if (written == orig_length) {
-                        ExReleaseResourceLite(&c->lock);
-                        return STATUS_SUCCESS;
-                    } else {
-                        done = TRUE;
-                        start_data += newlen;
-                        length -= newlen;
-                        data = &((UINT8*)data)[newlen];
-                    }
+            if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen &&
+                insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, changed_sector_list, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen)) {
+                written += newlen;
+                
+                if (written == orig_length)
+                    return STATUS_SUCCESS;
+                else {
+                    done = TRUE;
+                    start_data += newlen;
+                    length -= newlen;
+                    data = &((UINT8*)data)[newlen];
                 }
-            }
-            
-            ExReleaseResourceLite(&c->lock);
+            } else            
+                ExReleaseResourceLite(&c->lock);
         } else
             ExReleaseResourceLite(&Vcb->chunk_lock);
         
@@ -6630,195 +2696,6 @@ NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT6
     return STATUS_DISK_FULL;
 }
 
-static void update_checksum_tree(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
-    LIST_ENTRY* le = Vcb->sector_checksums.Flink;
-    changed_sector* cs;
-    traverse_ptr tp, next_tp;
-    KEY searchkey;
-    UINT32* data;
-    NTSTATUS Status;
-    
-    if (!Vcb->checksum_root) {
-        ERR("no checksum root\n");
-        goto exit;
-    }
-    
-    while (le != &Vcb->sector_checksums) {
-        UINT64 startaddr, endaddr;
-        ULONG len;
-        UINT32* checksums;
-        RTL_BITMAP bmp;
-        ULONG* bmparr;
-        ULONG runlength, index;
-        
-        cs = (changed_sector*)le;
-        
-        searchkey.obj_id = EXTENT_CSUM_ID;
-        searchkey.obj_type = TYPE_EXTENT_CSUM;
-        searchkey.offset = cs->ol.key;
-        
-        // FIXME - create checksum_root if it doesn't exist at all
-        
-        Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
-        if (!NT_SUCCESS(Status)) { // tree is completely empty
-            // FIXME - do proper check here that tree is empty
-            if (!cs->deleted) {
-                checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * cs->length, ALLOC_TAG);
-                if (!checksums) {
-                    ERR("out of memory\n");
-                    goto exit;
-                }
-                
-                RtlCopyMemory(checksums, cs->checksums, sizeof(UINT32) * cs->length);
-                
-                if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, cs->ol.key, checksums, sizeof(UINT32) * cs->length, NULL, Irp, rollback)) {
-                    ERR("insert_tree_item failed\n");
-                    ExFreePool(checksums);
-                    goto exit;
-                }
-            }
-        } else {
-            UINT32 tplen;
-            
-            // FIXME - check entry is TYPE_EXTENT_CSUM?
-            
-            if (tp.item->key.offset < cs->ol.key && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= cs->ol.key)
-                startaddr = tp.item->key.offset;
-            else
-                startaddr = cs->ol.key;
-            
-            searchkey.obj_id = EXTENT_CSUM_ID;
-            searchkey.obj_type = TYPE_EXTENT_CSUM;
-            searchkey.offset = cs->ol.key + (cs->length * Vcb->superblock.sector_size);
-            
-            Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
-            if (!NT_SUCCESS(Status)) {
-                ERR("error - find_item returned %08x\n", Status);
-                goto exit;
-            }
-            
-            tplen = tp.item->size / sizeof(UINT32);
-            
-            if (tp.item->key.offset + (tplen * Vcb->superblock.sector_size) >= cs->ol.key + (cs->length * Vcb->superblock.sector_size))
-                endaddr = tp.item->key.offset + (tplen * Vcb->superblock.sector_size);
-            else
-                endaddr = cs->ol.key + (cs->length * Vcb->superblock.sector_size);
-            
-            TRACE("cs starts at %llx (%x sectors)\n", cs->ol.key, cs->length);
-            TRACE("startaddr = %llx\n", startaddr);
-            TRACE("endaddr = %llx\n", endaddr);
-            
-            len = (endaddr - startaddr) / Vcb->superblock.sector_size;
-            
-            checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * len, ALLOC_TAG);
-            if (!checksums) {
-                ERR("out of memory\n");
-                goto exit;
-            }
-            
-            bmparr = ExAllocatePoolWithTag(PagedPool, sizeof(ULONG) * ((len/8)+1), ALLOC_TAG);
-            if (!bmparr) {
-                ERR("out of memory\n");
-                ExFreePool(checksums);
-                goto exit;
-            }
-                
-            RtlInitializeBitMap(&bmp, bmparr, len);
-            RtlSetAllBits(&bmp);
-            
-            searchkey.obj_id = EXTENT_CSUM_ID;
-            searchkey.obj_type = TYPE_EXTENT_CSUM;
-            searchkey.offset = cs->ol.key;
-            
-            Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
-            if (!NT_SUCCESS(Status)) {
-                ERR("error - find_item returned %08x\n", Status);
-                goto exit;
-            }
-            
-            // set bit = free space, cleared bit = allocated sector
-            
-    //         ERR("start loop\n");
-            while (tp.item->key.offset < endaddr) {
-    //             ERR("%llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-                if (tp.item->key.offset >= startaddr) {
-                    if (tp.item->size > 0) {
-                        RtlCopyMemory(&checksums[(tp.item->key.offset - startaddr) / Vcb->superblock.sector_size], tp.item->data, tp.item->size);
-                        RtlClearBits(&bmp, (tp.item->key.offset - startaddr) / Vcb->superblock.sector_size, tp.item->size / sizeof(UINT32));
-                    }
-                    
-                    delete_tree_item(Vcb, &tp, rollback);
-                }
-                
-                if (find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) {
-                    tp = next_tp;
-                } else
-                    break;
-            }
-    //         ERR("end loop\n");
-            
-            if (cs->deleted) {
-                RtlSetBits(&bmp, (cs->ol.key - startaddr) / Vcb->superblock.sector_size, cs->length);
-            } else {
-                RtlCopyMemory(&checksums[(cs->ol.key - startaddr) / Vcb->superblock.sector_size], cs->checksums, cs->length * sizeof(UINT32));
-                RtlClearBits(&bmp, (cs->ol.key - startaddr) / Vcb->superblock.sector_size, cs->length);
-            }
-            
-            runlength = RtlFindFirstRunClear(&bmp, &index);
-            
-            while (runlength != 0) {
-                do {
-                    ULONG rl;
-                    
-                    if (runlength * sizeof(UINT32) > MAX_CSUM_SIZE)
-                        rl = MAX_CSUM_SIZE / sizeof(UINT32);
-                    else
-                        rl = runlength;
-                    
-                    data = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * rl, ALLOC_TAG);
-                    if (!data) {
-                        ERR("out of memory\n");
-                        ExFreePool(bmparr);
-                        ExFreePool(checksums);
-                        goto exit;
-                    }
-                    
-                    RtlCopyMemory(data, &checksums[index], sizeof(UINT32) * rl);
-                    
-                    if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, startaddr + (index * Vcb->superblock.sector_size), data, sizeof(UINT32) * rl, NULL, Irp, rollback)) {
-                        ERR("insert_tree_item failed\n");
-                        ExFreePool(data);
-                        ExFreePool(bmparr);
-                        ExFreePool(checksums);
-                        goto exit;
-                    }
-                    
-                    runlength -= rl;
-                    index += rl;
-                } while (runlength > 0);
-                
-                runlength = RtlFindNextForwardRunClear(&bmp, index, &index);
-            }
-            
-            ExFreePool(bmparr);
-            ExFreePool(checksums);
-        }
-        
-        le = le->Flink;
-    }
-    
-exit:
-    while (!IsListEmpty(&Vcb->sector_checksums)) {
-        le = RemoveHeadList(&Vcb->sector_checksums);
-        cs = (changed_sector*)le;
-        
-        if (cs->checksums)
-            ExFreePool(cs->checksums);
-        
-        ExFreePool(cs);
-    }
-}
-
 void commit_checksum_changes(device_extension* Vcb, LIST_ENTRY* changed_sector_list) {
     while (!IsListEmpty(changed_sector_list)) {
         LIST_ENTRY* le = RemoveHeadList(changed_sector_list);
@@ -6839,6 +2716,7 @@ NTSTATUS truncate_file(fcb* fcb, UINT64 end, PIRP Irp, LIST_ENTRY* rollback) {
     }
     
     fcb->inode_item.st_size = end;
+    fcb->inode_item_changed = TRUE;
     TRACE("setting st_size to %llx\n", end);
 
     fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size);
@@ -6919,6 +2797,8 @@ NTSTATUS extend_file(fcb* fcb, file_ref* fileref, UINT64 end, BOOL prealloc, PIR
                 RtlCopyMemory(data, ed->data, origlength);
                 
                 fcb->inode_item.st_blocks -= origlength;
+                fcb->inode_item_changed = TRUE;
+                mark_fcb_dirty(fcb);
                 
                 remove_fcb_extent(fcb, ext, rollback);
                 
@@ -7002,10 +2882,12 @@ NTSTATUS extend_file(fcb* fcb, file_ref* fileref, UINT64 end, BOOL prealloc, PIR
                     }
                     
                     fcb->extents_changed = TRUE;
-                    mark_fcb_dirty(fcb);
                 }
                 
                 fcb->inode_item.st_size = end;
+                fcb->inode_item_changed = TRUE;
+                mark_fcb_dirty(fcb);
+                
                 TRACE("setting st_size to %llx\n", end);
                 
                 TRACE("newalloc = %llx\n", newalloc);
@@ -7027,6 +2909,7 @@ NTSTATUS extend_file(fcb* fcb, file_ref* fileref, UINT64 end, BOOL prealloc, PIR
                 }
                 
                 fcb->extents_changed = TRUE;
+                fcb->inode_item_changed = TRUE;
                 mark_fcb_dirty(fcb);
                 
                 fcb->inode_item.st_size = end;
@@ -7064,6 +2947,7 @@ NTSTATUS extend_file(fcb* fcb, file_ref* fileref, UINT64 end, BOOL prealloc, PIR
                 }
                 
                 fcb->extents_changed = TRUE;
+                fcb->inode_item_changed = TRUE;
                 mark_fcb_dirty(fcb);
                 
                 fcb->inode_item.st_size = end;
@@ -7079,206 +2963,6 @@ NTSTATUS extend_file(fcb* fcb, file_ref* fileref, UINT64 end, BOOL prealloc, PIR
     return STATUS_SUCCESS;
 }
 
-// #ifdef DEBUG_PARANOID
-// static void print_loaded_trees(tree* t, int spaces) {
-//     char pref[10];
-//     int i;
-//     LIST_ENTRY* le;
-//     
-//     for (i = 0; i < spaces; i++) {
-//         pref[i] = ' ';
-//     }
-//     pref[spaces] = 0;
-//     
-//     if (!t) {
-//         ERR("%s(not loaded)\n", pref);
-//         return;
-//     }
-//     
-//     le = t->itemlist.Flink;
-//     while (le != &t->itemlist) {
-//         tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
-//         
-//         ERR("%s%llx,%x,%llx ignore=%s\n", pref, td->key.obj_id, td->key.obj_type, td->key.offset, td->ignore ? "TRUE" : "FALSE");
-//         
-//         if (t->header.level > 0) {
-//             print_loaded_trees(td->treeholder.tree, spaces+1);
-//         }
-//         
-//         le = le->Flink;
-//     }
-// }
-
-// static void check_extents_consistent(device_extension* Vcb, fcb* fcb) {
-//     KEY searchkey;
-//     traverse_ptr tp, next_tp;
-//     UINT64 length, oldlength, lastoff, alloc;
-//     NTSTATUS Status;
-//     EXTENT_DATA* ed;
-//     EXTENT_DATA2* ed2;
-//     
-//     if (fcb->ads || fcb->inode_item.st_size == 0 || fcb->deleted)
-//         return;
-//     
-//     TRACE("inode = %llx, subvol = %llx\n", fcb->inode, fcb->subvol->id);
-//     
-//     searchkey.obj_id = fcb->inode;
-//     searchkey.obj_type = TYPE_EXTENT_DATA;
-//     searchkey.offset = 0;
-//     
-//     Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
-//     if (!NT_SUCCESS(Status)) {
-//         ERR("error - find_item returned %08x\n", Status);
-//         goto failure;
-//     }
-//     
-//     if (keycmp(&searchkey, &tp.item->key)) {
-//         ERR("could not find EXTENT_DATA at offset 0\n");
-//         goto failure;
-//     }
-//     
-//     if (tp.item->size < sizeof(EXTENT_DATA)) {
-//         ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
-//         goto failure;
-//     }
-//     
-//     ed = (EXTENT_DATA*)tp.item->data;
-//     ed2 = (EXTENT_DATA2*)&ed->data[0];
-//     
-//     length = oldlength = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
-//     lastoff = tp.item->key.offset;
-//     
-//     TRACE("(%llx,%x,%llx) length = %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, length);
-//     
-//     alloc = 0;
-//     if (ed->type != EXTENT_TYPE_REGULAR || ed2->address != 0) {
-//         alloc += length;
-//     }
-//     
-//     while (find_next_item(Vcb, &tp, &next_tp, FALSE)) {
-//         if (next_tp.item->key.obj_id != searchkey.obj_id || next_tp.item->key.obj_type != searchkey.obj_type)
-//             break;
-//         
-//         tp = next_tp;
-//         
-//         if (tp.item->size < sizeof(EXTENT_DATA)) {
-//             ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
-//             goto failure;
-//         }
-//         
-//         ed = (EXTENT_DATA*)tp.item->data;
-//         ed2 = (EXTENT_DATA2*)&ed->data[0];
-//     
-//         length = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
-//     
-//         TRACE("(%llx,%x,%llx) length = %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, length);
-//         
-//         if (tp.item->key.offset != lastoff + oldlength) {
-//             ERR("EXTENT_DATA in %llx,%llx was at %llx, expected %llx\n", fcb->subvol->id, fcb->inode, tp.item->key.offset, lastoff + oldlength);
-//             goto failure;
-//         }
-//         
-//         if (ed->type != EXTENT_TYPE_REGULAR || ed2->address != 0) {
-//             alloc += length;
-//         }
-//         
-//         oldlength = length;
-//         lastoff = tp.item->key.offset;
-//     }
-//     
-//     if (alloc != fcb->inode_item.st_blocks) {
-//         ERR("allocation size was %llx, expected %llx\n", alloc, fcb->inode_item.st_blocks);
-//         goto failure;
-//     }
-//     
-// //     if (fcb->inode_item.st_blocks != lastoff + oldlength) {
-// //         ERR("extents finished at %x, expected %x\n", (UINT32)(lastoff + oldlength), (UINT32)fcb->inode_item.st_blocks);
-// //         goto failure;
-// //     }
-//     
-//     return;
-//     
-// failure:
-//     if (fcb->subvol->treeholder.tree)
-//         print_loaded_trees(fcb->subvol->treeholder.tree, 0);
-// 
-//     int3;
-// }
-
-// static void check_extent_tree_consistent(device_extension* Vcb) {
-//     KEY searchkey;
-//     traverse_ptr tp, next_tp;
-//     UINT64 lastaddr;
-//     BOOL b, inconsistency;
-//     
-//     searchkey.obj_id = 0;
-//     searchkey.obj_type = 0;
-//     searchkey.offset = 0;
-//     
-//     if (!find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE)) {
-//         ERR("error - could not find any entries in extent_root\n");
-//         int3;
-//     }
-//     
-//     lastaddr = 0;
-//     inconsistency = FALSE;
-//     
-//     do {
-//         if (tp.item->key.obj_type == TYPE_EXTENT_ITEM) {
-// //             ERR("%x,%x,%x\n", (UINT32)tp.item->key.obj_id, tp.item->key.obj_type, (UINT32)tp.item->key.offset);
-//             
-//             if (tp.item->key.obj_id < lastaddr) {
-// //                 ERR("inconsistency!\n");
-// //                 int3;
-//                 inconsistency = TRUE;
-//             }
-//             
-//             lastaddr = tp.item->key.obj_id + tp.item->key.offset;
-//         }
-//         
-//         b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
-//         if (b) {
-//             free_traverse_ptr(&tp);
-//             tp = next_tp;
-//         }
-//     } while (b);
-//     
-//     free_traverse_ptr(&tp);
-//     
-//     if (!inconsistency)
-//         return;
-//     
-//     ERR("Inconsistency detected:\n");
-//     
-//     if (!find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE)) {
-//         ERR("error - could not find any entries in extent_root\n");
-//         int3;
-//     }
-//     
-//     do {
-//         if (tp.item->key.obj_type == TYPE_EXTENT_ITEM) {
-//             ERR("%x,%x,%x\n", (UINT32)tp.item->key.obj_id, tp.item->key.obj_type, (UINT32)tp.item->key.offset);
-//             
-//             if (tp.item->key.obj_id < lastaddr) {
-//                 ERR("inconsistency!\n");
-//             }
-//             
-//             lastaddr = tp.item->key.obj_id + tp.item->key.offset;
-//         }
-//         
-//         b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
-//         if (b) {
-//             free_traverse_ptr(&tp);
-//             tp = next_tp;
-//         }
-//     } while (b);
-//     
-//     free_traverse_ptr(&tp);
-//     
-//     int3;
-// }
-// #endif
-
 static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, UINT64 end_data, void* data, UINT64* written,
                                        LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback) {
     EXTENT_DATA* ed = ext->data;
@@ -7381,7 +3065,7 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data,
         newext1->offset = ext->offset;
         newext1->data = ned;
         newext1->datalen = ext->datalen;
-        newext1->unique = FALSE;
+        newext1->unique = ext->unique;
         newext1->ignore = FALSE;
         InsertHeadList(&ext->list_entry, &newext1->list_entry);
         
@@ -7390,7 +3074,7 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data,
         newext2->offset = end_data;
         newext2->data = nedb;
         newext2->datalen = ext->datalen;
-        newext2->unique = FALSE;
+        newext2->unique = ext->unique;
         newext2->ignore = FALSE;
         InsertHeadList(&newext1->list_entry, &newext2->list_entry);
         
@@ -7402,7 +3086,7 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data,
             ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
         else {
             Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
-                                                fcb->inode_item.flags & BTRFS_INODE_NODATASUM, ed2->size, Irp);
+                                                fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
             
             if (!NT_SUCCESS(Status)) {
                 ERR("update_changed_extent_ref returned %08x\n", Status);
@@ -7469,7 +3153,7 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data,
         newext1->offset = ext->offset;
         newext1->data = ned;
         newext1->datalen = ext->datalen;
-        newext1->unique = FALSE;
+        newext1->unique = ext->unique;
         newext1->ignore = FALSE;
         InsertHeadList(&ext->list_entry, &newext1->list_entry);
         
@@ -7478,7 +3162,7 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data,
         newext2->offset = start_data;
         newext2->data = nedb;
         newext2->datalen = ext->datalen;
-        newext2->unique = FALSE;
+        newext2->unique = ext->unique;
         newext2->ignore = FALSE;
         InsertHeadList(&newext1->list_entry, &newext2->list_entry);
         
@@ -7490,7 +3174,7 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data,
             ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
         else {
             Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
-                                               fcb->inode_item.flags & BTRFS_INODE_NODATASUM, ed2->size, Irp);
+                                               fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
             
             if (!NT_SUCCESS(Status)) {
                 ERR("update_changed_extent_ref returned %08x\n", Status);
@@ -7583,7 +3267,7 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data,
         newext1->offset = ext->offset;
         newext1->data = ned;
         newext1->datalen = ext->datalen;
-        newext1->unique = FALSE;
+        newext1->unique = ext->unique;
         newext1->ignore = FALSE;
         InsertHeadList(&ext->list_entry, &newext1->list_entry);
         
@@ -7592,7 +3276,7 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data,
         newext2->offset = start_data;
         newext2->data = nedb;
         newext2->datalen = ext->datalen;
-        newext2->unique = FALSE;
+        newext2->unique = ext->unique;
         newext2->ignore = FALSE;
         InsertHeadList(&newext1->list_entry, &newext2->list_entry);
         
@@ -7601,7 +3285,7 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data,
         newext3->offset = end_data;
         newext3->data = nedc;
         newext3->datalen = ext->datalen;
-        newext3->unique = FALSE;
+        newext3->unique = ext->unique;
         newext3->ignore = FALSE;
         InsertHeadList(&newext2->list_entry, &newext3->list_entry);
         
@@ -7613,7 +3297,7 @@ static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data,
             ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
         else {
             Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 2,
-                                               fcb->inode_item.flags & BTRFS_INODE_NODATASUM, ed2->size, Irp);
+                                               fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
             
             if (!NT_SUCCESS(Status)) {
                 ERR("update_changed_extent_ref returned %08x\n", Status);
@@ -7648,7 +3332,6 @@ NTSTATUS do_write_file(fcb* fcb, UINT64 start, UINT64 end_data, void* data, LIST
             EXTENT_DATA* ed = ext->data;
             EXTENT_DATA2* ed2 = ed->type == EXTENT_TYPE_INLINE ? NULL : (EXTENT_DATA2*)ed->data;
             UINT64 len;
-            BOOL nocow;
             
             len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
             
@@ -7658,9 +3341,7 @@ NTSTATUS do_write_file(fcb* fcb, UINT64 start, UINT64 end_data, void* data, LIST
             if (ext->offset > start + written + length)
                 break;
             
-            nocow = (ext->unique && fcb->inode_item.flags & BTRFS_INODE_NODATACOW) || ed->type == EXTENT_TYPE_PREALLOC;
-           
-            if (nocow) {
+            if ((fcb->inode_item.flags & BTRFS_INODE_NODATACOW || ed->type == EXTENT_TYPE_PREALLOC) && ext->unique) {
                 if (max(last_cow_start, start + written) < ext->offset) {
                     UINT64 start_write = max(last_cow_start, start + written);
                     
@@ -7769,9 +3450,6 @@ nextitem:
         }
     }
     
-    // FIXME - make extending work again (here?)
-    // FIXME - make maximum extent size 128 MB again (here?)
-    
 #ifdef DEBUG_PARANOID
     last_off = 0xffffffffffffffff;
     
@@ -7823,6 +3501,7 @@ NTSTATUS write_compressed(fcb* fcb, UINT64 start_data, UINT64 end_data, void* da
         // bother with the rest of it.
         if (s2 == 0 && e2 == COMPRESSED_EXTENT_SIZE && !compressed && !fcb->Vcb->options.compress_force) {
             fcb->inode_item.flags |= BTRFS_INODE_NOCOMPRESS;
+            fcb->inode_item_changed = TRUE;
             mark_fcb_dirty(fcb);
             
             // write subsequent data non-compressed
@@ -7879,7 +3558,7 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void
     ccb = FileObject->FsContext2;
     fileref = ccb ? ccb->fileref : NULL;
     
-    if (fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK) {
+    if (!fcb->ads && fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK) {
         WARN("tried to write to something other than a file or symlink (inode %llx, type %u, %p, %p)\n", fcb->inode, fcb->type, &fcb->type, fcb);
         return STATUS_INVALID_DEVICE_REQUEST;
     }
@@ -7933,12 +3612,20 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void
             tree_lock = TRUE;
     }
         
-    if (no_cache && !ExIsResourceAcquiredExclusiveLite(fcb->Header.Resource)) {
-        if (!ExAcquireResourceExclusiveLite(fcb->Header.Resource, wait)) {
-            Status = STATUS_PENDING;
-            goto end;
-        } else
-            fcb_lock = TRUE;
+    if (no_cache) {
+        if (pagefile) {
+            if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) {
+                Status = STATUS_PENDING;
+                goto end;
+            } else
+                fcb_lock = TRUE;
+        } else if (!ExIsResourceAcquiredExclusiveLite(fcb->Header.Resource)) {
+            if (!ExAcquireResourceExclusiveLite(fcb->Header.Resource, wait)) {
+                Status = STATUS_PENDING;
+                goto end;
+            } else
+                fcb_lock = TRUE;
+        }
     }
     
     nocsum = fcb->ads ? TRUE : fcb->inode_item.flags & BTRFS_INODE_NODATASUM;
@@ -8220,7 +3907,9 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void
         
         origii->transid = Vcb->superblock.generation;
         origii->sequence++;
-        origii->st_ctime = now;
+        
+        if (!ccb->user_set_change_time)
+            origii->st_ctime = now;
         
         if (!fcb->ads) {
             if (changed_length) {
@@ -8229,9 +3918,14 @@ NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void
                 filter |= FILE_NOTIFY_CHANGE_SIZE;
             }
             
-            origii->st_mtime = now;
-            filter |= FILE_NOTIFY_CHANGE_LAST_WRITE;
-        }
+            if (!ccb->user_set_write_time) {
+                origii->st_mtime = now;
+                filter |= FILE_NOTIFY_CHANGE_LAST_WRITE;
+            }
+            
+            fcb->inode_item_changed = TRUE;
+        } else
+            fileref->parent->fcb->inode_item_changed = TRUE;
         
         mark_fcb_dirty(fcb->ads ? fileref->parent->fcb : fcb);
     }
@@ -8363,7 +4057,7 @@ NTSTATUS write_file(device_extension* Vcb, PIRP Irp, BOOL wait, BOOL deferred_wr
 exit:
 //     if (locked) {
         if (NT_SUCCESS(Status))
-            clear_rollback(&rollback);
+            clear_rollback(Vcb, &rollback);
         else
             do_rollback(Vcb, &rollback);
 //