[BTRFS]
author Pierre Schweitzer <pierre@reactos.org>
Fri, 8 Sep 2017 08:02:43 +0000 (08:02 +0000)
committer Pierre Schweitzer <pierre@reactos.org>
Fri, 8 Sep 2017 08:02:43 +0000 (08:02 +0000)
Sync btrfs to 1.0.

CORE-13763

svn path=/trunk/; revision=75786

35 files changed:
reactos/drivers/filesystems/btrfs/CMakeLists.txt
reactos/drivers/filesystems/btrfs/balance.c
reactos/drivers/filesystems/btrfs/btrfs.c
reactos/drivers/filesystems/btrfs/btrfs.h
reactos/drivers/filesystems/btrfs/btrfs.rc
reactos/drivers/filesystems/btrfs/btrfs_drv.h
reactos/drivers/filesystems/btrfs/btrfsioctl.h
reactos/drivers/filesystems/btrfs/cache.c
reactos/drivers/filesystems/btrfs/calcthread.c
reactos/drivers/filesystems/btrfs/compress.c
reactos/drivers/filesystems/btrfs/crc32c.c
reactos/drivers/filesystems/btrfs/create.c
reactos/drivers/filesystems/btrfs/devctrl.c
reactos/drivers/filesystems/btrfs/dirctrl.c
reactos/drivers/filesystems/btrfs/extent-tree.c
reactos/drivers/filesystems/btrfs/fastio.c
reactos/drivers/filesystems/btrfs/fileinfo.c
reactos/drivers/filesystems/btrfs/flushthread.c
reactos/drivers/filesystems/btrfs/free-space.c
reactos/drivers/filesystems/btrfs/fsctl.c
reactos/drivers/filesystems/btrfs/galois.c
reactos/drivers/filesystems/btrfs/guid.c [new file with mode: 0644]
reactos/drivers/filesystems/btrfs/pnp.c
reactos/drivers/filesystems/btrfs/read.c
reactos/drivers/filesystems/btrfs/registry.c
reactos/drivers/filesystems/btrfs/reparse.c
reactos/drivers/filesystems/btrfs/scrub.c [new file with mode: 0644]
reactos/drivers/filesystems/btrfs/search.c
reactos/drivers/filesystems/btrfs/security.c
reactos/drivers/filesystems/btrfs/send.c [new file with mode: 0644]
reactos/drivers/filesystems/btrfs/treefuncs.c
reactos/drivers/filesystems/btrfs/volume.c [new file with mode: 0755]
reactos/drivers/filesystems/btrfs/worker-thread.c
reactos/drivers/filesystems/btrfs/write.c
reactos/media/doc/README.FSD

index 581bab9..65e61d2 100644 (file)
@@ -20,13 +20,17 @@ list(APPEND SOURCE
     free-space.c
     fsctl.c
     galois.c
+    guid.c
     pnp.c
     read.c
     registry.c
     reparse.c
+    scrub.c
     search.c
     security.c
+    send.c
     treefuncs.c
+    volume.c
     worker-thread.c
     write.c
     btrfs_drv.h)
@@ -37,5 +41,4 @@ add_definitions(-D__KERNEL__)
 set_module_type(btrfs kernelmodedriver)
 target_link_libraries(btrfs rtlver ntoskrnl_vista zlib_solo wdmguid ${PSEH_LIB})
 add_importlibs(btrfs ntoskrnl hal)
-add_pch(btrfs btrfs_drv.h SOURCE)
 add_cd_file(TARGET btrfs DESTINATION reactos/system32/drivers NO_CAB FOR all)
index 63e8821..c961fc4 100644 (file)
@@ -1,22 +1,23 @@
-/* Copyright (c) Mark Harmstone 2016
- * 
+/* Copyright (c) Mark Harmstone 2016-17
+ *
  * This file is part of WinBtrfs.
- * 
+ *
  * WinBtrfs is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public Licence as published by
  * the Free Software Foundation, either version 3 of the Licence, or
  * (at your option) any later version.
- * 
+ *
  * WinBtrfs is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public Licence for more details.
- * 
+ *
  * You should have received a copy of the GNU Lesser General Public Licence
  * along with WinBtrfs.  If not, see <http://www.gnu.org/licenses/>. */
 
 #include "btrfs_drv.h"
 #include "btrfsioctl.h"
+#include <ntddstor.h>
 
 typedef struct {
     UINT64 address;
@@ -31,12 +32,13 @@ typedef struct {
 
 typedef struct {
     UINT8 type;
-    
+    UINT64 hash;
+
     union {
         TREE_BLOCK_REF tbr;
         SHARED_BLOCK_REF sbr;
     };
-    
+
     metadata_reloc* parent;
     BOOL top;
     LIST_ENTRY list_entry;
@@ -54,70 +56,81 @@ typedef struct {
 
 typedef struct {
     UINT8 type;
-    
+    UINT64 hash;
+
     union {
         EXTENT_DATA_REF edr;
         SHARED_DATA_REF sdr;
     };
-    
+
     metadata_reloc* parent;
     LIST_ENTRY list_entry;
 } data_reloc_ref;
 
-extern LIST_ENTRY volumes;
-extern ERESOURCE volumes_lock;
+#ifndef _MSC_VER // not in mingw yet
+#define DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED 0x80000000
+#endif
+
+#define BALANCE_UNIT 0x100000 // only read 1 MB at a time
 
-static NTSTATUS add_metadata_reloc(device_extension* Vcb, LIST_ENTRY* items, traverse_ptr* tp, BOOL skinny, metadata_reloc** mr2, chunk* c, LIST_ENTRY* rollback) {
+static NTSTATUS add_metadata_reloc(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, LIST_ENTRY* items, traverse_ptr* tp,
+                                   BOOL skinny, metadata_reloc** mr2, chunk* c, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
     metadata_reloc* mr;
     EXTENT_ITEM* ei;
     UINT16 len;
     UINT64 inline_rc;
     UINT8* ptr;
-    
+
     mr = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc), ALLOC_TAG);
     if (!mr) {
         ERR("out of memory\n");
         return STATUS_INSUFFICIENT_RESOURCES;
     }
-    
+
     mr->address = tp->item->key.obj_id;
     mr->data = NULL;
     mr->ei = (EXTENT_ITEM*)tp->item->data;
     mr->system = FALSE;
     InitializeListHead(&mr->refs);
-    
-    delete_tree_item(Vcb, tp, rollback);
-    
+
+    Status = delete_tree_item(Vcb, tp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("delete_tree_item returned %08x\n", Status);
+        ExFreePool(mr);
+        return Status;
+    }
+
     if (!c)
         c = get_chunk_from_address(Vcb, tp->item->key.obj_id);
-        
+
     if (c) {
         ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-        
-        decrease_chunk_usage(c, Vcb->superblock.node_size);
-        
-        space_list_add(Vcb, c, TRUE, tp->item->key.obj_id, Vcb->superblock.node_size, rollback);
-        
+
+        c->used -= Vcb->superblock.node_size;
+
+        space_list_add(c, tp->item->key.obj_id, Vcb->superblock.node_size, rollback);
+
         ExReleaseResourceLite(&c->lock);
     }
-    
+
     ei = (EXTENT_ITEM*)tp->item->data;
     inline_rc = 0;
-    
+
     len = tp->item->size - sizeof(EXTENT_ITEM);
     ptr = (UINT8*)tp->item->data + sizeof(EXTENT_ITEM);
     if (!skinny) {
         len -= sizeof(EXTENT_ITEM2);
         ptr += sizeof(EXTENT_ITEM2);
     }
-    
+
     while (len > 0) {
         UINT8 secttype = *ptr;
-        ULONG sectlen = secttype == TYPE_TREE_BLOCK_REF ? sizeof(TREE_BLOCK_REF) : (secttype == TYPE_SHARED_BLOCK_REF ? sizeof(SHARED_BLOCK_REF) : 0);
+        UINT16 sectlen = secttype == TYPE_TREE_BLOCK_REF ? sizeof(TREE_BLOCK_REF) : (secttype == TYPE_SHARED_BLOCK_REF ? sizeof(SHARED_BLOCK_REF) : 0);
         metadata_reloc_ref* ref;
-        
+
         len--;
-        
+
         if (sectlen > len) {
             ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, len, sectlen);
             return STATUS_INTERNAL_ERROR;
@@ -127,13 +140,13 @@ static NTSTATUS add_metadata_reloc(device_extension* Vcb, LIST_ENTRY* items, tra
             ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, secttype);
             return STATUS_INTERNAL_ERROR;
         }
-        
+
         ref = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc_ref), ALLOC_TAG);
         if (!ref) {
             ERR("out of memory\n");
             return STATUS_INSUFFICIENT_RESOURCES;
         }
-        
+
         if (secttype == TYPE_TREE_BLOCK_REF) {
             ref->type = TYPE_TREE_BLOCK_REF;
             RtlCopyMemory(&ref->tbr, ptr + sizeof(UINT8), sizeof(TREE_BLOCK_REF));
@@ -147,99 +160,108 @@ static NTSTATUS add_metadata_reloc(device_extension* Vcb, LIST_ENTRY* items, tra
             ExFreePool(ref);
             return STATUS_INTERNAL_ERROR;
         }
-        
+
         ref->parent = NULL;
         ref->top = FALSE;
         InsertTailList(&mr->refs, &ref->list_entry);
-        
+
         len -= sectlen;
         ptr += sizeof(UINT8) + sectlen;
     }
-    
+
     if (inline_rc < ei->refcount) { // look for non-inline entries
         traverse_ptr tp2 = *tp, next_tp;
-        
+
         while (find_next_item(Vcb, &tp2, &next_tp, FALSE, NULL)) {
             tp2 = next_tp;
-            
+
             if (tp2.item->key.obj_id == tp->item->key.obj_id) {
-                if (tp2.item->key.obj_type == TYPE_TREE_BLOCK_REF && tp2.item->size >= sizeof(TREE_BLOCK_REF)) {
+                if (tp2.item->key.obj_type == TYPE_TREE_BLOCK_REF) {
                     metadata_reloc_ref* ref = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc_ref), ALLOC_TAG);
                     if (!ref) {
                         ERR("out of memory\n");
                         return STATUS_INSUFFICIENT_RESOURCES;
                     }
-                    
+
                     ref->type = TYPE_TREE_BLOCK_REF;
-                    RtlCopyMemory(&ref->tbr, tp2.item->data, sizeof(TREE_BLOCK_REF));
+                    ref->tbr.offset = tp2.item->key.offset;
                     ref->parent = NULL;
                     ref->top = FALSE;
                     InsertTailList(&mr->refs, &ref->list_entry);
-                    
-                    delete_tree_item(Vcb, &tp2, rollback);
-                } else if (tp2.item->key.obj_type == TYPE_SHARED_BLOCK_REF && tp2.item->size >= sizeof(SHARED_BLOCK_REF)) {
+
+                    Status = delete_tree_item(Vcb, &tp2);
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("delete_tree_item returned %08x\n", Status);
+                        return Status;
+                    }
+                } else if (tp2.item->key.obj_type == TYPE_SHARED_BLOCK_REF) {
                     metadata_reloc_ref* ref = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc_ref), ALLOC_TAG);
                     if (!ref) {
                         ERR("out of memory\n");
                         return STATUS_INSUFFICIENT_RESOURCES;
                     }
-                    
+
                     ref->type = TYPE_SHARED_BLOCK_REF;
-                    RtlCopyMemory(&ref->sbr, tp2.item->data, sizeof(SHARED_BLOCK_REF));
+                    ref->sbr.offset = tp2.item->key.offset;
                     ref->parent = NULL;
                     ref->top = FALSE;
                     InsertTailList(&mr->refs, &ref->list_entry);
-                    
-                    delete_tree_item(Vcb, &tp2, rollback);
+
+                    Status = delete_tree_item(Vcb, &tp2);
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("delete_tree_item returned %08x\n", Status);
+                        return Status;
+                    }
                 }
             } else
                 break;
         }
     }
-    
+
     InsertTailList(items, &mr->list_entry);
-    
+
     if (mr2)
         *mr2 = mr;
-    
+
     return STATUS_SUCCESS;
 }
 
-static NTSTATUS add_metadata_reloc_parent(device_extension* Vcb, LIST_ENTRY* items, UINT64 address, metadata_reloc** mr2, LIST_ENTRY* rollback) {
+static NTSTATUS add_metadata_reloc_parent(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, LIST_ENTRY* items,
+                                          UINT64 address, metadata_reloc** mr2, LIST_ENTRY* rollback) {
     LIST_ENTRY* le;
     KEY searchkey;
     traverse_ptr tp;
     BOOL skinny = FALSE;
     NTSTATUS Status;
-    
+
     le = items->Flink;
     while (le != items) {
         metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry);
-        
+
         if (mr->address == address) {
             *mr2 = mr;
             return STATUS_SUCCESS;
         }
-        
+
         le = le->Flink;
     }
-    
+
     searchkey.obj_id = address;
     searchkey.obj_type = TYPE_METADATA_ITEM;
     searchkey.offset = 0xffffffffffffffff;
-    
+
     Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
     if (!NT_SUCCESS(Status)) {
         ERR("find_item returned %08x\n", Status);
         return Status;
     }
-    
+
     if (tp.item->key.obj_id == address && tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM))
         skinny = TRUE;
     else if (tp.item->key.obj_id == address && tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.offset == Vcb->superblock.node_size &&
              tp.item->size >= sizeof(EXTENT_ITEM)) {
         EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
-        
+
         if (!(ei->flags & EXTENT_ITEM_TREE_BLOCK)) {
             ERR("EXTENT_ITEM for %llx found, but tree flag not set\n", address);
             return STATUS_INTERNAL_ERROR;
@@ -248,17 +270,60 @@ static NTSTATUS add_metadata_reloc_parent(device_extension* Vcb, LIST_ENTRY* ite
         ERR("could not find valid EXTENT_ITEM for address %llx\n", address);
         return STATUS_INTERNAL_ERROR;
     }
-    
+
     Status = add_metadata_reloc(Vcb, items, &tp, skinny, mr2, NULL, rollback);
     if (!NT_SUCCESS(Status)) {
         ERR("add_metadata_reloc returned %08x\n", Status);
         return Status;
     }
-    
+
     return STATUS_SUCCESS;
 }
 
-static NTSTATUS add_metadata_reloc_extent_item(device_extension* Vcb, metadata_reloc* mr, LIST_ENTRY* rollback) {
+static void sort_metadata_reloc_refs(metadata_reloc* mr) { // reorders mr->refs in place: ascending type, then descending hash within a type
+    LIST_ENTRY newlist, *le; // newlist: temporary list head that collects the refs in sorted order
+
+    if (mr->refs.Flink == mr->refs.Blink) // 0 or 1 items: first and last entry coincide, so nothing to sort
+        return;
+
+    // insertion sort: pop each ref off mr->refs and place it at its ordered position in newlist
+
+    InitializeListHead(&newlist);
+
+    while (!IsListEmpty(&mr->refs)) {
+        metadata_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&mr->refs), metadata_reloc_ref, list_entry); // detach the current first ref
+        BOOL inserted = FALSE;
+
+        if (ref->type == TYPE_TREE_BLOCK_REF) // the sort key (hash) depends on the ref type
+            ref->hash = ref->tbr.offset;
+        else if (ref->type == TYPE_SHARED_BLOCK_REF) // any other type leaves ref->hash as it was
+            ref->hash = ref->parent->new_address; // NOTE(review): dereferences ref->parent - presumably already set by the caller; confirm
+
+        le = newlist.Flink;
+        while (le != &newlist) {
+            metadata_reloc_ref* ref2 = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry);
+
+            if (ref->type < ref2->type || (ref->type == ref2->type && ref->hash > ref2->hash)) { // ref goes before ref2: lower type, or same type with a larger hash
+                InsertHeadList(le->Blink, &ref->list_entry); // linking after ref2's predecessor puts ref immediately before ref2
+                inserted = TRUE;
+                break;
+            }
+
+            le = le->Flink;
+        }
+
+        if (!inserted) // no entry in newlist has to follow ref, so it goes last
+            InsertTailList(&newlist, &ref->list_entry);
+    }
+
+    newlist.Flink->Blink = &mr->refs; // splice the sorted chain back under mr->refs (newlist holds at least 2 entries here)
+    newlist.Blink->Flink = &mr->refs;
+    mr->refs.Flink = newlist.Flink;
+    mr->refs.Blink = newlist.Blink;
+}
+
+static NTSTATUS add_metadata_reloc_extent_item(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, metadata_reloc* mr) {
+    NTSTATUS Status;
     LIST_ENTRY* le;
     UINT64 rc = 0;
     UINT16 inline_len;
@@ -266,160 +331,141 @@ static NTSTATUS add_metadata_reloc_extent_item(device_extension* Vcb, metadata_r
     metadata_reloc_ref* first_noninline = NULL;
     EXTENT_ITEM* ei;
     UINT8* ptr;
-    
+
     inline_len = sizeof(EXTENT_ITEM);
     if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA))
         inline_len += sizeof(EXTENT_ITEM2);
-    
+
+    sort_metadata_reloc_refs(mr);
+
     le = mr->refs.Flink;
     while (le != &mr->refs) {
         metadata_reloc_ref* ref = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry);
-        ULONG extlen = 0;
-        
+        UINT16 extlen = 0;
+
         rc++;
-        
+
         if (ref->type == TYPE_TREE_BLOCK_REF)
             extlen += sizeof(TREE_BLOCK_REF);
         else if (ref->type == TYPE_SHARED_BLOCK_REF)
             extlen += sizeof(SHARED_BLOCK_REF);
 
         if (all_inline) {
-            if (inline_len + 1 + extlen > Vcb->superblock.node_size / 4) {
+            if ((ULONG)(inline_len + 1 + extlen) > (Vcb->superblock.node_size >> 2)) {
                 all_inline = FALSE;
                 first_noninline = ref;
             } else
                 inline_len += extlen + 1;
         }
-        
+
         le = le->Flink;
     }
-    
+
     ei = ExAllocatePoolWithTag(PagedPool, inline_len, ALLOC_TAG);
     if (!ei) {
         ERR("out of memory\n");
         return STATUS_INSUFFICIENT_RESOURCES;
     }
-    
+
     ei->refcount = rc;
     ei->generation = mr->ei->generation;
     ei->flags = mr->ei->flags;
     ptr = (UINT8*)&ei[1];
-    
+
     if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
         EXTENT_ITEM2* ei2 = (EXTENT_ITEM2*)ptr;
-        
+
         ei2->firstitem = *(KEY*)&mr->data[1];
         ei2->level = mr->data->level;
-        
+
         ptr += sizeof(EXTENT_ITEM2);
     }
-    
+
     le = mr->refs.Flink;
     while (le != &mr->refs) {
         metadata_reloc_ref* ref = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry);
-        
+
         if (ref == first_noninline)
             break;
-        
+
         *ptr = ref->type;
         ptr++;
-        
+
         if (ref->type == TYPE_TREE_BLOCK_REF) {
             TREE_BLOCK_REF* tbr = (TREE_BLOCK_REF*)ptr;
-            
+
             tbr->offset = ref->tbr.offset;
-            
+
             ptr += sizeof(TREE_BLOCK_REF);
         } else if (ref->type == TYPE_SHARED_BLOCK_REF) {
             SHARED_BLOCK_REF* sbr = (SHARED_BLOCK_REF*)ptr;
-            
+
             sbr->offset = ref->parent->new_address;
-            
+
             ptr += sizeof(SHARED_BLOCK_REF);
         }
-        
+
         le = le->Flink;
     }
-    
-    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
-        if (!insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_METADATA_ITEM, mr->data->level, ei, inline_len, NULL, NULL, rollback)) {
-            ERR("insert_tree_item failed\n");
-            return STATUS_INTERNAL_ERROR;
-        }
-    } else {
-        if (!insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, ei, inline_len, NULL, NULL, rollback)) {
-            ERR("insert_tree_item failed\n");
-            return STATUS_INTERNAL_ERROR;
-        }
+
+    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)
+        Status = insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_METADATA_ITEM, mr->data->level, ei, inline_len, NULL, NULL);
+    else
+        Status = insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, ei, inline_len, NULL, NULL);
+
+    if (!NT_SUCCESS(Status)) {
+        ERR("insert_tree_item returned %08x\n", Status);
+        ExFreePool(ei);
+        return Status;
     }
-    
+
     if (!all_inline) {
         le = &first_noninline->list_entry;
-        
+
         while (le != &mr->refs) {
             metadata_reloc_ref* ref = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry);
-            
+
             if (ref->type == TYPE_TREE_BLOCK_REF) {
-                TREE_BLOCK_REF* tbr;
-                
-                tbr = ExAllocatePoolWithTag(PagedPool, sizeof(TREE_BLOCK_REF), ALLOC_TAG);
-                if (!tbr) {
-                    ERR("out of memory\n");
-                    return STATUS_INSUFFICIENT_RESOURCES;
-                }
-                
-                tbr->offset = ref->tbr.offset;
-                
-                if (!insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_TREE_BLOCK_REF, tbr->offset, tbr, sizeof(TREE_BLOCK_REF), NULL, NULL, rollback)) {
-                    ERR("insert_tree_item failed\n");
-                    return STATUS_INTERNAL_ERROR;
+                Status = insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_TREE_BLOCK_REF, ref->tbr.offset, NULL, 0, NULL, NULL);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("insert_tree_item returned %08x\n", Status);
+                    return Status;
                 }
             } else if (ref->type == TYPE_SHARED_BLOCK_REF) {
-                SHARED_BLOCK_REF* sbr;
-                
-                sbr = ExAllocatePoolWithTag(PagedPool, sizeof(SHARED_BLOCK_REF), ALLOC_TAG);
-                if (!sbr) {
-                    ERR("out of memory\n");
-                    return STATUS_INSUFFICIENT_RESOURCES;
-                }
-                
-                sbr->offset = ref->parent->new_address;
-                
-                if (!insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_SHARED_BLOCK_REF, sbr->offset, sbr, sizeof(SHARED_BLOCK_REF), NULL, NULL, rollback)) {
-                    ERR("insert_tree_item failed\n");
-                    return STATUS_INTERNAL_ERROR;
+                Status = insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_SHARED_BLOCK_REF, ref->parent->new_address, NULL, 0, NULL, NULL);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("insert_tree_item returned %08x\n", Status);
+                    return Status;
                 }
             }
-            
+
             le = le->Flink;
         }
     }
-    
+
     if (ei->flags & EXTENT_ITEM_SHARED_BACKREFS || mr->data->flags & HEADER_FLAG_SHARED_BACKREF || !(mr->data->flags & HEADER_FLAG_MIXED_BACKREF)) {
         if (mr->data->level > 0) {
             UINT16 i;
             internal_node* in = (internal_node*)&mr->data[1];
-                        
+
             for (i = 0; i < mr->data->num_items; i++) {
                 UINT64 sbrrc = find_extent_shared_tree_refcount(Vcb, in[i].address, mr->address, NULL);
 
                 if (sbrrc > 0) {
-                    NTSTATUS Status;
                     SHARED_BLOCK_REF sbr;
-                    
+
                     sbr.offset = mr->new_address;
-                    
-                    Status = increase_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
-                                                      NULL, rollback);
+
+                    Status = increase_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0, NULL);
                     if (!NT_SUCCESS(Status)) {
                         ERR("increase_extent_refcount returned %08x\n", Status);
                         return Status;
                     }
-        
+
                     sbr.offset = mr->address;
-                    
+
                     Status = decrease_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
-                                                      sbr.offset, FALSE, NULL, rollback);
+                                                      sbr.offset, FALSE, NULL);
                     if (!NT_SUCCESS(Status)) {
                         ERR("decrease_extent_refcount returned %08x\n", Status);
                         return Status;
@@ -429,86 +475,84 @@ static NTSTATUS add_metadata_reloc_extent_item(device_extension* Vcb, metadata_r
         } else {
             UINT16 i;
             leaf_node* ln = (leaf_node*)&mr->data[1];
-            
+
             for (i = 0; i < mr->data->num_items; i++) {
                 if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
                     EXTENT_DATA* ed = (EXTENT_DATA*)((UINT8*)mr->data + sizeof(tree_header) + ln[i].offset);
-                    
+
                     if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
                         EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
-                        
+
                         if (ed2->size > 0) { // not sparse
-                            UINT64 sdrrc = find_extent_shared_data_refcount(Vcb, ed2->address, mr->address, NULL);
-                            
+                            UINT32 sdrrc = find_extent_shared_data_refcount(Vcb, ed2->address, mr->address, NULL);
+
                             if (sdrrc > 0) {
-                                NTSTATUS Status;
                                 SHARED_DATA_REF sdr;
                                 chunk* c;
-                                
+
                                 sdr.offset = mr->new_address;
                                 sdr.count = sdrrc;
-                                
-                                Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0,
-                                                                  NULL, rollback);
+
+                                Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, NULL);
                                 if (!NT_SUCCESS(Status)) {
                                     ERR("increase_extent_refcount returned %08x\n", Status);
                                     return Status;
                                 }
-                                
+
                                 sdr.offset = mr->address;
-                                
+
                                 Status = decrease_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0,
-                                                                  sdr.offset, FALSE, NULL, rollback);
+                                                                  sdr.offset, FALSE, NULL);
                                 if (!NT_SUCCESS(Status)) {
                                     ERR("decrease_extent_refcount returned %08x\n", Status);
                                     return Status;
                                 }
-                                
+
                                 c = get_chunk_from_address(Vcb, ed2->address);
-                                
+
                                 if (c) {
                                     // check changed_extents
-                                    
+
                                     ExAcquireResourceExclusiveLite(&c->changed_extents_lock, TRUE);
-                                    
+
                                     le = c->changed_extents.Flink;
-                                    
+
                                     while (le != &c->changed_extents) {
                                         changed_extent* ce = CONTAINING_RECORD(le, changed_extent, list_entry);
-                                        
+
                                         if (ce->address == ed2->address) {
                                             LIST_ENTRY* le2;
-                                            
+
                                             le2 = ce->refs.Flink;
                                             while (le2 != &ce->refs) {
                                                 changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
-                                                
+
                                                 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == mr->address) {
                                                     cer->sdr.offset = mr->new_address;
                                                     break;
                                                 }
-                                                
+
                                                 le2 = le2->Flink;
                                             }
-                                            
+
                                             le2 = ce->old_refs.Flink;
                                             while (le2 != &ce->old_refs) {
                                                 changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
-                                                
+
                                                 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == mr->address) {
                                                     cer->sdr.offset = mr->new_address;
                                                     break;
                                                 }
-                                                
+
                                                 le2 = le2->Flink;
                                             }
-                                            
+
                                             break;
                                         }
-                                        
+
                                         le = le->Flink;
                                     }
-                                    
+
                                     ExReleaseResourceLite(&c->changed_extents_lock);
                                 }
                             }
@@ -522,303 +566,304 @@ static NTSTATUS add_metadata_reloc_extent_item(device_extension* Vcb, metadata_r
     return STATUS_SUCCESS;
 }
 
-static NTSTATUS write_metadata_items(device_extension* Vcb, LIST_ENTRY* items, LIST_ENTRY* data_items, chunk* c, LIST_ENTRY* rollback) {
+static NTSTATUS write_metadata_items(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, LIST_ENTRY* items,
+                                     LIST_ENTRY* data_items, chunk* c, LIST_ENTRY* rollback) {
     LIST_ENTRY tree_writes, *le;
     NTSTATUS Status;
     traverse_ptr tp;
     UINT8 level, max_level = 0;
     chunk* newchunk = NULL;
-    
+
     InitializeListHead(&tree_writes);
-    
+
     le = items->Flink;
     while (le != items) {
         metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry);
         LIST_ENTRY* le2;
         chunk* pc;
-        
-//         ERR("address %llx\n", mr->address);
-        
+
         mr->data = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
         if (!mr->data) {
             ERR("out of memory\n");
             return STATUS_INSUFFICIENT_RESOURCES;
         }
-        
+
         Status = read_data(Vcb, mr->address, Vcb->superblock.node_size, NULL, TRUE, (UINT8*)mr->data,
-                           c && mr->address >= c->offset && mr->address < c->offset + c->chunk_item->size ? c : NULL, &pc, NULL, FALSE);
+                           c && mr->address >= c->offset && mr->address < c->offset + c->chunk_item->size ? c : NULL, &pc, NULL, 0, FALSE, NormalPagePriority);
         if (!NT_SUCCESS(Status)) {
             ERR("read_data returned %08x\n", Status);
             return Status;
         }
-        
+
         if (pc->chunk_item->type & BLOCK_FLAG_SYSTEM)
             mr->system = TRUE;
-        
+
         if (data_items && mr->data->level == 0) {
-            LIST_ENTRY* le2 = data_items->Flink;
+            le2 = data_items->Flink;
             while (le2 != data_items) {
                 data_reloc* dr = CONTAINING_RECORD(le2, data_reloc, list_entry);
                 leaf_node* ln = (leaf_node*)&mr->data[1];
                 UINT16 i;
-                
+
                 for (i = 0; i < mr->data->num_items; i++) {
                     if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
                         EXTENT_DATA* ed = (EXTENT_DATA*)((UINT8*)mr->data + sizeof(tree_header) + ln[i].offset);
-                        
+
                         if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
                             EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
-                            
+
                             if (ed2->address == dr->address)
                                 ed2->address = dr->new_address;
                         }
                     }
                 }
-                
+
                 le2 = le2->Flink;
             }
         }
-        
+
         if (mr->data->level > max_level)
             max_level = mr->data->level;
-        
+
         le2 = mr->refs.Flink;
         while (le2 != &mr->refs) {
             metadata_reloc_ref* ref = CONTAINING_RECORD(le2, metadata_reloc_ref, list_entry);
-            
+
             if (ref->type == TYPE_TREE_BLOCK_REF) {
                 KEY* firstitem;
                 root* r = NULL;
                 LIST_ENTRY* le3;
                 tree* t;
-                
+
                 firstitem = (KEY*)&mr->data[1];
-                
+
                 le3 = Vcb->roots.Flink;
                 while (le3 != &Vcb->roots) {
                     root* r2 = CONTAINING_RECORD(le3, root, list_entry);
-                    
+
                     if (r2->id == ref->tbr.offset) {
                         r = r2;
                         break;
                     }
-                    
+
                     le3 = le3->Flink;
                 }
-                
+
                 if (!r) {
                     ERR("could not find subvol with id %llx\n", ref->tbr.offset);
                     return STATUS_INTERNAL_ERROR;
                 }
-                
+
                 Status = find_item_to_level(Vcb, r, &tp, firstitem, FALSE, mr->data->level + 1, NULL);
                 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
                     ERR("find_item_to_level returned %08x\n", Status);
                     return Status;
                 }
-                
+
                 t = tp.tree;
                 while (t && t->header.level < mr->data->level + 1) {
                     t = t->parent;
                 }
-                
+
                 if (!t)
                     ref->top = TRUE;
                 else {
                     metadata_reloc* mr2;
-                    
+
                     Status = add_metadata_reloc_parent(Vcb, items, t->header.address, &mr2, rollback);
                     if (!NT_SUCCESS(Status)) {
                         ERR("add_metadata_reloc_parent returned %08x\n", Status);
                         return Status;
                     }
-                    
+
                     ref->parent = mr2;
                 }
             } else if (ref->type == TYPE_SHARED_BLOCK_REF) {
                 metadata_reloc* mr2;
-                
+
                 Status = add_metadata_reloc_parent(Vcb, items, ref->sbr.offset, &mr2, rollback);
                 if (!NT_SUCCESS(Status)) {
                     ERR("add_metadata_reloc_parent returned %08x\n", Status);
                     return Status;
                 }
-                
+
                 ref->parent = mr2;
             }
-            
+
             le2 = le2->Flink;
         }
-        
+
         le = le->Flink;
     }
-    
+
     le = items->Flink;
     while (le != items) {
         metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry);
         LIST_ENTRY* le2;
         UINT32 hash;
-        
+
         mr->t = NULL;
-        
+
         hash = calc_crc32c(0xffffffff, (UINT8*)&mr->address, sizeof(UINT64));
-        
+
         le2 = Vcb->trees_ptrs[hash >> 24];
-        
+
         if (le2) {
             while (le2 != &Vcb->trees_hash) {
                 tree* t = CONTAINING_RECORD(le2, tree, list_entry_hash);
-                
+
                 if (t->header.address == mr->address) {
                     mr->t = t;
                     break;
                 } else if (t->hash > hash)
                     break;
-                
+
                 le2 = le2->Flink;
             }
         }
-        
+
         le = le->Flink;
     }
-    
+
     for (level = 0; level <= max_level; level++) {
         le = items->Flink;
         while (le != items) {
             metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry);
-            
+
             if (mr->data->level == level) {
                 BOOL done = FALSE;
                 LIST_ENTRY* le2;
                 tree_write* tw;
                 UINT64 flags;
                 tree* t3;
-                
+
                 if (mr->system)
                     flags = Vcb->system_flags;
                 else if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS)
                     flags = Vcb->data_flags;
                 else
                     flags = Vcb->metadata_flags;
-                
+
                 if (newchunk) {
                     ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE);
-                    
+
                     if (newchunk->chunk_item->type == flags && find_metadata_address_in_chunk(Vcb, newchunk, &mr->new_address)) {
-                        increase_chunk_usage(newchunk, Vcb->superblock.node_size);
-                        space_list_subtract(Vcb, newchunk, FALSE, mr->new_address, Vcb->superblock.node_size, rollback);
+                        newchunk->used += Vcb->superblock.node_size;
+                        space_list_subtract(newchunk, FALSE, mr->new_address, Vcb->superblock.node_size, rollback);
                         done = TRUE;
                     }
-                    
+
                     ExReleaseResourceLite(&newchunk->lock);
                 }
-                
+
                 if (!done) {
                     ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
-    
+
                     le2 = Vcb->chunks.Flink;
                     while (le2 != &Vcb->chunks) {
                         chunk* c2 = CONTAINING_RECORD(le2, chunk, list_entry);
-                        
+
                         if (!c2->readonly && !c2->reloc && c2 != newchunk && c2->chunk_item->type == flags) {
                             ExAcquireResourceExclusiveLite(&c2->lock, TRUE);
-                            
+
                             if ((c2->chunk_item->size - c2->used) >= Vcb->superblock.node_size) {
                                 if (find_metadata_address_in_chunk(Vcb, c2, &mr->new_address)) {
-                                    increase_chunk_usage(c2, Vcb->superblock.node_size);
-                                    space_list_subtract(Vcb, c2, FALSE, mr->new_address, Vcb->superblock.node_size, rollback);
+                                    c2->used += Vcb->superblock.node_size;
+                                    space_list_subtract(c2, FALSE, mr->new_address, Vcb->superblock.node_size, rollback);
                                     ExReleaseResourceLite(&c2->lock);
                                     newchunk = c2;
                                     done = TRUE;
                                     break;
                                 }
                             }
-                            
+
                             ExReleaseResourceLite(&c2->lock);
                         }
 
                         le2 = le2->Flink;
                     }
-                    
+
                     // allocate new chunk if necessary
                     if (!done) {
-                        newchunk = alloc_chunk(Vcb, flags);
-                        
-                        if (!newchunk) {
-                            ERR("could not allocate new chunk\n");
+                        Status = alloc_chunk(Vcb, flags, &newchunk, FALSE);
+
+                        if (!NT_SUCCESS(Status)) {
+                            ERR("alloc_chunk returned %08x\n", Status);
                             ExReleaseResourceLite(&Vcb->chunk_lock);
-                            Status = STATUS_DISK_FULL;
                             goto end;
                         }
-                        
+
                         ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE);
-                        
+
+                        newchunk->balance_num = Vcb->balance.balance_num;
+
                         if (!find_metadata_address_in_chunk(Vcb, newchunk, &mr->new_address)) {
                             ExReleaseResourceLite(&newchunk->lock);
+                            ExReleaseResourceLite(&Vcb->chunk_lock);
                             ERR("could not find address in new chunk\n");
                             Status = STATUS_DISK_FULL;
                             goto end;
                         } else {
-                            increase_chunk_usage(newchunk, Vcb->superblock.node_size);
-                            space_list_subtract(Vcb, newchunk, FALSE, mr->new_address, Vcb->superblock.node_size, rollback);
+                            newchunk->used += Vcb->superblock.node_size;
+                            space_list_subtract(newchunk, FALSE, mr->new_address, Vcb->superblock.node_size, rollback);
                         }
-                        
+
                         ExReleaseResourceLite(&newchunk->lock);
                     }
-                    
+
                     ExReleaseResourceLite(&Vcb->chunk_lock);
                 }
-                
+
                 // update parents
                 le2 = mr->refs.Flink;
                 while (le2 != &mr->refs) {
                     metadata_reloc_ref* ref = CONTAINING_RECORD(le2, metadata_reloc_ref, list_entry);
-                    
+
                     if (ref->parent) {
                         UINT16 i;
                         internal_node* in = (internal_node*)&ref->parent->data[1];
-                        
+
                         for (i = 0; i < ref->parent->data->num_items; i++) {
                             if (in[i].address == mr->address) {
                                 in[i].address = mr->new_address;
                                 break;
                             }
                         }
-                        
+
                         if (ref->parent->t) {
                             LIST_ENTRY* le3;
-                            
+
                             le3 = ref->parent->t->itemlist.Flink;
                             while (le3 != &ref->parent->t->itemlist) {
                                 tree_data* td = CONTAINING_RECORD(le3, tree_data, list_entry);
-                                
+
                                 if (!td->inserted && td->treeholder.address == mr->address)
                                     td->treeholder.address = mr->new_address;
-                                
+
                                 le3 = le3->Flink;
                             }
                         }
                     } else if (ref->top && ref->type == TYPE_TREE_BLOCK_REF) {
                         LIST_ENTRY* le3;
                         root* r = NULL;
-                        
+
                         // alter ROOT_ITEM
-                        
+
                         le3 = Vcb->roots.Flink;
                         while (le3 != &Vcb->roots) {
                             root* r2 = CONTAINING_RECORD(le3, root, list_entry);
-                            
+
                             if (r2->id == ref->tbr.offset) {
                                 r = r2;
                                 break;
                             }
-                            
+
                             le3 = le3->Flink;
                         }
-                        
+
                         if (r) {
                             r->treeholder.address = mr->new_address;
-                            
+
                             if (r == Vcb->root_root)
                                 Vcb->superblock.root_tree_addr = mr->new_address;
                             else if (r == Vcb->chunk_root)
@@ -826,92 +871,96 @@ static NTSTATUS write_metadata_items(device_extension* Vcb, LIST_ENTRY* items, L
                             else if (r->root_item.block_number == mr->address) {
                                 KEY searchkey;
                                 ROOT_ITEM* ri;
-                                
+
                                 r->root_item.block_number = mr->new_address;
-                                
+
                                 searchkey.obj_id = r->id;
                                 searchkey.obj_type = TYPE_ROOT_ITEM;
                                 searchkey.offset = 0xffffffffffffffff;
-                                
+
                                 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
                                 if (!NT_SUCCESS(Status)) {
                                     ERR("find_item returned %08x\n", Status);
                                     goto end;
                                 }
-                                
+
                                 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
                                     ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
                                     Status = STATUS_INTERNAL_ERROR;
                                     goto end;
                                 }
-                                
+
                                 ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
                                 if (!ri) {
                                     ERR("out of memory\n");
                                     Status = STATUS_INSUFFICIENT_RESOURCES;
                                     goto end;
                                 }
-                                
+
                                 RtlCopyMemory(ri, &r->root_item, sizeof(ROOT_ITEM));
-                                
-                                delete_tree_item(Vcb, &tp, rollback);
-                                
-                                if (!insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, NULL, rollback)) {
-                                    ERR("insert_tree_item failed\n");
-                                    Status = STATUS_INTERNAL_ERROR;
+
+                                Status = delete_tree_item(Vcb, &tp);
+                                if (!NT_SUCCESS(Status)) {
+                                    ERR("delete_tree_item returned %08x\n", Status);
+                                    goto end;
+                                }
+
+                                Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, NULL);
+                                if (!NT_SUCCESS(Status)) {
+                                    ERR("insert_tree_item returned %08x\n", Status);
                                     goto end;
                                 }
                             }
                         }
                     }
-                    
+
                     le2 = le2->Flink;
                 }
-                
+
                 mr->data->address = mr->new_address;
-                
+
                 t3 = mr->t;
 
                 while (t3) {
                     UINT8 h;
                     BOOL inserted;
                     tree* t4 = NULL;
-                    
+
                     // check if tree loaded more than once
                     if (t3->list_entry.Flink != &Vcb->trees_hash) {
                         tree* nt = CONTAINING_RECORD(t3->list_entry_hash.Flink, tree, list_entry_hash);
-                        
+
                         if (nt->header.address == t3->header.address)
                             t4 = nt;
                     }
-                    
+
                     t3->header.address = mr->new_address;
-                    
+
                     h = t3->hash >> 24;
-                    
+
                     if (Vcb->trees_ptrs[h] == &t3->list_entry_hash) {
                         if (t3->list_entry_hash.Flink == &Vcb->trees_hash)
                             Vcb->trees_ptrs[h] = NULL;
                         else {
                             tree* t2 = CONTAINING_RECORD(t3->list_entry_hash.Flink, tree, list_entry_hash);
-                            
+
                             if (t2->hash >> 24 == h)
                                 Vcb->trees_ptrs[h] = &t2->list_entry_hash;
                             else
                                 Vcb->trees_ptrs[h] = NULL;
                         }
                     }
-                        
+
                     RemoveEntryList(&t3->list_entry_hash);
-                    
+
                     t3->hash = calc_crc32c(0xffffffff, (UINT8*)&t3->header.address, sizeof(UINT64));
                     h = t3->hash >> 24;
-                    
+
                     if (!Vcb->trees_ptrs[h]) {
                         UINT8 h2 = h;
-                        
+
                         le2 = Vcb->trees_hash.Flink;
-                        
+
                         if (h2 > 0) {
                             h2--;
                             do {
@@ -919,23 +968,23 @@ static NTSTATUS write_metadata_items(device_extension* Vcb, LIST_ENTRY* items, L
                                     le2 = Vcb->trees_ptrs[h2];
                                     break;
                                 }
-                                    
+
                                 h2--;
                             } while (h2 > 0);
                         }
                     } else
                         le2 = Vcb->trees_ptrs[h];
-                    
+
                     inserted = FALSE;
                     while (le2 != &Vcb->trees_hash) {
                         tree* t2 = CONTAINING_RECORD(le2, tree, list_entry_hash);
-                        
+
                         if (t2->hash >= t3->hash) {
                             InsertHeadList(le2->Blink, &t3->list_entry_hash);
                             inserted = TRUE;
                             break;
                         }
-                        
+
                         le2 = le2->Flink;
                     }
 
@@ -944,106 +993,105 @@ static NTSTATUS write_metadata_items(device_extension* Vcb, LIST_ENTRY* items, L
 
                     if (!Vcb->trees_ptrs[h] || t3->list_entry_hash.Flink == Vcb->trees_ptrs[h])
                         Vcb->trees_ptrs[h] = &t3->list_entry_hash;
-                    
+
                     if (data_items && level == 0) {
                         le2 = data_items->Flink;
-                        
+
                         while (le2 != data_items) {
                             data_reloc* dr = CONTAINING_RECORD(le2, data_reloc, list_entry);
                             LIST_ENTRY* le3 = t3->itemlist.Flink;
-                            
+
                             while (le3 != &t3->itemlist) {
                                 tree_data* td = CONTAINING_RECORD(le3, tree_data, list_entry);
-                                
+
                                 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
                                     EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
-                                    
+
                                     if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
                                         EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
-                                        
+
                                         if (ed2->address == dr->address)
                                             ed2->address = dr->new_address;
                                     }
                                 }
-                                
+
                                 le3 = le3->Flink;
                             }
-                            
+
                             le2 = le2->Flink;
                         }
                     }
-                    
+
                     t3 = t4;
                 }
 
                 *((UINT32*)mr->data) = ~calc_crc32c(0xffffffff, (UINT8*)&mr->data->fs_uuid, Vcb->superblock.node_size - sizeof(mr->data->csum));
-                
+
                 tw = ExAllocatePoolWithTag(PagedPool, sizeof(tree_write), ALLOC_TAG);
                 if (!tw) {
                     ERR("out of memory\n");
                     Status = STATUS_INSUFFICIENT_RESOURCES;
                     goto end;
                 }
-                
+
                 tw->address = mr->new_address;
                 tw->length = Vcb->superblock.node_size;
                 tw->data = (UINT8*)mr->data;
-                tw->overlap = FALSE;
-                
+
                 if (IsListEmpty(&tree_writes))
                     InsertTailList(&tree_writes, &tw->list_entry);
                 else {
                     BOOL inserted = FALSE;
-                    
+
                     le2 = tree_writes.Flink;
                     while (le2 != &tree_writes) {
                         tree_write* tw2 = CONTAINING_RECORD(le2, tree_write, list_entry);
-                        
+
                         if (tw2->address > tw->address) {
                             InsertHeadList(le2->Blink, &tw->list_entry);
                             inserted = TRUE;
                             break;
                         }
-                        
+
                         le2 = le2->Flink;
                     }
-                    
+
                     if (!inserted)
                         InsertTailList(&tree_writes, &tw->list_entry);
                 }
             }
-            
+
             le = le->Flink;
         }
     }
-    
+
+    Status = do_tree_writes(Vcb, &tree_writes, TRUE);
+    if (!NT_SUCCESS(Status)) {
+        ERR("do_tree_writes returned %08x\n", Status);
+        goto end;
+    }
+
     le = items->Flink;
     while (le != items) {
         metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry);
-        
-        Status = add_metadata_reloc_extent_item(Vcb, mr, rollback);
+
+        Status = add_metadata_reloc_extent_item(Vcb, mr);
         if (!NT_SUCCESS(Status)) {
             ERR("add_metadata_reloc_extent_item returned %08x\n", Status);
             goto end;
         }
-        
+
         le = le->Flink;
     }
-    
-    Status = do_tree_writes(Vcb, &tree_writes, NULL);
-    if (!NT_SUCCESS(Status)) {
-        ERR("do_tree_writes returned %08x\n", Status);
-        goto end;
-    }
-    
+
     Status = STATUS_SUCCESS;
-    
+
 end:
     while (!IsListEmpty(&tree_writes)) {
         tree_write* tw = CONTAINING_RECORD(RemoveHeadList(&tree_writes), tree_write, list_entry);
         ExFreePool(tw);
     }
-    
+
     return Status;
 }
 
@@ -1054,153 +1102,263 @@ static NTSTATUS balance_metadata_chunk(device_extension* Vcb, chunk* c, BOOL* ch
     BOOL b;
     LIST_ENTRY items, rollback;
     UINT32 loaded = 0;
-    
+
     TRACE("chunk %llx\n", c->offset);
-    
+
     InitializeListHead(&rollback);
     InitializeListHead(&items);
-    
+
     ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
-    
+
     searchkey.obj_id = c->offset;
     searchkey.obj_type = TYPE_METADATA_ITEM;
     searchkey.offset = 0xffffffffffffffff;
-    
+
     Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
     if (!NT_SUCCESS(Status)) {
         ERR("find_item returned %08x\n", Status);
         goto end;
     }
-    
+
     do {
         traverse_ptr next_tp;
-        
+
         if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
             break;
-        
+
         if (tp.item->key.obj_id >= c->offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) {
             BOOL tree = FALSE, skinny = FALSE;
-            
+
             if (tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) {
                 tree = TRUE;
                 skinny = TRUE;
             } else if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.offset == Vcb->superblock.node_size &&
                        tp.item->size >= sizeof(EXTENT_ITEM)) {
                 EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
-                
+
                 if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
                     tree = TRUE;
             }
-            
+
             if (tree) {
                 Status = add_metadata_reloc(Vcb, &items, &tp, skinny, NULL, c, &rollback);
-                
+
                 if (!NT_SUCCESS(Status)) {
                     ERR("add_metadata_reloc returned %08x\n", Status);
                     goto end;
                 }
-                
+
                 loaded++;
-                
+
                 if (loaded >= 64) // only do 64 at a time
                     break;
             }
         }
-    
+
         b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
-        
+
         if (b)
             tp = next_tp;
     } while (b);
-    
+
     if (IsListEmpty(&items)) {
         *changed = FALSE;
         Status = STATUS_SUCCESS;
         goto end;
     } else
         *changed = TRUE;
-    
+
     Status = write_metadata_items(Vcb, &items, NULL, c, &rollback);
     if (!NT_SUCCESS(Status)) {
         ERR("write_metadata_items returned %08x\n", Status);
         goto end;
     }
-    
+
     Status = STATUS_SUCCESS;
-    
+
     Vcb->need_write = TRUE;
-    
+
 end:
+    if (NT_SUCCESS(Status)) {
+        Status = do_write(Vcb, NULL);
+        if (!NT_SUCCESS(Status))
+            ERR("do_write returned %08x\n", Status);
+    }
+
     if (NT_SUCCESS(Status))
-        clear_rollback(Vcb, &rollback);
+        clear_rollback(&rollback);
     else
         do_rollback(Vcb, &rollback);
-    
+
+    free_trees(Vcb);
+
     ExReleaseResourceLite(&Vcb->tree_lock);
-    
+
     while (!IsListEmpty(&items)) {
         metadata_reloc* mr = CONTAINING_RECORD(RemoveHeadList(&items), metadata_reloc, list_entry);
-        
+
         while (!IsListEmpty(&mr->refs)) {
             metadata_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&mr->refs), metadata_reloc_ref, list_entry);
-            
+
             ExFreePool(ref);
         }
-        
+
         ExFreePool(mr);
     }
-    
+
     return Status;
 }
 
-static NTSTATUS add_data_reloc(device_extension* Vcb, LIST_ENTRY* items, LIST_ENTRY* metadata_items, traverse_ptr* tp, chunk* c, LIST_ENTRY* rollback) {
+/* Builds the data_reloc_ref entries of dr that correspond to one inline
+ * EXTENT_DATA_REF (edr) of the extent being relocated.
+ *
+ * The root whose id equals edr->root is looked up in Vcb->roots, and the
+ * EXTENT_DATA items of inode edr->objid are walked from offset 0. Every
+ * regular or preallocated extent item that points at dr->address with size
+ * dr->size, and whose (key offset - extent offset) equals edr->offset, counts
+ * as one reference. Consecutive matches living in the same tree block share a
+ * single data_reloc_ref (its edr.count is incremented); a match in a different
+ * tree block gets a new data_reloc_ref, whose parent tree block is registered
+ * in metadata_items through add_metadata_reloc_parent.
+ *
+ * Returns STATUS_SUCCESS when the walk completes (even if nothing matched),
+ * STATUS_INTERNAL_ERROR if the root or the first candidate item cannot be
+ * found, STATUS_INSUFFICIENT_RESOURCES on allocation failure, or the failure
+ * status of find_item / add_metadata_reloc_parent. References already appended
+ * to dr->refs before a failure are left on the list. */
+static NTSTATUS data_reloc_add_tree_edr(_Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, LIST_ENTRY* metadata_items,
+                                        data_reloc* dr, EXTENT_DATA_REF* edr, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
+    LIST_ENTRY* le;
+    KEY searchkey;
+    traverse_ptr tp;
+    root* r = NULL;
+    metadata_reloc* mr;
+    UINT64 last_tree = 0;
+    data_reloc_ref* ref;
+
+    // find the root (subvolume) that the reference belongs to
+    le = Vcb->roots.Flink;
+    while (le != &Vcb->roots) {
+        root* r2 = CONTAINING_RECORD(le, root, list_entry);
+
+        if (r2->id == edr->root) {
+            r = r2;
+            break;
+        }
+
+        le = le->Flink;
+    }
+
+    if (!r) {
+        // report the id that was searched for, not the reference count
+        ERR("could not find subvol %llx\n", edr->root);
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    searchkey.obj_id = edr->objid;
+    searchkey.obj_type = TYPE_EXTENT_DATA;
+    searchkey.offset = 0;
+
+    Status = find_item(Vcb, r, &tp, &searchkey, FALSE, NULL);
+    if (!NT_SUCCESS(Status)) {
+        ERR("find_item returned %08x\n", Status);
+        return Status;
+    }
+
+    // find_item may have stopped on an item sorting before the search key; step forward once
+    if (tp.item->key.obj_id < searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type < searchkey.obj_type)) {
+        traverse_ptr tp2;
+
+        if (find_next_item(Vcb, &tp, &tp2, FALSE, NULL))
+            tp = tp2;
+        else {
+            ERR("could not find EXTENT_DATA for inode %llx in root %llx\n", searchkey.obj_id, r->id);
+            return STATUS_INTERNAL_ERROR;
+        }
+    }
+
+    ref = NULL;
+
+    // walk every EXTENT_DATA item of this inode
+    while (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
+        traverse_ptr tp2;
+
+        if (tp.item->size >= sizeof(EXTENT_DATA)) {
+            EXTENT_DATA* ed = (EXTENT_DATA*)tp.item->data;
+
+            if ((ed->type == EXTENT_TYPE_PREALLOC || ed->type == EXTENT_TYPE_REGULAR) && tp.item->size >= offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2)) {
+                EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
+
+                if (ed2->address == dr->address && ed2->size == dr->size && tp.item->key.offset - ed2->offset == edr->offset) {
+                    // same tree block as the previous match: just bump the count of the existing ref
+                    if (ref && last_tree == tp.tree->header.address)
+                        ref->edr.count++;
+                    else {
+                        ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG);
+                        if (!ref) {
+                            ERR("out of memory\n");
+                            return STATUS_INSUFFICIENT_RESOURCES;
+                        }
+
+                        ref->type = TYPE_EXTENT_DATA_REF;
+                        RtlCopyMemory(&ref->edr, edr, sizeof(EXTENT_DATA_REF));
+                        ref->edr.count = 1;
+
+                        Status = add_metadata_reloc_parent(Vcb, metadata_items, tp.tree->header.address, &mr, rollback);
+                        if (!NT_SUCCESS(Status)) {
+                            ERR("add_metadata_reloc_parent returned %08x\n", Status);
+                            ExFreePool(ref);
+                            return Status;
+                        }
+
+                        last_tree = tp.tree->header.address;
+                        ref->parent = mr;
+
+                        InsertTailList(&dr->refs, &ref->list_entry);
+                    }
+                }
+            }
+        }
+
+        if (find_next_item(Vcb, &tp, &tp2, FALSE, NULL))
+            tp = tp2;
+        else
+            break;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+static NTSTATUS add_data_reloc(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, LIST_ENTRY* items, LIST_ENTRY* metadata_items,
+                               traverse_ptr* tp, chunk* c, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
     data_reloc* dr;
     EXTENT_ITEM* ei;
     UINT16 len;
     UINT64 inline_rc;
     UINT8* ptr;
-    
+
     dr = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc), ALLOC_TAG);
     if (!dr) {
         ERR("out of memory\n");
         return STATUS_INSUFFICIENT_RESOURCES;
     }
-    
+
     dr->address = tp->item->key.obj_id;
     dr->size = tp->item->key.offset;
     dr->ei = (EXTENT_ITEM*)tp->item->data;
     InitializeListHead(&dr->refs);
-    
-    delete_tree_item(Vcb, tp, rollback);
-    
+
+    Status = delete_tree_item(Vcb, tp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("delete_tree_item returned %08x\n", Status);
+        return Status;
+    }
+
     if (!c)
         c = get_chunk_from_address(Vcb, tp->item->key.obj_id);
-        
+
     if (c) {
         ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-        
-        decrease_chunk_usage(c, tp->item->key.offset);
-        
-        space_list_add(Vcb, c, TRUE, tp->item->key.obj_id, tp->item->key.offset, rollback);
-        
+
+        c->used -= tp->item->key.offset;
+
+        space_list_add(c, tp->item->key.obj_id, tp->item->key.offset, rollback);
+
         ExReleaseResourceLite(&c->lock);
     }
-    
+
     ei = (EXTENT_ITEM*)tp->item->data;
     inline_rc = 0;
-    
+
     len = tp->item->size - sizeof(EXTENT_ITEM);
     ptr = (UINT8*)tp->item->data + sizeof(EXTENT_ITEM);
-    
+
     while (len > 0) {
         UINT8 secttype = *ptr;
-        ULONG sectlen = secttype == TYPE_EXTENT_DATA_REF ? sizeof(EXTENT_DATA_REF) : (secttype == TYPE_SHARED_DATA_REF ? sizeof(SHARED_DATA_REF) : 0);
-        data_reloc_ref* ref;
-        NTSTATUS Status;
-        metadata_reloc* mr;
-        
+        UINT16 sectlen = secttype == TYPE_EXTENT_DATA_REF ? sizeof(EXTENT_DATA_REF) : (secttype == TYPE_SHARED_DATA_REF ? sizeof(SHARED_DATA_REF) : 0);
+
         len--;
-        
+
         if (sectlen > len) {
             ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, len, sectlen);
             return STATUS_INTERNAL_ERROR;
@@ -1210,196 +1368,173 @@ static NTSTATUS add_data_reloc(device_extension* Vcb, LIST_ENTRY* items, LIST_EN
             ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, secttype);
             return STATUS_INTERNAL_ERROR;
         }
-        
-        ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG);
-        if (!ref) {
-            ERR("out of memory\n");
-            return STATUS_INSUFFICIENT_RESOURCES;
-        }
-        
+
         if (secttype == TYPE_EXTENT_DATA_REF) {
-            LIST_ENTRY* le;
-            KEY searchkey;
-            traverse_ptr tp3;
-            root* r = NULL;
-            
-            ref->type = TYPE_EXTENT_DATA_REF;
-            RtlCopyMemory(&ref->edr, ptr + sizeof(UINT8), sizeof(EXTENT_DATA_REF));
-            inline_rc += ref->edr.count;
-            
-            le = Vcb->roots.Flink;
-            while (le != &Vcb->roots) {
-                root* r2 = CONTAINING_RECORD(le, root, list_entry);
-                
-                if (r2->id == ref->edr.root) {
-                    r = r2;
-                    break;
-                }
-                
-                le = le->Flink;
-            }
-            
-            if (!r) {
-                ERR("could not find subvol %llx\n", ref->edr.count);
-                ExFreePool(ref);
-                return STATUS_INTERNAL_ERROR;
-            }
-            
-            searchkey.obj_id = ref->edr.objid;
-            searchkey.obj_type = TYPE_EXTENT_DATA;
-            searchkey.offset = ref->edr.offset;
-            
-            Status = find_item(Vcb, r, &tp3, &searchkey, FALSE, NULL);
-            if (!NT_SUCCESS(Status)) {
-                ERR("find_item returned %08x\n", Status);
-                ExFreePool(ref);
-                return Status;
-            }
-            
-            if (keycmp(tp3.item->key, searchkey)) {
-                ERR("could not find (%llx,%x,%llx) in root %llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, r->id);
-                ExFreePool(ref);
-                return STATUS_INTERNAL_ERROR;
-            }
-            
-            Status = add_metadata_reloc_parent(Vcb, metadata_items, tp3.tree->header.address, &mr, rollback);
+            EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)(ptr + sizeof(UINT8));
+
+            inline_rc += edr->count;
+
+            Status = data_reloc_add_tree_edr(Vcb, metadata_items, dr, edr, rollback);
             if (!NT_SUCCESS(Status)) {
-                ERR("add_metadata_reloc_parent returned %08x\n", Status);
-                ExFreePool(ref);
+                ERR("data_reloc_add_tree_edr returned %08x\n", Status);
                 return Status;
             }
-            
-            ref->parent = mr;
         } else if (secttype == TYPE_SHARED_DATA_REF) {
+            metadata_reloc* mr;
+            data_reloc_ref* ref;
+
+            ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG);
+            if (!ref) {
+                ERR("out of memory\n");
+                return STATUS_INSUFFICIENT_RESOURCES;
+            }
+
             ref->type = TYPE_SHARED_DATA_REF;
             RtlCopyMemory(&ref->sdr, ptr + sizeof(UINT8), sizeof(SHARED_DATA_REF));
             inline_rc += ref->sdr.count;
-            
+
             Status = add_metadata_reloc_parent(Vcb, metadata_items, ref->sdr.offset, &mr, rollback);
             if (!NT_SUCCESS(Status)) {
                 ERR("add_metadata_reloc_parent returned %08x\n", Status);
                 ExFreePool(ref);
                 return Status;
             }
-            
+
             ref->parent = mr;
+
+            InsertTailList(&dr->refs, &ref->list_entry);
         } else {
             ERR("unexpected tree type %x\n", secttype);
-            ExFreePool(ref);
             return STATUS_INTERNAL_ERROR;
         }
-        
-        InsertTailList(&dr->refs, &ref->list_entry);
-        
+
+
         len -= sectlen;
         ptr += sizeof(UINT8) + sectlen;
     }
-    
+
     if (inline_rc < ei->refcount) { // look for non-inline entries
         traverse_ptr tp2 = *tp, next_tp;
-        
+
         while (find_next_item(Vcb, &tp2, &next_tp, FALSE, NULL)) {
-            metadata_reloc* mr;
-            NTSTATUS Status;
-            
             tp2 = next_tp;
-            
+
             if (tp2.item->key.obj_id == tp->item->key.obj_id) {
                 if (tp2.item->key.obj_type == TYPE_EXTENT_DATA_REF && tp2.item->size >= sizeof(EXTENT_DATA_REF)) {
-                    data_reloc_ref* ref;
-                    LIST_ENTRY* le;
-                    KEY searchkey;
-                    traverse_ptr tp3;
-                    root* r = NULL;
-
-                    ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG);
-                    if (!ref) {
-                        ERR("out of memory\n");
-                        return STATUS_INSUFFICIENT_RESOURCES;
-                    }
-                    
-                    ref->type = TYPE_EXTENT_DATA_REF;
-                    RtlCopyMemory(&ref->edr, tp2.item->data, sizeof(EXTENT_DATA_REF));
-                    
-                    le = Vcb->roots.Flink;
-                    while (le != &Vcb->roots) {
-                        root* r2 = CONTAINING_RECORD(le, root, list_entry);
-                        
-                        if (r2->id == ref->edr.root) {
-                            r = r2;
-                            break;
-                        }
-                        
-                        le = le->Flink;
-                    }
-                    
-                    if (!r) {
-                        ERR("could not find subvol %llx\n", ref->edr.count);
-                        ExFreePool(ref);
-                        return STATUS_INTERNAL_ERROR;
-                    }
-                    
-                    searchkey.obj_id = ref->edr.objid;
-                    searchkey.obj_type = TYPE_EXTENT_DATA;
-                    searchkey.offset = ref->edr.offset;
-                    
-                    Status = find_item(Vcb, r, &tp3, &searchkey, FALSE, NULL);
+                    Status = data_reloc_add_tree_edr(Vcb, metadata_items, dr, (EXTENT_DATA_REF*)tp2.item->data, rollback);
                     if (!NT_SUCCESS(Status)) {
-                        ERR("find_item returned %08x\n", Status);
-                        ExFreePool(ref);
+                        ERR("data_reloc_add_tree_edr returned %08x\n", Status);
                         return Status;
                     }
-                    
-                    if (!keycmp(tp3.item->key, searchkey)) {
-                        ERR("could not find (%llx,%x,%llx) in root %llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, r->id);
-                        ExFreePool(ref);
-                        return STATUS_INTERNAL_ERROR;
-                    }
-                    
-                    Status = add_metadata_reloc_parent(Vcb, metadata_items, tp3.tree->header.address, &mr, rollback);
+
+                    Status = delete_tree_item(Vcb, &tp2);
                     if (!NT_SUCCESS(Status)) {
-                        ERR("add_metadata_reloc_parent returned %08x\n", Status);
-                        ExFreePool(ref);
+                        ERR("delete_tree_item returned %08x\n", Status);
                         return Status;
                     }
-                    
-                    ref->parent = mr;
-                    InsertTailList(&dr->refs, &ref->list_entry);
-                    
-                    delete_tree_item(Vcb, &tp2, rollback);
-                } else if (tp2.item->key.obj_type == TYPE_SHARED_DATA_REF && tp2.item->size >= sizeof(SHARED_DATA_REF)) {
-                    data_reloc_ref* ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG);
+                } else if (tp2.item->key.obj_type == TYPE_SHARED_DATA_REF && tp2.item->size >= sizeof(UINT32)) {
+                    metadata_reloc* mr;
+                    data_reloc_ref* ref;
+
+                    ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG);
                     if (!ref) {
                         ERR("out of memory\n");
                         return STATUS_INSUFFICIENT_RESOURCES;
                     }
-                    
+
                     ref->type = TYPE_SHARED_DATA_REF;
-                    RtlCopyMemory(&ref->sdr, tp2.item->data, sizeof(SHARED_DATA_REF));
-                    
+                    ref->sdr.offset = tp2.item->key.offset;
+                    ref->sdr.count = *((UINT32*)tp2.item->data);
+
                     Status = add_metadata_reloc_parent(Vcb, metadata_items, ref->sdr.offset, &mr, rollback);
                     if (!NT_SUCCESS(Status)) {
                         ERR("add_metadata_reloc_parent returned %08x\n", Status);
                         ExFreePool(ref);
                         return Status;
                     }
-                    
+
                     ref->parent = mr;
                     InsertTailList(&dr->refs, &ref->list_entry);
-                    
-                    delete_tree_item(Vcb, &tp2, rollback);
+
+                    Status = delete_tree_item(Vcb, &tp2);
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("delete_tree_item returned %08x\n", Status);
+                        return Status;
+                    }
                 }
             } else
                 break;
         }
     }
-    
+
     InsertTailList(items, &dr->list_entry);
-    
+
     return STATUS_SUCCESS;
 }
 
-static NTSTATUS add_data_reloc_extent_item(device_extension* Vcb, data_reloc* dr, LIST_ENTRY* rollback) {
+static void sort_data_reloc_refs(data_reloc* dr) {
+    LIST_ENTRY newlist, *le;
+    // Sorts dr->refs in place, filling in each ref's hash, then merges duplicate extent data refs.
+    if (IsListEmpty(&dr->refs))
+        return;
+
+    // insertion sort: move refs one at a time from dr->refs into newlist, keeping newlist
+    // ordered by ascending type and, within the same type, descending hash
+    InitializeListHead(&newlist);
+
+    while (!IsListEmpty(&dr->refs)) {
+        data_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&dr->refs), data_reloc_ref, list_entry);
+        BOOL inserted = FALSE;
+        // the sort key is computed here and stored on the ref itself
+        if (ref->type == TYPE_EXTENT_DATA_REF)
+            ref->hash = get_extent_data_ref_hash2(ref->edr.root, ref->edr.objid, ref->edr.offset);
+        else if (ref->type == TYPE_SHARED_DATA_REF)
+            ref->hash = ref->parent->new_address;
+        // insert ref immediately before the first entry that has to come after it
+        le = newlist.Flink;
+        while (le != &newlist) {
+            data_reloc_ref* ref2 = CONTAINING_RECORD(le, data_reloc_ref, list_entry);
+
+            if (ref->type < ref2->type || (ref->type == ref2->type && ref->hash > ref2->hash)) {
+                InsertHeadList(le->Blink, &ref->list_entry);
+                inserted = TRUE;
+                break;
+            }
+
+            le = le->Flink;
+        }
+        // no entry in newlist sorts after ref (or newlist is still empty), so append it
+        if (!inserted)
+            InsertTailList(&newlist, &ref->list_entry);
+    }
+    // merge pass: fold neighbouring extent data refs with the same root, objid and offset into one
+    le = newlist.Flink;
+    while (le != &newlist) {
+        data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry);
+
+        if (le->Flink != &newlist) {
+            data_reloc_ref* ref2 = CONTAINING_RECORD(le->Flink, data_reloc_ref, list_entry);
+            // on a match, ref2's count is added to ref and ref2 is unlinked and freed
+            if (ref->type == TYPE_EXTENT_DATA_REF && ref2->type == TYPE_EXTENT_DATA_REF && ref->edr.root == ref2->edr.root &&
+                ref->edr.objid == ref2->edr.objid && ref->edr.offset == ref2->edr.offset) {
+                RemoveEntryList(&ref2->list_entry);
+                ref->edr.count += ref2->edr.count;
+                ExFreePool(ref2);
+                continue;
+            }
+        }
+        // reached only when nothing was merged; the continue above re-checks ref against its new neighbour
+        le = le->Flink;
+    }
+    // dr->refs was drained above and newlist is non-empty here, so relink newlist's entries under dr->refs
+    newlist.Flink->Blink = &dr->refs;
+    newlist.Blink->Flink = &dr->refs;
+    dr->refs.Flink = newlist.Flink;
+    dr->refs.Blink = newlist.Blink;
+}
+
+static NTSTATUS add_data_reloc_extent_item(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, data_reloc* dr) {
+    NTSTATUS Status;
     LIST_ENTRY* le;
     UINT64 rc = 0;
     UINT16 inline_len;
@@ -1407,118 +1542,120 @@ static NTSTATUS add_data_reloc_extent_item(device_extension* Vcb, data_reloc* dr
     data_reloc_ref* first_noninline = NULL;
     EXTENT_ITEM* ei;
     UINT8* ptr;
-    
+
     inline_len = sizeof(EXTENT_ITEM);
-    
+
+    sort_data_reloc_refs(dr);
+
     le = dr->refs.Flink;
     while (le != &dr->refs) {
         data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry);
-        ULONG extlen = 0;
-        
-        rc++;
-        
-        if (ref->type == TYPE_EXTENT_DATA_REF)
+        UINT16 extlen = 0;
+
+        if (ref->type == TYPE_EXTENT_DATA_REF) {
             extlen += sizeof(EXTENT_DATA_REF);
-        else if (ref->type == TYPE_SHARED_DATA_REF)
+            rc += ref->edr.count;
+        } else if (ref->type == TYPE_SHARED_DATA_REF) {
             extlen += sizeof(SHARED_DATA_REF);
+            rc++;
+        }
 
         if (all_inline) {
-            if (inline_len + 1 + extlen > Vcb->superblock.node_size / 4) {
+            if ((ULONG)(inline_len + 1 + extlen) > (Vcb->superblock.node_size >> 2)) {
                 all_inline = FALSE;
                 first_noninline = ref;
             } else
                 inline_len += extlen + 1;
         }
-        
+
         le = le->Flink;
     }
-    
+
     ei = ExAllocatePoolWithTag(PagedPool, inline_len, ALLOC_TAG);
     if (!ei) {
         ERR("out of memory\n");
         return STATUS_INSUFFICIENT_RESOURCES;
     }
-    
+
     ei->refcount = rc;
     ei->generation = dr->ei->generation;
     ei->flags = dr->ei->flags;
     ptr = (UINT8*)&ei[1];
-    
+
     le = dr->refs.Flink;
     while (le != &dr->refs) {
         data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry);
-        
+
         if (ref == first_noninline)
             break;
-        
+
         *ptr = ref->type;
         ptr++;
-        
+
         if (ref->type == TYPE_EXTENT_DATA_REF) {
             EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)ptr;
-            
+
             RtlCopyMemory(edr, &ref->edr, sizeof(EXTENT_DATA_REF));
-            
+
             ptr += sizeof(EXTENT_DATA_REF);
         } else if (ref->type == TYPE_SHARED_DATA_REF) {
             SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)ptr;
-            
+
             sdr->offset = ref->parent->new_address;
             sdr->count = ref->sdr.count;
-            
+
             ptr += sizeof(SHARED_DATA_REF);
         }
-        
+
         le = le->Flink;
     }
-    
-    if (!insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_EXTENT_ITEM, dr->size, ei, inline_len, NULL, NULL, rollback)) {
-        ERR("insert_tree_item failed\n");
-        return STATUS_INTERNAL_ERROR;
+
+    Status = insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_EXTENT_ITEM, dr->size, ei, inline_len, NULL, NULL);
+    if (!NT_SUCCESS(Status)) {
+        ERR("insert_tree_item returned %08x\n", Status);
+        return Status;
     }
-    
+
     if (!all_inline) {
         le = &first_noninline->list_entry;
-        
+
         while (le != &dr->refs) {
             data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry);
-            
+
             if (ref->type == TYPE_EXTENT_DATA_REF) {
                 EXTENT_DATA_REF* edr;
-                UINT64 off;
-                
+
                 edr = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA_REF), ALLOC_TAG);
                 if (!edr) {
                     ERR("out of memory\n");
                     return STATUS_INSUFFICIENT_RESOURCES;
                 }
-                
+
                 RtlCopyMemory(edr, &ref->edr, sizeof(EXTENT_DATA_REF));
-                
-                off = get_extent_data_ref_hash2(ref->edr.root, ref->edr.objid, ref->edr.offset);
-                
-                if (!insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_EXTENT_DATA_REF, off, edr, sizeof(EXTENT_DATA_REF), NULL, NULL, rollback)) {
-                    ERR("insert_tree_item failed\n");
-                    return STATUS_INTERNAL_ERROR;
+
+                Status = insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_EXTENT_DATA_REF, ref->hash, edr, sizeof(EXTENT_DATA_REF), NULL, NULL);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("insert_tree_item returned %08x\n", Status);
+                    return Status;
                 }
             } else if (ref->type == TYPE_SHARED_DATA_REF) {
-                SHARED_DATA_REF* sdr;
-                
-                sdr = ExAllocatePoolWithTag(PagedPool, sizeof(SHARED_DATA_REF), ALLOC_TAG);
+                UINT32* sdr;
+
+                sdr = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32), ALLOC_TAG);
                 if (!sdr) {
                     ERR("out of memory\n");
                     return STATUS_INSUFFICIENT_RESOURCES;
                 }
-                
-                sdr->offset = ref->parent->new_address;
-                sdr->count = ref->sdr.count;
-                
-                if (!insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_SHARED_DATA_REF, sdr->offset, sdr, sizeof(SHARED_DATA_REF), NULL, NULL, rollback)) {
-                    ERR("insert_tree_item failed\n");
-                    return STATUS_INTERNAL_ERROR;
+
+                *sdr = ref->sdr.count;
+
+                Status = insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_SHARED_DATA_REF, ref->parent->new_address, sdr, sizeof(UINT32), NULL, NULL);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("insert_tree_item returned %08x\n", Status);
+                    return Status;
                 }
             }
-            
+
             le = le->Flink;
         }
     }
@@ -1535,71 +1672,71 @@ static NTSTATUS balance_data_chunk(device_extension* Vcb, chunk* c, BOOL* change
     UINT64 loaded = 0, num_loaded = 0;
     chunk* newchunk = NULL;
     UINT8* data = NULL;
-    
+
     TRACE("chunk %llx\n", c->offset);
-    
+
     InitializeListHead(&rollback);
     InitializeListHead(&items);
     InitializeListHead(&metadata_items);
-    
+
     ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
-    
+
     searchkey.obj_id = c->offset;
     searchkey.obj_type = TYPE_EXTENT_ITEM;
     searchkey.offset = 0xffffffffffffffff;
-    
+
     Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
     if (!NT_SUCCESS(Status)) {
         ERR("find_item returned %08x\n", Status);
         goto end;
     }
-    
+
     do {
         traverse_ptr next_tp;
-        
+
         if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
             break;
-        
+
         if (tp.item->key.obj_id >= c->offset && tp.item->key.obj_type == TYPE_EXTENT_ITEM) {
             BOOL tree = FALSE;
-            
+
             if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) {
                 EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
-                
+
                 if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
                     tree = TRUE;
             }
-            
+
             if (!tree) {
                 Status = add_data_reloc(Vcb, &items, &metadata_items, &tp, c, &rollback);
-                
+
                 if (!NT_SUCCESS(Status)) {
                     ERR("add_data_reloc returned %08x\n", Status);
                     goto end;
                 }
-                
+
                 loaded += tp.item->key.offset;
                 num_loaded++;
-                
+
                 if (loaded >= 0x1000000 || num_loaded >= 100) // only do so much at a time, so we don't block too obnoxiously
                     break;
             }
         }
-    
+
         b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
-        
+
         if (b)
             tp = next_tp;
     } while (b);
-    
+
     if (IsListEmpty(&items)) {
         *changed = FALSE;
         Status = STATUS_SUCCESS;
         goto end;
     } else
         *changed = TRUE;
-    
-    data = ExAllocatePoolWithTag(PagedPool, 0x100000, ALLOC_TAG);
+
+    data = ExAllocatePoolWithTag(PagedPool, BALANCE_UNIT, ALLOC_TAG);
     if (!data) {
         ERR("out of memory\n");
         Status = STATUS_INSUFFICIENT_RESOURCES;
@@ -1612,248 +1749,443 @@ static NTSTATUS balance_data_chunk(device_extension* Vcb, chunk* c, BOOL* change
         BOOL done = FALSE;
         LIST_ENTRY* le2;
         UINT32* csum;
-        UINT64 off;
-        
+        RTL_BITMAP bmp;
+        ULONG* bmparr;
+        ULONG runlength, index, lastoff;
+
         if (newchunk) {
             ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE);
-            
+
             if (find_data_address_in_chunk(Vcb, newchunk, dr->size, &dr->new_address)) {
-                increase_chunk_usage(newchunk, dr->size);
-                space_list_subtract(Vcb, newchunk, FALSE, dr->new_address, dr->size, &rollback);
+                newchunk->used += dr->size;
+                space_list_subtract(newchunk, FALSE, dr->new_address, dr->size, &rollback);
                 done = TRUE;
             }
-            
+
             ExReleaseResourceLite(&newchunk->lock);
         }
-        
+
         if (!done) {
             ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
 
             le2 = Vcb->chunks.Flink;
             while (le2 != &Vcb->chunks) {
                 chunk* c2 = CONTAINING_RECORD(le2, chunk, list_entry);
-                
+
                 if (!c2->readonly && !c2->reloc && c2 != newchunk && c2->chunk_item->type == Vcb->data_flags) {
                     ExAcquireResourceExclusiveLite(&c2->lock, TRUE);
-                    
+
                     if ((c2->chunk_item->size - c2->used) >= dr->size) {
                         if (find_data_address_in_chunk(Vcb, c2, dr->size, &dr->new_address)) {
-                            increase_chunk_usage(c2, dr->size);
-                            space_list_subtract(Vcb, c2, FALSE, dr->new_address, dr->size, &rollback);
+                            c2->used += dr->size;
+                            space_list_subtract(c2, FALSE, dr->new_address, dr->size, &rollback);
                             ExReleaseResourceLite(&c2->lock);
                             newchunk = c2;
                             done = TRUE;
                             break;
                         }
                     }
-                    
+
                     ExReleaseResourceLite(&c2->lock);
                 }
 
                 le2 = le2->Flink;
             }
-            
+
             // allocate new chunk if necessary
             if (!done) {
-                newchunk = alloc_chunk(Vcb, Vcb->data_flags);
-                
-                if (!newchunk) {
-                    ERR("could not allocate new chunk\n");
+                Status = alloc_chunk(Vcb, Vcb->data_flags, &newchunk, FALSE);
+
+                if (!NT_SUCCESS(Status)) {
+                    ERR("alloc_chunk returned %08x\n", Status);
                     ExReleaseResourceLite(&Vcb->chunk_lock);
-                    Status = STATUS_DISK_FULL;
                     goto end;
                 }
-                
+
                 ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE);
-                
+
+                newchunk->balance_num = Vcb->balance.balance_num;
+
                 if (!find_data_address_in_chunk(Vcb, newchunk, dr->size, &dr->new_address)) {
                     ExReleaseResourceLite(&newchunk->lock);
+                    ExReleaseResourceLite(&Vcb->chunk_lock);
                     ERR("could not find address in new chunk\n");
                     Status = STATUS_DISK_FULL;
                     goto end;
                 } else {
-                    increase_chunk_usage(newchunk, dr->size);
-                    space_list_subtract(Vcb, newchunk, FALSE, dr->new_address, dr->size, &rollback);
+                    newchunk->used += dr->size;
+                    space_list_subtract(newchunk, FALSE, dr->new_address, dr->size, &rollback);
                 }
-                
+
                 ExReleaseResourceLite(&newchunk->lock);
             }
-            
+
             ExReleaseResourceLite(&Vcb->chunk_lock);
         }
-        
+
         dr->newchunk = newchunk;
-        
-        csum = ExAllocatePoolWithTag(PagedPool, dr->size * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG);
+
+        bmparr = ExAllocatePoolWithTag(PagedPool, (ULONG)sector_align((dr->size / Vcb->superblock.sector_size) + 1, sizeof(ULONG)), ALLOC_TAG);
+        if (!bmparr) {
+            ERR("out of memory\n");
+            Status = STATUS_INSUFFICIENT_RESOURCES;
+            goto end;
+        }
+
+        csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(dr->size * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG);
         if (!csum) {
             ERR("out of memory\n");
+            ExFreePool(bmparr);
             Status = STATUS_INSUFFICIENT_RESOURCES;
             goto end;
         }
-        
-        Status = load_csum(Vcb, csum, dr->address, dr->size / Vcb->superblock.sector_size, NULL);
 
-        if (NT_SUCCESS(Status)) {
-            add_checksum_entry(Vcb, dr->new_address, dr->size / Vcb->superblock.sector_size, csum, NULL, &rollback);
-            add_checksum_entry(Vcb, dr->address, dr->size / Vcb->superblock.sector_size, NULL, NULL, &rollback);
+        RtlInitializeBitMap(&bmp, bmparr, (ULONG)(dr->size / Vcb->superblock.sector_size));
+        RtlSetAllBits(&bmp); // 1 = no csum, 0 = csum
+
+        searchkey.obj_id = EXTENT_CSUM_ID;
+        searchkey.obj_type = TYPE_EXTENT_CSUM;
+        searchkey.offset = dr->address;
+
+        Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, NULL);
+        if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
+            ERR("find_item returned %08x\n", Status);
+            ExFreePool(csum);
+            ExFreePool(bmparr);
+            goto end;
         }
 
-        ExFreePool(csum);
-        
-        off = 0;
-        
-        while (off < dr->size) {
-            ULONG ds = min(dr->size - off, 0x100000);
-            
-            Status = read_data(Vcb, dr->address + off, ds, NULL, FALSE, data, c, NULL, NULL, FALSE);
-            if (!NT_SUCCESS(Status)) {
-                ERR("read_data returned %08x\n", Status);
-                goto end;
-            }
-            
-            Status = write_data_complete(Vcb, dr->new_address + off, data, ds, NULL, newchunk);
-            if (!NT_SUCCESS(Status)) {
-                ERR("write_data_complete returned %08x\n", Status);
-                goto end;
+        if (Status != STATUS_NOT_FOUND) {
+            do {
+                traverse_ptr next_tp;
+
+                if (tp.item->key.obj_type == TYPE_EXTENT_CSUM) {
+                    if (tp.item->key.offset >= dr->address + dr->size)
+                        break;
+                    else if (tp.item->size >= sizeof(UINT32) && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= dr->address) {
+                        UINT64 cs = max(dr->address, tp.item->key.offset);
+                        UINT64 ce = min(dr->address + dr->size, tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)));
+
+                        RtlCopyMemory(csum + ((cs - dr->address) / Vcb->superblock.sector_size),
+                                      tp.item->data + ((cs - tp.item->key.offset) * sizeof(UINT32) / Vcb->superblock.sector_size),
+                                      (ULONG)((ce - cs) * sizeof(UINT32) / Vcb->superblock.sector_size));
+
+                        RtlClearBits(&bmp, (ULONG)((cs - dr->address) / Vcb->superblock.sector_size), (ULONG)((ce - cs) / Vcb->superblock.sector_size));
+
+                        if (ce == dr->address + dr->size)
+                            break;
+                    }
+                }
+
+                if (find_next_item(Vcb, &tp, &next_tp, FALSE, NULL))
+                    tp = next_tp;
+                else
+                    break;
+            } while (TRUE);
+        }
+
+        lastoff = 0;
+        runlength = RtlFindFirstRunClear(&bmp, &index);
+
+        while (runlength != 0) {
+            if (index > lastoff) {
+                ULONG off = lastoff;
+                ULONG size = index - lastoff;
+
+                // handle no csum run
+                do {
+                    ULONG rl;
+
+                    if (size * Vcb->superblock.sector_size > BALANCE_UNIT)
+                        rl = BALANCE_UNIT / Vcb->superblock.sector_size;
+                    else
+                        rl = size;
+
+                    Status = read_data(Vcb, dr->address + (off * Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, NULL, FALSE, data,
+                                       c, NULL, NULL, 0, FALSE, NormalPagePriority);
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("read_data returned %08x\n", Status);
+                        ExFreePool(csum);
+                        ExFreePool(bmparr);
+                        goto end;
+                    }
+
+                    Status = write_data_complete(Vcb, dr->new_address + (off * Vcb->superblock.sector_size), data, rl * Vcb->superblock.sector_size,
+                                                 NULL, newchunk, FALSE, 0, NormalPagePriority);
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("write_data_complete returned %08x\n", Status);
+                        ExFreePool(csum);
+                        ExFreePool(bmparr);
+                        goto end;
+                    }
+
+                    size -= rl;
+                    off += rl;
+                } while (size > 0);
             }
-            
-            off += ds;
+
+            add_checksum_entry(Vcb, dr->new_address + (index * Vcb->superblock.sector_size), runlength, &csum[index], NULL);
+            add_checksum_entry(Vcb, dr->address + (index * Vcb->superblock.sector_size), runlength, NULL, NULL);
+
+            // handle csum run
+            do {
+                ULONG rl;
+
+                if (runlength * Vcb->superblock.sector_size > BALANCE_UNIT)
+                    rl = BALANCE_UNIT / Vcb->superblock.sector_size;
+                else
+                    rl = runlength;
+
+                Status = read_data(Vcb, dr->address + (index * Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, &csum[index], FALSE, data,
+                                   c, NULL, NULL, 0, FALSE, NormalPagePriority);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("read_data returned %08x\n", Status);
+                    ExFreePool(csum);
+                    ExFreePool(bmparr);
+                    goto end;
+                }
+
+                Status = write_data_complete(Vcb, dr->new_address + (index * Vcb->superblock.sector_size), data, rl * Vcb->superblock.sector_size,
+                                             NULL, newchunk, FALSE, 0, NormalPagePriority);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("write_data_complete returned %08x\n", Status);
+                    ExFreePool(csum);
+                    ExFreePool(bmparr);
+                    goto end;
+                }
+
+                runlength -= rl;
+                index += rl;
+            } while (runlength > 0);
+
+            lastoff = index;
+            runlength = RtlFindNextForwardRunClear(&bmp, index, &index);
+        }
+
+        ExFreePool(csum);
+        ExFreePool(bmparr);
+
+        // handle final nocsum run (sectors from lastoff up to dr->size, read with no checksum array)
+        if (lastoff < dr->size / Vcb->superblock.sector_size) {
+            ULONG off = lastoff;
+            ULONG size = (ULONG)((dr->size / Vcb->superblock.sector_size) - lastoff);
+
+            do {
+                ULONG rl;
+
+                if (size * Vcb->superblock.sector_size > BALANCE_UNIT) // never move more than BALANCE_UNIT bytes per pass
+                    rl = BALANCE_UNIT / Vcb->superblock.sector_size;
+                else
+                    rl = size;
+
+                Status = read_data(Vcb, dr->address + (off * Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, NULL, FALSE, data,
+                                   c, NULL, NULL, 0, FALSE, NormalPagePriority);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("read_data returned %08x\n", Status);
+                    // csum and bmparr were already freed just before this block,
+                    // so they must not be passed to ExFreePool a second time here
+                    goto end;
+                }
+
+                Status = write_data_complete(Vcb, dr->new_address + (off * Vcb->superblock.sector_size), data, rl * Vcb->superblock.sector_size,
+                                             NULL, newchunk, FALSE, 0, NormalPagePriority);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("write_data_complete returned %08x\n", Status);
+                    // csum and bmparr were already freed just before this block,
+                    // so they must not be passed to ExFreePool a second time here
+                    goto end;
+                }
+
+                size -= rl;
+                off += rl;
+            } while (size > 0);
         }
 
         le = le->Flink;
     }
-    
+
     ExFreePool(data);
     data = NULL;
-    
+
     Status = write_metadata_items(Vcb, &metadata_items, &items, NULL, &rollback);
     if (!NT_SUCCESS(Status)) {
         ERR("write_metadata_items returned %08x\n", Status);
         goto end;
     }
-    
+
     le = items.Flink;
     while (le != &items) {
         data_reloc* dr = CONTAINING_RECORD(le, data_reloc, list_entry);
-        
-        Status = add_data_reloc_extent_item(Vcb, dr, &rollback);
+
+        Status = add_data_reloc_extent_item(Vcb, dr);
         if (!NT_SUCCESS(Status)) {
             ERR("add_data_reloc_extent_item returned %08x\n", Status);
             goto end;
         }
-        
+
         le = le->Flink;
     }
-    
+
     le = c->changed_extents.Flink;
     while (le != &c->changed_extents) {
         LIST_ENTRY *le2, *le3;
         changed_extent* ce = CONTAINING_RECORD(le, changed_extent, list_entry);
-        
+
         le3 = le->Flink;
-        
+
         le2 = items.Flink;
         while (le2 != &items) {
             data_reloc* dr = CONTAINING_RECORD(le2, data_reloc, list_entry);
-            
+
             if (ce->address == dr->address) {
                 ce->address = dr->new_address;
                 RemoveEntryList(&ce->list_entry);
                 InsertTailList(&dr->newchunk->changed_extents, &ce->list_entry);
                 break;
             }
-            
+
             le2 = le2->Flink;
         }
-        
+
         le = le3;
     }
-    
-    // update open FCBs
-    // FIXME - speed this up
-    
-    ExAcquireResourceSharedLite(&Vcb->fcb_lock, TRUE);
-    
-    le = Vcb->all_fcbs.Flink;
-    while (le != &Vcb->all_fcbs) {
-        struct _fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_all);
-        LIST_ENTRY* le2;
 
-        ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE);
-        
-        le2 = fcb->extents.Flink;
-        while (le2 != &fcb->extents) {
-            extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
-            
-            if (!ext->ignore) {
-                if (ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) {
-                    EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data;
-                    
-                    if (ed2->size > 0 && ed2->address >= c->offset && ed2->address < c->offset + c->chunk_item->size) {
-                        LIST_ENTRY* le3 = items.Flink;
-                        while (le3 != &items) {
-                            data_reloc* dr = CONTAINING_RECORD(le3, data_reloc, list_entry);
-                            
-                            if (ed2->address == dr->address) {
-                                ed2->address = dr->new_address;
-                                break;
+    Status = STATUS_SUCCESS;
+
+    Vcb->need_write = TRUE;
+
+end:
+    if (NT_SUCCESS(Status)) {
+        // update extents in cache inodes before we flush
+        le = Vcb->chunks.Flink;
+        while (le != &Vcb->chunks) {
+            chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry);
+
+            if (c2->cache) {
+                LIST_ENTRY* le2;
+
+                ExAcquireResourceExclusiveLite(c2->cache->Header.Resource, TRUE);
+
+                le2 = c2->cache->extents.Flink;
+                while (le2 != &c2->cache->extents) {
+                    extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
+
+                    if (!ext->ignore) {
+                        if (ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) {
+                            EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
+
+                            if (ed2->size > 0 && ed2->address >= c->offset && ed2->address < c->offset + c->chunk_item->size) {
+                                LIST_ENTRY* le3 = items.Flink;
+                                while (le3 != &items) {
+                                    data_reloc* dr = CONTAINING_RECORD(le3, data_reloc, list_entry);
+
+                                    if (ed2->address == dr->address) {
+                                        ed2->address = dr->new_address;
+                                        break;
+                                    }
+
+                                    le3 = le3->Flink;
+                                }
                             }
-                            
-                            le3 = le3->Flink;
                         }
                     }
+
+                    le2 = le2->Flink;
                 }
+
+                ExReleaseResourceLite(c2->cache->Header.Resource);
             }
-            
-            le2 = le2->Flink;
+
+            le = le->Flink;
         }
-        
-        ExReleaseResourceLite(fcb->Header.Resource);
-        
-        le = le->Flink;
+
+        Status = do_write(Vcb, NULL);
+        if (!NT_SUCCESS(Status))
+            ERR("do_write returned %08x\n", Status);
     }
-    
-    ExReleaseResourceLite(&Vcb->fcb_lock);
-    
-    Status = STATUS_SUCCESS;
-    
-    Vcb->need_write = TRUE;
-    
-end:
-    if (NT_SUCCESS(Status))
-        clear_rollback(Vcb, &rollback);
-    else
+
+    if (NT_SUCCESS(Status)) {
+        clear_rollback(&rollback);
+
+        // update open FCBs
+        // FIXME - speed this up(?)
+
+        ExAcquireResourceSharedLite(&Vcb->fcb_lock, TRUE);
+
+        le = Vcb->all_fcbs.Flink;
+        while (le != &Vcb->all_fcbs) {
+            struct _fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_all);
+            LIST_ENTRY* le2;
+
+            ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE);
+
+            le2 = fcb->extents.Flink;
+            while (le2 != &fcb->extents) {
+                extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
+
+                if (!ext->ignore) {
+                    if (ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) {
+                        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
+
+                        if (ed2->size > 0 && ed2->address >= c->offset && ed2->address < c->offset + c->chunk_item->size) {
+                            LIST_ENTRY* le3 = items.Flink;
+                            while (le3 != &items) {
+                                data_reloc* dr = CONTAINING_RECORD(le3, data_reloc, list_entry);
+
+                                if (ed2->address == dr->address) {
+                                    ed2->address = dr->new_address;
+                                    break;
+                                }
+
+                                le3 = le3->Flink;
+                            }
+                        }
+                    }
+                }
+
+                le2 = le2->Flink;
+            }
+
+            ExReleaseResourceLite(fcb->Header.Resource);
+
+            le = le->Flink;
+        }
+
+        ExReleaseResourceLite(&Vcb->fcb_lock);
+    } else
         do_rollback(Vcb, &rollback);
-    
+
+    free_trees(Vcb);
+
     ExReleaseResourceLite(&Vcb->tree_lock);
-    
+
     if (data)
         ExFreePool(data);
-    
+
     while (!IsListEmpty(&items)) {
         data_reloc* dr = CONTAINING_RECORD(RemoveHeadList(&items), data_reloc, list_entry);
-        
+
         while (!IsListEmpty(&dr->refs)) {
             data_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&dr->refs), data_reloc_ref, list_entry);
-            
+
             ExFreePool(ref);
         }
-        
+
         ExFreePool(dr);
     }
-    
+
     while (!IsListEmpty(&metadata_items)) {
         metadata_reloc* mr = CONTAINING_RECORD(RemoveHeadList(&metadata_items), metadata_reloc, list_entry);
-        
+
         while (!IsListEmpty(&mr->refs)) {
             metadata_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&mr->refs), metadata_reloc_ref, list_entry);
-            
+
             ExFreePool(ref);
         }
-        
+
         ExFreePool(mr);
     }
-    
+
     return Status;
 }
 
@@ -1876,41 +2208,41 @@ static __inline UINT64 get_chunk_dup_type(chunk* c) {
 
 static BOOL should_balance_chunk(device_extension* Vcb, UINT8 sort, chunk* c) {
     btrfs_balance_opts* opts;
-    
+
     opts = &Vcb->balance.opts[sort];
-    
+
     if (!(opts->flags & BTRFS_BALANCE_OPTS_ENABLED))
         return FALSE;
-    
+
     if (opts->flags & BTRFS_BALANCE_OPTS_PROFILES) {
         UINT64 type = get_chunk_dup_type(c);
-        
+
         if (!(type & opts->profiles))
             return FALSE;
     }
-    
+
     if (opts->flags & BTRFS_BALANCE_OPTS_DEVID) {
         UINT16 i;
         CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
         BOOL b = FALSE;
-        
+
         for (i = 0; i < c->chunk_item->num_stripes; i++) {
             if (cis[i].dev_id == opts->devid) {
                 b = TRUE;
                 break;
             }
         }
-        
+
         if (!b)
             return FALSE;
     }
-    
+
     if (opts->flags & BTRFS_BALANCE_OPTS_DRANGE) {
         UINT16 i, factor;
         UINT64 physsize;
         CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
         BOOL b = FALSE;
-        
+
         if (c->chunk_item->type & BLOCK_FLAG_RAID0)
             factor = c->chunk_item->num_stripes;
         else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
@@ -1921,48 +2253,49 @@ static BOOL should_balance_chunk(device_extension* Vcb, UINT8 sort, chunk* c) {
             factor = c->chunk_item->num_stripes - 2;
         else // SINGLE, DUPLICATE, RAID1
             factor = 1;
-        
+
         physsize = c->chunk_item->size / factor;
-        
+
         for (i = 0; i < c->chunk_item->num_stripes; i++) {
-            if (cis[i].offset >= opts->drange_start && cis[i].offset + physsize < opts->drange_end) {
+            if (cis[i].offset < opts->drange_end && cis[i].offset + physsize >= opts->drange_start &&
+                (!(opts->flags & BTRFS_BALANCE_OPTS_DEVID) || cis[i].dev_id == opts->devid)) {
                 b = TRUE;
                 break;
             }
         }
-        
+
         if (!b)
             return FALSE;
     }
-    
+
     if (opts->flags & BTRFS_BALANCE_OPTS_VRANGE) {
         if (c->offset + c->chunk_item->size <= opts->vrange_start || c->offset > opts->vrange_end)
             return FALSE;
     }
-    
+
     if (opts->flags & BTRFS_BALANCE_OPTS_STRIPES) {
         if (c->chunk_item->num_stripes < opts->stripes_start || c->chunk_item->num_stripes < opts->stripes_end)
             return FALSE;
     }
-    
+
     if (opts->flags & BTRFS_BALANCE_OPTS_USAGE) {
         UINT64 usage = c->used * 100 / c->chunk_item->size;
-        
+
         // usage == 0 should mean completely empty, not just that usage rounds to 0%
         if (c->used > 0 && usage == 0)
             usage = 1;
-        
+
         if (usage < opts->usage_start || usage > opts->usage_end)
             return FALSE;
     }
-    
+
     if (opts->flags & BTRFS_BALANCE_OPTS_CONVERT && opts->flags & BTRFS_BALANCE_OPTS_SOFT) {
         UINT64 type = get_chunk_dup_type(c);
-        
+
         if (type == opts->convert)
             return FALSE;
     }
-    
+
     return TRUE;
 }
 
@@ -1987,32 +2320,32 @@ static void copy_balance_args(btrfs_balance_opts* opts, BALANCE_ARGS* args) {
         args->devid = opts->devid;
         args->flags |= BALANCE_ARGS_FLAGS_DEVID;
     }
-    
+
     if (opts->flags & BTRFS_BALANCE_OPTS_DRANGE) {
         args->drange_start = opts->drange_start;
         args->drange_end = opts->drange_end;
         args->flags |= BALANCE_ARGS_FLAGS_DRANGE;
     }
-    
+
     if (opts->flags & BTRFS_BALANCE_OPTS_VRANGE) {
         args->vrange_start = opts->vrange_start;
         args->vrange_end = opts->vrange_end;
         args->flags |= BALANCE_ARGS_FLAGS_VRANGE;
     }
-    
+
     if (opts->flags & BTRFS_BALANCE_OPTS_CONVERT) {
         args->convert = opts->convert;
         args->flags |= BALANCE_ARGS_FLAGS_CONVERT;
-        
+
         if (opts->flags & BTRFS_BALANCE_OPTS_SOFT)
             args->flags |= BALANCE_ARGS_FLAGS_SOFT;
     }
-    
+
     if (opts->flags & BTRFS_BALANCE_OPTS_LIMIT) {
         if (args->limit_start == 0) {
             args->flags |= BALANCE_ARGS_FLAGS_LIMIT_RANGE;
-            args->limit_start = opts->limit_start;
-            args->limit_end = opts->limit_end;
+            args->limit_start = (UINT32)opts->limit_start;
+            args->limit_end = (UINT32)opts->limit_end;
         } else {
             args->flags |= BALANCE_ARGS_FLAGS_LIMIT;
             args->limit = opts->limit_end;
@@ -2027,175 +2360,179 @@ static void copy_balance_args(btrfs_balance_opts* opts, BALANCE_ARGS* args) {
 }
 
 static NTSTATUS add_balance_item(device_extension* Vcb) {
-    LIST_ENTRY rollback;
     KEY searchkey;
     traverse_ptr tp;
     NTSTATUS Status;
     BALANCE_ITEM* bi;
-    
-    InitializeListHead(&rollback);
-    
+    // Builds a BALANCE_ITEM from Vcb->balance.opts and inserts it into root_root, replacing any item already stored under the same key.
     searchkey.obj_id = BALANCE_ITEM_ID;
     searchkey.obj_type = TYPE_TEMP_ITEM;
     searchkey.offset = 0;
-    
+
     ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
-    
+
     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
     if (!NT_SUCCESS(Status)) {
         ERR("find_item returned %08x\n", Status);
         goto end;
     }
-    
-    if (!keycmp(tp.item->key, searchkey))
-        delete_tree_item(Vcb, &tp, &rollback);
-    
+
+    if (!keycmp(tp.item->key, searchkey)) { // presumably keycmp() returns 0 on an exact key match: drop the old item first
+        Status = delete_tree_item(Vcb, &tp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("delete_tree_item returned %08x\n", Status);
+            goto end;
+        }
+    }
+
     bi = ExAllocatePoolWithTag(PagedPool, sizeof(BALANCE_ITEM), ALLOC_TAG);
     if (!bi) {
         ERR("out of memory\n");
         Status = STATUS_INSUFFICIENT_RESOURCES;
         goto end;
     }
-    
+
     RtlZeroMemory(bi, sizeof(BALANCE_ITEM));
-    
+
     if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED) {
         bi->flags |= BALANCE_FLAGS_DATA;
         copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bi->data);
     }
-    
+
     if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) {
         bi->flags |= BALANCE_FLAGS_METADATA;
         copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &bi->metadata);
     }
-    
+
     if (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED) {
         bi->flags |= BALANCE_FLAGS_SYSTEM;
         copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_SYSTEM], &bi->system);
     }
-    
-    if (!insert_tree_item(Vcb, Vcb->root_root, BALANCE_ITEM_ID, TYPE_TEMP_ITEM, 0, bi, sizeof(BALANCE_ITEM), NULL, NULL, &rollback)) {
-        ERR("insert_tree_item failed\n");
-        Status = STATUS_INTERNAL_ERROR;
+
+    Status = insert_tree_item(Vcb, Vcb->root_root, BALANCE_ITEM_ID, TYPE_TEMP_ITEM, 0, bi, sizeof(BALANCE_ITEM), NULL, NULL);
+    if (!NT_SUCCESS(Status)) {
+        ERR("insert_tree_item returned %08x\n", Status);
+        ExFreePool(bi); // assumes insert_tree_item keeps bi only on success - TODO confirm
         goto end;
     }
-    
+
     Status = STATUS_SUCCESS;
-    
+
 end:
     if (NT_SUCCESS(Status)) {
-        do_write(Vcb, NULL, &rollback);
-        free_trees(Vcb);
-        
-        clear_rollback(Vcb, &rollback);
-    } else
-        do_rollback(Vcb, &rollback);
-    
+        Status = do_write(Vcb, NULL); // only reached when every step above succeeded; its result becomes the return value
+        if (!NT_SUCCESS(Status))
+            ERR("do_write returned %08x\n", Status);
+    }
+
+    free_trees(Vcb); // called on the success and the failure path alike
+
     ExReleaseResourceLite(&Vcb->tree_lock);
-    
+
     return Status;
 }
 
 static NTSTATUS remove_balance_item(device_extension* Vcb) {
-    LIST_ENTRY rollback;
     KEY searchkey;
     traverse_ptr tp;
     NTSTATUS Status;
-    
-    InitializeListHead(&rollback);
-    
+    // Deletes the item keyed (BALANCE_ITEM_ID, TYPE_TEMP_ITEM, 0) from root_root if one is found, then calls do_write.
     searchkey.obj_id = BALANCE_ITEM_ID;
     searchkey.obj_type = TYPE_TEMP_ITEM;
     searchkey.offset = 0;
-    
+
     ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
-    
+
     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
     if (!NT_SUCCESS(Status)) {
         ERR("find_item returned %08x\n", Status);
         goto end;
     }
-    
+
     if (!keycmp(tp.item->key, searchkey)) {
-        delete_tree_item(Vcb, &tp, &rollback);
-        
-        do_write(Vcb, NULL, &rollback);
+        Status = delete_tree_item(Vcb, &tp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("delete_tree_item returned %08x\n", Status);
+            goto end; // NOTE(review): this exit skips free_trees(), which add_balance_item always calls - confirm intended
+        }
+
+        Status = do_write(Vcb, NULL);
+        if (!NT_SUCCESS(Status)) {
+            ERR("do_write returned %08x\n", Status);
+            goto end; // NOTE(review): free_trees() is skipped here as well when do_write fails - confirm intended
+        }
+
         free_trees(Vcb);
     }
 
     Status = STATUS_SUCCESS;
-    
+
 end:
-    if (NT_SUCCESS(Status))
-        clear_rollback(Vcb, &rollback);
-    else
-        do_rollback(Vcb, &rollback);
-    
     ExReleaseResourceLite(&Vcb->tree_lock);
-    
+
     return Status;
 }
 
 static void load_balance_args(btrfs_balance_opts* opts, BALANCE_ARGS* args) {
     opts->flags = BTRFS_BALANCE_OPTS_ENABLED;
-    
+
     if (args->flags & BALANCE_ARGS_FLAGS_PROFILES) {
         opts->flags |= BTRFS_BALANCE_OPTS_PROFILES;
         opts->profiles = args->profiles;
     }
-    
+
     if (args->flags & BALANCE_ARGS_FLAGS_USAGE) {
         opts->flags |= BTRFS_BALANCE_OPTS_USAGE;
-        
+
         opts->usage_start = 0;
-        opts->usage_end = args->usage;
+        opts->usage_end = (UINT8)args->usage;
     } else if (args->flags & BALANCE_ARGS_FLAGS_USAGE_RANGE) {
         opts->flags |= BTRFS_BALANCE_OPTS_USAGE;
-        
-        opts->usage_start = args->usage_start;
-        opts->usage_end = args->usage_end;
+
+        opts->usage_start = (UINT8)args->usage_start;
+        opts->usage_end = (UINT8)args->usage_end;
     }
-    
+
     if (args->flags & BALANCE_ARGS_FLAGS_DEVID) {
         opts->flags |= BTRFS_BALANCE_OPTS_DEVID;
         opts->devid = args->devid;
     }
-    
+
     if (args->flags & BALANCE_ARGS_FLAGS_DRANGE) {
         opts->flags |= BTRFS_BALANCE_OPTS_DRANGE;
         opts->drange_start = args->drange_start;
         opts->drange_end = args->drange_end;
     }
-    
+
     if (args->flags & BALANCE_ARGS_FLAGS_VRANGE) {
         opts->flags |= BTRFS_BALANCE_OPTS_VRANGE;
         opts->vrange_start = args->vrange_start;
         opts->vrange_end = args->vrange_end;
     }
-    
+
     if (args->flags & BALANCE_ARGS_FLAGS_LIMIT) {
         opts->flags |= BTRFS_BALANCE_OPTS_LIMIT;
-        
+
         opts->limit_start = 0;
         opts->limit_end = args->limit;
     } else if (args->flags & BALANCE_ARGS_FLAGS_LIMIT_RANGE) {
         opts->flags |= BTRFS_BALANCE_OPTS_LIMIT;
-        
+
         opts->limit_start = args->limit_start;
         opts->limit_end = args->limit_end;
     }
-    
+
     if (args->flags & BALANCE_ARGS_FLAGS_STRIPES_RANGE) {
         opts->flags |= BTRFS_BALANCE_OPTS_STRIPES;
-        
-        opts->stripes_start = args->stripes_start;
-        opts->stripes_end = args->stripes_end;
+
+        opts->stripes_start = (UINT16)args->stripes_start;
+        opts->stripes_end = (UINT16)args->stripes_end;
     }
-    
+
     if (args->flags & BALANCE_ARGS_FLAGS_CONVERT) {
         opts->flags |= BTRFS_BALANCE_OPTS_CONVERT;
         opts->convert = args->convert;
-        
+
         if (args->flags & BALANCE_ARGS_FLAGS_SOFT)
             opts->flags |= BTRFS_BALANCE_OPTS_SOFT;
     }
@@ -2205,584 +2542,860 @@ static NTSTATUS remove_superblocks(device* dev) {
     NTSTATUS Status;
     superblock* sb;
     int i = 0;
-    
+
     sb = ExAllocatePoolWithTag(PagedPool, sizeof(superblock), ALLOC_TAG);
     if (!sb) {
         ERR("out of memory\n");
         return STATUS_INSUFFICIENT_RESOURCES;
     }
-    
+
     RtlZeroMemory(sb, sizeof(superblock));
-    
-    while (superblock_addrs[i] > 0 && dev->length >= superblock_addrs[i] + sizeof(superblock)) {
+
+    while (superblock_addrs[i] > 0 && dev->devitem.num_bytes >= superblock_addrs[i] + sizeof(superblock)) {
         Status = write_data_phys(dev->devobj, superblock_addrs[i], sb, sizeof(superblock));
-        
+
         if (!NT_SUCCESS(Status)) {
             ExFreePool(sb);
             return Status;
         }
-        
+
         i++;
     }
-    
-    ExFreePool(sb);
-    
-    return STATUS_SUCCESS;
-}
 
-static NTSTATUS replace_mount_dev(device_extension* Vcb, device* dev, PDEVICE_OBJECT mountmgr, BOOL part0) {
-    NTSTATUS Status;
-    MOUNTDEV_NAME mdn, *mdn2 = NULL, *mdn3 = NULL;
-    ULONG mdnsize, mmpsize;
-    MOUNTMGR_MOUNT_POINT* mmp = NULL;
-    MOUNTMGR_MOUNT_POINTS mmps, *mmps2 = NULL;
-    ULONG i;
-    UNICODE_STRING us;
-
-    // get old device name
-    
-    Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &mdn, sizeof(MOUNTDEV_NAME), TRUE, NULL);
-    if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) {
-        ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status);
-        return Status;
-    }
-    
-    mdnsize = offsetof(MOUNTDEV_NAME, Name[0]) + mdn.NameLength;
-    
-    mdn2 = ExAllocatePoolWithTag(PagedPool, mdnsize, ALLOC_TAG);
-    if (!mdn2) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-    
-    Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, mdn2, mdnsize, TRUE, NULL);
-    if (!NT_SUCCESS(Status)) {
-        ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status);
-        goto end;
-    }
-    
-    // get new device name
-    
-    Status = dev_ioctl(first_device(Vcb)->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &mdn, sizeof(MOUNTDEV_NAME), TRUE, NULL);
-    if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) {
-        ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status);
-        goto end2;
-    }
-    
-    mdnsize = offsetof(MOUNTDEV_NAME, Name[0]) + mdn.NameLength;
-    
-    mdn3 = ExAllocatePoolWithTag(PagedPool, mdnsize, ALLOC_TAG);
-    if (!mdn3) {
-        ERR("out of memory\n");
-        Status = STATUS_INSUFFICIENT_RESOURCES;
-        goto end2;
-    }
-    
-    Status = dev_ioctl(first_device(Vcb)->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, mdn3, mdnsize, TRUE, NULL);
-    if (!NT_SUCCESS(Status)) {
-        ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status);
-        goto end2;
-    }
-    
-    // query and delete existing mount points
-    
-    mmpsize = sizeof(MOUNTMGR_MOUNT_POINT) + mdn2->NameLength;
-    
-    mmp = ExAllocatePoolWithTag(PagedPool, mmpsize, ALLOC_TAG);
-    if (!mmp) {
-        ERR("out of memory\n");
-        Status = STATUS_INSUFFICIENT_RESOURCES;
-        goto end2;
-    }
-    
-    RtlZeroMemory(mmp, sizeof(MOUNTMGR_MOUNT_POINT));
-    mmp->DeviceNameOffset = sizeof(MOUNTMGR_MOUNT_POINT);
-    mmp->DeviceNameLength = mdn2->NameLength;
-    RtlCopyMemory(&mmp[1], mdn2->Name, mdn2->NameLength);
-    
-    Status = dev_ioctl(mountmgr, IOCTL_MOUNTMGR_QUERY_POINTS, mmp, mmpsize, &mmps, mmpsize, TRUE, NULL);
-    if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) {
-        ERR("IOCTL_MOUNTMGR_QUERY_POINTS returned %08x\n", Status);
-        goto end2;
-    }
-    
-    mmps2 = ExAllocatePoolWithTag(PagedPool, mmps.Size, ALLOC_TAG);
-    if (!mmps2) {
-        ERR("out of memory\n");
-        Status = STATUS_INSUFFICIENT_RESOURCES;
-        goto end2;
-    }
-    
-    Status = dev_ioctl(mountmgr, IOCTL_MOUNTMGR_DELETE_POINTS, mmp, mmpsize, mmps2, mmps.Size, TRUE, NULL);
-    if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) {
-        ERR("IOCTL_MOUNTMGR_DELETE_POINTS returned %08x\n", Status);
-        goto end2;
-    }
-    
-    // re-create mount points
-    
-    for (i = 0; i < mmps2->NumberOfMountPoints; i++) {
-        if (mmps2->MountPoints[i].SymbolicLinkNameOffset != 0) {
-            ULONG mcpilen;
-            MOUNTMGR_CREATE_POINT_INPUT* mcpi;
-            
-            mcpilen = sizeof(MOUNTMGR_CREATE_POINT_INPUT) + mmps2->MountPoints[i].SymbolicLinkNameLength + mdn3->NameLength;
-            
-            mcpi = ExAllocatePoolWithTag(PagedPool, mcpilen, ALLOC_TAG);
-            if (!mcpi) {
-                ERR("out of memory\n");
-                Status = STATUS_INSUFFICIENT_RESOURCES;
-                goto end2;
-            }
-            
-            mcpi->SymbolicLinkNameOffset = sizeof(MOUNTMGR_CREATE_POINT_INPUT);
-            mcpi->SymbolicLinkNameLength = mmps2->MountPoints[i].SymbolicLinkNameLength;
-            mcpi->DeviceNameOffset = mcpi->SymbolicLinkNameOffset + mcpi->SymbolicLinkNameLength;
-            mcpi->DeviceNameLength = mdn3->NameLength;
-            
-            RtlCopyMemory((UINT8*)mcpi + mcpi->SymbolicLinkNameOffset, (UINT8*)mmps2 + mmps2->MountPoints[i].SymbolicLinkNameOffset,
-                          mcpi->SymbolicLinkNameLength);
-            RtlCopyMemory((UINT8*)mcpi + mcpi->DeviceNameOffset, mdn3->Name, mdn3->NameLength);
-
-            Status = dev_ioctl(mountmgr, IOCTL_MOUNTMGR_CREATE_POINT, mcpi, mcpilen, NULL, 0, TRUE, NULL);
-            if (!NT_SUCCESS(Status)) {
-                ERR("IOCTL_MOUNTMGR_CREATE_POINT returned %08x\n", Status);
-                ExFreePool(mcpi);
-                goto end2;
-            }
-            
-            ExFreePool(mcpi);
-        }
-    }
-    
-    Status = STATUS_SUCCESS;
-    
-end2:
-    // re-add old device back to mountmgr
-    
-    if (!part0) {
-        us.Buffer = mdn2->Name;
-        us.Length = us.MaximumLength = mdn2->NameLength;
-        
-        add_volume(mountmgr, &us);
-    }
-    
-end:
-    if (mdn2)
-        ExFreePool(mdn2);
-    
-    if (mdn3)
-        ExFreePool(mdn3);
-    
-    if (mmp)
-        ExFreePool(mmp);
-    
-    if (mmps2)
-        ExFreePool(mmps2);
+    ExFreePool(sb);
 
-    return Status;
+    return STATUS_SUCCESS;
 }
 
-static NTSTATUS finish_removing_device(device_extension* Vcb, device* dev) {
+static NTSTATUS finish_removing_device(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, device* dev) {
     KEY searchkey;
     traverse_ptr tp;
     NTSTATUS Status;
-    LIST_ENTRY rollback, *le;
-    BOOL first_dev, part0 = FALSE;
-    
-    InitializeListHead(&rollback);
-    
-    if (Vcb->need_write)
-        do_write(Vcb, NULL, &rollback);
-    
+    LIST_ENTRY* le;
+    volume_device_extension* vde;
+
+    if (Vcb->need_write) {
+        Status = do_write(Vcb, NULL);
+
+        if (!NT_SUCCESS(Status))
+            ERR("do_write returned %08x\n", Status);
+    } else
+        Status = STATUS_SUCCESS;
+
     free_trees(Vcb);
-    
-    clear_rollback(Vcb, &rollback);
-    
+
+    if (!NT_SUCCESS(Status))
+        return Status;
+
     // remove entry in chunk tree
 
     searchkey.obj_id = 1;
     searchkey.obj_type = TYPE_DEV_ITEM;
     searchkey.offset = dev->devitem.dev_id;
-    
+
     Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, NULL);
     if (!NT_SUCCESS(Status)) {
         ERR("find_item returned %08x\n", Status);
         return Status;
     }
 
-    if (!keycmp(searchkey, tp.item->key))
-        delete_tree_item(Vcb, &tp, &rollback);
-    
+    if (!keycmp(searchkey, tp.item->key)) {
+        Status = delete_tree_item(Vcb, &tp);
+
+        if (!NT_SUCCESS(Status)) {
+            ERR("delete_tree_item returned %08x\n", Status);
+            return Status;
+        }
+    }
+
     // remove stats entry in device tree
-    
+
     searchkey.obj_id = 0;
     searchkey.obj_type = TYPE_DEV_STATS;
     searchkey.offset = dev->devitem.dev_id;
-    
+
     Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, NULL);
     if (!NT_SUCCESS(Status)) {
         ERR("find_item returned %08x\n", Status);
         return Status;
     }
 
-    if (!keycmp(searchkey, tp.item->key))
-        delete_tree_item(Vcb, &tp, &rollback);
-    
+    if (!keycmp(searchkey, tp.item->key)) {
+        Status = delete_tree_item(Vcb, &tp);
+
+        if (!NT_SUCCESS(Status)) {
+            ERR("delete_tree_item returned %08x\n", Status);
+            return Status;
+        }
+    }
+
     // update superblock
-    
+
     Vcb->superblock.num_devices--;
     Vcb->superblock.total_bytes -= dev->devitem.num_bytes;
     Vcb->devices_loaded--;
-    
-    first_dev = first_device(Vcb) == dev;
-    
+
     RemoveEntryList(&dev->list_entry);
-    
+
     // flush
-    
-    do_write(Vcb, NULL, &rollback);
-    
+
+    Status = do_write(Vcb, NULL);
+    if (!NT_SUCCESS(Status))
+        ERR("do_write returned %08x\n", Status);
+
     free_trees(Vcb);
-    
-    clear_rollback(Vcb, &rollback);
-    
-    if (!dev->readonly) {
+
+    if (!NT_SUCCESS(Status))
+        return Status;
+
+    if (!dev->readonly && dev->devobj) {
         Status = remove_superblocks(dev);
         if (!NT_SUCCESS(Status))
             WARN("remove_superblocks returned %08x\n", Status);
     }
-    
+
     // remove entry in volume list
-    
-    ExAcquireResourceExclusiveLite(&volumes_lock, TRUE);
-    
-    le = volumes.Flink;
-    while (le != &volumes) {
-        volume* v = CONTAINING_RECORD(le, volume, list_entry);
-        
-        if (RtlCompareMemory(&Vcb->superblock.uuid, &v->fsuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID) &&
-            RtlCompareMemory(&dev->devitem.device_uuid, &v->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) {
-            PFILE_OBJECT FileObject;
-            PDEVICE_OBJECT mountmgr;
-            UNICODE_STRING mmdevpath;
-        
-            RemoveEntryList(&v->list_entry);
-        
-            // re-add entry to mountmgr
-
-            if (!first_dev && v->part_num != 0) {
-                RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME);
-                Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &FileObject, &mountmgr);
-                if (!NT_SUCCESS(Status))
-                    ERR("IoGetDeviceObjectPointer returned %08x\n", Status);
-                else {
-                    add_volume(mountmgr, &v->devpath);
-                    ObDereferenceObject(FileObject);
+
+    vde = Vcb->vde;
+
+    if (dev->devobj) {
+        pdo_device_extension* pdode = vde->pdode;
+
+        ExAcquireResourceExclusiveLite(&pdode->child_lock, TRUE);
+
+        le = pdode->children.Flink;
+        while (le != &pdode->children) {
+            volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry);
+
+            if (RtlCompareMemory(&dev->devitem.device_uuid, &vc->uuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) {
+                PFILE_OBJECT FileObject;
+                PDEVICE_OBJECT mountmgr;
+                UNICODE_STRING mmdevpath;
+
+                pdode->children_loaded--;
+
+                if (vc->had_drive_letter) { // re-add entry to mountmgr
+                    RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME);
+                    Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &FileObject, &mountmgr);
+                    if (!NT_SUCCESS(Status))
+                        ERR("IoGetDeviceObjectPointer returned %08x\n", Status);
+                    else {
+                        MOUNTDEV_NAME mdn;
+
+                        Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &mdn, sizeof(MOUNTDEV_NAME), TRUE, NULL);
+                        if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW)
+                            ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status);
+                        else {
+                            MOUNTDEV_NAME* mdn2;
+                            ULONG mdnsize = (ULONG)offsetof(MOUNTDEV_NAME, Name[0]) + mdn.NameLength;
+
+                            mdn2 = ExAllocatePoolWithTag(PagedPool, mdnsize, ALLOC_TAG);
+                            if (!mdn2)
+                                ERR("out of memory\n");
+                            else {
+                                Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, mdn2, mdnsize, TRUE, NULL);
+                                if (!NT_SUCCESS(Status))
+                                    ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status);
+                                else {
+                                    UNICODE_STRING name;
+
+                                    name.Buffer = mdn2->Name;
+                                    name.Length = name.MaximumLength = mdn2->NameLength;
+
+                                    Status = mountmgr_add_drive_letter(mountmgr, &name);
+                                    if (!NT_SUCCESS(Status))
+                                        WARN("mountmgr_add_drive_letter returned %08x\n", Status);
+                                }
+
+                                ExFreePool(mdn2);
+                            }
+                        }
+
+                        ObDereferenceObject(FileObject);
+                    }
                 }
+
+                ExFreePool(vc->pnp_name.Buffer);
+                RemoveEntryList(&vc->list_entry);
+                ExFreePool(vc);
+
+                ObDereferenceObject(vc->fileobj);
+
+                break;
             }
-            
-            part0 = v->part_num == 0 ? TRUE : FALSE;
-        
-            if (v->devpath.Buffer)
-                ExFreePool(v->devpath.Buffer);
-            
-            ExFreePool(v);
-            break;
+
+            le = le->Flink;
         }
-        
-        le = le->Flink;
+
+        if (pdode->children_loaded > 0 && vde->device->Characteristics & FILE_REMOVABLE_MEDIA) {
+            vde->device->Characteristics &= ~FILE_REMOVABLE_MEDIA;
+
+            le = pdode->children.Flink;
+            while (le != &pdode->children) {
+                volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry);
+
+                if (vc->devobj->Characteristics & FILE_REMOVABLE_MEDIA) {
+                    vde->device->Characteristics |= FILE_REMOVABLE_MEDIA;
+                    break;
+                }
+
+                le = le->Flink;
+            }
+        }
+
+        pdode->num_children = Vcb->superblock.num_devices;
+
+        ExReleaseResourceLite(&pdode->child_lock);
+
+        // free dev
+
+        if (dev->trim && !dev->readonly && !Vcb->options.no_trim)
+            trim_whole_device(dev);
     }
-    
-    ExReleaseResourceLite(&volumes_lock);
-    
-    if (first_dev) {
-        PDEVICE_OBJECT DeviceObject, olddev;
-        device* newfirstdev;
-        PFILE_OBJECT FileObject;
-        UNICODE_STRING mmdevpath;
-        PDEVICE_OBJECT mountmgr;
-        
-        DeviceObject = Vcb->Vpb->DeviceObject;
-        
-        olddev = DeviceObject->Vpb->RealDevice;
-        newfirstdev = first_device(Vcb);
-        
-        ObReferenceObject(newfirstdev->devobj);
-        DeviceObject->Vpb->RealDevice = newfirstdev->devobj;
-        ObDereferenceObject(olddev);
-        
-        RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME);
-        Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &FileObject, &mountmgr);
-        if (!NT_SUCCESS(Status))
-            ERR("IoGetDeviceObjectPointer returned %08x\n", Status);
-        else {
-            Status = replace_mount_dev(Vcb, dev, mountmgr, part0);
-            if (!NT_SUCCESS(Status))
-                ERR("replace_mount_dev returned %08x\n", Status);
-            
-            ObDereferenceObject(FileObject);
-        }
-        
-    }
-    
-    // free dev
-    
-    ObDereferenceObject(dev->devobj);
-    
+
     while (!IsListEmpty(&dev->space)) {
         LIST_ENTRY* le2 = RemoveHeadList(&dev->space);
         space* s = CONTAINING_RECORD(le2, space, list_entry);
-        
+
         ExFreePool(s);
     }
-    
+
     ExFreePool(dev);
-    
+
+    if (Vcb->trim) {
+        Vcb->trim = FALSE;
+
+        le = Vcb->devices.Flink;
+        while (le != &Vcb->devices) {
+            device* dev2 = CONTAINING_RECORD(le, device, list_entry);
+
+            if (dev2->trim) {
+                Vcb->trim = TRUE;
+                break;
+            }
+
+            le = le->Flink;
+        }
+    }
+
+    FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_CHANGE_SIZE);
+
     return STATUS_SUCCESS;
 }
 
+// Issues a TRIM (DeviceDsmAction_Trim) for every part of dev not covered by one
+// of its DEV_EXTENT items in the dev tree, beginning at the 1 MB mark. Ranges
+// are collected via add_trim_entry_avoid_sb (expected to queue them on
+// dev->trim_list and count them in dev->num_trim_entries) and sent in a single
+// ioctl; failures are only logged. The caller must hold Vcb->tree_lock.
+static void trim_unalloc_space(_Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, device* dev) {
+    DEVICE_MANAGE_DATA_SET_ATTRIBUTES* dmdsa;
+    DEVICE_DATA_SET_RANGE* ranges;
+    ULONG datalen, i;
+    KEY searchkey;
+    traverse_ptr tp;
+    NTSTATUS Status;
+    BOOL b;
+    UINT64 lastoff = 0x100000; // don't TRIM the first megabyte, in case someone has been daft enough to install GRUB there
+    LIST_ENTRY* le;
+
+    dev->num_trim_entries = 0;
+
+    searchkey.obj_id = dev->devitem.dev_id;
+    searchkey.obj_type = TYPE_DEV_EXTENT;
+    searchkey.offset = 0;
+
+    Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, NULL);
+    if (!NT_SUCCESS(Status)) {
+        ERR("find_item returned %08x\n", Status);
+        return;
+    }
+    // walk this device's DEV_EXTENT items, queueing the gap in front of each one
+    do {
+        traverse_ptr next_tp;
+
+        if (tp.item->key.obj_id == dev->devitem.dev_id && tp.item->key.obj_type == TYPE_DEV_EXTENT) {
+            if (tp.item->size >= sizeof(DEV_EXTENT)) {
+                DEV_EXTENT* de = (DEV_EXTENT*)tp.item->data;
+                if (tp.item->key.offset > lastoff)
+                    add_trim_entry_avoid_sb(Vcb, dev, lastoff, tp.item->key.offset - lastoff);
+
+                lastoff = tp.item->key.offset + de->length;
+            } else {
+                ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DEV_EXTENT));
+                goto end; // entries may already be queued - free them rather than leave them for the next call
+            }
+        }
+
+        b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
+        if (b) {
+            tp = next_tp;
+            if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type))
+                break;
+        }
+    } while (b);
+    // whatever lies between the last extent and the end of the device is unallocated too
+    if (lastoff < dev->devitem.num_bytes)
+        add_trim_entry_avoid_sb(Vcb, dev, lastoff, dev->devitem.num_bytes - lastoff);
+
+    if (dev->num_trim_entries == 0)
+        return;
+    // buffer layout: the header, padded to a multiple of 8 bytes, then the range array
+    datalen = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(UINT64)) + (dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE));
+    dmdsa = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG);
+    if (!dmdsa) {
+        ERR("out of memory\n");
+        goto end;
+    }
+
+    dmdsa->Size = sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES);
+    dmdsa->Action = DeviceDsmAction_Trim;
+    dmdsa->Flags = DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED;
+    dmdsa->ParameterBlockOffset = 0;
+    dmdsa->ParameterBlockLength = 0;
+    dmdsa->DataSetRangesOffset = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(UINT64));
+    dmdsa->DataSetRangesLength = dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE);
+
+    ranges = (DEVICE_DATA_SET_RANGE*)((UINT8*)dmdsa + dmdsa->DataSetRangesOffset);
+    i = 0;
+    le = dev->trim_list.Flink;
+    while (le != &dev->trim_list) {
+        space* s = CONTAINING_RECORD(le, space, list_entry);
+
+        ranges[i].StartingOffset = s->address;
+        ranges[i].LengthInBytes = s->size;
+        i++;
+
+        le = le->Flink;
+    }
+
+    Status = dev_ioctl(dev->devobj, IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES, dmdsa, datalen, NULL, 0, TRUE, NULL);
+    if (!NT_SUCCESS(Status))
+        WARN("IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES returned %08x\n", Status);
+    ExFreePool(dmdsa);
+
+end:
+    while (!IsListEmpty(&dev->trim_list)) {
+        space* s = CONTAINING_RECORD(RemoveHeadList(&dev->trim_list), space, list_entry);
+        ExFreePool(s);
+    }
+
+    dev->num_trim_entries = 0;
+}
+
+// Called when a balance cannot allocate a chunk because the disk is full: runs
+// balance_data_chunk on each data chunk, least used first, then retries
+// alloc_chunk with flags. Success always sets *newchunk - to the new chunk or,
+// if the balance was stopped first, to the chunk that was being processed.
+static NTSTATUS try_consolidation(device_extension* Vcb, UINT64 flags, chunk** newchunk) {
+    NTSTATUS Status;
+    BOOL changed;
+    LIST_ENTRY* le;
+    chunk* rc;
+
+    // FIXME - allow with metadata chunks?
+    while (TRUE) {
+        rc = NULL;
+
+        ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE);
+        ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
+
+        // choose the least-used chunk we haven't looked at yet
+        le = Vcb->chunks.Flink;
+        while (le != &Vcb->chunks) {
+            chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
+
+            // FIXME - skip full-size chunks over e.g. 90% full?
+            if (c->chunk_item->type & BLOCK_FLAG_DATA && !c->readonly && c->balance_num != Vcb->balance.balance_num && (!rc || c->used < rc->used))
+                rc = c;
+
+            le = le->Flink;
+        }
+
+        ExReleaseResourceLite(&Vcb->chunk_lock);
+
+        if (!rc) {
+            ExReleaseResourceLite(&Vcb->tree_lock);
+            break;
+        }
+
+        if (rc->list_entry_balance.Flink) {
+            RemoveEntryList(&rc->list_entry_balance);
+            Vcb->balance.chunks_left--;
+        }
+
+        rc->list_entry_balance.Flink = (LIST_ENTRY*)1; // so it doesn't get dropped
+        rc->reloc = TRUE;
+        ExReleaseResourceLite(&Vcb->tree_lock);
+
+        do {
+            changed = FALSE;
+            Status = balance_data_chunk(Vcb, rc, &changed);
+            if (!NT_SUCCESS(Status)) {
+                ERR("balance_data_chunk returned %08x\n", Status);
+                Vcb->balance.status = Status;
+                rc->list_entry_balance.Flink = NULL;
+                rc->reloc = FALSE;
+                return Status;
+            }
+
+            KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL);
+            if (Vcb->readonly)
+                Vcb->balance.stopping = TRUE;
+
+            if (Vcb->balance.stopping) {
+                // undo the marker as the error path does; callers write through *newchunk on success
+                rc->list_entry_balance.Flink = NULL;
+                rc->reloc = FALSE;
+                *newchunk = rc;
+                return STATUS_SUCCESS;
+            }
+        } while (changed);
+
+        rc->list_entry_balance.Flink = NULL;
+        rc->changed = TRUE;
+        rc->space_changed = TRUE;
+        rc->balance_num = Vcb->balance.balance_num;
+
+        Status = do_write(Vcb, NULL);
+        if (!NT_SUCCESS(Status)) {
+            ERR("do_write returned %08x\n", Status);
+            return Status;
+        }
+    }
+
+    ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
+    Status = alloc_chunk(Vcb, flags, &rc, TRUE);
+    ExReleaseResourceLite(&Vcb->chunk_lock);
+
+    if (NT_SUCCESS(Status))
+        *newchunk = rc;
+    else
+        ERR("alloc_chunk returned %08x\n", Status);
+
+    return Status;
+}
+
+// Rebuilds dev->space from scratch: frees the current entries, adds everything
+// on the device past the first megabyte, then, for each chunk in Vcb->chunks,
+// subtracts the stripes that chunk has on this device. Always returns
+// STATUS_SUCCESS.
+static NTSTATUS regenerate_space_list(device_extension* Vcb, device* dev) {
+    LIST_ENTRY* le;
+
+    while (!IsListEmpty(&dev->space)) {
+        space* s = CONTAINING_RECORD(RemoveHeadList(&dev->space), space, list_entry);
+        ExFreePool(s);
+    }
+
+    // The Linux driver doesn't like to allocate chunks within the first megabyte of a device.
+    space_list_add2(&dev->space, NULL, 0x100000, dev->devitem.num_bytes - 0x100000, NULL, NULL);
+
+    le = Vcb->chunks.Flink;
+    while (le != &Vcb->chunks) {
+        UINT16 n;
+        chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
+        CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
+        UINT64 stripe_size = 0; // per chunk, so it is worked out at most once however many stripes match
+
+        for (n = 0; n < c->chunk_item->num_stripes; n++) {
+            if (cis[n].dev_id == dev->devitem.dev_id) {
+                if (stripe_size == 0) {
+                    UINT16 factor;
+
+                    if (c->chunk_item->type & BLOCK_FLAG_RAID0)
+                        factor = c->chunk_item->num_stripes;
+                    else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
+                        factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
+                    else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
+                        factor = c->chunk_item->num_stripes - 1;
+                    else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
+                        factor = c->chunk_item->num_stripes - 2;
+                    else // SINGLE, DUP, RAID1
+                        factor = 1;
+                    stripe_size = c->chunk_item->size / factor;
+                }
+
+                space_list_subtract2(&dev->space, NULL, cis[n].offset, stripe_size, NULL, NULL);
+            }
+        }
+
+        le = le->Flink;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+_Function_class_(KSTART_ROUTINE)
 #ifndef __REACTOS__
-static void balance_thread(void* context) {
+void balance_thread(void* context) {
 #else
-static void NTAPI balance_thread(void* context) {
+void NTAPI balance_thread(void* context) {
 #endif
     device_extension* Vcb = (device_extension*)context;
     LIST_ENTRY chunks;
     LIST_ENTRY* le;
-    UINT64 num_chunks[3];
+    UINT64 num_chunks[3], okay_metadata_chunks = 0, okay_data_chunks = 0, okay_system_chunks = 0;
+    UINT64 old_data_flags = 0, old_metadata_flags = 0, old_system_flags = 0;
     NTSTATUS Status;
-    
+
+    Vcb->balance.balance_num++;
+
     Vcb->balance.stopping = FALSE;
-    Vcb->balance.cancelling = FALSE;
     KeInitializeEvent(&Vcb->balance.finished, NotificationEvent, FALSE);
-    
-    if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_CONVERT)
+
+    if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_CONVERT) {
+        old_data_flags = Vcb->data_flags;
         Vcb->data_flags = BLOCK_FLAG_DATA | (Vcb->balance.opts[BALANCE_OPTS_DATA].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_DATA].convert);
-    
-    if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_CONVERT)
+
+        FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_CHANGE_SIZE);
+    }
+
+    if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_CONVERT) {
+        old_metadata_flags = Vcb->metadata_flags;
         Vcb->metadata_flags = BLOCK_FLAG_METADATA | (Vcb->balance.opts[BALANCE_OPTS_METADATA].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_METADATA].convert);
-    
-    if (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_CONVERT)
+    }
+
+    if (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_CONVERT) {
+        old_system_flags = Vcb->system_flags;
         Vcb->system_flags = BLOCK_FLAG_SYSTEM | (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_SYSTEM].convert);
-    
+    }
+
     if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS) {
         if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED)
             RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &Vcb->balance.opts[BALANCE_OPTS_DATA], sizeof(btrfs_balance_opts));
         else if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED)
             RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_DATA], &Vcb->balance.opts[BALANCE_OPTS_METADATA], sizeof(btrfs_balance_opts));
     }
-    
+
+    num_chunks[0] = num_chunks[1] = num_chunks[2] = 0;
+    Vcb->balance.total_chunks = Vcb->balance.chunks_left = 0;
+
+    InitializeListHead(&chunks);
+
     // FIXME - what are we supposed to do with limit_start?
-    
+
     if (!Vcb->readonly) {
-        if (!Vcb->balance.removing) {
+        if (!Vcb->balance.removing && !Vcb->balance.shrinking) {
             Status = add_balance_item(Vcb);
             if (!NT_SUCCESS(Status)) {
                 ERR("add_balance_item returned %08x\n", Status);
+                Vcb->balance.status = Status;
+                goto end;
+            }
+        } else {
+            if (Vcb->need_write) {
+                Status = do_write(Vcb, NULL);
+
+                free_trees(Vcb);
+
+                if (!NT_SUCCESS(Status)) {
+                    ERR("do_write returned %08x\n", Status);
+                    Vcb->balance.status = Status;
+                    goto end;
+                }
+            }
+        }
+    }
+
+    KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL);
+
+    if (Vcb->balance.stopping)
+        goto end;
+
+    ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
+
+    le = Vcb->chunks.Flink;
+    while (le != &Vcb->chunks) {
+        chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
+        UINT8 sort;
+
+        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+
+        if (c->chunk_item->type & BLOCK_FLAG_DATA)
+            sort = BALANCE_OPTS_DATA;
+        else if (c->chunk_item->type & BLOCK_FLAG_METADATA)
+            sort = BALANCE_OPTS_METADATA;
+        else if (c->chunk_item->type & BLOCK_FLAG_SYSTEM)
+            sort = BALANCE_OPTS_SYSTEM;
+        else {
+            ERR("unexpected chunk type %llx\n", c->chunk_item->type);
+            ExReleaseResourceLite(&c->lock);
+            break;
+        }
+
+        if ((!(Vcb->balance.opts[sort].flags & BTRFS_BALANCE_OPTS_LIMIT) || num_chunks[sort] < Vcb->balance.opts[sort].limit_end) &&
+            should_balance_chunk(Vcb, sort, c)) {
+            InsertTailList(&chunks, &c->list_entry_balance);
+
+            num_chunks[sort]++;
+            Vcb->balance.total_chunks++;
+            Vcb->balance.chunks_left++;
+        } else if (sort == BALANCE_OPTS_METADATA)
+            okay_metadata_chunks++;
+        else if (sort == BALANCE_OPTS_DATA)
+            okay_data_chunks++;
+        else if (sort == BALANCE_OPTS_SYSTEM)
+            okay_system_chunks++;
+
+        if (!c->cache_loaded) {
+            Status = load_cache_chunk(Vcb, c, NULL);
+
+            if (!NT_SUCCESS(Status)) {
+                ERR("load_cache_chunk returned %08x\n", Status);
+                Vcb->balance.status = Status;
+                ExReleaseResourceLite(&c->lock);
+                ExReleaseResourceLite(&Vcb->chunk_lock);
+                goto end;
+            }
+        }
+
+        ExReleaseResourceLite(&c->lock);
+
+        le = le->Flink;
+    }
+
+    ExReleaseResourceLite(&Vcb->chunk_lock);
+
+    // If we're doing a full balance, try and allocate a new chunk now, before we mess things up
+    if (okay_metadata_chunks == 0 || okay_data_chunks == 0 || okay_system_chunks == 0) {
+        BOOL consolidated = FALSE;
+        chunk* c;
+
+        if (okay_metadata_chunks == 0) {
+            ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
+
+            Status = alloc_chunk(Vcb, Vcb->metadata_flags, &c, TRUE);
+            if (NT_SUCCESS(Status))
+                c->balance_num = Vcb->balance.balance_num;
+            else if (Status != STATUS_DISK_FULL || consolidated) {
+                ERR("alloc_chunk returned %08x\n", Status);
+                ExReleaseResourceLite(&Vcb->chunk_lock);
+                Vcb->balance.status = Status;
+                goto end;
+            }
+
+            ExReleaseResourceLite(&Vcb->chunk_lock);
+
+            if (Status == STATUS_DISK_FULL) {
+                Status = try_consolidation(Vcb, Vcb->metadata_flags, &c);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("try_consolidation returned %08x\n", Status);
+                    Vcb->balance.status = Status;
+                    goto end;
+                } else
+                    c->balance_num = Vcb->balance.balance_num;
+
+                consolidated = TRUE;
+
+                if (Vcb->balance.stopping)
+                    goto end;
+            }
+        }
+
+        if (okay_data_chunks == 0) {
+            ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
+
+            Status = alloc_chunk(Vcb, Vcb->data_flags, &c, TRUE);
+            if (NT_SUCCESS(Status))
+                c->balance_num = Vcb->balance.balance_num;
+            else if (Status != STATUS_DISK_FULL || consolidated) {
+                ERR("alloc_chunk returned %08x\n", Status);
+                ExReleaseResourceLite(&Vcb->chunk_lock);
+                Vcb->balance.status = Status;
+                goto end;
+            }
+
+            ExReleaseResourceLite(&Vcb->chunk_lock);
+
+            if (Status == STATUS_DISK_FULL) {
+                Status = try_consolidation(Vcb, Vcb->data_flags, &c);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("try_consolidation returned %08x\n", Status);
+                    Vcb->balance.status = Status;
+                    goto end;
+                } else
+                    c->balance_num = Vcb->balance.balance_num;
+
+                consolidated = TRUE;
+
+                if (Vcb->balance.stopping)
+                    goto end;
+            }
+        }
+
+        if (okay_system_chunks == 0) {
+            ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
+
+            Status = alloc_chunk(Vcb, Vcb->system_flags, &c, TRUE);
+            if (NT_SUCCESS(Status))
+                c->balance_num = Vcb->balance.balance_num;
+            else if (Status != STATUS_DISK_FULL || consolidated) {
+                ERR("alloc_chunk returned %08x\n", Status);
+                ExReleaseResourceLite(&Vcb->chunk_lock);
+                Vcb->balance.status = Status;
                 goto end;
             }
-        } else {
-            if (Vcb->need_write) {
-                LIST_ENTRY rollback;
-                
-                InitializeListHead(&rollback);
-                do_write(Vcb, NULL, &rollback);
-                free_trees(Vcb);
-                
-                clear_rollback(Vcb, &rollback);
+
+            ExReleaseResourceLite(&Vcb->chunk_lock);
+
+            if (Status == STATUS_DISK_FULL) {
+                Status = try_consolidation(Vcb, Vcb->system_flags, &c);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("try_consolidation returned %08x\n", Status);
+                    Vcb->balance.status = Status;
+                    goto end;
+                } else
+                    c->balance_num = Vcb->balance.balance_num;
+
+                consolidated = TRUE;
+
+                if (Vcb->balance.stopping)
+                    goto end;
             }
         }
     }
-    
-    num_chunks[0] = num_chunks[1] = num_chunks[2] = 0;
-    Vcb->balance.total_chunks = 0;
-    
-    InitializeListHead(&chunks);
-    
-    KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL);
-    
-    if (Vcb->balance.stopping)
-        goto end;
-    
+
     ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
-    
-    le = Vcb->chunks.Flink;
-    while (le != &Vcb->chunks) {
-        chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
-        UINT8 sort;
-        
-        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-        
-        if (c->chunk_item->type & BLOCK_FLAG_DATA)
-            sort = BALANCE_OPTS_DATA;
-        else if (c->chunk_item->type & BLOCK_FLAG_METADATA)
-            sort = BALANCE_OPTS_METADATA;
-        else if (c->chunk_item->type & BLOCK_FLAG_SYSTEM)
-            sort = BALANCE_OPTS_SYSTEM;
-        else {
-            ERR("unexpected chunk type %llx\n", c->chunk_item->type);
-            ExReleaseResourceLite(&c->lock);
-            break;
-        }
-        
-        if ((!(Vcb->balance.opts[sort].flags & BTRFS_BALANCE_OPTS_LIMIT) || num_chunks[sort] < Vcb->balance.opts[sort].limit_end) &&
-            should_balance_chunk(Vcb, sort, c)) {
-            c->reloc = TRUE;
-            
-            InsertTailList(&chunks, &c->list_entry_balance);
-            
-            num_chunks[sort]++;
-            Vcb->balance.total_chunks++;
-        }
-        
-        ExReleaseResourceLite(&c->lock);
-        
+
+    le = chunks.Flink;
+    while (le != &chunks) {
+        chunk* c = CONTAINING_RECORD(le, chunk, list_entry_balance);
+
+        c->reloc = TRUE;
+
         le = le->Flink;
     }
-    
+
     ExReleaseResourceLite(&Vcb->chunk_lock);
-    
-    Vcb->balance.chunks_left = Vcb->balance.total_chunks;
-    
+
     // do data chunks before metadata
     le = chunks.Flink;
     while (le != &chunks) {
         chunk* c = CONTAINING_RECORD(le, chunk, list_entry_balance);
         LIST_ENTRY* le2 = le->Flink;
-        
+
         if (c->chunk_item->type & BLOCK_FLAG_DATA) {
-            NTSTATUS Status;
             BOOL changed;
-            
+
             do {
                 changed = FALSE;
-                
-                FsRtlEnterFileSystem();
-                
+
                 Status = balance_data_chunk(Vcb, c, &changed);
-                
-                FsRtlExitFileSystem();
-                
                 if (!NT_SUCCESS(Status)) {
                     ERR("balance_data_chunk returned %08x\n", Status);
                     Vcb->balance.status = Status;
                     goto end;
                 }
-                
+
                 KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL);
-                
+
+                if (Vcb->readonly)
+                    Vcb->balance.stopping = TRUE;
+
                 if (Vcb->balance.stopping)
                     break;
             } while (changed);
-        
-            if (!c->list_entry_changed.Flink)
-                InsertTailList(&Vcb->chunks_changed, &c->list_entry_changed);
+
+            c->changed = TRUE;
+            c->space_changed = TRUE;
         }
-            
-        if (Vcb->balance.stopping) {
-            while (le != &chunks) {
-                c = CONTAINING_RECORD(le, chunk, list_entry_balance);
-                c->reloc = FALSE;
-                
-                le = le->Flink;
-            }
+
+        if (Vcb->balance.stopping)
             goto end;
-        }
-        
+
         if (c->chunk_item->type & BLOCK_FLAG_DATA &&
             (!(Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) || !(c->chunk_item->type & BLOCK_FLAG_METADATA))) {
             RemoveEntryList(&c->list_entry_balance);
             c->list_entry_balance.Flink = NULL;
-            
+
             Vcb->balance.chunks_left--;
         }
-        
+
         le = le2;
     }
-    
+
     // do metadata chunks
     while (!IsListEmpty(&chunks)) {
         chunk* c;
-        NTSTATUS Status;
         BOOL changed;
-        
+
         le = RemoveHeadList(&chunks);
         c = CONTAINING_RECORD(le, chunk, list_entry_balance);
-        
+
         if (c->chunk_item->type & BLOCK_FLAG_METADATA || c->chunk_item->type & BLOCK_FLAG_SYSTEM) {
             do {
-                FsRtlEnterFileSystem();
-                
                 Status = balance_metadata_chunk(Vcb, c, &changed);
-                
-                FsRtlExitFileSystem();
-                
                 if (!NT_SUCCESS(Status)) {
                     ERR("balance_metadata_chunk returned %08x\n", Status);
                     Vcb->balance.status = Status;
                     goto end;
                 }
-                
+
                 KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL);
-                
+
+                if (Vcb->readonly)
+                    Vcb->balance.stopping = TRUE;
+
                 if (Vcb->balance.stopping)
                     break;
             } while (changed);
-            
-            if (!c->list_entry_changed.Flink)
-                InsertTailList(&Vcb->chunks_changed, &c->list_entry_changed);
+
+            c->changed = TRUE;
+            c->space_changed = TRUE;
         }
-        
-        if (Vcb->balance.stopping) {
-            while (le != &chunks) {
-                c = CONTAINING_RECORD(le, chunk, list_entry_balance);
-                c->reloc = FALSE;
-                
-                le = le->Flink;
-                c->list_entry_balance.Flink = NULL;
-            }
+
+        if (Vcb->balance.stopping)
             break;
-        }
-        
+
         c->list_entry_balance.Flink = NULL;
-        
+
         Vcb->balance.chunks_left--;
     }
-    
+
 end:
     if (!Vcb->readonly) {
-        if (!Vcb->balance.removing) {
-            FsRtlEnterFileSystem();
-            Status = remove_balance_item(Vcb);
-            FsRtlExitFileSystem();
-            
-            if (!NT_SUCCESS(Status)) {
-                ERR("remove_balance_item returned %08x\n", Status);
-                goto end;
+        if (Vcb->balance.stopping || !NT_SUCCESS(Vcb->balance.status)) {
+            le = chunks.Flink;
+            while (le != &chunks) {
+                chunk* c = CONTAINING_RECORD(le, chunk, list_entry_balance);
+                c->reloc = FALSE;
+
+                le = le->Flink;
+                c->list_entry_balance.Flink = NULL;
             }
-        } else {
+
+            if (old_data_flags != 0)
+                Vcb->data_flags = old_data_flags;
+
+            if (old_metadata_flags != 0)
+                Vcb->metadata_flags = old_metadata_flags;
+
+            if (old_system_flags != 0)
+                Vcb->system_flags = old_system_flags;
+        }
+
+        if (Vcb->balance.removing) {
             device* dev = NULL;
-            
-            FsRtlEnterFileSystem();
+
             ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
-            
+
             le = Vcb->devices.Flink;
             while (le != &Vcb->devices) {
                 device* dev2 = CONTAINING_RECORD(le, device, list_entry);
-                
+
                 if (dev2->devitem.dev_id == Vcb->balance.opts[0].devid) {
                     dev = dev2;
                     break;
                 }
-                
+
                 le = le->Flink;
             }
-            
+
             if (dev) {
                 if (Vcb->balance.chunks_left == 0) {
                     Status = finish_removing_device(Vcb, dev);
-                    
+
                     if (!NT_SUCCESS(Status)) {
                         ERR("finish_removing_device returned %08x\n", Status);
                         dev->reloc = FALSE;
@@ -2790,15 +3403,94 @@ end:
                 } else
                     dev->reloc = FALSE;
             }
-            
+
+            ExReleaseResourceLite(&Vcb->tree_lock);
+        } else if (Vcb->balance.shrinking) {
+            device* dev = NULL;
+
+            ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
+
+            le = Vcb->devices.Flink;
+            while (le != &Vcb->devices) {
+                device* dev2 = CONTAINING_RECORD(le, device, list_entry);
+
+                if (dev2->devitem.dev_id == Vcb->balance.opts[0].devid) {
+                    dev = dev2;
+                    break;
+                }
+
+                le = le->Flink;
+            }
+
+            if (!dev) {
+                ERR("could not find device %llx\n", Vcb->balance.opts[0].devid);
+                Vcb->balance.status = STATUS_INTERNAL_ERROR;
+            }
+
+            if (Vcb->balance.stopping || !NT_SUCCESS(Vcb->balance.status)) {
+                if (dev) {
+                    Status = regenerate_space_list(Vcb, dev);
+                    if (!NT_SUCCESS(Status))
+                        WARN("regenerate_space_list returned %08x\n", Status);
+                }
+            } else {
+                UINT64 old_size;
+
+                old_size = dev->devitem.num_bytes;
+                dev->devitem.num_bytes = Vcb->balance.opts[0].drange_start;
+
+                Status = update_dev_item(Vcb, dev, NULL);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("update_dev_item returned %08x\n", Status);
+                    dev->devitem.num_bytes = old_size;
+                    Vcb->balance.status = Status;
+
+                    Status = regenerate_space_list(Vcb, dev);
+                    if (!NT_SUCCESS(Status))
+                        WARN("regenerate_space_list returned %08x\n", Status);
+                } else {
+                    Vcb->superblock.total_bytes -= old_size - dev->devitem.num_bytes;
+
+                    Status = do_write(Vcb, NULL);
+                    if (!NT_SUCCESS(Status))
+                        ERR("do_write returned %08x\n", Status);
+
+                    free_trees(Vcb);
+                }
+            }
+
+            ExReleaseResourceLite(&Vcb->tree_lock);
+
+            if (!Vcb->balance.stopping && NT_SUCCESS(Vcb->balance.status))
+                FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_CHANGE_SIZE);
+        } else {
+            Status = remove_balance_item(Vcb);
+            if (!NT_SUCCESS(Status)) {
+                ERR("remove_balance_item returned %08x\n", Status);
+                goto end;
+            }
+        }
+
+        if (Vcb->trim && !Vcb->options.no_trim) {
+            ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
+
+            le = Vcb->devices.Flink;
+            while (le != &Vcb->devices) {
+                device* dev2 = CONTAINING_RECORD(le, device, list_entry);
+
+                if (dev2->devobj && !dev2->readonly && dev2->trim)
+                    trim_unalloc_space(Vcb, dev2);
+
+                le = le->Flink;
+            }
+
             ExReleaseResourceLite(&Vcb->tree_lock);
-            FsRtlExitFileSystem();
         }
     }
-    
+
     ZwClose(Vcb->balance.thread);
     Vcb->balance.thread = NULL;
-    
+
     KeSetEvent(&Vcb->balance.finished, 0, FALSE);
 }
 
@@ -2806,26 +3498,36 @@ NTSTATUS start_balance(device_extension* Vcb, void* data, ULONG length, KPROCESS
     NTSTATUS Status;
     btrfs_start_balance* bsb = (btrfs_start_balance*)data;
     UINT8 i;
-    
+
     if (length < sizeof(btrfs_start_balance) || !data)
         return STATUS_INVALID_PARAMETER;
-    
+
     if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
         return STATUS_PRIVILEGE_NOT_HELD;
-    
+
+    if (Vcb->locked) {
+        WARN("cannot start balance while locked\n");
+        return STATUS_DEVICE_NOT_READY;
+    }
+
+    if (Vcb->scrub.thread) {
+        WARN("cannot start balance while scrub running\n");
+        return STATUS_DEVICE_NOT_READY;
+    }
+
     if (Vcb->balance.thread) {
         WARN("balance already running\n");
         return STATUS_DEVICE_NOT_READY;
     }
-    
+
     if (Vcb->readonly)
         return STATUS_MEDIA_WRITE_PROTECTED;
-    
+
     if (!(bsb->opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED) &&
         !(bsb->opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) &&
         !(bsb->opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED))
         return STATUS_SUCCESS;
-    
+
     for (i = 0; i < 3; i++) {
         if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_ENABLED) {
             if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_PROFILES) {
@@ -2835,46 +3537,46 @@ NTSTATUS start_balance(device_extension* Vcb, void* data, ULONG length, KPROCESS
                 if (bsb->opts[i].profiles == 0)
                     return STATUS_INVALID_PARAMETER;
             }
-            
+
             if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_DEVID) {
                 if (bsb->opts[i].devid == 0)
                     return STATUS_INVALID_PARAMETER;
             }
-            
+
             if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_DRANGE) {
                 if (bsb->opts[i].drange_start > bsb->opts[i].drange_end)
                     return STATUS_INVALID_PARAMETER;
             }
-            
+
             if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_VRANGE) {
                 if (bsb->opts[i].vrange_start > bsb->opts[i].vrange_end)
                     return STATUS_INVALID_PARAMETER;
             }
-            
+
             if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_LIMIT) {
                 bsb->opts[i].limit_start = max(1, bsb->opts[i].limit_start);
                 bsb->opts[i].limit_end = max(1, bsb->opts[i].limit_end);
-                
+
                 if (bsb->opts[i].limit_start > bsb->opts[i].limit_end)
                     return STATUS_INVALID_PARAMETER;
             }
-            
+
             if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_STRIPES) {
                 bsb->opts[i].stripes_start = max(1, bsb->opts[i].stripes_start);
                 bsb->opts[i].stripes_end = max(1, bsb->opts[i].stripes_end);
-                
+
                 if (bsb->opts[i].stripes_start > bsb->opts[i].stripes_end)
                     return STATUS_INVALID_PARAMETER;
             }
-            
+
             if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_USAGE) {
                 bsb->opts[i].usage_start = min(100, bsb->opts[i].stripes_start);
                 bsb->opts[i].usage_end = min(100, bsb->opts[i].stripes_end);
-                
+
                 if (bsb->opts[i].stripes_start > bsb->opts[i].stripes_end)
                     return STATUS_INVALID_PARAMETER;
             }
-            
+
             if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_CONVERT) {
                 if (bsb->opts[i].convert != BLOCK_FLAG_RAID0 && bsb->opts[i].convert != BLOCK_FLAG_RAID1 &&
                     bsb->opts[i].convert != BLOCK_FLAG_DUPLICATE && bsb->opts[i].convert != BLOCK_FLAG_RAID10 &&
@@ -2884,79 +3586,77 @@ NTSTATUS start_balance(device_extension* Vcb, void* data, ULONG length, KPROCESS
             }
         }
     }
-    
+
     RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bsb->opts[BALANCE_OPTS_DATA], sizeof(btrfs_balance_opts));
     RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &bsb->opts[BALANCE_OPTS_METADATA], sizeof(btrfs_balance_opts));
     RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_SYSTEM], &bsb->opts[BALANCE_OPTS_SYSTEM], sizeof(btrfs_balance_opts));
-    
+
     Vcb->balance.paused = FALSE;
     Vcb->balance.removing = FALSE;
+    Vcb->balance.shrinking = FALSE;
     Vcb->balance.status = STATUS_SUCCESS;
     KeInitializeEvent(&Vcb->balance.event, NotificationEvent, !Vcb->balance.paused);
-    
+
     Status = PsCreateSystemThread(&Vcb->balance.thread, 0, NULL, NULL, NULL, balance_thread, Vcb);
     if (!NT_SUCCESS(Status)) {
         ERR("PsCreateSystemThread returned %08x\n", Status);
         return Status;
     }
-    
+
     return STATUS_SUCCESS;
 }
 
-NTSTATUS look_for_balance_item(device_extension* Vcb) {
-    LIST_ENTRY rollback;
+NTSTATUS look_for_balance_item(_Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb) {
     KEY searchkey;
     traverse_ptr tp;
     NTSTATUS Status;
     BALANCE_ITEM* bi;
     int i;
-    
-    InitializeListHead(&rollback);
-    
+
     searchkey.obj_id = BALANCE_ITEM_ID;
     searchkey.obj_type = TYPE_TEMP_ITEM;
     searchkey.offset = 0;
-    
+
     Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
     if (!NT_SUCCESS(Status)) {
         ERR("find_item returned %08x\n", Status);
         return Status;
     }
-    
+
     if (keycmp(tp.item->key, searchkey)) {
         TRACE("no balance item found\n");
         return STATUS_NOT_FOUND;
     }
-    
+
     if (tp.item->size < sizeof(BALANCE_ITEM)) {
         WARN("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
              tp.item->size, sizeof(BALANCE_ITEM));
         return STATUS_INTERNAL_ERROR;
     }
-    
+
     bi = (BALANCE_ITEM*)tp.item->data;
-    
+
     if (bi->flags & BALANCE_FLAGS_DATA)
         load_balance_args(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bi->data);
-    
+
     if (bi->flags & BALANCE_FLAGS_METADATA)
         load_balance_args(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &bi->metadata);
-    
+
     if (bi->flags & BALANCE_FLAGS_SYSTEM)
         load_balance_args(&Vcb->balance.opts[BALANCE_OPTS_SYSTEM], &bi->system);
-    
+
     // do the heuristics that Linux driver does
-    
+
     for (i = 0; i < 3; i++) {
         if (Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_ENABLED) {
             // if converting, don't redo chunks already done
-            
+
             if (Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_CONVERT)
                 Vcb->balance.opts[i].flags |= BTRFS_BALANCE_OPTS_SOFT;
-            
+
             // don't balance chunks more than 90% filled - presumably these
             // have already been done
-            
+
             if (!(Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_USAGE) &&
                 !(Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_CONVERT)
             ) {
@@ -2966,50 +3666,54 @@ NTSTATUS look_for_balance_item(device_extension* Vcb) {
             }
         }
     }
-    
+
     if (Vcb->readonly || Vcb->options.skip_balance)
         Vcb->balance.paused = TRUE;
     else
         Vcb->balance.paused = FALSE;
-    
+
     Vcb->balance.removing = FALSE;
+    Vcb->balance.shrinking = FALSE;
     Vcb->balance.status = STATUS_SUCCESS;
     KeInitializeEvent(&Vcb->balance.event, NotificationEvent, !Vcb->balance.paused);
-    
+
     Status = PsCreateSystemThread(&Vcb->balance.thread, 0, NULL, NULL, NULL, balance_thread, Vcb);
     if (!NT_SUCCESS(Status)) {
         ERR("PsCreateSystemThread returned %08x\n", Status);
         return Status;
     }
-    
+
     return STATUS_SUCCESS;
 }
 
 NTSTATUS query_balance(device_extension* Vcb, void* data, ULONG length) {
     btrfs_query_balance* bqb = (btrfs_query_balance*)data;
-    
+
     if (length < sizeof(btrfs_query_balance) || !data)
         return STATUS_INVALID_PARAMETER;
-    
+
     if (!Vcb->balance.thread) {
         bqb->status = BTRFS_BALANCE_STOPPED;
-        
+
         if (!NT_SUCCESS(Vcb->balance.status)) {
             bqb->status |= BTRFS_BALANCE_ERROR;
             bqb->error = Vcb->balance.status;
         }
-        
+
         return STATUS_SUCCESS;
     }
-    
+
     bqb->status = Vcb->balance.paused ? BTRFS_BALANCE_PAUSED : BTRFS_BALANCE_RUNNING;
-    
+
     if (Vcb->balance.removing)
         bqb->status |= BTRFS_BALANCE_REMOVAL;
-    
+
+    if (Vcb->balance.shrinking)
+        bqb->status |= BTRFS_BALANCE_SHRINKING;
+
     if (!NT_SUCCESS(Vcb->balance.status))
         bqb->status |= BTRFS_BALANCE_ERROR;
-    
+
     bqb->chunks_left = Vcb->balance.chunks_left;
     bqb->total_chunks = Vcb->balance.total_chunks;
     bqb->error = Vcb->balance.status;
@@ -3023,51 +3727,50 @@ NTSTATUS query_balance(device_extension* Vcb, void* data, ULONG length) {
 // Pauses a running balance on Vcb.
 // Returns STATUS_PRIVILEGE_NOT_HELD if the caller lacks SE_MANAGE_VOLUME_PRIVILEGE,
 // STATUS_DEVICE_NOT_READY if there is no balance thread or the balance is already
 // paused, and STATUS_SUCCESS once the paused flag is set and the event is cleared.
 NTSTATUS pause_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
     if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
         return STATUS_PRIVILEGE_NOT_HELD;
-    
+
     if (!Vcb->balance.thread)
         return STATUS_DEVICE_NOT_READY;
-    
+
     if (Vcb->balance.paused)
         return STATUS_DEVICE_NOT_READY;
-    
+
     Vcb->balance.paused = TRUE;
     // balance.event is initialised as signalled only when the balance is not paused,
     // so it is cleared here; presumably the balance thread blocks on it - TODO confirm
     KeClearEvent(&Vcb->balance.event);
-    
+
     return STATUS_SUCCESS;
 }
 
 // Resumes a paused balance on Vcb.
 // Returns STATUS_PRIVILEGE_NOT_HELD if the caller lacks SE_MANAGE_VOLUME_PRIVILEGE,
 // STATUS_DEVICE_NOT_READY if there is no balance thread or the balance is not paused,
 // STATUS_MEDIA_WRITE_PROTECTED if the volume is read-only, and STATUS_SUCCESS once the
 // paused flag is cleared and the event is signalled.
 NTSTATUS resume_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
     if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
         return STATUS_PRIVILEGE_NOT_HELD;
-    
+
     if (!Vcb->balance.thread)
         return STATUS_DEVICE_NOT_READY;
-    
+
     if (!Vcb->balance.paused)
         return STATUS_DEVICE_NOT_READY;
-    
+
     if (Vcb->readonly)
         return STATUS_MEDIA_WRITE_PROTECTED;
-    
+
     Vcb->balance.paused = FALSE;
     // signal the event that pause_balance clears; presumably this wakes the balance thread - TODO confirm
     KeSetEvent(&Vcb->balance.event, 0, FALSE);
-    
+
     return STATUS_SUCCESS;
 }
 
 // Requests that the running balance on Vcb stop. This only sets flags and signals the
 // event; it does not wait for the balance thread to finish.
 // Returns STATUS_PRIVILEGE_NOT_HELD if the caller lacks SE_MANAGE_VOLUME_PRIVILEGE and
 // STATUS_DEVICE_NOT_READY if there is no balance thread.
 NTSTATUS stop_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
     if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
         return STATUS_PRIVILEGE_NOT_HELD;
-    
+
     if (!Vcb->balance.thread)
         return STATUS_DEVICE_NOT_READY;
-    
+
     // clear paused and signal the event so that a paused balance is released as well;
     // any earlier error in balance.status is reset to STATUS_SUCCESS
     Vcb->balance.paused = FALSE;
     Vcb->balance.stopping = TRUE;
-    Vcb->balance.cancelling = TRUE;
     Vcb->balance.status = STATUS_SUCCESS;
     KeSetEvent(&Vcb->balance.event, 0, FALSE);
-    
+
     return STATUS_SUCCESS;
 }
 
@@ -3078,52 +3781,52 @@ NTSTATUS remove_device(device_extension* Vcb, void* data, ULONG length, KPROCESS
     NTSTATUS Status;
     int i;
     UINT64 num_rw_devices;
-    
+
     TRACE("(%p, %p, %x)\n", Vcb, data, length);
-    
+
     if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
         return STATUS_PRIVILEGE_NOT_HELD;
-    
+
     if (length < sizeof(UINT64))
         return STATUS_INVALID_PARAMETER;
-    
+
     devid = *(UINT64*)data;
-    
+
     ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE);
-    
+
     if (Vcb->readonly) {
         ExReleaseResourceLite(&Vcb->tree_lock);
         return STATUS_MEDIA_WRITE_PROTECTED;
     }
-    
+
     num_rw_devices = 0;
-    
+
     le = Vcb->devices.Flink;
     while (le != &Vcb->devices) {
         device* dev2 = CONTAINING_RECORD(le, device, list_entry);
-        
+
         if (dev2->devitem.dev_id == devid)
             dev = dev2;
-        
+
         if (!dev2->readonly)
             num_rw_devices++;
-        
+
         le = le->Flink;
     }
-    
+
     if (!dev) {
         ExReleaseResourceLite(&Vcb->tree_lock);
         WARN("device %llx not found\n", devid);
         return STATUS_NOT_FOUND;
     }
-    
+
     if (!dev->readonly) {
         if (num_rw_devices == 1) {
             ExReleaseResourceLite(&Vcb->tree_lock);
             WARN("not removing last non-readonly device\n");
             return STATUS_INVALID_PARAMETER;
         }
-        
+
         if (num_rw_devices == 4 &&
             ((Vcb->data_flags & BLOCK_FLAG_RAID10 || Vcb->metadata_flags & BLOCK_FLAG_RAID10 || Vcb->system_flags & BLOCK_FLAG_RAID10) ||
              (Vcb->data_flags & BLOCK_FLAG_RAID6 || Vcb->metadata_flags & BLOCK_FLAG_RAID6 || Vcb->system_flags & BLOCK_FLAG_RAID6))
@@ -3132,13 +3835,13 @@ NTSTATUS remove_device(device_extension* Vcb, void* data, ULONG length, KPROCESS
             ERR("would not be enough devices to satisfy RAID requirement (RAID6/10)\n");
             return STATUS_CANNOT_DELETE;
         }
-        
+
         if (num_rw_devices == 3 && (Vcb->data_flags & BLOCK_FLAG_RAID5 || Vcb->metadata_flags & BLOCK_FLAG_RAID5 || Vcb->system_flags & BLOCK_FLAG_RAID5)) {
             ExReleaseResourceLite(&Vcb->tree_lock);
             ERR("would not be enough devices to satisfy RAID requirement (RAID5)\n");
             return STATUS_CANNOT_DELETE;
         }
-        
+
         if (num_rw_devices == 2 &&
             ((Vcb->data_flags & BLOCK_FLAG_RAID0 || Vcb->metadata_flags & BLOCK_FLAG_RAID0 || Vcb->system_flags & BLOCK_FLAG_RAID0) ||
              (Vcb->data_flags & BLOCK_FLAG_RAID1 || Vcb->metadata_flags & BLOCK_FLAG_RAID1 || Vcb->system_flags & BLOCK_FLAG_RAID1))
@@ -3148,33 +3851,35 @@ NTSTATUS remove_device(device_extension* Vcb, void* data, ULONG length, KPROCESS
             return STATUS_CANNOT_DELETE;
         }
     }
-    
+
     ExReleaseResourceLite(&Vcb->tree_lock);
-    
+
     if (Vcb->balance.thread) {
         WARN("balance already running\n");
         return STATUS_DEVICE_NOT_READY;
     }
-    
+
     dev->reloc = TRUE;
-    
+
     RtlZeroMemory(Vcb->balance.opts, sizeof(btrfs_balance_opts) * 3);
-    
+
     for (i = 0; i < 3; i++) {
         Vcb->balance.opts[i].flags = BTRFS_BALANCE_OPTS_ENABLED | BTRFS_BALANCE_OPTS_DEVID;
         Vcb->balance.opts[i].devid = devid;
     }
-    
+
     Vcb->balance.paused = FALSE;
     Vcb->balance.removing = TRUE;
+    Vcb->balance.shrinking = FALSE;
+    Vcb->balance.status = STATUS_SUCCESS;
     KeInitializeEvent(&Vcb->balance.event, NotificationEvent, !Vcb->balance.paused);
-    
+
     Status = PsCreateSystemThread(&Vcb->balance.thread, 0, NULL, NULL, NULL, balance_thread, Vcb);
     if (!NT_SUCCESS(Status)) {
         ERR("PsCreateSystemThread returned %08x\n", Status);
         dev->reloc = FALSE;
         return Status;
     }
-    
+
     return STATUS_SUCCESS;
 }
index 2a54cc3..4bf1242 100644 (file)
@@ -1,17 +1,17 @@
-/* Copyright (c) Mark Harmstone 2016
- * 
+/* Copyright (c) Mark Harmstone 2016-17
+ *
  * This file is part of WinBtrfs.
- * 
+ *
  * WinBtrfs is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public Licence as published by
  * the Free Software Foundation, either version 3 of the Licence, or
  * (at your option) any later version.
- * 
+ *
  * WinBtrfs is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public Licence for more details.
- * 
+ *
  * You should have received a copy of the GNU Lesser General Public Licence
  * along with WinBtrfs.  If not, see <http://www.gnu.org/licenses/>. */
 
 #endif
 #include <ntddscsi.h>
 #include "btrfs.h"
-#ifndef __REACTOS__
-#include <winioctl.h>
-#else
-#include <rtlfuncs.h>
-#endif
 #include <ata.h>
 
+#ifndef _MSC_VER
+#include <initguid.h>
+#include <ntddstor.h>
+#undef INITGUID
+#endif
+
+#include <ntdddisk.h>
+#include <ntddvol.h>
+
+#ifdef _MSC_VER
+#include <initguid.h>
+#include <ntddstor.h>
+#undef INITGUID
+#endif
+
 #define INCOMPAT_SUPPORTED (BTRFS_INCOMPAT_FLAGS_MIXED_BACKREF | BTRFS_INCOMPAT_FLAGS_DEFAULT_SUBVOL | BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS | \
                             BTRFS_INCOMPAT_FLAGS_COMPRESS_LZO | BTRFS_INCOMPAT_FLAGS_BIG_METADATA | BTRFS_INCOMPAT_FLAGS_RAID56 | \
                             BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF | BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA | BTRFS_INCOMPAT_FLAGS_NO_HOLES)
-#define COMPAT_RO_SUPPORTED 0
+#define COMPAT_RO_SUPPORTED (BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE | BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE_VALID)
 
 static WCHAR device_name[] = {'\\','B','t','r','f','s',0};
 static WCHAR dosdevice_name[] = {'\\','D','o','s','D','e','v','i','c','e','s','\\','B','t','r','f','s',0};
 
+DEFINE_GUID(BtrfsBusInterface, 0x4d414874, 0x6865, 0x6761, 0x6d, 0x65, 0x83, 0x69, 0x17, 0x9a, 0x7d, 0x1d);
+
 PDRIVER_OBJECT drvobj;
-PDEVICE_OBJECT devobj;
+PDEVICE_OBJECT master_devobj;
 #ifndef __REACTOS__
 BOOL have_sse42 = FALSE, have_sse2 = FALSE;
 #endif
 UINT64 num_reads = 0;
-LIST_ENTRY uid_map_list;
-LIST_ENTRY volumes;
-ERESOURCE volumes_lock;
-LIST_ENTRY pnp_disks;
+LIST_ENTRY uid_map_list, gid_map_list;
 LIST_ENTRY VcbList;
 ERESOURCE global_loading_lock;
 UINT32 debug_log_level = 0;
@@ -63,25 +72,41 @@ UINT32 mount_compress_type = 0;
 UINT32 mount_zlib_level = 3;
 UINT32 mount_flush_interval = 30;
 UINT32 mount_max_inline = 2048;
-UINT32 mount_raid5_recalculation = 1;
-UINT32 mount_raid6_recalculation = 1;
 UINT32 mount_skip_balance = 0;
+UINT32 mount_no_barrier = 0;
+UINT32 mount_no_trim = 0;
+UINT32 mount_clear_cache = 0;
+UINT32 mount_allow_degraded = 0;
+UINT32 mount_readonly = 0;
+UINT32 no_pnp = 0;
 BOOL log_started = FALSE;
 UNICODE_STRING log_device, log_file, registry_path;
-tPsUpdateDiskCounters PsUpdateDiskCounters;
-tCcCopyReadEx CcCopyReadEx;
-tCcCopyWriteEx CcCopyWriteEx;
-tCcSetAdditionalCacheAttributesEx CcSetAdditionalCacheAttributesEx;
+tPsUpdateDiskCounters fPsUpdateDiskCounters;
+tCcCopyReadEx fCcCopyReadEx;
+tCcCopyWriteEx fCcCopyWriteEx;
+tCcSetAdditionalCacheAttributesEx fCcSetAdditionalCacheAttributesEx;
+tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters;
 BOOL diskacc = FALSE;
-void* notification_entry = NULL;
+void *notification_entry = NULL, *notification_entry2 = NULL, *notification_entry3 = NULL;
+ERESOURCE pdo_list_lock, mapping_lock;
+LIST_ENTRY pdo_list;
+BOOL finished_probing = FALSE;
+HANDLE degraded_wait_handle = NULL, mountmgr_thread_handle = NULL;
+BOOL degraded_wait = TRUE;
+KEVENT mountmgr_thread_event;
+BOOL shutting_down = FALSE;
 
 #ifdef _DEBUG
 PFILE_OBJECT comfo = NULL;
 PDEVICE_OBJECT comdo = NULL;
 HANDLE log_handle = NULL;
+ERESOURCE log_lock;
+HANDLE serial_thread_handle = NULL;
+
+static void init_serial(BOOL first_time);
 #endif
 
-static NTSTATUS STDCALL close_file(device_extension* Vcb, PFILE_OBJECT FileObject);
+static NTSTATUS close_file(_In_ PFILE_OBJECT FileObject, _In_ PIRP Irp);
 
 typedef struct {
     KEVENT Event;
@@ -89,49 +114,51 @@ typedef struct {
 } read_context;
 
 #ifdef _DEBUG
 // Completion routine for the write IRP built in _debug_message.
 // conptr is the read_context supplied to IoSetCompletionRoutine: the final I/O status
 // is copied into it and its event is signalled so that the waiting sender can continue.
-static NTSTATUS STDCALL dbg_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
+_Function_class_(IO_COMPLETION_ROUTINE)
+static NTSTATUS dbg_completion(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp, _In_ PVOID conptr) {
     read_context* context = conptr;
-    
-//     DbgPrint("dbg_completion\n");
-    
+
+    UNUSED(DeviceObject);
+
     context->iosb = Irp->IoStatus;
     KeSetEvent(&context->Event, 0, FALSE);
-    
-//     return STATUS_SUCCESS;
+
     // the IRP was allocated by the sender with IoAllocateIrp and is freed there with
     // IoFreeIrp, so completion processing must stop here
     return STATUS_MORE_PROCESSING_REQUIRED;
 }
 
 #ifdef DEBUG_LONG_MESSAGES
-void STDCALL _debug_message(const char* func, const char* file, unsigned int line, char* s, ...) {
+void _debug_message(_In_ const char* func, _In_ const char* file, _In_ unsigned int line, _In_ char* s, ...) {
 #else
-void STDCALL _debug_message(const char* func, char* s, ...) {
+void _debug_message(_In_ const char* func, _In_ char* s, ...) {
 #endif
     LARGE_INTEGER offset;
     PIO_STACK_LOCATION IrpSp;
     NTSTATUS Status;
     PIRP Irp;
     va_list ap;
-    char *buf2 = NULL, *buf;
-    read_context* context = NULL;
+    char *buf2, *buf;
+    read_context context;
     UINT32 length;
-    
+
     buf2 = ExAllocatePoolWithTag(NonPagedPool, 1024, ALLOC_TAG);
-    
+
     if (!buf2) {
         DbgPrint("Couldn't allocate buffer in debug_message\n");
         return;
     }
-    
+
 #ifdef DEBUG_LONG_MESSAGES
-    sprintf(buf2, "%p:%s:%s:%u:", PsGetCurrentThreadId(), func, file, line);
+    sprintf(buf2, "%p:%s:%s:%u:", PsGetCurrentThread(), func, file, line);
 #else
-    sprintf(buf2, "%p:%s:", PsGetCurrentThreadId(), func);
+    sprintf(buf2, "%p:%s:", PsGetCurrentThread(), func);
 #endif
     buf = &buf2[strlen(buf2)];
-    
+
     va_start(ap, s);
     vsprintf(buf, s, ap);
-    
+
+    ExAcquireResourceSharedLite(&log_lock, TRUE);
+
     if (!log_started || (log_device.Length == 0 && log_file.Length == 0)) {
         DbgPrint(buf2);
     } else if (log_device.Length > 0) {
@@ -140,34 +167,26 @@ void STDCALL _debug_message(const char* func, char* s, ...) {
             DbgPrint(buf2);
             goto exit2;
         }
-        
+
         length = (UINT32)strlen(buf2);
-        
+
         offset.u.LowPart = 0;
         offset.u.HighPart = 0;
-        
-        context = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_context), ALLOC_TAG);
-        if (!context) {
-            DbgPrint("Couldn't allocate context in debug_message\n");
-            return;
-        }
-        
-        RtlZeroMemory(context, sizeof(read_context));
-        
-        KeInitializeEvent(&context->Event, NotificationEvent, FALSE);
 
-    //     status = ZwWriteFile(comh, NULL, NULL, NULL, &io, buf2, strlen(buf2), &offset, NULL);
-        
+        RtlZeroMemory(&context, sizeof(read_context));
+
+        KeInitializeEvent(&context.Event, NotificationEvent, FALSE);
+
         Irp = IoAllocateIrp(comdo->StackSize, FALSE);
-        
+
         if (!Irp) {
             DbgPrint("IoAllocateIrp failed\n");
             goto exit2;
         }
-        
+
         IrpSp = IoGetNextIrpStackLocation(Irp);
         IrpSp->MajorFunction = IRP_MJ_WRITE;
-        
+
         if (comdo->Flags & DO_BUFFERED_IO) {
             Irp->AssociatedIrp.SystemBuffer = buf2;
 
@@ -178,82 +197,61 @@ void STDCALL _debug_message(const char* func, char* s, ...) {
                 DbgPrint("IoAllocateMdl failed\n");
                 goto exit;
             }
-            
-            MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoWriteAccess);
+
+            MmBuildMdlForNonPagedPool(Irp->MdlAddress);
         } else {
             Irp->UserBuffer = buf2;
         }
 
         IrpSp->Parameters.Write.Length = length;
         IrpSp->Parameters.Write.ByteOffset = offset;
-        
-        Irp->UserIosb = &context->iosb;
 
-        Irp->UserEvent = &context->Event;
+        Irp->UserIosb = &context.iosb;
+
+        Irp->UserEvent = &context.Event;
 
-        IoSetCompletionRoutine(Irp, dbg_completion, context, TRUE, TRUE, TRUE);
+        IoSetCompletionRoutine(Irp, dbg_completion, &context, TRUE, TRUE, TRUE);
 
         Status = IoCallDriver(comdo, Irp);
 
         if (Status == STATUS_PENDING) {
-            KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
-            Status = context->iosb.Status;
+            KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);
+            Status = context.iosb.Status;
         }
-        
-        if (comdo->Flags & DO_DIRECT_IO) {
-            MmUnlockPages(Irp->MdlAddress);
+
+        if (comdo->Flags & DO_DIRECT_IO)
             IoFreeMdl(Irp->MdlAddress);
-        }
-        
+
         if (!NT_SUCCESS(Status)) {
             DbgPrint("failed to write to COM1 - error %08x\n", Status);
             goto exit;
         }
-        
+
 exit:
         IoFreeIrp(Irp);
     } else if (log_handle != NULL) {
         IO_STATUS_BLOCK iosb;
-        
+
         length = (UINT32)strlen(buf2);
-        
+
         Status = ZwWriteFile(log_handle, NULL, NULL, NULL, &iosb, buf2, length, NULL, NULL);
-        
+
         if (!NT_SUCCESS(Status)) {
             DbgPrint("failed to write to file - error %08x\n", Status);
         }
     }
-    
+
 exit2:
+    ExReleaseResourceLite(&log_lock);
+
     va_end(ap);
-    
-    if (context)
-        ExFreePool(context);
-    
+
     if (buf2)
         ExFreePool(buf2);
 }
 #endif
 
-UINT64 sector_align( UINT64 NumberToBeAligned, UINT64 Alignment )
-{
-    if( Alignment & ( Alignment - 1 ) )
-    {
-        //
-        //  Alignment not a power of 2
-        //  Just returning
-        //
-        return NumberToBeAligned;
-    }
-    if( ( NumberToBeAligned & ( Alignment - 1 ) ) != 0 )
-    {
-        NumberToBeAligned = NumberToBeAligned + Alignment;
-        NumberToBeAligned = NumberToBeAligned & ( ~ (Alignment-1) );
-    }
-    return NumberToBeAligned;
-}
-
-BOOL is_top_level(PIRP Irp) {
+BOOL is_top_level(_In_ PIRP Irp) {
     if (!IoGetTopLevelIrp()) {
         IoSetTopLevelIrp(Irp);
         return TRUE;
@@ -262,132 +260,162 @@ BOOL is_top_level(PIRP Irp) {
     return FALSE;
 }
 
 // Driver unload routine. Frees the cache, unregisters the file system and the
 // plug-and-play notifications, removes the DosDevices symbolic link and the device
 // object, frees the UID/GID mapping lists, releases the debug logging objects and
 // deletes the global resources and strings.
-static void STDCALL DriverUnload(PDRIVER_OBJECT DriverObject) {
+_Function_class_(DRIVER_UNLOAD)
+#ifdef __REACTOS__
+static void NTAPI DriverUnload(_In_ PDRIVER_OBJECT DriverObject) {
+#else
+static void DriverUnload(_In_ PDRIVER_OBJECT DriverObject) {
+#endif
     UNICODE_STRING dosdevice_nameW;
 
     ERR("DriverUnload\n");
-    
+
     free_cache();
-    
+
     IoUnregisterFileSystem(DriverObject->DeviceObject);
-    
+
     // each notification entry is unregistered only if it was set; the ReactOS build
     // uses the non-Ex unregister function
+    if (notification_entry2)
+#ifdef __REACTOS__
+        IoUnregisterPlugPlayNotification(notification_entry2);
+#else
+        IoUnregisterPlugPlayNotificationEx(notification_entry2);
+#endif
+
+    if (notification_entry3)
+#ifdef __REACTOS__
+        IoUnregisterPlugPlayNotification(notification_entry3);
+#else
+        IoUnregisterPlugPlayNotificationEx(notification_entry3);
+#endif
+
     if (notification_entry)
 #ifdef __REACTOS__
         IoUnregisterPlugPlayNotification(notification_entry);
 #else
         IoUnregisterPlugPlayNotificationEx(notification_entry);
 #endif
-   
+
     // build a counted string over the static dosdevice_name array (length in bytes,
     // without the terminating NUL)
     dosdevice_nameW.Buffer = dosdevice_name;
     dosdevice_nameW.Length = dosdevice_nameW.MaximumLength = (USHORT)wcslen(dosdevice_name) * sizeof(WCHAR);
 
     IoDeleteSymbolicLink(&dosdevice_nameW);
     IoDeleteDevice(DriverObject->DeviceObject);
-    
+
     // drain the UID mapping list, freeing each entry together with its SID
     while (!IsListEmpty(&uid_map_list)) {
         LIST_ENTRY* le = RemoveHeadList(&uid_map_list);
         uid_map* um = CONTAINING_RECORD(le, uid_map, listentry);
-        
+
         ExFreePool(um->sid);
 
         ExFreePool(um);
     }
-    
+
     // same for the GID mapping list
+    while (!IsListEmpty(&gid_map_list)) {
+        gid_map* gm = CONTAINING_RECORD(RemoveHeadList(&gid_map_list), gid_map, listentry);
+
+        ExFreePool(gm->sid);
+        ExFreePool(gm);
+    }
+
     // FIXME - free volumes and their devpaths
-    // FIXME - free pnp_disks and their devpaths
-    
+
 #ifdef _DEBUG
     if (comfo)
         ObDereferenceObject(comfo);
-    
+
     if (log_handle)
         ZwClose(log_handle);
 #endif
-    
+
     ExDeleteResourceLite(&global_loading_lock);
-    
-    ExDeleteResourceLite(&volumes_lock);
-    
+    ExDeleteResourceLite(&pdo_list_lock);
+
     if (log_device.Buffer)
         ExFreePool(log_device.Buffer);
-    
+
     if (log_file.Buffer)
         ExFreePool(log_file.Buffer);
-    
+
     if (registry_path.Buffer)
         ExFreePool(registry_path.Buffer);
+
+#ifdef _DEBUG
+    ExDeleteResourceLite(&log_lock);
+#endif
+    ExDeleteResourceLite(&mapping_lock);
 }
 
 // Sets r->lastinode for tree r by searching from the largest possible key and walking
 // backwards until an INODE_ITEM, or a ROOT_ITEM whose object ID has the top bit clear,
 // is found. If no such item exists, r->lastinode falls back to SUBVOL_ROOT_INODE.
 // Returns FALSE only when find_item fails; otherwise TRUE.
-static BOOL STDCALL get_last_inode(device_extension* Vcb, root* r, PIRP Irp) {
+static BOOL get_last_inode(_In_ _Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _In_ root* r, _In_opt_ PIRP Irp) {
     KEY searchkey;
     traverse_ptr tp, prev_tp;
     NTSTATUS Status;
-    
+
     // get last entry
     searchkey.obj_id = 0xffffffffffffffff;
     searchkey.obj_type = 0xff;
     searchkey.offset = 0xffffffffffffffff;
-    
+
     Status = find_item(Vcb, r, &tp, &searchkey, FALSE, Irp);
     if (!NT_SUCCESS(Status)) {
         ERR("error - find_item returned %08x\n", Status);
         return FALSE;
     }
-    
+
     // the item returned by the search may already be the one wanted
     if (tp.item->key.obj_type == TYPE_INODE_ITEM || (tp.item->key.obj_type == TYPE_ROOT_ITEM && !(tp.item->key.obj_id & 0x8000000000000000))) {
         r->lastinode = tp.item->key.obj_id;
         TRACE("last inode for tree %llx is %llx\n", r->id, r->lastinode);
         return TRUE;
     }
-    
-    while (find_prev_item(Vcb, &tp, &prev_tp, FALSE, Irp)) {
+
     // otherwise step backwards one item at a time, applying the same test
+    while (find_prev_item(Vcb, &tp, &prev_tp, Irp)) {
         tp = prev_tp;
-        
+
         TRACE("moving on to %llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-        
+
         if (tp.item->key.obj_type == TYPE_INODE_ITEM || (tp.item->key.obj_type == TYPE_ROOT_ITEM && !(tp.item->key.obj_id & 0x8000000000000000))) {
             r->lastinode = tp.item->key.obj_id;
             TRACE("last inode for tree %llx is %llx\n", r->id, r->lastinode);
             return TRUE;
         }
     }
-    
+
     // nothing suitable in the tree: fall back to the subvolume root inode
     r->lastinode = SUBVOL_ROOT_INODE;
-    
+
     WARN("no INODE_ITEMs in tree %llx\n", r->id);
-    
+
     return TRUE;
 }
 
-BOOL extract_xattr(void* item, USHORT size, char* name, UINT8** data, UINT16* datalen) {
+_Success_(return)
+static BOOL extract_xattr(_In_reads_bytes_(size) void* item, _In_ USHORT size, _In_z_ char* name, _Out_ UINT8** data, _Out_ UINT16* datalen) {
     DIR_ITEM* xa = (DIR_ITEM*)item;
     USHORT xasize;
-    
+
     while (TRUE) {
         if (size < sizeof(DIR_ITEM) || size < (sizeof(DIR_ITEM) - 1 + xa->m + xa->n)) {
             WARN("DIR_ITEM is truncated\n");
             return FALSE;
         }
-        
+
         if (xa->n == strlen(name) && RtlCompareMemory(name, xa->name, xa->n) == xa->n) {
             TRACE("found xattr %s\n", name);
-            
+
             *datalen = xa->m;
-            
+
             if (xa->m > 0) {
                 *data = ExAllocatePoolWithTag(PagedPool, xa->m, ALLOC_TAG);
                 if (!*data) {
                     ERR("out of memory\n");
                     return FALSE;
                 }
-                
+
                 RtlCopyMemory(*data, &xa->name[xa->n], xa->m);
             } else
                 *data = NULL;
-            
+
             return TRUE;
         }
-        
+
         xasize = sizeof(DIR_ITEM) - 1 + xa->m + xa->n;
 
         if (size > xasize) {
@@ -396,84 +424,96 @@ BOOL extract_xattr(void* item, USHORT size, char* name, UINT8** data, UINT16* da
         } else
             break;
     }
-    
+
     TRACE("xattr %s not found\n", name);
-    
+
     return FALSE;
 }
 
-BOOL STDCALL get_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char* name, UINT32 crc32, UINT8** data, UINT16* datalen, PIRP Irp) {
+_Success_(return)
+BOOL get_xattr(_In_ _Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _In_ root* subvol, _In_ UINT64 inode, _In_z_ char* name, _In_ UINT32 crc32,
+               _Out_ UINT8** data, _Out_ UINT16* datalen, _In_opt_ PIRP Irp) {
     KEY searchkey;
     traverse_ptr tp;
     NTSTATUS Status;
-    
+
     TRACE("(%p, %llx, %llx, %s, %08x, %p, %p)\n", Vcb, subvol->id, inode, name, crc32, data, datalen);
-    
+
     searchkey.obj_id = inode;
     searchkey.obj_type = TYPE_XATTR_ITEM;
     searchkey.offset = crc32;
-    
+
     Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
     if (!NT_SUCCESS(Status)) {
         ERR("error - find_item returned %08x\n", Status);
         return FALSE;
     }
-    
+
     if (keycmp(tp.item->key, searchkey)) {
         TRACE("could not find item (%llx,%x,%llx)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
         return FALSE;
     }
-    
+
     if (tp.item->size < sizeof(DIR_ITEM)) {
         ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM));
         return FALSE;
     }
-    
+
     return extract_xattr(tp.item->data, tp.item->size, name, data, datalen);
 }
 
-static NTSTATUS STDCALL drv_close(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
+_Dispatch_type_(IRP_MJ_CLOSE)
+_Function_class_(DRIVER_DISPATCH)
+static NTSTATUS drv_close(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp) {
     NTSTATUS Status;
     PIO_STACK_LOCATION IrpSp;
     device_extension* Vcb = DeviceObject->DeviceExtension;
     BOOL top_level;
 
-    TRACE("close\n");
-    
     FsRtlEnterFileSystem();
 
+    TRACE("close\n");
+
     top_level = is_top_level(Irp);
-    
-    if (DeviceObject == devobj || (Vcb && Vcb->type == VCB_TYPE_PARTITION0)) {
+
+    if (DeviceObject == master_devobj) {
         TRACE("Closing file system\n");
         Status = STATUS_SUCCESS;
-        goto exit;
+        goto end;
+    } else if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
+        Status = vol_close(DeviceObject, Irp);
+        goto end;
+    } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
+        Status = STATUS_INVALID_PARAMETER;
+        goto end;
     }
-    
+
     IrpSp = IoGetCurrentIrpStackLocation(Irp);
-    
+
     // FIXME - unmount if called for volume
     // FIXME - call FsRtlNotifyUninitializeSync(&Vcb->NotifySync) if unmounting
-    
-    Status = close_file(DeviceObject->DeviceExtension, IrpSp->FileObject);
 
-exit:
+    Status = close_file(IrpSp->FileObject, Irp);
+
+end:
     Irp->IoStatus.Status = Status;
     Irp->IoStatus.Information = 0;
-    
+
     IoCompleteRequest( Irp, IO_DISK_INCREMENT );
-    
-    if (top_level) 
+
+    if (top_level)
         IoSetTopLevelIrp(NULL);
-    
-    FsRtlExitFileSystem();
-    
+
     TRACE("returning %08x\n", Status);
 
+    FsRtlExitFileSystem();
+
     return Status;
 }
 
-static NTSTATUS STDCALL drv_flush_buffers(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
+_Dispatch_type_(IRP_MJ_FLUSH_BUFFERS)
+_Function_class_(DRIVER_DISPATCH)
+static NTSTATUS drv_flush_buffers(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp) {
     NTSTATUS Status;
     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation( Irp );
     PFILE_OBJECT FileObject = IrpSp->FileObject;
@@ -481,47 +521,65 @@ static NTSTATUS STDCALL drv_flush_buffers(IN PDEVICE_OBJECT DeviceObject, IN PIR
     device_extension* Vcb = DeviceObject->DeviceExtension;
     BOOL top_level;
 
-    TRACE("flush buffers\n");
-    
     FsRtlEnterFileSystem();
 
+    TRACE("flush buffers\n");
+
     top_level = is_top_level(Irp);
-    
-    if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) {
-        Status = part0_passthrough(DeviceObject, Irp);
-        goto exit;
+
+    if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
+        Status = vol_flush_buffers(DeviceObject, Irp);
+        goto end;
+    } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
+        Status = STATUS_INVALID_PARAMETER;
+        goto end;
+    }
+
+    if (!fcb) {
+        ERR("fcb was NULL\n");
+        Status = STATUS_INVALID_PARAMETER;
+        goto end;
     }
-    
+
+    if (fcb == Vcb->volume_fcb) {
+        Status = STATUS_INVALID_PARAMETER;
+        goto end;
+    }
+
+    Irp->IoStatus.Information = 0;
+
+    fcb->Header.IsFastIoPossible = fast_io_possible(fcb);
+
     Status = STATUS_SUCCESS;
     Irp->IoStatus.Status = Status;
-    Irp->IoStatus.Information = 0;
-    
+
     if (fcb->type != BTRFS_TYPE_DIRECTORY) {
         CcFlushCache(&fcb->nonpaged->segment_object, NULL, 0, &Irp->IoStatus);
-        
+
         if (fcb->Header.PagingIoResource) {
             ExAcquireResourceExclusiveLite(fcb->Header.PagingIoResource, TRUE);
             ExReleaseResourceLite(fcb->Header.PagingIoResource);
         }
-        
+
         Status = Irp->IoStatus.Status;
     }
-    
+
+end:
     IoCompleteRequest(Irp, IO_NO_INCREMENT);
-    
-exit:
-    if (top_level) 
+
+    TRACE("returning %08x\n", Status);
+
+    if (top_level)
         IoSetTopLevelIrp(NULL);
-    
+
     FsRtlExitFileSystem();
 
     return Status;
 }
 
-static void calculate_total_space(device_extension* Vcb, LONGLONG* totalsize, LONGLONG* freespace) {
-    UINT16 nfactor, dfactor;
-    UINT64 sectors_used;
-    
+static void calculate_total_space(_In_ device_extension* Vcb, _Out_ UINT64* totalsize, _Out_ UINT64* freespace) {
+    UINT64 nfactor, dfactor, sectors_used;
+
     if (Vcb->data_flags & BLOCK_FLAG_DUPLICATE || Vcb->data_flags & BLOCK_FLAG_RAID1 || Vcb->data_flags & BLOCK_FLAG_RAID10) {
         nfactor = 1;
         dfactor = 2;
@@ -535,98 +593,199 @@ static void calculate_total_space(device_extension* Vcb, LONGLONG* totalsize, LO
         nfactor = 1;
         dfactor = 1;
     }
-    
+
     sectors_used = Vcb->superblock.bytes_used / Vcb->superblock.sector_size;
-    
+
     *totalsize = (Vcb->superblock.total_bytes / Vcb->superblock.sector_size) * nfactor / dfactor;
     *freespace = sectors_used > *totalsize ? 0 : (*totalsize - sectors_used);
 }
 
-static NTSTATUS STDCALL drv_query_volume_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
+#ifndef __REACTOS__
+// This function exists because we have to lie about our FS type in certain situations.
+// MPR!MprGetConnection queries the FS type, and compares it to a whitelist. If it doesn't match,
+// it will return ERROR_NO_NET_OR_BAD_PATH, which prevents UAC from working.
+// The command mklink refuses to create hard links on anything other than NTFS, so we have to
+// blacklist cmd.exe too.
+
+static BOOL lie_about_fs_type() {
+    NTSTATUS Status;
+    PROCESS_BASIC_INFORMATION pbi;
+    PPEB peb;
+    LIST_ENTRY* le;
+    ULONG retlen;
+
+    static WCHAR mpr[] = L"MPR.DLL";
+    static WCHAR cmd[] = L"CMD.EXE";
+    static WCHAR fsutil[] = L"FSUTIL.EXE";
+    UNICODE_STRING mprus, cmdus, fsutilus;
+
+    mprus.Buffer = mpr;
+    mprus.Length = mprus.MaximumLength = (USHORT)(wcslen(mpr) * sizeof(WCHAR));
+    cmdus.Buffer = cmd;
+    cmdus.Length = cmdus.MaximumLength = (USHORT)(wcslen(cmd) * sizeof(WCHAR));
+    fsutilus.Buffer = fsutil;
+    fsutilus.Length = fsutilus.MaximumLength = (USHORT)(wcslen(fsutil) * sizeof(WCHAR));
+
+    if (!PsGetCurrentProcess())
+        return FALSE;
+
+    Status = ZwQueryInformationProcess(NtCurrentProcess(), ProcessBasicInformation, &pbi, sizeof(pbi), &retlen);
+
+    if (!NT_SUCCESS(Status)) {
+        ERR("ZwQueryInformationProcess returned %08x\n", Status);
+        return FALSE;
+    }
+
+    if (!pbi.PebBaseAddress)
+        return FALSE;
+
+    peb = pbi.PebBaseAddress;
+
+    if (!peb->Ldr)
+        return FALSE;
+
+    le = peb->Ldr->InMemoryOrderModuleList.Flink;
+    while (le != &peb->Ldr->InMemoryOrderModuleList) {
+        LDR_DATA_TABLE_ENTRY* entry = CONTAINING_RECORD(le, LDR_DATA_TABLE_ENTRY, InMemoryOrderLinks);
+        BOOL blacklist = FALSE;
+
+        if (entry->FullDllName.Length >= mprus.Length) {
+            UNICODE_STRING name;
+
+            name.Buffer = &entry->FullDllName.Buffer[(entry->FullDllName.Length - mprus.Length) / sizeof(WCHAR)];
+            name.Length = name.MaximumLength = mprus.Length;
+
+            blacklist = FsRtlAreNamesEqual(&name, &mprus, TRUE, NULL);
+        }
+
+        if (!blacklist && entry->FullDllName.Length >= cmdus.Length) {
+            UNICODE_STRING name;
+
+            name.Buffer = &entry->FullDllName.Buffer[(entry->FullDllName.Length - cmdus.Length) / sizeof(WCHAR)];
+            name.Length = name.MaximumLength = cmdus.Length;
+
+            blacklist = FsRtlAreNamesEqual(&name, &cmdus, TRUE, NULL);
+        }
+
+        if (!blacklist && entry->FullDllName.Length >= fsutilus.Length) {
+            UNICODE_STRING name;
+
+            name.Buffer = &entry->FullDllName.Buffer[(entry->FullDllName.Length - fsutilus.Length) / sizeof(WCHAR)];
+            name.Length = name.MaximumLength = fsutilus.Length;
+
+            blacklist = FsRtlAreNamesEqual(&name, &fsutilus, TRUE, NULL);
+        }
+
+        if (blacklist) {
+            void** frames;
+            ULONG i, num_frames;
+
+            frames = ExAllocatePoolWithTag(PagedPool, 256 * sizeof(void*), ALLOC_TAG);
+            if (!frames) {
+                ERR("out of memory\n");
+                return FALSE;
+            }
+
+            num_frames = RtlWalkFrameChain(frames, 256, 1);
+
+            for (i = 0; i < num_frames; i++) {
+                // entry->Reserved3[1] appears to be the image size
+                if (frames[i] >= entry->DllBase && (ULONG_PTR)frames[i] <= (ULONG_PTR)entry->DllBase + (ULONG_PTR)entry->Reserved3[1]) {
+                    ExFreePool(frames);
+                    return TRUE;
+                }
+            }
+
+            ExFreePool(frames);
+        }
+
+        le = le->Flink;
+    }
+
+    return FALSE;
+}
+#endif
+
+_Dispatch_type_(IRP_MJ_QUERY_VOLUME_INFORMATION)
+_Function_class_(DRIVER_DISPATCH)
+static NTSTATUS drv_query_volume_information(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp) {
     PIO_STACK_LOCATION IrpSp;
     NTSTATUS Status;
     ULONG BytesCopied = 0;
     device_extension* Vcb = DeviceObject->DeviceExtension;
     BOOL top_level;
-    
-#ifndef __REACTOS__
-    // An unfortunate necessity - we have to lie about our FS type. MPR!MprGetConnection polls for this,
-    // and compares it to a whitelist. If it doesn't match, it will return ERROR_NO_NET_OR_BAD_PATH,
-    // which prevents UAC from working.
-    // FIXME - only lie if we detect that we're being called by mpr.dll
-    
-    WCHAR* fs_name = L"NTFS";
-    ULONG fs_name_len = 4 * sizeof(WCHAR);
-#else
-    WCHAR* fs_name = L"Btrfs";
-    ULONG fs_name_len = 5 * sizeof(WCHAR);
-#endif
 
-    TRACE("query volume information\n");
-    
     FsRtlEnterFileSystem();
+
+    TRACE("query volume information\n");
     top_level = is_top_level(Irp);
-    
-    if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) {
-        Status = part0_passthrough(DeviceObject, Irp);
-        goto exit;
-    }    
-    
+
+    if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
+        Status = vol_query_volume_information(DeviceObject, Irp);
+        goto end;
+    } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
+        Status = STATUS_INVALID_PARAMETER;
+        goto end;
+    }
+
     IrpSp = IoGetCurrentIrpStackLocation(Irp);
-    
+
     Status = STATUS_NOT_IMPLEMENTED;
-    
+
     switch (IrpSp->Parameters.QueryVolume.FsInformationClass) {
         case FileFsAttributeInformation:
         {
             FILE_FS_ATTRIBUTE_INFORMATION* data = Irp->AssociatedIrp.SystemBuffer;
             BOOL overflow = FALSE;
+#ifndef __REACTOS__
+            WCHAR* fs_name = (Irp->RequestorMode == UserMode && lie_about_fs_type()) ? L"NTFS" : L"Btrfs";
+            ULONG fs_name_len = (ULONG)wcslen(fs_name) * sizeof(WCHAR);
+#else
+            WCHAR* fs_name = L"Btrfs";
+            ULONG fs_name_len = 5 * sizeof(WCHAR);
+#endif
             ULONG orig_fs_name_len = fs_name_len;
-            
+
             TRACE("FileFsAttributeInformation\n");
-            
+
             if (IrpSp->Parameters.QueryVolume.Length < sizeof(FILE_FS_ATTRIBUTE_INFORMATION) - sizeof(WCHAR) + fs_name_len) {
                 if (IrpSp->Parameters.QueryVolume.Length > sizeof(FILE_FS_ATTRIBUTE_INFORMATION) - sizeof(WCHAR))
                     fs_name_len = IrpSp->Parameters.QueryVolume.Length - sizeof(FILE_FS_ATTRIBUTE_INFORMATION) + sizeof(WCHAR);
                 else
                     fs_name_len = 0;
-                
+
                 overflow = TRUE;
             }
-            
+
             data->FileSystemAttributes = FILE_CASE_PRESERVED_NAMES | FILE_CASE_SENSITIVE_SEARCH |
                                          FILE_UNICODE_ON_DISK | FILE_NAMED_STREAMS | FILE_SUPPORTS_HARD_LINKS | FILE_PERSISTENT_ACLS |
                                          FILE_SUPPORTS_REPARSE_POINTS | FILE_SUPPORTS_SPARSE_FILES | FILE_SUPPORTS_OBJECT_IDS |
-                                         FILE_SUPPORTS_OPEN_BY_FILE_ID | FILE_SUPPORTS_EXTENDED_ATTRIBUTES;
+                                         FILE_SUPPORTS_OPEN_BY_FILE_ID | FILE_SUPPORTS_EXTENDED_ATTRIBUTES | FILE_SUPPORTS_BLOCK_REFCOUNTING;
             if (Vcb->readonly)
                 data->FileSystemAttributes |= FILE_READ_ONLY_VOLUME;
-                                         
+
             // should also be FILE_FILE_COMPRESSION when supported
             data->MaximumComponentNameLength = 255; // FIXME - check
             data->FileSystemNameLength = orig_fs_name_len;
             RtlCopyMemory(data->FileSystemName, fs_name, fs_name_len);
-            
+
             BytesCopied = sizeof(FILE_FS_ATTRIBUTE_INFORMATION) - sizeof(WCHAR) + fs_name_len;
             Status = overflow ? STATUS_BUFFER_OVERFLOW : STATUS_SUCCESS;
             break;
         }
 
-        case FileFsControlInformation:
-            FIXME("STUB: FileFsControlInformation\n");
-            break;
-
         case FileFsDeviceInformation:
         {
             FILE_FS_DEVICE_INFORMATION* ffdi = Irp->AssociatedIrp.SystemBuffer;
-            
+
             TRACE("FileFsDeviceInformation\n");
-            
+
             ffdi->DeviceType = FILE_DEVICE_DISK;
-            
+
             ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE);
-            ffdi->Characteristics = first_device(Vcb)->devobj->Characteristics;
+            ffdi->Characteristics = Vcb->Vpb->RealDevice->Characteristics;
             ExReleaseResourceLite(&Vcb->tree_lock);
-            
+
             if (Vcb->readonly)
                 ffdi->Characteristics |= FILE_READ_ONLY_DEVICE;
             else
@@ -634,59 +793,55 @@ static NTSTATUS STDCALL drv_query_volume_information(IN PDEVICE_OBJECT DeviceObj
 
             BytesCopied = sizeof(FILE_FS_DEVICE_INFORMATION);
             Status = STATUS_SUCCESS;
-            
-            break;
-        }
 
-        case FileFsDriverPathInformation:
-            FIXME("STUB: FileFsDriverPathInformation\n");
             break;
+        }
 
         case FileFsFullSizeInformation:
         {
             FILE_FS_FULL_SIZE_INFORMATION* ffsi = Irp->AssociatedIrp.SystemBuffer;
-            
+
             TRACE("FileFsFullSizeInformation\n");
-            
-            calculate_total_space(Vcb, &ffsi->TotalAllocationUnits.QuadPart, &ffsi->ActualAvailableAllocationUnits.QuadPart);
+
+            calculate_total_space(Vcb, (UINT64*)&ffsi->TotalAllocationUnits.QuadPart, (UINT64*)&ffsi->ActualAvailableAllocationUnits.QuadPart);
             ffsi->CallerAvailableAllocationUnits.QuadPart = ffsi->ActualAvailableAllocationUnits.QuadPart;
             ffsi->SectorsPerAllocationUnit = 1;
             ffsi->BytesPerSector = Vcb->superblock.sector_size;
-            
+
             BytesCopied = sizeof(FILE_FS_FULL_SIZE_INFORMATION);
             Status = STATUS_SUCCESS;
-            
+
             break;
         }
 
         case FileFsObjectIdInformation:
         {
             FILE_FS_OBJECTID_INFORMATION* ffoi = Irp->AssociatedIrp.SystemBuffer;
-            
+
             TRACE("FileFsObjectIdInformation\n");
-            
+
             RtlCopyMemory(ffoi->ObjectId, &Vcb->superblock.uuid.uuid[0], sizeof(UCHAR) * 16);
             RtlZeroMemory(ffoi->ExtendedInfo, sizeof(ffoi->ExtendedInfo));
-            
+
             BytesCopied = sizeof(FILE_FS_OBJECTID_INFORMATION);
             Status = STATUS_SUCCESS;
-            
+
             break;
         }
 
         case FileFsSizeInformation:
         {
             FILE_FS_SIZE_INFORMATION* ffsi = Irp->AssociatedIrp.SystemBuffer;
-            
+
             TRACE("FileFsSizeInformation\n");
-            
-            calculate_total_space(Vcb, &ffsi->TotalAllocationUnits.QuadPart, &ffsi->AvailableAllocationUnits.QuadPart);
+
+            calculate_total_space(Vcb, (UINT64*)&ffsi->TotalAllocationUnits.QuadPart, (UINT64*)&ffsi->AvailableAllocationUnits.QuadPart);
             ffsi->SectorsPerAllocationUnit = 1;
             ffsi->BytesPerSector = Vcb->superblock.sector_size;
-            
+
             BytesCopied = sizeof(FILE_FS_SIZE_INFORMATION);
             Status = STATUS_SUCCESS;
-            
+
             break;
         }
 
@@ -696,69 +851,79 @@ static NTSTATUS STDCALL drv_query_volume_information(IN PDEVICE_OBJECT DeviceObj
             FILE_FS_VOLUME_INFORMATION ffvi;
             BOOL overflow = FALSE;
             ULONG label_len, orig_label_len;
-            
+
             TRACE("FileFsVolumeInformation\n");
             TRACE("max length = %u\n", IrpSp->Parameters.QueryVolume.Length);
-            
+
             ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE);
-            
-//             orig_label_len = label_len = (ULONG)(wcslen(Vcb->label) * sizeof(WCHAR));
-            RtlUTF8ToUnicodeN(NULL, 0, &label_len, Vcb->superblock.label, (ULONG)strlen(Vcb->superblock.label));
+
+            Status = RtlUTF8ToUnicodeN(NULL, 0, &label_len, Vcb->superblock.label, (ULONG)strlen(Vcb->superblock.label));
+            if (!NT_SUCCESS(Status)) {
+                ERR("RtlUTF8ToUnicodeN returned %08x\n", Status);
+                ExReleaseResourceLite(&Vcb->tree_lock);
+                break;
+            }
+
             orig_label_len = label_len;
-            
+
             if (IrpSp->Parameters.QueryVolume.Length < sizeof(FILE_FS_VOLUME_INFORMATION) - sizeof(WCHAR) + label_len) {
                 if (IrpSp->Parameters.QueryVolume.Length > sizeof(FILE_FS_VOLUME_INFORMATION) - sizeof(WCHAR))
                     label_len = IrpSp->Parameters.QueryVolume.Length - sizeof(FILE_FS_VOLUME_INFORMATION) + sizeof(WCHAR);
                 else
                     label_len = 0;
-                
+
                 overflow = TRUE;
             }
-            
+
             TRACE("label_len = %u\n", label_len);
-            
+
             ffvi.VolumeCreationTime.QuadPart = 0; // FIXME
             ffvi.VolumeSerialNumber = Vcb->superblock.uuid.uuid[12] << 24 | Vcb->superblock.uuid.uuid[13] << 16 | Vcb->superblock.uuid.uuid[14] << 8 | Vcb->superblock.uuid.uuid[15];
             ffvi.VolumeLabelLength = orig_label_len;
             ffvi.SupportsObjects = FALSE;
-            
+
             RtlCopyMemory(data, &ffvi, min(sizeof(FILE_FS_VOLUME_INFORMATION) - sizeof(WCHAR), IrpSp->Parameters.QueryVolume.Length));
-            
+
             if (label_len > 0) {
                 ULONG bytecount;
-                
-//                 RtlCopyMemory(&data->VolumeLabel[0], Vcb->label, label_len);
-                RtlUTF8ToUnicodeN(&data->VolumeLabel[0], label_len, &bytecount, Vcb->superblock.label, (ULONG)strlen(Vcb->superblock.label));
+
+                Status = RtlUTF8ToUnicodeN(&data->VolumeLabel[0], label_len, &bytecount, Vcb->superblock.label, (ULONG)strlen(Vcb->superblock.label));
+                if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_TOO_SMALL) {
+                    ERR("RtlUTF8ToUnicodeN returned %08x\n", Status);
+                    ExReleaseResourceLite(&Vcb->tree_lock);
+                    break;
+                }
+
                 TRACE("label = %.*S\n", label_len / sizeof(WCHAR), data->VolumeLabel);
             }
-            
+
             ExReleaseResourceLite(&Vcb->tree_lock);
 
             BytesCopied = sizeof(FILE_FS_VOLUME_INFORMATION) - sizeof(WCHAR) + label_len;
             Status = overflow ? STATUS_BUFFER_OVERFLOW : STATUS_SUCCESS;
             break;
         }
-        
+
 #ifndef __REACTOS__
 #ifdef _MSC_VER // not in mingw yet
         case FileFsSectorSizeInformation:
         {
             FILE_FS_SECTOR_SIZE_INFORMATION* data = Irp->AssociatedIrp.SystemBuffer;
-            
+
             data->LogicalBytesPerSector = Vcb->superblock.sector_size;
             data->PhysicalBytesPerSectorForAtomicity = Vcb->superblock.sector_size;
             data->PhysicalBytesPerSectorForPerformance = Vcb->superblock.sector_size;
             data->FileSystemEffectivePhysicalBytesPerSectorForAtomicity = Vcb->superblock.sector_size;
             data->ByteOffsetForSectorAlignment = 0;
             data->ByteOffsetForPartitionAlignment = 0;
-            
+
             data->Flags = SSINFO_FLAGS_ALIGNED_DEVICE | SSINFO_FLAGS_PARTITION_ALIGNED_ON_DEVICE;
-            
-            if (Vcb->trim)
+
+            if (Vcb->trim && !Vcb->options.no_trim)
                 data->Flags |= SSINFO_FLAGS_TRIM_ENABLED;
-            
+
             BytesCopied = sizeof(FILE_FS_SECTOR_SIZE_INFORMATION);
-  
+
             break;
         }
 #endif
@@ -769,131 +934,64 @@ static NTSTATUS STDCALL drv_query_volume_information(IN PDEVICE_OBJECT DeviceObj
             WARN("unknown FsInformationClass %u\n", IrpSp->Parameters.QueryVolume.FsInformationClass);
             break;
     }
-    
-//     if (NT_SUCCESS(Status) && IrpSp->Parameters.QueryVolume.Length < BytesCopied) { // FIXME - should not copy anything if overflow
-//         WARN("overflow: %u < %u\n", IrpSp->Parameters.QueryVolume.Length, BytesCopied);
-//         BytesCopied = IrpSp->Parameters.QueryVolume.Length;
-//         Status = STATUS_BUFFER_OVERFLOW;
-//     }
 
-    Irp->IoStatus.Status = Status;
-    
     if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW)
         Irp->IoStatus.Information = 0;
     else
         Irp->IoStatus.Information = BytesCopied;
-    
+
+end:
+    Irp->IoStatus.Status = Status;
+
     IoCompleteRequest( Irp, IO_DISK_INCREMENT );
-    
-exit:
-    if (top_level) 
+
+    if (top_level)
         IoSetTopLevelIrp(NULL);
-    
-    FsRtlExitFileSystem();
-    
+
     TRACE("query volume information returning %08x\n", Status);
 
+    FsRtlExitFileSystem();
+
     return Status;
 }
 
-static NTSTATUS STDCALL read_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
+_Function_class_(IO_COMPLETION_ROUTINE)
+#ifdef __REACTOS__
+static NTSTATUS NTAPI read_completion(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp, _In_ PVOID conptr) {
+#else
+static NTSTATUS read_completion(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp, _In_ PVOID conptr) {
+#endif
     read_context* context = conptr;
-    
-//     DbgPrint("read_completion\n");
-    
+
+    UNUSED(DeviceObject);
+
     context->iosb = Irp->IoStatus;
     KeSetEvent(&context->Event, 0, FALSE);
-    
-//     return STATUS_SUCCESS;
+
     return STATUS_MORE_PROCESSING_REQUIRED;
 }
 
-// static void test_tree_deletion(device_extension* Vcb) {
-//     KEY searchkey/*, endkey*/;
-//     traverse_ptr tp, next_tp;
-//     root* r;
-//     
-//     searchkey.obj_id = 0x100;
-//     searchkey.obj_type = 0x54;
-//     searchkey.offset = 0xca4ab2f5;
-//     
-// //     endkey.obj_id = 0x100;
-// //     endkey.obj_type = 0x60;
-// //     endkey.offset = 0x15a;
-//     
-//     r = Vcb->roots;
-//     while (r && r->id != 0x102)
-//         r = r->next;
-//     
-//     if (!r) {
-//         ERR("error - could not find root\n");
-//         return;
-//     }
-//     
-//     if (!find_item(Vcb, r, &tp, &searchkey, NULL, FALSE)) {
-//         ERR("error - could not find key\n");
-//         return;
-//     }
-//     
-//     while (TRUE/*keycmp(tp.item->key, endkey) < 1*/) {
-//         tp.item->ignore = TRUE;
-//         add_to_tree_cache(tc, tp.tree);
-//         
-//         if (find_next_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
-//             free_traverse_ptr(&tp);
-//             tp = next_tp;
-//         } else
-//             break;
-//     }
-//     
-//     free_traverse_ptr(&tp);
-// }
-
-// static void test_tree_splitting(device_extension* Vcb) {
-//     int i;
-//     
-//     for (i = 0; i < 1000; i++) {
-//         char* data = ExAllocatePoolWithTag(PagedPool, 4, ALLOC_TAG);
-//         
-//         insert_tree_item(Vcb, Vcb->extent_root, 0, 0xfd, i, data, 4, NULL);
-//     }
-// }
-
-// static void test_dropping_tree(device_extension* Vcb) {
-//     LIST_ENTRY* le = Vcb->roots.Flink;
-//     
-//     while (le != &Vcb->roots) {
-//         root* r = CONTAINING_RECORD(le, root, list_entry);
-//         
-//         if (r->id == 0x101) {
-//             RemoveEntryList(&r->list_entry);
-//             InsertTailList(&Vcb->drop_roots, &r->list_entry);
-//             return;
-//         }
-//         
-//         le = le->Flink;
-//     }
-// }
-
-NTSTATUS create_root(device_extension* Vcb, UINT64 id, root** rootptr, BOOL no_tree, UINT64 offset, PIRP Irp, LIST_ENTRY* rollback) {
+NTSTATUS create_root(_In_ _Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _In_ UINT64 id,
+                     _Out_ root** rootptr, _In_ BOOL no_tree, _In_ UINT64 offset, _In_opt_ PIRP Irp) {
+    NTSTATUS Status;
     root* r;
-    tree* t;
+    tree* t = NULL;
     ROOT_ITEM* ri;
     traverse_ptr tp;
-    
+
     r = ExAllocatePoolWithTag(PagedPool, sizeof(root), ALLOC_TAG);
     if (!r) {
         ERR("out of memory\n");
         return STATUS_INSUFFICIENT_RESOURCES;
     }
-    
+
     r->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(root_nonpaged), ALLOC_TAG);
     if (!r->nonpaged) {
         ERR("out of memory\n");
         ExFreePool(r);
         return STATUS_INSUFFICIENT_RESOURCES;
     }
-    
+
     if (!no_tree) {
         t = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
         if (!t) {
@@ -902,52 +1000,62 @@ NTSTATUS create_root(device_extension* Vcb, UINT64 id, root** rootptr, BOOL no_t
             ExFreePool(r);
             return STATUS_INSUFFICIENT_RESOURCES;
         }
+
+        t->is_unique = TRUE;
+        t->uniqueness_determined = TRUE;
+        t->buf = NULL;
     }
-    
+
     ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
     if (!ri) {
         ERR("out of memory\n");
-        
-        if (!no_tree)
+
+        if (t)
             ExFreePool(t);
-        
+
         ExFreePool(r->nonpaged);
         ExFreePool(r);
         return STATUS_INSUFFICIENT_RESOURCES;
     }
-    
+
     r->id = id;
     r->treeholder.address = 0;
     r->treeholder.generation = Vcb->superblock.generation;
-    r->treeholder.tree = no_tree ? NULL : t;
+    r->treeholder.tree = t;
     r->lastinode = 0;
-    r->path.Buffer = NULL;
+    r->dirty = FALSE;
+    r->received = FALSE;
+    r->reserved = NULL;
+    r->parent = 0;
+    r->send_ops = 0;
     RtlZeroMemory(&r->root_item, sizeof(ROOT_ITEM));
     r->root_item.num_references = 1;
     InitializeListHead(&r->fcbs);
-    
+
     RtlCopyMemory(ri, &r->root_item, sizeof(ROOT_ITEM));
-    
+
     // We ask here for a traverse_ptr to the item we're inserting, so we can
     // copy some of the tree's variables
-    
-    if (!insert_tree_item(Vcb, Vcb->root_root, id, TYPE_ROOT_ITEM, offset, ri, sizeof(ROOT_ITEM), &tp, Irp, rollback)) {
-        ERR("insert_tree_item failed\n");
+
+    Status = insert_tree_item(Vcb, Vcb->root_root, id, TYPE_ROOT_ITEM, offset, ri, sizeof(ROOT_ITEM), &tp, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("insert_tree_item returned %08x\n", Status);
         ExFreePool(ri);
-        
-        if (!no_tree)
+
+        if (t)
             ExFreePool(t);
-        
+
         ExFreePool(r->nonpaged);
         ExFreePool(r);
-        return STATUS_INTERNAL_ERROR;
+        return Status;
     }
-        
+
     ExInitializeResourceLite(&r->nonpaged->load_tree_lock);
-    
+
     InsertTailList(&Vcb->roots, &r->list_entry);
-    
+
     if (!no_tree) {
+        RtlZeroMemory(&t->header, sizeof(tree_header));
         t->header.fs_uuid = tp.tree->header.fs_uuid;
         t->header.address = 0;
         t->header.flags = HEADER_FLAG_MIXED_BACKREF | 1; // 1 == "written"? Why does the Linux driver record this?
@@ -963,313 +1071,34 @@ NTSTATUS create_root(device_extension* Vcb, UINT64 id, root** rootptr, BOOL no_t
         t->parent = NULL;
         t->paritem = NULL;
         t->root = r;
-        
+
         InitializeListHead(&t->itemlist);
-    
+
         t->new_address = 0;
         t->has_new_address = FALSE;
         t->updated_extents = FALSE;
-        
+
         InsertTailList(&Vcb->trees, &t->list_entry);
         t->list_entry_hash.Flink = NULL;
-        
+
         t->write = TRUE;
         Vcb->need_write = TRUE;
     }
-    
+
     *rootptr = r;
 
     return STATUS_SUCCESS;
 }
 
-// static void test_creating_root(device_extension* Vcb) {
-//     NTSTATUS Status;
-//     LIST_ENTRY rollback;
-//     UINT64 id;
-//     root* r;
-//     
-//     InitializeListHead(&rollback);
-//     
-//     if (Vcb->root_root->lastinode == 0)
-//         get_last_inode(Vcb, Vcb->root_root);
-//     
-//     id = Vcb->root_root->lastinode > 0x100 ? (Vcb->root_root->lastinode + 1) : 0x101;
-//     Status = create_root(Vcb, id, &r, &rollback);
-//     
-//     if (!NT_SUCCESS(Status)) {
-//         ERR("create_root returned %08x\n", Status);
-//         do_rollback(Vcb, &rollback);
-//     } else {
-//         Vcb->root_root->lastinode = id;
-//         clear_rollback(&rollback);
-//     }
-// }
-
-// static void test_alloc_chunk(device_extension* Vcb) {
-//     LIST_ENTRY rollback;
-//     chunk* c;
-//     
-//     InitializeListHead(&rollback);
-//     
-//     c = alloc_chunk(Vcb, BLOCK_FLAG_DATA | BLOCK_FLAG_RAID10, &rollback);
-//     if (!c) {
-//         ERR("alloc_chunk failed\n");
-//         do_rollback(Vcb, &rollback);
-//     } else {
-//         clear_rollback(&rollback);
-//     }
-// }
-
-// static void test_space_list(device_extension* Vcb) {
-//     chunk* c;
-//     int i, j;
-//     LIST_ENTRY* le;
-//     
-//     typedef struct {
-//         UINT64 address;
-//         UINT64 length;
-//         BOOL add;
-//     } space_test;
-//     
-//     static const space_test entries[] = {
-//         { 0x1000, 0x1000 },
-//         { 0x3000, 0x2000 },
-//         { 0x6000, 0x1000 },
-//         { 0, 0 }
-//     };
-//     
-//     static const space_test tests[] = {
-//         { 0x0, 0x800, TRUE }, 
-//         { 0x1800, 0x400, TRUE }, 
-//         { 0x800, 0x2000, TRUE }, 
-//         { 0x1000, 0x2000, TRUE }, 
-//         { 0x2000, 0x3800, TRUE }, 
-//         { 0x800, 0x1000, TRUE }, 
-//         { 0x1800, 0x1000, TRUE }, 
-//         { 0x5000, 0x800, TRUE }, 
-//         { 0x5000, 0x1000, TRUE }, 
-//         { 0x7000, 0x1000, TRUE }, 
-//         { 0x8000, 0x1000, TRUE },
-//         { 0x800, 0x800, TRUE }, 
-//         { 0x0, 0x3800, TRUE }, 
-//         { 0x1000, 0x2800, TRUE },
-//         { 0x1000, 0x1000, FALSE },
-//         { 0x800, 0x2000, FALSE },
-//         { 0x0, 0x3800, FALSE },
-//         { 0x2800, 0x1000, FALSE },
-//         { 0x1800, 0x2000, FALSE },
-//         { 0x3800, 0x1000, FALSE },
-//         { 0, 0, FALSE }
-//     };
-//     
-//     c = CONTAINING_RECORD(Vcb->chunks.Flink, chunk, list_entry);
-//     
-//     i = 0;
-//     while (tests[i].length > 0) {
-//         InitializeListHead(&c->space);
-//         InitializeListHead(&c->space_size);
-//         ERR("test %u\n", i);
-//         
-//         j = 0;
-//         while (entries[j].length > 0) {
-//             space* s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
-//             s->address = entries[j].address;
-//             s->size = entries[j].length;
-//             InsertTailList(&c->space, &s->list_entry);
-//             
-//             order_space_entry(s, &c->space_size);
-//             
-//             j++;
-//         }
-//         
-//         if (tests[i].add)
-//             space_list_add(Vcb, c, FALSE, tests[i].address, tests[i].length, NULL);
-//         else
-//             space_list_subtract(Vcb, c, FALSE, tests[i].address, tests[i].length, NULL);
-//         
-//         le = c->space.Flink;
-//         while (le != &c->space) {
-//             space* s = CONTAINING_RECORD(le, space, list_entry);
-//             
-//             ERR("(%llx,%llx)\n", s->address, s->size);
-//             
-//             le = le->Flink;
-//         }
-//         
-//         ERR("--\n");
-//         
-//         le = c->space_size.Flink;
-//         while (le != &c->space_size) {
-//             space* s = CONTAINING_RECORD(le, space, list_entry_size);
-//             
-//             ERR("(%llx,%llx)\n", s->address, s->size);
-//             
-//             le = le->Flink;
-//         }
-//         
-//         i++;
-//     }
-//     
-//     int3;
-// }
-
-#if 0
-void STDCALL tree_test(void* context) {
-    device_extension* Vcb = context;
-    NTSTATUS Status;
-    UINT64 id;
-    LARGE_INTEGER due_time, time;
-    KTIMER timer;
-    root* r;
-    LIST_ENTRY rollback;
-    ULONG seed;
-    
-    InitializeListHead(&rollback);
-    
-    KeInitializeTimer(&timer);
-    
-    id = InterlockedIncrement64(&Vcb->root_root->lastinode);
-    Status = create_root(Vcb, id, &r, FALSE, 0, NULL, &rollback);
-    if (!NT_SUCCESS(Status)) {
-        ERR("create_root returned %08x\n");
-        return;
-    }
-    
-    clear_rollback(Vcb, &rollback);
-    
-    due_time.QuadPart = (UINT64)1 * -10000000;
-    
-    KeQueryPerformanceCounter(&time);
-    seed = time.LowPart;
-    
-    while (TRUE) {
-        UINT32 i;
-        
-        FsRtlEnterFileSystem();
-        
-        ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
-        
-        for (i = 0; i < 100; i++) {
-            void* data;
-            ULONG datalen;
-            UINT64 objid, offset;
-            
-            objid = RtlRandomEx(&seed);
-            objid <<= 32;
-            objid |= RtlRandomEx(&seed);
-            
-            offset = RtlRandomEx(&seed);
-            offset <<= 32;
-            offset |= RtlRandomEx(&seed);
-            
-            datalen = 30;
-            data = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG);
-            
-            if (!insert_tree_item(Vcb, r, objid, 0xfd, offset, data, datalen, NULL, NULL, &rollback)) {
-                ERR("insert_tree_item failed\n");
-            }
-        }
-        
-        for (i = 0; i < 25; i++) {
-            KEY searchkey;
-            traverse_ptr tp;
-            
-            searchkey.obj_id = RtlRandomEx(&seed);
-            searchkey.obj_id <<= 32;
-            searchkey.obj_id |= RtlRandomEx(&seed);
-            
-            searchkey.obj_type = 0xfd;
-            
-            searchkey.offset = RtlRandomEx(&seed);
-            searchkey.offset <<= 32;
-            searchkey.offset |= RtlRandomEx(&seed);
-            
-            Status = find_item(Vcb, r, &tp, &searchkey, FALSE, NULL);
-            if (!NT_SUCCESS(Status)) {
-                ERR("error - find_item returned %08x\n", Status);
-            } else {
-                delete_tree_item(Vcb, &tp, &rollback);
-            }
-        }
-        
-        clear_rollback(Vcb, &rollback);
-        
-        ExReleaseResourceLite(&Vcb->tree_lock);
-        
-        FsRtlExitFileSystem();
-        
-        KeSetTimer(&timer, due_time, NULL);
-        
-        KeWaitForSingleObject(&timer, Executive, KernelMode, FALSE, NULL);
-    }
-}
-#endif
-
-// static void test_calc_thread(device_extension* Vcb) {
-//     UINT8* data;
-//     ULONG sectors, max_sectors, i, j;
-//     calc_job* cj;
-//     LARGE_INTEGER* sertimes;
-//     LARGE_INTEGER* partimes;
-//     LARGE_INTEGER time1, time2;
-//     
-//     max_sectors = 256;
-//     
-//     sertimes = ExAllocatePoolWithTag(PagedPool, sizeof(LARGE_INTEGER) * max_sectors, ALLOC_TAG);
-//     partimes = ExAllocatePoolWithTag(PagedPool, sizeof(LARGE_INTEGER) * max_sectors, ALLOC_TAG);
-//     RtlZeroMemory(sertimes, sizeof(LARGE_INTEGER) * max_sectors);
-//     RtlZeroMemory(partimes, sizeof(LARGE_INTEGER) * max_sectors);
-//     
-//     for (sectors = 1; sectors <= max_sectors; sectors++) {
-//         data = ExAllocatePoolWithTag(PagedPool, sectors * Vcb->superblock.sector_size, ALLOC_TAG);
-//         RtlZeroMemory(data, sectors * Vcb->superblock.sector_size);
-//         
-//         for (j = 0; j < 100; j++) {
-//             time1 = KeQueryPerformanceCounter(NULL);
-//             
-//             for (i = 0; i < sectors; i++) {
-//                 UINT32 tmp;
-//                 
-//                 tmp = ~calc_crc32c(0xffffffff, data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
-//             }
-//             
-//             time2 = KeQueryPerformanceCounter(NULL);
-//             
-//             sertimes[sectors - 1].QuadPart += time2.QuadPart - time1.QuadPart;
-//             
-//             time1 = KeQueryPerformanceCounter(NULL);
-//             
-//             add_calc_job(Vcb, data, sectors, &cj);
-//             KeWaitForSingleObject(&cj->event, Executive, KernelMode, FALSE, NULL);
-//             
-//             time2 = KeQueryPerformanceCounter(NULL);
-//             
-//             partimes[sectors - 1].QuadPart += time2.QuadPart - time1.QuadPart;
-//             
-//             free_calc_job(cj);
-//         }
-//         
-//         ExFreePool(data);
-//     }
-//     
-//     for (sectors = 1; sectors <= max_sectors; sectors++) {
-//         ERR("%u sectors: serial %llu, parallel %llu\n", sectors, sertimes[sectors - 1].QuadPart, partimes[sectors - 1].QuadPart);
-//     }
-//     
-//     ExFreePool(partimes);
-//     ExFreePool(sertimes);
-// }
-
-static NTSTATUS STDCALL set_label(device_extension* Vcb, FILE_FS_LABEL_INFORMATION* ffli) {
+static NTSTATUS set_label(_In_ device_extension* Vcb, _In_ FILE_FS_LABEL_INFORMATION* ffli) {
     ULONG utf8len;
     NTSTATUS Status;
-    USHORT vollen, i;
-//     HANDLE h;
-    
+    ULONG vollen, i;
+
     TRACE("label = %.*S\n", ffli->VolumeLabelLength / sizeof(WCHAR), ffli->VolumeLabel);
-    
+
     vollen = ffli->VolumeLabelLength;
-    
+
     for (i = 0; i < ffli->VolumeLabelLength / sizeof(WCHAR); i++) {
         if (ffli->VolumeLabel[i] == 0) {
             vollen = i * sizeof(WCHAR);
@@ -1279,45 +1108,35 @@ static NTSTATUS STDCALL set_label(device_extension* Vcb, FILE_FS_LABEL_INFORMATI
             goto end;
         }
     }
-    
+
     if (vollen == 0) {
         utf8len = 0;
     } else {
         Status = RtlUnicodeToUTF8N(NULL, 0, &utf8len, ffli->VolumeLabel, vollen);
         if (!NT_SUCCESS(Status))
             goto end;
-        
+
         if (utf8len > MAX_LABEL_SIZE) {
             Status = STATUS_INVALID_VOLUME_LABEL;
             goto end;
         }
     }
-    
+
     ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
-    
+
     if (utf8len > 0) {
         Status = RtlUnicodeToUTF8N((PCHAR)&Vcb->superblock.label, MAX_LABEL_SIZE, &utf8len, ffli->VolumeLabel, vollen);
         if (!NT_SUCCESS(Status))
             goto release;
     } else
         Status = STATUS_SUCCESS;
-    
+
     if (utf8len < MAX_LABEL_SIZE)
         RtlZeroMemory(Vcb->superblock.label + utf8len, MAX_LABEL_SIZE - utf8len);
-    
-//     test_tree_deletion(Vcb); // TESTING
-//     test_tree_splitting(Vcb);
-//     test_dropping_tree(Vcb);
-//     test_creating_root(Vcb);
-//     test_alloc_chunk(Vcb);
-//     test_space_list(Vcb);
-//     test_calc_thread(Vcb);
-    
+
     Vcb->need_write = TRUE;
-    
-//     PsCreateSystemThread(&h, 0, NULL, NULL, NULL, tree_test, Vcb);
-    
-release:  
+
+release:
     ExReleaseResourceLite(&Vcb->tree_lock);
 
 end:
@@ -1326,35 +1145,40 @@ end:
     return Status;
 }
 
-static NTSTATUS STDCALL drv_set_volume_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
+_Dispatch_type_(IRP_MJ_SET_VOLUME_INFORMATION)
+_Function_class_(DRIVER_DISPATCH)
+static NTSTATUS drv_set_volume_information(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp) {
     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
     device_extension* Vcb = DeviceObject->DeviceExtension;
     NTSTATUS Status;
     BOOL top_level;
 
-    TRACE("set volume information\n");
-    
     FsRtlEnterFileSystem();
 
+    TRACE("set volume information\n");
+
     top_level = is_top_level(Irp);
-    
-    if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) {
-        Status = part0_passthrough(DeviceObject, Irp);
-        goto exit;
+
+    if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
+        Status = vol_set_volume_information(DeviceObject, Irp);
+        goto end;
+    } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
+        Status = STATUS_INVALID_PARAMETER;
+        goto end;
     }
-    
+
     Status = STATUS_NOT_IMPLEMENTED;
-    
+
     if (Vcb->readonly) {
         Status = STATUS_MEDIA_WRITE_PROTECTED;
         goto end;
     }
-    
+
     if (Vcb->removing || Vcb->locked) {
         Status = STATUS_ACCESS_DENIED;
         goto end;
     }
-    
+
     switch (IrpSp->Parameters.SetVolume.FsInformationClass) {
         case FileFsControlInformation:
             FIXME("STUB: FileFsControlInformation\n");
@@ -1362,7 +1186,7 @@ static NTSTATUS STDCALL drv_set_volume_information(IN PDEVICE_OBJECT DeviceObjec
 
         case FileFsLabelInformation:
             TRACE("FileFsLabelInformation\n");
-    
+
             Status = set_label(Vcb, Irp->AssociatedIrp.SystemBuffer);
             break;
 
@@ -1374,346 +1198,367 @@ static NTSTATUS STDCALL drv_set_volume_information(IN PDEVICE_OBJECT DeviceObjec
             WARN("Unrecognized FsInformationClass 0x%x\n", IrpSp->Parameters.SetVolume.FsInformationClass);
             break;
     }
-    
+
 end:
     Irp->IoStatus.Status = Status;
     Irp->IoStatus.Information = 0;
 
+    TRACE("returning %08x\n", Status);
+
     IoCompleteRequest( Irp, IO_NO_INCREMENT );
-    
-exit:
-    if (top_level) 
+
+    if (top_level)
         IoSetTopLevelIrp(NULL);
-    
+
     FsRtlExitFileSystem();
 
     return Status;
 }
 
-static WCHAR* file_desc_fcb(fcb* fcb) {
+static WCHAR* file_desc_fcb(_In_ fcb* fcb) {
     char s[60];
+    NTSTATUS Status;
     UNICODE_STRING us;
     ANSI_STRING as;
-    
+
     if (fcb->debug_desc)
         return fcb->debug_desc;
-    
+
     if (fcb == fcb->Vcb->volume_fcb)
         return L"volume FCB";
-    
+
     fcb->debug_desc = ExAllocatePoolWithTag(PagedPool, 60 * sizeof(WCHAR), ALLOC_TAG);
     if (!fcb->debug_desc)
         return L"(memory error)";
-    
+
     // I know this is pretty hackish...
     // GCC doesn't like %llx in sprintf, and MSVC won't let us use swprintf
     // without the CRT, which breaks drivers.
-    
+
     sprintf(s, "subvol %x, inode %x", (UINT32)fcb->subvol->id, (UINT32)fcb->inode);
-    
+
     as.Buffer = s;
-    as.Length = as.MaximumLength = strlen(s);
-    
+    as.Length = as.MaximumLength = (USHORT)strlen(s);
+
     us.Buffer = fcb->debug_desc;
     us.MaximumLength = 60 * sizeof(WCHAR);
     us.Length = 0;
-    
-    RtlAnsiStringToUnicodeString(&us, &as, FALSE);
-    
+
+    Status = RtlAnsiStringToUnicodeString(&us, &as, FALSE);
+    if (!NT_SUCCESS(Status))
+        return L"(RtlAnsiStringToUnicodeString error)";
+
     us.Buffer[us.Length / sizeof(WCHAR)] = 0;
-    
+
     return fcb->debug_desc;
 }
 
-WCHAR* file_desc_fileref(file_ref* fileref) {
+WCHAR* file_desc_fileref(_In_ file_ref* fileref) {
     NTSTATUS Status;
     UNICODE_STRING fn;
-    
+    ULONG reqlen;
+
     if (fileref->debug_desc)
         return fileref->debug_desc;
-    
-    Status = fileref_get_filename(fileref, &fn, NULL);
-    if (!NT_SUCCESS(Status)) {
+
+    fn.Length = fn.MaximumLength = 0;
+    Status = fileref_get_filename(fileref, &fn, NULL, &reqlen);
+    if (Status != STATUS_BUFFER_OVERFLOW)
         return L"ERROR";
-    }
-    
-    fileref->debug_desc = ExAllocatePoolWithTag(PagedPool, fn.Length + sizeof(WCHAR), ALLOC_TAG);
-    if (!fileref->debug_desc) {
-        ExFreePool(fn.Buffer);
+
+    if (reqlen > 0xffff - sizeof(WCHAR))
+        return L"(too long)";
+
+    fileref->debug_desc = ExAllocatePoolWithTag(PagedPool, reqlen + sizeof(WCHAR), ALLOC_TAG);
+    if (!fileref->debug_desc)
         return L"(memory error)";
+
+    fn.Buffer = fileref->debug_desc;
+    fn.Length = 0;
+    fn.MaximumLength = (USHORT)(reqlen + sizeof(WCHAR));
+
+    Status = fileref_get_filename(fileref, &fn, NULL, &reqlen);
+    if (!NT_SUCCESS(Status)) {
+        ExFreePool(fileref->debug_desc);
+        fileref->debug_desc = NULL;
+        return L"ERROR";
     }
-    
-    RtlCopyMemory(fileref->debug_desc, fn.Buffer, fn.Length);
+
     fileref->debug_desc[fn.Length / sizeof(WCHAR)] = 0;
-    
-    ExFreePool(fn.Buffer);
-    
+
     return fileref->debug_desc;
 }
 
-WCHAR* file_desc(PFILE_OBJECT FileObject) {
+_Ret_z_
+WCHAR* file_desc(_In_ PFILE_OBJECT FileObject) {
     fcb* fcb = FileObject->FsContext;
     ccb* ccb = FileObject->FsContext2;
     file_ref* fileref = ccb ? ccb->fileref : NULL;
-    
+
     if (fileref)
         return file_desc_fileref(fileref);
     else
         return file_desc_fcb(fcb);
 }
 
-void send_notification_fileref(file_ref* fileref, ULONG filter_match, ULONG action) {
+void send_notification_fileref(_In_ file_ref* fileref, _In_ ULONG filter_match, _In_ ULONG action, _In_opt_ PUNICODE_STRING stream) {
     UNICODE_STRING fn;
     NTSTATUS Status;
+    ULONG reqlen;
     USHORT name_offset;
     fcb* fcb = fileref->fcb;
-    
-    Status = fileref_get_filename(fileref, &fn, &name_offset);
+
+    fn.Length = fn.MaximumLength = 0;
+    Status = fileref_get_filename(fileref, &fn, NULL, &reqlen);
+    if (Status != STATUS_BUFFER_OVERFLOW) {
+        ERR("fileref_get_filename returned %08x\n", Status);
+        return;
+    }
+
+    if (reqlen > 0xffff) {
+        WARN("reqlen was too long for FsRtlNotifyFilterReportChange\n");
+        return;
+    }
+
+    fn.Buffer = ExAllocatePoolWithTag(PagedPool, reqlen, ALLOC_TAG);
+    if (!fn.Buffer) {
+        ERR("out of memory\n");
+        return;
+    }
+
+    fn.MaximumLength = (USHORT)reqlen;
+    fn.Length = 0;
+
+    Status = fileref_get_filename(fileref, &fn, &name_offset, &reqlen);
     if (!NT_SUCCESS(Status)) {
         ERR("fileref_get_filename returned %08x\n", Status);
+        ExFreePool(fn.Buffer);
         return;
     }
-    
+
     FsRtlNotifyFilterReportChange(fcb->Vcb->NotifySync, &fcb->Vcb->DirNotifyList, (PSTRING)&fn, name_offset,
-                                  NULL, NULL, filter_match, action, NULL, NULL);
+                                  (PSTRING)stream, NULL, filter_match, action, NULL, NULL);
     ExFreePool(fn.Buffer);
 }
 
-void send_notification_fcb(file_ref* fileref, ULONG filter_match, ULONG action) {
+void send_notification_fcb(_In_ file_ref* fileref, _In_ ULONG filter_match, _In_ ULONG action, _In_opt_ PUNICODE_STRING stream) {
     fcb* fcb = fileref->fcb;
     LIST_ENTRY* le;
     NTSTATUS Status;
-    
+
     // no point looking for hardlinks if st_nlink == 1
     if (fileref->fcb->inode_item.st_nlink == 1) {
-        send_notification_fileref(fileref, filter_match, action);
+        send_notification_fileref(fileref, filter_match, action, stream);
         return;
     }
-    
+
     ExAcquireResourceExclusiveLite(&fcb->Vcb->fcb_lock, TRUE);
-    
+
     le = fcb->hardlinks.Flink;
     while (le != &fcb->hardlinks) {
         hardlink* hl = CONTAINING_RECORD(le, hardlink, list_entry);
         file_ref* parfr;
-        
+
         Status = open_fileref_by_inode(fcb->Vcb, fcb->subvol, hl->parent, &parfr, NULL);
-        
-        if (!NT_SUCCESS(Status)) {
+
+        if (!NT_SUCCESS(Status))
             ERR("open_fileref_by_inode returned %08x\n", Status);
-        } else if (!parfr->deleted) {
-            LIST_ENTRY* le2;
-            BOOL found = FALSE, deleted = FALSE;
-            UNICODE_STRING* fn;
-            
-            le2 = parfr->children.Flink;
-            while (le2 != &parfr->children) {
-                file_ref* fr2 = CONTAINING_RECORD(le2, file_ref, list_entry);
-                
-                if (fr2->index == hl->index) {
-                    found = TRUE;
-                    deleted = fr2->deleted;
-                    
-                    if (!deleted)
-                        fn = &fr2->filepart;
-                    
-                    break;
-                }
-                
-                le2 = le2->Flink;
+        else if (!parfr->deleted) {
+            UNICODE_STRING fn;
+            ULONG pathlen;
+
+            fn.Length = fn.MaximumLength = 0;
+            Status = fileref_get_filename(parfr, &fn, NULL, &pathlen);
+            if (Status != STATUS_BUFFER_OVERFLOW) {
+                ERR("fileref_get_filename returned %08x\n", Status);
+                free_fileref(fcb->Vcb, parfr);
+                break;
             }
-            
-            if (!found)
-                fn = &hl->name;
-            
-            if (!deleted) {
-                UNICODE_STRING path;
-                
-                Status = fileref_get_filename(parfr, &path, NULL);
-                if (!NT_SUCCESS(Status)) {
-                    ERR("fileref_get_filename returned %08x\n", Status);
-                } else {
-                    UNICODE_STRING fn2;
-                    ULONG name_offset;
-                    
-                    name_offset = path.Length;
-                    if (parfr != fileref->fcb->Vcb->root_fileref) name_offset += sizeof(WCHAR);
-                    
-                    fn2.Length = fn2.MaximumLength = fn->Length + name_offset;
-                    fn2.Buffer = ExAllocatePoolWithTag(PagedPool, fn2.MaximumLength, ALLOC_TAG);
-                    
-                    RtlCopyMemory(fn2.Buffer, path.Buffer, path.Length);
-                    if (parfr != fileref->fcb->Vcb->root_fileref) fn2.Buffer[path.Length / sizeof(WCHAR)] = '\\';
-                    RtlCopyMemory(&fn2.Buffer[name_offset / sizeof(WCHAR)], fn->Buffer, fn->Length);
-                    
-                    TRACE("%.*S\n", fn2.Length / sizeof(WCHAR), fn2.Buffer);
-                    
-                    FsRtlNotifyFilterReportChange(fcb->Vcb->NotifySync, &fcb->Vcb->DirNotifyList, (PSTRING)&fn2, name_offset,
-                                                  NULL, NULL, filter_match, action, NULL, NULL);
-                    
-                    ExFreePool(fn2.Buffer);
-                    ExFreePool(path.Buffer);
-                }
+
+            if (parfr != fcb->Vcb->root_fileref)
+                pathlen += sizeof(WCHAR);
+
+            if (pathlen + hl->name.Length > 0xffff) {
+                WARN("pathlen + hl->name.Length was too long for FsRtlNotifyFilterReportChange\n");
+                free_fileref(fcb->Vcb, parfr);
+                break;
+            }
+
+            fn.MaximumLength = (USHORT)(pathlen + hl->name.Length);
+            fn.Buffer = ExAllocatePoolWithTag(PagedPool, fn.MaximumLength, ALLOC_TAG);
+            if (!fn.Buffer) {
+                ERR("out of memory\n");
+                free_fileref(fcb->Vcb, parfr);
+                break;
             }
-            
-            free_fileref(parfr);
+
+            Status = fileref_get_filename(parfr, &fn, NULL, NULL);
+            if (!NT_SUCCESS(Status)) {
+                ERR("fileref_get_filename returned %08x\n", Status);
+                free_fileref(fcb->Vcb, parfr);
+                ExFreePool(fn.Buffer);
+                break;
+            }
+
+            if (parfr != fcb->Vcb->root_fileref) {
+                fn.Buffer[(pathlen / sizeof(WCHAR)) - 1] = '\\';
+                fn.Length += sizeof(WCHAR);
+            }
+
+            RtlCopyMemory(&fn.Buffer[pathlen / sizeof(WCHAR)], hl->name.Buffer, hl->name.Length);
+            fn.Length += hl->name.Length;
+
+            FsRtlNotifyFilterReportChange(fcb->Vcb->NotifySync, &fcb->Vcb->DirNotifyList, (PSTRING)&fn, (USHORT)pathlen,
+                                          (PSTRING)stream, NULL, filter_match, action, NULL, NULL);
+
+            ExFreePool(fn.Buffer);
+
+            free_fileref(fcb->Vcb, parfr);
         }
-        
+
         le = le->Flink;
     }
-    
+
     ExReleaseResourceLite(&fcb->Vcb->fcb_lock);
 }
 
-void mark_fcb_dirty(fcb* fcb) {
+void mark_fcb_dirty(_In_ fcb* fcb) {
     if (!fcb->dirty) {
 #ifdef DEBUG_FCB_REFCOUNTS
         LONG rc;
 #endif
-        dirty_fcb* dirt = ExAllocatePoolWithTag(NonPagedPool, sizeof(dirty_fcb), ALLOC_TAG);
-        
-        if (!dirt) {
-            ExFreePool("out of memory\n");
-            return;
-        }
-        
         fcb->dirty = TRUE;
-        
+
 #ifdef DEBUG_FCB_REFCOUNTS
         rc = InterlockedIncrement(&fcb->refcount);
         WARN("fcb %p: refcount now %i\n", fcb, rc);
 #else
         InterlockedIncrement(&fcb->refcount);
 #endif
-        
-        dirt->fcb = fcb;
-        
-        ExInterlockedInsertTailList(&fcb->Vcb->dirty_fcbs, &dirt->list_entry, &fcb->Vcb->dirty_fcbs_lock);
+
+        ExAcquireResourceExclusiveLite(&fcb->Vcb->dirty_fcbs_lock, TRUE);
+        InsertTailList(&fcb->Vcb->dirty_fcbs, &fcb->list_entry_dirty);
+        ExReleaseResourceLite(&fcb->Vcb->dirty_fcbs_lock);
     }
-    
+
     fcb->Vcb->need_write = TRUE;
 }
 
-void mark_fileref_dirty(file_ref* fileref) {
+void mark_fileref_dirty(_In_ file_ref* fileref) {
     if (!fileref->dirty) {
-        dirty_fileref* dirt = ExAllocatePoolWithTag(NonPagedPool, sizeof(dirty_fileref), ALLOC_TAG);
-        
-        if (!dirt) {
-            ExFreePool("out of memory\n");
-            return;
-        }
-        
         fileref->dirty = TRUE;
         increase_fileref_refcount(fileref);
-        
-        dirt->fileref = fileref;
-        
-        ExInterlockedInsertTailList(&fileref->fcb->Vcb->dirty_filerefs, &dirt->list_entry, &fileref->fcb->Vcb->dirty_filerefs_lock);
+
+        ExAcquireResourceExclusiveLite(&fileref->fcb->Vcb->dirty_filerefs_lock, TRUE);
+        InsertTailList(&fileref->fcb->Vcb->dirty_filerefs, &fileref->list_entry_dirty);
+        ExReleaseResourceLite(&fileref->fcb->Vcb->dirty_filerefs_lock);
     }
-    
+
     fileref->fcb->Vcb->need_write = TRUE;
 }
 
-void _free_fcb(fcb* fcb, const char* func, const char* file, unsigned int line) {
+#ifdef DEBUG_FCB_REFCOUNTS
+void _free_fcb(_Requires_exclusive_lock_held_(_Curr_->fcb_lock) _In_ device_extension* Vcb, _Inout_ fcb* fcb, _In_ const char* func) {
+#else
+void free_fcb(_Requires_exclusive_lock_held_(_Curr_->fcb_lock) _In_ device_extension* Vcb, _Inout_ fcb* fcb) {
+#endif
     LONG rc;
 
-// #ifdef DEBUG    
-//     if (!ExIsResourceAcquiredExclusiveLite(&fcb->Vcb->fcb_lock) && !ExIsResourceAcquiredExclusiveLite(&fcb->Vcb->tree_lock)) {
-//         ERR("fcb_lock not acquired exclusively\n");
-//         int3;
-//     }
-// #endif
-
     rc = InterlockedDecrement(&fcb->refcount);
-    
+
 #ifdef DEBUG_FCB_REFCOUNTS
-//     WARN("fcb %p: refcount now %i (%.*S)\n", fcb, rc, fcb->full_filename.Length / sizeof(WCHAR), fcb->full_filename.Buffer);
 #ifdef DEBUG_LONG_MESSAGES
-    _debug_message(func, file, line, "fcb %p: refcount now %i (subvol %llx, inode %llx)\n", fcb, rc, fcb->subvol ? fcb->subvol->id : 0, fcb->inode);
+    ERR("fcb %p: refcount now %i (subvol %llx, inode %llx)\n", fcb, rc, fcb->subvol ? fcb->subvol->id : 0, fcb->inode);
 #else
-    _debug_message(func, "fcb %p: refcount now %i (subvol %llx, inode %llx)\n", fcb, rc, fcb->subvol ? fcb->subvol->id : 0, fcb->inode);
+    ERR("fcb %p: refcount now %i (subvol %llx, inode %llx)\n", fcb, rc, fcb->subvol ? fcb->subvol->id : 0, fcb->inode);
 #endif
 #endif
-    
+
     if (rc > 0)
         return;
-    
-//     ExAcquireResourceExclusiveLite(&fcb->Vcb->fcb_lock, TRUE);
-    
+
     if (fcb->list_entry.Flink)
         RemoveEntryList(&fcb->list_entry);
-    
+
     if (fcb->list_entry_all.Flink)
         RemoveEntryList(&fcb->list_entry_all);
-    
-//     ExReleaseResourceLite(&fcb->Vcb->fcb_lock);
-   
+
     ExDeleteResourceLite(&fcb->nonpaged->resource);
     ExDeleteResourceLite(&fcb->nonpaged->paging_resource);
     ExDeleteResourceLite(&fcb->nonpaged->dir_children_lock);
-    ExFreePool(fcb->nonpaged);
-    
+
+    ExFreeToNPagedLookasideList(&Vcb->fcb_np_lookaside, fcb->nonpaged);
+
     if (fcb->sd)
         ExFreePool(fcb->sd);
-    
+
     if (fcb->adsxattr.Buffer)
         ExFreePool(fcb->adsxattr.Buffer);
-    
+
     if (fcb->reparse_xattr.Buffer)
         ExFreePool(fcb->reparse_xattr.Buffer);
-    
+
     if (fcb->ea_xattr.Buffer)
         ExFreePool(fcb->ea_xattr.Buffer);
-    
+
     if (fcb->adsdata.Buffer)
         ExFreePool(fcb->adsdata.Buffer);
-    
+
     if (fcb->debug_desc)
         ExFreePool(fcb->debug_desc);
-    
+
     while (!IsListEmpty(&fcb->extents)) {
         LIST_ENTRY* le = RemoveHeadList(&fcb->extents);
         extent* ext = CONTAINING_RECORD(le, extent, list_entry);
-        
+
         if (ext->csum)
             ExFreePool(ext->csum);
-        
-        ExFreePool(ext->data);
+
         ExFreePool(ext);
     }
-    
+
     while (!IsListEmpty(&fcb->hardlinks)) {
         LIST_ENTRY* le = RemoveHeadList(&fcb->hardlinks);
         hardlink* hl = CONTAINING_RECORD(le, hardlink, list_entry);
-        
+
         if (hl->name.Buffer)
             ExFreePool(hl->name.Buffer);
-        
+
         if (hl->utf8.Buffer)
             ExFreePool(hl->utf8.Buffer);
 
         ExFreePool(hl);
     }
-    
+
+    while (!IsListEmpty(&fcb->xattrs)) {
+        xattr* xa = CONTAINING_RECORD(RemoveHeadList(&fcb->xattrs), xattr, list_entry);
+
+        ExFreePool(xa);
+    }
+
     while (!IsListEmpty(&fcb->dir_children_index)) {
         LIST_ENTRY* le = RemoveHeadList(&fcb->dir_children_index);
         dir_child* dc = CONTAINING_RECORD(le, dir_child, list_entry_index);
-        
+
         ExFreePool(dc->utf8.Buffer);
         ExFreePool(dc->name.Buffer);
         ExFreePool(dc->name_uc.Buffer);
         ExFreePool(dc);
     }
-    
+
     if (fcb->hash_ptrs)
         ExFreePool(fcb->hash_ptrs);
-    
+
     if (fcb->hash_ptrs_uc)
         ExFreePool(fcb->hash_ptrs_uc);
-    
+
     FsRtlUninitializeFileLock(&fcb->lock);
-    
-    ExFreePool(fcb);
+
+    if (fcb->pool_type == NonPagedPool)
+        ExFreePool(fcb);
+    else
+        ExFreeToPagedLookasideList(&Vcb->fcb_lookaside, fcb);
+
 #ifdef DEBUG_FCB_REFCOUNTS
 #ifdef DEBUG_LONG_MESSAGES
     _debug_message(func, file, line, "freeing fcb %p\n", fcb);
@@ -1723,326 +1568,397 @@ void _free_fcb(fcb* fcb, const char* func, const char* file, unsigned int line)
 #endif
 }
 
-void _free_fileref(file_ref* fr, const char* func, const char* file, unsigned int line) {
+void free_fileref(_Requires_exclusive_lock_held_(_Curr_->fcb_lock) _In_ device_extension* Vcb, _Inout_ file_ref* fr) {
     LONG rc;
 
-// #ifdef DEBUG    
-//     if (!ExIsResourceAcquiredExclusiveLite(&fr->fcb->Vcb->fcb_lock) && !ExIsResourceAcquiredExclusiveLite(&fr->fcb->Vcb->tree_lock) && !fr->dirty) {
-//         ERR("fcb_lock not acquired exclusively\n");
-//         int3;
-//     }
-// #endif
-
     rc = InterlockedDecrement(&fr->refcount);
-    
+
 #ifdef DEBUG_FCB_REFCOUNTS
-#ifdef DEBUG_LONG_MESSAGES
-    _debug_message(func, file, line, "fileref %p: refcount now %i\n", fr, rc);
-#else
-    _debug_message(func, "fileref %p: refcount now %i\n", fr, rc);
+    ERR("fileref %p: refcount now %i\n", fr, rc);
 #endif
-#endif
-    
+
 #ifdef _DEBUG
     if (rc < 0) {
         ERR("fileref %p: refcount now %i\n", fr, rc);
         int3;
     }
 #endif
-    
+
     if (rc > 0)
         return;
-        
+
     if (fr->parent)
         ExAcquireResourceExclusiveLite(&fr->parent->nonpaged->children_lock, TRUE);
-    
+
     // FIXME - do we need a file_ref lock?
-    
+
     // FIXME - do delete if needed
-    
-    if (fr->filepart.Buffer)
-        ExFreePool(fr->filepart.Buffer);
-    
-    if (fr->filepart_uc.Buffer)
-        ExFreePool(fr->filepart_uc.Buffer);
-    
-    if (fr->utf8.Buffer)
-        ExFreePool(fr->utf8.Buffer);
-    
+
     if (fr->debug_desc)
         ExFreePool(fr->debug_desc);
-    
+
     ExDeleteResourceLite(&fr->nonpaged->children_lock);
-    
-    ExFreePool(fr->nonpaged);
-    
+    ExDeleteResourceLite(&fr->nonpaged->fileref_lock);
+
+    ExFreeToNPagedLookasideList(&Vcb->fileref_np_lookaside, fr->nonpaged);
+
     // FIXME - throw error if children not empty
-    
+
     if (fr->fcb->fileref == fr)
         fr->fcb->fileref = NULL;
-    
-    if (fr->dc)
+
+    if (fr->dc) {
+        if (fr->fcb->ads)
+            fr->dc->size = fr->fcb->adsdata.Length;
+
         fr->dc->fileref = NULL;
+    }
 
     if (fr->list_entry.Flink)
         RemoveEntryList(&fr->list_entry);
-    
+
     if (fr->parent) {
         ExReleaseResourceLite(&fr->parent->nonpaged->children_lock);
-        free_fileref(fr->parent);
+        free_fileref(Vcb, fr->parent);
     }
-    
-    free_fcb(fr->fcb);
-    ExFreePool(fr);
+
+    free_fcb(Vcb, fr->fcb);
+
+    ExFreeToPagedLookasideList(&Vcb->fileref_lookaside, fr);
 }
 
-static NTSTATUS STDCALL close_file(device_extension* Vcb, PFILE_OBJECT FileObject) {
+static NTSTATUS close_file(_In_ PFILE_OBJECT FileObject, _In_ PIRP Irp) {
     fcb* fcb;
     ccb* ccb;
     file_ref* fileref = NULL;
     LONG open_files;
-    
+    device_extension* Vcb;
+
+    UNUSED(Irp);
+
     TRACE("FileObject = %p\n", FileObject);
-    
-    open_files = InterlockedDecrement(&Vcb->open_files);
-    
+
     fcb = FileObject->FsContext;
     if (!fcb) {
         TRACE("FCB was NULL, returning success\n");
-        
-        if (open_files == 0 && Vcb->removing)
-            uninit(Vcb, FALSE);
-        
         return STATUS_SUCCESS;
     }
-    
+
+    open_files = InterlockedDecrement(&fcb->Vcb->open_files);
+
     ccb = FileObject->FsContext2;
-    
+
     TRACE("close called for %S (fcb == %p)\n", file_desc(FileObject), fcb);
-    
+
     // FIXME - make sure notification gets sent if file is being deleted
-    
-    if (ccb) {    
+
+    if (ccb) {
         if (ccb->query_string.Buffer)
             RtlFreeUnicodeString(&ccb->query_string);
-        
+
         if (ccb->filename.Buffer)
             ExFreePool(ccb->filename.Buffer);
-        
+
         // FIXME - use refcounts for fileref
         fileref = ccb->fileref;
-        
-        ExFreePool(ccb);
-    }
-    
-    CcUninitializeCacheMap(FileObject, NULL, NULL);
-    
-    if (open_files == 0 && Vcb->removing) {
-        uninit(Vcb, FALSE);
+
+        if (fcb->Vcb->running_sends > 0) {
+            BOOL send_cancelled = FALSE;
+
+            ExAcquireResourceExclusiveLite(&fcb->Vcb->send_load_lock, TRUE);
+
+            if (ccb->send) {
+                ccb->send->cancelling = TRUE;
+                send_cancelled = TRUE;
+                KeSetEvent(&ccb->send->cleared_event, 0, FALSE);
+            }
+
+            ExReleaseResourceLite(&fcb->Vcb->send_load_lock);
+
+            if (send_cancelled) {
+                while (ccb->send) {
+                    ExAcquireResourceExclusiveLite(&fcb->Vcb->send_load_lock, TRUE);
+                    ExReleaseResourceLite(&fcb->Vcb->send_load_lock);
+                }
+            }
+        }
+
+        ExFreePool(ccb);
+    }
+
+    CcUninitializeCacheMap(FileObject, NULL, NULL);
+
+    if (open_files == 0 && fcb->Vcb->removing) {
+        uninit(fcb->Vcb, FALSE);
         return STATUS_SUCCESS;
     }
-    
-    if (!(Vcb->Vpb->Flags & VPB_MOUNTED))
+
+    if (!(fcb->Vcb->Vpb->Flags & VPB_MOUNTED))
         return STATUS_SUCCESS;
-    
+
+    Vcb = fcb->Vcb;
+
     ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
-    
+
     if (fileref)
-        free_fileref(fileref);
+        free_fileref(fcb->Vcb, fileref);
     else
-        free_fcb(fcb);
-    
+        free_fcb(Vcb, fcb);
+
     ExReleaseResourceLite(&Vcb->fcb_lock);
-    
+
     return STATUS_SUCCESS;
 }
 
-void STDCALL uninit(device_extension* Vcb, BOOL flush) {
-    space* s;
+void uninit(_In_ device_extension* Vcb, _In_ BOOL flush) {
     UINT64 i;
-    LIST_ENTRY rollback;
     NTSTATUS Status;
     LIST_ENTRY* le;
     LARGE_INTEGER time;
-    
-    Vcb->removing = TRUE;
-    
+
+    if (!Vcb->removing) {
+        ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
+        Vcb->removing = TRUE;
+        ExReleaseResourceLite(&Vcb->tree_lock);
+    }
+
     RemoveEntryList(&Vcb->list_entry);
-    
+
     if (Vcb->balance.thread) {
         Vcb->balance.paused = FALSE;
         Vcb->balance.stopping = TRUE;
         KeSetEvent(&Vcb->balance.event, 0, FALSE);
         KeWaitForSingleObject(&Vcb->balance.finished, Executive, KernelMode, FALSE, NULL);
     }
-    
+
+    if (Vcb->scrub.thread) {
+        Vcb->scrub.paused = FALSE;
+        Vcb->scrub.stopping = TRUE;
+        KeSetEvent(&Vcb->scrub.event, 0, FALSE);
+        KeWaitForSingleObject(&Vcb->scrub.finished, Executive, KernelMode, FALSE, NULL);
+    }
+
+    if (Vcb->running_sends != 0) {
+        BOOL send_cancelled = FALSE;
+
+        ExAcquireResourceExclusiveLite(&Vcb->send_load_lock, TRUE);
+
+        le = Vcb->send_ops.Flink;
+        while (le != &Vcb->send_ops) {
+            send_info* send = CONTAINING_RECORD(le, send_info, list_entry);
+
+            if (!send->cancelling) {
+                send->cancelling = TRUE;
+                send_cancelled = TRUE;
+                send->ccb = NULL;
+                KeSetEvent(&send->cleared_event, 0, FALSE);
+            }
+
+            le = le->Flink;
+        }
+
+        ExReleaseResourceLite(&Vcb->send_load_lock);
+
+        if (send_cancelled) {
+            while (Vcb->running_sends != 0) {
+                ExAcquireResourceExclusiveLite(&Vcb->send_load_lock, TRUE);
+                ExReleaseResourceLite(&Vcb->send_load_lock);
+            }
+        }
+    }
+
     Status = registry_mark_volume_unmounted(&Vcb->superblock.uuid);
     if (!NT_SUCCESS(Status) && Status != STATUS_TOO_LATE)
         WARN("registry_mark_volume_unmounted returned %08x\n", Status);
-    
+
     if (flush) {
-        InitializeListHead(&rollback);
-        
         ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
 
-        if (Vcb->need_write && !Vcb->readonly)
-            do_write(Vcb, NULL, &rollback);
-        
+        if (Vcb->need_write && !Vcb->readonly) {
+            Status = do_write(Vcb, NULL);
+            if (!NT_SUCCESS(Status))
+                ERR("do_write returned %08x\n", Status);
+        }
+
         free_trees(Vcb);
-        
-        clear_rollback(Vcb, &rollback);
 
         ExReleaseResourceLite(&Vcb->tree_lock);
     }
-    
+
     for (i = 0; i < Vcb->calcthreads.num_threads; i++) {
         Vcb->calcthreads.threads[i].quit = TRUE;
     }
-    
+
     KeSetEvent(&Vcb->calcthreads.event, 0, FALSE);
-        
+
     for (i = 0; i < Vcb->calcthreads.num_threads; i++) {
         KeWaitForSingleObject(&Vcb->calcthreads.threads[i].finished, Executive, KernelMode, FALSE, NULL);
-        
+
         ZwClose(Vcb->calcthreads.threads[i].handle);
     }
-    
+
     ExDeleteResourceLite(&Vcb->calcthreads.lock);
     ExFreePool(Vcb->calcthreads.threads);
-    
+
     time.QuadPart = 0;
     KeSetTimer(&Vcb->flush_thread_timer, time, NULL); // trigger the timer early
     KeWaitForSingleObject(&Vcb->flush_thread_finished, Executive, KernelMode, FALSE, NULL);
-    
-    free_fcb(Vcb->volume_fcb);
-    
+
+    ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
+    free_fcb(Vcb, Vcb->volume_fcb);
+    free_fcb(Vcb, Vcb->dummy_fcb);
+    ExReleaseResourceLite(&Vcb->fcb_lock);
+
     if (Vcb->root_file)
         ObDereferenceObject(Vcb->root_file);
-    
+
     le = Vcb->chunks.Flink;
     while (le != &Vcb->chunks) {
         chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
-        
+
         if (c->cache) {
-            free_fcb(c->cache);
+            ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
+            free_fcb(Vcb, c->cache);
+            ExReleaseResourceLite(&Vcb->fcb_lock);
             c->cache = NULL;
         }
-        
+
         le = le->Flink;
     }
 
     while (!IsListEmpty(&Vcb->roots)) {
-        LIST_ENTRY* le = RemoveHeadList(&Vcb->roots);
-        root* r = CONTAINING_RECORD(le, root, list_entry);
+        root* r = CONTAINING_RECORD(RemoveHeadList(&Vcb->roots), root, list_entry);
 
         ExDeleteResourceLite(&r->nonpaged->load_tree_lock);
         ExFreePool(r->nonpaged);
         ExFreePool(r);
     }
-    
+
     while (!IsListEmpty(&Vcb->chunks)) {
-        chunk* c;
-        
-        le = RemoveHeadList(&Vcb->chunks);
-        c = CONTAINING_RECORD(le, chunk, list_entry);
-        
+        chunk* c = CONTAINING_RECORD(RemoveHeadList(&Vcb->chunks), chunk, list_entry);
+
         while (!IsListEmpty(&c->space)) {
             LIST_ENTRY* le2 = RemoveHeadList(&c->space);
-            s = CONTAINING_RECORD(le2, space, list_entry);
-            
+            space* s = CONTAINING_RECORD(le2, space, list_entry);
+
             ExFreePool(s);
         }
-        
+
         while (!IsListEmpty(&c->deleting)) {
             LIST_ENTRY* le2 = RemoveHeadList(&c->deleting);
-            s = CONTAINING_RECORD(le2, space, list_entry);
-            
+            space* s = CONTAINING_RECORD(le2, space, list_entry);
+
             ExFreePool(s);
         }
-        
+
         if (c->devices)
             ExFreePool(c->devices);
-        
-        if (c->cache)
-            free_fcb(c->cache);
-        
+
+        if (c->cache) {
+            ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
+            free_fcb(Vcb, c->cache);
+            ExReleaseResourceLite(&Vcb->fcb_lock);
+        }
+
+        ExDeleteResourceLite(&c->range_locks_lock);
+        ExDeleteResourceLite(&c->partial_stripes_lock);
         ExDeleteResourceLite(&c->lock);
         ExDeleteResourceLite(&c->changed_extents_lock);
-        
+
         ExFreePool(c->chunk_item);
         ExFreePool(c);
     }
-    
+
     // FIXME - free any open fcbs?
-    
+
     while (!IsListEmpty(&Vcb->devices)) {
-        LIST_ENTRY* le = RemoveHeadList(&Vcb->devices);
-        device* dev = CONTAINING_RECORD(le, device, list_entry);
-        
+        device* dev = CONTAINING_RECORD(RemoveHeadList(&Vcb->devices), device, list_entry);
+
         while (!IsListEmpty(&dev->space)) {
             LIST_ENTRY* le2 = RemoveHeadList(&dev->space);
             space* s = CONTAINING_RECORD(le2, space, list_entry);
-            
+
             ExFreePool(s);
         }
-        
+
         ExFreePool(dev);
     }
-    
+
+    ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE);
+    while (!IsListEmpty(&Vcb->scrub.errors)) {
+        scrub_error* err = CONTAINING_RECORD(RemoveHeadList(&Vcb->scrub.errors), scrub_error, list_entry);
+
+        ExFreePool(err);
+    }
+    ExReleaseResourceLite(&Vcb->scrub.stats_lock);
+
     ExDeleteResourceLite(&Vcb->fcb_lock);
     ExDeleteResourceLite(&Vcb->load_lock);
     ExDeleteResourceLite(&Vcb->tree_lock);
     ExDeleteResourceLite(&Vcb->chunk_lock);
-    
+    ExDeleteResourceLite(&Vcb->dirty_fcbs_lock);
+    ExDeleteResourceLite(&Vcb->dirty_filerefs_lock);
+    ExDeleteResourceLite(&Vcb->dirty_subvols_lock);
+    ExDeleteResourceLite(&Vcb->scrub.stats_lock);
+    ExDeleteResourceLite(&Vcb->send_load_lock);
+
     ExDeletePagedLookasideList(&Vcb->tree_data_lookaside);
     ExDeletePagedLookasideList(&Vcb->traverse_ptr_lookaside);
-    ExDeletePagedLookasideList(&Vcb->rollback_item_lookaside);
     ExDeletePagedLookasideList(&Vcb->batch_item_lookaside);
+    ExDeletePagedLookasideList(&Vcb->fileref_lookaside);
+    ExDeletePagedLookasideList(&Vcb->fcb_lookaside);
+    ExDeletePagedLookasideList(&Vcb->name_bit_lookaside);
     ExDeleteNPagedLookasideList(&Vcb->range_lock_lookaside);
-    
+    ExDeleteNPagedLookasideList(&Vcb->fileref_np_lookaside);
+    ExDeleteNPagedLookasideList(&Vcb->fcb_np_lookaside);
+
     ZwClose(Vcb->flush_thread_handle);
 }
 
-NTSTATUS delete_fileref(file_ref* fileref, PFILE_OBJECT FileObject, PIRP Irp, LIST_ENTRY* rollback) {
+NTSTATUS delete_fileref(_In_ file_ref* fileref, _In_ PFILE_OBJECT FileObject, _In_opt_ PIRP Irp, _In_ LIST_ENTRY* rollback) {
     LARGE_INTEGER newlength, time;
     BTRFS_TIME now;
     NTSTATUS Status;
+    ULONG utf8len = 0;
 
     KeQuerySystemTime(&time);
     win_time_to_unix(time, &now);
 
     ExAcquireResourceExclusiveLite(fileref->fcb->Header.Resource, TRUE);
-    
+
     if (fileref->deleted) {
         ExReleaseResourceLite(fileref->fcb->Header.Resource);
         return STATUS_SUCCESS;
     }
-    
+
+    if (fileref->fcb->subvol->send_ops > 0) {
+        ExReleaseResourceLite(fileref->fcb->Header.Resource);
+        return STATUS_ACCESS_DENIED;
+    }
+
     fileref->deleted = TRUE;
     mark_fileref_dirty(fileref);
-    
+
     // delete INODE_ITEM (0x1)
 
     TRACE("nlink = %u\n", fileref->fcb->inode_item.st_nlink);
-    
+
     if (!fileref->fcb->ads) {
         if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
             LIST_ENTRY* le;
-            
+
             mark_fcb_dirty(fileref->fcb);
-            
+
             fileref->fcb->inode_item_changed = TRUE;
-            
+
             if (fileref->fcb->inode_item.st_nlink > 1) {
                 fileref->fcb->inode_item.st_nlink--;
                 fileref->fcb->inode_item.transid = fileref->fcb->Vcb->superblock.generation;
                 fileref->fcb->inode_item.sequence++;
                 fileref->fcb->inode_item.st_ctime = now;
             } else {
-                fileref->fcb->deleted = TRUE;
-            
                 // excise extents
-                
+
                 if (fileref->fcb->type != BTRFS_TYPE_DIRECTORY && fileref->fcb->inode_item.st_size > 0) {
                     Status = excise_extents(fileref->fcb->Vcb, fileref->fcb, 0, sector_align(fileref->fcb->inode_item.st_size, fileref->fcb->Vcb->superblock.sector_size), Irp, rollback);
                     if (!NT_SUCCESS(Status)) {
@@ -2051,51 +1967,67 @@ NTSTATUS delete_fileref(file_ref* fileref, PFILE_OBJECT FileObject, PIRP Irp, LI
                         return Status;
                     }
                 }
-                
+
                 fileref->fcb->Header.AllocationSize.QuadPart = 0;
                 fileref->fcb->Header.FileSize.QuadPart = 0;
                 fileref->fcb->Header.ValidDataLength.QuadPart = 0;
-                
+
                 if (FileObject) {
                     CC_FILE_SIZES ccfs;
-                    
+
                     ccfs.AllocationSize = fileref->fcb->Header.AllocationSize;
                     ccfs.FileSize = fileref->fcb->Header.FileSize;
                     ccfs.ValidDataLength = fileref->fcb->Header.ValidDataLength;
-                    
-                    CcSetFileSizes(FileObject, &ccfs);
+
+                    Status = STATUS_SUCCESS;
+
+                    _SEH2_TRY {
+                        CcSetFileSizes(FileObject, &ccfs);
+                    } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
+                        Status = _SEH2_GetExceptionCode();
+                    } _SEH2_END;
+
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("CcSetFileSizes threw exception %08x\n", Status);
+                        ExReleaseResourceLite(fileref->fcb->Header.Resource);
+                        return Status;
+                    }
                 }
+
+                fileref->fcb->deleted = TRUE;
             }
-                
-            le = fileref->fcb->hardlinks.Flink;
-            while (le != &fileref->fcb->hardlinks) {
-                hardlink* hl = CONTAINING_RECORD(le, hardlink, list_entry);
-                
-                if (hl->parent == fileref->parent->fcb->inode && hl->index == fileref->index) {
-                    RemoveEntryList(&hl->list_entry);
-                    
-                    if (hl->name.Buffer)
-                        ExFreePool(hl->name.Buffer);
-                    
-                    if (hl->utf8.Buffer)
-                        ExFreePool(hl->utf8.Buffer);
-                    
-                    ExFreePool(hl);
-                    break;
+
+            if (fileref->dc) {
+                le = fileref->fcb->hardlinks.Flink;
+                while (le != &fileref->fcb->hardlinks) {
+                    hardlink* hl = CONTAINING_RECORD(le, hardlink, list_entry);
+
+                    if (hl->parent == fileref->parent->fcb->inode && hl->index == fileref->dc->index) {
+                        RemoveEntryList(&hl->list_entry);
+
+                        if (hl->name.Buffer)
+                            ExFreePool(hl->name.Buffer);
+
+                        if (hl->utf8.Buffer)
+                            ExFreePool(hl->utf8.Buffer);
+
+                        ExFreePool(hl);
+                        break;
+                    }
+
+                    le = le->Flink;
                 }
-                
-                le = le->Flink;
             }
-        } else { // subvolume
+        } else if (fileref->fcb->subvol->parent == fileref->parent->fcb->subvol->id) { // valid subvolume
             if (fileref->fcb->subvol->root_item.num_references > 1) {
                 fileref->fcb->subvol->root_item.num_references--;
-                
+
                 mark_fcb_dirty(fileref->fcb); // so ROOT_ITEM gets updated
             } else {
                 // FIXME - we need a lock here
-                
+
                 RemoveEntryList(&fileref->fcb->subvol->list_entry);
-                
+
                 InsertTailList(&fileref->fcb->Vcb->drop_roots, &fileref->fcb->subvol->list_entry);
             }
         }
@@ -2103,144 +2035,200 @@ NTSTATUS delete_fileref(file_ref* fileref, PFILE_OBJECT FileObject, PIRP Irp, LI
         fileref->fcb->deleted = TRUE;
         mark_fcb_dirty(fileref->fcb);
     }
-    
+
     // remove dir_child from parent
-    
+
     if (fileref->dc) {
+        TRACE("delete file %.*S\n", fileref->dc->name.Length / sizeof(WCHAR), fileref->dc->name.Buffer);
+
         ExAcquireResourceExclusiveLite(&fileref->parent->fcb->nonpaged->dir_children_lock, TRUE);
         RemoveEntryList(&fileref->dc->list_entry_index);
-        remove_dir_child_from_hash_lists(fileref->parent->fcb, fileref->dc);
+
+        if (!fileref->fcb->ads)
+            remove_dir_child_from_hash_lists(fileref->parent->fcb, fileref->dc);
+
         ExReleaseResourceLite(&fileref->parent->fcb->nonpaged->dir_children_lock);
-        
-        ExFreePool(fileref->dc->utf8.Buffer);
+
+        if (!fileref->oldutf8.Buffer)
+            fileref->oldutf8 = fileref->dc->utf8;
+        else
+            ExFreePool(fileref->dc->utf8.Buffer);
+
+        utf8len = fileref->dc->utf8.Length;
+
+        fileref->oldindex = fileref->dc->index;
+
         ExFreePool(fileref->dc->name.Buffer);
         ExFreePool(fileref->dc->name_uc.Buffer);
         ExFreePool(fileref->dc);
-        
+
         fileref->dc = NULL;
     }
-    
+
     // update INODE_ITEM of parent
-    
-    TRACE("delete file %.*S\n", fileref->filepart.Length / sizeof(WCHAR), fileref->filepart.Buffer);
+
     ExAcquireResourceExclusiveLite(fileref->parent->fcb->Header.Resource, TRUE);
-    TRACE("fileref->parent->fcb->inode_item.st_size (inode %llx) was %llx\n", fileref->parent->fcb->inode, fileref->parent->fcb->inode_item.st_size);
-    fileref->parent->fcb->inode_item.st_size -= fileref->utf8.Length * 2;
-    TRACE("fileref->parent->fcb->inode_item.st_size (inode %llx) now %llx\n", fileref->parent->fcb->inode, fileref->parent->fcb->inode_item.st_size);
+
     fileref->parent->fcb->inode_item.transid = fileref->fcb->Vcb->superblock.generation;
     fileref->parent->fcb->inode_item.sequence++;
     fileref->parent->fcb->inode_item.st_ctime = now;
-    fileref->parent->fcb->inode_item.st_mtime = now;
-    ExReleaseResourceLite(fileref->parent->fcb->Header.Resource);
+
+    if (!fileref->fcb->ads) {
+        TRACE("fileref->parent->fcb->inode_item.st_size (inode %llx) was %llx\n", fileref->parent->fcb->inode, fileref->parent->fcb->inode_item.st_size);
+        fileref->parent->fcb->inode_item.st_size -= utf8len * 2;
+        TRACE("fileref->parent->fcb->inode_item.st_size (inode %llx) now %llx\n", fileref->parent->fcb->inode, fileref->parent->fcb->inode_item.st_size);
+        fileref->parent->fcb->inode_item.st_mtime = now;
+    }
 
     fileref->parent->fcb->inode_item_changed = TRUE;
+    ExReleaseResourceLite(fileref->parent->fcb->Header.Resource);
+
+    if (!fileref->fcb->ads && fileref->parent->dc)
+        send_notification_fcb(fileref->parent, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED, NULL);
+
     mark_fcb_dirty(fileref->parent->fcb);
-    
-    send_notification_fcb(fileref->parent, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED);
-    
+
     fileref->fcb->subvol->root_item.ctransid = fileref->fcb->Vcb->superblock.generation;
     fileref->fcb->subvol->root_item.ctime = now;
-    
+
     newlength.QuadPart = 0;
-    
+
     if (FileObject && !CcUninitializeCacheMap(FileObject, &newlength, NULL))
         TRACE("CcUninitializeCacheMap failed\n");
 
     ExReleaseResourceLite(fileref->fcb->Header.Resource);
-    
+
     return STATUS_SUCCESS;
 }
 
-static NTSTATUS STDCALL drv_cleanup(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
+_Dispatch_type_(IRP_MJ_CLEANUP)
+_Function_class_(DRIVER_DISPATCH)
+static NTSTATUS drv_cleanup(_In_ PDEVICE_OBJECT DeviceObject, _In_ PIRP Irp) {
     NTSTATUS Status;
     PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
     PFILE_OBJECT FileObject = IrpSp->FileObject;
     device_extension* Vcb = DeviceObject->DeviceExtension;
-    fcb* fcb;
+    fcb* fcb = FileObject->FsContext;
     BOOL top_level;
 
-    TRACE("cleanup\n");
-    
     FsRtlEnterFileSystem();
 
+    TRACE("cleanup\n");
+
     top_level = is_top_level(Irp);
-    
-    if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) {
-        Status = part0_passthrough(DeviceObject, Irp);
-        goto exit2;
-    }
-    
-    if (DeviceObject == devobj) {
+
+    if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
+        Status = vol_cleanup(DeviceObject, Irp);
+        goto exit;
+    } else if (DeviceObject == master_devobj) {
         TRACE("closing file system\n");
         Status = STATUS_SUCCESS;
         goto exit;
+    } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
+        Status = STATUS_INVALID_PARAMETER;
+        goto exit;
+    }
+
+    if (FileObject->Flags & FO_CLEANUP_COMPLETE) {
+        TRACE("FileObject %p already cleaned up\n", FileObject);
+        Status = STATUS_SUCCESS;
+        goto exit;
+    }
+
+    if (!fcb) {
+        ERR("fcb was NULL\n");
+        Status = STATUS_INVALID_PARAMETER;
+        goto exit;
     }
-    
+
+    // We have to use the pointer to Vcb stored in the fcb, as we can receive cleanup
+    // messages belonging to other devices.
+
     if (FileObject && FileObject->FsContext) {
         LONG oc;
         ccb* ccb;
         file_ref* fileref;
-        
-        fcb = FileObject->FsContext;
+        BOOL locked = TRUE;
+
         ccb = FileObject->FsContext2;
         fileref = ccb ? ccb->fileref : NULL;
-        
+
         TRACE("cleanup called for FileObject %p\n", FileObject);
         TRACE("fileref %p (%S), refcount = %u, open_count = %u\n", fileref, file_desc(FileObject), fileref ? fileref->refcount : 0, fileref ? fileref->open_count : 0);
-        
+
+        ExAcquireResourceSharedLite(&fcb->Vcb->tree_lock, TRUE);
+
+        ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE);
+
         IoRemoveShareAccess(FileObject, &fcb->share_access);
-        
-        FsRtlNotifyCleanup(Vcb->NotifySync, &Vcb->DirNotifyList, ccb);    
-        
+
+        if (ccb)
+            FsRtlNotifyCleanup(fcb->Vcb->NotifySync, &fcb->Vcb->DirNotifyList, ccb);
+
         if (fileref) {
             oc = InterlockedDecrement(&fileref->open_count);
 #ifdef DEBUG_FCB_REFCOUNTS
             ERR("fileref %p: open_count now %i\n", fileref, oc);
 #endif
         }
-        
+
         if (ccb && ccb->options & FILE_DELETE_ON_CLOSE && fileref)
             fileref->delete_on_close = TRUE;
-        
-        if (fileref && fileref->delete_on_close && fcb->type == BTRFS_TYPE_DIRECTORY && fcb->inode_item.st_size > 0)
+
+        if (fileref && fileref->delete_on_close && fcb->type == BTRFS_TYPE_DIRECTORY && fcb->inode_item.st_size > 0 && fcb != fcb->Vcb->dummy_fcb)
             fileref->delete_on_close = FALSE;
-        
-        if (Vcb->locked && Vcb->locked_fileobj == FileObject) {
+
+        if (fcb->Vcb->locked && fcb->Vcb->locked_fileobj == FileObject) {
             TRACE("unlocking volume\n");
-            do_unlock_volume(Vcb);
+            do_unlock_volume(fcb->Vcb);
             FsRtlNotifyVolumeEvent(FileObject, FSRTL_VOLUME_UNLOCK);
         }
-        
+
+        if (ccb && ccb->reserving) {
+            fcb->subvol->reserved = NULL;
+            ccb->reserving = FALSE;
+            // FIXME - flush all of subvol's fcbs
+        }
+
         if (fileref && oc == 0) {
-            if (!Vcb->removing) {
-                LIST_ENTRY rollback;
-        
-                InitializeListHead(&rollback);
-            
+            if (!fcb->Vcb->removing) {
                 if (fileref && fileref->delete_on_close && fileref != fcb->Vcb->root_fileref && fcb != fcb->Vcb->volume_fcb) {
-                    send_notification_fileref(fileref, fcb->type == BTRFS_TYPE_DIRECTORY ? FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_REMOVED);
-                    
-                    ExAcquireResourceSharedLite(&fcb->Vcb->tree_lock, TRUE);
-                    
+                    LIST_ENTRY rollback;
+
+                    InitializeListHead(&rollback);
+
+                    if (!fileref->fcb->ads || fileref->dc) {
+                        if (fileref->fcb->ads) {
+                            send_notification_fileref(fileref->parent, fcb->type == BTRFS_TYPE_DIRECTORY ? FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME,
+                                                      FILE_ACTION_REMOVED, &fileref->dc->name);
+                        } else
+                            send_notification_fileref(fileref, fcb->type == BTRFS_TYPE_DIRECTORY ? FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_REMOVED, NULL);
+                    }
+
+                    ExReleaseResourceLite(fcb->Header.Resource);
+                    locked = FALSE;
+
+                    // fcb_lock needs to be acquired before fcb->Header.Resource
                     ExAcquireResourceExclusiveLite(&fcb->Vcb->fcb_lock, TRUE);
-                    
+
                     Status = delete_fileref(fileref, FileObject, Irp, &rollback);
                     if (!NT_SUCCESS(Status)) {
                         ERR("delete_fileref returned %08x\n", Status);
-                        do_rollback(Vcb, &rollback);
+                        do_rollback(fcb->Vcb, &rollback);
                         ExReleaseResourceLite(&fcb->Vcb->fcb_lock);
                         ExReleaseResourceLite(&fcb->Vcb->tree_lock);
                         goto exit;
                     }
-                    
+
                     ExReleaseResourceLite(&fcb->Vcb->fcb_lock);
-                    
-                    ExReleaseResourceLite(&fcb->Vcb->tree_lock);
-                    clear_rollback(Vcb, &rollback);
+
+                    locked = FALSE;
+
+                    clear_rollback(&rollback);
                 } else if (FileObject->Flags & FO_CACHE_SUPPORTED && fcb->nonpaged->segment_object.DataSectionObject) {
                     IO_STATUS_BLOCK iosb;
                     CcFlushCache(FileObject->SectionObjectPointer, NULL, 0, &iosb);
-                    
+
                     if (!NT_SUCCESS(iosb.Status)) {
                         ERR("CcFlushCache returned %08x\n", iosb.Status);
                     }
@@ -2251,44 +2239,51 @@ static NTSTATUS STDCALL drv_cleanup(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp)
                     }
 
                     CcPurgeCacheSection(&fcb->nonpaged->segment_object, NULL, 0, FALSE);
-                    
+
                     TRACE("flushed cache on close (FileObject = %p, fcb = %p, AllocationSize = %llx, FileSize = %llx, ValidDataLength = %llx)\n",
                         FileObject, fcb, fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
                 }
             }
-            
+
             if (fcb->Vcb && fcb != fcb->Vcb->volume_fcb)
                 CcUninitializeCacheMap(FileObject, NULL, NULL);
         }
-        
+
+        if (locked)
+            ExReleaseResourceLite(fcb->Header.Resource);
+
+        ExReleaseResourceLite(&fcb->Vcb->tree_lock);
+
         FileObject->Flags |= FO_CLEANUP_COMPLETE;
     }
-    
+
     Status = STATUS_SUCCESS;
 
 exit:
+    TRACE("returning %08x\n", Status);
+
     Irp->IoStatus.Status = Status;
     Irp->IoStatus.Information = 0;
-    
+
     IoCompleteRequest(Irp, IO_NO_INCREMENT);
-    
-exit2:
-    if (top_level) 
+
+    if (top_level)
         IoSetTopLevelIrp(NULL);
-    
+
     FsRtlExitFileSystem();
 
     return Status;
 }
 
-BOOL get_file_attributes_from_xattr(char* val, UINT16 len, ULONG* atts) {
+_Success_(return)
+BOOL get_file_attributes_from_xattr(_In_reads_bytes_(len) char* val, _In_ UINT16 len, _Out_ ULONG* atts) {
     if (len > 2 && val[0] == '0' && val[1] == 'x') {
         int i;
         ULONG dosnum = 0;
 
         for (i = 2; i < len; i++) {
             dosnum *= 0x10;
-            
+
             if (val[i] >= '0' && val[i] <= '9')
                 dosnum |= val[i] - '0';
             else if (val[i] >= 'a' && val[i] <= 'f')
@@ -2296,252 +2291,260 @@ BOOL get_file_attributes_from_xattr(char* val, UINT16 len, ULONG* atts) {
             else if (val[i] >= 'A' && val[i] <= 'F')
                 dosnum |= val[i] + 10 - 'a';
         }
-        
+
         TRACE("DOSATTRIB: %08x\n", dosnum);
-        
+
         *atts = dosnum;
-        
+
         return TRUE;
     }
-    
+
     return FALSE;
 }
 
-ULONG STDCALL get_file_attributes(device_extension* Vcb, INODE_ITEM* ii, root* r, UINT64 inode, UINT8 type, BOOL dotfile, BOOL ignore_xa, PIRP Irp) {
+ULONG get_file_attributes(_In_ _Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, _In_ root* r, _In_ UINT64 inode,
+                          _In_ UINT8 type, _In_ BOOL dotfile, _In_ BOOL ignore_xa, _In_opt_ PIRP Irp) {
     ULONG att;
     char* eaval;
     UINT16 ealen;
-    
-    // ii can be NULL
-    
+
     if (!ignore_xa && get_xattr(Vcb, r, inode, EA_DOSATTRIB, EA_DOSATTRIB_HASH, (UINT8**)&eaval, &ealen, Irp)) {
         ULONG dosnum = 0;
-        
+
         if (get_file_attributes_from_xattr(eaval, ealen, &dosnum)) {
             ExFreePool(eaval);
-            
+
             if (type == BTRFS_TYPE_DIRECTORY)
                 dosnum |= FILE_ATTRIBUTE_DIRECTORY;
             else if (type == BTRFS_TYPE_SYMLINK)
                 dosnum |= FILE_ATTRIBUTE_REPARSE_POINT;
-            
+
+            if (type != BTRFS_TYPE_DIRECTORY)
+                dosnum &= ~FILE_ATTRIBUTE_DIRECTORY;
+
             if (inode == SUBVOL_ROOT_INODE) {
                 if (r->root_item.flags & BTRFS_SUBVOL_READONLY)
                     dosnum |= FILE_ATTRIBUTE_READONLY;
                 else
                     dosnum &= ~FILE_ATTRIBUTE_READONLY;
             }
-            
+
             return dosnum;
         }
-        
+
         ExFreePool(eaval);
     }
-    
+
     switch (type) {
         case BTRFS_TYPE_DIRECTORY:
             att = FILE_ATTRIBUTE_DIRECTORY;
             break;
-            
+
         case BTRFS_TYPE_SYMLINK:
             att = FILE_ATTRIBUTE_REPARSE_POINT;
             break;
-           
+
         default:
             att = 0;
             break;
     }
-    
+
     if (dotfile) {
         att |= FILE_ATTRIBUTE_HIDDEN;
     }
-    
+
     att |= FILE_ATTRIBUTE_ARCHIVE;
-    
+
     if (inode == SUBVOL_ROOT_INODE) {
         if (r->root_item.flags & BTRFS_SUBVOL_READONLY)
             att |= FILE_ATTRIBUTE_READONLY;
         else
             att &= ~FILE_ATTRIBUTE_READONLY;
     }
-    
+
     // FIXME - get READONLY from ii->st_mode
     // FIXME - return SYSTEM for block/char devices?
-    
+
     if (att == 0)
         att = FILE_ATTRIBUTE_NORMAL;
-    
+
     return att;
 }
 
-NTSTATUS sync_read_phys(PDEVICE_OBJECT DeviceObject, LONGLONG StartingOffset, ULONG Length, PUCHAR Buffer, BOOL override) {
-    IO_STATUS_BLOCK* IoStatus;
+NTSTATUS sync_read_phys(_In_ PDEVICE_OBJECT DeviceObject, _In_ UINT64 StartingOffset, _In_ ULONG Length,
+                        _Out_writes_bytes_(Length) PUCHAR Buffer, _In_ BOOL override) {
+    IO_STATUS_BLOCK IoStatus;
     LARGE_INTEGER Offset;
     PIRP Irp;
     PIO_STACK_LOCATION IrpSp;
     NTSTATUS Status;
-    read_context* context;
-    
+    read_context context;
+
     num_reads++;
-    
-    context = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_context), ALLOC_TAG);
-    if (!context) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-    
-    RtlZeroMemory(context, sizeof(read_context));
-    KeInitializeEvent(&context->Event, NotificationEvent, FALSE);
-    
-    IoStatus = ExAllocatePoolWithTag(NonPagedPool, sizeof(IO_STATUS_BLOCK), ALLOC_TAG);
-    if (!IoStatus) {
-        ERR("out of memory\n");
-        ExFreePool(context);
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
 
-    Offset.QuadPart = StartingOffset;
+    RtlZeroMemory(&context, sizeof(read_context));
+    KeInitializeEvent(&context.Event, NotificationEvent, FALSE);
+
+    Offset.QuadPart = (LONGLONG)StartingOffset;
 
-//     Irp = IoBuildSynchronousFsdRequest(IRP_MJ_READ, DeviceObject, Buffer, Length, &Offset, /*&Event*/NULL, IoStatus);
     Irp = IoAllocateIrp(DeviceObject->StackSize, FALSE);
-    
+
     if (!Irp) {
         ERR("IoAllocateIrp failed\n");
-        Status = STATUS_INSUFFICIENT_RESOURCES;
-        goto exit;
+        return STATUS_INSUFFICIENT_RESOURCES;
     }
-    
+
     Irp->Flags |= IRP_NOCACHE;
     IrpSp = IoGetNextIrpStackLocation(Irp);
     IrpSp->MajorFunction = IRP_MJ_READ;
-    
+
     if (override)
         IrpSp->Flags |= SL_