Sync btrfs to 0.7.
CORE-12223
svn path=/trunk/; revision=73062
compress.c
crc32c.c
create.c
+ devctrl.c
dirctrl.c
extent-tree.c
fastio.c
flushthread.c
free-space.c
fsctl.c
+ galois.c
pnp.c
read.c
registry.c
#include <intrin.h>
#endif
#endif
+#include <ntddscsi.h>
#include "btrfs.h"
#ifndef __REACTOS__
#include <winioctl.h>
#else
#include <rtlfuncs.h>
#endif
-#include <mountdev.h>
+#include <ata.h>
#define INCOMPAT_SUPPORTED (BTRFS_INCOMPAT_FLAGS_MIXED_BACKREF | BTRFS_INCOMPAT_FLAGS_DEFAULT_SUBVOL | BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS | \
- BTRFS_INCOMPAT_FLAGS_COMPRESS_LZO | BTRFS_INCOMPAT_FLAGS_BIG_METADATA | BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF | \
- BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA | BTRFS_INCOMPAT_FLAGS_NO_HOLES)
+ BTRFS_INCOMPAT_FLAGS_COMPRESS_LZO | BTRFS_INCOMPAT_FLAGS_BIG_METADATA | BTRFS_INCOMPAT_FLAGS_RAID56 | \
+ BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF | BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA | BTRFS_INCOMPAT_FLAGS_NO_HOLES)
#define COMPAT_RO_SUPPORTED 0
static WCHAR device_name[] = {'\\','B','t','r','f','s',0};
PDRIVER_OBJECT drvobj;
PDEVICE_OBJECT devobj;
#ifndef __REACTOS__
-BOOL have_sse42 = FALSE;
+BOOL have_sse42 = FALSE, have_sse2 = FALSE;
#endif
UINT64 num_reads = 0;
LIST_ENTRY uid_map_list;
UINT32 mount_zlib_level = 3;
UINT32 mount_flush_interval = 30;
UINT32 mount_max_inline = 2048;
+UINT32 mount_raid5_recalculation = 1;
+UINT32 mount_raid6_recalculation = 1;
BOOL log_started = FALSE;
UNICODE_STRING log_device, log_file, registry_path;
return NumberToBeAligned;
}
-int keycmp(const KEY* key1, const KEY* key2) {
- if (key1->obj_id < key2->obj_id) {
- return -1;
- } else if (key1->obj_id > key2->obj_id) {
- return 1;
- }
-
- if (key1->obj_type < key2->obj_type) {
- return -1;
- } else if (key1->obj_type > key2->obj_type) {
- return 1;
- }
-
- if (key1->offset < key2->offset) {
- return -1;
- } else if (key1->offset > key2->offset) {
- return 1;
- }
-
- return 0;
-}
-
BOOL is_top_level(PIRP Irp) {
if (!IoGetTopLevelIrp()) {
IoSetTopLevelIrp(Irp);
ExFreePool(registry_path.Buffer);
}
-BOOL STDCALL get_last_inode(device_extension* Vcb, root* r, PIRP Irp) {
+static BOOL STDCALL get_last_inode(device_extension* Vcb, root* r, PIRP Irp) {
KEY searchkey;
traverse_ptr tp, prev_tp;
NTSTATUS Status;
return FALSE;
}
+ if (tp.item->key.obj_type == TYPE_INODE_ITEM || (tp.item->key.obj_type == TYPE_ROOT_ITEM && !(tp.item->key.obj_id & 0x8000000000000000))) {
+ r->lastinode = tp.item->key.obj_id;
+ TRACE("last inode for tree %llx is %llx\n", r->id, r->lastinode);
+ return TRUE;
+ }
+
while (find_prev_item(Vcb, &tp, &prev_tp, FALSE, Irp)) {
tp = prev_tp;
return FALSE;
}
- if (keycmp(&tp.item->key, &searchkey)) {
+ if (keycmp(tp.item->key, searchkey)) {
TRACE("could not find item (%llx,%x,%llx)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
return FALSE;
}
return FALSE;
}
-NTSTATUS add_dir_item(device_extension* Vcb, root* subvol, UINT64 inode, UINT32 crc32, DIR_ITEM* di, ULONG disize, PIRP Irp, LIST_ENTRY* rollback) {
- KEY searchkey;
- traverse_ptr tp;
- UINT8* di2;
- NTSTATUS Status;
-
- searchkey.obj_id = inode;
- searchkey.obj_type = TYPE_DIR_ITEM;
- searchkey.offset = crc32;
-
- Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return Status;
- }
-
- if (!keycmp(&tp.item->key, &searchkey)) {
- ULONG maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node);
-
- if (tp.item->size + disize > maxlen) {
- WARN("DIR_ITEM was longer than maxlen (%u + %u > %u)\n", tp.item->size, disize, maxlen);
- return STATUS_INTERNAL_ERROR;
- }
-
- di2 = ExAllocatePoolWithTag(PagedPool, tp.item->size + disize, ALLOC_TAG);
- if (!di2) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- if (tp.item->size > 0)
- RtlCopyMemory(di2, tp.item->data, tp.item->size);
-
- RtlCopyMemory(di2 + tp.item->size, di, disize);
-
- delete_tree_item(Vcb, &tp, rollback);
-
- insert_tree_item(Vcb, subvol, inode, TYPE_DIR_ITEM, crc32, di2, tp.item->size + disize, NULL, Irp, rollback);
-
- ExFreePool(di);
- } else {
- insert_tree_item(Vcb, subvol, inode, TYPE_DIR_ITEM, crc32, di, disize, NULL, Irp, rollback);
- }
-
- return STATUS_SUCCESS;
-}
-
static NTSTATUS STDCALL drv_close(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
NTSTATUS Status;
PIO_STACK_LOCATION IrpSp;
return Status;
}
-static NTSTATUS STDCALL drv_query_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
- NTSTATUS Status;
- BOOL top_level;
- device_extension* Vcb = DeviceObject->DeviceExtension;
-
- FsRtlEnterFileSystem();
-
- top_level = is_top_level(Irp);
-
- if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) {
- Status = part0_passthrough(DeviceObject, Irp);
- goto exit;
- }
-
- FIXME("STUB: query ea\n");
- Status = STATUS_NOT_IMPLEMENTED;
-
- Irp->IoStatus.Status = Status;
- Irp->IoStatus.Information = 0;
-
- IoCompleteRequest( Irp, IO_NO_INCREMENT );
-
-exit:
- if (top_level)
- IoSetTopLevelIrp(NULL);
-
- FsRtlExitFileSystem();
-
- return Status;
-}
-
-static NTSTATUS STDCALL drv_set_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
- NTSTATUS Status;
- device_extension* Vcb = DeviceObject->DeviceExtension;
- BOOL top_level;
-
- FsRtlEnterFileSystem();
-
- top_level = is_top_level(Irp);
-
- if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) {
- Status = part0_passthrough(DeviceObject, Irp);
- goto exit;
- }
-
- FIXME("STUB: set ea\n");
- Status = STATUS_NOT_IMPLEMENTED;
-
- if (Vcb->readonly)
- Status = STATUS_MEDIA_WRITE_PROTECTED;
-
- // FIXME - return STATUS_ACCESS_DENIED if subvol readonly
-
- Irp->IoStatus.Status = Status;
- Irp->IoStatus.Information = 0;
-
- IoCompleteRequest( Irp, IO_NO_INCREMENT );
-
-exit:
- if (top_level)
- IoSetTopLevelIrp(NULL);
-
- FsRtlExitFileSystem();
-
- return Status;
-}
-
static NTSTATUS STDCALL drv_flush_buffers(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
NTSTATUS Status;
PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation( Irp );
}
+// Reports the volume's total and free size, both expressed in sectors
+// (superblock byte counts divided by superblock.sector_size).
+//
+// The raw total is scaled by nfactor / dfactor, chosen from the data
+// profile in Vcb->data_flags:
+//   DUP / RAID1 / RAID10 -> 1 / 2
+//   RAID5                -> (num_devices - 1) / num_devices
+//   RAID6                -> (num_devices - 2) / num_devices
+//   anything else        -> 1 / 1
+//
+// Vcb       - volume whose superblock is read
+// totalsize - receives the scaled total size, in sectors
+// freespace - receives totalsize minus the used sectors, clamped at 0
static void calculate_total_space(device_extension* Vcb, LONGLONG* totalsize, LONGLONG* freespace) {
- UINT8 factor;
+    // 64-bit on purpose: superblock.num_devices is a 64-bit field, and
+    // truncating it to 16 bits could yield a zero divisor.
+    UINT64 nfactor = 1, dfactor = 1;
+    UINT64 num_devices = Vcb->superblock.num_devices;
+    UINT64 sectors_used;
+
+    if (Vcb->data_flags & BLOCK_FLAG_DUPLICATE || Vcb->data_flags & BLOCK_FLAG_RAID1 || Vcb->data_flags & BLOCK_FLAG_RAID10) {
+        dfactor = 2;
+    } else if (Vcb->data_flags & BLOCK_FLAG_RAID5 && num_devices > 1) {
+        // the device-count guard keeps nfactor from underflowing and
+        // dfactor from being zero; a bogus count falls back to 1 / 1
+        nfactor = num_devices - 1;
+        dfactor = num_devices;
+    } else if (Vcb->data_flags & BLOCK_FLAG_RAID6 && num_devices > 2) {
+        nfactor = num_devices - 2;
+        dfactor = num_devices;
+    }
- if (Vcb->data_flags & BLOCK_FLAG_DUPLICATE || Vcb->data_flags & BLOCK_FLAG_RAID1 || Vcb->data_flags & BLOCK_FLAG_RAID10)
- factor = 2;
- else
- factor = 1;
+    sectors_used = Vcb->superblock.bytes_used / Vcb->superblock.sector_size;
- *totalsize = (Vcb->superblock.total_bytes / Vcb->superblock.sector_size) / factor;
- *freespace = ((Vcb->superblock.total_bytes - Vcb->superblock.bytes_used) / Vcb->superblock.sector_size) / factor;
+    *totalsize = (Vcb->superblock.total_bytes / Vcb->superblock.sector_size) * nfactor / dfactor;
+    // never report negative free space if the used count exceeds the scaled total
+    *freespace = sectors_used > *totalsize ? 0 : (*totalsize - sectors_used);
}
static NTSTATUS STDCALL drv_query_volume_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
data->FileSystemAttributes = FILE_CASE_PRESERVED_NAMES | FILE_CASE_SENSITIVE_SEARCH |
FILE_UNICODE_ON_DISK | FILE_NAMED_STREAMS | FILE_SUPPORTS_HARD_LINKS | FILE_PERSISTENT_ACLS |
- FILE_SUPPORTS_REPARSE_POINTS | FILE_SUPPORTS_SPARSE_FILES | FILE_SUPPORTS_OBJECT_IDS;
+ FILE_SUPPORTS_REPARSE_POINTS | FILE_SUPPORTS_SPARSE_FILES | FILE_SUPPORTS_OBJECT_IDS |
+ FILE_SUPPORTS_OPEN_BY_FILE_ID | FILE_SUPPORTS_EXTENDED_ATTRIBUTES;
if (Vcb->readonly)
data->FileSystemAttributes |= FILE_READ_ONLY_VOLUME;
Status = overflow ? STATUS_BUFFER_OVERFLOW : STATUS_SUCCESS;
break;
}
+
+#ifdef _MSC_VER // not in mingw yet
+ case FileFsSectorSizeInformation:
+ {
+ FILE_FS_SECTOR_SIZE_INFORMATION* data = Irp->AssociatedIrp.SystemBuffer;
+
+ data->LogicalBytesPerSector = Vcb->superblock.sector_size;
+ data->PhysicalBytesPerSectorForAtomicity = Vcb->superblock.sector_size;
+ data->PhysicalBytesPerSectorForPerformance = Vcb->superblock.sector_size;
+ data->FileSystemEffectivePhysicalBytesPerSectorForAtomicity = Vcb->superblock.sector_size;
+ data->ByteOffsetForSectorAlignment = 0;
+ data->ByteOffsetForPartitionAlignment = 0;
+
+ data->Flags = SSINFO_FLAGS_ALIGNED_DEVICE | SSINFO_FLAGS_PARTITION_ALIGNED_ON_DEVICE;
+
+ if (Vcb->trim)
+ data->Flags |= SSINFO_FLAGS_TRIM_ENABLED;
+
+ BytesCopied = sizeof(FILE_FS_SECTOR_SIZE_INFORMATION);
+
+ break;
+ }
+#endif
default:
Status = STATUS_INVALID_PARAMETER;
// return;
// }
//
-// while (TRUE/*keycmp(&tp.item->key, &endkey) < 1*/) {
+// while (TRUE/*keycmp(tp.item->key, endkey) < 1*/) {
// tp.item->ignore = TRUE;
// add_to_tree_cache(tc, tp.tree);
//
t->new_address = 0;
t->has_new_address = FALSE;
+ t->updated_extents = FALSE;
t->flags = tp.tree->flags;
InsertTailList(&Vcb->trees, &t->list_entry);
// int3;
// }
+#if 0
+// Debug-only stress test for the tree code (compiled out by the
+// surrounding #if 0; the only reference to it is a commented-out
+// PsCreateSystemThread call in set_label).
+//
+// Creates a fresh root, then loops forever: under an exclusive tree_lock
+// it inserts 100 items of type 0xfd with random 64-bit object ids and
+// offsets, looks up 25 random keys and deletes whatever find_item lands
+// on, commits the rollback list, releases the lock and waits on a timer
+// before the next round.
+//
+// context - the device_extension (Vcb) of the volume to exercise
+void STDCALL tree_test(void* context) {
+    device_extension* Vcb = context;
+    NTSTATUS Status;
+    UINT64 id;
+    LARGE_INTEGER due_time, time;
+    KTIMER timer;
+    root* r;
+    LIST_ENTRY rollback;
+    ULONG seed;
+
+    InitializeListHead(&rollback);
+
+    KeInitializeTimer(&timer);
+
+    id = InterlockedIncrement64(&Vcb->root_root->lastinode);
+    Status = create_root(Vcb, id, &r, FALSE, 0, NULL, &rollback);
+    if (!NT_SUCCESS(Status)) {
+        // the format string consumes one argument, so Status must be passed
+        ERR("create_root returned %08x\n", Status);
+        return;
+    }
+
+    clear_rollback(Vcb, &rollback);
+
+    // negative due time = relative interval, in 100ns units (one second)
+    due_time.QuadPart = (UINT64)1 * -10000000;
+
+    KeQueryPerformanceCounter(&time);
+    seed = time.LowPart;
+
+    while (TRUE) {
+        UINT32 i;
+
+        FsRtlEnterFileSystem();
+
+        ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
+
+        for (i = 0; i < 100; i++) {
+            void* data;
+            ULONG datalen;
+            UINT64 objid, offset;
+
+            // build 64-bit random values from two 32-bit draws
+            objid = RtlRandomEx(&seed);
+            objid <<= 32;
+            objid |= RtlRandomEx(&seed);
+
+            offset = RtlRandomEx(&seed);
+            offset <<= 32;
+            offset |= RtlRandomEx(&seed);
+
+            datalen = 30;
+            data = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG);
+            if (!data) {
+                // don't hand a NULL payload to insert_tree_item; skip the
+                // rest of this round's inserts
+                ERR("out of memory\n");
+                break;
+            }
+
+            // the payload content is irrelevant to the test, but it must
+            // not be uninitialised pool memory once it is stored in the tree
+            RtlZeroMemory(data, datalen);
+
+            if (!insert_tree_item(Vcb, r, objid, 0xfd, offset, data, datalen, NULL, NULL, &rollback)) {
+                ERR("insert_tree_item failed\n");
+            }
+        }
+
+        for (i = 0; i < 25; i++) {
+            KEY searchkey;
+            traverse_ptr tp;
+
+            searchkey.obj_id = RtlRandomEx(&seed);
+            searchkey.obj_id <<= 32;
+            searchkey.obj_id |= RtlRandomEx(&seed);
+
+            searchkey.obj_type = 0xfd;
+
+            searchkey.offset = RtlRandomEx(&seed);
+            searchkey.offset <<= 32;
+            searchkey.offset |= RtlRandomEx(&seed);
+
+            Status = find_item(Vcb, r, &tp, &searchkey, FALSE, NULL);
+            if (!NT_SUCCESS(Status)) {
+                ERR("error - find_item returned %08x\n", Status);
+            } else {
+                // no exact-match check: whichever item find_item returned is deleted
+                delete_tree_item(Vcb, &tp, &rollback);
+            }
+        }
+
+        clear_rollback(Vcb, &rollback);
+
+        ExReleaseResourceLite(&Vcb->tree_lock);
+
+        FsRtlExitFileSystem();
+
+        KeSetTimer(&timer, due_time, NULL);
+
+        KeWaitForSingleObject(&timer, Executive, KernelMode, FALSE, NULL);
+    }
+}
+#endif
+
static NTSTATUS STDCALL set_label(device_extension* Vcb, FILE_FS_LABEL_INFORMATION* ffli) {
ULONG utf8len;
NTSTATUS Status;
USHORT vollen, i;
+// HANDLE h;
TRACE("label = %.*S\n", ffli->VolumeLabelLength / sizeof(WCHAR), ffli->VolumeLabel);
Vcb->need_write = TRUE;
+// PsCreateSystemThread(&h, 0, NULL, NULL, NULL, tree_test, Vcb);
+
release:
ExReleaseResourceLite(&Vcb->tree_lock);
return Status;
}
- if (!keycmp(&searchkey, &tp.item->key)) {
+ if (!keycmp(searchkey, tp.item->key)) {
if (tp.item->size < sizeof(DIR_ITEM)) {
WARN("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM));
} else {
return Status;
}
- if (!keycmp(&searchkey, &tp.item->key)) {
+ if (!keycmp(searchkey, tp.item->key)) {
if (tp.item->size < sizeof(INODE_REF)) {
WARN("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_REF));
} else {
return Status;
}
- if (!keycmp(&searchkey, &tp.item->key)) {
+ if (!keycmp(searchkey, tp.item->key)) {
if (tp.item->size < sizeof(INODE_EXTREF)) {
WARN("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_EXTREF));
} else {
if (fcb->reparse_xattr.Buffer)
ExFreePool(fcb->reparse_xattr.Buffer);
+ if (fcb->ea_xattr.Buffer)
+ ExFreePool(fcb->ea_xattr.Buffer);
+
if (fcb->adsdata.Buffer)
ExFreePool(fcb->adsdata.Buffer);
fcb* fcb;
ccb* ccb;
file_ref* fileref = NULL;
+ LONG open_files;
TRACE("FileObject = %p\n", FileObject);
+ open_files = InterlockedDecrement(&Vcb->open_files);
+
fcb = FileObject->FsContext;
if (!fcb) {
TRACE("FCB was NULL, returning success\n");
+
+ if (open_files == 0 && Vcb->removing)
+ uninit(Vcb, FALSE);
+
return STATUS_SUCCESS;
}
CcUninitializeCacheMap(FileObject, NULL, NULL);
+ if (open_files == 0 && Vcb->removing) {
+ uninit(Vcb, FALSE);
+ return STATUS_SUCCESS;
+ }
+
if (!(Vcb->Vpb->Flags & VPB_MOUNTED))
return STATUS_SUCCESS;
free_trees(Vcb);
- clear_rollback(&rollback);
+ clear_rollback(Vcb, &rollback);
ExReleaseResourceLite(&Vcb->tree_lock);
}
- for (i = 0; i < Vcb->threads.num_threads; i++) {
- Vcb->threads.threads[i].quit = TRUE;
- KeSetEvent(&Vcb->threads.threads[i].event, 0, FALSE);
-
- KeWaitForSingleObject(&Vcb->threads.threads[i].finished, Executive, KernelMode, FALSE, NULL);
-
- ZwClose(Vcb->threads.threads[i].handle);
- }
-
- ExFreePool(Vcb->threads.threads);
-
time.QuadPart = 0;
KeSetTimer(&Vcb->flush_thread_timer, time, NULL); // trigger the timer early
KeWaitForSingleObject(&Vcb->flush_thread_finished, Executive, KernelMode, FALSE, NULL);
ExDeleteResourceLite(&Vcb->checksum_lock);
ExDeleteResourceLite(&Vcb->chunk_lock);
+ ExDeletePagedLookasideList(&Vcb->tree_data_lookaside);
+ ExDeletePagedLookasideList(&Vcb->traverse_ptr_lookaside);
+ ExDeletePagedLookasideList(&Vcb->rollback_item_lookaside);
+ ExDeletePagedLookasideList(&Vcb->batch_item_lookaside);
+ ExDeleteNPagedLookasideList(&Vcb->range_lock_lookaside);
+
ZwClose(Vcb->flush_thread_handle);
}
fileref->fcb->inode_item.transid = fileref->fcb->Vcb->superblock.generation;
fileref->fcb->inode_item.sequence++;
fileref->fcb->inode_item.st_ctime = now;
+ fileref->fcb->inode_item_changed = TRUE;
} else {
fileref->fcb->deleted = TRUE;
fileref->parent->fcb->inode_item.st_mtime = now;
ExReleaseResourceLite(fileref->parent->fcb->Header.Resource);
+ fileref->parent->fcb->inode_item_changed = TRUE;
mark_fcb_dirty(fileref->parent->fcb);
send_notification_fcb(fileref->parent, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED);
fileref->fcb->subvol->root_item.ctransid = fileref->fcb->Vcb->superblock.generation;
fileref->fcb->subvol->root_item.ctime = now;
- if (FileObject && FileObject->Flags & FO_CACHE_SUPPORTED && fileref->fcb->nonpaged->segment_object.DataSectionObject)
- CcPurgeCacheSection(&fileref->fcb->nonpaged->segment_object, NULL, 0, FALSE);
-
newlength.QuadPart = 0;
if (FileObject && !CcUninitializeCacheMap(FileObject, &newlength, NULL))
fileref = ccb ? ccb->fileref : NULL;
TRACE("cleanup called for FileObject %p\n", FileObject);
- TRACE("fcb %p (%S), refcount = %u, open_count = %u\n", fcb, file_desc(FileObject), fcb->refcount, fcb->open_count);
+ TRACE("fileref %p (%S), refcount = %u, open_count = %u\n", fileref, file_desc(FileObject), fileref ? fileref->refcount : 0, fileref ? fileref->open_count : 0);
IoRemoveShareAccess(FileObject, &fcb->share_access);
FsRtlNotifyCleanup(Vcb->NotifySync, &Vcb->DirNotifyList, ccb);
- oc = InterlockedDecrement(&fcb->open_count);
+ if (fileref) {
+ oc = InterlockedDecrement(&fileref->open_count);
#ifdef DEBUG_FCB_REFCOUNTS
- ERR("fcb %p: open_count now %i\n", fcb, oc);
+ ERR("fileref %p: open_count now %i\n", fileref, oc);
#endif
+ }
if (ccb && ccb->options & FILE_DELETE_ON_CLOSE && fileref)
fileref->delete_on_close = TRUE;
FsRtlNotifyVolumeEvent(FileObject, FSRTL_VOLUME_UNLOCK);
}
- if (oc == 0) {
+ if (fileref && oc == 0) {
if (!Vcb->removing) {
LIST_ENTRY rollback;
ExAcquireResourceSharedLite(&fcb->Vcb->tree_lock, TRUE);
+ ExAcquireResourceExclusiveLite(&fcb->Vcb->fcb_lock, TRUE);
+
Status = delete_fileref(fileref, FileObject, Irp, &rollback);
if (!NT_SUCCESS(Status)) {
ERR("delete_fileref returned %08x\n", Status);
do_rollback(Vcb, &rollback);
+ ExReleaseResourceLite(&fcb->Vcb->fcb_lock);
ExReleaseResourceLite(&fcb->Vcb->tree_lock);
goto exit;
}
+ ExReleaseResourceLite(&fcb->Vcb->fcb_lock);
+
ExReleaseResourceLite(&fcb->Vcb->tree_lock);
- clear_rollback(&rollback);
+ clear_rollback(Vcb, &rollback);
} else if (FileObject->Flags & FO_CACHE_SUPPORTED && fcb->nonpaged->segment_object.DataSectionObject) {
IO_STATUS_BLOCK iosb;
CcFlushCache(FileObject->SectionObjectPointer, NULL, 0, &iosb);
NTSTATUS Status;
superblock* sb;
unsigned int i, to_read;
- UINT32 crc32;
+ UINT8 valid_superblocks;
to_read = sector_align(sizeof(superblock), device->SectorSize);
}
i = 0;
+ valid_superblocks = 0;
while (superblock_addrs[i] > 0) {
+ UINT32 crc32;
+
if (i > 0 && superblock_addrs[i] + sizeof(superblock) > length)
break;
return Status;
}
- // FIXME - check checksum before accepting?
-
TRACE("got superblock %u!\n", i);
-
- if (i == 0 || sb->generation > Vcb->superblock.generation)
+
+ crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum));
+
+ if (crc32 != *((UINT32*)sb->checksum))
+ WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)sb->checksum));
+ else if (valid_superblocks == 0 || sb->generation > Vcb->superblock.generation) {
RtlCopyMemory(&Vcb->superblock, sb, sizeof(superblock));
+ valid_superblocks++;
+ }
i++;
}
ExFreePool(sb);
- crc32 = calc_crc32c(0xffffffff, (UINT8*)&Vcb->superblock.uuid, (ULONG)sizeof(superblock) - sizeof(Vcb->superblock.checksum));
- crc32 = ~crc32;
- TRACE("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)Vcb->superblock.checksum));
-
- if (crc32 != *((UINT32*)Vcb->superblock.checksum))
- return STATUS_INTERNAL_ERROR; // FIXME - correct error?
+ if (valid_superblocks == 0) {
+ ERR("could not find any valid superblocks\n");
+ return STATUS_INTERNAL_ERROR;
+ }
TRACE("label is %s\n", Vcb->superblock.label);
-// utf8_to_utf16(Vcb->superblock.label, Vcb->label, MAX_LABEL_SIZE * sizeof(WCHAR));
return STATUS_SUCCESS;
}
RtlZeroMemory(((UINT8*)&r->root_item) + tp->item->size, sizeof(ROOT_ITEM) - tp->item->size);
}
+ if (!Vcb->readonly && (r->id == BTRFS_ROOT_ROOT || r->id == BTRFS_ROOT_FSTREE || (r->id >= 0x100 && !(r->id & 0xf000000000000000)))) { // FS tree root
+ // FIXME - don't call this if subvol is readonly (though we will have to if we ever toggle this flag)
+ get_last_inode(Vcb, r, NULL);
+
+ if (r->id == BTRFS_ROOT_ROOT && r->lastinode < 0x100)
+ r->lastinode = 0x100;
+ }
+
InsertTailList(&Vcb->roots, &r->list_entry);
switch (r->id) {
case BTRFS_ROOT_UUID:
Vcb->uuid_root = r;
break;
+
+ case BTRFS_ROOT_DATA_RELOC:
+ Vcb->data_reloc_root = r;
}
return STATUS_SUCCESS;
tp = next_tp;
} while (b);
+ if (!Vcb->readonly && !Vcb->data_reloc_root) {
+ root* reloc_root;
+ INODE_ITEM* ii;
+ ULONG irlen;
+ INODE_REF* ir;
+ LARGE_INTEGER time;
+ BTRFS_TIME now;
+ LIST_ENTRY rollback;
+
+ InitializeListHead(&rollback);
+
+ WARN("data reloc root doesn't exist, creating it\n");
+
+ Status = create_root(Vcb, BTRFS_ROOT_DATA_RELOC, &reloc_root, FALSE, 0, Irp, &rollback);
+
+ if (!NT_SUCCESS(Status)) {
+ ERR("create_root returned %08x\n", Status);
+ do_rollback(Vcb, &rollback);
+ goto end;
+ }
+
+ reloc_root->root_item.inode.generation = 1;
+ reloc_root->root_item.inode.st_size = 3;
+ reloc_root->root_item.inode.st_blocks = Vcb->superblock.node_size;
+ reloc_root->root_item.inode.st_nlink = 1;
+ reloc_root->root_item.inode.st_mode = 040755;
+ reloc_root->root_item.inode.flags = 0xffffffff80000000;
+ reloc_root->root_item.objid = SUBVOL_ROOT_INODE;
+ reloc_root->root_item.bytes_used = Vcb->superblock.node_size;
+
+ ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
+ if (!ii) {
+ ERR("out of memory\n");
+ do_rollback(Vcb, &rollback);
+ goto end;
+ }
+
+ KeQuerySystemTime(&time);
+ win_time_to_unix(time, &now);
+
+ RtlZeroMemory(ii, sizeof(INODE_ITEM));
+ ii->generation = Vcb->superblock.generation;
+ ii->st_blocks = Vcb->superblock.node_size;
+ ii->st_nlink = 1;
+ ii->st_mode = 040755;
+ ii->st_atime = now;
+ ii->st_ctime = now;
+ ii->st_mtime = now;
+
+ insert_tree_item(Vcb, reloc_root, SUBVOL_ROOT_INODE, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp, &rollback);
+
+ irlen = offsetof(INODE_REF, name[0]) + 2;
+ ir = ExAllocatePoolWithTag(PagedPool, irlen, ALLOC_TAG);
+ if (!ir) {
+ ERR("out of memory\n");
+ do_rollback(Vcb, &rollback);
+ goto end;
+ }
+
+ ir->index = 0;
+ ir->n = 2;
+ ir->name[0] = '.';
+ ir->name[1] = '.';
+
+ insert_tree_item(Vcb, reloc_root, SUBVOL_ROOT_INODE, TYPE_INODE_REF, SUBVOL_ROOT_INODE, ir, irlen, NULL, Irp, &rollback);
+
+ clear_rollback(Vcb, &rollback);
+
+ Vcb->data_reloc_root = reloc_root;
+ Vcb->need_write = TRUE;
+ }
+
+end:
return STATUS_SUCCESS;
}
}
}
+ // The Linux driver doesn't like to allocate chunks within the first megabyte of a device.
+
+ space_list_subtract2(Vcb, &dev->space, NULL, 0, 0x100000, NULL);
+
return STATUS_SUCCESS;
}
Vcb->devices[Vcb->devices_loaded].devobj = DeviceObject;
Vcb->devices[Vcb->devices_loaded].devitem.device_uuid = *uuid;
+ Vcb->devices[Vcb->devices_loaded].seeding = v->seeding;
+ Vcb->devices[Vcb->devices_loaded].readonly = Vcb->devices[Vcb->devices_loaded].seeding;
+ Vcb->devices[Vcb->devices_loaded].removable = FALSE;
Vcb->devices_loaded++;
return &Vcb->devices[Vcb->devices_loaded - 1];
static void init_device(device_extension* Vcb, device* dev, BOOL get_length) {
NTSTATUS Status;
GET_LENGTH_INFORMATION gli;
+ ULONG aptelen;
+ ATA_PASS_THROUGH_EX* apte;
+ IDENTIFY_DEVICE_DATA* idd;
dev->removable = is_device_removable(dev->devobj);
dev->change_count = dev->removable ? get_device_change_count(dev->devobj) : 0;
dev->length = gli.Length.QuadPart;
}
+
+ dev->ssd = FALSE;
+ dev->trim = FALSE;
+ dev->readonly = dev->seeding;
+
+ if (!dev->readonly) {
+ Status = dev_ioctl(dev->devobj, IOCTL_DISK_IS_WRITABLE, NULL, 0,
+ NULL, 0, TRUE, NULL);
+ if (Status == STATUS_MEDIA_WRITE_PROTECTED)
+ dev->readonly = TRUE;
+ }
+
+ aptelen = sizeof(ATA_PASS_THROUGH_EX) + 512;
+ apte = ExAllocatePoolWithTag(NonPagedPool, aptelen, ALLOC_TAG);
+ if (!apte) {
+ ERR("out of memory\n");
+ return;
+ }
+
+ RtlZeroMemory(apte, aptelen);
+
+ apte->Length = sizeof(ATA_PASS_THROUGH_EX);
+ apte->AtaFlags = ATA_FLAGS_DATA_IN;
+ apte->DataTransferLength = aptelen - sizeof(ATA_PASS_THROUGH_EX);
+ apte->TimeOutValue = 3;
+ apte->DataBufferOffset = apte->Length;
+ apte->CurrentTaskFile[6] = 0xec; // IDENTIFY DEVICE
+
+ Status = dev_ioctl(dev->devobj, IOCTL_ATA_PASS_THROUGH, apte, aptelen,
+ apte, aptelen, TRUE, NULL);
+
+ if (!NT_SUCCESS(Status)) {
+ ERR("error calling ATA IDENTIFY DEVICE: %08x\n", Status);
+ } else {
+ idd = (IDENTIFY_DEVICE_DATA*)((UINT8*)apte + sizeof(ATA_PASS_THROUGH_EX));
+
+ if (idd->NominalMediaRotationRate == 1) {
+ dev->ssd = TRUE;
+ TRACE("device identified as SSD\n");
+ } else if (idd->NominalMediaRotationRate == 0)
+ TRACE("no rotational speed returned, assuming not SSD\n");
+ else
+ TRACE("rotational speed of %u RPM\n", idd->NominalMediaRotationRate);
+
+ if (idd->DataSetManagementFeature.SupportsTrim) {
+ dev->trim = TRUE;
+ Vcb->trim = TRUE;
+ TRACE("TRIM supported\n");
+ } else
+ TRACE("TRIM not supported\n");
+ }
+
+ ExFreePool(apte);
}
static NTSTATUS STDCALL load_chunk_root(device_extension* Vcb, PIRP Irp) {
while (le != &volumes) {
volume* v = CONTAINING_RECORD(le, volume, list_entry);
- if (RtlCompareMemory(&Vcb->superblock.uuid, &v->fsuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID) &&
- RtlCompareMemory(&di->device_uuid, &v->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)
- ) {
+ if (RtlCompareMemory(&di->device_uuid, &v->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) {
PFILE_OBJECT FileObject;
PDEVICE_OBJECT DeviceObject;
Vcb->devices[Vcb->devices_loaded].devobj = DeviceObject;
RtlCopyMemory(&Vcb->devices[Vcb->devices_loaded].devitem, di, min(tp.item->size, sizeof(DEV_ITEM)));
init_device(Vcb, &Vcb->devices[i], FALSE);
+
+ Vcb->devices[i].seeding = v->seeding;
+
Vcb->devices[i].length = v->length;
Vcb->devices_loaded++;
c->used = c->oldused = 0;
c->cache = NULL;
c->created = FALSE;
+ c->readonly = FALSE;
c->chunk_item = ExAllocatePoolWithTag(NonPagedPool, tp.item->size, ALLOC_TAG);
if (!c->devices) {
ERR("out of memory\n");
- ExFreePool(c);
ExFreePool(c->chunk_item);
+ ExFreePool(c);
return STATUS_INSUFFICIENT_RESOURCES;
}
for (i = 0; i < c->chunk_item->num_stripes; i++) {
c->devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid);
TRACE("device %llu = %p\n", i, c->devices[i]);
+
+ if (!c->devices[i]) {
+ ERR("missing device\n");
+ ExFreePool(c->chunk_item);
+ ExFreePool(c);
+ return STATUS_INTERNAL_ERROR;
+ }
+
+ if (c->devices[i]->readonly)
+ c->readonly = TRUE;
}
} else
c->devices = NULL;
InitializeListHead(&c->space_size);
InitializeListHead(&c->deleting);
InitializeListHead(&c->changed_extents);
+
+ InitializeListHead(&c->range_locks);
+ KeInitializeSpinLock(&c->range_locks_spinlock);
+ KeInitializeEvent(&c->range_locks_event, NotificationEvent, FALSE);
InsertTailList(&Vcb->chunks, &c->list_entry);
TRACE("startoff = %llx, superblock = %llx\n", startoff + cis[j].offset, superblock_addrs[i]);
#endif
+ space_list_subtract(Vcb, c, FALSE, c->offset + off_start, off_end - off_start, NULL);
+ }
+ }
+ } else if (ci->type & BLOCK_FLAG_RAID5) {
+ for (j = 0; j < ci->num_stripes; j++) {
+ UINT64 stripe_size = ci->size / (ci->num_stripes - 1);
+
+ if (cis[j].offset + stripe_size > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) {
+ TRACE("cut out superblock in chunk %llx\n", c->offset);
+
+ off_start = superblock_addrs[i] - cis[j].offset;
+ off_start -= off_start % (ci->stripe_length * (ci->num_stripes - 1));
+ off_start *= ci->num_stripes - 1;
+
+ off_end = off_start + (ci->stripe_length * (ci->num_stripes - 1));
+
+ TRACE("cutting out %llx, size %llx\n", c->offset + off_start, off_end - off_start);
+
+ space_list_subtract(Vcb, c, FALSE, c->offset + off_start, off_end - off_start, NULL);
+ }
+ }
+ } else if (ci->type & BLOCK_FLAG_RAID6) {
+ for (j = 0; j < ci->num_stripes; j++) {
+ UINT64 stripe_size = ci->size / (ci->num_stripes - 2);
+
+ if (cis[j].offset + stripe_size > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) {
+ TRACE("cut out superblock in chunk %llx\n", c->offset);
+
+ off_start = superblock_addrs[i] - cis[j].offset;
+ off_start -= off_start % (ci->stripe_length * (ci->num_stripes - 2));
+ off_start *= ci->num_stripes - 2;
+
+ off_end = off_start + (ci->stripe_length * (ci->num_stripes - 2));
+
+ TRACE("cutting out %llx, size %llx\n", c->offset + off_start, off_end - off_start);
+
space_list_subtract(Vcb, c, FALSE, c->offset + off_start, off_end - off_start, NULL);
}
}
return Status;
}
- if (!keycmp(&searchkey, &tp.item->key)) {
+ if (!keycmp(searchkey, tp.item->key)) {
if (tp.item->size >= sizeof(BLOCK_GROUP_ITEM)) {
bgi = (BLOCK_GROUP_ITEM*)tp.item->data;
Vcb->extent_root->id, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
}
}
-
+
// if (addr >= c->offset && (addr - c->offset) < c->chunk_item->size && c->chunk_item->num_stripes > 0) {
// cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
//
// return (addr - c->offset) + cis->offset;
// }
-
+
+ // It doesn't make a great deal of sense to load the free space cache of a
+ // readonly seeding chunk, as we'll never write to it. But btrfs check will
+ // complain if we don't write a valid cache, so we have to do it anyway...
+
// FIXME - make sure we free occasionally after doing one of these, or we
// might use up a lot of memory with a big disk.
if (!NT_SUCCESS(Status)) {
ERR("load_free_space_cache returned %08x\n", Status);
return Status;
- }
+ }
protect_superblocks(Vcb, c);
goto end;
}
- if (keycmp(&tp.item->key, &searchkey)) {
+ if (keycmp(tp.item->key, searchkey)) {
ERR("could not find (%llx,%x,%llx) in root tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
goto end;
}
return NULL;
}
-static NTSTATUS create_worker_threads(PDEVICE_OBJECT DeviceObject) {
- device_extension* Vcb = DeviceObject->DeviceExtension;
- ULONG i;
- NTSTATUS Status;
-
- Vcb->threads.num_threads = max(3, KeQueryActiveProcessorCount(NULL));
-
- Vcb->threads.threads = ExAllocatePoolWithTag(NonPagedPool, sizeof(drv_thread) * Vcb->threads.num_threads, ALLOC_TAG);
- if (!Vcb->threads.threads) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- RtlZeroMemory(Vcb->threads.threads, sizeof(drv_thread) * Vcb->threads.num_threads);
-
- for (i = 0; i < Vcb->threads.num_threads; i++) {
- Vcb->threads.threads[i].DeviceObject = DeviceObject;
- KeInitializeEvent(&Vcb->threads.threads[i].event, SynchronizationEvent, FALSE);
- KeInitializeEvent(&Vcb->threads.threads[i].finished, NotificationEvent, FALSE);
- InitializeListHead(&Vcb->threads.threads[i].jobs);
- KeInitializeSpinLock(&Vcb->threads.threads[i].spin_lock);
-
- Status = PsCreateSystemThread(&Vcb->threads.threads[i].handle, 0, NULL, NULL, NULL, worker_thread, &Vcb->threads.threads[i]);
- if (!NT_SUCCESS(Status)) {
- ULONG j;
-
- ERR("PsCreateSystemThread returned %08x\n", Status);
-
- for (j = 0; j < i; j++) {
- Vcb->threads.threads[i].quit = TRUE;
- KeSetEvent(&Vcb->threads.threads[i].event, 0, FALSE);
- }
-
- return Status;
- }
- }
-
- Vcb->threads.pending_jobs = 0;
-
- return STATUS_SUCCESS;
-}
-
-BOOL add_thread_job(device_extension* Vcb, PIRP Irp) {
- ULONG threadnum;
- thread_job* tj;
-
- threadnum = InterlockedIncrement(&Vcb->threads.next_thread) % Vcb->threads.num_threads;
-
- if (Vcb->threads.pending_jobs >= Vcb->threads.num_threads)
- return FALSE;
-
- if (Vcb->threads.threads[threadnum].quit)
- return FALSE;
-
- tj = ExAllocatePoolWithTag(NonPagedPool, sizeof(thread_job), ALLOC_TAG);
- if (!tj) {
- Irp->IoStatus.Status = STATUS_INSUFFICIENT_RESOURCES;
- Irp->IoStatus.Information = 0;
- IoCompleteRequest(Irp, IO_NO_INCREMENT);
- return FALSE;
- }
-
- tj->Irp = Irp;
-
- InterlockedIncrement(&Vcb->threads.pending_jobs);
-
- ExInterlockedInsertTailList(&Vcb->threads.threads[threadnum].jobs, &tj->list_entry, &Vcb->threads.threads[threadnum].spin_lock);
- KeSetEvent(&Vcb->threads.threads[threadnum].event, 0, FALSE);
-
- return TRUE;
-}
-
static BOOL raid_generations_okay(device_extension* Vcb) {
UINT64 i;
device_extension* Vcb = NULL;
GET_LENGTH_INFORMATION gli;
UINT64 i;
- LIST_ENTRY* le;
+ LIST_ENTRY *le, batchlist;
KEY searchkey;
traverse_ptr tp;
fcb* root_fcb = NULL;
ccb* root_ccb = NULL;
+ BOOL init_lookaside = FALSE;
TRACE("mount_vol called\n");
Vcb->devices[0].devobj = DeviceToMount;
RtlCopyMemory(&Vcb->devices[0].devitem, &Vcb->superblock.dev_item, sizeof(DEV_ITEM));
+
+ Vcb->devices[0].seeding = Vcb->superblock.flags & BTRFS_SUPERBLOCK_FLAGS_SEEDING ? TRUE : FALSE;
+
init_device(Vcb, &Vcb->devices[0], FALSE);
Vcb->devices[0].length = gli.Length.QuadPart;
Vcb->devices_loaded = 1;
+ if (DeviceToMount->Flags & DO_SYSTEM_BOOT_PARTITION)
+ Vcb->disallow_dismount = TRUE;
+
TRACE("DeviceToMount = %p\n", DeviceToMount);
TRACE("Stack->Parameters.MountVolume.Vpb = %p\n", Stack->Parameters.MountVolume.Vpb);
InitializeListHead(&Vcb->all_fcbs);
InitializeListHead(&Vcb->dirty_fcbs);
InitializeListHead(&Vcb->dirty_filerefs);
- InitializeListHead(&Vcb->shared_extents);
InitializeListHead(&Vcb->sector_checksums);
KeInitializeSpinLock(&Vcb->dirty_fcbs_lock);
KeInitializeSpinLock(&Vcb->dirty_filerefs_lock);
- KeInitializeSpinLock(&Vcb->shared_extents_lock);
InitializeListHead(&Vcb->DirNotifyList);
FsRtlNotifyInitializeSync(&Vcb->NotifySync);
+ ExInitializePagedLookasideList(&Vcb->tree_data_lookaside, NULL, NULL, 0, sizeof(tree_data), ALLOC_TAG, 0);
+ ExInitializePagedLookasideList(&Vcb->traverse_ptr_lookaside, NULL, NULL, 0, sizeof(traverse_ptr), ALLOC_TAG, 0);
+ ExInitializePagedLookasideList(&Vcb->rollback_item_lookaside, NULL, NULL, 0, sizeof(rollback_item), ALLOC_TAG, 0);
+ ExInitializePagedLookasideList(&Vcb->batch_item_lookaside, NULL, NULL, 0, sizeof(batch_item), ALLOC_TAG, 0);
+ ExInitializeNPagedLookasideList(&Vcb->range_lock_lookaside, NULL, NULL, 0, sizeof(range_lock), ALLOC_TAG, 0);
+ init_lookaside = TRUE;
+
Status = load_chunk_root(Vcb, Irp);
if (!NT_SUCCESS(Status)) {
ERR("load_chunk_root returned %08x\n", Status);
goto exit;
}
+ if (Vcb->devices[0].readonly && !Vcb->readonly) {
+ Vcb->readonly = TRUE;
+
+ for (i = 0; i < Vcb->superblock.num_devices; i++) {
+ if (Vcb->devices[i].readonly && !Vcb->devices[i].seeding)
+ break;
+
+ if (!Vcb->devices[i].readonly) {
+ Vcb->readonly = FALSE;
+ break;
+ }
+ }
+
+ if (Vcb->readonly)
+ WARN("setting volume to readonly\n");
+ }
+
if (!raid_generations_okay(Vcb)) {
ERR("could not mount as generation mismatch\n");
Status = STATUS_INTERNAL_ERROR;
goto exit;
}
+ } else {
+ if (Vcb->devices[0].readonly) {
+ WARN("setting volume to readonly as device is readonly\n");
+ Vcb->readonly = TRUE;
+ }
}
add_root(Vcb, BTRFS_ROOT_ROOT, Vcb->superblock.root_tree_addr, NULL);
}
}
+ InitializeListHead(&batchlist);
+
// We've already increased the generation by one
if (!Vcb->readonly && Vcb->superblock.generation - 1 != Vcb->superblock.cache_generation) {
WARN("generation was %llx, free-space cache generation was %llx; clearing cache...\n", Vcb->superblock.generation - 1, Vcb->superblock.cache_generation);
- Status = clear_free_space_cache(Vcb, Irp);
+ Status = clear_free_space_cache(Vcb, &batchlist, Irp);
if (!NT_SUCCESS(Status)) {
ERR("clear_free_space_cache returned %08x\n", Status);
+ clear_batch_list(Vcb, &batchlist);
goto exit;
}
}
+ commit_batch_list(Vcb, &batchlist, Irp, NULL);
+
Vcb->volume_fcb = create_fcb(NonPagedPool);
if (!Vcb->volume_fcb) {
ERR("out of memory\n");
goto exit;
}
- Status = create_worker_threads(NewDeviceObject);
- if (!NT_SUCCESS(Status)) {
- ERR("create_worker_threads returned %08x\n", Status);
- goto exit;
- }
-
Status = registry_mark_volume_mounted(&Vcb->superblock.uuid);
if (!NT_SUCCESS(Status))
WARN("registry_mark_volume_mounted returned %08x\n", Status);
if (!NT_SUCCESS(Status)) {
if (Vcb) {
+ if (init_lookaside) {
+ ExDeletePagedLookasideList(&Vcb->tree_data_lookaside);
+ ExDeletePagedLookasideList(&Vcb->traverse_ptr_lookaside);
+ ExDeletePagedLookasideList(&Vcb->rollback_item_lookaside);
+ ExDeletePagedLookasideList(&Vcb->batch_item_lookaside);
+ ExDeleteNPagedLookasideList(&Vcb->range_lock_lookaside);
+ }
+
if (Vcb->root_file)
ObDereferenceObject(Vcb->root_file);
else if (Vcb->root_fileref)
Status = verify_volume(DeviceObject);
if (!NT_SUCCESS(Status) && Vcb->Vpb->Flags & VPB_MOUNTED) {
- uninit(Vcb, FALSE);
-// Vcb->Vpb->Flags &= ~VPB_MOUNTED;
+ if (Vcb->open_files > 0) {
+ Vcb->removing = TRUE;
+// Vcb->Vpb->Flags &= ~VPB_MOUNTED;
+ } else
+ uninit(Vcb, FALSE);
}
break;
return Status;
}
-static NTSTATUS part0_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
- NTSTATUS Status;
- part0_device_extension* p0de = DeviceObject->DeviceExtension;
- PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
-
- TRACE("control code = %x\n", IrpSp->Parameters.DeviceIoControl.IoControlCode);
-
- switch (IrpSp->Parameters.DeviceIoControl.IoControlCode) {
- case IOCTL_MOUNTDEV_QUERY_UNIQUE_ID:
- {
- MOUNTDEV_UNIQUE_ID* mduid;
-
- if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(MOUNTDEV_UNIQUE_ID)) {
- Status = STATUS_BUFFER_TOO_SMALL;
- Irp->IoStatus.Status = Status;
- Irp->IoStatus.Information = sizeof(MOUNTDEV_UNIQUE_ID);
- IoCompleteRequest(Irp, IO_NO_INCREMENT);
- return Status;
- }
-
- mduid = Irp->AssociatedIrp.SystemBuffer;
- mduid->UniqueIdLength = sizeof(BTRFS_UUID);
-
- if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(MOUNTDEV_UNIQUE_ID) - 1 + mduid->UniqueIdLength) {
- Status = STATUS_BUFFER_OVERFLOW;
- Irp->IoStatus.Status = Status;
- Irp->IoStatus.Information = sizeof(MOUNTDEV_UNIQUE_ID);
- IoCompleteRequest(Irp, IO_NO_INCREMENT);
- return Status;
- }
-
- RtlCopyMemory(mduid->UniqueId, &p0de->uuid, sizeof(BTRFS_UUID));
-
- Status = STATUS_SUCCESS;
- Irp->IoStatus.Status = Status;
- Irp->IoStatus.Information = sizeof(MOUNTDEV_UNIQUE_ID) - 1 + mduid->UniqueIdLength;
- IoCompleteRequest(Irp, IO_NO_INCREMENT);
-
- return Status;
- }
-
- case IOCTL_MOUNTDEV_QUERY_DEVICE_NAME:
- {
- PMOUNTDEV_NAME name;
-
- if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(MOUNTDEV_NAME)) {
- Status = STATUS_BUFFER_TOO_SMALL;
- Irp->IoStatus.Status = Status;
- Irp->IoStatus.Information = sizeof(MOUNTDEV_NAME);
- IoCompleteRequest(Irp, IO_NO_INCREMENT);
- return Status;
- }
-
- name = Irp->AssociatedIrp.SystemBuffer;
- name->NameLength = p0de->name.Length;
-
- if (IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(MOUNTDEV_NAME) - 1 + name->NameLength) {
- Status = STATUS_BUFFER_OVERFLOW;
- Irp->IoStatus.Status = Status;
- Irp->IoStatus.Information = sizeof(MOUNTDEV_NAME);
- IoCompleteRequest(Irp, IO_NO_INCREMENT);
- return Status;
- }
-
- RtlCopyMemory(name->Name, p0de->name.Buffer, p0de->name.Length);
-
- Status = STATUS_SUCCESS;
- Irp->IoStatus.Status = Status;
- Irp->IoStatus.Information = sizeof(MOUNTDEV_NAME) - 1 + name->NameLength;
- IoCompleteRequest(Irp, IO_NO_INCREMENT);
-
- return Status;
- }
- }
-
- IoSkipCurrentIrpStackLocation(Irp);
-
- Status = IoCallDriver(p0de->devobj, Irp);
-
- TRACE("returning %08x\n", Status);
-
- return Status;
-}
-
-static NTSTATUS STDCALL drv_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
- NTSTATUS Status;
- PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
- PFILE_OBJECT FileObject = IrpSp->FileObject;
- device_extension* Vcb = DeviceObject->DeviceExtension;
- fcb* fcb;
- BOOL top_level;
-
- FsRtlEnterFileSystem();
-
- top_level = is_top_level(Irp);
-
- Irp->IoStatus.Information = 0;
-
- if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) {
- Status = part0_device_control(DeviceObject, Irp);
- goto end2;
- }
-
- TRACE("control code = %x\n", IrpSp->Parameters.DeviceIoControl.IoControlCode);
-
- if (!FileObject) {
- ERR("FileObject was NULL\n");
- Status = STATUS_INVALID_PARAMETER;
- goto end;
- }
-
- fcb = FileObject->FsContext;
-
- if (!fcb) {
- ERR("FCB was NULL\n");
- Status = STATUS_INVALID_PARAMETER;
- goto end;
- }
-
- if (fcb != Vcb->volume_fcb) {
- Status = STATUS_NOT_IMPLEMENTED;
- goto end;
- }
-
- IoSkipCurrentIrpStackLocation(Irp);
-
- Status = IoCallDriver(Vcb->devices[0].devobj, Irp);
-
- goto end2;
-
-end:
- Irp->IoStatus.Status = Status;
-
- if (Status != STATUS_PENDING)
- IoCompleteRequest(Irp, IO_NO_INCREMENT);
-
-end2:
- if (top_level)
- IoSetTopLevelIrp(NULL);
-
- FsRtlExitFileSystem();
-
- return Status;
-}
-
static NTSTATUS STDCALL drv_shutdown(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
NTSTATUS Status;
BOOL top_level;
return TRUE;
}
+/* Take a hold on the range [start, start + length) of chunk c for the calling
+ * thread. The call waits for as long as a different thread holds a range that
+ * overlaps it; ranges already held by the calling thread never block.
+ *
+ * If the range_lock entry cannot be allocated, the failure is logged and the
+ * function returns without recording a lock. */
+void chunk_lock_range(device_extension* Vcb, chunk* c, UINT64 start, UINT64 length) {
+    range_lock* mine;
+    UINT64 end = start + length;
+
+    mine = ExAllocateFromNPagedLookasideList(&Vcb->range_lock_lookaside);
+    if (!mine) {
+        ERR("out of memory\n");
+        return;
+    }
+
+    // Fill the entry in up front so it can be linked the moment the range is free.
+    mine->start = start;
+    mine->length = length;
+    mine->thread = PsGetCurrentThread();
+
+    for (;;) {
+        KIRQL old_irql;
+        LIST_ENTRY* entry;
+        BOOL conflict = FALSE;
+
+        KeAcquireSpinLock(&c->range_locks_spinlock, &old_irql);
+
+        // Look for an overlapping range that belongs to some other thread.
+        for (entry = c->range_locks.Flink; entry != &c->range_locks; entry = entry->Flink) {
+            range_lock* held = CONTAINING_RECORD(entry, range_lock, list_entry);
+
+            if (held->thread == mine->thread)
+                continue;
+
+            if (held->start < end && held->start + held->length > start) {
+                conflict = TRUE;
+                break;
+            }
+        }
+
+        if (!conflict) {
+            InsertTailList(&c->range_locks, &mine->list_entry);
+
+            KeReleaseSpinLock(&c->range_locks_spinlock, old_irql);
+            return;
+        }
+
+        // The event is cleared while the spinlock is still held; chunk_unlock_range
+        // sets it under the same spinlock, so a later set is not wiped by this clear.
+        KeClearEvent(&c->range_locks_event);
+
+        KeReleaseSpinLock(&c->range_locks_spinlock, old_irql);
+
+        // Sleep until a range is released, then rescan from the start.
+        KeWaitForSingleObject(&c->range_locks_event, UserRequest, KernelMode, FALSE, NULL);
+    }
+}
+
+/* Drop a hold taken by chunk_lock_range. The first entry on c->range_locks
+ * whose start and length both match is unlinked and returned to the
+ * range_lock lookaside list. The chunk's range_locks_event is then set,
+ * whether or not a matching entry was found. */
+void chunk_unlock_range(device_extension* Vcb, chunk* c, UINT64 start, UINT64 length) {
+    KIRQL old_irql;
+    LIST_ENTRY* entry;
+
+    KeAcquireSpinLock(&c->range_locks_spinlock, &old_irql);
+
+    for (entry = c->range_locks.Flink; entry != &c->range_locks; entry = entry->Flink) {
+        range_lock* held = CONTAINING_RECORD(entry, range_lock, list_entry);
+
+        if (held->start != start || held->length != length)
+            continue;
+
+        // Stop right after freeing: the entry's links must not be followed again.
+        RemoveEntryList(&held->list_entry);
+        ExFreeToNPagedLookasideList(&Vcb->range_lock_lookaside, held);
+        break;
+    }
+
+    // Signalled while the spinlock is still held, matching the clear in chunk_lock_range.
+    KeSetEvent(&c->range_locks_event, 0, FALSE);
+
+    KeReleaseSpinLock(&c->range_locks_spinlock, old_irql);
+}
+
#ifdef _DEBUG
static void STDCALL init_serial() {
NTSTATUS Status;
#ifndef _MSC_VER
__get_cpuid(1, &cpuInfo[0], &cpuInfo[1], &cpuInfo[2], &cpuInfo[3]);
have_sse42 = cpuInfo[2] & bit_SSE4_2;
+ have_sse2 = cpuInfo[3] & bit_SSE2;
#else
__cpuid(cpuInfo, 1);
have_sse42 = cpuInfo[2] & (1 << 20);
+ have_sse2 = cpuInfo[3] & (1 << 26);
#endif
if (have_sse42)
TRACE("SSE4.2 is supported\n");
else
TRACE("SSE4.2 not supported\n");
+
+ if (have_sse2)
+ TRACE("SSE2 is supported\n");
+ else
+ TRACE("SSE2 is not supported\n");
}
#endif
#define BTRFS_ROOT_FSTREE 5
#define BTRFS_ROOT_CHECKSUM 7
#define BTRFS_ROOT_UUID 9
+#define BTRFS_ROOT_DATA_RELOC 0xFFFFFFFFFFFFFFF7
#define BTRFS_COMPRESSION_NONE 0
#define BTRFS_COMPRESSION_ZLIB 1
#define BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA 0x0100
#define BTRFS_INCOMPAT_FLAGS_NO_HOLES 0x0200
+#define BTRFS_SUPERBLOCK_FLAGS_SEEDING 0x100000000
+
#pragma pack(push, 1)
typedef struct {
UINT64 offset;
} KEY;
-#define HEADER_FLAG_MIXED_BACKREF 0x100000000000000
+#define HEADER_FLAG_WRITTEN 0x000000000000001
#define HEADER_FLAG_SHARED_BACKREF 0x000000000000002
+#define HEADER_FLAG_MIXED_BACKREF 0x100000000000000
typedef struct {
UINT8 csum[32];
BLOCK "080904b0"
BEGIN
VALUE "FileDescription", "WinBtrfs"
- VALUE "FileVersion", "0.5"
+ VALUE "FileVersion", "0.7"
VALUE "InternalName", "btrfs"
VALUE "LegalCopyright", "Copyright (c) Mark Harmstone 2016"
VALUE "OriginalFilename", "btrfs.sys"
VALUE "ProductName", "WinBtrfs"
- VALUE "ProductVersion", "0.6"
+ VALUE "ProductVersion", "0.7"
END
END
BLOCK "VarFileInfo"
#include <stdio.h>
#include <stdarg.h>
#include <stddef.h>
+#include <emmintrin.h>
#include "btrfs.h"
#ifdef _DEBUG
// #define DEBUG_FCB_REFCOUNTS
// #define DEBUG_LONG_MESSAGES
+// #define DEBUG_FLUSH_TIMES
+// #define DEBUG_STATS
#define DEBUG_PARANOID
#endif
#define EA_REPARSE "system.reparse"
#define EA_REPARSE_HASH 0x786f6167
+#define EA_EA "user.EA"
+#define EA_EA_HASH 0x8270dd43
+
#define MAX_EXTENT_SIZE 0x8000000 // 128 MB
#define COMPRESSED_EXTENT_SIZE 0x20000 // 128 KB
#define READ_AHEAD_GRANULARITY COMPRESSED_EXTENT_SIZE // really ought to be a multiple of COMPRESSED_EXTENT_SIZE
+#define IO_REPARSE_TAG_LXSS_SYMLINK 0xa000001d // undocumented?
+
#ifdef _MSC_VER
#define try __try
#define except __except
UNICODE_STRING devpath;
UINT64 length;
UINT64 gen1, gen2;
+ BOOL seeding;
BOOL processed;
LIST_ENTRY list_entry;
} volume;
FSRTL_ADVANCED_FCB_HEADER Header;
struct _fcb_nonpaged* nonpaged;
LONG refcount;
- LONG open_count;
struct _device_extension* Vcb;
struct _root* subvol;
UINT64 inode;
LIST_ENTRY extents;
UINT64 last_dir_index;
ANSI_STRING reparse_xattr;
+ ANSI_STRING ea_xattr;
+ ULONG ealen;
LIST_ENTRY hardlinks;
struct _file_ref* fileref;
+ BOOL inode_item_changed;
BOOL index_loaded;
LIST_ENTRY index_list;
BOOL atts_changed, atts_deleted;
BOOL extents_changed;
BOOL reparse_xattr_changed;
+ BOOL ea_changed;
BOOL created;
BOOL ads;
file_ref_nonpaged* nonpaged;
LIST_ENTRY children;
LONG refcount;
+ LONG open_count;
struct _file_ref* parent;
WCHAR* debug_desc;
ACCESS_MASK access;
file_ref* fileref;
UNICODE_STRING filename;
+ ULONG ea_index;
+ BOOL case_sensitive;
+ BOOL user_set_creation_time;
+ BOOL user_set_access_time;
+ BOOL user_set_write_time;
+ BOOL user_set_change_time;
} ccb;
// typedef struct _log_to_phys {
LIST_ENTRY list_entry;
UINT64 new_address;
BOOL has_new_address;
+ BOOL updated_extents;
UINT64 flags;
BOOL write;
} tree;
typedef struct _root {
UINT64 id;
+ LONGLONG lastinode; // signed so we can use InterlockedIncrement64
tree_holder treeholder;
root_nonpaged* nonpaged;
- UINT64 lastinode;
ROOT_ITEM root_item;
UNICODE_STRING path;
LIST_ENTRY fcbs;
LIST_ENTRY list_entry;
} root;
+enum batch_operation { // kind of operation a batch_item records (stored in batch_item.operation)
+ Batch_Insert,
+ Batch_SetXattr,
+ Batch_DirItem,
+ Batch_InodeRef,
+ Batch_InodeExtRef,
+}; // NOTE(review): per-value semantics are defined by the batch-processing code, which is not visible here
+
+typedef struct { // one queued batch entry; mount_vol sizes Vcb->batch_item_lookaside with sizeof(batch_item)
+ KEY key; // key associated with this entry
+ void* data; // payload for the operation - ownership is not visible here, TODO confirm
+ UINT16 datalen; // presumably the size of data in bytes - TODO confirm
+ enum batch_operation operation; // which batch operation this entry requests
+ LIST_ENTRY list_entry; // presumably links the entry into batch_root.items - TODO confirm
+} batch_item;
+
+typedef struct { // per-root grouping used by the batch list code
+ root* r; // the root this group refers to
+ LIST_ENTRY items; // list head - presumably of batch_item entries for r, TODO confirm
+ LIST_ENTRY list_entry; // presumably links this group into a caller's batch list - TODO confirm
+} batch_root;
+
typedef struct {
tree* tree;
tree_data* item;
PDEVICE_OBJECT devobj;
DEV_ITEM devitem;
BOOL removable;
+ BOOL seeding;
+ BOOL readonly;
+ BOOL ssd;
+ BOOL trim;
ULONG change_count;
UINT64 length;
LIST_ENTRY space;
} device;
+typedef struct { // one range of a chunk held by a thread; created by chunk_lock_range, freed by chunk_unlock_range
+ UINT64 start; // start of the held range
+ UINT64 length; // extent of the held range, i.e. [start, start + length)
+ PETHREAD thread; // owner, taken from PsGetCurrentThread() when the lock is requested
+ LIST_ENTRY list_entry; // link in the owning chunk's range_locks list
+} range_lock;
+
typedef struct {
CHUNK_ITEM* chunk_item;
UINT32 size;
LIST_ENTRY space_size;
LIST_ENTRY deleting;
LIST_ENTRY changed_extents;
+ LIST_ENTRY range_locks;
+ KSPIN_LOCK range_locks_spinlock;
+ KEVENT range_locks_event;
ERESOURCE lock;
ERESOURCE changed_extents_lock;
BOOL created;
+ BOOL readonly;
LIST_ENTRY list_entry;
LIST_ENTRY list_entry_changed;
UINT64 count;
UINT64 old_count;
BOOL no_csum;
+ BOOL superseded;
LIST_ENTRY refs;
LIST_ENTRY old_refs;
LIST_ENTRY list_entry;
} changed_extent;
typedef struct {
- EXTENT_DATA_REF edr;
+ UINT8 type;
+
+ union {
+ EXTENT_DATA_REF edr;
+ SHARED_DATA_REF sdr;
+ };
+
LIST_ENTRY list_entry;
} changed_extent_ref;
-typedef struct {
- UINT64 address;
- UINT64 size;
- EXTENT_DATA_REF edr;
- LIST_ENTRY list_entry;
-} shared_data_entry;
-
-typedef struct {
- UINT64 address;
- UINT64 parent;
- LIST_ENTRY entries;
- LIST_ENTRY list_entry;
-} shared_data;
-
typedef struct {
KEY key;
void* data;
LIST_ENTRY list_entry;
} sys_chunk;
-typedef struct {
- PIRP Irp;
- LIST_ENTRY list_entry;
-} thread_job;
-
-typedef struct {
- PDEVICE_OBJECT DeviceObject;
- HANDLE handle;
- KEVENT event, finished;
- BOOL quit;
- LIST_ENTRY jobs;
- KSPIN_LOCK spin_lock;
-} drv_thread;
-
-typedef struct {
- ULONG num_threads;
- LONG next_thread;
- drv_thread* threads;
- LONG pending_jobs;
-} drv_threads;
-
typedef struct {
BOOL ignore;
BOOL compress;
UINT32 flush_interval;
UINT32 max_inline;
UINT64 subvol_id;
+ UINT32 raid5_recalculation;
+ UINT32 raid6_recalculation;
} mount_options;
#define VCB_TYPE_VOLUME 1
#define VCB_TYPE_PARTITION0 2
+#ifdef DEBUG_STATS
+typedef struct { // counters kept only in DEBUG_STATS builds; embedded in device_extension as `stats`
+ UINT64 num_reads; // presumably the number of read operations - TODO confirm
+ UINT64 data_read; // presumably the amount of data read; units not visible here - TODO confirm
+ UINT64 read_total_time; // NOTE(review): time units for the three *_time fields are not visible here
+ UINT64 read_csum_time; // presumably time spent on checksum work during reads - TODO confirm
+ UINT64 read_disk_time; // presumably time spent waiting on the device during reads - TODO confirm
+} debug_stats;
+#endif
+
typedef struct _device_extension {
UINT32 type;
mount_options options;
PVPB Vpb;
device* devices;
+#ifdef DEBUG_STATS
+ debug_stats stats;
+#endif
UINT64 devices_loaded;
// DISK_GEOMETRY geometry;
superblock superblock;
BOOL readonly;
BOOL removing;
BOOL locked;
+ BOOL disallow_dismount;
+ BOOL trim;
PFILE_OBJECT locked_fileobj;
fcb* volume_fcb;
file_ref* root_fileref;
+ LONG open_files;
ERESOURCE DirResource;
KSPIN_LOCK FcbListLock;
ERESOURCE fcb_lock;
root* checksum_root;
root* dev_root;
root* uuid_root;
+ root* data_reloc_root;
BOOL log_to_phys_loaded;
LIST_ENTRY sys_chunks;
LIST_ENTRY chunks;
ERESOURCE checksum_lock;
ERESOURCE chunk_lock;
LIST_ENTRY sector_checksums;
- LIST_ENTRY shared_extents;
- KSPIN_LOCK shared_extents_lock;
HANDLE flush_thread_handle;
KTIMER flush_thread_timer;
KEVENT flush_thread_finished;
- drv_threads threads;
PFILE_OBJECT root_file;
+ PAGED_LOOKASIDE_LIST tree_data_lookaside;
+ PAGED_LOOKASIDE_LIST traverse_ptr_lookaside;
+ PAGED_LOOKASIDE_LIST rollback_item_lookaside;
+ PAGED_LOOKASIDE_LIST batch_item_lookaside;
+ NPAGED_LOOKASIDE_LIST range_lock_lookaside;
LIST_ENTRY list_entry;
} device_extension;
*stripeoff = initoff + startoff - (*stripe * stripe_length);
}
+/* We only have 64 bits for a file ID, which isn't technically enough to be
+ * unique on Btrfs. We fudge it by having three bytes for the subvol and
+ * five for the inode, which should be good enough.
+ * Inodes are also 64 bits on Linux, but the Linux driver seems to get round
+ * this by tricking it into thinking subvols are separate volumes. */
+#ifdef __REACTOS__
+static __inline UINT64 make_file_id(root* r, UINT64 inode) {
+#else
+static UINT64 __inline make_file_id(root* r, UINT64 inode) {
+#endif
+    // The subvolume id is shifted above bit 39; the low 40 bits carry the inode.
+    const UINT64 subvol_bits = r->id << 40;
+    const UINT64 inode_bits = inode & 0xffffffffff;
+
+    return subvol_bits | inode_bits;
+}
+
+/* Three-way comparison of two KEY values, ordered by obj_id, then obj_type,
+ * then offset. Evaluates to -1 if key1 sorts first, 1 if key2 sorts first,
+ * and 0 if all three fields are equal.
+ *
+ * The arguments are KEY expressions, not pointers. Every parameter use is
+ * parenthesized so that arguments such as *ptr or arr[i] expand correctly.
+ * Each argument is expanded several times, so it must not have side effects. */
+#define keycmp(key1, key2)\
+    (((key1).obj_id < (key2).obj_id) ? -1 :\
+    (((key1).obj_id > (key2).obj_id) ? 1 :\
+    (((key1).obj_type < (key2).obj_type) ? -1 :\
+    (((key1).obj_type > (key2).obj_type) ? 1 :\
+    (((key1).offset < (key2).offset) ? -1 :\
+    (((key1).offset > (key2).offset) ? 1 :\
+    0))))))
+
// in btrfs.c
device* find_device_from_uuid(device_extension* Vcb, BTRFS_UUID* uuid);
UINT64 sector_align( UINT64 NumberToBeAligned, UINT64 Alignment );
-int keycmp(const KEY* key1, const KEY* key2);
ULONG STDCALL get_file_attributes(device_extension* Vcb, INODE_ITEM* ii, root* r, UINT64 inode, UINT8 type, BOOL dotfile, BOOL ignore_xa, PIRP Irp);
BOOL STDCALL get_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char* name, UINT32 crc32, UINT8** data, UINT16* datalen, PIRP Irp);
void _free_fcb(fcb* fcb, const char* func, const char* file, unsigned int line);
void _free_fileref(file_ref* fr, const char* func, const char* file, unsigned int line);
-BOOL STDCALL get_last_inode(device_extension* Vcb, root* r, PIRP Irp);
-NTSTATUS add_dir_item(device_extension* Vcb, root* subvol, UINT64 inode, UINT32 crc32, DIR_ITEM* di, ULONG disize, PIRP Irp, LIST_ENTRY* rollback);
NTSTATUS delete_dir_item(device_extension* Vcb, root* subvol, UINT64 parinode, UINT32 crc32, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback);
NTSTATUS delete_inode_ref(device_extension* Vcb, root* subvol, UINT64 inode, UINT64 parinode, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback);
fcb* create_fcb(POOL_TYPE pool_type);
void mark_fcb_dirty(fcb* fcb);
void mark_fileref_dirty(file_ref* fileref);
NTSTATUS delete_fileref(file_ref* fileref, PFILE_OBJECT FileObject, PIRP Irp, LIST_ENTRY* rollback);
+void chunk_lock_range(device_extension* Vcb, chunk* c, UINT64 start, UINT64 length);
+void chunk_unlock_range(device_extension* Vcb, chunk* c, UINT64 start, UINT64 length);
#ifdef _MSC_VER
#define funcname __FUNCTION__
#define free_fcb(fcb) _free_fcb(fcb, funcname, __FILE__, __LINE__)
#define free_fileref(fileref) _free_fileref(fileref, funcname, __FILE__, __LINE__)
+extern BOOL have_sse2;
+
extern UINT32 mount_compress;
extern UINT32 mount_compress_force;
extern UINT32 mount_compress_type;
extern UINT32 mount_zlib_level;
extern UINT32 mount_flush_interval;
extern UINT32 mount_max_inline;
+extern UINT32 mount_raid5_recalculation;
+extern UINT32 mount_raid6_recalculation;
#ifdef _DEBUG
ROLLBACK_SUBTRACT_SPACE
};
+typedef struct { // one entry on a rollback list; mount_vol sizes Vcb->rollback_item_lookaside with sizeof(rollback_item)
+ enum rollback_type type; // which rollback action this entry describes
+ void* ptr; // action-specific data - presumably interpreted according to type, TODO confirm
+ LIST_ENTRY list_entry; // presumably links the entry into the rollback LIST_ENTRY passed to add_rollback - TODO confirm
+} rollback_item;
+
// in treefuncs.c
NTSTATUS STDCALL _find_item(device_extension* Vcb, root* r, traverse_ptr* tp, const KEY* searchkey, BOOL ignore, PIRP Irp, const char* func, const char* file, unsigned int line);
BOOL STDCALL _find_next_item(device_extension* Vcb, const traverse_ptr* tp, traverse_ptr* next_tp, BOOL ignore, PIRP Irp, const char* func, const char* file, unsigned int line);
NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree** pt, tree* parent, PIRP Irp, const char* func, const char* file, unsigned int line);
NTSTATUS STDCALL _do_load_tree(device_extension* Vcb, tree_holder* th, root* r, tree* t, tree_data* td, BOOL* loaded, PIRP Irp,
const char* func, const char* file, unsigned int line);
-void clear_rollback(LIST_ENTRY* rollback);
+void clear_rollback(device_extension* Vcb, LIST_ENTRY* rollback);
void do_rollback(device_extension* Vcb, LIST_ENTRY* rollback);
void free_trees_root(device_extension* Vcb, root* r);
-void add_rollback(LIST_ENTRY* rollback, enum rollback_type type, void* ptr);
+void add_rollback(device_extension* Vcb, LIST_ENTRY* rollback, enum rollback_type type, void* ptr);
+void commit_batch_list(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback);
+void clear_batch_list(device_extension* Vcb, LIST_ENTRY* batchlist);
#define find_item(Vcb, r, tp, searchkey, ignore, Irp) _find_item(Vcb, r, tp, searchkey, ignore, Irp, funcname, __FILE__, __LINE__)
#define find_next_item(Vcb, tp, next_tp, ignore, Irp) _find_next_item(Vcb, tp, next_tp, ignore, Irp, funcname, __FILE__, __LINE__)
extern CACHE_MANAGER_CALLBACKS* cache_callbacks;
// in write.c
-NTSTATUS STDCALL do_write(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback);
NTSTATUS write_file(device_extension* Vcb, PIRP Irp, BOOL wait, BOOL deferred_write);
NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void* buf, ULONG* length, BOOL paging_io, BOOL no_cache,
BOOL wait, BOOL deferred_write, LIST_ENTRY* rollback);
NTSTATUS STDCALL write_data(device_extension* Vcb, UINT64 address, void* data, BOOL need_free, UINT32 length, write_data_context* wtc, PIRP Irp, chunk* c);
NTSTATUS STDCALL write_data_complete(device_extension* Vcb, UINT64 address, void* data, UINT32 length, PIRP Irp, chunk* c);
void free_write_data_stripes(write_data_context* wtc);
-NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback);
NTSTATUS STDCALL drv_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
-void flush_fcb(fcb* fcb, BOOL cache, PIRP Irp, LIST_ENTRY* rollback);
BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start_data, UINT64 length, BOOL prealloc, void* data, LIST_ENTRY* changed_sector_list,
PIRP Irp, LIST_ENTRY* rollback, UINT8 compression, UINT64 decoded_size);
NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback);
-NTSTATUS update_changed_extent_ref(device_extension* Vcb, chunk* c, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset,
- signed long long count, BOOL no_csum, UINT64 new_size, PIRP Irp);
NTSTATUS do_write_file(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback);
NTSTATUS write_compressed(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback);
+BOOL find_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address);
+void get_raid56_lock_range(chunk* c, UINT64 address, UINT64 length, UINT64* lockaddr, UINT64* locklen);
// in dirctrl.c
NTSTATUS STDCALL drv_directory_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
// in fileinfo.c
NTSTATUS STDCALL drv_set_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
NTSTATUS STDCALL drv_query_information(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
-NTSTATUS add_inode_ref(device_extension* Vcb, root* subvol, UINT64 inode, UINT64 parinode, UINT64 index, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback);
BOOL has_open_children(file_ref* fileref);
NTSTATUS STDCALL stream_set_end_of_file_information(device_extension* Vcb, UINT64 end, fcb* fcb, file_ref* fileref, PFILE_OBJECT FileObject, BOOL advance_only, LIST_ENTRY* rollback);
NTSTATUS fileref_get_filename(file_ref* fileref, PUNICODE_STRING fn, USHORT* name_offset);
NTSTATUS open_fileref_by_inode(device_extension* Vcb, root* subvol, UINT64 inode, file_ref** pfr, PIRP Irp);
+NTSTATUS STDCALL drv_query_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
+NTSTATUS STDCALL drv_set_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
// in reparse.c
-NTSTATUS get_reparse_point(PDEVICE_OBJECT DeviceObject, PFILE_OBJECT FileObject, void* buffer, DWORD buflen, DWORD* retlen);
+NTSTATUS get_reparse_point(PDEVICE_OBJECT DeviceObject, PFILE_OBJECT FileObject, void* buffer, DWORD buflen, ULONG_PTR* retlen);
NTSTATUS set_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp);
NTSTATUS delete_reparse_point(PDEVICE_OBJECT DeviceObject, PIRP Irp);
// in create.c
NTSTATUS STDCALL drv_create(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
NTSTATUS STDCALL find_file_in_dir(device_extension* Vcb, PUNICODE_STRING filename, file_ref* fr,
- root** subvol, UINT64* inode, UINT8* type, UINT64* index, PANSI_STRING utf8, PIRP Irp);
-NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnus, file_ref* related, BOOL parent, USHORT* unparsed, ULONG* fn_offset, PIRP Irp);
-NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type, PANSI_STRING utf8, fcb* parent, fcb** pfcb, PIRP Irp);
+ root** subvol, UINT64* inode, UINT8* type, UINT64* index, PANSI_STRING utf8, BOOL case_sensitive, PIRP Irp);
+NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnus, file_ref* related, BOOL parent, USHORT* unparsed, ULONG* fn_offset,
+ POOL_TYPE pooltype, BOOL case_sensitive, PIRP Irp);
+NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type, PANSI_STRING utf8, fcb* parent, fcb** pfcb, POOL_TYPE pooltype, PIRP Irp);
NTSTATUS open_fcb_stream(device_extension* Vcb, root* subvol, UINT64 inode, ANSI_STRING* xattr, UINT32 streamhash, fcb* parent, fcb** pfcb, PIRP Irp);
void insert_fileref_child(file_ref* parent, file_ref* child, BOOL do_lock);
NTSTATUS fcb_get_last_dir_index(fcb* fcb, UINT64* index, PIRP Irp);
// in flushthread.c
void STDCALL flush_thread(void* context);
+NTSTATUS STDCALL do_write(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback);
+NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback);
+void flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback);
+NTSTATUS STDCALL write_data_phys(PDEVICE_OBJECT device, UINT64 address, void* data, UINT32 length);
+BOOL is_tree_unique(device_extension* Vcb, tree* t, PIRP Irp);
// in read.c
NTSTATUS STDCALL drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp);
-NTSTATUS STDCALL read_data(device_extension* Vcb, UINT64 addr, UINT32 length, UINT32* csum, BOOL is_tree, UINT8* buf, chunk** pc, PIRP Irp);
+NTSTATUS STDCALL read_data(device_extension* Vcb, UINT64 addr, UINT32 length, UINT32* csum, BOOL is_tree, UINT8* buf, chunk* c, chunk** pc, PIRP Irp);
NTSTATUS STDCALL read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, ULONG* pbr, PIRP Irp);
NTSTATUS do_read(PIRP Irp, BOOL wait, ULONG* bytes_read);
// in free-space.c
NTSTATUS load_free_space_cache(device_extension* Vcb, chunk* c, PIRP Irp);
-NTSTATUS clear_free_space_cache(device_extension* Vcb, PIRP Irp);
+NTSTATUS clear_free_space_cache(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp);
NTSTATUS allocate_cache(device_extension* Vcb, BOOL* changed, PIRP Irp, LIST_ENTRY* rollback);
NTSTATUS update_chunk_caches(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback);
NTSTATUS add_space_entry(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 offset, UINT64 size);
void _space_list_add(device_extension* Vcb, chunk* c, BOOL deleting, UINT64 address, UINT64 length, LIST_ENTRY* rollback, const char* func);
-void _space_list_add2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback, const char* func);
+void _space_list_add2(device_extension* Vcb, LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback, const char* func);
void _space_list_subtract(device_extension* Vcb, chunk* c, BOOL deleting, UINT64 address, UINT64 length, LIST_ENTRY* rollback, const char* func);
-void _space_list_subtract2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback, const char* func);
+void _space_list_subtract2(device_extension* Vcb, LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback, const char* func);
#define space_list_add(Vcb, c, deleting, address, length, rollback) _space_list_add(Vcb, c, deleting, address, length, rollback, funcname)
-#define space_list_add2(list, list_size, address, length, rollback) _space_list_add2(list, list_size, address, length, NULL, rollback, funcname)
+#define space_list_add2(Vcb, list, list_size, address, length, rollback) _space_list_add2(Vcb, list, list_size, address, length, NULL, rollback, funcname)
#define space_list_subtract(Vcb, c, deleting, address, length, rollback) _space_list_subtract(Vcb, c, deleting, address, length, rollback, funcname)
-#define space_list_subtract2(list, list_size, address, length, rollback) _space_list_subtract2(list, list_size, address, length, NULL, rollback, funcname)
+#define space_list_subtract2(Vcb, list, list_size, address, length, rollback) _space_list_subtract2(Vcb, list, list_size, address, length, NULL, rollback, funcname)
// in extent-tree.c
NTSTATUS increase_extent_refcount_data(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 inode, UINT64 offset, UINT32 refcount, PIRP Irp, LIST_ENTRY* rollback);
NTSTATUS decrease_extent_refcount_data(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 inode, UINT64 offset, UINT32 refcount, PIRP Irp, LIST_ENTRY* rollback);
-NTSTATUS decrease_extent_refcount_shared_data(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 treeaddr, UINT64 parent, PIRP Irp, LIST_ENTRY* rollback);
-NTSTATUS decrease_extent_refcount_old(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 treeaddr, PIRP Irp, LIST_ENTRY* rollback);
+NTSTATUS decrease_extent_refcount_tree(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT8 level, PIRP Irp, LIST_ENTRY* rollback);
void decrease_chunk_usage(chunk* c, UINT64 delta);
-NTSTATUS convert_old_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, PIRP Irp, LIST_ENTRY* rollback);
-UINT64 find_extent_data_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, PIRP Irp);
+// NTSTATUS convert_old_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, PIRP Irp, LIST_ENTRY* rollback);
+UINT64 get_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, PIRP Irp);
+BOOL is_extent_unique(device_extension* Vcb, UINT64 address, UINT64 size, PIRP Irp);
+NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT8 type, void* data, KEY* firstitem, UINT8 level, PIRP Irp, LIST_ENTRY* rollback);
+UINT64 get_extent_flags(device_extension* Vcb, UINT64 address, PIRP Irp);
+void update_extent_flags(device_extension* Vcb, UINT64 address, UINT64 flags, PIRP Irp);
+NTSTATUS update_changed_extent_ref(device_extension* Vcb, chunk* c, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset,
+ signed long long count, BOOL no_csum, BOOL superseded, PIRP Irp);
+void add_changed_extent_ref(chunk* c, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, UINT32 count, BOOL no_csum);
+UINT64 find_extent_shared_tree_refcount(device_extension* Vcb, UINT64 address, UINT64 parent, PIRP Irp);
+UINT64 find_extent_shared_data_refcount(device_extension* Vcb, UINT64 address, UINT64 parent, PIRP Irp);
+NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT8 type, void* data, KEY* firstitem,
+ UINT8 level, UINT64 parent, PIRP Irp, LIST_ENTRY* rollback);
// in worker-thread.c
-void STDCALL worker_thread(void* context);
void do_read_job(PIRP Irp);
void do_write_job(device_extension* Vcb, PIRP Irp);
NTSTATUS decompress(UINT8 type, UINT8* inbuf, UINT64 inlen, UINT8* outbuf, UINT64 outlen);
NTSTATUS write_compressed_bit(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, BOOL* compressed, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback);
+// in galois.c
+void galois_double(UINT8* data, UINT32 len);
+void galois_divpower(UINT8* data, UINT8 div, UINT32 readlen);
+UINT8 gpow2(UINT8 e);
+UINT8 gmul(UINT8 a, UINT8 b);
+UINT8 gdiv(UINT8 a, UINT8 b);
+
+// in devctrl.c
+NTSTATUS STDCALL drv_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp);
+
#define fast_io_possible(fcb) (!FsRtlAreThereCurrentFileLocks(&fcb->lock) && !fcb->Vcb->readonly ? FastIoIsPossible : FastIoIsQuestionable)
static __inline void print_open_trees(device_extension* Vcb) {
}
}
-static __inline void InsertAfter(LIST_ENTRY* head, LIST_ENTRY* item, LIST_ENTRY* before) {
- item->Flink = before->Flink;
- before->Flink = item;
- item->Blink = before;
-
- if (item->Flink != head)
- item->Flink->Blink = item;
- else
- head->Blink = item;
-}
-
static __inline BOOL write_fcb_compressed(fcb* fcb) {
// make sure we don't accidentally write the cache inodes or pagefile compressed
if (fcb->subvol->id == BTRFS_ROOT_ROOT || fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE)
return FALSE;
}
+static __inline void do_xor(UINT8* buf1, UINT8* buf2, UINT32 len) { // XORs len bytes of buf2 into buf1 in place (buf1[i] ^= buf2[i]); buf2 is only read
+ UINT32 j;
+#ifndef __REACTOS__
+ __m128i x1, x2;
+#endif
+
+#ifndef __REACTOS__ // the SSE2 fast path below is compiled out when __REACTOS__ is defined
+ if (have_sse2 && ((uintptr_t)buf1 & 0xf) == 0 && ((uintptr_t)buf2 & 0xf) == 0) { // taken only if have_sse2 is set and both pointers are 16-byte aligned, as the aligned load/store intrinsics are used
+ while (len >= 16) { // 16 bytes per iteration; buf1, buf2 and len are advanced so the scalar loop below sees only the leftover
+ x1 = _mm_load_si128((__m128i*)buf1);
+ x2 = _mm_load_si128((__m128i*)buf2);
+ x1 = _mm_xor_si128(x1, x2);
+ _mm_store_si128((__m128i*)buf1, x1);
+
+ buf1 += 16;
+ buf2 += 16;
+ len -= 16;
+ }
+ }
+#endif
+
+ for (j = 0; j < len; j++) { // byte-wise pass: the whole buffer if the SSE2 path was skipped, otherwise the remaining (fewer than 16) bytes
+ *buf1 ^= *buf2;
+ buf1++;
+ buf2++;
+ }
+}
+
#ifdef DEBUG_FCB_REFCOUNTS
#ifdef DEBUG_LONG_MESSAGES
#define increase_fileref_refcount(fileref) {\
#define S_IXOTH (S_IXGRP >> 3)
#endif
+// LXSS programs can be distinguished by the fact they have a NULL PEB.
+#ifdef _AMD64_
+ static __inline BOOL called_from_lxss() { // returns TRUE when the pointer-sized value at byte offset 0x3f8 of the current process object is zero
+ UINT8* proc = (UINT8*)PsGetCurrentProcess(); // treat the process object as raw bytes so a fixed offset can be applied
+ ULONG_PTR* peb = (ULONG_PTR*)&proc[0x3f8]; // NOTE(review): 0x3f8 is presumably the offset of the PEB pointer inside EPROCESS on the targeted x64 Windows builds; it is hard-coded, so confirm it for every supported OS version
+
+ return !*peb;
+ }
+#else
+#define called_from_lxss() FALSE // builds without _AMD64_ never report an LXSS caller
+#endif
+
#if defined(__REACTOS__) && (NTDDI_VERSION < NTDDI_WIN7)
NTSTATUS WINAPI RtlUnicodeToUTF8N(CHAR *utf8_dest, ULONG utf8_bytes_max,
ULONG *utf8_bytes_written,
while (le != &fcb->Vcb->chunks) {
c = CONTAINING_RECORD(le, chunk, list_entry);
- ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-
- if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) {
- if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, changed_sector_list, Irp, rollback, compression, end_data - start_data)) {
- ExReleaseResourceLite(&c->lock);
- ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
-
- if (compression != BTRFS_COMPRESSION_NONE)
- ExFreePool(comp_data);
-
- return STATUS_SUCCESS;
+ if (!c->readonly) {
+ ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+
+ if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) {
+ if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, changed_sector_list, Irp, rollback, compression, end_data - start_data)) {
+ ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
+
+ if (compression != BTRFS_COMPRESSION_NONE)
+ ExFreePool(comp_data);
+
+ return STATUS_SUCCESS;
+ }
}
+
+ ExReleaseResourceLite(&c->lock);
}
-
- ExReleaseResourceLite(&c->lock);
le = le->Flink;
}
if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) {
if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, changed_sector_list, Irp, rollback, compression, end_data - start_data)) {
- ExReleaseResourceLite(&c->lock);
-
if (compression != BTRFS_COMPRESSION_NONE)
ExFreePool(comp_data);
while (le != &fcb->Vcb->chunks) {
c = CONTAINING_RECORD(le, chunk, list_entry);
- ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-
- if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) {
- if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, changed_sector_list, Irp, rollback, compression, end_data - start_data)) {
- ExReleaseResourceLite(&c->lock);
- ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
-
- if (compression != BTRFS_COMPRESSION_NONE)
- ExFreePool(comp_data);
-
- return STATUS_SUCCESS;
+ if (!c->readonly) {
+ ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+
+ if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) {
+ if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, changed_sector_list, Irp, rollback, compression, end_data - start_data)) {
+ ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
+
+ if (compression != BTRFS_COMPRESSION_NONE)
+ ExFreePool(comp_data);
+
+ return STATUS_SUCCESS;
+ }
}
+
+ ExReleaseResourceLite(&c->lock);
}
-
- ExReleaseResourceLite(&c->lock);
le = le->Flink;
}
if (c->chunk_item->type == fcb->Vcb->data_flags && (c->chunk_item->size - c->used) >= comp_length) {
if (insert_extent_chunk(fcb->Vcb, fcb, c, start_data, comp_length, FALSE, comp_data, changed_sector_list, Irp, rollback, compression, end_data - start_data)) {
- ExReleaseResourceLite(&c->lock);
-
if (compression != BTRFS_COMPRESSION_NONE)
ExFreePool(comp_data);
crc = _mm_crc32_u8(crc, *buf);
}
-#ifdef __x86_64__
+#ifdef _AMD64_
CALC_CRC(_mm_crc32_u64, crc, UINT64, buf, len);
#endif
CALC_CRC(_mm_crc32_u32, crc, UINT32, buf, len);
extern PDEVICE_OBJECT devobj;
+static WCHAR datastring[] = L"::$DATA";
+
static NTSTATUS find_file_dir_index(device_extension* Vcb, root* r, UINT64 inode, UINT64 parinode, PANSI_STRING utf8, UINT64* pindex, PIRP Irp) {
KEY searchkey;
traverse_ptr tp;
return Status;
}
- if (!keycmp(&tp.item->key, &searchkey)) {
+ if (!keycmp(tp.item->key, searchkey)) {
INODE_REF* ir;
ULONG len;
return Status;
}
- if (!keycmp(&tp.item->key, &searchkey)) {
+ if (!keycmp(tp.item->key, searchkey)) {
INODE_EXTREF* ier;
ULONG len;
return Status;
}
- if (keycmp(&tp.item->key, &searchkey)) {
+ if (keycmp(tp.item->key, searchkey)) {
ERR("couldn't find (%llx,%x,%llx) in root tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
return STATUS_INTERNAL_ERROR;
}
return Status;
}
- if (keycmp(&tp.item->key, &searchkey) == -1) {
+ if (keycmp(tp.item->key, searchkey) == -1) {
if (find_next_item(fcb->Vcb, &tp, &next_tp, FALSE, Irp)) {
tp = next_tp;
ExFreePool(ie);
}
} else
- mark_fcb_dirty(fcb);
+ mark_fcb_dirty(fcb); // It's not necessarily dirty, but this is an easy way of making sure
+ // the list remains in memory until the next flush.
return Status;
}
}
static NTSTATUS STDCALL find_file_in_dir_with_crc32(device_extension* Vcb, PUNICODE_STRING filename, UINT32 crc32, file_ref* fr,
- root** subvol, UINT64* inode, UINT8* type, UINT64* pindex, PANSI_STRING utf8, PIRP Irp) {
+ root** subvol, UINT64* inode, UINT8* type, UINT64* pindex, PANSI_STRING utf8,
+ BOOL case_sensitive, PIRP Irp) {
DIR_ITEM* di;
KEY searchkey;
traverse_ptr tp;
TRACE("found item %llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
- if (!keycmp(&searchkey, &tp.item->key)) {
+ if (!keycmp(searchkey, tp.item->key)) {
UINT32 size = tp.item->size;
// found by hash
us.Buffer = utf16;
us.Length = us.MaximumLength = (USHORT)stringlen;
- if (FsRtlAreNamesEqual(filename, &us, TRUE, NULL)) {
+ if (FsRtlAreNamesEqual(filename, &us, !case_sensitive, NULL)) {
UINT64 index;
if (di->key.obj_type == TYPE_ROOT_ITEM) {
file_ref* fr2 = CONTAINING_RECORD(le, file_ref, list_entry);
if (fr2->index == index) {
- if (fr2->deleted || !FsRtlAreNamesEqual(&fr2->filepart, filename, TRUE, NULL)) {
+ if (fr2->deleted || !FsRtlAreNamesEqual(&fr2->filepart, filename, !case_sensitive, NULL)) {
goto byindex;
}
break;
}
byindex:
+ if (case_sensitive)
+ return STATUS_OBJECT_NAME_NOT_FOUND;
+
Status = find_file_in_dir_index(fr, filename, subvol, inode, type, pindex, utf8, Irp);
if (!NT_SUCCESS(Status) && Status != STATUS_OBJECT_NAME_NOT_FOUND) {
ERR("find_file_in_dir_index returned %08x\n", Status);
}
NTSTATUS STDCALL find_file_in_dir(device_extension* Vcb, PUNICODE_STRING filename, file_ref* fr,
- root** subvol, UINT64* inode, UINT8* type, UINT64* index, PANSI_STRING utf8, PIRP Irp) {
+ root** subvol, UINT64* inode, UINT8* type, UINT64* index, PANSI_STRING utf8,
+ BOOL case_sensitive, PIRP Irp) {
char* fn;
UINT32 crc32;
ULONG utf8len;
crc32 = calc_crc32c(0xfffffffe, (UINT8*)fn, (ULONG)utf8len);
TRACE("crc32c(%.*s) = %08x\n", utf8len, fn, crc32);
- return find_file_in_dir_with_crc32(Vcb, filename, crc32, fr, subvol, inode, type, index, utf8, Irp);
+ return find_file_in_dir_with_crc32(Vcb, filename, crc32, fr, subvol, inode, type, index, utf8, case_sensitive, Irp);
}
static BOOL find_stream(device_extension* Vcb, fcb* fcb, PUNICODE_STRING stream, PUNICODE_STRING newstreamname, UINT32* hash, PANSI_STRING xattr, PIRP Irp) {
crc32 = calc_crc32c(0xfffffffe, (UINT8*)utf8, utf8len);
TRACE("crc32 = %08x\n", crc32);
+ if ((crc32 == EA_DOSATTRIB_HASH && utf8len == strlen(EA_DOSATTRIB) && RtlCompareMemory(utf8, EA_DOSATTRIB, utf8len) == utf8len) ||
+ (crc32 == EA_EA_HASH && utf8len == strlen(EA_EA) && RtlCompareMemory(utf8, EA_EA, utf8len) == utf8len)) {
+ return FALSE;
+ }
+
searchkey.obj_id = fcb->inode;
searchkey.obj_type = TYPE_XATTR_ITEM;
searchkey.offset = crc32;
goto end;
}
- if (!keycmp(&tp.item->key, &searchkey)) {
+ if (!keycmp(tp.item->key, searchkey)) {
if (tp.item->size < sizeof(DIR_ITEM)) {
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM));
} else {
// }
// #endif
-static file_ref* search_fileref_children(file_ref* dir, PUNICODE_STRING name) {
+static file_ref* search_fileref_children(file_ref* dir, PUNICODE_STRING name, BOOL case_sensitive) {
LIST_ENTRY* le;
file_ref *c, *deleted = NULL;
NTSTATUS Status;
#ifdef DEBUG_FCB_REFCOUNTS
ULONG rc;
#endif
+
+ if (case_sensitive) {
+ le = dir->children.Flink;
+ while (le != &dir->children) {
+ c = CONTAINING_RECORD(le, file_ref, list_entry);
+
+ if (c->refcount > 0 && c->filepart.Length == name->Length &&
+ RtlCompareMemory(c->filepart.Buffer, name->Buffer, name->Length) == name->Length) {
+ if (c->deleted) {
+ deleted = c;
+ } else {
+#ifdef DEBUG_FCB_REFCOUNTS
+ rc = InterlockedIncrement(&c->refcount);
+ WARN("fileref %p: refcount now %i (%S)\n", c, rc, file_desc_fileref(c));
+#else
+ InterlockedIncrement(&c->refcount);
+#endif
+ return c;
+ }
+ }
+
+ le = le->Flink;
+ }
+
+ goto end;
+ }
Status = RtlUpcaseUnicodeString(&ucus, name, TRUE);
if (!NT_SUCCESS(Status)) {
ERR("RtlUpcaseUnicodeString returned %08x\n", Status);
return NULL;
}
-
+
le = dir->children.Flink;
while (le != &dir->children) {
c = CONTAINING_RECORD(le, file_ref, list_entry);
le = le->Flink;
}
+ ExFreePool(ucus.Buffer);
+
+end:
if (deleted)
increase_fileref_refcount(deleted);
- ExFreePool(ucus.Buffer);
-
return deleted;
}
-static UINT64 get_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, PIRP Irp) {
- KEY searchkey;
- traverse_ptr tp;
- NTSTATUS Status;
- EXTENT_ITEM* ei;
-
- searchkey.obj_id = address;
- searchkey.obj_type = TYPE_EXTENT_ITEM;
- searchkey.offset = size;
-
- Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return 0;
- }
-
- if (keycmp(&searchkey, &tp.item->key)) {
- ERR("couldn't find (%llx,%x,%llx) in extent tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
- return 0;
- }
-
- if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
- EXTENT_ITEM_V0* eiv0 = (EXTENT_ITEM_V0*)tp.item->data;
-
- return eiv0->refcount;
- } else if (tp.item->size < sizeof(EXTENT_ITEM)) {
- ERR("(%llx,%x,%llx) was %x bytes, expected at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type,
- tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
- return 0;
- }
-
- ei = (EXTENT_ITEM*)tp.item->data;
-
- return ei->refcount;
-}
-
-NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type, PANSI_STRING utf8, fcb* parent, fcb** pfcb, PIRP Irp) {
+NTSTATUS open_fcb(device_extension* Vcb, root* subvol, UINT64 inode, UINT8 type, PANSI_STRING utf8, fcb* parent, fcb** pfcb, POOL_TYPE pooltype, PIRP Irp) {
KEY searchkey;
traverse_ptr tp;
NTSTATUS Status;
fcb* fcb;
BOOL b;
+ UINT8* eadata;
+ UINT16 ealen;
if (!IsListEmpty(&subvol->fcbs)) {
LIST_ENTRY* le = subvol->fcbs.Flink;
}
}
- fcb = create_fcb(PagedPool);
+ fcb = create_fcb(pooltype);
if (!fcb) {
ERR("out of memory\n");
return STATUS_INSUFFICIENT_RESOURCES;
fcb_get_sd(fcb, parent, Irp);
- if (fcb->type == BTRFS_TYPE_DIRECTORY) {
+ if (fcb->type == BTRFS_TYPE_DIRECTORY && fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT) {
UINT8* xattrdata;
UINT16 xattrlen;
if (get_xattr(Vcb, subvol, inode, EA_REPARSE, EA_REPARSE_HASH, &xattrdata, &xattrlen, Irp)) {
fcb->reparse_xattr.Buffer = (char*)xattrdata;
fcb->reparse_xattr.Length = fcb->reparse_xattr.MaximumLength = xattrlen;
+ } else {
+ fcb->atts &= ~FILE_ATTRIBUTE_REPARSE_POINT;
+
+ if (!Vcb->readonly && !(subvol->root_item.flags & BTRFS_SUBVOL_READONLY)) {
+ fcb->atts_changed = TRUE;
+ mark_fcb_dirty(fcb);
+ }
+ }
+ }
+
+ fcb->ealen = 0;
+
+ if (get_xattr(Vcb, subvol, inode, EA_EA, EA_EA_HASH, &eadata, &ealen, Irp)) {
+ ULONG offset;
+
+ Status = IoCheckEaBufferValidity((FILE_FULL_EA_INFORMATION*)eadata, ealen, &offset);
+
+ if (!NT_SUCCESS(Status)) {
+ WARN("IoCheckEaBufferValidity returned %08x (error at offset %u)\n", Status, offset);
+ ExFreePool(eadata);
+ } else {
+ FILE_FULL_EA_INFORMATION* eainfo;
+ fcb->ea_xattr.Buffer = (char*)eadata;
+ fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = ealen;
+
+ fcb->ealen = 4;
+
+ // calculate ealen
+ eainfo = (FILE_FULL_EA_INFORMATION*)eadata;
+ do {
+ fcb->ealen += 5 + eainfo->EaNameLength + eainfo->EaValueLength;
+
+ if (eainfo->NextEntryOffset == 0)
+ break;
+
+ eainfo = (FILE_FULL_EA_INFORMATION*)(((UINT8*)eainfo) + eainfo->NextEntryOffset);
+ } while (TRUE);
}
}
if (ed2->address == 0 && ed2->size == 0) // sparse
goto nextitem;
- if (ed2->size != 0)
- unique = get_extent_refcount(fcb->Vcb, ed2->address, ed2->size, Irp) == 1;
+ if (ed2->size != 0 && is_tree_unique(Vcb, tp.tree, Irp))
+ unique = is_extent_unique(Vcb, ed2->address, ed2->size, Irp);
}
- ext = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG);
+ ext = ExAllocatePoolWithTag(pooltype, sizeof(extent), ALLOC_TAG);
if (!ext) {
ERR("out of memory\n");
free_fcb(fcb);
return STATUS_INSUFFICIENT_RESOURCES;
}
- ext->data = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
+ ext->data = ExAllocatePoolWithTag(pooltype, tp.item->size, ALLOC_TAG);
if (!ext->data) {
ERR("out of memory\n");
ExFreePool(ext);
hardlink* hl;
ULONG stringlen;
- hl = ExAllocatePoolWithTag(PagedPool, sizeof(hardlink), ALLOC_TAG);
+ hl = ExAllocatePoolWithTag(pooltype, sizeof(hardlink), ALLOC_TAG);
if (!hl) {
ERR("out of memory\n");
free_fcb(fcb);
hl->utf8.Length = hl->utf8.MaximumLength = ir->n;
if (hl->utf8.Length > 0) {
- hl->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, hl->utf8.MaximumLength, ALLOC_TAG);
+ hl->utf8.Buffer = ExAllocatePoolWithTag(pooltype, hl->utf8.MaximumLength, ALLOC_TAG);
RtlCopyMemory(hl->utf8.Buffer, ir->name, ir->n);
}
if (stringlen == 0)
hl->name.Buffer = NULL;
else {
- hl->name.Buffer = ExAllocatePoolWithTag(PagedPool, hl->name.MaximumLength, ALLOC_TAG);
+ hl->name.Buffer = ExAllocatePoolWithTag(pooltype, hl->name.MaximumLength, ALLOC_TAG);
if (!hl->name.Buffer) {
ERR("out of memory\n");
hardlink* hl;
ULONG stringlen;
- hl = ExAllocatePoolWithTag(PagedPool, sizeof(hardlink), ALLOC_TAG);
+ hl = ExAllocatePoolWithTag(pooltype, sizeof(hardlink), ALLOC_TAG);
if (!hl) {
ERR("out of memory\n");
free_fcb(fcb);
hl->utf8.Length = hl->utf8.MaximumLength = ier->n;
if (hl->utf8.Length > 0) {
- hl->utf8.Buffer = ExAllocatePoolWithTag(PagedPool, hl->utf8.MaximumLength, ALLOC_TAG);
+ hl->utf8.Buffer = ExAllocatePoolWithTag(pooltype, hl->utf8.MaximumLength, ALLOC_TAG);
RtlCopyMemory(hl->utf8.Buffer, ier->name, ier->n);
}
if (stringlen == 0)
hl->name.Buffer = NULL;
else {
- hl->name.Buffer = ExAllocatePoolWithTag(PagedPool, hl->name.MaximumLength, ALLOC_TAG);
+ hl->name.Buffer = ExAllocatePoolWithTag(pooltype, hl->name.MaximumLength, ALLOC_TAG);
if (!hl->name.Buffer) {
ERR("out of memory\n");
return Status;
}
- if (keycmp(&tp.item->key, &searchkey)) {
+ if (keycmp(tp.item->key, searchkey)) {
ERR("error - could not find key for xattr\n");
free_fcb(fcb);
return STATUS_INTERNAL_ERROR;
ExReleaseResourceLite(&parent->nonpaged->children_lock);
}
-NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnus, file_ref* related, BOOL parent, USHORT* unparsed, ULONG* fn_offset, PIRP Irp) {
+NTSTATUS open_fileref(device_extension* Vcb, file_ref** pfr, PUNICODE_STRING fnus, file_ref* related, BOOL parent, USHORT* unparsed, ULONG* fn_offset,
+ POOL_TYPE pooltype, BOOL case_sensitive, PIRP Irp) {
UNICODE_STRING fnus2;
file_ref *dir, *sf, *sf2;
ULONG i, num_parts;
}
if (fnus2.Length == sizeof(WCHAR)) {
- if (Vcb->root_fileref->fcb->open_count == 0) { // don't allow root to be opened on unmounted FS
+ if (Vcb->root_fileref->open_count == 0) { // don't allow root to be opened on unmounted FS
ULONG cc;
IO_STATUS_BLOCK iosb;
if (fnus->Length == 0) {
num_parts = 0;
+ } else if (fnus->Length == wcslen(datastring) * sizeof(WCHAR) &&
+ RtlCompareMemory(fnus->Buffer, datastring, wcslen(datastring) * sizeof(WCHAR)) == wcslen(datastring) * sizeof(WCHAR)) {
+ num_parts = 0;
} else {
Status = split_path(&fnus2, &parts, &num_parts, &has_stream);
if (!NT_SUCCESS(Status)) {
goto end2;
}
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
-
for (i = 0; i < num_parts; i++) {
BOOL lastpart = (i == num_parts-1) || (i == num_parts-2 && has_stream);
- sf2 = search_fileref_children(sf, &parts[i]);
+ sf2 = search_fileref_children(sf, &parts[i], case_sensitive);
if (sf2 && sf2->fcb->type != BTRFS_TYPE_DIRECTORY && !lastpart) {
WARN("passed path including file as subdirectory\n");
goto end;
}
+ if (sf2 && sf2->deleted) {
+ TRACE("element in path has been deleted\n");
+ free_fileref(sf2);
+ Status = lastpart ? STATUS_OBJECT_NAME_NOT_FOUND : STATUS_OBJECT_PATH_NOT_FOUND;
+ goto end;
+ }
+
if (!sf2) {
if (has_stream && i == num_parts - 1) {
UNICODE_STRING streamname;
UINT8 type;
ANSI_STRING utf8;
- Status = find_file_in_dir(Vcb, &parts[i], sf, &subvol, &inode, &type, &index, &utf8, Irp);
+ Status = find_file_in_dir(Vcb, &parts[i], sf, &subvol, &inode, &type, &index, &utf8, case_sensitive, Irp);
if (Status == STATUS_OBJECT_NAME_NOT_FOUND) {
TRACE("could not find %.*S\n", parts[i].Length / sizeof(WCHAR), parts[i].Buffer);
fcb* fcb;
ULONG strlen;
- Status = open_fcb(Vcb, subvol, inode, type, &utf8, sf->fcb, &fcb, Irp);
+ Status = open_fcb(Vcb, subvol, inode, type, &utf8, sf->fcb, &fcb, pooltype, Irp);
if (!NT_SUCCESS(Status)) {
ERR("open_fcb returned %08x\n", Status);
goto end;
end:
free_fileref(sf);
- ExReleaseResourceLite(&Vcb->fcb_lock);
end2:
if (parts)
return Status;
}
-static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_STRING fpus, file_ref* parfileref, ULONG options, file_ref** pfr, LIST_ENTRY* rollback) {
+static NTSTATUS STDCALL file_create2(PIRP Irp, device_extension* Vcb, PUNICODE_STRING fpus, file_ref* parfileref, ULONG options,
+ FILE_FULL_EA_INFORMATION* ea, ULONG ealen, file_ref** pfr, LIST_ENTRY* rollback) {
NTSTATUS Status;
fcb* fcb;
ULONG utf8len;
parfileref->fcb->inode_item.st_mtime = now;
ExReleaseResourceLite(parfileref->fcb->Header.Resource);
+ parfileref->fcb->inode_item_changed = TRUE;
mark_fcb_dirty(parfileref->fcb);
- if (parfileref->fcb->subvol->lastinode == 0)
- get_last_inode(Vcb, parfileref->fcb->subvol, Irp);
-
- inode = parfileref->fcb->subvol->lastinode + 1;
+ inode = InterlockedIncrement64(&parfileref->fcb->subvol->lastinode);
type = options & FILE_DIRECTORY_FILE ? BTRFS_TYPE_DIRECTORY : BTRFS_TYPE_FILE;
if (IrpSp->Parameters.Create.FileAttributes == FILE_ATTRIBUTE_NORMAL)
IrpSp->Parameters.Create.FileAttributes = defda;
- parfileref->fcb->subvol->lastinode++;
-
fcb = create_fcb(pool_type);
if (!fcb) {
ERR("out of memory\n");
fcb->Vcb = Vcb;
- if (IrpSp->Flags & SL_OPEN_PAGING_FILE)
+ if (IrpSp->Flags & SL_OPEN_PAGING_FILE) {
fcb->Header.Flags2 |= FSRTL_FLAG2_IS_PAGING_FILE;
+ Vcb->disallow_dismount = TRUE;
+ }
fcb->inode_item.generation = Vcb->superblock.generation;
fcb->inode_item.transid = Vcb->superblock.generation;
fcb->inode_item.flags |= BTRFS_INODE_COMPRESS;
}
+ fcb->inode_item_changed = TRUE;
+
fcb->Header.IsFastIoPossible = fast_io_possible(fcb);
fcb->Header.AllocationSize.QuadPart = 0;
fcb->Header.FileSize.QuadPart = 0;
if (!NT_SUCCESS(Status)) {
ERR("fcb_get_new_sd returned %08x\n", Status);
-
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fcb(fcb);
- ExReleaseResource(&Vcb->fcb_lock);
-
return Status;
}
fcb->sd_dirty = TRUE;
+ if (ea && ealen > 0) {
+ FILE_FULL_EA_INFORMATION* eainfo;
+
+ fcb->ealen = 4;
+
+ // capitalize EA names
+ eainfo = ea;
+ do {
+ STRING s;
+
+ s.Length = s.MaximumLength = eainfo->EaNameLength;
+ s.Buffer = eainfo->EaName;
+
+ RtlUpperString(&s, &s);
+
+ fcb->ealen += 5 + eainfo->EaNameLength + eainfo->EaValueLength;
+
+ if (eainfo->NextEntryOffset == 0)
+ break;
+
+ eainfo = (FILE_FULL_EA_INFORMATION*)(((UINT8*)eainfo) + eainfo->NextEntryOffset);
+ } while (TRUE);
+
+ fcb->ea_xattr.Buffer = ExAllocatePoolWithTag(pool_type, ealen, ALLOC_TAG);
+ if (!fcb->ea_xattr.Buffer) {
+ ERR("out of memory\n");
+ free_fcb(fcb);
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = ealen;
+ RtlCopyMemory(fcb->ea_xattr.Buffer, ea, ealen);
+
+ fcb->ea_changed = TRUE;
+ }
+
hl = ExAllocatePoolWithTag(pool_type, sizeof(hardlink), ALLOC_TAG);
if (!hl) {
ERR("out of memory\n");
-
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fcb(fcb);
- ExReleaseResource(&Vcb->fcb_lock);
-
return STATUS_INSUFFICIENT_RESOURCES;
}
if (!hl->utf8.Buffer) {
ERR("out of memory\n");
ExFreePool(hl);
-
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fcb(fcb);
- ExReleaseResource(&Vcb->fcb_lock);
-
return STATUS_INSUFFICIENT_RESOURCES;
}
RtlCopyMemory(hl->utf8.Buffer, utf8, utf8len);
ERR("out of memory\n");
ExFreePool(hl->utf8.Buffer);
ExFreePool(hl);
-
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fcb(fcb);
- ExReleaseResource(&Vcb->fcb_lock);
-
return STATUS_INSUFFICIENT_RESOURCES;
}
fileref = create_fileref();
if (!fileref) {
ERR("out of memory\n");
-
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fcb(fcb);
- ExReleaseResource(&Vcb->fcb_lock);
-
return STATUS_INSUFFICIENT_RESOURCES;
}
if (!fileref->filepart.Buffer) {
ERR("out of memory\n");
-
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fcb(fcb);
- ExReleaseResource(&Vcb->fcb_lock);
-
return STATUS_INSUFFICIENT_RESOURCES;
}
Status = RtlUpcaseUnicodeString(&fileref->filepart_uc, &fileref->filepart, TRUE);
if (!NT_SUCCESS(Status)) {
ERR("RtlUpcaseUnicodeString returned %08x\n", Status);
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResource(&Vcb->fcb_lock);
return Status;
}
if (!NT_SUCCESS(Status)) {
ERR("extend_file returned %08x\n", Status);
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResource(&Vcb->fcb_lock);
return Status;
}
}
}
static NTSTATUS create_stream(device_extension* Vcb, file_ref** pfileref, file_ref** pparfileref, PUNICODE_STRING fpus, PUNICODE_STRING stream,
- PIRP Irp, ULONG options, POOL_TYPE pool_type, LIST_ENTRY* rollback) {
+ PIRP Irp, ULONG options, POOL_TYPE pool_type, BOOL case_sensitive, LIST_ENTRY* rollback) {
file_ref *fileref, *newpar, *parfileref;
fcb* fcb;
static char xapref[] = "user.";
+ static WCHAR DOSATTRIB[] = L"DOSATTRIB";
+ static WCHAR EA[] = L"EA";
ULONG xapreflen = strlen(xapref), overhead;
LARGE_INTEGER time;
BTRFS_TIME now;
parfileref = *pparfileref;
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
- Status = open_fileref(Vcb, &newpar, fpus, parfileref, FALSE, NULL, NULL, Irp);
- ExReleaseResource(&Vcb->fcb_lock);
+ Status = open_fileref(Vcb, &newpar, fpus, parfileref, FALSE, NULL, NULL, PagedPool, case_sensitive, Irp);
if (Status == STATUS_OBJECT_NAME_NOT_FOUND) {
UNICODE_STRING fpus2;
RtlCopyMemory(fpus2.Buffer, fpus->Buffer, fpus2.Length);
- Status = file_create2(Irp, Vcb, &fpus2, parfileref, options, &newpar, rollback);
+ Status = file_create2(Irp, Vcb, &fpus2, parfileref, options, NULL, 0, &newpar, rollback);
if (!NT_SUCCESS(Status)) {
ERR("file_create2 returned %08x\n", Status);
return Status;
}
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(parfileref);
- ExReleaseResource(&Vcb->fcb_lock);
parfileref = newpar;
*pparfileref = parfileref;
- if (parfileref->fcb->type != BTRFS_TYPE_FILE && parfileref->fcb->type != BTRFS_TYPE_SYMLINK) {
- WARN("parent not file or symlink\n");
+ if (parfileref->fcb->type != BTRFS_TYPE_FILE && parfileref->fcb->type != BTRFS_TYPE_SYMLINK && parfileref->fcb->type != BTRFS_TYPE_DIRECTORY) {
+ WARN("parent not file, directory, or symlink\n");
return STATUS_INVALID_PARAMETER;
}
WARN("tried to create directory as stream\n");
return STATUS_INVALID_PARAMETER;
}
+
+ if ((stream->Length == wcslen(DOSATTRIB) * sizeof(WCHAR) && RtlCompareMemory(stream->Buffer, DOSATTRIB, stream->Length) == stream->Length) ||
+ (stream->Length == wcslen(EA) * sizeof(WCHAR) && RtlCompareMemory(stream->Buffer, EA, stream->Length) == stream->Length)) {
+ return STATUS_OBJECT_NAME_INVALID;
+ }
fcb = create_fcb(pool_type);
if (!fcb) {
Status = RtlUnicodeToUTF8N(NULL, 0, &utf8len, stream->Buffer, stream->Length);
if (!NT_SUCCESS(Status)) {
ERR("RtlUnicodeToUTF8N 1 returned %08x\n", Status);
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fcb(fcb);
- ExReleaseResource(&Vcb->fcb_lock);
return Status;
}
fcb->adsxattr.Buffer = ExAllocatePoolWithTag(pool_type, fcb->adsxattr.MaximumLength, ALLOC_TAG);
if (!fcb->adsxattr.Buffer) {
ERR("out of memory\n");
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fcb(fcb);
- ExReleaseResource(&Vcb->fcb_lock);
return STATUS_INSUFFICIENT_RESOURCES;
}
Status = RtlUnicodeToUTF8N(&fcb->adsxattr.Buffer[xapreflen], utf8len, &utf8len, stream->Buffer, stream->Length);
if (!NT_SUCCESS(Status)) {
ERR("RtlUnicodeToUTF8N 2 returned %08x\n", Status);
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fcb(fcb);
- ExReleaseResource(&Vcb->fcb_lock);
return Status;
}
Status = find_item(Vcb, parfileref->fcb->subvol, &tp, &searchkey, FALSE, Irp);
if (!NT_SUCCESS(Status)) {
ERR("find_item returned %08x\n", Status);
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fcb(fcb);
- ExReleaseResource(&Vcb->fcb_lock);
return Status;
}
- if (!keycmp(&tp.item->key, &searchkey))
+ if (!keycmp(tp.item->key, searchkey))
overhead = tp.item->size;
else
overhead = 0;
if (utf8len + xapreflen + overhead > fcb->adsmaxlen) {
WARN("not enough room for new DIR_ITEM (%u + %u > %u)", utf8len + xapreflen, overhead, fcb->adsmaxlen);
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fcb(fcb);
- ExReleaseResource(&Vcb->fcb_lock);
return STATUS_DISK_FULL;
} else
fcb->adsmaxlen -= overhead + utf8len + xapreflen;
fileref = create_fileref();
if (!fileref) {
ERR("out of memory\n");
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fcb(fcb);
- ExReleaseResource(&Vcb->fcb_lock);
return STATUS_INSUFFICIENT_RESOURCES;
}
fileref->filepart.Buffer = ExAllocatePoolWithTag(pool_type, fileref->filepart.MaximumLength, ALLOC_TAG);
if (!fileref->filepart.Buffer) {
ERR("out of memory\n");
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResource(&Vcb->fcb_lock);
return STATUS_INSUFFICIENT_RESOURCES;
}
Status = RtlUpcaseUnicodeString(&fileref->filepart_uc, &fileref->filepart, TRUE);
if (!NT_SUCCESS(Status)) {
ERR("RtlUpcaseUnicodeString returned %08x\n", Status);
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResource(&Vcb->fcb_lock);
return Status;
}
mark_fcb_dirty(fcb);
mark_fileref_dirty(fileref);
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
InsertTailList(&fcb->subvol->fcbs, &fcb->list_entry);
InsertTailList(&Vcb->all_fcbs, &fcb->list_entry_all);
- ExReleaseResource(&Vcb->fcb_lock);
KeQuerySystemTime(&time);
win_time_to_unix(time, &now);
parfileref->fcb->inode_item.transid = Vcb->superblock.generation;
parfileref->fcb->inode_item.sequence++;
parfileref->fcb->inode_item.st_ctime = now;
+ parfileref->fcb->inode_item_changed = TRUE;
mark_fcb_dirty(parfileref->fcb);
} else
related = NULL;
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
- Status = open_fileref(Vcb, &parfileref, &FileObject->FileName, related, TRUE, NULL, NULL, Irp);
- ExReleaseResource(&Vcb->fcb_lock);
+ Status = open_fileref(Vcb, &parfileref, &FileObject->FileName, related, TRUE, NULL, NULL, pool_type, IrpSp->Flags & SL_CASE_SENSITIVE, Irp);
if (!NT_SUCCESS(Status))
goto end;
}
if (stream.Length > 0) {
- Status = create_stream(Vcb, &fileref, &parfileref, &fpus, &stream, Irp, options, pool_type, rollback);
+ Status = create_stream(Vcb, &fileref, &parfileref, &fpus, &stream, Irp, options, pool_type, IrpSp->Flags & SL_CASE_SENSITIVE, rollback);
if (!NT_SUCCESS(Status)) {
ERR("create_stream returned %08x\n", Status);
goto end;
goto end;
}
- Status = file_create2(Irp, Vcb, &fpus, parfileref, options, &fileref, rollback);
+ if (Irp->AssociatedIrp.SystemBuffer && IrpSp->Parameters.Create.EaLength > 0) {
+ ULONG offset;
+
+ Status = IoCheckEaBufferValidity(Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.Create.EaLength, &offset);
+ if (!NT_SUCCESS(Status)) {
+ ERR("IoCheckEaBufferValidity returned %08x (error at offset %u)\n", Status, offset);
+ goto end;
+ }
+ }
+
+ Status = file_create2(Irp, Vcb, &fpus, parfileref, options, Irp->AssociatedIrp.SystemBuffer, IrpSp->Parameters.Create.EaLength,
+ &fileref, rollback);
if (!NT_SUCCESS(Status)) {
ERR("file_create2 returned %08x\n", Status);
if (!ccb) {
ERR("out of memory\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResource(&Vcb->fcb_lock);
goto end;
}
ccb->has_wildcard = FALSE;
ccb->specific_file = FALSE;
ccb->access = access_state->OriginalDesiredAccess;
+ ccb->case_sensitive = IrpSp->Flags & SL_CASE_SENSITIVE;
#ifdef DEBUG_FCB_REFCOUNTS
- oc = InterlockedIncrement(&fileref->fcb->open_count);
- ERR("fcb %p: open_count now %i\n", fileref->fcb, oc);
+ oc = InterlockedIncrement(&fileref->open_count);
+ ERR("fileref %p: open_count now %i\n", fileref, oc);
#else
- InterlockedIncrement(&fileref->fcb->open_count);
+ InterlockedIncrement(&fileref->open_count);
#endif
+ InterlockedIncrement(&Vcb->open_files);
FileObject->FsContext2 = ccb;
ExFreePool(fpus.Buffer);
end2:
- if (parfileref) {
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
+ if (parfileref)
free_fileref(parfileref);
- ExReleaseResource(&Vcb->fcb_lock);
- }
return Status;
}
USHORT unparsed;
ULONG fn_offset = 0;
file_ref *related, *fileref;
+ POOL_TYPE pool_type = Stack->Flags & SL_OPEN_PAGING_FILE ? NonPagedPool : PagedPool;
+ ACCESS_MASK granted_access;
#ifdef DEBUG_FCB_REFCOUNTS
LONG oc;
#endif
if (options & FILE_DIRECTORY_FILE && RequestedDisposition == FILE_SUPERSEDE) {
WARN("error - supersede requested with FILE_DIRECTORY_FILE\n");
Status = STATUS_INVALID_PARAMETER;
- goto exit;
+ goto exit2;
}
FileObject = Stack->FileObject;
if (Vcb->readonly && (RequestedDisposition == FILE_SUPERSEDE || RequestedDisposition == FILE_CREATE || RequestedDisposition == FILE_OVERWRITE)) {
Status = STATUS_MEDIA_WRITE_PROTECTED;
- goto exit;
+ goto exit2;
}
if (Vcb->readonly && Stack->Parameters.Create.SecurityContext->DesiredAccess &
(FILE_WRITE_DATA | FILE_APPEND_DATA | FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES | DELETE | WRITE_OWNER | WRITE_DAC)) {
Status = STATUS_MEDIA_WRITE_PROTECTED;
- goto exit;
+ goto exit2;
}
+
+ ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
if (options & FILE_OPEN_BY_FILE_ID) {
if (FileObject->FileName.Length == sizeof(UINT64) && related && RequestedDisposition == FILE_OPEN) {
increase_fileref_refcount(fileref);
Status = STATUS_SUCCESS;
} else {
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
Status = open_fileref_by_inode(Vcb, related->fcb->subvol, inode, &fileref, Irp);
- ExReleaseResource(&Vcb->fcb_lock);
}
} else {
WARN("FILE_OPEN_BY_FILE_ID only supported for inodes\n");
goto exit;
}
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
- Status = open_fileref(Vcb, &fileref, &FileObject->FileName, related, Stack->Flags & SL_OPEN_TARGET_DIRECTORY, &unparsed, &fn_offset, Irp);
- ExReleaseResource(&Vcb->fcb_lock);
+ Status = open_fileref(Vcb, &fileref, &FileObject->FileName, related, Stack->Flags & SL_OPEN_TARGET_DIRECTORY, &unparsed, &fn_offset,
+ pool_type, Stack->Flags & SL_CASE_SENSITIVE, Irp);
}
if (Status == STATUS_REPARSE) {
if (!NT_SUCCESS(Status)) {
ERR("get_reparse_block returned %08x\n", Status);
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResourceLite(&Vcb->fcb_lock);
goto exit;
}
Irp->Tail.Overlay.AuxiliaryBuffer = (void*)data;
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResourceLite(&Vcb->fcb_lock);
goto exit;
}
if (NT_SUCCESS(Status) && fileref->deleted) {
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResourceLite(&Vcb->fcb_lock);
Status = STATUS_OBJECT_NAME_NOT_FOUND;
}
if (RequestedDisposition == FILE_CREATE) {
TRACE("file %S already exists, returning STATUS_OBJECT_NAME_COLLISION\n", file_desc_fileref(fileref));
Status = STATUS_OBJECT_NAME_COLLISION;
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResource(&Vcb->fcb_lock);
goto exit;
}
} else if (Status == STATUS_OBJECT_NAME_NOT_FOUND) {
if (RequestedDisposition == FILE_SUPERSEDE || RequestedDisposition == FILE_OVERWRITE || RequestedDisposition == FILE_OVERWRITE_IF) {
if (fileref->fcb->type == BTRFS_TYPE_DIRECTORY || fileref->fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY) {
Status = STATUS_ACCESS_DENIED;
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResource(&Vcb->fcb_lock);
goto exit;
}
if (Vcb->readonly) {
Status = STATUS_MEDIA_WRITE_PROTECTED;
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResource(&Vcb->fcb_lock);
goto exit;
}
}
- if (fileref->fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY && Stack->Parameters.Create.SecurityContext->DesiredAccess &
+ SeLockSubjectContext(&Stack->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext);
+
+ if (!SeAccessCheck(fileref->fcb->ads ? fileref->parent->fcb->sd : fileref->fcb->sd,
+ &Stack->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext,
+ FALSE, Stack->Parameters.Create.SecurityContext->DesiredAccess, 0, NULL,
+ IoGetFileObjectGenericMapping(), Stack->Flags & SL_FORCE_ACCESS_CHECK ? UserMode : Irp->RequestorMode,
+ &granted_access, &Status)) {
+ SeUnlockSubjectContext(&Stack->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext);
+ WARN("SeAccessCheck failed, returning %08x\n", Status);
+ goto exit;
+ }
+
+ SeUnlockSubjectContext(&Stack->Parameters.Create.SecurityContext->AccessState->SubjectSecurityContext);
+
+ if (fileref->fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY && granted_access &
(FILE_WRITE_DATA | FILE_APPEND_DATA | FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES | DELETE | WRITE_OWNER | WRITE_DAC)) {
Status = STATUS_ACCESS_DENIED;
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResource(&Vcb->fcb_lock);
goto exit;
}
WARN("could not open as deletion pending\n");
Status = STATUS_DELETE_PENDING;
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResourceLite(&Vcb->fcb_lock);
goto exit;
}
sf = sf->parent;
}
-
+
if (fileref->fcb->atts & FILE_ATTRIBUTE_READONLY) {
ACCESS_MASK allowed = DELETE | READ_CONTROL | WRITE_OWNER | WRITE_DAC |
SYNCHRONIZE | ACCESS_SYSTEM_SECURITY | FILE_READ_DATA |
if (fileref->fcb->type == BTRFS_TYPE_DIRECTORY)
allowed |= FILE_ADD_SUBDIRECTORY | FILE_ADD_FILE | FILE_DELETE_CHILD;
- if (Stack->Parameters.Create.SecurityContext->DesiredAccess & ~allowed) {
+ if (granted_access & ~allowed) {
Status = STATUS_ACCESS_DENIED;
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResource(&Vcb->fcb_lock);
goto exit;
}
}
if (options & FILE_DELETE_ON_CLOSE && (fileref == Vcb->root_fileref || Vcb->readonly ||
fileref->fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY || fileref->fcb->atts & FILE_ATTRIBUTE_READONLY)) {
Status = STATUS_CANNOT_DELETE;
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResource(&Vcb->fcb_lock);
goto exit;
}
Status = get_reparse_block(fileref->fcb, (UINT8**)&data);
if (!NT_SUCCESS(Status)) {
ERR("get_reparse_block returned %08x\n", Status);
-
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResourceLite(&Vcb->fcb_lock);
goto exit;
}
Irp->Tail.Overlay.AuxiliaryBuffer = (void*)data;
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResourceLite(&Vcb->fcb_lock);
goto exit;
}
- if (fileref->fcb->type == BTRFS_TYPE_DIRECTORY) {
+ if (fileref->fcb->type == BTRFS_TYPE_DIRECTORY && !fileref->fcb->ads) {
if (options & FILE_NON_DIRECTORY_FILE && !(fileref->fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT)) {
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResourceLite(&Vcb->fcb_lock);
-
Status = STATUS_FILE_IS_A_DIRECTORY;
goto exit;
}
} else if (options & FILE_DIRECTORY_FILE) {
TRACE("returning STATUS_NOT_A_DIRECTORY (type = %u, %S)\n", fileref->fcb->type, file_desc_fileref(fileref));
-
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResourceLite(&Vcb->fcb_lock);
-
Status = STATUS_NOT_A_DIRECTORY;
goto exit;
}
- if (fileref->fcb->open_count > 0) {
- Status = IoCheckShareAccess(Stack->Parameters.Create.SecurityContext->DesiredAccess,
- Stack->Parameters.Create.ShareAccess, FileObject, &fileref->fcb->share_access, TRUE);
+ if (fileref->open_count > 0) {
+ Status = IoCheckShareAccess(granted_access, Stack->Parameters.Create.ShareAccess, FileObject, &fileref->fcb->share_access, TRUE);
if (!NT_SUCCESS(Status)) {
WARN("IoCheckShareAccess failed, returning %08x\n", Status);
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResourceLite(&Vcb->fcb_lock);
goto exit;
}
} else {
- IoSetShareAccess(Stack->Parameters.Create.SecurityContext->DesiredAccess,
- Stack->Parameters.Create.ShareAccess, FileObject, &fileref->fcb->share_access);
+ IoSetShareAccess(granted_access, Stack->Parameters.Create.ShareAccess, FileObject, &fileref->fcb->share_access);
}
- if (Stack->Parameters.Create.SecurityContext->DesiredAccess & FILE_WRITE_DATA || options & FILE_DELETE_ON_CLOSE) {
+ if (granted_access & FILE_WRITE_DATA || options & FILE_DELETE_ON_CLOSE) {
if (!MmFlushImageSection(&fileref->fcb->nonpaged->segment_object, MmFlushForWrite)) {
Status = (options & FILE_DELETE_ON_CLOSE) ? STATUS_CANNOT_DELETE : STATUS_SHARING_VIOLATION;
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResourceLite(&Vcb->fcb_lock);
goto exit;
}
}
if ((RequestedDisposition == FILE_OVERWRITE || RequestedDisposition == FILE_OVERWRITE_IF) && fileref->fcb->atts & FILE_ATTRIBUTE_READONLY) {
WARN("cannot overwrite readonly file\n");
Status = STATUS_ACCESS_DENIED;
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResource(&Vcb->fcb_lock);
goto exit;
}
Status = truncate_file(fileref->fcb, 0, Irp, rollback);
if (!NT_SUCCESS(Status)) {
ERR("truncate_file returned %08x\n", Status);
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResource(&Vcb->fcb_lock);
goto exit;
}
if (!NT_SUCCESS(Status)) {
ERR("extend_file returned %08x\n", Status);
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResource(&Vcb->fcb_lock);
goto exit;
}
}
+ if (Irp->AssociatedIrp.SystemBuffer && Stack->Parameters.Create.EaLength > 0) {
+ ULONG offset;
+ FILE_FULL_EA_INFORMATION* eainfo;
+
+ Status = IoCheckEaBufferValidity(Irp->AssociatedIrp.SystemBuffer, Stack->Parameters.Create.EaLength, &offset);
+ if (!NT_SUCCESS(Status)) {
+ ERR("IoCheckEaBufferValidity returned %08x (error at offset %u)\n", Status, offset);
+ free_fileref(fileref);
+ goto exit;
+ }
+
+ fileref->fcb->ealen = 4;
+
+ // capitalize EA name
+ eainfo = Irp->AssociatedIrp.SystemBuffer;
+ do {
+ STRING s;
+
+ s.Length = s.MaximumLength = eainfo->EaNameLength;
+ s.Buffer = eainfo->EaName;
+
+ RtlUpperString(&s, &s);
+
+ fileref->fcb->ealen += 5 + eainfo->EaNameLength + eainfo->EaValueLength;
+
+ if (eainfo->NextEntryOffset == 0)
+ break;
+
+ eainfo = (FILE_FULL_EA_INFORMATION*)(((UINT8*)eainfo) + eainfo->NextEntryOffset);
+ } while (TRUE);
+
+ if (fileref->fcb->ea_xattr.Buffer)
+ ExFreePool(fileref->fcb->ea_xattr.Buffer);
+
+ fileref->fcb->ea_xattr.Buffer = ExAllocatePoolWithTag(pool_type, Stack->Parameters.Create.EaLength, ALLOC_TAG);
+ if (!fileref->fcb->ea_xattr.Buffer) {
+ ERR("out of memory\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+
+ free_fileref(fileref);
+ goto exit;
+ }
+
+ fileref->fcb->ea_xattr.Length = fileref->fcb->ea_xattr.MaximumLength = Stack->Parameters.Create.EaLength;
+ RtlCopyMemory(fileref->fcb->ea_xattr.Buffer, Irp->AssociatedIrp.SystemBuffer, Stack->Parameters.Create.EaLength);
+ } else {
+ if (fileref->fcb->ea_xattr.Length > 0) {
+ ExFreePool(fileref->fcb->ea_xattr.Buffer);
+ fileref->fcb->ea_xattr.Buffer = NULL;
+ fileref->fcb->ea_xattr.Length = fileref->fcb->ea_xattr.MaximumLength = 0;
+
+ fileref->fcb->ea_changed = TRUE;
+ fileref->fcb->ealen = 0;
+ }
+ }
+
filter = FILE_NOTIFY_CHANGE_SIZE | FILE_NOTIFY_CHANGE_LAST_WRITE;
mark_fcb_dirty(fileref->fcb);
fileref->fcb->inode_item.sequence++;
fileref->fcb->inode_item.st_ctime = now;
fileref->fcb->inode_item.st_mtime = now;
+ fileref->fcb->inode_item_changed = TRUE;
// FIXME - truncate streams
// FIXME - do we need to alter parent directory's times?
send_notification_fcb(fileref, filter, FILE_ACTION_MODIFIED);
+ } else {
+ if (options & FILE_NO_EA_KNOWLEDGE && fileref->fcb->ea_xattr.Length > 0) {
+ FILE_FULL_EA_INFORMATION* ffei = (FILE_FULL_EA_INFORMATION*)fileref->fcb->ea_xattr.Buffer;
+
+ do {
+ if (ffei->Flags & FILE_NEED_EA) {
+ WARN("returning STATUS_ACCESS_DENIED as no EA knowledge\n");
+ free_fileref(fileref);
+ Status = STATUS_ACCESS_DENIED;
+ goto exit;
+ }
+
+ if (ffei->NextEntryOffset == 0)
+ break;
+
+ ffei = (FILE_FULL_EA_INFORMATION*)(((UINT8*)ffei) + ffei->NextEntryOffset);
+ } while (TRUE);
+ }
}
FileObject->FsContext = fileref->fcb;
ccb = ExAllocatePoolWithTag(NonPagedPool, sizeof(*ccb), ALLOC_TAG);
if (!ccb) {
ERR("out of memory\n");
-
- ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
free_fileref(fileref);
- ExReleaseResourceLite(&Vcb->fcb_lock);
-
Status = STATUS_INSUFFICIENT_RESOURCES;
goto exit;
}
RtlInitUnicodeString(&ccb->query_string, NULL);
ccb->has_wildcard = FALSE;
ccb->specific_file = FALSE;
- ccb->access = Stack->Parameters.Create.SecurityContext->DesiredAccess;
+ ccb->access = granted_access;
+ ccb->case_sensitive = Stack->Flags & SL_CASE_SENSITIVE;
ccb->fileref = fileref;
}
}
+ // Make sure paging files don't have any extents marked as being prealloc,
+ // as this would mean we'd have to lock exclusively when writing.
+ if (Stack->Flags & SL_OPEN_PAGING_FILE) {
+ LIST_ENTRY* le;
+ BOOL changed = FALSE;
+
+ ExAcquireResourceExclusiveLite(fileref->fcb->Header.Resource, TRUE);
+
+ le = fileref->fcb->extents.Flink;
+
+ while (le != &fileref->fcb->extents) {
+ extent* ext = CONTAINING_RECORD(le, extent, list_entry);
+
+ if (ext->data->type == EXTENT_TYPE_PREALLOC) {
+ ext->data->type = EXTENT_TYPE_REGULAR;
+ changed = TRUE;
+ }
+
+ le = le->Flink;
+ }
+
+ ExReleaseResourceLite(fileref->fcb->Header.Resource);
+
+ if (changed) {
+ fileref->fcb->extents_changed = TRUE;
+ mark_fcb_dirty(fileref->fcb);
+ }
+
+ fileref->fcb->Header.Flags2 |= FSRTL_FLAG2_IS_PAGING_FILE;
+ Vcb->disallow_dismount = TRUE;
+ }
+
#ifdef DEBUG_FCB_REFCOUNTS
- oc = InterlockedIncrement(&fileref->fcb->open_count);
- ERR("fcb %p: open_count now %i\n", fileref->fcb, oc);
+ oc = InterlockedIncrement(&fileref->open_count);
+ ERR("fileref %p: open_count now %i\n", fileref, oc);
#else
- InterlockedIncrement(&fileref->fcb->open_count);
+ InterlockedIncrement(&fileref->open_count);
#endif
+ InterlockedIncrement(&Vcb->open_files);
} else {
Status = file_create(Irp, DeviceObject->DeviceExtension, FileObject, &FileObject->FileName, RequestedDisposition, options, rollback);
Irp->IoStatus.Information = NT_SUCCESS(Status) ? FILE_CREATED : 0;
FileObject->Flags |= FO_CACHE_SUPPORTED;
exit:
+ ExReleaseResourceLite(&Vcb->fcb_lock);
+
+exit2:
if (NT_SUCCESS(Status)) {
if (!FileObject->Vpb)
FileObject->Vpb = DeviceObject->Vpb;
NTSTATUS verify_vcb(device_extension* Vcb, PIRP Irp) {
UINT64 i;
- for (i = 0; i < Vcb->superblock.num_devices; i++) {
+ for (i = 0; i < Vcb->devices_loaded; i++) {
if (Vcb->devices[i].removable) {
NTSTATUS Status;
ULONG cc;
goto exit;
}
- Vcb = DeviceObject->DeviceExtension;
-
Status = verify_vcb(Vcb, Irp);
if (!NT_SUCCESS(Status)) {
ERR("verify_vcb returned %08x\n", Status);
Status = STATUS_NOT_A_DIRECTORY;
goto exit;
}
+
+ if (Vcb->removing) {
+ Status = STATUS_ACCESS_DENIED;
+ goto exit;
+ }
#ifdef DEBUG_FCB_REFCOUNTS
rc = InterlockedIncrement(&Vcb->volume_fcb->refcount);
- oc = InterlockedIncrement(&Vcb->volume_fcb->open_count);
WARN("fcb %p: refcount now %i (volume)\n", Vcb->volume_fcb, rc);
- WARN("fcb %p: open_count now %i (volume)\n", Vcb->volume_fcb, oc);
#else
InterlockedIncrement(&Vcb->volume_fcb->refcount);
- InterlockedIncrement(&Vcb->volume_fcb->open_count);
#endif
IrpSp->FileObject->FsContext = Vcb->volume_fcb;
if (!IrpSp->FileObject->Vpb)
IrpSp->FileObject->Vpb = DeviceObject->Vpb;
+
+ InterlockedIncrement(&Vcb->open_files);
Irp->IoStatus.Information = FILE_OPENED;
Status = STATUS_SUCCESS;
if (!NT_SUCCESS(Status))
do_rollback(Vcb, &rollback);
else
- clear_rollback(&rollback);
+ clear_rollback(Vcb, &rollback);
if (!skip_lock)
ExReleaseResourceLite(&Vcb->tree_lock);
--- /dev/null
+/* Copyright (c) Mark Harmstone 2016
+ *
+ * This file is part of WinBtrfs.
+ *
+ * WinBtrfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public Licence as published by
+ * the Free Software Foundation, either version 3 of the Licence, or
+ * (at your option) any later version.
+ *
+ * WinBtrfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public Licence for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public Licence
+ * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "btrfs_drv.h"
+#ifndef __REACTOS__
+#include <winioctl.h>
+#endif
+#include <mountdev.h>
+#include <initguid.h>
+#include <diskguid.h>
+
+// Device-control handler for the "partition 0" device object.
+//
+// Two mount-manager queries are answered locally from the device extension:
+//   IOCTL_MOUNTDEV_QUERY_UNIQUE_ID   - replies with p0de->uuid
+//   IOCTL_MOUNTDEV_QUERY_DEVICE_NAME - replies with p0de->name
+// For both, a buffer shorter than the fixed header gives STATUS_BUFFER_TOO_SMALL,
+// and a buffer that holds the header but not the payload gives
+// STATUS_BUFFER_OVERFLOW (the length field has already been filled in by then).
+// In each of these cases the IRP is completed here.
+//
+// Any other control code is forwarded unchanged to p0de->devobj and the
+// status of IoCallDriver is returned.
+static NTSTATUS part0_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
+    part0_device_extension* p0de = DeviceObject->DeviceExtension;
+    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
+    ULONG code = IrpSp->Parameters.DeviceIoControl.IoControlCode;
+    ULONG outlen = IrpSp->Parameters.DeviceIoControl.OutputBufferLength;
+    ULONG_PTR info;
+    NTSTATUS Status;
+
+    TRACE("control code = %x\n", code);
+
+    if (code == IOCTL_MOUNTDEV_QUERY_UNIQUE_ID) {
+        if (outlen < sizeof(MOUNTDEV_UNIQUE_ID)) {
+            Status = STATUS_BUFFER_TOO_SMALL;
+            info = sizeof(MOUNTDEV_UNIQUE_ID);
+        } else {
+            MOUNTDEV_UNIQUE_ID* mduid = Irp->AssociatedIrp.SystemBuffer;
+
+            // The length is reported even when the ID itself does not fit.
+            mduid->UniqueIdLength = sizeof(BTRFS_UUID);
+
+            if (outlen < sizeof(MOUNTDEV_UNIQUE_ID) - 1 + mduid->UniqueIdLength) {
+                Status = STATUS_BUFFER_OVERFLOW;
+                info = sizeof(MOUNTDEV_UNIQUE_ID);
+            } else {
+                RtlCopyMemory(mduid->UniqueId, &p0de->uuid, sizeof(BTRFS_UUID));
+
+                Status = STATUS_SUCCESS;
+                info = sizeof(MOUNTDEV_UNIQUE_ID) - 1 + mduid->UniqueIdLength;
+            }
+        }
+    } else if (code == IOCTL_MOUNTDEV_QUERY_DEVICE_NAME) {
+        if (outlen < sizeof(MOUNTDEV_NAME)) {
+            Status = STATUS_BUFFER_TOO_SMALL;
+            info = sizeof(MOUNTDEV_NAME);
+        } else {
+            PMOUNTDEV_NAME name = Irp->AssociatedIrp.SystemBuffer;
+
+            // The length is reported even when the name itself does not fit.
+            name->NameLength = p0de->name.Length;
+
+            if (outlen < sizeof(MOUNTDEV_NAME) - 1 + name->NameLength) {
+                Status = STATUS_BUFFER_OVERFLOW;
+                info = sizeof(MOUNTDEV_NAME);
+            } else {
+                RtlCopyMemory(name->Name, p0de->name.Buffer, p0de->name.Length);
+
+                Status = STATUS_SUCCESS;
+                info = sizeof(MOUNTDEV_NAME) - 1 + name->NameLength;
+            }
+        }
+    } else {
+        // Not one of ours: hand the IRP to the underlying device untouched.
+        IoSkipCurrentIrpStackLocation(Irp);
+
+        Status = IoCallDriver(p0de->devobj, Irp);
+
+        TRACE("returning %08x\n", Status);
+
+        return Status;
+    }
+
+    // Shared completion for the two locally handled queries.
+    Irp->IoStatus.Status = Status;
+    Irp->IoStatus.Information = info;
+    IoCompleteRequest(Irp, IO_NO_INCREMENT);
+
+    return Status;
+}
+
+// Handles IOCTL_MOUNTDEV_QUERY_STABLE_GUID by reporting the filesystem UUID
+// from the superblock as the volume's stable GUID.
+//
+// The control code is METHOD_BUFFERED, so the reply must be written to
+// Irp->AssociatedIrp.SystemBuffer: when the IRP completes, the I/O manager
+// copies IoStatus.Information bytes from that buffer to the caller. Writing
+// straight to Irp->UserBuffer would be overwritten by that copy.
+//
+// Returns STATUS_INVALID_PARAMETER if there is no output buffer or it is
+// smaller than MOUNTDEV_STABLE_GUID; otherwise STATUS_SUCCESS, with
+// IoStatus.Information set to sizeof(MOUNTDEV_STABLE_GUID). The caller
+// completes the IRP.
+static NTSTATUS mountdev_query_stable_guid(device_extension* Vcb, PIRP Irp) {
+    MOUNTDEV_STABLE_GUID* msg = Irp->AssociatedIrp.SystemBuffer;
+    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
+
+    TRACE("IOCTL_MOUNTDEV_QUERY_STABLE_GUID\n");
+
+    if (!msg || IrpSp->Parameters.DeviceIoControl.OutputBufferLength < sizeof(MOUNTDEV_STABLE_GUID))
+        return STATUS_INVALID_PARAMETER;
+
+    RtlCopyMemory(&msg->StableGuid, &Vcb->superblock.uuid, sizeof(GUID));
+
+    Irp->IoStatus.Information = sizeof(MOUNTDEV_STABLE_GUID);
+
+    return STATUS_SUCCESS;
+}
+
+// Handles IOCTL_DISK_GET_PARTITION_INFO_EX: asks the first underlying device
+// for its partition information, then rewrites the partition type so the
+// volume is presented as an IFS partition (MBR) or a basic data partition (GPT).
+//
+// The control code is METHOD_BUFFERED, so the reply is built in
+// Irp->AssociatedIrp.SystemBuffer: when the IRP completes, the I/O manager
+// copies from that buffer to the caller. Anything written via Irp->UserBuffer
+// would be overwritten by that copy, losing the patched partition type.
+//
+// Returns STATUS_BUFFER_TOO_SMALL if no system buffer was supplied, the
+// status of dev_ioctl if the underlying query fails, and STATUS_SUCCESS
+// otherwise. dev_ioctl is handed &Irp->IoStatus, so it is expected to fill in
+// Information - NOTE(review): confirm against dev_ioctl's implementation.
+static NTSTATUS get_partition_info_ex(device_extension* Vcb, PIRP Irp) {
+    NTSTATUS Status;
+    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
+    PARTITION_INFORMATION_EX* piex = Irp->AssociatedIrp.SystemBuffer;
+
+    TRACE("IOCTL_DISK_GET_PARTITION_INFO_EX\n");
+
+    // Without a buffer there is nothing to fill in or patch afterwards.
+    if (!piex)
+        return STATUS_BUFFER_TOO_SMALL;
+
+    Status = dev_ioctl(Vcb->devices[0].devobj, IOCTL_DISK_GET_PARTITION_INFO_EX, NULL, 0,
+                       piex, IrpSp->Parameters.DeviceIoControl.OutputBufferLength, TRUE, &Irp->IoStatus);
+    if (!NT_SUCCESS(Status))
+        return Status;
+
+    if (piex->PartitionStyle == PARTITION_STYLE_MBR) {
+        piex->Mbr.PartitionType = PARTITION_IFS;
+        piex->Mbr.RecognizedPartition = TRUE;
+    } else if (piex->PartitionStyle == PARTITION_STYLE_GPT) {
+        piex->Gpt.PartitionType = PARTITION_BASIC_DATA_GUID;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Handles IOCTL_DISK_IS_WRITABLE: STATUS_MEDIA_WRITE_PROTECTED when the
+// volume is flagged read-only, STATUS_SUCCESS otherwise. Irp is not used.
+static NTSTATUS is_writable(device_extension* Vcb, PIRP Irp) {
+    TRACE("IOCTL_DISK_IS_WRITABLE\n");
+
+    if (Vcb->readonly)
+        return STATUS_MEDIA_WRITE_PROTECTED;
+
+    return STATUS_SUCCESS;
+}
+
+// IRP_MJ_DEVICE_CONTROL entry point.
+//
+// Requests aimed at a VCB_TYPE_PARTITION0 device go to part0_device_control,
+// which completes or forwards the IRP itself. For other devices, the control
+// codes handled here are answered locally and the IRP is completed (unless
+// the handler returned STATUS_PENDING); every other code is passed down to
+// the first underlying device. Returns the resulting status.
+NTSTATUS STDCALL drv_device_control(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
+    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
+    device_extension* Vcb = DeviceObject->DeviceExtension;
+    NTSTATUS Status;
+    BOOL top_level;
+    BOOL handled = TRUE;
+
+    FsRtlEnterFileSystem();
+
+    top_level = is_top_level(Irp);
+
+    Irp->IoStatus.Information = 0;
+
+    if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) {
+        // part0_device_control takes care of completing or forwarding the IRP.
+        Status = part0_device_control(DeviceObject, Irp);
+    } else {
+        switch (IrpSp->Parameters.DeviceIoControl.IoControlCode) {
+            case IOCTL_MOUNTDEV_QUERY_STABLE_GUID:
+                Status = mountdev_query_stable_guid(Vcb, Irp);
+                break;
+
+            case IOCTL_DISK_GET_PARTITION_INFO_EX:
+                Status = get_partition_info_ex(Vcb, Irp);
+                break;
+
+            case IOCTL_DISK_IS_WRITABLE:
+                Status = is_writable(Vcb, Irp);
+                break;
+
+            default:
+                TRACE("unhandled control code %x\n", IrpSp->Parameters.DeviceIoControl.IoControlCode);
+                handled = FALSE;
+                break;
+        }
+
+        if (handled) {
+            // Answered locally: record the status and complete the IRP.
+            Irp->IoStatus.Status = Status;
+
+            if (Status != STATUS_PENDING)
+                IoCompleteRequest(Irp, IO_NO_INCREMENT);
+        } else {
+            // Unknown code: let the first underlying device deal with it.
+            IoSkipCurrentIrpStackLocation(Irp);
+
+            Status = IoCallDriver(Vcb->devices[0].devobj, Irp);
+        }
+    }
+
+    if (top_level)
+        IoSetTopLevelIrp(NULL);
+
+    FsRtlExitFileSystem();
+
+    return Status;
+}
ULONG tag = 0, br;
NTSTATUS Status;
- // FIXME - will this slow things down?
-
- if (type == BTRFS_TYPE_SYMLINK)
- return IO_REPARSE_TAG_SYMLINK;
+ if (type == BTRFS_TYPE_SYMLINK) {
+ if (called_from_lxss())
+ return IO_REPARSE_TAG_LXSS_SYMLINK;
+ else
+ return IO_REPARSE_TAG_SYMLINK;
+ }
if (type != BTRFS_TYPE_FILE && type != BTRFS_TYPE_DIRECTORY)
return 0;
return 0;
ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
- Status = open_fcb(Vcb, subvol, inode, type, NULL, NULL, &fcb, Irp);
+ Status = open_fcb(Vcb, subvol, inode, type, NULL, NULL, &fcb, PagedPool, Irp);
if (!NT_SUCCESS(Status)) {
ERR("open_fcb returned %08x\n", Status);
ExReleaseResourceLite(&Vcb->fcb_lock);
return tag;
}
+// Computes the EA size to report for an inode from its stored EA xattr.
+//
+// Fetches the EA_EA xattr, validates it with IoCheckEaBufferValidity and then
+// sums 5 + name length + value length over every entry, starting from 4.
+// NOTE(review): the 4 and 5 constants presumably mirror the packed-EA size
+// Windows reports; the same formula is used where fcb->ealen is set - confirm.
+//
+// Returns 0 when the xattr is absent or fails validation. The xattr buffer is
+// freed before returning in every case where it was obtained.
+static ULONG get_ea_len(device_extension* Vcb, root* subvol, UINT64 inode, PIRP Irp) {
+    UINT8* eadata;
+    UINT16 len;
+    ULONG offset, total;
+    NTSTATUS Status;
+    FILE_FULL_EA_INFORMATION* entry;
+
+    if (!get_xattr(Vcb, subvol, inode, EA_EA, EA_EA_HASH, &eadata, &len, Irp))
+        return 0;
+
+    Status = IoCheckEaBufferValidity((FILE_FULL_EA_INFORMATION*)eadata, len, &offset);
+
+    if (!NT_SUCCESS(Status)) {
+        WARN("IoCheckEaBufferValidity returned %08x (error at offset %u)\n", Status, offset);
+        ExFreePool(eadata);
+        return 0;
+    }
+
+    total = 4;
+    entry = (FILE_FULL_EA_INFORMATION*)eadata;
+
+    // Walk the chain of entries; a zero NextEntryOffset marks the last one.
+    for (;;) {
+        total += 5 + entry->EaNameLength + entry->EaValueLength;
+
+        if (entry->NextEntryOffset == 0)
+            break;
+
+        entry = (FILE_FULL_EA_INFORMATION*)(((UINT8*)entry) + entry->NextEntryOffset);
+    }
+
+    ExFreePool(eadata);
+
+    return total;
+}
+
static NTSTATUS STDCALL query_dir_item(fcb* fcb, file_ref* fileref, void* buf, LONG* len, PIRP Irp, dir_entry* de, root* r) {
PIO_STACK_LOCATION IrpSp;
UINT32 needed;
INODE_ITEM ii;
NTSTATUS Status;
ULONG stringlen;
- ULONG atts;
+ ULONG atts, ealen;
IrpSp = IoGetCurrentIrpStackLocation(Irp);
if (fcb2->inode == inode && !fcb2->ads) {
ii = fcb2->inode_item;
atts = fcb2->atts;
+ ealen = fcb2->ealen;
found = TRUE;
break;
}
if (IrpSp->Parameters.QueryDirectory.FileInformationClass == FileBothDirectoryInformation ||
IrpSp->Parameters.QueryDirectory.FileInformationClass == FileDirectoryInformation ||
IrpSp->Parameters.QueryDirectory.FileInformationClass == FileFullDirectoryInformation ||
- IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdBothDirectoryInformation) {
+ IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdBothDirectoryInformation ||
+ IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdFullDirectoryInformation) {
BOOL dotfile = de->namelen > 1 && de->name[0] == '.';
atts = get_file_attributes(fcb->Vcb, &ii, r, inode, de->type, dotfile, FALSE, Irp);
}
+
+ if (IrpSp->Parameters.QueryDirectory.FileInformationClass == FileBothDirectoryInformation ||
+ IrpSp->Parameters.QueryDirectory.FileInformationClass == FileFullDirectoryInformation ||
+ IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdBothDirectoryInformation ||
+ IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdFullDirectoryInformation) {
+ ealen = get_ea_len(fcb->Vcb, r, inode, Irp);
+ }
}
break;
r = fcb->subvol;
inode = fcb->inode;
atts = fcb->atts;
+ ealen = fcb->ealen;
break;
case DirEntryType_Parent:
r = fileref->parent->fcb->subvol;
inode = fileref->parent->fcb->inode;
atts = fileref->parent->fcb->atts;
+ ealen = fileref->parent->fcb->ealen;
} else {
ERR("no fileref\n");
return STATUS_INTERNAL_ERROR;
IrpSp->Parameters.QueryDirectory.FileInformationClass == FileDirectoryInformation ||
IrpSp->Parameters.QueryDirectory.FileInformationClass == FileFullDirectoryInformation ||
IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdBothDirectoryInformation ||
+ IrpSp->Parameters.QueryDirectory.FileInformationClass == FileIdFullDirectoryInformation ||
IrpSp->Parameters.QueryDirectory.FileInformationClass == FileNamesInformation) {
Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, de->name, de->namelen);
fbdi->AllocationSize.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_blocks;
fbdi->FileAttributes = atts;
fbdi->FileNameLength = stringlen;
- fbdi->EaSize = get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp);
+ fbdi->EaSize = atts & FILE_ATTRIBUTE_REPARSE_POINT ? get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp) : ealen;
fbdi->ShortNameLength = 0;
// fibdi->ShortName[12];
ffdi->AllocationSize.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_blocks;
ffdi->FileAttributes = atts;
ffdi->FileNameLength = stringlen;
- ffdi->EaSize = get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp);
+ ffdi->EaSize = atts & FILE_ATTRIBUTE_REPARSE_POINT ? get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp) : ealen;
Status = RtlUTF8ToUnicodeN(ffdi->FileName, stringlen, &stringlen, de->name, de->namelen);
fibdi->AllocationSize.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_blocks;
fibdi->FileAttributes = atts;
fibdi->FileNameLength = stringlen;
- fibdi->EaSize = get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp);
+ fibdi->EaSize = atts & FILE_ATTRIBUTE_REPARSE_POINT ? get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp) : ealen;
fibdi->ShortNameLength = 0;
// fibdi->ShortName[12];
- fibdi->FileId.QuadPart = inode;
+ fibdi->FileId.QuadPart = make_file_id(r, inode);
Status = RtlUTF8ToUnicodeN(fibdi->FileName, stringlen, &stringlen, de->name, de->namelen);
}
case FileIdFullDirectoryInformation:
- FIXME("STUB: FileIdFullDirectoryInformation\n");
- break;
+ {
+ FILE_ID_FULL_DIR_INFORMATION* fifdi = buf;
+
+ TRACE("FileIdFullDirectoryInformation\n");
+
+ needed = sizeof(FILE_ID_FULL_DIR_INFORMATION) - sizeof(WCHAR) + stringlen;
+
+ if (needed > *len) {
+ TRACE("buffer overflow - %u > %u\n", needed, *len);
+ return STATUS_BUFFER_OVERFLOW;
+ }
+
+// if (!buf)
+// return STATUS_INVALID_POINTER;
+
+ fifdi->NextEntryOffset = 0;
+ fifdi->FileIndex = 0;
+ fifdi->CreationTime.QuadPart = unix_time_to_win(&ii.otime);
+ fifdi->LastAccessTime.QuadPart = unix_time_to_win(&ii.st_atime);
+ fifdi->LastWriteTime.QuadPart = unix_time_to_win(&ii.st_mtime);
+ fifdi->ChangeTime.QuadPart = unix_time_to_win(&ii.st_ctime);
+ fifdi->EndOfFile.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_size;
+ fifdi->AllocationSize.QuadPart = de->type == BTRFS_TYPE_SYMLINK ? 0 : ii.st_blocks;
+ fifdi->FileAttributes = atts;
+ fifdi->FileNameLength = stringlen;
+ fifdi->EaSize = atts & FILE_ATTRIBUTE_REPARSE_POINT ? get_reparse_tag(fcb->Vcb, r, inode, de->type, atts, Irp) : ealen;
+ fifdi->FileId.QuadPart = make_file_id(r, inode);
+
+ Status = RtlUTF8ToUnicodeN(fifdi->FileName, stringlen, &stringlen, de->name, de->namelen);
+
+ if (!NT_SUCCESS(Status)) {
+ ERR("RtlUTF8ToUnicodeN returned %08x\n", Status);
+ return Status;
+ }
+
+ *len -= needed;
+
+ return STATUS_SUCCESS;
+ }
case FileNamesInformation:
{
goto end;
}
- if (keycmp(&tp.item->key, &searchkey) == -1) {
+ if (keycmp(tp.item->key, searchkey) == -1) {
if (find_next_item(fileref->fcb->Vcb, &tp, &next_tp, FALSE, Irp))
tp = next_tp;
}
- if (keycmp(&tp.item->key, &searchkey) != -1 && tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
+ if (keycmp(tp.item->key, searchkey) != -1 && tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
do {
if (fr) {
if (fr->index <= tp.item->key.offset && !fr->deleted) {
return STATUS_INVALID_PARAMETER;
}
- if (!(ccb->access & FILE_LIST_DIRECTORY)) {
+ if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_LIST_DIRECTORY)) {
WARN("insufficient privileges\n");
return STATUS_ACCESS_DENIED;
}
if (IrpSp->Parameters.QueryDirectory.FileName->Buffer[0] != '*') {
specific_file = TRUE;
- if (FsRtlDoesNameContainWildCards(IrpSp->Parameters.QueryDirectory.FileName)) {
+ if (!ccb->case_sensitive || FsRtlDoesNameContainWildCards(IrpSp->Parameters.QueryDirectory.FileName)) {
has_wildcard = TRUE;
specific_file = FALSE;
}
UNICODE_STRING us;
LIST_ENTRY* le;
- Status = RtlUpcaseUnicodeString(&us, &ccb->query_string, TRUE);
- if (!NT_SUCCESS(Status)) {
- ERR("RtlUpcaseUnicodeString returned %08x\n", Status);
- goto end;
+ us.Buffer = NULL;
+
+ if (!ccb->case_sensitive) {
+ Status = RtlUpcaseUnicodeString(&us, &ccb->query_string, TRUE);
+ if (!NT_SUCCESS(Status)) {
+ ERR("RtlUpcaseUnicodeString returned %08x\n", Status);
+ goto end;
+ }
}
ExAcquireResourceSharedLite(&fileref->nonpaged->children_lock, TRUE);
le = fileref->children.Flink;
while (le != &fileref->children) {
file_ref* fr2 = CONTAINING_RECORD(le, file_ref, list_entry);
-
- if (!fr2->deleted && fr2->filepart_uc.Length == us.Length &&
- RtlCompareMemory(fr2->filepart_uc.Buffer, us.Buffer, us.Length) == us.Length) {
- found = TRUE;
+ if (!fr2->deleted) {
+ if (!ccb->case_sensitive && fr2->filepart_uc.Length == us.Length &&
+ RtlCompareMemory(fr2->filepart_uc.Buffer, us.Buffer, us.Length) == us.Length)
+ found = TRUE;
+ else if (ccb->case_sensitive && fr2->filepart.Length == ccb->query_string.Length &&
+ RtlCompareMemory(fr2->filepart.Buffer, ccb->query_string.Buffer, ccb->query_string.Length) == ccb->query_string.Length)
+ found = TRUE;
+ }
+
+ if (found) {
if (fr2->fcb->subvol == fcb->subvol) {
de.key.obj_id = fr2->fcb->inode;
de.key.obj_type = TYPE_INODE_ITEM;
ExFreePool(us.Buffer);
if (!found) {
- Status = find_file_in_dir(fcb->Vcb, &ccb->query_string, fileref, &found_subvol, &found_inode, &found_type, &found_index, &utf8, Irp);
+ Status = find_file_in_dir(fcb->Vcb, &ccb->query_string, fileref, &found_subvol, &found_inode, &found_type, &found_index, &utf8, FALSE, Irp);
if (!NT_SUCCESS(Status)) {
Status = STATUS_NO_SUCH_FILE;
di_uni_fn.Length = di_uni_fn.MaximumLength = stringlen;
di_uni_fn.Buffer = uni_fn;
- while (!FsRtlIsNameInExpression(&ccb->query_string, &di_uni_fn, TRUE, NULL)) {
+ while (!FsRtlIsNameInExpression(&ccb->query_string, &di_uni_fn, !ccb->case_sensitive, NULL)) {
if (de.name_alloc)
ExFreePool(de.name);
case FileDirectoryInformation:
case FileIdBothDirectoryInformation:
case FileFullDirectoryInformation:
+ case FileIdFullDirectoryInformation:
length -= length % 8;
break;
di_uni_fn.Buffer = uni_fn;
}
- if (!has_wildcard || FsRtlIsNameInExpression(&ccb->query_string, &di_uni_fn, TRUE, NULL)) {
+ if (!has_wildcard || FsRtlIsNameInExpression(&ccb->query_string, &di_uni_fn, !ccb->case_sensitive, NULL)) {
curitem = (UINT8*)buf + IrpSp->Parameters.QueryDirectory.Length - length;
count++;
return STATUS_INVALID_PARAMETER;
}
- if (!(ccb->access & FILE_LIST_DIRECTORY)) {
+ if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_LIST_DIRECTORY)) {
WARN("insufficient privileges\n");
return STATUS_ACCESS_DENIED;
}
#include "btrfs_drv.h"
+typedef struct { // one extent backreference held in memory; records are chained through list_entry
+ UINT8 type; // TYPE_* ref kind; callers test it before touching the matching union member
+
+ union { // ref payload; code in this file passes &edr as the generic pointer to whichever member is in use
+ EXTENT_DATA_REF edr;
+ SHARED_DATA_REF sdr;
+ TREE_BLOCK_REF tbr;
+ SHARED_BLOCK_REF sbr;
+ };
+
+ UINT64 hash; // filled in by construct_extent_item from get_extent_hash; used as the key offset of non-inline ref items
+ LIST_ENTRY list_entry; // list linkage; the record is recovered with CONTAINING_RECORD
+} extent_ref;
+
static __inline ULONG get_extent_data_len(UINT8 type) {
switch (type) {
case TYPE_TREE_BLOCK_REF:
case TYPE_EXTENT_REF_V0:
return sizeof(EXTENT_REF_V0);
- // FIXME - TYPE_SHARED_BLOCK_REF
+ case TYPE_SHARED_BLOCK_REF:
+ return sizeof(SHARED_BLOCK_REF);
case TYPE_SHARED_DATA_REF:
return sizeof(SHARED_DATA_REF);
return erv0->count;
}
- // FIXME - TYPE_SHARED_BLOCK_REF
+ case TYPE_SHARED_BLOCK_REF:
+ return 1;
case TYPE_SHARED_DATA_REF:
{
static UINT64 get_extent_hash(UINT8 type, void* data) {
if (type == TYPE_EXTENT_DATA_REF) {
return get_extent_data_ref_hash((EXTENT_DATA_REF*)data);
+ } else if (type == TYPE_SHARED_BLOCK_REF) { // shared block ref: the offset field is returned directly as the hash
+ SHARED_BLOCK_REF* sbr = (SHARED_BLOCK_REF*)data;
+ return sbr->offset;
+ } else if (type == TYPE_SHARED_DATA_REF) { // shared data ref: likewise, the offset field is the hash
+ SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)data;
+ return sdr->offset;
+ } else if (type == TYPE_TREE_BLOCK_REF) { // tree block ref: likewise, the offset field is the hash
+ TREE_BLOCK_REF* tbr = (TREE_BLOCK_REF*)data;
+ return tbr->offset;
} else {
ERR("unhandled extent type %x\n", type);
return 0;
}
}
-static NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT8 type, void* data, KEY* firstitem, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) {
- NTSTATUS Status;
- KEY searchkey;
- traverse_ptr tp;
- ULONG datalen = get_extent_data_len(type), len, max_extent_item_size;
- EXTENT_ITEM* ei;
- UINT8* ptr;
- UINT64 inline_rc, offset;
- UINT8* data2;
- EXTENT_ITEM* newei;
+// Drains the list headed by extent_refs: each extent_ref record is unlinked
+// from the front and released with ExFreePool. The list head itself is not
+// freed and is left empty on return.
+static void free_extent_refs(LIST_ENTRY* extent_refs) {
+    for (;;) {
+        LIST_ENTRY* head_entry;
+
+        if (IsListEmpty(extent_refs))
+            return;
+
+        // Unlink the first entry, then free the record that embeds it.
+        head_entry = RemoveHeadList(extent_refs);
+        ExFreePool(CONTAINING_RECORD(head_entry, extent_ref, list_entry));
+    }
+}
+
+static NTSTATUS add_shared_data_extent_ref(LIST_ENTRY* extent_refs, UINT64 parent, UINT32 count) { // records `count` shared-data references for `parent`, merging into an existing list entry when there is one
+ extent_ref* er2;
+ LIST_ENTRY* le;
- // FIXME - handle A9s
+ if (!IsListEmpty(extent_refs)) {
+ le = extent_refs->Flink;
+
+ while (le != extent_refs) { // linear scan for an entry already describing this parent
+ extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
+
+ if (er->type == TYPE_SHARED_DATA_REF && er->sdr.offset == parent) {
+ er->sdr.count += count; // same parent already listed: accumulate instead of adding a duplicate
+ return STATUS_SUCCESS;
+ }
+
+ le = le->Flink;
+ }
+ }
- if (datalen == 0) {
- ERR("unrecognized extent type %x\n", type);
- return STATUS_INTERNAL_ERROR;
+ er2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent_ref), ALLOC_TAG); // no match found: allocate a fresh record
+ if (!er2) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
}
- searchkey.obj_id = address;
- searchkey.obj_type = TYPE_EXTENT_ITEM;
- searchkey.offset = 0xffffffffffffffff;
+ er2->type = TYPE_SHARED_DATA_REF; // note: er2->hash is not set here; construct_extent_item assigns it
+ er2->sdr.offset = parent;
+ er2->sdr.count = count;
- Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return Status;
- }
+ InsertTailList(extent_refs, &er2->list_entry); // the list now holds the record; free_extent_refs releases it
- // If entry doesn't exist yet, create new inline extent item
+ return STATUS_SUCCESS;
+}
+
+static NTSTATUS add_shared_block_extent_ref(LIST_ENTRY* extent_refs, UINT64 parent) { // ensures the list has exactly one TYPE_SHARED_BLOCK_REF entry for `parent`
+ extent_ref* er2;
+ LIST_ENTRY* le;
- if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
- ULONG eisize;
- EXTENT_ITEM* ei;
- BOOL is_tree = type == TYPE_TREE_BLOCK_REF;
- UINT8* ptr;
-
- eisize = sizeof(EXTENT_ITEM);
- if (is_tree) eisize += sizeof(EXTENT_ITEM2);
- eisize += sizeof(UINT8);
- eisize += datalen;
-
- ei = ExAllocatePoolWithTag(PagedPool, eisize, ALLOC_TAG);
- if (!ei) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- ei->refcount = get_extent_data_refcount(type, data);
- ei->generation = Vcb->superblock.generation;
- ei->flags = is_tree ? EXTENT_ITEM_TREE_BLOCK : EXTENT_ITEM_DATA;
- ptr = (UINT8*)&ei[1];
+ if (!IsListEmpty(extent_refs)) {
+ le = extent_refs->Flink;
- if (is_tree) {
- EXTENT_ITEM2* ei2 = (EXTENT_ITEM2*)ptr;
- ei2->firstitem = *firstitem;
- ei2->level = level;
- ptr = (UINT8*)&ei2[1];
+ while (le != extent_refs) { // linear scan for an existing entry with the same parent
+ extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
+
+ if (er->type == TYPE_SHARED_BLOCK_REF && er->sbr.offset == parent) // already listed: nothing to add, and there is no count to bump
+ return STATUS_SUCCESS;
+
+ le = le->Flink;
}
+ }
+
+ er2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent_ref), ALLOC_TAG); // no match found: allocate a fresh record
+ if (!er2) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ er2->type = TYPE_SHARED_BLOCK_REF; // note: er2->hash is not set here; construct_extent_item assigns it
+ er2->sbr.offset = parent;
+
+ InsertTailList(extent_refs, &er2->list_entry); // the list now holds the record; free_extent_refs releases it
+
+ return STATUS_SUCCESS;
+}
+
+static NTSTATUS add_tree_block_extent_ref(LIST_ENTRY* extent_refs, UINT64 root) { // ensures the list has exactly one TYPE_TREE_BLOCK_REF entry for `root`
+ extent_ref* er2;
+ LIST_ENTRY* le;
+
+ if (!IsListEmpty(extent_refs)) {
+ le = extent_refs->Flink;
- *ptr = type;
- RtlCopyMemory(ptr + 1, data, datalen);
-
- if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, size, ei, eisize, NULL, Irp, rollback)) {
- ERR("insert_tree_item failed\n");
- return STATUS_INTERNAL_ERROR;
+ while (le != extent_refs) { // linear scan for an existing entry with the same root
+ extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
+
+ if (er->type == TYPE_TREE_BLOCK_REF && er->tbr.offset == root) // already listed: nothing to add
+ return STATUS_SUCCESS;
+
+ le = le->Flink;
}
-
- // FIXME - add to space list?
+ }
+
+ er2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent_ref), ALLOC_TAG); // no match found: allocate a fresh record
+ if (!er2) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ er2->type = TYPE_TREE_BLOCK_REF; // note: er2->hash is not set here; construct_extent_item assigns it
+ er2->tbr.offset = root;
+
+ InsertTailList(extent_refs, &er2->list_entry); // the list now holds the record; free_extent_refs releases it
+
+ return STATUS_SUCCESS;
+}
+// Builds and inserts a new-format EXTENT_ITEM for (address, size) from the
+// in-memory list extent_refs.
+//
+// Refs whose refcount is zero are unlinked from the list and freed. The
+// remaining refs are packed inline after the EXTENT_ITEM (and EXTENT_ITEM2 when
+// flags has EXTENT_ITEM_TREE_BLOCK) until the item would exceed a quarter of
+// the superblock node_size; the first ref that does not fit, and every ref
+// after it, is inserted as a separate item keyed (address, ref type, ref hash).
+//
+// firstitem may be NULL, in which case a zeroed key is stored in EXTENT_ITEM2.
+// Returns STATUS_SUCCESS (also when the list is empty on entry, with a
+// warning), STATUS_INSUFFICIENT_RESOURCES on allocation failure, or
+// STATUS_INTERNAL_ERROR if insert_tree_item fails. Buffers that were not
+// successfully inserted are freed before returning.
+static NTSTATUS construct_extent_item(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 flags, LIST_ENTRY* extent_refs,
+ KEY* firstitem, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) {
+ LIST_ENTRY *le, *next_le;
+ UINT64 refcount;
+ ULONG inline_len;
+ BOOL all_inline = TRUE;
+ extent_ref* first_noninline = NULL; // only meaningful once all_inline has been cleared
+ EXTENT_ITEM* ei;
+ UINT8* siptr;
+
+ // FIXME - write skinny extents if is tree and incompat flag set
+
+ if (IsListEmpty(extent_refs)) {
+ WARN("no extent refs found\n");
return STATUS_SUCCESS;
- } else if (tp.item->key.offset != size) {
- ERR("extent %llx exists, but with size %llx rather than %llx expected\n", tp.item->key.obj_id, tp.item->key.offset, size);
- return STATUS_INTERNAL_ERROR;
}
+
+ refcount = 0;
+ inline_len = sizeof(EXTENT_ITEM);
+
+ if (flags & EXTENT_ITEM_TREE_BLOCK)
+ inline_len += sizeof(EXTENT_ITEM2);
+
+ // First pass: drop dead refs, total the refcount, compute each ref's hash
+ // and work out how many refs fit inline.
+ le = extent_refs->Flink;
+ while (le != extent_refs) {
+ extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
+ UINT64 rc;
- if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
- EXTENT_ITEM_V0* eiv0 = (EXTENT_ITEM_V0*)tp.item->data;
-
- TRACE("converting old-style extent at (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-
- ei = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM), ALLOC_TAG);
-
- if (!ei) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- ei->refcount = eiv0->refcount;
- ei->generation = Vcb->superblock.generation;
- ei->flags = EXTENT_ITEM_DATA;
-
- delete_tree_item(Vcb, &tp, rollback);
+ next_le = le->Flink; // saved up front because er may be unlinked and freed below
- if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ei, sizeof(EXTENT_ITEM), NULL, Irp, rollback)) {
- ERR("insert_tree_item failed\n");
- ExFreePool(ei);
- return STATUS_INTERNAL_ERROR;
- }
+ rc = get_extent_data_refcount(er->type, &er->edr);
- Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return Status;
+ if (rc == 0) {
+ RemoveEntryList(&er->list_entry);
+
+ ExFreePool(er);
+ } else {
+ ULONG extlen = get_extent_data_len(er->type);
+
+ refcount += rc;
+
+ er->hash = get_extent_hash(er->type, &er->edr);
+
+ if (all_inline) {
+ if (inline_len + 1 + extlen > Vcb->superblock.node_size / 4) {
+ all_inline = FALSE;
+ first_noninline = er;
+ } else
+ inline_len += extlen + 1; // one type byte plus the ref payload
+ }
}
- }
- if (tp.item->size < sizeof(EXTENT_ITEM)) {
- ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
- return STATUS_INTERNAL_ERROR;
+ le = next_le;
}
- ei = (EXTENT_ITEM*)tp.item->data;
+ ei = ExAllocatePoolWithTag(PagedPool, inline_len, ALLOC_TAG);
+ if (!ei) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
- len = tp.item->size - sizeof(EXTENT_ITEM);
- ptr = (UINT8*)&ei[1];
+ ei->refcount = refcount;
+ ei->generation = Vcb->superblock.generation;
+ ei->flags = flags;
- if (ei->flags & EXTENT_ITEM_TREE_BLOCK) {
- if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) {
- ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2));
- return STATUS_INTERNAL_ERROR;
+ if (flags & EXTENT_ITEM_TREE_BLOCK) {
+ EXTENT_ITEM2* ei2 = (EXTENT_ITEM2*)&ei[1];
+
+ if (firstitem) {
+ ei2->firstitem.obj_id = firstitem->obj_id;
+ ei2->firstitem.obj_type = firstitem->obj_type;
+ ei2->firstitem.offset = firstitem->offset;
+ } else {
+ ei2->firstitem.obj_id = 0;
+ ei2->firstitem.obj_type = 0;
+ ei2->firstitem.offset = 0;
}
- len -= sizeof(EXTENT_ITEM2);
- ptr += sizeof(EXTENT_ITEM2);
- }
-
- inline_rc = 0;
+ ei2->level = level;
+
+ siptr = (UINT8*)&ei2[1];
+ } else
+ siptr = (UINT8*)&ei[1];
- // Loop through existing inline extent entries
+ // Do we need to sort the inline extent refs? The Linux driver doesn't seem to bother.
- while (len > 0) {
- UINT8 secttype = *ptr;
- ULONG sectlen = get_extent_data_len(secttype);
- UINT64 sectcount = get_extent_data_refcount(secttype, ptr + sizeof(UINT8));
+ // Second pass: serialise the refs that fit inline, as a type byte followed by the payload.
+ le = extent_refs->Flink;
+ while (le != extent_refs) {
+ extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
+ ULONG extlen = get_extent_data_len(er->type);
- len--;
+ if (!all_inline && er == first_noninline)
+ break;
- if (sectlen > len) {
- ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, len, sectlen);
- return STATUS_INTERNAL_ERROR;
- }
-
- if (sectlen == 0) {
- ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, secttype);
- return STATUS_INTERNAL_ERROR;
- }
+ *siptr = er->type;
+ siptr++;
- // If inline extent already present, increase refcount and return
+ if (extlen > 0) {
+ RtlCopyMemory(siptr, &er->edr, extlen);
+ siptr += extlen;
+ }
+
+ le = le->Flink;
+ }
+
+ if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, size, ei, inline_len, NULL, Irp, rollback)) {
+ ERR("error - failed to insert item\n");
+ ExFreePool(ei);
+ return STATUS_INTERNAL_ERROR;
+ }
+
+ // Third pass: every ref from first_noninline onwards becomes its own tree item.
+ if (!all_inline) {
+ le = &first_noninline->list_entry;
+
+ while (le != extent_refs) {
+ extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
+ ULONG len = get_extent_data_len(er->type);
+ UINT8* data;
+
+ if (len > 0) {
+ data = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
+
+ if (!data) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ RtlCopyMemory(data, &er->edr, len);
+ } else
+ data = NULL;
+
+ if (!insert_tree_item(Vcb, Vcb->extent_root, address, er->type, er->hash, data, len, NULL, Irp, rollback)) {
+ ERR("error - failed to insert item\n");
+ // The buffer was not inserted, so release it here, as is done for ei above.
+ if (data)
+ ExFreePool(data);
+ return STATUS_INTERNAL_ERROR;
+ }
+
+ le = le->Flink;
+ }
+ }
+
+ return STATUS_SUCCESS;
+}
+
+// Rewrites the old-style extent at `address` in the current format.
+//
+// The extent's EXTENT_ITEM is located and deleted, then the items following it
+// are walked: each TYPE_EXTENT_REF_V0 item for the same address is translated
+// into an in-memory extent_ref and deleted. construct_extent_item then writes
+// the replacement from the collected refs. `tree` selects tree-block handling;
+// firstitem and level are handed through to construct_extent_item unchanged.
+//
+// Returns the failing status of find_item or of one of the add_*_extent_ref
+// helpers, STATUS_INTERNAL_ERROR if no EXTENT_ITEM exists for the address, or
+// otherwise whatever construct_extent_item returns.
+static NTSTATUS convert_old_extent(device_extension* Vcb, UINT64 address, BOOL tree, KEY* firstitem, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
+    KEY searchkey;
+    traverse_ptr tp, next_tp;
+    LIST_ENTRY refs;
+    UINT64 extent_size;
+    UINT64 new_flags;
+
+    InitializeListHead(&refs);
+
+    searchkey.obj_id = address;
+    searchkey.obj_type = TYPE_EXTENT_ITEM;
+    searchkey.offset = 0xffffffffffffffff;
+
+    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("find_item returned %08x\n", Status);
+        return Status;
+    }
+
+    if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
+        ERR("old-style extent %llx not found\n", address);
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    // The extent's length lives in the key offset; keep it before deleting the item.
+    extent_size = tp.item->key.offset;
+
+    delete_tree_item(Vcb, &tp, rollback);
+
+    while (find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) {
+        EXTENT_REF_V0* erv0;
+
+        tp = next_tp;
+
+        // Stop once the walk has moved past this address or past the V0 ref type.
+        // An item that triggers this can never also be a V0 ref for `address`.
+        if (tp.item->key.obj_id > address || tp.item->key.obj_type > TYPE_EXTENT_REF_V0)
+            break;
+
+        // Ignore anything that is not a sufficiently large V0 ref for this extent.
+        if (tp.item->key.obj_id != address || tp.item->key.obj_type != TYPE_EXTENT_REF_V0 || tp.item->size < sizeof(EXTENT_REF_V0))
+            continue;
+
+        erv0 = (EXTENT_REF_V0*)tp.item->data;
+
+        if (!tree) {
+            Status = add_shared_data_extent_ref(&refs, tp.item->key.offset, erv0->count);
+            if (!NT_SUCCESS(Status)) {
+                ERR("add_shared_data_extent_ref returned %08x\n", Status);
+                goto end;
+            }
+        } else if (tp.item->key.offset == tp.item->key.obj_id) { // top of the tree
+            Status = add_tree_block_extent_ref(&refs, erv0->root);
+            if (!NT_SUCCESS(Status)) {
+                ERR("add_tree_block_extent_ref returned %08x\n", Status);
+                goto end;
+            }
+        } else {
+            Status = add_shared_block_extent_ref(&refs, tp.item->key.offset);
+            if (!NT_SUCCESS(Status)) {
+                ERR("add_shared_block_extent_ref returned %08x\n", Status);
+                goto end;
+            }
+        }
+
+        delete_tree_item(Vcb, &tp, rollback);
+    }
+
+    if (tree)
+        new_flags = EXTENT_ITEM_TREE_BLOCK | EXTENT_ITEM_SHARED_BACKREFS;
+    else
+        new_flags = EXTENT_ITEM_DATA;
+
+    Status = construct_extent_item(Vcb, address, extent_size, new_flags, &refs, firstitem, level, Irp, rollback);
+    if (!NT_SUCCESS(Status))
+        ERR("construct_extent_item returned %08x\n", Status);
+
+end:
+    // The in-memory refs are only scratch state; release them on every path.
+    free_extent_refs(&refs);
+
+    return Status;
+}
+
+NTSTATUS increase_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT8 type, void* data, KEY* firstitem, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) {
+ NTSTATUS Status;
+ KEY searchkey;
+ traverse_ptr tp;
+ ULONG datalen = get_extent_data_len(type), len, max_extent_item_size;
+ EXTENT_ITEM* ei;
+ UINT8* ptr;
+ UINT64 inline_rc, offset;
+ UINT8* data2;
+ EXTENT_ITEM* newei;
+ BOOL skinny;
+ BOOL is_tree = type == TYPE_TREE_BLOCK_REF || type == TYPE_SHARED_BLOCK_REF;
+
+ if (datalen == 0) {
+ ERR("unrecognized extent type %x\n", type);
+ return STATUS_INTERNAL_ERROR;
+ }
+
+ searchkey.obj_id = address;
+ searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
+ searchkey.offset = 0xffffffffffffffff;
+
+ Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+ if (!NT_SUCCESS(Status)) {
+ ERR("error - find_item returned %08x\n", Status);
+ return Status;
+ }
+
+ // If entry doesn't exist yet, create new inline extent item
+
+ if (tp.item->key.obj_id != searchkey.obj_id || (tp.item->key.obj_type != TYPE_EXTENT_ITEM && tp.item->key.obj_type != TYPE_METADATA_ITEM)) {
+ ULONG eisize;
+ EXTENT_ITEM* ei;
+ UINT8* ptr;
+
+ eisize = sizeof(EXTENT_ITEM);
+ if (is_tree) eisize += sizeof(EXTENT_ITEM2);
+ eisize += sizeof(UINT8);
+ eisize += datalen;
+
+ ei = ExAllocatePoolWithTag(PagedPool, eisize, ALLOC_TAG);
+ if (!ei) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ ei->refcount = get_extent_data_refcount(type, data);
+ ei->generation = Vcb->superblock.generation;
+ ei->flags = is_tree ? EXTENT_ITEM_TREE_BLOCK : EXTENT_ITEM_DATA;
+ ptr = (UINT8*)&ei[1];
+
+ if (is_tree && !(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
+ EXTENT_ITEM2* ei2 = (EXTENT_ITEM2*)ptr;
+ ei2->firstitem = *firstitem;
+ ei2->level = level;
+ ptr = (UINT8*)&ei2[1];
+ }
+
+ *ptr = type;
+ RtlCopyMemory(ptr + 1, data, datalen);
+
+ if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA && is_tree) {
+ if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, ei, eisize, NULL, Irp, rollback)) {
+ ERR("insert_tree_item failed\n");
+ return STATUS_INTERNAL_ERROR;
+ }
+ } else {
+ if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, size, ei, eisize, NULL, Irp, rollback)) {
+ ERR("insert_tree_item failed\n");
+ return STATUS_INTERNAL_ERROR;
+ }
+ }
+
+ return STATUS_SUCCESS;
+ } else if (tp.item->key.obj_id == address && tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.offset != size) {
+ ERR("extent %llx exists, but with size %llx rather than %llx expected\n", tp.item->key.obj_id, tp.item->key.offset, size);
+ return STATUS_INTERNAL_ERROR;
+ }
+
+ skinny = tp.item->key.obj_type == TYPE_METADATA_ITEM;
+
+ if (tp.item->size == sizeof(EXTENT_ITEM_V0) && !skinny) {
+ Status = convert_old_extent(Vcb, address, is_tree, firstitem, level, Irp, rollback);
+
+ if (!NT_SUCCESS(Status)) {
+ ERR("convert_old_extent returned %08x\n", Status);
+ return Status;
+ }
+
+ return increase_extent_refcount(Vcb, address, size, type, data, firstitem, level, Irp, rollback);
+ }
+
+ if (tp.item->size < sizeof(EXTENT_ITEM)) {
+ ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
+ return STATUS_INTERNAL_ERROR;
+ }
+
+ ei = (EXTENT_ITEM*)tp.item->data;
+
+ len = tp.item->size - sizeof(EXTENT_ITEM);
+ ptr = (UINT8*)&ei[1];
+
+ if (ei->flags & EXTENT_ITEM_TREE_BLOCK && !skinny) {
+ if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) {
+ ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2));
+ return STATUS_INTERNAL_ERROR;
+ }
+
+ len -= sizeof(EXTENT_ITEM2);
+ ptr += sizeof(EXTENT_ITEM2);
+ }
+
+ inline_rc = 0;
+
+ // Loop through existing inline extent entries
+
+ while (len > 0) {
+ UINT8 secttype = *ptr;
+ ULONG sectlen = get_extent_data_len(secttype);
+ UINT64 sectcount = get_extent_data_refcount(secttype, ptr + sizeof(UINT8));
+
+ len--;
+
+ if (sectlen > len) {
+ ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, len, sectlen);
+ return STATUS_INTERNAL_ERROR;
+ }
+
+ if (sectlen == 0) {
+ ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, secttype);
+ return STATUS_INTERNAL_ERROR;
+ }
+
+ // If inline extent already present, increase refcount and return
if (secttype == type) {
if (type == TYPE_EXTENT_DATA_REF) {
return STATUS_SUCCESS;
}
} else if (type == TYPE_TREE_BLOCK_REF) {
- ERR("trying to increase refcount of tree extent\n");
- return STATUS_INTERNAL_ERROR;
+ TREE_BLOCK_REF* secttbr = (TREE_BLOCK_REF*)(ptr + sizeof(UINT8));
+ TREE_BLOCK_REF* tbr = (TREE_BLOCK_REF*)data;
+
+ if (secttbr->offset == tbr->offset) {
+ TRACE("trying to increase refcount of non-shared tree extent\n");
+ return STATUS_SUCCESS;
+ }
+ } else if (type == TYPE_SHARED_BLOCK_REF) {
+ SHARED_BLOCK_REF* sectsbr = (SHARED_BLOCK_REF*)(ptr + sizeof(UINT8));
+ SHARED_BLOCK_REF* sbr = (SHARED_BLOCK_REF*)data;
+
+ if (sectsbr->offset == sbr->offset)
+ return STATUS_SUCCESS;
+ } else if (type == TYPE_SHARED_DATA_REF) {
+ SHARED_DATA_REF* sectsdr = (SHARED_DATA_REF*)(ptr + sizeof(UINT8));
+ SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)data;
+
+ if (sectsdr->offset == sdr->offset) {
+ UINT32 rc = get_extent_data_refcount(type, data);
+ SHARED_DATA_REF* sectsdr2;
+
+ newei = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
+ if (!newei) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ RtlCopyMemory(newei, tp.item->data, tp.item->size);
+
+ newei->generation = Vcb->superblock.generation;
+ newei->refcount += rc;
+
+ sectsdr2 = (SHARED_DATA_REF*)((UINT8*)newei + ((UINT8*)sectsdr - tp.item->data));
+ sectsdr2->count += rc;
+
+ delete_tree_item(Vcb, &tp, rollback);
+
+ if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp, rollback)) {
+ ERR("insert_tree_item failed\n");
+ return STATUS_INTERNAL_ERROR;
+ }
+
+ return STATUS_SUCCESS;
+ }
} else {
ERR("unhandled extent type %x\n", type);
return STATUS_INTERNAL_ERROR;
len = tp.item->size - sizeof(EXTENT_ITEM);
ptr = (UINT8*)&ei[1];
- if (ei->flags & EXTENT_ITEM_TREE_BLOCK) {
+ if (ei->flags & EXTENT_ITEM_TREE_BLOCK && !skinny) {
len -= sizeof(EXTENT_ITEM2);
ptr += sizeof(EXTENT_ITEM2);
}
return Status;
}
- if (!keycmp(&tp.item->key, &searchkey)) {
+ if (!keycmp(tp.item->key, searchkey)) {
if (tp.item->size < datalen) {
ERR("(%llx,%x,%llx) was %x bytes, expecting %x\n", tp2.item->key.obj_id, tp2.item->key.obj_type, tp2.item->key.offset, tp.item->size, datalen);
return STATUS_INTERNAL_ERROR;
edr->count += get_extent_data_refcount(type, data);
} else if (type == TYPE_TREE_BLOCK_REF) {
- ERR("trying to increase refcount of tree extent\n");
- return STATUS_INTERNAL_ERROR;
+ TRACE("trying to increase refcount of non-shared tree extent\n");
+ return STATUS_SUCCESS;
+ } else if (type == TYPE_SHARED_BLOCK_REF)
+ return STATUS_SUCCESS;
+ else if (type == TYPE_SHARED_DATA_REF) {
+ SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)data2;
+
+ sdr->count += get_extent_data_refcount(type, data);
} else {
ERR("unhandled extent type %x\n", type);
return STATUS_INTERNAL_ERROR;
TRACE("decreasing size of chunk %llx by %llx\n", c->offset, delta);
}
-static NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT8 type, void* data, KEY* firstitem,
- UINT8 level, UINT64 parent, PIRP Irp, LIST_ENTRY* rollback) {
+NTSTATUS decrease_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT8 type, void* data, KEY* firstitem,
+ UINT8 level, UINT64 parent, PIRP Irp, LIST_ENTRY* rollback) {
KEY searchkey;
NTSTATUS Status;
traverse_ptr tp, tp2;
UINT8* ptr;
UINT32 rc = data ? get_extent_data_refcount(type, data) : 1;
ULONG datalen = get_extent_data_len(type);
+ BOOL is_tree = (type == TYPE_TREE_BLOCK_REF || type == TYPE_SHARED_BLOCK_REF), skinny = FALSE;
- // FIXME - handle trees
-
- searchkey.obj_id = address;
- searchkey.obj_type = TYPE_EXTENT_ITEM;
- searchkey.offset = 0xffffffffffffffff;
-
- Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return Status;
+ if (is_tree && Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
+ searchkey.obj_id = address;
+ searchkey.obj_type = TYPE_METADATA_ITEM;
+ searchkey.offset = 0xffffffffffffffff;
+
+ Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+ if (!NT_SUCCESS(Status)) {
+ ERR("error - find_item returned %08x\n", Status);
+ return Status;
+ }
+
+ if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type)
+ skinny = TRUE;
}
- if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
- ERR("could not find EXTENT_ITEM for address %llx\n", address);
- return STATUS_INTERNAL_ERROR;
- }
-
- if (tp.item->key.offset != size) {
- ERR("extent %llx had length %llx, not %llx as expected\n", address, tp.item->key.offset, size);
- return STATUS_INTERNAL_ERROR;
- }
-
- if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
- EXTENT_ITEM_V0* eiv0 = (EXTENT_ITEM_V0*)tp.item->data;
-
- TRACE("converting old-style extent at (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-
- ei = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM), ALLOC_TAG);
+ if (!skinny) {
+ searchkey.obj_id = address;
+ searchkey.obj_type = TYPE_EXTENT_ITEM;
+ searchkey.offset = 0xffffffffffffffff;
- if (!ei) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
+ Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+ if (!NT_SUCCESS(Status)) {
+ ERR("error - find_item returned %08x\n", Status);
+ return Status;
}
- ei->refcount = eiv0->refcount;
- ei->generation = Vcb->superblock.generation;
- ei->flags = EXTENT_ITEM_DATA;
-
- delete_tree_item(Vcb, &tp, rollback);
+ if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
+ ERR("could not find EXTENT_ITEM for address %llx\n", address);
+ return STATUS_INTERNAL_ERROR;
+ }
- if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ei, sizeof(EXTENT_ITEM), &tp, Irp, rollback)) {
- ERR("insert_tree_item failed\n");
- ExFreePool(ei);
+ if (tp.item->key.offset != size) {
+ ERR("extent %llx had length %llx, not %llx as expected\n", address, tp.item->key.offset, size);
return STATUS_INTERNAL_ERROR;
}
- Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return Status;
+ if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
+ Status = convert_old_extent(Vcb, address, is_tree, firstitem, level, Irp, rollback);
+
+ if (!NT_SUCCESS(Status)) {
+ ERR("convert_old_extent returned %08x\n", Status);
+ return Status;
+ }
+
+ return decrease_extent_refcount(Vcb, address, size, type, data, firstitem, level, parent, Irp, rollback);
}
}
len = tp.item->size - sizeof(EXTENT_ITEM);
ptr = (UINT8*)&ei[1];
- if (ei->flags & EXTENT_ITEM_TREE_BLOCK) {
+ if (ei->flags & EXTENT_ITEM_TREE_BLOCK && !skinny) {
if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) {
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2));
return STATUS_INTERNAL_ERROR;
return STATUS_INTERNAL_ERROR;
}
+ return STATUS_SUCCESS;
+ }
+ } else if (type == TYPE_TREE_BLOCK_REF) {
+ TREE_BLOCK_REF* secttbr = (TREE_BLOCK_REF*)(ptr + sizeof(UINT8));
+ TREE_BLOCK_REF* tbr = (TREE_BLOCK_REF*)data;
+ ULONG neweilen;
+ EXTENT_ITEM* newei;
+
+ if (secttbr->offset == tbr->offset) {
+ if (ei->refcount == 1) {
+ delete_tree_item(Vcb, &tp, rollback);
+ return STATUS_SUCCESS;
+ }
+
+ neweilen = tp.item->size - sizeof(UINT8) - sectlen;
+
+ newei = ExAllocatePoolWithTag(PagedPool, neweilen, ALLOC_TAG);
+ if (!newei) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ RtlCopyMemory(newei, ei, ptr - tp.item->data);
+
+ if (len > sectlen)
+ RtlCopyMemory((UINT8*)newei + (ptr - tp.item->data), ptr + sectlen + sizeof(UINT8), len - sectlen);
+
+ newei->generation = Vcb->superblock.generation;
+ newei->refcount--;
+
+ delete_tree_item(Vcb, &tp, rollback);
+
+ if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, neweilen, NULL, Irp, rollback)) {
+ ERR("insert_tree_item failed\n");
+ return STATUS_INTERNAL_ERROR;
+ }
+
+ return STATUS_SUCCESS;
+ }
+ } else if (type == TYPE_SHARED_BLOCK_REF) {
+ SHARED_BLOCK_REF* sectsbr = (SHARED_BLOCK_REF*)(ptr + sizeof(UINT8));
+ SHARED_BLOCK_REF* sbr = (SHARED_BLOCK_REF*)data;
+ ULONG neweilen;
+ EXTENT_ITEM* newei;
+
+ if (sectsbr->offset == sbr->offset) {
+ if (ei->refcount == 1) {
+ delete_tree_item(Vcb, &tp, rollback);
+ return STATUS_SUCCESS;
+ }
+
+ neweilen = tp.item->size - sizeof(UINT8) - sectlen;
+
+ newei = ExAllocatePoolWithTag(PagedPool, neweilen, ALLOC_TAG);
+ if (!newei) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ RtlCopyMemory(newei, ei, ptr - tp.item->data);
+
+ if (len > sectlen)
+ RtlCopyMemory((UINT8*)newei + (ptr - tp.item->data), ptr + sectlen + sizeof(UINT8), len - sectlen);
+
+ newei->generation = Vcb->superblock.generation;
+ newei->refcount--;
+
+ delete_tree_item(Vcb, &tp, rollback);
+
+ if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, neweilen, NULL, Irp, rollback)) {
+ ERR("insert_tree_item failed\n");
+ return STATUS_INTERNAL_ERROR;
+ }
+
return STATUS_SUCCESS;
}
} else {
return Status;
}
- if (keycmp(&tp2.item->key, &searchkey)) {
+ if (keycmp(tp2.item->key, searchkey)) {
ERR("(%llx,%x,%llx) not found\n", tp2.item->key.obj_id, tp2.item->key.obj_type, tp2.item->key.offset);
return STATUS_INTERNAL_ERROR;
}
return STATUS_INTERNAL_ERROR;
}
+ return STATUS_SUCCESS;
+ } else {
+ ERR("error - collision?\n");
+ return STATUS_INTERNAL_ERROR;
+ }
+ } else if (type == TYPE_SHARED_BLOCK_REF) {
+ SHARED_BLOCK_REF* sectsbr = (SHARED_BLOCK_REF*)tp2.item->data;
+ SHARED_BLOCK_REF* sbr = (SHARED_BLOCK_REF*)data;
+ EXTENT_ITEM* newei;
+
+ if (sectsbr->offset == sbr->offset) {
+ if (ei->refcount == 1) {
+ delete_tree_item(Vcb, &tp, rollback);
+ delete_tree_item(Vcb, &tp2, rollback);
+ return STATUS_SUCCESS;
+ }
+
+ delete_tree_item(Vcb, &tp2, rollback);
+
+ newei = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
+ if (!newei) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ RtlCopyMemory(newei, tp.item->data, tp.item->size);
+
+ newei->generation = Vcb->superblock.generation;
+ newei->refcount -= rc;
+
+ delete_tree_item(Vcb, &tp, rollback);
+
+ if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp, rollback)) {
+ ERR("insert_tree_item failed\n");
+ return STATUS_INTERNAL_ERROR;
+ }
+
+ return STATUS_SUCCESS;
+ } else {
+ ERR("error - collision?\n");
+ return STATUS_INTERNAL_ERROR;
+ }
+ } else if (type == TYPE_TREE_BLOCK_REF) {
+ TREE_BLOCK_REF* secttbr = (TREE_BLOCK_REF*)tp2.item->data;
+ TREE_BLOCK_REF* tbr = (TREE_BLOCK_REF*)data;
+ EXTENT_ITEM* newei;
+
+ if (secttbr->offset == tbr->offset) {
+ if (ei->refcount == 1) {
+ delete_tree_item(Vcb, &tp, rollback);
+ delete_tree_item(Vcb, &tp2, rollback);
+ return STATUS_SUCCESS;
+ }
+
+ delete_tree_item(Vcb, &tp2, rollback);
+
+ newei = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
+ if (!newei) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ RtlCopyMemory(newei, tp.item->data, tp.item->size);
+
+ newei->generation = Vcb->superblock.generation;
+ newei->refcount -= rc;
+
+ delete_tree_item(Vcb, &tp, rollback);
+
+ if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newei, tp.item->size, NULL, Irp, rollback)) {
+ ERR("insert_tree_item failed\n");
+ return STATUS_INTERNAL_ERROR;
+ }
+
return STATUS_SUCCESS;
} else {
ERR("error - collision?\n");
return decrease_extent_refcount(Vcb, address, size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, 0, Irp, rollback);
}
-NTSTATUS decrease_extent_refcount_shared_data(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 treeaddr, UINT64 parent, PIRP Irp, LIST_ENTRY* rollback) {
- SHARED_DATA_REF sdr;
-
- sdr.offset = treeaddr;
- sdr.count = 1;
+NTSTATUS decrease_extent_refcount_tree(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, // wrapper: drops the TYPE_TREE_BLOCK_REF held by `root` on the extent at `address`
+ UINT8 level, PIRP Irp, LIST_ENTRY* rollback) {
+ TREE_BLOCK_REF tbr;
- return decrease_extent_refcount(Vcb, address, size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, parent, Irp, rollback);
-}
-
-NTSTATUS decrease_extent_refcount_old(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 treeaddr, PIRP Irp, LIST_ENTRY* rollback) {
- return decrease_extent_refcount(Vcb, address, size, TYPE_EXTENT_REF_V0, NULL, NULL, 0, treeaddr, Irp, rollback);
-}
-
-typedef struct {
- UINT8 type;
- void* data;
- BOOL allocated;
- UINT64 hash;
- LIST_ENTRY list_entry;
-} extent_ref;
-
-static void free_extent_refs(LIST_ENTRY* extent_refs) {
- while (!IsListEmpty(extent_refs)) {
- LIST_ENTRY* le = RemoveHeadList(extent_refs);
- extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
-
- if (er->allocated)
- ExFreePool(er->data);
-
- ExFreePool(er);
- }
+ tbr.offset = root; // the ref payload carries only the owning root's id
+
+ return decrease_extent_refcount(Vcb, address, size, TYPE_TREE_BLOCK_REF, &tbr, NULL/*FIXME*/, level, 0, Irp, rollback); // firstitem is passed as NULL and parent as 0
}
-static NTSTATUS add_data_extent_ref(LIST_ENTRY* extent_refs, UINT64 tree_id, UINT64 obj_id, UINT64 offset) {
- extent_ref* er2;
+// Returns the count recorded for the data reference (root, objid, offset) on the
+// extent at `address`, or 0 if the extent item or the reference cannot be found,
+// is malformed, or the tree lookup fails.
+//
+// The inline references stored after the EXTENT_ITEM are searched first; if none
+// matches, the standalone TYPE_EXTENT_DATA_REF item keyed by
+// get_extent_data_ref_hash2(root, objid, offset) is tried.
+// `size` must equal the offset of the extent's key, otherwise 0 is returned.
+static UINT64 find_extent_data_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, PIRP Irp) {
+ NTSTATUS Status;
+ KEY searchkey;
+ traverse_ptr tp;
EXTENT_DATA_REF* edr;
- LIST_ENTRY* le;
- if (!IsListEmpty(extent_refs)) {
- le = extent_refs->Flink;
+ searchkey.obj_id = address;
+ searchkey.obj_type = TYPE_EXTENT_ITEM;
+ searchkey.offset = 0xffffffffffffffff;
+
+ Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+ if (!NT_SUCCESS(Status)) {
+ ERR("error - find_item returned %08x\n", Status);
+ return 0;
+ }
+
+ if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
+ TRACE("could not find address %llx in extent tree\n", address);
+ return 0;
+ }
+
+ if (tp.item->key.offset != size) {
+ ERR("extent %llx had size %llx, not %llx as expected\n", address, tp.item->key.offset, size);
+ return 0;
+ }
+
+ // Items smaller than EXTENT_ITEM (e.g. the old V0 layout) carry no inline refs to walk
+ if (tp.item->size >= sizeof(EXTENT_ITEM)) {
+ EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
+ UINT32 len = tp.item->size - sizeof(EXTENT_ITEM);
+ UINT8* ptr = (UINT8*)&ei[1];
- while (le != extent_refs) {
- extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
+ // Each inline entry is a one-byte type followed by a type-dependent payload
+ while (len > 0) {
+ UINT8 secttype = *ptr;
+ ULONG sectlen = get_extent_data_len(secttype);
+
+ len--;
+
+ if (sectlen > len) {
+ ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, len, sectlen);
+ return 0;
+ }
+
+ if (sectlen == 0) {
+ ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, secttype);
+ return 0;
+ }
- if (er->type == TYPE_EXTENT_DATA_REF) {
- edr = (EXTENT_DATA_REF*)er->data;
+ if (secttype == TYPE_EXTENT_DATA_REF) {
+ EXTENT_DATA_REF* sectedr = (EXTENT_DATA_REF*)(ptr + sizeof(UINT8));
- if (edr->root == tree_id && edr->objid == obj_id && edr->offset == offset) {
- edr->count++;
- return STATUS_SUCCESS;
- }
+ // The payload is only read here, after sectlen has been checked against the bytes left
+ if (sectedr->root == root && sectedr->objid == objid && sectedr->offset == offset)
+ return get_extent_data_refcount(secttype, ptr + sizeof(UINT8));
}
- le = le->Flink;
+ len -= sectlen;
+ ptr += sizeof(UINT8) + sectlen;
}
}
- er2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent_ref), ALLOC_TAG);
- if (!er2) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
+ // No inline match: fall back to the standalone ref item
+ searchkey.obj_id = address;
+ searchkey.obj_type = TYPE_EXTENT_DATA_REF;
+ searchkey.offset = get_extent_data_ref_hash2(root, objid, offset);
- edr = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA_REF), ALLOC_TAG);
- if (!edr) {
- ERR("out of memory\n");
- ExFreePool(er2);
- return STATUS_INSUFFICIENT_RESOURCES;
+ Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+ if (!NT_SUCCESS(Status)) {
+ ERR("error - find_item returned %08x\n", Status);
+ return 0;
}
- edr->root = tree_id;
- edr->objid = obj_id;
- edr->offset = offset;
- edr->count = 1; // FIXME - not necessarily
-
- er2->type = TYPE_EXTENT_DATA_REF;
- er2->data = edr;
- er2->allocated = TRUE;
-
- InsertTailList(extent_refs, &er2->list_entry);
+ if (!keycmp(searchkey, tp.item->key)) {
+ if (tp.item->size < sizeof(EXTENT_DATA_REF))
+ ERR("(%llx,%x,%llx) has size %u, not %u as expected\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA_REF));
+ else {
+ edr = (EXTENT_DATA_REF*)tp.item->data;
+
+ return edr->count;
+ }
+ }
- return STATUS_SUCCESS;
+ return 0;
}
-static NTSTATUS construct_extent_item(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 flags, LIST_ENTRY* extent_refs, PIRP Irp, LIST_ENTRY* rollback) {
- LIST_ENTRY *le, *next_le;
- UINT64 refcount;
- ULONG inline_len;
- BOOL all_inline = TRUE;
- extent_ref* first_noninline;
+// Returns the total refcount stored in the extent tree for the extent at `address`,
+// or 0 if the lookup fails, the item is missing, its key offset differs from `size`,
+// or the item is too small.
+//
+// On skinny-metadata volumes a TYPE_METADATA_ITEM at `address` is accepted without
+// comparing its key offset to `size`. Otherwise a TYPE_EXTENT_ITEM is required, in
+// either the V0 or the current layout.
+UINT64 get_extent_refcount(device_extension* Vcb, UINT64 address, UINT64 size, PIRP Irp) {
+ KEY searchkey;
+ traverse_ptr tp;
+ NTSTATUS Status;
EXTENT_ITEM* ei;
- UINT8* siptr;
- if (IsListEmpty(extent_refs)) {
- WARN("no extent refs found\n");
- return STATUS_SUCCESS;
+ searchkey.obj_id = address;
+ searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
+ searchkey.offset = 0xffffffffffffffff;
+
+ Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+ if (!NT_SUCCESS(Status)) {
+ ERR("error - find_item returned %08x\n", Status);
+ return 0;
}
- refcount = 0;
- inline_len = sizeof(EXTENT_ITEM);
+ if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA && tp.item->key.obj_id == address &&
+ tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) {
+ ei = (EXTENT_ITEM*)tp.item->data;
- le = extent_refs->Flink;
- while (le != extent_refs) {
- extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
- UINT64 rc;
-
- next_le = le->Flink;
-
- rc = get_extent_data_refcount(er->type, er->data);
-
- if (rc == 0) {
- if (er->allocated)
- ExFreePool(er->data);
-
- RemoveEntryList(&er->list_entry);
-
- ExFreePool(er);
- } else {
- ULONG extlen = get_extent_data_len(er->type);
-
- refcount += rc;
-
- if (er->type == TYPE_EXTENT_DATA_REF)
- er->hash = get_extent_data_ref_hash(er->data);
- else
- er->hash = 0;
-
- if (all_inline) {
- if (inline_len + 1 + extlen > Vcb->superblock.node_size / 4) {
- all_inline = FALSE;
- first_noninline = er;
- } else
- inline_len += extlen + 1;
- }
- }
+ return ei->refcount;
+ }
+
+ if (tp.item->key.obj_id != address || tp.item->key.obj_type != TYPE_EXTENT_ITEM) {
+ ERR("couldn't find (%llx,%x,%llx) in extent tree\n", address, TYPE_EXTENT_ITEM, size);
+ return 0;
+ } else if (tp.item->key.offset != size) {
+ ERR("extent %llx had size %llx, not %llx as expected\n", address, tp.item->key.offset, size);
+ return 0;
+ }
+
+ // An item of exactly sizeof(EXTENT_ITEM_V0) is read using the V0 layout
+ if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
+ EXTENT_ITEM_V0* eiv0 = (EXTENT_ITEM_V0*)tp.item->data;
- le = next_le;
+ return eiv0->refcount;
+ } else if (tp.item->size < sizeof(EXTENT_ITEM)) {
+ // Report the size actually being checked against (EXTENT_ITEM)
+ ERR("(%llx,%x,%llx) was %x bytes, expected at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type,
+ tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
+ return 0;
}
- ei = ExAllocatePoolWithTag(PagedPool, inline_len, ALLOC_TAG);
- if (!ei) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
+ ei = (EXTENT_ITEM*)tp.item->data;
+
+ return ei->refcount;
+}
+
+// Returns TRUE if the extent at (address, size) is referenced from a single
+// (root, inode) pair only: either its total refcount is 1, or every reference found
+// is an EXTENT_DATA_REF with the same root and objid and their counts add up to the
+// total refcount. Returns FALSE in every other case, including lookup failures,
+// malformed items, V0 items, and any inline reference of another type.
+BOOL is_extent_unique(device_extension* Vcb, UINT64 address, UINT64 size, PIRP Irp) {
+ KEY searchkey;
+ traverse_ptr tp, next_tp;
+ NTSTATUS Status;
+ UINT64 rc, rcrun, root = 0, inode = 0;
+ UINT32 len;
+ EXTENT_ITEM* ei;
+ UINT8* ptr;
+ BOOL b;
+
+ rc = get_extent_refcount(Vcb, address, size, Irp);
+
+ if (rc == 1)
+ return TRUE;
+
+ if (rc == 0)
+ return FALSE;
+
+ searchkey.obj_id = address;
+ searchkey.obj_type = TYPE_EXTENT_ITEM;
+ searchkey.offset = size;
+
+ Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+ if (!NT_SUCCESS(Status)) {
+ WARN("error - find_item returned %08x\n", Status);
+ return FALSE;
}
- ei->refcount = refcount;
- ei->generation = Vcb->superblock.generation;
- ei->flags = flags;
+ if (keycmp(tp.item->key, searchkey)) {
+ WARN("could not find (%llx,%x,%llx)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
+ return FALSE;
+ }
- // Do we need to sort the inline extent refs? The Linux driver doesn't seem to bother.
+ if (tp.item->size == sizeof(EXTENT_ITEM_V0))
+ return FALSE;
- siptr = (UINT8*)&ei[1];
- le = extent_refs->Flink;
- while (le != extent_refs) {
- extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
- ULONG extlen = get_extent_data_len(er->type);
+ if (tp.item->size < sizeof(EXTENT_ITEM)) {
+ WARN("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
+ return FALSE;
+ }
+
+ ei = (EXTENT_ITEM*)tp.item->data;
+
+ len = tp.item->size - sizeof(EXTENT_ITEM);
+ ptr = (UINT8*)&ei[1];
+
+ // Tree-block extent items carry an EXTENT_ITEM2 before the inline refs
+ if (ei->flags & EXTENT_ITEM_TREE_BLOCK) {
+ if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) {
+ WARN("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2));
+ return FALSE;
+ }
- if (!all_inline && er == first_noninline)
- break;
+ len -= sizeof(EXTENT_ITEM2);
+ ptr += sizeof(EXTENT_ITEM2);
+ }
+
+ rcrun = 0;
+
+ // Loop through inline extent entries
+
+ while (len > 0) {
+ UINT8 secttype = *ptr;
+ ULONG sectlen = get_extent_data_len(secttype);
+ UINT64 sectcount;
- *siptr = er->type;
- siptr++;
+ len--;
- if (extlen > 0) {
- RtlCopyMemory(siptr, er->data, extlen);
- siptr += extlen;
+ if (sectlen > len) {
+ WARN("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, len, sectlen);
+ return FALSE;
}
-
- le = le->Flink;
+
+ if (sectlen == 0) {
+ WARN("(%llx,%x,%llx): unrecognized extent type %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, secttype);
+ return FALSE;
+ }
+
+ // Only touch the payload once it is known to lie inside the item
+ sectcount = get_extent_data_refcount(secttype, ptr + sizeof(UINT8));
+
+ if (secttype == TYPE_EXTENT_DATA_REF) {
+ EXTENT_DATA_REF* sectedr = (EXTENT_DATA_REF*)(ptr + sizeof(UINT8));
+
+ // root == 0 && inode == 0 means no owner has been recorded yet
+ if (root == 0 && inode == 0) {
+ root = sectedr->root;
+ inode = sectedr->objid;
+ } else if (root != sectedr->root || inode != sectedr->objid)
+ return FALSE;
+ } else
+ return FALSE;
+
+ len -= sectlen;
+ ptr += sizeof(UINT8) + sectlen;
+ rcrun += sectcount;
}
- if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, size, ei, inline_len, NULL, Irp, rollback)) {
- ERR("error - failed to insert item\n");
- ExFreePool(ei);
- return STATUS_INTERNAL_ERROR;
- }
+ if (rcrun == rc)
+ return TRUE;
+
+ // Loop through non-inlines if some refs still unaccounted for
- if (!all_inline) {
- le = &first_noninline->list_entry;
+ do {
+ b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp);
- while (le != extent_refs) {
- extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
+ if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_EXTENT_DATA_REF) {
+ EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)tp.item->data;
- if (!insert_tree_item(Vcb, Vcb->extent_root, address, er->type, er->hash, er->data, get_extent_data_len(er->type), NULL, Irp, rollback)) {
- ERR("error - failed to insert item\n");
- return STATUS_INTERNAL_ERROR;
+ if (tp.item->size < sizeof(EXTENT_DATA_REF)) {
+ // Report the size actually being checked against (EXTENT_DATA_REF)
+ WARN("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
+ tp.item->size, sizeof(EXTENT_DATA_REF));
+ return FALSE;
}
- er->allocated = FALSE;
+ if (root == 0 && inode == 0) {
+ root = edr->root;
+ inode = edr->objid;
+ } else if (root != edr->root || inode != edr->objid)
+ return FALSE;
- le = le->Flink;
+ rcrun += edr->count;
}
- }
+
+ if (rcrun == rc)
+ return TRUE;
+
+ if (b) {
+ tp = next_tp;
+
+ // Stop once the walk has moved past this extent's items
+ if (tp.item->key.obj_id > searchkey.obj_id)
+ break;
+ }
+ } while (b);
- return STATUS_SUCCESS;
+ // If we reach this point, there's still some refs unaccounted for somewhere.
+ // Return FALSE in case we mess things up elsewhere.
+
+ return FALSE;
}
-static NTSTATUS populate_extent_refs_from_tree(device_extension* Vcb, UINT64 tree_address, UINT64 extent_address, LIST_ENTRY* extent_refs) {
- UINT8* buf;
- tree_header* th;
+// Returns the flags field of the extent item for `address`, or 0 if the lookup
+// fails, no matching item exists, the item uses the V0 layout, or it is too small.
+// On skinny-metadata volumes a TYPE_METADATA_ITEM at `address` is accepted as well
+// as a TYPE_EXTENT_ITEM.
+UINT64 get_extent_flags(device_extension* Vcb, UINT64 address, PIRP Irp) {
+ KEY searchkey;
+ traverse_ptr tp;
NTSTATUS Status;
+ EXTENT_ITEM* ei;
- buf = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
- if (!buf) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- Status = read_data(Vcb, tree_address, Vcb->superblock.node_size, NULL, TRUE, buf, NULL, NULL);
+ searchkey.obj_id = address;
+ searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
+ searchkey.offset = 0xffffffffffffffff;
+
+ Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
if (!NT_SUCCESS(Status)) {
- ERR("read_data returned %08x\n", Status);
- ExFreePool(buf);
- return Status;
+ ERR("error - find_item returned %08x\n", Status);
+ return 0;
}
- th = (tree_header*)buf;
-
- if (th->level == 0) {
- UINT32 i;
- leaf_node* ln = (leaf_node*)&th[1];
-
- for (i = 0; i < th->num_items; i++) {
- if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) && ln[i].offset + ln[i].size <= Vcb->superblock.node_size - sizeof(tree_header)) {
- EXTENT_DATA* ed = (EXTENT_DATA*)(((UINT8*)&th[1]) + ln[i].offset);
-
- if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
- EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ed->data[0];
-
- if (ed2->address == extent_address) {
- Status = add_data_extent_ref(extent_refs, th->tree_id, ln[i].key.obj_id, ln[i].key.offset);
- if (!NT_SUCCESS(Status)) {
- ERR("add_data_extent_ref returned %08x\n", Status);
- ExFreePool(buf);
- return Status;
- }
- }
- }
- }
- }
- } else
- WARN("shared data ref pointed to tree of level %x\n", th->level);
+ if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA && tp.item->key.obj_id == address &&
+ tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) {
+ ei = (EXTENT_ITEM*)tp.item->data;
- ExFreePool(buf);
+ return ei->flags;
+ }
- return STATUS_SUCCESS;
+ if (tp.item->key.obj_id != address || tp.item->key.obj_type != TYPE_EXTENT_ITEM) {
+ ERR("couldn't find %llx in extent tree\n", address);
+ return 0;
+ }
+
+ // V0 items are reported as having no flags
+ if (tp.item->size == sizeof(EXTENT_ITEM_V0))
+ return 0;
+ else if (tp.item->size < sizeof(EXTENT_ITEM)) {
+ // Report the size actually being checked against (EXTENT_ITEM)
+ ERR("(%llx,%x,%llx) was %x bytes, expected at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type,
+ tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
+ return 0;
+ }
+
+ ei = (EXTENT_ITEM*)tp.item->data;
+
+ return ei->flags;
}
-NTSTATUS convert_old_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, PIRP Irp, LIST_ENTRY* rollback) {
+// Overwrites the flags field of the extent item for `address` with `flags`.
+// Does nothing (beyond logging) if the lookup fails, no matching item exists, the
+// item uses the V0 layout, or it is too small. On skinny-metadata volumes a
+// TYPE_METADATA_ITEM at `address` is accepted as well as a TYPE_EXTENT_ITEM.
+// NOTE(review): the write goes straight into tp.item->data and nothing in this
+// function marks the item or tree as changed - confirm callers account for that.
+void update_extent_flags(device_extension* Vcb, UINT64 address, UINT64 flags, PIRP Irp) {
KEY searchkey;
- traverse_ptr tp, next_tp;
- BOOL b;
- LIST_ENTRY extent_refs;
+ traverse_ptr tp;
NTSTATUS Status;
+ EXTENT_ITEM* ei;
searchkey.obj_id = address;
- searchkey.obj_type = TYPE_EXTENT_ITEM;
- searchkey.offset = size;
+ searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
+ searchkey.offset = 0xffffffffffffffff;
Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
if (!NT_SUCCESS(Status)) {
ERR("error - find_item returned %08x\n", Status);
- return Status;
+ return;
}
- if (keycmp(&tp.item->key, &searchkey)) {
- WARN("extent item not found for address %llx, size %llx\n", address, size);
- return STATUS_SUCCESS;
+ if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA && tp.item->key.obj_id == address &&
+ tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) {
+ ei = (EXTENT_ITEM*)tp.item->data;
+ ei->flags = flags;
+ return;
}
- if (tp.item->size != sizeof(EXTENT_ITEM_V0)) {
- TRACE("extent does not appear to be old - returning STATUS_SUCCESS\n");
- return STATUS_SUCCESS;
+ if (tp.item->key.obj_id != address || tp.item->key.obj_type != TYPE_EXTENT_ITEM) {
+ ERR("couldn't find %llx in extent tree\n", address);
+ return;
}
- delete_tree_item(Vcb, &tp, rollback);
+ // V0 items are left untouched
+ if (tp.item->size == sizeof(EXTENT_ITEM_V0))
+ return;
+ else if (tp.item->size < sizeof(EXTENT_ITEM)) {
+ // Report the size actually being checked against (EXTENT_ITEM)
+ ERR("(%llx,%x,%llx) was %x bytes, expected at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type,
+ tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
+ return;
+ }
- searchkey.obj_id = address;
- searchkey.obj_type = TYPE_EXTENT_REF_V0;
- searchkey.offset = 0;
+ ei = (EXTENT_ITEM*)tp.item->data;
+ ei->flags = flags;
+}
+
+static changed_extent* get_changed_extent_item(chunk* c, UINT64 address, UINT64 size, BOOL no_csum) { // Find-or-create: returns the changed_extent for (address, size) in c->changed_extents, appending a zeroed one if absent; NULL on allocation failure. Takes no lock itself.
+ LIST_ENTRY* le;
+ changed_extent* ce;
- Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return Status;
+ le = c->changed_extents.Flink;
+ while (le != &c->changed_extents) {
+ ce = CONTAINING_RECORD(le, changed_extent, list_entry);
+
+ if (ce->address == address && ce->size == size)
+ return ce; // existing record is returned as-is; no_csum is not applied to it
+
+ le = le->Flink;
}
- InitializeListHead(&extent_refs);
+ ce = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent), ALLOC_TAG);
+ if (!ce) {
+ ERR("out of memory\n");
+ return NULL;
+ }
- do {
- b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp);
+ ce->address = address;
+ ce->size = size;
+ ce->old_size = size;
+ ce->count = 0; // counts start at zero; callers fill them in
+ ce->old_count = 0;
+ ce->no_csum = no_csum;
+ ce->superseded = FALSE;
+ InitializeListHead(&ce->refs);
+ InitializeListHead(&ce->old_refs);
+
+ InsertTailList(&c->changed_extents, &ce->list_entry);
+
+ return ce;
+}
+
+// Applies a signed delta `count` to the pending (in-memory) reference that
+// (root, objid, offset) holds on the extent (address, size) in chunk `c`.
+//
+// The changed_extent record is found or created under c->changed_extents_lock.
+// A record with no refs yet has count/old_count seeded from the on-disk extent
+// item. If a pending ref for this (root, objid, offset) already exists its count
+// is adjusted; otherwise a new ref is queued with the on-disk count plus `count`,
+// and, when the on-disk count is non-zero, a snapshot of it is queued on old_refs.
+// `superseded`, when TRUE, is latched onto the record.
+//
+// Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES on allocation failure,
+// STATUS_INTERNAL_ERROR if the extent item is missing or malformed, or the
+// failure status of find_item.
+NTSTATUS update_changed_extent_ref(device_extension* Vcb, chunk* c, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, signed long long count,
+ BOOL no_csum, BOOL superseded, PIRP Irp) {
+ LIST_ENTRY* le;
+ changed_extent* ce;
+ changed_extent_ref* cer;
+ changed_extent_ref* old_cer;
+ NTSTATUS Status;
+ KEY searchkey;
+ traverse_ptr tp;
+ UINT64 old_count;
+
+ ExAcquireResourceExclusiveLite(&c->changed_extents_lock, TRUE);
+
+ ce = get_changed_extent_item(c, address, size, no_csum);
+
+ if (!ce) {
+ ERR("get_changed_extent_item failed\n");
+ Status = STATUS_INTERNAL_ERROR;
+ goto end;
+ }
+
+ if (IsListEmpty(&ce->refs) && IsListEmpty(&ce->old_refs)) { // new entry
+ searchkey.obj_id = address;
+ searchkey.obj_type = TYPE_EXTENT_ITEM;
+ searchkey.offset = 0xffffffffffffffff;
- if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
- Status = populate_extent_refs_from_tree(Vcb, tp.item->key.offset, address, &extent_refs);
- if (!NT_SUCCESS(Status)) {
- ERR("populate_extent_refs_from_tree returned %08x\n", Status);
- return Status;
- }
+ Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+ if (!NT_SUCCESS(Status)) {
+ ERR("error - find_item returned %08x\n", Status);
+ goto end;
+ }
+
+ if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
+ ERR("could not find address %llx in extent tree\n", address);
+ Status = STATUS_INTERNAL_ERROR;
+ goto end;
+ }
+
+ if (tp.item->key.offset != size) {
+ ERR("extent %llx had size %llx, not %llx as expected\n", address, tp.item->key.offset, size);
+ Status = STATUS_INTERNAL_ERROR;
+ goto end;
+ }
+
+ if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
+ EXTENT_ITEM_V0* eiv0 = (EXTENT_ITEM_V0*)tp.item->data;
- delete_tree_item(Vcb, &tp, rollback);
+ ce->count = ce->old_count = eiv0->refcount;
+ } else if (tp.item->size >= sizeof(EXTENT_ITEM)) {
+ EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
+
+ ce->count = ce->old_count = ei->refcount;
+ } else {
+ ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
+ Status = STATUS_INTERNAL_ERROR;
+ goto end;
}
+ }
+
+ // Adjust an already-pending ref for the same (root, objid, offset) if there is one
+ le = ce->refs.Flink;
+ while (le != &ce->refs) {
+ cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
- if (b) {
- tp = next_tp;
+ if (cer->type == TYPE_EXTENT_DATA_REF && cer->edr.root == root && cer->edr.objid == objid && cer->edr.offset == offset) {
+ ce->count += count;
+ cer->edr.count += count;
+ Status = STATUS_SUCCESS;
- if (tp.item->key.obj_id > searchkey.obj_id || tp.item->key.obj_type > searchkey.obj_type)
- break;
+ if (superseded)
+ ce->superseded = TRUE;
+
+ goto end;
}
- } while (b);
+
+ le = le->Flink;
+ }
- Status = construct_extent_item(Vcb, address, size, EXTENT_ITEM_DATA, &extent_refs, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("construct_extent_item returned %08x\n", Status);
- free_extent_refs(&extent_refs);
- return Status;
+ old_count = find_extent_data_refcount(Vcb, address, size, root, objid, offset, Irp);
+
+ // Allocate every record before linking any of them, so that an allocation
+ // failure leaves ce exactly as it was and a retry cannot queue a duplicate
+ // old_refs entry.
+ cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
+
+ if (!cer) {
+ ERR("out of memory\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto end;
+ }
+
+ if (old_count > 0) {
+ old_cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
+
+ if (!old_cer) {
+ ERR("out of memory\n");
+ ExFreePool(cer);
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto end;
+ }
+
+ // Snapshot of the on-disk ref as it was before this change
+ old_cer->type = TYPE_EXTENT_DATA_REF;
+ old_cer->edr.root = root;
+ old_cer->edr.objid = objid;
+ old_cer->edr.offset = offset;
+ old_cer->edr.count = old_count;
+
+ InsertTailList(&ce->old_refs, &old_cer->list_entry);
}
- free_extent_refs(&extent_refs);
- return STATUS_SUCCESS;
+ cer->type = TYPE_EXTENT_DATA_REF;
+ cer->edr.root = root;
+ cer->edr.objid = objid;
+ cer->edr.offset = offset;
+ cer->edr.count = old_count + count;
+
+ InsertTailList(&ce->refs, &cer->list_entry);
+
+ ce->count += count;
+
+ if (superseded)
+ ce->superseded = TRUE;
+
+ Status = STATUS_SUCCESS;
+
+end:
+ ExReleaseResourceLite(&c->changed_extents_lock);
+
+ return Status;
+}
+
+// Adds `count` references from (root, objid, offset) to the pending record for the
+// extent (address, size) in chunk `c`, creating the record and/or the ref entry if
+// they do not exist yet. Failures are logged and otherwise ignored.
+// Unlike update_changed_extent_ref, this neither consults the on-disk extent item
+// nor acquires c->changed_extents_lock.
+void add_changed_extent_ref(chunk* c, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, UINT32 count, BOOL no_csum) {
+ changed_extent* extent = get_changed_extent_item(c, address, size, no_csum);
+ changed_extent_ref* ref;
+ LIST_ENTRY* entry;
+
+ if (!extent) {
+ ERR("get_changed_extent_item failed\n");
+ return;
+ }
+
+ // Fold into an existing ref for the same (root, objid, offset) when present
+ for (entry = extent->refs.Flink; entry != &extent->refs; entry = entry->Flink) {
+ ref = CONTAINING_RECORD(entry, changed_extent_ref, list_entry);
+
+ if (ref->type != TYPE_EXTENT_DATA_REF || ref->edr.root != root || ref->edr.objid != objid || ref->edr.offset != offset)
+ continue;
+
+ extent->count += count;
+ ref->edr.count += count;
+ return;
+ }
+
+ // Otherwise queue a brand-new ref carrying just this count
+ ref = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
+ if (!ref) {
+ ERR("out of memory\n");
+ return;
+ }
+
+ ref->type = TYPE_EXTENT_DATA_REF;
+ ref->edr.root = root;
+ ref->edr.objid = objid;
+ ref->edr.offset = offset;
+ ref->edr.count = count;
+
+ InsertTailList(&extent->refs, &ref->list_entry);
+ extent->count += count;
}
-UINT64 find_extent_data_refcount(device_extension* Vcb, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, PIRP Irp) {
+// Returns 1 if the tree block at `address` has a SHARED_BLOCK_REF whose offset is
+// `parent`, either inline in its extent item or as a standalone
+// TYPE_SHARED_BLOCK_REF item; returns 0 otherwise, including on lookup failure or
+// a malformed item.
+// The extent may be stored as a TYPE_METADATA_ITEM or as a TYPE_EXTENT_ITEM; only
+// the latter has an EXTENT_ITEM2 between the EXTENT_ITEM and the inline refs.
+UINT64 find_extent_shared_tree_refcount(device_extension* Vcb, UINT64 address, UINT64 parent, PIRP Irp) {
NTSTATUS Status;
KEY searchkey;
traverse_ptr tp;
- EXTENT_DATA_REF* edr;
- BOOL old = FALSE;
+ UINT64 inline_rc;
+ EXTENT_ITEM* ei;
+ UINT32 len;
+ UINT8* ptr;
searchkey.obj_id = address;
- searchkey.obj_type = TYPE_EXTENT_ITEM;
+ searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
searchkey.offset = 0xffffffffffffffff;
Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
return 0;
}
- if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
+ if (tp.item->key.obj_id != searchkey.obj_id || (tp.item->key.obj_type != TYPE_EXTENT_ITEM && tp.item->key.obj_type != TYPE_METADATA_ITEM)) {
TRACE("could not find address %llx in extent tree\n", address);
return 0;
}
- if (tp.item->key.offset != size) {
- ERR("extent %llx had size %llx, not %llx as expected\n", address, tp.item->key.offset, size);
+ if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.offset != Vcb->superblock.node_size) {
+ ERR("extent %llx had size %llx, not %llx as expected\n", address, tp.item->key.offset, Vcb->superblock.node_size);
return 0;
}
- if (tp.item->size >= sizeof(EXTENT_ITEM)) {
- EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
- UINT32 len = tp.item->size - sizeof(EXTENT_ITEM);
- UINT8* ptr = (UINT8*)&ei[1];
+ if (tp.item->size < sizeof(EXTENT_ITEM)) {
+ ERR("(%llx,%x,%llx): size was %u, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
+ return 0;
+ }
+
+ ei = (EXTENT_ITEM*)tp.item->data;
+ inline_rc = 0;
+
+ len = tp.item->size - sizeof(EXTENT_ITEM);
+ ptr = (UINT8*)&ei[1];
+
+ // Decide on the type of the item actually found, not the type searched for: on a
+ // skinny-metadata volume the search key is TYPE_METADATA_ITEM, but the lookup can
+ // still land on an old-style TYPE_EXTENT_ITEM, which carries an EXTENT_ITEM2.
+ if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) {
+ if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) {
+ ERR("(%llx,%x,%llx): size was %u, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
+ tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2));
+ return 0;
+ }
- while (len > 0) {
- UINT8 secttype = *ptr;
- ULONG sectlen = get_extent_data_len(secttype);
- UINT64 sectcount = get_extent_data_refcount(secttype, ptr + sizeof(UINT8));
-
- len--;
-
- if (sectlen > len) {
- ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, len, sectlen);
- return 0;
- }
+ len -= sizeof(EXTENT_ITEM2);
+ ptr += sizeof(EXTENT_ITEM2);
+ }
+
+ // Each inline entry is a one-byte type followed by a type-dependent payload
+ while (len > 0) {
+ UINT8 secttype = *ptr;
+ ULONG sectlen = get_extent_data_len(secttype);
+ UINT64 sectcount;
+
+ len--;
+
+ if (sectlen > len) {
+ ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, len, sectlen);
+ return 0;
+ }
- if (sectlen == 0) {
- ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, secttype);
- return 0;
- }
-
- if (secttype == TYPE_EXTENT_DATA_REF) {
- EXTENT_DATA_REF* sectedr = (EXTENT_DATA_REF*)(ptr + sizeof(UINT8));
-
- if (sectedr->root == root && sectedr->objid == objid && sectedr->offset == offset)
- return sectcount;
- } else if (secttype == TYPE_SHARED_DATA_REF) {
- SHARED_DATA_REF* sectsdr = (SHARED_DATA_REF*)(ptr + sizeof(UINT8));
- BOOL found = FALSE;
- LIST_ENTRY* le;
-
- le = Vcb->shared_extents.Flink;
- while (le != &Vcb->shared_extents) {
- shared_data* sd = CONTAINING_RECORD(le, shared_data, list_entry);
-
- if (sd->address == sectsdr->offset) {
- LIST_ENTRY* le2 = sd->entries.Flink;
- while (le2 != &sd->entries) {
- shared_data_entry* sde = CONTAINING_RECORD(le2, shared_data_entry, list_entry);
-
- if (sde->edr.root == root && sde->edr.objid == objid && sde->edr.offset == offset)
- return sde->edr.count;
-
- le2 = le2->Flink;
- }
- found = TRUE;
- break;
- }
-
- le = le->Flink;
- }
-
- if (!found)
- WARN("shared data extents not loaded for tree at %llx\n", sectsdr->offset);
- }
+ if (sectlen == 0) {
+ ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, secttype);
+ return 0;
+ }
+
+ // Only touch the payload once it is known to lie inside the item
+ sectcount = get_extent_data_refcount(secttype, ptr + sizeof(UINT8));
+
+ if (secttype == TYPE_SHARED_BLOCK_REF) {
+ SHARED_BLOCK_REF* sectsbr = (SHARED_BLOCK_REF*)(ptr + sizeof(UINT8));
- len -= sectlen;
- ptr += sizeof(UINT8) + sectlen;
+ if (sectsbr->offset == parent)
+ return 1;
}
- } else if (tp.item->size == sizeof(EXTENT_ITEM_V0))
- old = TRUE;
+
+ len -= sectlen;
+ ptr += sizeof(UINT8) + sectlen;
+ inline_rc += sectcount;
+ }
+
+ // FIXME - what if old?
+
+ // Every ref was inline and none matched, so no standalone item can exist
+ if (inline_rc == ei->refcount)
+ return 0;
searchkey.obj_id = address;
- searchkey.obj_type = TYPE_EXTENT_DATA_REF;
- searchkey.offset = get_extent_data_ref_hash2(root, objid, offset);
+ searchkey.obj_type = TYPE_SHARED_BLOCK_REF;
+ searchkey.offset = parent;
Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
if (!NT_SUCCESS(Status)) {
return 0;
}
- if (!keycmp(&searchkey, &tp.item->key)) {
- if (tp.item->size < sizeof(EXTENT_DATA_REF))
- ERR("(%llx,%x,%llx) has size %u, not %u as expected\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA_REF));
- else {
- edr = (EXTENT_DATA_REF*)tp.item->data;
-
- return edr->count;
- }
+ if (!keycmp(searchkey, tp.item->key)) {
+ if (tp.item->size < sizeof(SHARED_BLOCK_REF))
+ ERR("(%llx,%x,%llx) has size %u, not %u as expected\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(SHARED_BLOCK_REF));
+ else
+ return 1;
+ }
+
+ return 0;
+}
+
+UINT64 find_extent_shared_data_refcount(device_extension* Vcb, UINT64 address, UINT64 parent, PIRP Irp) {
+ NTSTATUS Status;
+ KEY searchkey;
+ traverse_ptr tp;
+ UINT64 inline_rc;
+ EXTENT_ITEM* ei;
+ UINT32 len;
+ UINT8* ptr;
+
+ searchkey.obj_id = address;
+ searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
+ searchkey.offset = 0xffffffffffffffff;
+
+ Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+ if (!NT_SUCCESS(Status)) {
+ ERR("error - find_item returned %08x\n", Status);
+ return 0;
+ }
+
+ if (tp.item->key.obj_id != searchkey.obj_id || (tp.item->key.obj_type != TYPE_EXTENT_ITEM && tp.item->key.obj_type != TYPE_METADATA_ITEM)) {
+ TRACE("could not find address %llx in extent tree\n", address);
+ return 0;
}
-
- if (old) {
- BOOL b;
+
+ if (tp.item->size < sizeof(EXTENT_ITEM)) {
+ ERR("(%llx,%x,%llx): size was %u, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
+ return 0;
+ }
+
+ ei = (EXTENT_ITEM*)tp.item->data;
+ inline_rc = 0;
+
+ len = tp.item->size - sizeof(EXTENT_ITEM);
+ ptr = (UINT8*)&ei[1];
+
+ while (len > 0) {
+ UINT8 secttype = *ptr;
+ ULONG sectlen = get_extent_data_len(secttype);
+ UINT64 sectcount = get_extent_data_refcount(secttype, ptr + sizeof(UINT8));
- searchkey.obj_id = address;
- searchkey.obj_type = TYPE_EXTENT_REF_V0;
- searchkey.offset = 0;
+ len--;
- Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
+ if (sectlen > len) {
+ ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, len, sectlen);
return 0;
}
-
- do {
- traverse_ptr next_tp;
-
- b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp);
-
- if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
- if (tp.item->size >= sizeof(EXTENT_REF_V0)) {
- EXTENT_REF_V0* erv0 = (EXTENT_REF_V0*)tp.item->data;
-
- if (erv0->root == root && erv0->objid == objid) {
- LIST_ENTRY* le;
- BOOL found = FALSE;
-
- le = Vcb->shared_extents.Flink;
- while (le != &Vcb->shared_extents) {
- shared_data* sd = CONTAINING_RECORD(le, shared_data, list_entry);
-
- if (sd->address == tp.item->key.offset) {
- LIST_ENTRY* le2 = sd->entries.Flink;
- while (le2 != &sd->entries) {
- shared_data_entry* sde = CONTAINING_RECORD(le2, shared_data_entry, list_entry);
-
- if (sde->edr.root == root && sde->edr.objid == objid && sde->edr.offset == offset)
- return sde->edr.count;
-
- le2 = le2->Flink;
- }
- found = TRUE;
- break;
- }
-
- le = le->Flink;
- }
-
- if (!found)
- WARN("shared data extents not loaded for tree at %llx\n", tp.item->key.offset);
- }
- } else {
- ERR("(%llx,%x,%llx) was %x bytes, not %x as expected\n", tp.item->key.obj_id, tp.item->key.obj_type,
- tp.item->key.offset, tp.item->size, sizeof(EXTENT_REF_V0));
- }
- }
-
- if (b) {
- tp = next_tp;
-
- if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type))
- break;
- }
- } while (b);
- } else {
- BOOL b;
-
- searchkey.obj_id = address;
- searchkey.obj_type = TYPE_SHARED_DATA_REF;
- searchkey.offset = 0;
-
- Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
+
+ if (sectlen == 0) {
+ ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, secttype);
return 0;
}
- do {
- traverse_ptr next_tp;
+ if (secttype == TYPE_SHARED_DATA_REF) {
+ SHARED_DATA_REF* sectsdr = (SHARED_DATA_REF*)(ptr + sizeof(UINT8));
- b = find_next_item(Vcb, &tp, &next_tp, FALSE, Irp);
-
- if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
- if (tp.item->size >= sizeof(SHARED_DATA_REF)) {
- SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)tp.item->data;
- LIST_ENTRY* le;
- BOOL found = FALSE;
-
- le = Vcb->shared_extents.Flink;
- while (le != &Vcb->shared_extents) {
- shared_data* sd = CONTAINING_RECORD(le, shared_data, list_entry);
-
- if (sd->address == sdr->offset) {
- LIST_ENTRY* le2 = sd->entries.Flink;
- while (le2 != &sd->entries) {
- shared_data_entry* sde = CONTAINING_RECORD(le2, shared_data_entry, list_entry);
-
- if (sde->edr.root == root && sde->edr.objid == objid && sde->edr.offset == offset)
- return sde->edr.count;
-
- le2 = le2->Flink;
- }
- found = TRUE;
- break;
- }
-
- le = le->Flink;
- }
-
- if (!found)
- WARN("shared data extents not loaded for tree at %llx\n", sdr->offset);
- } else {
- ERR("(%llx,%x,%llx) was %x bytes, not %x as expected\n", tp.item->key.obj_id, tp.item->key.obj_type,
- tp.item->key.offset, tp.item->size, sizeof(SHARED_DATA_REF));
- }
- }
-
- if (b) {
- tp = next_tp;
-
- if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type))
- break;
- }
- } while (b);
+ if (sectsdr->offset == parent)
+ return sectsdr->count;
+ }
+
+ len -= sectlen;
+ ptr += sizeof(UINT8) + sectlen;
+ inline_rc += sectcount;
+ }
+
+ // FIXME - what if old?
+
+ if (inline_rc == ei->refcount)
+ return 0;
+
+ searchkey.obj_id = address;
+ searchkey.obj_type = TYPE_SHARED_DATA_REF;
+ searchkey.offset = parent;
+
+ Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+ if (!NT_SUCCESS(Status)) {
+ ERR("error - find_item returned %08x\n", Status);
+ return 0;
+ }
+
+ if (!keycmp(searchkey, tp.item->key)) {
+ if (tp.item->size < sizeof(SHARED_DATA_REF))
+ ERR("(%llx,%x,%llx) has size %u, not %u as expected\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(SHARED_DATA_REF));
+ else {
+ SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)tp.item->data;
+ return sdr->count;
+ }
}
return 0;
// FIXME - what about subvol roots?
// FIXME - link FILE_ATTRIBUTE_READONLY to st_mode
- // FIXME - handle times == -1
+
+ if (fbi->CreationTime.QuadPart == -1)
+ ccb->user_set_creation_time = TRUE;
+ else if (fbi->CreationTime.QuadPart != 0) {
+ win_time_to_unix(fbi->CreationTime, &fcb->inode_item.otime);
+ inode_item_changed = TRUE;
+ filter |= FILE_NOTIFY_CHANGE_CREATION;
+
+ ccb->user_set_creation_time = TRUE;
+ }
+
+ if (fbi->LastAccessTime.QuadPart == -1)
+ ccb->user_set_access_time = TRUE;
+ else if (fbi->LastAccessTime.QuadPart != 0) {
+ win_time_to_unix(fbi->LastAccessTime, &fcb->inode_item.st_atime);
+ inode_item_changed = TRUE;
+ filter |= FILE_NOTIFY_CHANGE_LAST_ACCESS;
+
+ ccb->user_set_access_time = TRUE;
+ }
+
+ if (fbi->LastWriteTime.QuadPart == -1)
+ ccb->user_set_write_time = TRUE;
+ else if (fbi->LastWriteTime.QuadPart != 0) {
+ win_time_to_unix(fbi->LastWriteTime, &fcb->inode_item.st_mtime);
+ inode_item_changed = TRUE;
+ filter |= FILE_NOTIFY_CHANGE_LAST_WRITE;
+
+ ccb->user_set_write_time = TRUE;
+ }
+
+ if (fbi->ChangeTime.QuadPart == -1)
+ ccb->user_set_change_time = TRUE;
+ else if (fbi->ChangeTime.QuadPart != 0) {
+ win_time_to_unix(fbi->ChangeTime, &fcb->inode_item.st_ctime);
+ inode_item_changed = TRUE;
+ // no filter for this
+
+ ccb->user_set_change_time = TRUE;
+ }
// FileAttributes == 0 means don't set - undocumented, but seen in fastfat
if (fbi->FileAttributes != 0) {
fcb->atts_changed = TRUE;
+ if (fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT)
+ fbi->FileAttributes |= FILE_ATTRIBUTE_REPARSE_POINT;
+
if (defda == fbi->FileAttributes)
fcb->atts_deleted = TRUE;
KeQuerySystemTime(&time);
win_time_to_unix(time, &now);
- fcb->inode_item.st_ctime = now;
+ if (!ccb->user_set_change_time)
+ fcb->inode_item.st_ctime = now;
+
fcb->subvol->root_item.ctransid = Vcb->superblock.generation;
fcb->subvol->root_item.ctime = now;
filter |= FILE_NOTIFY_CHANGE_ATTRIBUTES;
}
-
- if (fbi->CreationTime.QuadPart == -1) {
- FIXME("FIXME - support CreationTime == -1\n"); // FIXME - set ccb flag
- } else if (fbi->CreationTime.QuadPart != 0) {
- win_time_to_unix(fbi->CreationTime, &fcb->inode_item.otime);
- inode_item_changed = TRUE;
- filter |= FILE_NOTIFY_CHANGE_CREATION;
- }
-
- if (fbi->LastAccessTime.QuadPart == -1) {
- FIXME("FIXME - support LastAccessTime == -1\n"); // FIXME - set ccb flag
- } else if (fbi->LastAccessTime.QuadPart != 0) {
- win_time_to_unix(fbi->LastAccessTime, &fcb->inode_item.st_atime);
- inode_item_changed = TRUE;
- filter |= FILE_NOTIFY_CHANGE_LAST_ACCESS;
- }
-
- if (fbi->LastWriteTime.QuadPart == -1) {
- FIXME("FIXME - support LastWriteTime == -1\n"); // FIXME - set ccb flag
- } else if (fbi->LastWriteTime.QuadPart != 0) {
- win_time_to_unix(fbi->LastWriteTime, &fcb->inode_item.st_mtime);
- inode_item_changed = TRUE;
- filter |= FILE_NOTIFY_CHANGE_LAST_WRITE;
- }
-
- if (fbi->ChangeTime.QuadPart == -1) {
- FIXME("FIXME - support ChangeTime == -1\n"); // FIXME - set ccb flag
- } else if (fbi->ChangeTime.QuadPart != 0) {
- win_time_to_unix(fbi->ChangeTime, &fcb->inode_item.st_ctime);
- inode_item_changed = TRUE;
- // no filter for this
- }
if (inode_item_changed) {
fcb->inode_item.transid = Vcb->superblock.generation;
fcb->inode_item.sequence++;
+ fcb->inode_item_changed = TRUE;
mark_fcb_dirty(fcb);
}
if (!fileref)
return STATUS_INVALID_PARAMETER;
+ ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
+
ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE);
TRACE("changing delete_on_close to %s for %S (fcb %p)\n", fdi->DeleteFile ? "TRUE" : "FALSE", file_desc(FileObject), fcb);
end:
ExReleaseResourceLite(fcb->Header.Resource);
-
- return Status;
-}
-
-static NTSTATUS add_inode_extref(device_extension* Vcb, root* subvol, UINT64 inode, UINT64 parinode, UINT64 index, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback) {
- KEY searchkey;
- traverse_ptr tp;
- INODE_EXTREF* ier;
- NTSTATUS Status;
-
- searchkey.obj_id = inode;
- searchkey.obj_type = TYPE_INODE_EXTREF;
- searchkey.offset = calc_crc32c((UINT32)parinode, (UINT8*)utf8->Buffer, utf8->Length);
-
- Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return Status;
- }
-
- if (!keycmp(&searchkey, &tp.item->key)) {
- ULONG iersize = tp.item->size + sizeof(INODE_EXTREF) - 1 + utf8->Length;
- UINT8* ier2;
- UINT32 maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node);
-
- if (iersize > maxlen) {
- ERR("item would be too long (%u > %u)\n", iersize, maxlen);
- return STATUS_INTERNAL_ERROR;
- }
-
- ier2 = ExAllocatePoolWithTag(PagedPool, iersize, ALLOC_TAG);
- if (!ier2) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- if (tp.item->size > 0)
- RtlCopyMemory(ier2, tp.item->data, tp.item->size);
-
- ier = (INODE_EXTREF*)&ier2[tp.item->size];
- ier->dir = parinode;
- ier->index = index;
- ier->n = utf8->Length;
- RtlCopyMemory(ier->name, utf8->Buffer, utf8->Length);
-
- delete_tree_item(Vcb, &tp, rollback);
-
- if (!insert_tree_item(Vcb, subvol, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ier2, iersize, NULL, Irp, rollback)) {
- ERR("error - failed to insert item\n");
- return STATUS_INTERNAL_ERROR;
- }
- } else {
- ier = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_EXTREF) - 1 + utf8->Length, ALLOC_TAG);
- if (!ier) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- ier->dir = parinode;
- ier->index = index;
- ier->n = utf8->Length;
- RtlCopyMemory(ier->name, utf8->Buffer, utf8->Length);
-
- if (!insert_tree_item(Vcb, subvol, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ier, sizeof(INODE_EXTREF) - 1 + utf8->Length, NULL, Irp, rollback)) {
- ERR("error - failed to insert item\n");
- return STATUS_INTERNAL_ERROR;
- }
- }
-
- return STATUS_SUCCESS;
-}
-
-NTSTATUS add_inode_ref(device_extension* Vcb, root* subvol, UINT64 inode, UINT64 parinode, UINT64 index, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback) {
- KEY searchkey;
- traverse_ptr tp;
- INODE_REF* ir;
- NTSTATUS Status;
- searchkey.obj_id = inode;
- searchkey.obj_type = TYPE_INODE_REF;
- searchkey.offset = parinode;
-
- Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return Status;
- }
-
- if (!keycmp(&searchkey, &tp.item->key)) {
- ULONG irsize = tp.item->size + sizeof(INODE_REF) - 1 + utf8->Length;
- UINT8* ir2;
- UINT32 maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node);
-
- if (irsize > maxlen) {
- if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF) {
- TRACE("INODE_REF too long, creating INODE_EXTREF\n");
- return add_inode_extref(Vcb, subvol, inode, parinode, index, utf8, Irp, rollback);
- } else {
- ERR("item would be too long (%u > %u)\n", irsize, maxlen);
- return STATUS_INTERNAL_ERROR;
- }
- }
-
- ir2 = ExAllocatePoolWithTag(PagedPool, irsize, ALLOC_TAG);
- if (!ir2) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- if (tp.item->size > 0)
- RtlCopyMemory(ir2, tp.item->data, tp.item->size);
-
- ir = (INODE_REF*)&ir2[tp.item->size];
- ir->index = index;
- ir->n = utf8->Length;
- RtlCopyMemory(ir->name, utf8->Buffer, utf8->Length);
-
- delete_tree_item(Vcb, &tp, rollback);
-
- if (!insert_tree_item(Vcb, subvol, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ir2, irsize, NULL, Irp, rollback)) {
- ERR("error - failed to insert item\n");
- return STATUS_INTERNAL_ERROR;
- }
- } else {
- ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + utf8->Length, ALLOC_TAG);
- if (!ir) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
+ ExReleaseResourceLite(&Vcb->fcb_lock);
- ir->index = index;
- ir->n = utf8->Length;
- RtlCopyMemory(ir->name, utf8->Buffer, utf8->Length);
-
- if (!insert_tree_item(Vcb, subvol, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ir, sizeof(INODE_REF) - 1 + ir->n, NULL, Irp, rollback)) {
- ERR("error - failed to insert item\n");
- return STATUS_INTERNAL_ERROR;
- }
- }
-
- return STATUS_SUCCESS;
+ return Status;
}
BOOL has_open_children(file_ref* fileref) {
while (le != &fileref->children) {
file_ref* c = CONTAINING_RECORD(le, file_ref, list_entry);
- if (c->fcb->open_count > 0)
+ if (c->open_count > 0)
return TRUE;
if (has_open_children(c))
}
RtlCopyMemory(&fcb->inode_item, &oldfcb->inode_item, sizeof(INODE_ITEM));
+ fcb->inode_item_changed = TRUE;
if (oldfcb->sd && RtlLengthSecurityDescriptor(oldfcb->sd) > 0) {
fcb->sd = ExAllocatePoolWithTag(PagedPool, RtlLengthSecurityDescriptor(oldfcb->sd), ALLOC_TAG);
RtlCopyMemory(fcb->reparse_xattr.Buffer, oldfcb->reparse_xattr.Buffer, fcb->reparse_xattr.Length);
}
+
+ if (oldfcb->ea_xattr.Buffer && oldfcb->ea_xattr.Length > 0) {
+ fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = oldfcb->ea_xattr.Length;
+
+ fcb->ea_xattr.Buffer = ExAllocatePoolWithTag(PagedPool, fcb->ea_xattr.MaximumLength, ALLOC_TAG);
+ if (!fcb->ea_xattr.Buffer) {
+ ERR("out of memory\n");
+
+ ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
+ free_fcb(fcb);
+ ExReleaseResourceLite(&Vcb->fcb_lock);
+
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ RtlCopyMemory(fcb->ea_xattr.Buffer, oldfcb->ea_xattr.Buffer, fcb->ea_xattr.Length);
+ }
end:
*pfcb = fcb;
if (!fr->deleted) {
me2 = ExAllocatePoolWithTag(PagedPool, sizeof(move_entry), ALLOC_TAG);
- if (!me) {
+ if (!me2) {
ERR("out of memory\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
goto end;
}
if (xa->n > xapreflen && RtlCompareMemory(xa->name, xapref, xapreflen) == xapreflen &&
- (tp.item->key.offset != EA_DOSATTRIB_HASH || xa->n != strlen(EA_DOSATTRIB) || RtlCompareMemory(xa->name, EA_DOSATTRIB, xa->n) != xa->n)
+ (tp.item->key.offset != EA_DOSATTRIB_HASH || xa->n != strlen(EA_DOSATTRIB) || RtlCompareMemory(xa->name, EA_DOSATTRIB, xa->n) != xa->n) &&
+ (tp.item->key.offset != EA_EA_HASH || xa->n != strlen(EA_EA) || RtlCompareMemory(xa->name, EA_EA, xa->n) != xa->n)
) {
BOOL found = FALSE;
insert_fileref_child(me->fileref, fr, FALSE);
me2 = ExAllocatePoolWithTag(PagedPool, sizeof(move_entry), ALLOC_TAG);
- if (!me) {
+ if (!me2) {
ERR("out of memory\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
}
ExAcquireResourceExclusiveLite(&me->fileref->fcb->Vcb->fcb_lock, TRUE);
- Status = open_fcb(me->fileref->fcb->Vcb, subvol, inode, di->type, &utf8, me->fileref->fcb, &fcb, Irp);
+ Status = open_fcb(me->fileref->fcb->Vcb, subvol, inode, di->type, &utf8, me->fileref->fcb, &fcb, PagedPool, Irp);
ExReleaseResourceLite(&me->fileref->fcb->Vcb->fcb_lock);
if (!NT_SUCCESS(Status)) {
fr->fcb->fileref = fr;
me2 = ExAllocatePoolWithTag(PagedPool, sizeof(move_entry), ALLOC_TAG);
- if (!me) {
+ if (!me2) {
ERR("out of memory\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
me->dummyfcb->atts_deleted = me->fileref->fcb->atts_deleted;
me->dummyfcb->extents_changed = me->fileref->fcb->extents_changed;
me->dummyfcb->reparse_xattr_changed = me->fileref->fcb->reparse_xattr_changed;
+ me->dummyfcb->ea_changed = me->fileref->fcb->ea_changed;
}
me->dummyfcb->created = me->fileref->fcb->created;
if (!me->fileref->fcb->ads) {
LIST_ENTRY* le2;
- if (destdir->fcb->subvol->lastinode == 0)
- get_last_inode(destdir->fcb->Vcb, destdir->fcb->subvol, Irp);
-
me->fileref->fcb->subvol = destdir->fcb->subvol;
- me->fileref->fcb->inode = ++destdir->fcb->subvol->lastinode; // FIXME - do proper function for this
+ me->fileref->fcb->inode = InterlockedIncrement64(&destdir->fcb->subvol->lastinode);
me->fileref->fcb->inode_item.st_nlink = 1;
defda = get_file_attributes(me->fileref->fcb->Vcb, &me->fileref->fcb->inode_item, me->fileref->fcb->subvol, me->fileref->fcb->inode,
me->fileref->fcb->atts_changed = defda != me->fileref->fcb->atts;
me->fileref->fcb->extents_changed = !IsListEmpty(&me->fileref->fcb->extents);
me->fileref->fcb->reparse_xattr_changed = !!me->fileref->fcb->reparse_xattr.Buffer;
+ me->fileref->fcb->ea_changed = !!me->fileref->fcb->ea_xattr.Buffer;
+ me->fileref->fcb->inode_item_changed = TRUE;
le2 = me->fileref->fcb->extents.Flink;
while (le2 != &me->fileref->fcb->extents) {
ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
} else {
Status = update_changed_extent_ref(me->fileref->fcb->Vcb, c, ed2->address, ed2->size, me->fileref->fcb->subvol->id, me->fileref->fcb->inode,
- ext->offset - ed2->offset, 1, me->fileref->fcb->inode_item.flags & BTRFS_INODE_NODATASUM, ed2->size, Irp);
+ ext->offset - ed2->offset, 1, me->fileref->fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
if (!NT_SUCCESS(Status)) {
ERR("update_changed_extent_ref returned %08x\n", Status);
ExFreePool(hl);
}
+ me->fileref->fcb->inode_item_changed = TRUE;
mark_fcb_dirty(me->fileref->fcb);
if ((!me->dummyfcb->ads && me->dummyfcb->inode_item.st_nlink > 1) || (me->dummyfcb->ads && me->parent->dummyfcb->inode_item.st_nlink > 1)) {
} else {
ExAcquireResourceExclusiveLite(me->fileref->fcb->Header.Resource, TRUE);
me->fileref->fcb->inode_item.st_nlink++;
+ me->fileref->fcb->inode_item_changed = TRUE;
ExReleaseResourceLite(me->fileref->fcb->Header.Resource);
}
}
me->fileref->parent->fcb->inode_item.sequence++;
me->fileref->parent->fcb->inode_item.st_ctime = now;
me->fileref->parent->fcb->inode_item.st_mtime = now;
+ me->fileref->parent->fcb->inode_item_changed = TRUE;
mark_fcb_dirty(me->fileref->parent->fcb);
}
related = relatedccb->fileref;
increase_fileref_refcount(related);
+ } else if (fnus.Length >= sizeof(WCHAR) && fnus.Buffer[0] != '\\') {
+ related = fileref->parent;
+ increase_fileref_refcount(related);
}
ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
- Status = open_fileref(Vcb, &oldfileref, &fnus, related, FALSE, NULL, NULL, Irp);
+ Status = open_fileref(Vcb, &oldfileref, &fnus, related, FALSE, NULL, NULL, PagedPool, ccb->case_sensitive, Irp);
ExReleaseResourceLite(&Vcb->fcb_lock);
if (NT_SUCCESS(Status)) {
if (!IrpSp->Parameters.SetFile.ReplaceIfExists) {
Status = STATUS_OBJECT_NAME_COLLISION;
goto end;
- } else if ((oldfileref->fcb->open_count >= 1 || has_open_children(oldfileref)) && !oldfileref->deleted) {
+ } else if ((oldfileref->open_count >= 1 || has_open_children(oldfileref)) && !oldfileref->deleted) {
WARN("trying to overwrite open file\n");
Status = STATUS_ACCESS_DENIED;
goto end;
if (!related) {
ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
- Status = open_fileref(Vcb, &related, &fnus, NULL, TRUE, NULL, NULL, Irp);
+ Status = open_fileref(Vcb, &related, &fnus, NULL, TRUE, NULL, NULL, PagedPool, ccb->case_sensitive, Irp);
ExReleaseResourceLite(&Vcb->fcb_lock);
if (!NT_SUCCESS(Status)) {
fcb->inode_item.transid = Vcb->superblock.generation;
fcb->inode_item.sequence++;
- fcb->inode_item.st_ctime = now;
+ if (!ccb->user_set_change_time)
+ fcb->inode_item.st_ctime = now;
+
+ fcb->inode_item_changed = TRUE;
mark_fcb_dirty(fcb);
// update parent's INODE_ITEM
related->fcb->inode_item.st_ctime = now;
related->fcb->inode_item.st_mtime = now;
+ related->fcb->inode_item_changed = TRUE;
mark_fcb_dirty(related->fcb);
send_notification_fileref(related, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED);
fcb->inode_item.transid = Vcb->superblock.generation;
fcb->inode_item.sequence++;
- fcb->inode_item.st_ctime = now;
+ if (!ccb->user_set_change_time)
+ fcb->inode_item.st_ctime = now;
+
+ fcb->inode_item_changed = TRUE;
mark_fcb_dirty(fcb);
// update new parent's INODE_ITEM
related->fcb->inode_item.st_ctime = now;
related->fcb->inode_item.st_mtime = now;
+ related->fcb->inode_item_changed = TRUE;
mark_fcb_dirty(related->fcb);
// update old parent's INODE_ITEM
free_fileref(fr2);
ExReleaseResourceLite(&Vcb->fcb_lock);
+ fr2->parent->fcb->inode_item_changed = TRUE;
mark_fcb_dirty(fr2->parent->fcb);
send_notification_fileref(fr2, fcb->type == BTRFS_TYPE_DIRECTORY ? FILE_NOTIFY_CHANGE_DIR_NAME : FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_REMOVED);
}
if (NT_SUCCESS(Status))
- clear_rollback(&rollback);
+ clear_rollback(Vcb, &rollback);
else
do_rollback(Vcb, &rollback);
fileref->parent->fcb->inode_item.sequence++;
fileref->parent->fcb->inode_item.st_ctime = now;
+ fileref->parent->fcb->inode_item_changed = TRUE;
mark_fcb_dirty(fileref->parent->fcb);
fileref->parent->fcb->subvol->root_item.ctransid = Vcb->superblock.generation;
CcSetFileSizes(FileObject, &ccfs);
TRACE("setting FileSize for %S to %llx\n", file_desc(FileObject), ccfs.FileSize);
- KeQuerySystemTime(&time);
-
- win_time_to_unix(time, &fcb->inode_item.st_mtime);
+ if (!ccb->user_set_write_time) {
+ KeQuerySystemTime(&time);
+ win_time_to_unix(time, &fcb->inode_item.st_mtime);
+ }
+ fcb->inode_item_changed = TRUE;
mark_fcb_dirty(fcb);
send_notification_fcb(fileref, FILE_NOTIFY_CHANGE_LAST_WRITE | FILE_NOTIFY_CHANGE_SIZE, FILE_ACTION_MODIFIED);
end:
if (NT_SUCCESS(Status))
- clear_rollback(&rollback);
+ clear_rollback(Vcb, &rollback);
else
do_rollback(Vcb, &rollback);
}
ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
- Status = open_fileref(Vcb, &oldfileref, &fnus, related, FALSE, NULL, NULL, Irp);
+ Status = open_fileref(Vcb, &oldfileref, &fnus, related, FALSE, NULL, NULL, PagedPool, ccb->case_sensitive, Irp);
ExReleaseResourceLite(&Vcb->fcb_lock);
if (NT_SUCCESS(Status)) {
if (!fli->ReplaceIfExists) {
Status = STATUS_OBJECT_NAME_COLLISION;
goto end;
- } else if (oldfileref->fcb->open_count >= 1 && !oldfileref->deleted) {
+ } else if (oldfileref->open_count >= 1 && !oldfileref->deleted) {
WARN("trying to overwrite open file\n");
Status = STATUS_ACCESS_DENIED;
goto end;
if (!related) {
ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
- Status = open_fileref(Vcb, &related, &fnus, NULL, TRUE, NULL, NULL, Irp);
+ Status = open_fileref(Vcb, &related, &fnus, NULL, TRUE, NULL, NULL, PagedPool, ccb->case_sensitive, Irp);
ExReleaseResourceLite(&Vcb->fcb_lock);
if (!NT_SUCCESS(Status)) {
fcb->inode_item.transid = Vcb->superblock.generation;
fcb->inode_item.sequence++;
fcb->inode_item.st_nlink++;
- fcb->inode_item.st_ctime = now;
+ if (!ccb->user_set_change_time)
+ fcb->inode_item.st_ctime = now;
+
+ fcb->inode_item_changed = TRUE;
mark_fcb_dirty(fcb);
// update parent's INODE_ITEM
parfcb->inode_item.sequence++;
parfcb->inode_item.st_ctime = now;
+ parfcb->inode_item_changed = TRUE;
mark_fcb_dirty(parfcb);
send_notification_fileref(fr2, FILE_NOTIFY_CHANGE_FILE_NAME, FILE_ACTION_ADDED);
}
if (NT_SUCCESS(Status))
- clear_rollback(&rollback);
+ clear_rollback(Vcb, &rollback);
else
do_rollback(Vcb, &rollback);
{
TRACE("FilePositionInformation\n");
- if (Irp->RequestorMode == UserMode &&
- (!(ccb->access & (FILE_READ_DATA | FILE_WRITE_DATA)) || !(ccb->options & (FILE_SYNCHRONOUS_IO_ALERT | FILE_SYNCHRONOUS_IO_NONALERT)))) {
- WARN("insufficient privileges\n");
- Status = STATUS_ACCESS_DENIED;
- break;
- }
-
Status = set_position_information(Vcb, Irp, IrpSp->FileObject);
break;
return STATUS_SUCCESS;
}
-static NTSTATUS STDCALL fill_in_file_internal_information(FILE_INTERNAL_INFORMATION* fii, UINT64 inode, LONG* length) {
+static NTSTATUS STDCALL fill_in_file_internal_information(FILE_INTERNAL_INFORMATION* fii, fcb* fcb, LONG* length) {
*length -= sizeof(FILE_INTERNAL_INFORMATION);
- fii->IndexNumber.QuadPart = inode;
+ fii->IndexNumber.QuadPart = make_file_id(fcb->subvol, fcb->inode);
return STATUS_SUCCESS;
}
-static NTSTATUS STDCALL fill_in_file_ea_information(FILE_EA_INFORMATION* eai, LONG* length) {
+static NTSTATUS STDCALL fill_in_file_ea_information(FILE_EA_INFORMATION* eai, fcb* fcb, LONG* length) {
*length -= sizeof(FILE_EA_INFORMATION);
- // FIXME - should this be the reparse tag for symlinks?
- eai->EaSize = 0;
+ /* This value appears to be the size of the structure NTFS stores on disk, and not,
+ * as might be expected, the size of FILE_FULL_EA_INFORMATION (which is what we store).
+ * The formula is 4 bytes as a header, followed by 5 + NameLength + ValueLength for each
+ * item. */
+
+ eai->EaSize = fcb->ealen;
return STATUS_SUCCESS;
}
goto end;
}
- si = ExAllocatePoolWithTag(PagedPool, sizeof(stream_info), ALLOC_TAG);
- if (!si) {
- ERR("out of memory\n");
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto end;
+ if (fileref->fcb->type != BTRFS_TYPE_DIRECTORY) {
+ si = ExAllocatePoolWithTag(PagedPool, sizeof(stream_info), ALLOC_TAG);
+ if (!si) {
+ ERR("out of memory\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto end;
+ }
+
+ si->name.Length = si->name.MaximumLength = 0;
+ si->name.Buffer = NULL;
+ si->size = fileref->fcb->inode_item.st_size;
+ si->ignore = FALSE;
+
+ InsertTailList(&streamlist, &si->list_entry);
}
- si->name.Length = si->name.MaximumLength = 0;
- si->name.Buffer = NULL;
- si->size = fileref->fcb->inode_item.st_size;
- si->ignore = FALSE;
-
- InsertTailList(&streamlist, &si->list_entry);
-
do {
if (tp.item->key.obj_id == fileref->fcb->inode && tp.item->key.obj_type == TYPE_XATTR_ITEM) {
if (tp.item->size < sizeof(DIR_ITEM)) {
}
if (xa->n > strlen(xapref) && RtlCompareMemory(xa->name, xapref, strlen(xapref)) == strlen(xapref) &&
- (tp.item->key.offset != EA_DOSATTRIB_HASH || xa->n != strlen(EA_DOSATTRIB) || RtlCompareMemory(xa->name, EA_DOSATTRIB, xa->n) != xa->n)) {
+ (tp.item->key.offset != EA_DOSATTRIB_HASH || xa->n != strlen(EA_DOSATTRIB) || RtlCompareMemory(xa->name, EA_DOSATTRIB, xa->n) != xa->n) &&
+ (tp.item->key.offset != EA_EA_HASH || xa->n != strlen(EA_EA) || RtlCompareMemory(xa->name, EA_EA, xa->n) != xa->n)
+ ) {
Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, &xa->name[strlen(xapref)], xa->n - strlen(xapref));
if (!NT_SUCCESS(Status)) {
ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status);
hardlink* hl;
file_ref *parfr, *fr;
- Status = open_fcb(Vcb, subvol, inode, 0, NULL, NULL, &fcb, Irp);
+ Status = open_fcb(Vcb, subvol, inode, 0, NULL, NULL, &fcb, PagedPool, Irp);
if (!NT_SUCCESS(Status)) {
ERR("open_fcb returned %08x\n", Status);
return Status;
TRACE("FileAllInformation\n");
- if (!(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) {
+ if (Irp->RequestorMode != KernelMode && !(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) {
WARN("insufficient privileges\n");
Status = STATUS_ACCESS_DENIED;
goto exit;
fill_in_file_standard_information(&fai->StandardInformation, fcb, fileref, &length);
if (length > 0)
- fill_in_file_internal_information(&fai->InternalInformation, fcb->inode, &length);
+ fill_in_file_internal_information(&fai->InternalInformation, fcb, &length);
if (length > 0)
- fill_in_file_ea_information(&fai->EaInformation, &length);
+ fill_in_file_ea_information(&fai->EaInformation, fcb, &length);
if (length > 0)
fill_in_file_access_information(&fai->AccessInformation, &length);
TRACE("FileAttributeTagInformation\n");
- if (!(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) {
+ if (Irp->RequestorMode != KernelMode && !(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) {
WARN("insufficient privileges\n");
Status = STATUS_ACCESS_DENIED;
goto exit;
TRACE("FileBasicInformation\n");
- if (!(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) {
+ if (Irp->RequestorMode != KernelMode && !(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) {
WARN("insufficient privileges\n");
Status = STATUS_ACCESS_DENIED;
goto exit;
TRACE("FileEaInformation\n");
- Status = fill_in_file_ea_information(eai, &length);
+ Status = fill_in_file_ea_information(eai, fcb, &length);
break;
}
TRACE("FileInternalInformation\n");
- Status = fill_in_file_internal_information(fii, fcb->inode, &length);
+ Status = fill_in_file_internal_information(fii, fcb, &length);
break;
}
TRACE("FileNetworkOpenInformation\n");
- if (!(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) {
+ if (Irp->RequestorMode != KernelMode && !(ccb->access & (FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES))) {
WARN("insufficient privileges\n");
Status = STATUS_ACCESS_DENIED;
goto exit;
TRACE("FilePositionInformation\n");
- if (!(ccb->access & (FILE_READ_DATA | FILE_WRITE_DATA)) || !(ccb->options & (FILE_SYNCHRONOUS_IO_ALERT | FILE_SYNCHRONOUS_IO_NONALERT))) {
- WARN("insufficient privileges\n");
- Status = STATUS_ACCESS_DENIED;
- goto exit;
- }
-
Status = fill_in_file_position_information(fpi, FileObject, &length);
break;
return Status;
}
+
+/* Handles IRP_MJ_QUERY_EA: copies extended attributes from the buffer cached in
+ * fcb->ea_xattr into the caller's buffer as a chain of FILE_FULL_EA_INFORMATION
+ * entries, each one starting on a 4-byte boundary.
+ *
+ * If the request carries an EaList, only the entries whose names appear in it
+ * are returned (the requested names are upper-cased in place first). Otherwise
+ * entries are returned starting from the 1-based EaIndex (SL_INDEX_SPECIFIED),
+ * from the beginning (SL_RESTART_SCAN), or from the position remembered in
+ * ccb->ea_index. SL_RETURN_SINGLE_ENTRY stops after the first entry copied.
+ *
+ * The IRP is completed here; IoStatus.Information is the number of bytes
+ * written when the status is a success code or STATUS_BUFFER_OVERFLOW, and 0
+ * otherwise. Requests on a VCB_TYPE_PARTITION0 device are handed to
+ * part0_passthrough instead and are not completed by this function.
+ *
+ * Returns the status stored in the IRP (or part0_passthrough's result). */
+NTSTATUS STDCALL drv_query_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
+    NTSTATUS Status;
+    BOOL top_level;
+    device_extension* Vcb = DeviceObject->DeviceExtension;
+    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
+    PFILE_OBJECT FileObject = IrpSp->FileObject;
+    fcb* fcb;
+    ccb* ccb;
+    FILE_FULL_EA_INFORMATION* ffei;
+    ULONG retlen = 0;
+#ifdef __REACTOS__
+    Status = STATUS_INTERNAL_ERROR;
+#endif
+
+    TRACE("(%p, %p)\n", DeviceObject, Irp);
+
+    FsRtlEnterFileSystem();
+
+    top_level = is_top_level(Irp);
+
+    if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) {
+        Status = part0_passthrough(DeviceObject, Irp);
+        goto exit;
+    }
+
+    ffei = map_user_buffer(Irp);
+    if (!ffei) {
+        ERR("could not get output buffer\n");
+        Status = STATUS_INVALID_PARAMETER;
+        goto end;
+    }
+
+    if (!FileObject) {
+        ERR("no file object\n");
+        Status = STATUS_INVALID_PARAMETER;
+        goto end;
+    }
+
+    fcb = FileObject->FsContext;
+
+    if (!fcb) {
+        ERR("no fcb\n");
+        Status = STATUS_INVALID_PARAMETER;
+        goto end;
+    }
+
+    ccb = FileObject->FsContext2;
+
+    if (!ccb) {
+        ERR("no ccb\n");
+        Status = STATUS_INVALID_PARAMETER;
+        goto end;
+    }
+
+    if (Irp->RequestorMode == UserMode && !(ccb->access & (FILE_READ_EA | FILE_WRITE_EA))) {
+        WARN("insufficient privileges\n");
+        Status = STATUS_ACCESS_DENIED;
+        goto end;
+    }
+
+    // From here on every exit must go through end2 so that the resource is released.
+    ExAcquireResourceSharedLite(fcb->Header.Resource, TRUE);
+
+    if (fcb->ea_xattr.Length == 0) {
+        // Nothing stored for this file; previously Status was left unassigned on this path.
+        Status = STATUS_NO_EAS_ON_FILE;
+        goto end2;
+    }
+
+    if (IrpSp->Parameters.QueryEa.EaList) {
+        FILE_FULL_EA_INFORMATION *ea, *out;
+        FILE_GET_EA_INFORMATION* in;
+
+        // Upper-case every requested name in place so the comparison below can be
+        // a plain memory compare against the stored names.
+        in = IrpSp->Parameters.QueryEa.EaList;
+        do {
+            STRING s;
+
+            s.Length = s.MaximumLength = in->EaNameLength;
+            s.Buffer = in->EaName;
+
+            RtlUpperString(&s, &s);
+
+            if (in->NextEntryOffset == 0)
+                break;
+
+            in = (FILE_GET_EA_INFORMATION*)(((UINT8*)in) + in->NextEntryOffset);
+        } while (TRUE);
+
+        ea = (FILE_FULL_EA_INFORMATION*)fcb->ea_xattr.Buffer;
+        out = NULL;
+
+        // Walk the stored entries and emit each one that is named in the request list.
+        do {
+            BOOL found = FALSE;
+
+            in = IrpSp->Parameters.QueryEa.EaList;
+            do {
+                if (in->EaNameLength == ea->EaNameLength &&
+                    RtlCompareMemory(in->EaName, ea->EaName, in->EaNameLength) == in->EaNameLength) {
+                    found = TRUE;
+                    break;
+                }
+
+                if (in->NextEntryOffset == 0)
+                    break;
+
+                in = (FILE_GET_EA_INFORMATION*)(((UINT8*)in) + in->NextEntryOffset);
+            } while (TRUE);
+
+            if (found) {
+                UINT8 padding = retlen % 4 > 0 ? (4 - (retlen % 4)) : 0;
+                ULONG entrylen = offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + ea->EaNameLength + 1 + ea->EaValueLength;
+
+                // retlen never exceeds Length, so the right-hand side cannot wrap.
+                // Keeping padding on the left avoids the unsigned underflow that
+                // occurred when fewer than `padding` bytes were left in the buffer.
+                if (padding + entrylen > IrpSp->Parameters.QueryEa.Length - retlen) {
+                    Status = STATUS_BUFFER_OVERFLOW;
+                    retlen = 0;
+                    goto end2;
+                }
+
+                retlen += padding;
+
+                if (out) {
+                    // Link the previously written entry to the one about to be written.
+                    out->NextEntryOffset = offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + out->EaNameLength + 1 + out->EaValueLength + padding;
+                    out = (FILE_FULL_EA_INFORMATION*)(((UINT8*)out) + out->NextEntryOffset);
+                } else
+                    out = ffei;
+
+                out->NextEntryOffset = 0;
+                out->Flags = ea->Flags;
+                out->EaNameLength = ea->EaNameLength;
+                out->EaValueLength = ea->EaValueLength;
+                // Name, the byte following it, and the value are copied in one go.
+                RtlCopyMemory(out->EaName, ea->EaName, ea->EaNameLength + ea->EaValueLength + 1);
+
+                retlen += entrylen;
+
+                if (IrpSp->Flags & SL_RETURN_SINGLE_ENTRY)
+                    break;
+            }
+
+            if (ea->NextEntryOffset == 0)
+                break;
+
+            ea = (FILE_FULL_EA_INFORMATION*)(((UINT8*)ea) + ea->NextEntryOffset);
+        } while (TRUE);
+    } else {
+        FILE_FULL_EA_INFORMATION *ea, *out;
+        ULONG index;
+
+        if (IrpSp->Flags & SL_INDEX_SPECIFIED) {
+            // The index is 1-based
+            if (IrpSp->Parameters.QueryEa.EaIndex == 0) {
+                Status = STATUS_NONEXISTENT_EA_ENTRY;
+                goto end2; // the resource is held here, so it must be released
+            } else
+                index = IrpSp->Parameters.QueryEa.EaIndex - 1;
+        } else if (IrpSp->Flags & SL_RESTART_SCAN)
+            index = ccb->ea_index = 0;
+        else
+            index = ccb->ea_index;
+
+        ea = (FILE_FULL_EA_INFORMATION*)fcb->ea_xattr.Buffer;
+
+        if (index > 0) {
+            ULONG i;
+
+            // Skip forward to the entry the scan should start from.
+            for (i = 0; i < index; i++) {
+                if (ea->NextEntryOffset == 0) { // last item - nothing at the requested position
+                    // Previously Status was left unassigned on this path.
+                    Status = (IrpSp->Flags & SL_INDEX_SPECIFIED) ? STATUS_NONEXISTENT_EA_ENTRY : STATUS_NO_MORE_EAS;
+                    goto end2;
+                }
+
+                ea = (FILE_FULL_EA_INFORMATION*)(((UINT8*)ea) + ea->NextEntryOffset);
+            }
+        }
+
+        out = NULL;
+
+        do {
+            UINT8 padding = retlen % 4 > 0 ? (4 - (retlen % 4)) : 0;
+            ULONG entrylen = offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + ea->EaNameLength + 1 + ea->EaValueLength;
+
+            // Same overflow-safe form of the space check as in the EaList branch.
+            if (padding + entrylen > IrpSp->Parameters.QueryEa.Length - retlen) {
+                Status = retlen == 0 ? STATUS_BUFFER_TOO_SMALL : STATUS_BUFFER_OVERFLOW;
+                goto end2;
+            }
+
+            retlen += padding;
+
+            if (out) {
+                out->NextEntryOffset = offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + out->EaNameLength + 1 + out->EaValueLength + padding;
+                out = (FILE_FULL_EA_INFORMATION*)(((UINT8*)out) + out->NextEntryOffset);
+            } else
+                out = ffei;
+
+            out->NextEntryOffset = 0;
+            out->Flags = ea->Flags;
+            out->EaNameLength = ea->EaNameLength;
+            out->EaValueLength = ea->EaValueLength;
+            RtlCopyMemory(out->EaName, ea->EaName, ea->EaNameLength + ea->EaValueLength + 1);
+
+            retlen += entrylen;
+
+            // Remember the scan position only for sequential scans.
+            if (!(IrpSp->Flags & SL_INDEX_SPECIFIED))
+                ccb->ea_index++;
+
+            if (ea->NextEntryOffset == 0 || IrpSp->Flags & SL_RETURN_SINGLE_ENTRY)
+                break;
+
+            ea = (FILE_FULL_EA_INFORMATION*)(((UINT8*)ea) + ea->NextEntryOffset);
+        } while (TRUE);
+    }
+
+    Status = STATUS_SUCCESS;
+
+end2:
+    ExReleaseResourceLite(fcb->Header.Resource);
+
+end:
+    Irp->IoStatus.Status = Status;
+    Irp->IoStatus.Information = NT_SUCCESS(Status) || Status == STATUS_BUFFER_OVERFLOW ? retlen : 0;
+
+    IoCompleteRequest( Irp, IO_NO_INCREMENT );
+
+exit:
+    if (top_level)
+        IoSetTopLevelIrp(NULL);
+
+    FsRtlExitFileSystem();
+
+    return Status;
+}
+
+
+typedef struct { // one extended attribute on the temporary list drv_set_ea builds from the stored and the requested EAs
+ ANSI_STRING name; // Buffer points at the EaName inside the FILE_FULL_EA_INFORMATION entry it came from (not a copy)
+ ANSI_STRING value; // Buffer points just past the name and the byte that follows it in that same entry; Length 0 gets the item dropped from the list
+ UCHAR flags; // taken from the entry's Flags field
+ LIST_ENTRY list_entry; // links the item into drv_set_ea's local ealist
+} ea_item;
+
+NTSTATUS STDCALL drv_set_ea(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
+ device_extension* Vcb = DeviceObject->DeviceExtension;
+ NTSTATUS Status;
+ BOOL top_level;
+ PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
+ PFILE_OBJECT FileObject = IrpSp->FileObject;
+ fcb* fcb;
+ ccb* ccb;
+ FILE_FULL_EA_INFORMATION* ffei;
+ ULONG offset;
+ LIST_ENTRY ealist;
+ ea_item* item;
+ FILE_FULL_EA_INFORMATION* ea;
+ LIST_ENTRY* le;
+ LARGE_INTEGER time;
+ BTRFS_TIME now;
+
+ TRACE("(%p, %p)\n", DeviceObject, Irp);
+
+ FsRtlEnterFileSystem();
+
+ top_level = is_top_level(Irp);
+
+ if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) {
+ Status = part0_passthrough(DeviceObject, Irp);
+ goto exit;
+ }
+
+ if (Vcb->readonly) {
+ Status = STATUS_MEDIA_WRITE_PROTECTED;
+ goto end;
+ }
+
+ ffei = map_user_buffer(Irp);
+ if (!ffei) {
+ ERR("could not get output buffer\n");
+ Status = STATUS_INVALID_PARAMETER;
+ goto end;
+ }
+
+ Status = IoCheckEaBufferValidity(ffei, IrpSp->Parameters.SetEa.Length, &offset);
+ if (!NT_SUCCESS(Status)) {
+ ERR("IoCheckEaBufferValidity returned %08x (error at offset %u)\n", Status, offset);
+ goto end;
+ }
+
+ if (!FileObject) {
+ ERR("no file object\n");
+ Status = STATUS_INVALID_PARAMETER;
+ goto end;
+ }
+
+ fcb = FileObject->FsContext;
+
+ if (!fcb) {
+ ERR("no fcb\n");
+ Status = STATUS_INVALID_PARAMETER;
+ goto end;
+ }
+
+ ccb = FileObject->FsContext2;
+
+ if (!ccb) {
+ ERR("no ccb\n");
+ Status = STATUS_INVALID_PARAMETER;
+ goto end;
+ }
+
+ if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_WRITE_EA)) {
+ WARN("insufficient privileges\n");
+ Status = STATUS_ACCESS_DENIED;
+ goto end;
+ }
+
+ InitializeListHead(&ealist);
+
+ ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE);
+
+ if (fcb->ea_xattr.Length > 0) {
+ ea = (FILE_FULL_EA_INFORMATION*)fcb->ea_xattr.Buffer;
+
+ do {
+ item = ExAllocatePoolWithTag(PagedPool, sizeof(ea_item), ALLOC_TAG);
+ if (!item) {
+ ERR("out of memory\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto end2;
+ }
+
+ item->name.Length = item->name.MaximumLength = ea->EaNameLength;
+ item->name.Buffer = ea->EaName;
+
+ item->value.Length = item->value.MaximumLength = ea->EaValueLength;
+ item->value.Buffer = &ea->EaName[ea->EaNameLength + 1];
+
+ item->flags = ea->Flags;
+
+ InsertTailList(&ealist, &item->list_entry);
+
+ if (ea->NextEntryOffset == 0)
+ break;
+
+ ea = (FILE_FULL_EA_INFORMATION*)(((UINT8*)ea) + ea->NextEntryOffset);
+ } while (TRUE);
+ }
+
+ ea = ffei;
+
+ do {
+ STRING s;
+ BOOL found = FALSE;
+
+ s.Length = s.MaximumLength = ea->EaNameLength;
+ s.Buffer = ea->EaName;
+
+ RtlUpperString(&s, &s);
+
+ le = ealist.Flink;
+ while (le != &ealist) {
+ item = CONTAINING_RECORD(le, ea_item, list_entry);
+
+ if (item->name.Length == s.Length &&
+ RtlCompareMemory(item->name.Buffer, s.Buffer, s.Length) == s.Length) {
+ item->flags = ea->Flags;
+ item->value.Length = item->value.MaximumLength = ea->EaValueLength;
+ item->value.Buffer = &ea->EaName[ea->EaNameLength + 1];
+ found = TRUE;
+ break;
+ }
+
+ le = le->Flink;
+ }
+
+ if (!found) {
+ item = ExAllocatePoolWithTag(PagedPool, sizeof(ea_item), ALLOC_TAG);
+ if (!item) {
+ ERR("out of memory\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto end2;
+ }
+
+ item->name.Length = item->name.MaximumLength = ea->EaNameLength;
+ item->name.Buffer = ea->EaName;
+
+ item->value.Length = item->value.MaximumLength = ea->EaValueLength;
+ item->value.Buffer = &ea->EaName[ea->EaNameLength + 1];
+
+ item->flags = ea->Flags;
+
+ InsertTailList(&ealist, &item->list_entry);
+ }
+
+ if (ea->NextEntryOffset == 0)
+ break;
+
+ ea = (FILE_FULL_EA_INFORMATION*)(((UINT8*)ea) + ea->NextEntryOffset);
+ } while (TRUE);
+
+ // remove entries with zero-length value
+ le = ealist.Flink;
+ while (le != &ealist) {
+ LIST_ENTRY* le2 = le->Flink;
+
+ item = CONTAINING_RECORD(le, ea_item, list_entry);
+
+ if (item->value.Length == 0) {
+ RemoveEntryList(&item->list_entry);
+ ExFreePool(item);
+ }
+
+ le = le2;
+ }
+
+ if (IsListEmpty(&ealist)) {
+ fcb->ealen = 0;
+
+ if (fcb->ea_xattr.Buffer)
+ ExFreePool(fcb->ea_xattr.Buffer);
+
+ fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = 0;
+ fcb->ea_xattr.Buffer = NULL;
+ } else {
+ ULONG size = 0;
+ char *buf, *oldbuf;
+
+ le = ealist.Flink;
+ while (le != &ealist) {
+ item = CONTAINING_RECORD(le, ea_item, list_entry);
+
+ if (size % 4 > 0)
+ size += 4 - (size % 4);
+
+ size += offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + item->name.Length + 1 + item->value.Length;
+
+ le = le->Flink;
+ }
+
+ buf = ExAllocatePoolWithTag(PagedPool, size, ALLOC_TAG);
+ if (!buf) {
+ ERR("out of memory\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto end2;
+ }
+
+ oldbuf = fcb->ea_xattr.Buffer;
+
+ fcb->ea_xattr.Length = fcb->ea_xattr.MaximumLength = size;
+ fcb->ea_xattr.Buffer = buf;
+
+ fcb->ealen = 4;
+ ea = NULL;
+
+ le = ealist.Flink;
+ while (le != &ealist) {
+ item = CONTAINING_RECORD(le, ea_item, list_entry);
+
+ if (ea) {
+ ea->NextEntryOffset = offsetof(FILE_FULL_EA_INFORMATION, EaName[0]) + ea->EaNameLength + ea->EaValueLength;
+
+ if (ea->NextEntryOffset % 4 > 0)
+ ea->NextEntryOffset += 4 - (ea->NextEntryOffset % 4);
+
+ ea = (FILE_FULL_EA_INFORMATION*)(((UINT8*)ea) + ea->NextEntryOffset);
+ } else
+ ea = (FILE_FULL_EA_INFORMATION*)fcb->ea_xattr.Buffer;
+
+ ea->NextEntryOffset = 0;
+ ea->Flags = item->flags;
+ ea->EaNameLength = item->name.Length;
+ ea->EaValueLength = item->value.Length;
+
+ RtlCopyMemory(ea->EaName, item->name.Buffer, item->name.Length);
+ ea->EaName[item->name.Length] = 0;
+ RtlCopyMemory(&ea->EaName[item->name.Length + 1], item->value.Buffer, item->value.Length);
+
+ fcb->ealen += 5 + item->name.Length + item->value.Length;
+
+ le = le->Flink;
+ }
+
+ if (oldbuf)
+ ExFreePool(oldbuf);
+ }
+
+ fcb->ea_changed = TRUE;
+
+ KeQuerySystemTime(&time);
+ win_time_to_unix(time, &now);
+
+ fcb->inode_item.transid = Vcb->superblock.generation;
+ fcb->inode_item.sequence++;
+
+ if (!ccb->user_set_change_time)
+ fcb->inode_item.st_ctime = now;
+
+ fcb->inode_item_changed = TRUE;
+ mark_fcb_dirty(fcb);
+
+ send_notification_fileref(ccb->fileref, FILE_NOTIFY_CHANGE_EA, FILE_ACTION_MODIFIED);
+
+ Status = STATUS_SUCCESS;
+
+end2:
+ ExReleaseResourceLite(fcb->Header.Resource);
+
+ while (!IsListEmpty(&ealist)) {
+ le = RemoveHeadList(&ealist);
+
+ item = CONTAINING_RECORD(le, ea_item, list_entry);
+
+ ExFreePool(item);
+ }
+
+end:
+ Irp->IoStatus.Status = Status;
+ Irp->IoStatus.Information = 0;
+
+ IoCompleteRequest(Irp, IO_NO_INCREMENT);
+
+exit:
+ if (top_level)
+ IoSetTopLevelIrp(NULL);
+
+ FsRtlExitFileSystem();
+
+ return Status;
+}
#include "btrfs_drv.h"
+#define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - sizeof(leaf_node)) // 4096 bytes less one tree_header and one leaf_node
+
+// #define DEBUG_WRITE_LOOPS // when defined, trees_consistent() logs why it asks for another pass
+
+typedef struct { // wait context shared by write_data_phys and write_completion
+ KEVENT Event; // signalled by write_completion
+ IO_STATUS_BLOCK iosb; // receives a copy of the completed IRP's IoStatus
+} write_context;
+
+typedef struct { // item body inserted under TYPE_EXTENT_ITEM by insert_tree_extent
+ EXTENT_ITEM_TREE eit;
+ UINT8 type; // set to TYPE_TREE_BLOCK_REF
+ TREE_BLOCK_REF tbr; // tbr.offset is set to the owning root id
+} EXTENT_ITEM_TREE2;
+
+typedef struct { // item body inserted under TYPE_METADATA_ITEM by insert_tree_extent_skinny
+ EXTENT_ITEM ei;
+ UINT8 type; // set to TYPE_TREE_BLOCK_REF
+ TREE_BLOCK_REF tbr; // tbr.offset is set to the owning root id
+} EXTENT_ITEM_SKINNY_METADATA;
+
+typedef struct { // NOTE(review): looks like one queued tree-node write used by write_trees - confirm
+ UINT64 address;
+ UINT32 length;
+ BOOL overlap;
+ UINT8* data;
+ LIST_ENTRY list_entry;
+} tree_write;
+
+static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp, LIST_ENTRY* rollback); // forward declaration
+
+static BOOL insert_tree_item_batch(LIST_ENTRY* batchlist, device_extension* Vcb, root* r, UINT64 objid, UINT64 objtype, UINT64 offset,
+ void* data, UINT16 datalen, enum batch_operation operation, PIRP Irp, LIST_ENTRY* rollback); // forward declaration
+
+// I/O completion routine installed by write_data_phys.
+// conptr is the write_context for the request: the IRP's final status block is copied into it
+// and its event is signalled. Returning STATUS_MORE_PROCESSING_REQUIRED leaves the IRP for the
+// caller to free.
+static NTSTATUS STDCALL write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
+    write_context* wc = (write_context*)conptr;
+
+    wc->iosb = Irp->IoStatus;
+
+    KeSetEvent(&wc->Event, 0, FALSE);
+
+    return STATUS_MORE_PROCESSING_REQUIRED;
+}
+
+// Writes `length` bytes from `data` to `device` at byte offset `address` and waits for the
+// request to complete.
+//
+// The IRP_MJ_WRITE is built by hand. How the buffer is attached depends on the target's flags:
+// SystemBuffer for DO_BUFFERED_IO, a locked MDL for DO_DIRECT_IO, UserBuffer otherwise.
+// write_completion signals context->Event and stops completion processing, so the IRP (and
+// any MDL allocated here) is released by this function.
+//
+// Returns the status of the write; STATUS_INSUFFICIENT_RESOURCES if the wait context or the MDL
+// cannot be allocated; STATUS_INTERNAL_ERROR if the IRP cannot be allocated.
+NTSTATUS STDCALL write_data_phys(PDEVICE_OBJECT device, UINT64 address, void* data, UINT32 length) {
+    NTSTATUS Status;
+    LARGE_INTEGER offset;
+    PIRP Irp;
+    PIO_STACK_LOCATION IrpSp;
+    PMDL mdl = NULL;
+    write_context* context;
+
+    TRACE("(%p, %llx, %p, %x)\n", device, address, data, length);
+
+    context = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_context), ALLOC_TAG);
+    if (!context) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    RtlZeroMemory(context, sizeof(write_context));
+
+    KeInitializeEvent(&context->Event, NotificationEvent, FALSE);
+
+    offset.QuadPart = address;
+
+    Irp = IoAllocateIrp(device->StackSize, FALSE);
+
+    if (!Irp) {
+        ERR("IoAllocateIrp failed\n");
+        Status = STATUS_INTERNAL_ERROR;
+        goto exit2;
+    }
+
+    IrpSp = IoGetNextIrpStackLocation(Irp);
+    IrpSp->MajorFunction = IRP_MJ_WRITE;
+
+    if (device->Flags & DO_BUFFERED_IO) {
+        Irp->AssociatedIrp.SystemBuffer = data;
+
+        Irp->Flags = IRP_BUFFERED_IO;
+    } else if (device->Flags & DO_DIRECT_IO) {
+        mdl = IoAllocateMdl(data, length, FALSE, FALSE, NULL);
+        if (!mdl) {
+            ERR("IoAllocateMdl failed\n");
+            // Status was previously left uninitialised on this path
+            Status = STATUS_INSUFFICIENT_RESOURCES;
+            goto exit;
+        }
+
+        Irp->MdlAddress = mdl;
+
+        // NOTE(review): MmProbeAndLockPages raises an exception on failure and is not guarded here
+        MmProbeAndLockPages(mdl, KernelMode, IoWriteAccess);
+    } else {
+        Irp->UserBuffer = data;
+    }
+
+    IrpSp->Parameters.Write.Length = length;
+    IrpSp->Parameters.Write.ByteOffset = offset;
+
+    Irp->UserIosb = &context->iosb;
+
+    Irp->UserEvent = &context->Event;
+
+    IoSetCompletionRoutine(Irp, write_completion, context, TRUE, TRUE, TRUE);
+
+    Status = IoCallDriver(device, Irp);
+
+    if (Status == STATUS_PENDING) {
+        KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
+        Status = context->iosb.Status;
+    }
+
+    if (!NT_SUCCESS(Status)) {
+        ERR("IoCallDriver returned %08x\n", Status);
+    }
+
+    // Release only an MDL that this function actually attached, rather than inferring it
+    // from the device flags.
+    if (mdl) {
+        MmUnlockPages(mdl);
+        IoFreeMdl(mdl);
+    }
+
+exit:
+    IoFreeIrp(Irp);
+
+exit2:
+    ExFreePool(context);
+
+    return Status;
+}
+
+// Empties c->deleting, freeing every space entry on it.
+static void clean_space_cache_chunk(device_extension* Vcb, chunk* c) {
+    // FIXME - loop through c->deleting and do TRIM if device supports it
+    // FIXME - also find way of doing TRIM of dropped chunks
+    LIST_ENTRY* head = &c->deleting;
+
+    while (!IsListEmpty(head)) {
+        LIST_ENTRY* first = RemoveHeadList(head);
+
+        ExFreePool(CONTAINING_RECORD(first, space, list_entry));
+    }
+}
+
+// Drains Vcb->chunks_changed. Under each chunk's exclusive lock its "deleting" list is
+// emptied, the chunk is unlinked from the changed list, and list_entry_changed.Flink is
+// reset to NULL.
+static void clean_space_cache(device_extension* Vcb) {
+    LIST_ENTRY* changed = &Vcb->chunks_changed;
+
+    TRACE("(%p)\n", Vcb);
+
+    for (;;) {
+        chunk* ch;
+
+        if (IsListEmpty(changed))
+            break;
+
+        ch = CONTAINING_RECORD(changed->Flink, chunk, list_entry_changed);
+
+        ExAcquireResourceExclusiveLite(&ch->lock, TRUE);
+
+        clean_space_cache_chunk(Vcb, ch);
+
+        RemoveEntryList(&ch->list_entry_changed);
+        ch->list_entry_changed.Flink = NULL;
+
+        ExReleaseResourceLite(&ch->lock);
+    }
+}
+
+// Checks every tree that is flagged for writing.
+// Returns FALSE as soon as one of them is an empty tree that has a parent, is larger than
+// node_size minus the tree header, or has no new address yet; returns TRUE otherwise.
+static BOOL trees_consistent(device_extension* Vcb, LIST_ENTRY* rollback) {
+    const ULONG node_capacity = Vcb->superblock.node_size - sizeof(tree_header);
+    LIST_ENTRY* entry;
+
+    for (entry = Vcb->trees.Flink; entry != &Vcb->trees; entry = entry->Flink) {
+        tree* tr = CONTAINING_RECORD(entry, tree, list_entry);
+
+        if (!tr->write)
+            continue;
+
+        if (tr->parent && tr->header.num_items == 0) {
+#ifdef DEBUG_WRITE_LOOPS
+            ERR("empty tree found, looping again\n");
+#endif
+            return FALSE;
+        }
+
+        if (tr->size > node_capacity) {
+#ifdef DEBUG_WRITE_LOOPS
+            ERR("overlarge tree found (%u > %u), looping again\n", tr->size, node_capacity);
+#endif
+            return FALSE;
+        }
+
+        if (!tr->has_new_address) {
+#ifdef DEBUG_WRITE_LOOPS
+            ERR("tree found without new address, looping again\n");
+#endif
+            return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+// Walks the trees flagged for writing one level at a time, starting at level 0.
+//
+// A dirty tree with a parent gets that parent flagged for writing, so it is picked up when
+// the walk reaches the next level. A dirty tree without a parent that belongs to neither
+// root_root nor chunk_root must have a ROOT_ITEM in the root tree; if that item is shorter
+// than sizeof(ROOT_ITEM) it is deleted and re-inserted at full size from t->root->root_item.
+//
+// The walk stops at the first level that contains no dirty trees.
+// Returns STATUS_SUCCESS, the failure from find_item, STATUS_INSUFFICIENT_RESOURCES on
+// allocation failure, or STATUS_INTERNAL_ERROR if the ROOT_ITEM is missing or cannot be
+// re-inserted.
+static NTSTATUS add_parents(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
+    // Wider than UINT8 so that "level <= 255" can actually become false instead of wrapping.
+    unsigned int level;
+    LIST_ENTRY* le;
+
+    for (level = 0; level <= 255; level++) {
+        BOOL nothing_found = TRUE;
+
+        TRACE("level = %u\n", level);
+
+        le = Vcb->trees.Flink;
+        while (le != &Vcb->trees) {
+            tree* t = CONTAINING_RECORD(le, tree, list_entry);
+
+            if (t->write && t->header.level == level) {
+                TRACE("tree %p: root = %llx, level = %x, parent = %p\n", t, t->header.tree_id, t->header.level, t->parent);
+
+                nothing_found = FALSE;
+
+                if (t->parent) {
+                    if (!t->parent->write)
+                        TRACE("adding tree %p (level %x)\n", t->parent, t->header.level);
+
+                    t->parent->write = TRUE;
+                } else if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
+                    KEY searchkey;
+                    traverse_ptr tp;
+                    NTSTATUS Status;
+
+                    searchkey.obj_id = t->root->id;
+                    searchkey.obj_type = TYPE_ROOT_ITEM;
+                    searchkey.offset = 0xffffffffffffffff;
+
+                    Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("error - find_item returned %08x\n", Status);
+                        return Status;
+                    }
+
+                    if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
+                        ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
+                        return STATUS_INTERNAL_ERROR;
+                    }
+
+                    if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry
+                        ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
+
+                        if (!ri) {
+                            ERR("out of memory\n");
+                            return STATUS_INSUFFICIENT_RESOURCES;
+                        }
+
+                        RtlCopyMemory(ri, &t->root->root_item, sizeof(ROOT_ITEM));
+
+                        delete_tree_item(Vcb, &tp, rollback);
+
+                        if (!insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp, rollback)) {
+                            ERR("insert_tree_item failed\n");
+                            // free the buffer on failure, as the other insert_tree_item callers here do
+                            ExFreePool(ri);
+                            return STATUS_INTERNAL_ERROR;
+                        }
+                    }
+                }
+            }
+
+            le = le->Flink;
+        }
+
+        if (nothing_found)
+            break;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Flags every ancestor of t (its parent, the parent's parent, and so on) for writing.
+// t itself is not modified.
+static void add_parents_to_cache(device_extension* Vcb, tree* t) {
+    tree* ancestor;
+
+    for (ancestor = t->parent; ancestor; ancestor = ancestor->parent)
+        ancestor->write = TRUE;
+}
+
+// Records a tree block at `address` in the extent tree as a TYPE_METADATA_ITEM keyed on
+// (address, level), carrying a single TREE_BLOCK_REF for root_id and a refcount of 1.
+// On success the block's range is subtracted from chunk c's space list under c->lock and the
+// ancestors of the tree that received the item are flagged for writing.
+// Returns FALSE on allocation failure or if insert_tree_item fails.
+static BOOL insert_tree_extent_skinny(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64 address, PIRP Irp, LIST_ENTRY* rollback) {
+    traverse_ptr new_tp;
+    EXTENT_ITEM_SKINNY_METADATA* md = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
+
+    if (!md) {
+        ERR("out of memory\n");
+        return FALSE;
+    }
+
+    md->type = TYPE_TREE_BLOCK_REF;
+    md->tbr.offset = root_id;
+    md->ei.flags = EXTENT_ITEM_TREE_BLOCK;
+    md->ei.generation = Vcb->superblock.generation;
+    md->ei.refcount = 1;
+
+    if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, md, sizeof(EXTENT_ITEM_SKINNY_METADATA), &new_tp, Irp, rollback)) {
+        ERR("insert_tree_item failed\n");
+        ExFreePool(md);
+        return FALSE;
+    }
+
+    ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+    space_list_subtract(Vcb, c, FALSE, address, Vcb->superblock.node_size, rollback);
+    ExReleaseResourceLite(&c->lock);
+
+    add_parents_to_cache(Vcb, new_tp.tree);
+
+    return TRUE;
+}
+
+// Reserves node_size bytes in chunk c for a tree block and records the extent in the extent
+// tree.
+//
+// With BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA set the work is delegated to
+// insert_tree_extent_skinny. Otherwise a TYPE_EXTENT_ITEM keyed on (address, node_size) is
+// inserted with refcount 1 and one TREE_BLOCK_REF for root_id, the range is subtracted from
+// the chunk's space list under c->lock, and the ancestors of the tree that received the item
+// are flagged for writing.
+//
+// On success *new_address receives the chosen address and TRUE is returned. FALSE means no
+// address could be found in the chunk, allocation failed, or the insert failed; in that case
+// *new_address is left untouched.
+static BOOL insert_tree_extent(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64* new_address, PIRP Irp, LIST_ENTRY* rollback) {
+    UINT64 address;
+    EXTENT_ITEM_TREE2* eit2;
+    traverse_ptr insert_tp;
+
+    // seven conversions, seven arguments (Irp was previously missing)
+    TRACE("(%p, %x, %llx, %p, %p, %p, %p)\n", Vcb, level, root_id, c, new_address, Irp, rollback);
+
+    if (!find_address_in_chunk(Vcb, c, Vcb->superblock.node_size, &address))
+        return FALSE;
+
+    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
+        BOOL b = insert_tree_extent_skinny(Vcb, level, root_id, c, address, Irp, rollback);
+
+        if (b)
+            *new_address = address;
+
+        return b;
+    }
+
+    eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
+    if (!eit2) {
+        ERR("out of memory\n");
+        return FALSE;
+    }
+
+    // NOTE(review): eit.firstitem is not filled in here - presumably set later by write_trees; confirm
+    eit2->eit.extent_item.refcount = 1;
+    eit2->eit.extent_item.generation = Vcb->superblock.generation;
+    eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
+    eit2->eit.level = level;
+    eit2->type = TYPE_TREE_BLOCK_REF;
+    eit2->tbr.offset = root_id;
+
+    if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, Irp, rollback)) {
+        ERR("insert_tree_item failed\n");
+        ExFreePool(eit2);
+        return FALSE;
+    }
+
+    ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+
+    space_list_subtract(Vcb, c, FALSE, address, Vcb->superblock.node_size, rollback);
+
+    ExReleaseResourceLite(&c->lock);
+
+    add_parents_to_cache(Vcb, insert_tp.tree);
+
+    *new_address = address;
+
+    return TRUE;
+}
+
+// Picks a new on-disk address for tree t and stores it in t->new_address, setting
+// t->has_new_address.
+//
+// When t->flags is 0 the block-group type is derived here: SYSTEM|DUPLICATE for the chunk
+// root, DATA|METADATA on mixed-group filesystems, METADATA|DUPLICATE otherwise.
+//
+// Candidates are tried in this order:
+//   1. the chunk holding the tree's current address, if it exists and is not read-only;
+//   2. every other writable chunk whose type equals the flags and which has at least
+//      node_size bytes unused (under Vcb->chunk_lock and the chunk's own lock);
+//   3. a chunk freshly obtained from alloc_chunk.
+//
+// Returns STATUS_SUCCESS, or STATUS_DISK_FULL when no chunk could take the block.
+NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
+    chunk *origchunk = NULL, *c;
+    LIST_ENTRY* le;
+    UINT64 flags = t->flags, addr;
+
+    if (flags == 0) {
+        if (t->root->id == BTRFS_ROOT_CHUNK)
+            flags = BLOCK_FLAG_SYSTEM | BLOCK_FLAG_DUPLICATE;
+        else if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS)
+            flags = BLOCK_FLAG_DATA | BLOCK_FLAG_METADATA;
+        else
+            flags = BLOCK_FLAG_METADATA | BLOCK_FLAG_DUPLICATE;
+    }
+
+    if (t->has_address) {
+        origchunk = get_chunk_from_address(Vcb, t->header.address);
+
+        // get_chunk_from_address can return NULL; fall through to the general search then
+        if (origchunk && !origchunk->readonly && insert_tree_extent(Vcb, t->header.level, t->root->id, origchunk, &addr, Irp, rollback)) {
+            t->new_address = addr;
+            t->has_new_address = TRUE;
+            return STATUS_SUCCESS;
+        }
+    }
+
+    ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
+
+    for (le = Vcb->chunks.Flink; le != &Vcb->chunks; le = le->Flink) {
+        c = CONTAINING_RECORD(le, chunk, list_entry);
+
+        if (c->readonly)
+            continue;
+
+        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+
+        if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
+            if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) {
+                ExReleaseResourceLite(&c->lock);
+                goto found;
+            }
+        }
+
+        ExReleaseResourceLite(&c->lock);
+    }
+
+    // allocate new chunk if necessary
+    if ((c = alloc_chunk(Vcb, flags))) {
+        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+
+        if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
+            if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) {
+                ExReleaseResourceLite(&c->lock);
+                goto found;
+            }
+        }
+
+        ExReleaseResourceLite(&c->lock);
+    }
+
+    ExReleaseResourceLite(&Vcb->chunk_lock);
+
+    ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size);
+
+    return STATUS_DISK_FULL;
+
+found:
+    ExReleaseResourceLite(&Vcb->chunk_lock);
+
+    t->new_address = addr;
+    t->has_new_address = TRUE;
+
+    return STATUS_SUCCESS;
+}
+
+// TESTING
+// static void check_tree_num_items(tree* t) {
+// LIST_ENTRY* le2;
+// UINT32 ni;
+//
+// le2 = t->itemlist.Flink;
+// ni = 0;
+// while (le2 != &t->itemlist) {
+// tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
+// if (!td->ignore)
+// ni++;
+// le2 = le2->Flink;
+// }
+//
+// if (t->header.num_items != ni) {
+// ERR("tree %p not okay: num_items was %x, expecting %x\n", t, ni, t->header.num_items);
+// int3;
+// } else {
+// ERR("tree %p okay\n", t);
+// }
+// }
+//
+// static void check_trees_num_items(LIST_ENTRY* tc) {
+// LIST_ENTRY* le = tc->Flink;
+// while (le != tc) {
+// tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
+//
+// check_tree_num_items(tc2->tree);
+//
+// le = le->Flink;
+// }
+// }
+
+// Drops one tree reference from the node_size extent at `address`.
+// The reference is attributed to the parent's tree_id when t has a parent, otherwise to t's
+// own tree_id. If the extent's refcount was exactly 1 beforehand, the containing chunk's
+// usage is decreased and the range is added back to its space list under the chunk lock.
+// Returns STATUS_INTERNAL_ERROR if the refcount was already 0, or the failure from
+// decrease_extent_refcount_tree; a missing chunk is only logged.
+static NTSTATUS reduce_tree_extent(device_extension* Vcb, UINT64 address, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
+    UINT64 refs_before, owner;
+    chunk* ch;
+
+    TRACE("(%p, %llx, %p)\n", Vcb, address, t);
+
+    refs_before = get_extent_refcount(Vcb, address, Vcb->superblock.node_size, Irp);
+    if (refs_before == 0) {
+        ERR("error - refcount for extent %llx was 0\n", address);
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    owner = t->parent ? t->parent->header.tree_id : t->header.tree_id;
+
+    Status = decrease_extent_refcount_tree(Vcb, address, Vcb->superblock.node_size, owner, t->header.level, Irp, rollback);
+    if (!NT_SUCCESS(Status)) {
+        ERR("decrease_extent_refcount_tree returned %08x\n", Status);
+        return Status;
+    }
+
+    if (refs_before != 1)
+        return STATUS_SUCCESS;
+
+    ch = get_chunk_from_address(Vcb, address);
+    if (!ch) {
+        ERR("could not find chunk for address %llx\n", address);
+        return STATUS_SUCCESS;
+    }
+
+    ExAcquireResourceExclusiveLite(&ch->lock, TRUE);
+    decrease_chunk_usage(ch, Vcb->superblock.node_size);
+    space_list_add(Vcb, ch, TRUE, address, Vcb->superblock.node_size, rollback);
+    ExReleaseResourceLite(&ch->lock);
+
+    return STATUS_SUCCESS;
+}
+
+// Adds edr->count references described by edr to ce, on the old_refs list when `old` is TRUE
+// and on the refs list otherwise. An existing EXTENT_DATA_REF entry with the same root, objid
+// and offset has its count bumped; failing that a new entry is appended. The matching total
+// (ce->old_count or ce->count) grows by edr->count.
+// Returns STATUS_INSUFFICIENT_RESOURCES if a new entry cannot be allocated.
+static NTSTATUS add_changed_extent_ref_edr(changed_extent* ce, EXTENT_DATA_REF* edr, BOOL old) {
+    LIST_ENTRY* head = old ? &ce->old_refs : &ce->refs;
+    LIST_ENTRY* cur;
+    BOOL merged = FALSE;
+
+    for (cur = head->Flink; cur != head; cur = cur->Flink) {
+        changed_extent_ref* ref = CONTAINING_RECORD(cur, changed_extent_ref, list_entry);
+
+        if (ref->type == TYPE_EXTENT_DATA_REF && ref->edr.root == edr->root && ref->edr.objid == edr->objid && ref->edr.offset == edr->offset) {
+            ref->edr.count += edr->count;
+            merged = TRUE;
+            break;
+        }
+    }
+
+    if (!merged) {
+        changed_extent_ref* fresh = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
+
+        if (!fresh) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        fresh->type = TYPE_EXTENT_DATA_REF;
+        RtlCopyMemory(&fresh->edr, edr, sizeof(EXTENT_DATA_REF));
+        InsertTailList(head, &fresh->list_entry);
+    }
+
+    if (old)
+        ce->old_count += edr->count;
+    else
+        ce->count += edr->count;
+
+    return STATUS_SUCCESS;
+}
+
+// Adds sdr->count references described by sdr to ce, on the old_refs list when `old` is TRUE
+// and on the refs list otherwise. An existing SHARED_DATA_REF entry with the same offset has
+// its count bumped; failing that a new entry is appended. The matching total (ce->old_count
+// or ce->count) grows by sdr->count.
+// Returns STATUS_INSUFFICIENT_RESOURCES if a new entry cannot be allocated.
+static NTSTATUS add_changed_extent_ref_sdr(changed_extent* ce, SHARED_DATA_REF* sdr, BOOL old) {
+    LIST_ENTRY* head = old ? &ce->old_refs : &ce->refs;
+    LIST_ENTRY* cur;
+    BOOL merged = FALSE;
+
+    for (cur = head->Flink; cur != head; cur = cur->Flink) {
+        changed_extent_ref* ref = CONTAINING_RECORD(cur, changed_extent_ref, list_entry);
+
+        if (ref->type == TYPE_SHARED_DATA_REF && ref->sdr.offset == sdr->offset) {
+            ref->sdr.count += sdr->count;
+            merged = TRUE;
+            break;
+        }
+    }
+
+    if (!merged) {
+        changed_extent_ref* fresh = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
+
+        if (!fresh) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        fresh->type = TYPE_SHARED_DATA_REF;
+        RtlCopyMemory(&fresh->sdr, sdr, sizeof(SHARED_DATA_REF));
+        InsertTailList(head, &fresh->list_entry);
+    }
+
+    if (old)
+        ce->old_count += sdr->count;
+    else
+        ce->count += sdr->count;
+
+    return STATUS_SUCCESS;
+}
+
+// Looks in the extent tree for an item keyed on t->header.address (searching with
+// TYPE_METADATA_ITEM on skinny-metadata filesystems, TYPE_EXTENT_ITEM otherwise, and the
+// maximum offset).
+// Returns TRUE only when the item found does not have that address with type
+// TYPE_METADATA_ITEM or TYPE_EXTENT_ITEM; returns FALSE if it does, or if find_item fails.
+static BOOL shared_tree_is_unique(device_extension* Vcb, tree* t, PIRP Irp) {
+    NTSTATUS Status;
+    traverse_ptr found;
+    KEY key;
+    BOOL has_extent_item;
+
+    key.obj_id = t->header.address;
+    key.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
+    key.offset = 0xffffffffffffffff;
+
+    Status = find_item(Vcb, Vcb->extent_root, &found, &key, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return FALSE;
+    }
+
+    has_extent_item = found.item->key.obj_id == t->header.address &&
+                      (found.item->key.obj_type == TYPE_METADATA_ITEM || found.item->key.obj_type == TYPE_EXTENT_ITEM);
+
+    return has_extent_item ? FALSE : TRUE;
+}
+
+// Returns the changed_extent already recorded for `address` on the changed_extents list of
+// the chunk containing it, or NULL if there is no such chunk or no such entry.
+static changed_extent* lookup_changed_extent(device_extension* Vcb, UINT64 address) {
+    chunk* c = get_chunk_from_address(Vcb, address);
+    LIST_ENTRY* le;
+
+    if (!c)
+        return NULL;
+
+    for (le = c->changed_extents.Flink; le != &c->changed_extents; le = le->Flink) {
+        changed_extent* ce = CONTAINING_RECORD(le, changed_extent, list_entry);
+
+        if (ce->address == address)
+            return ce;
+    }
+
+    return NULL;
+}
+
+// Fixes up extent back-references for tree t, which still sits at t->header.address and is
+// about to be written elsewhere.
+//
+// 1. If the extent carries EXTENT_ITEM_SHARED_BACKREFS, or the header has
+//    HEADER_FLAG_SHARED_BACKREF or lacks HEADER_FLAG_MIXED_BACKREF: every item not inserted
+//    in this transaction gets a keyed reference (EXTENT_DATA_REF for regular/prealloc data
+//    extents in a leaf, TREE_BLOCK_REF for children of an internal node); shared references
+//    keyed on t->header.address are dropped where the conditions below hold; finally t itself
+//    gets a TREE_BLOCK_REF and HEADER_FLAG_SHARED_BACKREF is cleared.
+// 2. One reference on the old address is dropped via reduce_tree_extent and t->has_address
+//    is cleared.
+// 3. If the old extent had more than one reference and was not flagged shared: it is flagged
+//    shared when t->header.tree_id matches t->root->id, and every non-inserted item gets an
+//    extra reference (shared if the ids match, keyed otherwise).
+//
+// On success t->updated_extents is set and t->header.tree_id becomes t->root->id.
+// Returns STATUS_INTERNAL_ERROR if the extent's refcount is 0, or the first failure from a
+// helper.
+static NTSTATUS update_tree_extents(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
+    UINT64 rc = get_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, Irp);
+    UINT64 flags = get_extent_flags(Vcb, t->header.address, Irp);
+    LIST_ENTRY* le;
+
+    if (rc == 0) {
+        ERR("refcount for extent %llx was 0\n", t->header.address);
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    if (flags & EXTENT_ITEM_SHARED_BACKREFS || t->header.flags & HEADER_FLAG_SHARED_BACKREF || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
+        TREE_BLOCK_REF tbr;
+        BOOL unique = rc > 1 ? FALSE : (t->parent ? shared_tree_is_unique(Vcb, t->parent, Irp) : FALSE);
+
+        if (t->header.level == 0) {
+            for (le = t->itemlist.Flink; le != &t->itemlist; le = le->Flink) {
+                tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
+                EXTENT_DATA* ed;
+                EXTENT_DATA2* ed2;
+                EXTENT_DATA_REF edr;
+                changed_extent* ce;
+
+                if (td->inserted || td->key.obj_type != TYPE_EXTENT_DATA || td->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2))
+                    continue;
+
+                ed = (EXTENT_DATA*)td->data;
+
+                if (ed->type != EXTENT_TYPE_REGULAR && ed->type != EXTENT_TYPE_PREALLOC)
+                    continue;
+
+                ed2 = (EXTENT_DATA2*)ed->data;
+
+                if (!(ed2->size > 0))
+                    continue;
+
+                ce = lookup_changed_extent(Vcb, ed2->address);
+
+                edr.root = t->root->id;
+                edr.objid = td->key.obj_id;
+                edr.offset = td->key.offset - ed2->offset;
+                edr.count = 1;
+
+                if (ce) {
+                    // recorded on both lists, so both old_count and count grow by one
+                    Status = add_changed_extent_ref_edr(ce, &edr, TRUE);
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("add_changed_extent_ref_edr returned %08x\n", Status);
+                        return Status;
+                    }
+
+                    Status = add_changed_extent_ref_edr(ce, &edr, FALSE);
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("add_changed_extent_ref_edr returned %08x\n", Status);
+                        return Status;
+                    }
+                }
+
+                Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp, rollback);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("increase_extent_refcount returned %08x\n", Status);
+                    return Status;
+                }
+
+                if ((flags & EXTENT_ITEM_SHARED_BACKREFS && unique) || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
+                    UINT64 sdrrc = find_extent_shared_data_refcount(Vcb, ed2->address, t->header.address, Irp);
+
+                    if (sdrrc > 0) {
+                        SHARED_DATA_REF sdr;
+
+                        sdr.offset = t->header.address;
+                        sdr.count = sdrrc;
+
+                        Status = decrease_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0,
+                                                          t->header.address, Irp, rollback);
+                        if (!NT_SUCCESS(Status)) {
+                            ERR("decrease_extent_refcount returned %08x\n", Status);
+                            return Status;
+                        }
+
+                        if (ce) {
+                            ce->count--;
+                            ce->old_count--;
+                        }
+                    }
+                }
+
+                // FIXME - clear shared flag if unique?
+            }
+        } else {
+            for (le = t->itemlist.Flink; le != &t->itemlist; le = le->Flink) {
+                tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
+                TREE_BLOCK_REF child_tbr;
+
+                if (td->inserted)
+                    continue;
+
+                child_tbr.offset = t->root->id;
+
+                Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF,
+                                                  &child_tbr, &td->key, t->header.level - 1, Irp, rollback);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("increase_extent_refcount returned %08x\n", Status);
+                    return Status;
+                }
+
+                if (unique || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
+                    UINT64 sbrrc = find_extent_shared_tree_refcount(Vcb, td->treeholder.address, t->header.address, Irp);
+
+                    if (sbrrc > 0) {
+                        SHARED_BLOCK_REF sbr;
+
+                        sbr.offset = t->header.address;
+
+                        Status = decrease_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
+                                                          t->header.address, Irp, rollback);
+                        if (!NT_SUCCESS(Status)) {
+                            ERR("decrease_extent_refcount returned %08x\n", Status);
+                            return Status;
+                        }
+                    }
+                }
+
+                // FIXME - clear shared flag if unique?
+            }
+        }
+
+        // unique can only be TRUE when t->parent is non-NULL (see its initialiser)
+        if (unique) {
+            UINT64 sbrrc = find_extent_shared_tree_refcount(Vcb, t->header.address, t->parent->header.address, Irp);
+
+            if (sbrrc == 1) {
+                SHARED_BLOCK_REF sbr;
+
+                sbr.offset = t->parent->header.address;
+
+                Status = decrease_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
+                                                  t->parent->header.address, Irp, rollback);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("decrease_extent_refcount returned %08x\n", Status);
+                    return Status;
+                }
+            }
+        }
+
+        if (t->parent)
+            tbr.offset = t->parent->header.tree_id;
+        else
+            tbr.offset = t->header.tree_id;
+
+        Status = increase_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr,
+                                          t->parent ? &t->paritem->key : NULL, t->header.level, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("increase_extent_refcount returned %08x\n", Status);
+            return Status;
+        }
+
+        // FIXME - clear shared flag if unique?
+
+        t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
+    }
+
+    Status = reduce_tree_extent(Vcb, t->header.address, t, Irp, rollback);
+
+    if (!NT_SUCCESS(Status)) {
+        ERR("reduce_tree_extent returned %08x\n", Status);
+        return Status;
+    }
+
+    t->has_address = FALSE;
+
+    if (rc > 1 && !(flags & EXTENT_ITEM_SHARED_BACKREFS)) {
+        if (t->header.tree_id == t->root->id) {
+            flags |= EXTENT_ITEM_SHARED_BACKREFS;
+            update_extent_flags(Vcb, t->header.address, flags, Irp);
+        }
+
+        if (t->header.level > 0) {
+            for (le = t->itemlist.Flink; le != &t->itemlist; le = le->Flink) {
+                tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
+
+                if (td->inserted)
+                    continue;
+
+                if (t->header.tree_id == t->root->id) {
+                    SHARED_BLOCK_REF sbr;
+
+                    sbr.offset = t->header.address;
+
+                    Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, &td->key, t->header.level - 1, Irp, rollback);
+                } else {
+                    TREE_BLOCK_REF child_tbr;
+
+                    child_tbr.offset = t->root->id;
+
+                    Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &child_tbr, &td->key, t->header.level - 1, Irp, rollback);
+                }
+
+                if (!NT_SUCCESS(Status)) {
+                    ERR("increase_extent_refcount returned %08x\n", Status);
+                    return Status;
+                }
+            }
+        } else {
+            for (le = t->itemlist.Flink; le != &t->itemlist; le = le->Flink) {
+                tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
+                EXTENT_DATA* ed;
+                EXTENT_DATA2* ed2;
+                changed_extent* ce;
+
+                if (td->inserted || td->key.obj_type != TYPE_EXTENT_DATA || td->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2))
+                    continue;
+
+                ed = (EXTENT_DATA*)td->data;
+
+                if (ed->type != EXTENT_TYPE_REGULAR && ed->type != EXTENT_TYPE_PREALLOC)
+                    continue;
+
+                ed2 = (EXTENT_DATA2*)ed->data;
+
+                if (!(ed2->size > 0))
+                    continue;
+
+                ce = lookup_changed_extent(Vcb, ed2->address);
+
+                if (t->header.tree_id == t->root->id) {
+                    SHARED_DATA_REF sdr;
+
+                    sdr.offset = t->header.address;
+                    sdr.count = 1;
+
+                    if (ce) {
+                        Status = add_changed_extent_ref_sdr(ce, &sdr, TRUE);
+                        if (!NT_SUCCESS(Status)) {
+                            ERR("add_changed_extent_ref_sdr returned %08x\n", Status);
+                            return Status;
+                        }
+
+                        Status = add_changed_extent_ref_sdr(ce, &sdr, FALSE);
+                        if (!NT_SUCCESS(Status)) {
+                            ERR("add_changed_extent_ref_sdr returned %08x\n", Status);
+                            return Status;
+                        }
+                    }
+
+                    Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, Irp, rollback);
+                } else {
+                    EXTENT_DATA_REF edr;
+
+                    edr.root = t->root->id;
+                    edr.objid = td->key.obj_id;
+                    edr.offset = td->key.offset - ed2->offset;
+                    edr.count = 1;
+
+                    if (ce) {
+                        Status = add_changed_extent_ref_edr(ce, &edr, TRUE);
+                        if (!NT_SUCCESS(Status)) {
+                            ERR("add_changed_extent_ref_edr returned %08x\n", Status);
+                            return Status;
+                        }
+
+                        Status = add_changed_extent_ref_edr(ce, &edr, FALSE);
+                        if (!NT_SUCCESS(Status)) {
+                            ERR("add_changed_extent_ref_edr returned %08x\n", Status);
+                            return Status;
+                        }
+                    }
+
+                    Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_EXTENT_DATA_REF, &edr, NULL, 0, Irp, rollback);
+                }
+
+                if (!NT_SUCCESS(Status)) {
+                    ERR("increase_extent_refcount returned %08x\n", Status);
+                    return Status;
+                }
+            }
+        }
+    }
+
+    t->updated_extents = TRUE;
+    t->header.tree_id = t->root->id;
+
+    return STATUS_SUCCESS;
+}
+
+// Pass 1: every tree flagged for writing that has no new address gets one from
+// get_tree_new_address, and the usage of the chunk holding that address grows by node_size.
+// Pass 2 (only if pass 1 allocated anything): from the highest level seen in pass 1 down to
+// level 0, update_tree_extents is run on each dirty tree that still has an old address and
+// has not had its extents updated.
+// Returns the first failure from a helper, or STATUS_INTERNAL_ERROR if a freshly allocated
+// address maps to no chunk.
+static NTSTATUS allocate_tree_extents(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
+    LIST_ENTRY* entry;
+    UINT8 top_level = 0, cur_level;
+    BOOL any_allocated = FALSE;
+
+    TRACE("(%p)\n", Vcb);
+
+    for (entry = Vcb->trees.Flink; entry != &Vcb->trees; entry = entry->Flink) {
+        tree* tr = CONTAINING_RECORD(entry, tree, list_entry);
+        chunk* ch;
+
+        if (!tr->write || tr->has_new_address)
+            continue;
+
+        Status = get_tree_new_address(Vcb, tr, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("get_tree_new_address returned %08x\n", Status);
+            return Status;
+        }
+
+        TRACE("allocated extent %llx\n", tr->new_address);
+
+        ch = get_chunk_from_address(Vcb, tr->new_address);
+        if (!ch) {
+            ERR("could not find chunk for address %llx\n", tr->new_address);
+            return STATUS_INTERNAL_ERROR;
+        }
+
+        increase_chunk_usage(ch, Vcb->superblock.node_size);
+
+        any_allocated = TRUE;
+
+        if (tr->header.level > top_level)
+            top_level = tr->header.level;
+    }
+
+    if (!any_allocated)
+        return STATUS_SUCCESS;
+
+    // count down explicitly: cur_level is unsigned, so the loop exits on reaching 0
+    for (cur_level = top_level; ; cur_level--) {
+        for (entry = Vcb->trees.Flink; entry != &Vcb->trees; entry = entry->Flink) {
+            tree* tr = CONTAINING_RECORD(entry, tree, list_entry);
+
+            if (!tr->write || tr->updated_extents || !tr->has_address || tr->header.level != cur_level)
+                continue;
+
+            Status = update_tree_extents(Vcb, tr, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("update_tree_extents returned %08x\n", Status);
+                return Status;
+            }
+        }
+
+        if (cur_level == 0)
+            break;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// For every parentless tree flagged for writing: unless it belongs to root_root or
+// chunk_root, its ROOT_ITEM in the root tree is overwritten in place with the root's
+// root_item after block_number, root_level and both generation fields have been refreshed.
+// In all cases the root's treeholder address is set to the tree's new address.
+// Finishes by calling update_chunk_caches.
+// Returns the failure from find_item or update_chunk_caches, or STATUS_INTERNAL_ERROR if a
+// ROOT_ITEM cannot be found.
+static NTSTATUS update_root_root(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
+    LIST_ENTRY* entry;
+
+    TRACE("(%p)\n", Vcb);
+
+    for (entry = Vcb->trees.Flink; entry != &Vcb->trees; entry = entry->Flink) {
+        tree* tr = CONTAINING_RECORD(entry, tree, list_entry);
+
+        if (!tr->write || tr->parent)
+            continue;
+
+        if (tr->root != Vcb->root_root && tr->root != Vcb->chunk_root) {
+            traverse_ptr tp;
+            KEY key;
+
+            key.obj_id = tr->root->id;
+            key.obj_type = TYPE_ROOT_ITEM;
+            key.offset = 0xffffffffffffffff;
+
+            Status = find_item(Vcb, Vcb->root_root, &tp, &key, FALSE, Irp);
+            if (!NT_SUCCESS(Status)) {
+                ERR("error - find_item returned %08x\n", Status);
+                return Status;
+            }
+
+            if (tp.item->key.obj_id != key.obj_id || tp.item->key.obj_type != key.obj_type) {
+                ERR("could not find ROOT_ITEM for tree %llx\n", key.obj_id);
+                int3;
+                return STATUS_INTERNAL_ERROR;
+            }
+
+            TRACE("updating the address for root %llx to %llx\n", key.obj_id, tr->new_address);
+
+            tr->root->root_item.block_number = tr->new_address;
+            tr->root->root_item.root_level = tr->header.level;
+            tr->root->root_item.generation = Vcb->superblock.generation;
+            tr->root->root_item.generation2 = Vcb->superblock.generation;
+
+            // item is guaranteed to be at least sizeof(ROOT_ITEM), due to add_parents
+            RtlCopyMemory(tp.item->data, &tr->root->root_item, sizeof(ROOT_ITEM));
+        }
+
+        tr->root->treeholder.address = tr->new_address;
+    }
+
+    Status = update_chunk_caches(Vcb, Irp, rollback);
+    if (!NT_SUCCESS(Status)) {
+        ERR("update_chunk_caches returned %08x\n", Status);
+        return Status;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Serialises every tree marked for writing and sends it to disk.
+//
+// Phase 1 walks the dirty trees one level at a time, starting at level 0. Each tree's
+// first non-ignored key, new address and generation are pushed into its parent item,
+// and on filesystems without skinny metadata the key is also stored in the firstitem
+// field of the tree's EXTENT_ITEM_TREE.
+// Phase 2 builds a node_size buffer per tree (header, items, CRC32C) and queues it on
+// a list kept sorted by address.
+// Phase 3 merges contiguous buffers that lie in the same chunk, flags RAID5/6 writes
+// that fall in the same stripe row as the preceding write ("overlap"), issues the
+// non-overlapping writes together through write_data and waits for them, then performs
+// the overlapping ones one at a time with write_data_complete.
+//
+// Returns STATUS_SUCCESS, or the status of the first failure encountered.
+static NTSTATUS write_trees(device_extension* Vcb, PIRP Irp) {
+    UINT8 level;
+    UINT8 *data, *body;
+    UINT32 crc32;
+    NTSTATUS Status;
+    LIST_ENTRY* le;
+    write_data_context* wtc;
+    LIST_ENTRY tree_writes;
+    tree_write* tw;
+    chunk* c;
+
+    TRACE("(%p)\n", Vcb);
+
+    InitializeListHead(&tree_writes);
+
+    for (level = 0; ; level++) {
+        BOOL nothing_found = TRUE;
+
+        TRACE("level = %u\n", level);
+
+        le = Vcb->trees.Flink;
+        while (le != &Vcb->trees) {
+            tree* t = CONTAINING_RECORD(le, tree, list_entry);
+
+            if (t->write && t->header.level == level) {
+                KEY firstitem, searchkey;
+                LIST_ENTRY* le2;
+                traverse_ptr tp;
+                EXTENT_ITEM_TREE* eit;
+
+                if (!t->has_new_address) {
+                    ERR("error - tried to write tree with no new address\n");
+                    int3;
+                }
+
+                // a tree with no live items has no first key; use zeroes rather than stack garbage
+                RtlZeroMemory(&firstitem, sizeof(KEY));
+
+                le2 = t->itemlist.Flink;
+                while (le2 != &t->itemlist) {
+                    tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
+                    if (!td->ignore) {
+                        firstitem = td->key;
+                        break;
+                    }
+                    le2 = le2->Flink;
+                }
+
+                if (t->parent) {
+                    t->paritem->key = firstitem;
+                    t->paritem->treeholder.address = t->new_address;
+                    t->paritem->treeholder.generation = Vcb->superblock.generation;
+                }
+
+                if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
+                    searchkey.obj_id = t->new_address;
+                    searchkey.obj_type = TYPE_EXTENT_ITEM;
+                    searchkey.offset = Vcb->superblock.node_size;
+
+                    // nothing has been allocated yet, so returning directly is safe in this phase
+                    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("error - find_item returned %08x\n", Status);
+                        return Status;
+                    }
+
+                    if (keycmp(searchkey, tp.item->key)) {
+                        ERR("could not find %llx,%x,%llx in extent_root (found %llx,%x,%llx instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+                        return STATUS_INTERNAL_ERROR;
+                    }
+
+                    if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
+                        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
+                        return STATUS_INTERNAL_ERROR;
+                    }
+
+                    eit = (EXTENT_ITEM_TREE*)tp.item->data;
+                    eit->firstitem = firstitem;
+                }
+
+                nothing_found = FALSE;
+            }
+
+            le = le->Flink;
+        }
+
+        // level is a UINT8, so stop explicitly at the top instead of wrapping back to 0
+        if (nothing_found || level == 255)
+            break;
+    }
+
+    TRACE("allocated tree extents\n");
+
+    wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context), ALLOC_TAG);
+    if (!wtc) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE);
+    InitializeListHead(&wtc->stripes);
+    wtc->tree = TRUE;
+    wtc->stripes_left = 0;
+
+    le = Vcb->trees.Flink;
+    while (le != &Vcb->trees) {
+        tree* t = CONTAINING_RECORD(le, tree, list_entry);
+#ifdef DEBUG_PARANOID
+        UINT32 num_items = 0, size = 0;
+        LIST_ENTRY* le2;
+        BOOL crash = FALSE;
+#endif
+
+        if (t->write) {
+#ifdef DEBUG_PARANOID
+            // recount the items and their size, and compare against the cached header values
+            le2 = t->itemlist.Flink;
+            while (le2 != &t->itemlist) {
+                tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
+                if (!td->ignore) {
+                    num_items++;
+
+                    if (t->header.level == 0)
+                        size += td->size;
+                }
+                le2 = le2->Flink;
+            }
+
+            if (t->header.level == 0)
+                size += num_items * sizeof(leaf_node);
+            else
+                size += num_items * sizeof(internal_node);
+
+            if (num_items != t->header.num_items) {
+                ERR("tree %llx, level %x: num_items was %x, expected %x\n", t->root->id, t->header.level, num_items, t->header.num_items);
+                crash = TRUE;
+            }
+
+            if (size != t->size) {
+                ERR("tree %llx, level %x: size was %x, expected %x\n", t->root->id, t->header.level, size, t->size);
+                crash = TRUE;
+            }
+
+            if (t->header.num_items == 0 && t->parent) {
+                ERR("tree %llx, level %x: tried to write empty tree with parent\n", t->root->id, t->header.level);
+                crash = TRUE;
+            }
+
+            if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
+                ERR("tree %llx, level %x: tried to write overlarge tree (%x > %x)\n", t->root->id, t->header.level, t->size, Vcb->superblock.node_size - sizeof(tree_header));
+                crash = TRUE;
+            }
+
+            if (crash) {
+                ERR("tree %p\n", t);
+                le2 = t->itemlist.Flink;
+                while (le2 != &t->itemlist) {
+                    tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
+                    if (!td->ignore) {
+                        ERR("%llx,%x,%llx inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted);
+                    }
+                    le2 = le2->Flink;
+                }
+                int3;
+            }
+#endif
+            t->header.address = t->new_address;
+            t->header.generation = Vcb->superblock.generation;
+            t->header.tree_id = t->root->id;
+            t->header.flags |= HEADER_FLAG_MIXED_BACKREF;
+            t->header.fs_uuid = Vcb->superblock.uuid;
+            t->has_address = TRUE;
+
+            data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
+            if (!data) {
+                ERR("out of memory\n");
+                Status = STATUS_INSUFFICIENT_RESOURCES;
+                goto end;
+            }
+
+            body = data + sizeof(tree_header);
+
+            RtlCopyMemory(data, &t->header, sizeof(tree_header));
+            RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header));
+
+            if (t->header.level == 0) {
+                // leaf: item headers grow up from the start of the body, item data grows down from the end of the node
+                leaf_node* itemptr = (leaf_node*)body;
+                int i = 0;
+                LIST_ENTRY* le2;
+                UINT8* dataptr = data + Vcb->superblock.node_size;
+
+                le2 = t->itemlist.Flink;
+                while (le2 != &t->itemlist) {
+                    tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
+                    if (!td->ignore) {
+                        dataptr = dataptr - td->size;
+
+                        itemptr[i].key = td->key;
+                        itemptr[i].offset = (UINT8*)dataptr - (UINT8*)body;
+                        itemptr[i].size = td->size;
+                        i++;
+
+                        if (td->size > 0)
+                            RtlCopyMemory(dataptr, td->data, td->size);
+                    }
+
+                    le2 = le2->Flink;
+                }
+            } else {
+                // internal node: one (key, child address, child generation) entry per item
+                internal_node* itemptr = (internal_node*)body;
+                int i = 0;
+                LIST_ENTRY* le2;
+
+                le2 = t->itemlist.Flink;
+                while (le2 != &t->itemlist) {
+                    tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
+                    if (!td->ignore) {
+                        itemptr[i].key = td->key;
+                        itemptr[i].address = td->treeholder.address;
+                        itemptr[i].generation = td->treeholder.generation;
+                        i++;
+                    }
+
+                    le2 = le2->Flink;
+                }
+            }
+
+            // the checksum covers everything after the csum field and is stored at the start of the node
+            crc32 = calc_crc32c(0xffffffff, (UINT8*)&((tree_header*)data)->fs_uuid, Vcb->superblock.node_size - sizeof(((tree_header*)data)->csum));
+            crc32 = ~crc32;
+            *((UINT32*)data) = crc32;
+            TRACE("setting crc32 to %08x\n", crc32);
+
+            tw = ExAllocatePoolWithTag(PagedPool, sizeof(tree_write), ALLOC_TAG);
+            if (!tw) {
+                ERR("out of memory\n");
+                // data is not on any list yet, so release it here; the rest is cleaned up at end
+                ExFreePool(data);
+                Status = STATUS_INSUFFICIENT_RESOURCES;
+                goto end;
+            }
+
+            tw->address = t->new_address;
+            tw->length = Vcb->superblock.node_size;
+            tw->data = data;
+            tw->overlap = FALSE;
+
+            // keep tree_writes sorted by ascending address
+            {
+                LIST_ENTRY* le2;
+                BOOL inserted = FALSE;
+
+                le2 = tree_writes.Flink;
+                while (le2 != &tree_writes) {
+                    tree_write* tw2 = CONTAINING_RECORD(le2, tree_write, list_entry);
+
+                    if (tw2->address > tw->address) {
+                        InsertHeadList(le2->Blink, &tw->list_entry);
+                        inserted = TRUE;
+                        break;
+                    }
+
+                    le2 = le2->Flink;
+                }
+
+                if (!inserted)
+                    InsertTailList(&tree_writes, &tw->list_entry);
+            }
+        }
+
+        le = le->Flink;
+    }
+
+    Status = STATUS_SUCCESS;
+
+    // merge together runs
+    c = NULL;
+    le = tree_writes.Flink;
+    while (le != &tree_writes) {
+        tw = CONTAINING_RECORD(le, tree_write, list_entry);
+
+        if (!c || tw->address < c->offset || tw->address >= c->offset + c->chunk_item->size)
+            c = get_chunk_from_address(Vcb, tw->address);
+        else {
+            tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry);
+
+            if (tw->address == tw2->address + tw2->length) {
+                data = ExAllocatePoolWithTag(NonPagedPool, tw2->length + tw->length, ALLOC_TAG);
+
+                if (!data) {
+                    ERR("out of memory\n");
+                    Status = STATUS_INSUFFICIENT_RESOURCES;
+                    goto end;
+                }
+
+                RtlCopyMemory(data, tw2->data, tw2->length);
+                RtlCopyMemory(&data[tw2->length], tw->data, tw->length);
+
+                ExFreePool(tw2->data);
+                tw2->data = data;
+                tw2->length += tw->length;
+
+                ExFreePool(tw->data);
+                RemoveEntryList(&tw->list_entry);
+                ExFreePool(tw);
+
+                // tw has been folded into tw2; carry on with whatever now follows tw2
+                le = tw2->list_entry.Flink;
+                continue;
+            }
+        }
+
+        le = le->Flink;
+    }
+
+    // mark RAID5/6 overlaps so we can do them one by one
+    c = NULL;
+    le = tree_writes.Flink;
+    while (le != &tree_writes) {
+        tw = CONTAINING_RECORD(le, tree_write, list_entry);
+
+        if (!c || tw->address < c->offset || tw->address >= c->offset + c->chunk_item->size)
+            c = get_chunk_from_address(Vcb, tw->address);
+        else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
+            tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry);
+            UINT64 last_stripe, this_stripe;
+
+            // the divisor leaves one stripe per row out of the count
+            last_stripe = (tw2->address + tw2->length - 1 - c->offset) / (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 1));
+            this_stripe = (tw->address - c->offset) / (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 1));
+
+            if (last_stripe == this_stripe)
+                tw->overlap = TRUE;
+        } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
+            tree_write* tw2 = CONTAINING_RECORD(le->Blink, tree_write, list_entry);
+            UINT64 last_stripe, this_stripe;
+
+            // the divisor leaves two stripes per row out of the count
+            last_stripe = (tw2->address + tw2->length - 1 - c->offset) / (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 2));
+            this_stripe = (tw->address - c->offset) / (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 2));
+
+            if (last_stripe == this_stripe)
+                tw->overlap = TRUE;
+        }
+
+        le = le->Flink;
+    }
+
+    // queue every write that does not overlap with its predecessor
+    le = tree_writes.Flink;
+    while (le != &tree_writes) {
+        tw = CONTAINING_RECORD(le, tree_write, list_entry);
+
+        if (!tw->overlap) {
+            TRACE("address: %llx, size: %x, overlap = %u\n", tw->address, tw->length, tw->overlap);
+
+            Status = write_data(Vcb, tw->address, tw->data, TRUE, tw->length, wtc, NULL, NULL);
+            if (!NT_SUCCESS(Status)) {
+                ERR("write_data returned %08x\n", Status);
+                goto end;
+            }
+        }
+
+        le = le->Flink;
+    }
+
+    if (wtc->stripes.Flink != &wtc->stripes) {
+        // launch writes and wait
+        le = wtc->stripes.Flink;
+        while (le != &wtc->stripes) {
+            write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
+
+            if (stripe->status != WriteDataStatus_Ignore)
+                IoCallDriver(stripe->device->devobj, stripe->Irp);
+
+            le = le->Flink;
+        }
+
+        KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL);
+
+        le = wtc->stripes.Flink;
+        while (le != &wtc->stripes) {
+            write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
+
+            if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
+                Status = stripe->iosb.Status;
+                break;
+            }
+
+            le = le->Flink;
+        }
+
+        free_write_data_stripes(wtc);
+
+        // bail out now, otherwise the overlap writes below would overwrite the failure status
+        if (!NT_SUCCESS(Status)) {
+            ERR("tree write failed with status %08x\n", Status);
+            goto end;
+        }
+    }
+
+    // now the overlapping writes, each one completed before the next is started
+    le = tree_writes.Flink;
+    while (le != &tree_writes) {
+        tw = CONTAINING_RECORD(le, tree_write, list_entry);
+
+        if (tw->overlap) {
+            TRACE("address: %llx, size: %x, overlap = %u\n", tw->address, tw->length, tw->overlap);
+
+            Status = write_data_complete(Vcb, tw->address, tw->data, tw->length, Irp, NULL);
+            if (!NT_SUCCESS(Status)) {
+                ERR("write_data_complete returned %08x\n", Status);
+                goto end;
+            }
+        }
+
+        le = le->Flink;
+    }
+
+end:
+    ExFreePool(wtc);
+
+    // NOTE(review): tw->data is not released here. Presumably write_data (called with TRUE as
+    // its fourth argument) and write_data_complete take care of the buffers - confirm, since
+    // buffers still queued when an earlier step fails would otherwise leak.
+    while (!IsListEmpty(&tree_writes)) {
+        le = RemoveHeadList(&tree_writes);
+        tw = CONTAINING_RECORD(le, tree_write, list_entry);
+
+        ExFreePool(tw);
+    }
+
+    return Status;
+}
+
+// Searches the root tree for the ROOT_ITEM belonging to root_id. Returns a pointer to the
+// item's data, or NULL if the search fails, lands on an item with a different object ID or
+// type, or finds an item smaller than a ROOT_ITEM.
+static ROOT_ITEM* find_backup_root_item(device_extension* Vcb, UINT64 root_id, PIRP Irp) {
+    KEY searchkey;
+    traverse_ptr tp;
+
+    searchkey.obj_id = root_id;
+    searchkey.obj_type = TYPE_ROOT_ITEM;
+    searchkey.offset = 0xffffffffffffffff;
+
+    if (!NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp)))
+        return NULL;
+
+    if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type || tp.item->size < sizeof(ROOT_ITEM))
+        return NULL;
+
+    return (ROOT_ITEM*)tp.item->data;
+}
+
+// Fills *sb with a snapshot of the current roots: the root and chunk trees are taken from
+// the superblock, while the extent, FS, device and checksum trees are looked up in the root
+// tree. Fields belonging to a root that cannot be found stay zero.
+static void update_backup_superblock(device_extension* Vcb, superblock_backup* sb, PIRP Irp) {
+    ROOT_ITEM* ri;
+
+    RtlZeroMemory(sb, sizeof(superblock_backup));
+
+    sb->root_tree_addr = Vcb->superblock.root_tree_addr;
+    sb->root_tree_generation = Vcb->superblock.generation;
+    sb->root_level = Vcb->superblock.root_level;
+
+    sb->chunk_tree_addr = Vcb->superblock.chunk_tree_addr;
+    sb->chunk_tree_generation = Vcb->superblock.chunk_root_generation;
+    sb->chunk_root_level = Vcb->superblock.chunk_root_level;
+
+    ri = find_backup_root_item(Vcb, BTRFS_ROOT_EXTENT, Irp);
+    if (ri) {
+        sb->extent_tree_addr = ri->block_number;
+        sb->extent_tree_generation = ri->generation;
+        sb->extent_root_level = ri->root_level;
+    }
+
+    ri = find_backup_root_item(Vcb, BTRFS_ROOT_FSTREE, Irp);
+    if (ri) {
+        sb->fs_tree_addr = ri->block_number;
+        sb->fs_tree_generation = ri->generation;
+        sb->fs_root_level = ri->root_level;
+    }
+
+    ri = find_backup_root_item(Vcb, BTRFS_ROOT_DEVTREE, Irp);
+    if (ri) {
+        sb->dev_root_addr = ri->block_number;
+        sb->dev_root_generation = ri->generation;
+        sb->dev_root_level = ri->root_level;
+    }
+
+    ri = find_backup_root_item(Vcb, BTRFS_ROOT_CHECKSUM, Irp);
+    if (ri) {
+        sb->csum_root_addr = ri->block_number;
+        sb->csum_root_generation = ri->generation;
+        sb->csum_root_level = ri->root_level;
+    }
+
+    sb->total_bytes = Vcb->superblock.total_bytes;
+    sb->bytes_used = Vcb->superblock.bytes_used;
+    sb->num_devices = Vcb->superblock.num_devices;
+}
+
+// Writes the in-memory superblock to each superblock location on the given device that
+// fits within the device's length, walking superblock_addrs until a zero entry. Before
+// every write the device's DEV_ITEM, the physical address and a fresh CRC32C are stored
+// in Vcb->superblock.
+//
+// Returns the status of the last write attempted (stopping at the first failure), or
+// STATUS_INTERNAL_ERROR if not even one copy could be attempted.
+static NTSTATUS STDCALL write_superblock(device_extension* Vcb, device* device) {
+    // initialised on every platform so the "nothing written" case never returns garbage
+    NTSTATUS Status = STATUS_INTERNAL_ERROR;
+    unsigned int i;
+    UINT32 crc32;
+
+    RtlCopyMemory(&Vcb->superblock.dev_item, &device->devitem, sizeof(DEV_ITEM));
+
+    // All the documentation says that the Linux driver only writes one superblock
+    // if it thinks a disk is an SSD, but this doesn't seem to be the case!
+
+    for (i = 0; superblock_addrs[i] > 0 && device->length >= superblock_addrs[i] + sizeof(superblock); i++) {
+        TRACE("writing superblock %u\n", i);
+
+        Vcb->superblock.sb_phys_addr = superblock_addrs[i];
+
+        // the checksum covers everything after the checksum field itself
+        crc32 = calc_crc32c(0xffffffff, (UINT8*)&Vcb->superblock.uuid, (ULONG)sizeof(superblock) - sizeof(Vcb->superblock.checksum));
+        crc32 = ~crc32;
+        TRACE("crc32 is %08x\n", crc32);
+        RtlCopyMemory(&Vcb->superblock.checksum, &crc32, sizeof(UINT32));
+
+        Status = write_data_phys(device->devobj, superblock_addrs[i], &Vcb->superblock, sizeof(superblock));
+
+        // break skips the increment, so i still counts only the successful writes
+        if (!NT_SUCCESS(Status))
+            break;
+    }
+
+    if (i == 0) {
+        ERR("no superblocks written!\n");
+    }
+
+    return Status;
+}
+
+// Brings the in-memory superblock up to date and writes it to every device that has a
+// device object and is not read-only. The new root-tree and chunk-tree locations are taken
+// from the dirty top-level trees, the backup-root slots are shifted down by one and the
+// last slot is refilled. Stops at, and returns, the first write_superblock failure.
+static NTSTATUS write_superblocks(device_extension* Vcb, PIRP Irp) {
+    UINT64 idx;
+    NTSTATUS Status;
+    LIST_ENTRY* entry;
+
+    TRACE("(%p)\n", Vcb);
+
+    for (entry = Vcb->trees.Flink; entry != &Vcb->trees; entry = entry->Flink) {
+        tree* t = CONTAINING_RECORD(entry, tree, list_entry);
+
+        // only rewritten tree roots can move the addresses stored in the superblock
+        if (!t->write || t->parent)
+            continue;
+
+        if (t->root == Vcb->root_root) {
+            Vcb->superblock.root_tree_addr = t->new_address;
+            Vcb->superblock.root_level = t->header.level;
+        } else if (t->root == Vcb->chunk_root) {
+            Vcb->superblock.chunk_tree_addr = t->new_address;
+            Vcb->superblock.chunk_root_generation = t->header.generation;
+            Vcb->superblock.chunk_root_level = t->header.level;
+        }
+    }
+
+    // drop the oldest backup by moving every slot down one place...
+    for (idx = 0; idx < BTRFS_NUM_BACKUP_ROOTS - 1; idx++)
+        RtlCopyMemory(&Vcb->superblock.backup[idx], &Vcb->superblock.backup[idx + 1], sizeof(superblock_backup));
+
+    // ...and record the current state in the slot that frees up
+    update_backup_superblock(Vcb, &Vcb->superblock.backup[BTRFS_NUM_BACKUP_ROOTS - 1], Irp);
+
+    for (idx = 0; idx < Vcb->superblock.num_devices; idx++) {
+        if (!Vcb->devices[idx].devobj || Vcb->devices[idx].readonly)
+            continue;
+
+        Status = write_superblock(Vcb, &Vcb->devices[idx]);
+        if (!NT_SUCCESS(Status)) {
+            ERR("write_superblock returned %08x\n", Status);
+            return Status;
+        }
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Applies the pending reference changes recorded in ce to the extent tree, then unlinks
+// and frees ce.
+//
+// For each entry in ce->refs:
+//  - EXTENT_DATA_REF: the matching entry in ce->old_refs (if any) supplies the previous
+//    count and is discarded; the extent's refcount is raised or lowered by the difference.
+//    If the extent changed size and had references before, its EXTENT_ITEM is re-inserted
+//    under the new size and the old item deleted.
+//  - SHARED_DATA_REF: the matching old ref is simply discarded.
+// If the extent ends up with no references and has not been superseded, its checksums are
+// queued for deletion (unless no_csum is set), the chunk's usage is decreased and the
+// range is passed to space_list_add.
+//
+// Returns STATUS_SUCCESS or the first failure; on failure ce is left on its list.
+static NTSTATUS flush_changed_extent(device_extension* Vcb, chunk* c, changed_extent* ce, PIRP Irp, LIST_ENTRY* rollback) {
+    LIST_ENTRY *le, *le2;
+    NTSTATUS Status;
+    UINT64 old_size;
+
+    le = ce->refs.Flink;
+    while (le != &ce->refs) {
+        changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
+        LIST_ENTRY* le3 = le->Flink; // cer is freed at the bottom of the loop, so grab its successor now
+        UINT64 old_count = 0;
+
+        if (cer->type == TYPE_EXTENT_DATA_REF) {
+            le2 = ce->old_refs.Flink;
+            while (le2 != &ce->old_refs) {
+                changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
+
+                if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
+                    old_count = cer2->edr.count;
+
+                    RemoveEntryList(&cer2->list_entry);
+                    ExFreePool(cer2);
+                    break;
+                }
+
+                le2 = le2->Flink;
+            }
+
+            // an extent that already had references is still keyed by its old size
+            old_size = ce->old_count > 0 ? ce->old_size : ce->size;
+
+            if (cer->edr.count > old_count) {
+                Status = increase_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset, cer->edr.count - old_count, Irp, rollback);
+
+                if (!NT_SUCCESS(Status)) {
+                    ERR("increase_extent_refcount_data returned %08x\n", Status);
+                    return Status;
+                }
+            } else if (cer->edr.count < old_count) {
+                Status = decrease_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset,
+                                                       old_count - cer->edr.count, Irp, rollback);
+
+                if (!NT_SUCCESS(Status)) {
+                    ERR("decrease_extent_refcount_data returned %08x\n", Status);
+                    return Status;
+                }
+            }
+
+            if (ce->size != ce->old_size && ce->old_count > 0) {
+                // the size is part of the key, so the item has to be copied to a new key and the old one deleted
+                KEY searchkey;
+                traverse_ptr tp;
+                void* data;
+
+                searchkey.obj_id = ce->address;
+                searchkey.obj_type = TYPE_EXTENT_ITEM;
+                searchkey.offset = ce->old_size;
+
+                Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("error - find_item returned %08x\n", Status);
+                    return Status;
+                }
+
+                if (keycmp(searchkey, tp.item->key)) {
+                    ERR("could not find (%llx,%x,%llx) in extent tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
+                    return STATUS_INTERNAL_ERROR;
+                }
+
+                if (tp.item->size > 0) {
+                    data = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
+
+                    if (!data) {
+                        ERR("out of memory\n");
+                        return STATUS_INSUFFICIENT_RESOURCES;
+                    }
+
+                    RtlCopyMemory(data, tp.item->data, tp.item->size);
+                } else
+                    data = NULL;
+
+                if (!insert_tree_item(Vcb, Vcb->extent_root, ce->address, TYPE_EXTENT_ITEM, ce->size, data, tp.item->size, NULL, Irp, rollback)) {
+                    ERR("insert_tree_item failed\n");
+
+                    // the copy was never handed to the tree, so it is still ours to free
+                    if (data)
+                        ExFreePool(data);
+
+                    return STATUS_INTERNAL_ERROR;
+                }
+
+                delete_tree_item(Vcb, &tp, rollback);
+            }
+        } else if (cer->type == TYPE_SHARED_DATA_REF) {
+            le2 = ce->old_refs.Flink;
+            while (le2 != &ce->old_refs) {
+                changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
+
+                if (cer2->type == TYPE_SHARED_DATA_REF && cer2->sdr.offset == cer->sdr.offset) {
+                    RemoveEntryList(&cer2->list_entry);
+                    ExFreePool(cer2);
+                    break;
+                }
+
+                le2 = le2->Flink;
+            }
+        }
+
+        RemoveEntryList(&cer->list_entry);
+        ExFreePool(cer);
+
+        le = le3;
+    }
+
+#ifdef DEBUG_PARANOID
+    if (!IsListEmpty(&ce->old_refs))
+        WARN("old_refs not empty\n");
+#endif
+
+    if (ce->count == 0 && !ce->superseded) {
+        if (!ce->no_csum) {
+            LIST_ENTRY changed_sector_list;
+
+            changed_sector* sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
+            if (!sc) {
+                ERR("out of memory\n");
+                return STATUS_INSUFFICIENT_RESOURCES;
+            }
+
+            // a deletion entry covering every sector of the extent
+            sc->ol.key = ce->address;
+            sc->checksums = NULL;
+            sc->length = ce->size / Vcb->superblock.sector_size;
+
+            sc->deleted = TRUE;
+
+            InitializeListHead(&changed_sector_list);
+            insert_into_ordered_list(&changed_sector_list, &sc->ol);
+
+            ExAcquireResourceExclusiveLite(&Vcb->checksum_lock, TRUE);
+            commit_checksum_changes(Vcb, &changed_sector_list);
+            ExReleaseResourceLite(&Vcb->checksum_lock);
+        }
+
+        decrease_chunk_usage(c, ce->size);
+
+        space_list_add(Vcb, c, TRUE, ce->address, ce->size, rollback);
+    }
+
+    RemoveEntryList(&ce->list_entry);
+    ExFreePool(ce);
+
+    return STATUS_SUCCESS;
+}
+
+// Applies every entry queued on Vcb->sector_checksums to the checksum tree, then empties
+// and frees the queue (also on failure).
+//
+// For each entry the existing checksum items between startaddr and endaddr are read into
+// one flat array and deleted, the entry's own sectors are overlaid (written, or marked
+// absent when the entry is a deletion), and the result is re-inserted as runs of
+// consecutive sectors, each split into items of at most MAX_CSUM_SIZE bytes.
+//
+// Failures are logged and abandon the remaining entries; nothing is reported to the caller.
+static void update_checksum_tree(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
+    LIST_ENTRY* le = Vcb->sector_checksums.Flink;
+    changed_sector* cs;
+    traverse_ptr tp, next_tp;
+    KEY searchkey;
+    UINT32* data;
+    NTSTATUS Status;
+
+    if (!Vcb->checksum_root) {
+        ERR("no checksum root\n");
+        goto exit;
+    }
+
+    while (le != &Vcb->sector_checksums) {
+        UINT64 startaddr, endaddr;
+        ULONG len;
+        UINT32* checksums;
+        RTL_BITMAP bmp;
+        ULONG* bmparr;
+        ULONG runlength, index;
+
+        // NOTE(review): this cast assumes the list linkage sits at the very start of changed_sector - confirm
+        cs = (changed_sector*)le;
+
+        searchkey.obj_id = EXTENT_CSUM_ID;
+        searchkey.obj_type = TYPE_EXTENT_CSUM;
+        searchkey.offset = cs->ol.key;
+
+        // FIXME - create checksum_root if it doesn't exist at all
+
+        Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
+        if (Status == STATUS_NOT_FOUND) { // tree is completely empty
+            if (!cs->deleted) {
+                checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * cs->length, ALLOC_TAG);
+                if (!checksums) {
+                    ERR("out of memory\n");
+                    goto exit;
+                }
+
+                RtlCopyMemory(checksums, cs->checksums, sizeof(UINT32) * cs->length);
+
+                if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, cs->ol.key, checksums, sizeof(UINT32) * cs->length, NULL, Irp, rollback)) {
+                    ERR("insert_tree_item failed\n");
+                    ExFreePool(checksums);
+                    goto exit;
+                }
+            }
+        } else if (!NT_SUCCESS(Status)) {
+            ERR("find_item returned %08x\n", Status);
+            goto exit;
+        } else {
+            UINT32 tplen;
+
+            // FIXME - check entry is TYPE_EXTENT_CSUM?
+
+            // start at the found item if it begins before our range and reaches up to it
+            if (tp.item->key.offset < cs->ol.key && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= cs->ol.key)
+                startaddr = tp.item->key.offset;
+            else
+                startaddr = cs->ol.key;
+
+            searchkey.obj_id = EXTENT_CSUM_ID;
+            searchkey.obj_type = TYPE_EXTENT_CSUM;
+            searchkey.offset = cs->ol.key + (cs->length * Vcb->superblock.sector_size);
+
+            Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
+            if (!NT_SUCCESS(Status)) {
+                ERR("error - find_item returned %08x\n", Status);
+                goto exit;
+            }
+
+            tplen = tp.item->size / sizeof(UINT32);
+
+            // end at whichever finishes later: the item found at the far end, or our own range
+            if (tp.item->key.offset + (tplen * Vcb->superblock.sector_size) >= cs->ol.key + (cs->length * Vcb->superblock.sector_size))
+                endaddr = tp.item->key.offset + (tplen * Vcb->superblock.sector_size);
+            else
+                endaddr = cs->ol.key + (cs->length * Vcb->superblock.sector_size);
+
+            TRACE("cs starts at %llx (%x sectors)\n", cs->ol.key, cs->length);
+            TRACE("startaddr = %llx\n", startaddr);
+            TRACE("endaddr = %llx\n", endaddr);
+
+            len = (endaddr - startaddr) / Vcb->superblock.sector_size;
+
+            checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * len, ALLOC_TAG);
+            if (!checksums) {
+                ERR("out of memory\n");
+                goto exit;
+            }
+
+            bmparr = ExAllocatePoolWithTag(PagedPool, sizeof(ULONG) * ((len/8)+1), ALLOC_TAG);
+            if (!bmparr) {
+                ERR("out of memory\n");
+                ExFreePool(checksums);
+                goto exit;
+            }
+
+            RtlInitializeBitMap(&bmp, bmparr, len);
+            RtlSetAllBits(&bmp);
+
+            searchkey.obj_id = EXTENT_CSUM_ID;
+            searchkey.obj_type = TYPE_EXTENT_CSUM;
+            searchkey.offset = cs->ol.key;
+
+            Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
+            if (!NT_SUCCESS(Status)) {
+                ERR("error - find_item returned %08x\n", Status);
+                // both scratch buffers are live at this point, so release them before bailing out
+                ExFreePool(bmparr);
+                ExFreePool(checksums);
+                goto exit;
+            }
+
+            // set bit = sector without a checksum, cleared bit = sector with a checksum
+
+            // pull the existing items into the flat array, deleting them from the tree as we go
+            while (tp.item->key.offset < endaddr) {
+                if (tp.item->key.offset >= startaddr) {
+                    if (tp.item->size > 0) {
+                        RtlCopyMemory(&checksums[(tp.item->key.offset - startaddr) / Vcb->superblock.sector_size], tp.item->data, tp.item->size);
+                        RtlClearBits(&bmp, (tp.item->key.offset - startaddr) / Vcb->superblock.sector_size, tp.item->size / sizeof(UINT32));
+                    }
+
+                    delete_tree_item(Vcb, &tp, rollback);
+                }
+
+                if (find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) {
+                    tp = next_tp;
+                } else
+                    break;
+            }
+
+            // overlay this entry's own sectors
+            if (cs->deleted) {
+                RtlSetBits(&bmp, (cs->ol.key - startaddr) / Vcb->superblock.sector_size, cs->length);
+            } else {
+                RtlCopyMemory(&checksums[(cs->ol.key - startaddr) / Vcb->superblock.sector_size], cs->checksums, cs->length * sizeof(UINT32));
+                RtlClearBits(&bmp, (cs->ol.key - startaddr) / Vcb->superblock.sector_size, cs->length);
+            }
+
+            runlength = RtlFindFirstRunClear(&bmp, &index);
+
+            while (runlength != 0) {
+                // write the run back out, no more than MAX_CSUM_SIZE bytes per item
+                do {
+                    ULONG rl;
+                    UINT64 off;
+
+                    if (runlength * sizeof(UINT32) > MAX_CSUM_SIZE)
+                        rl = MAX_CSUM_SIZE / sizeof(UINT32);
+                    else
+                        rl = runlength;
+
+                    data = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * rl, ALLOC_TAG);
+                    if (!data) {
+                        ERR("out of memory\n");
+                        ExFreePool(bmparr);
+                        ExFreePool(checksums);
+                        goto exit;
+                    }
+
+                    RtlCopyMemory(data, &checksums[index], sizeof(UINT32) * rl);
+
+                    off = startaddr + UInt32x32To64(index, Vcb->superblock.sector_size);
+
+                    if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, data, sizeof(UINT32) * rl, NULL, Irp, rollback)) {
+                        ERR("insert_tree_item failed\n");
+                        ExFreePool(data);
+                        ExFreePool(bmparr);
+                        ExFreePool(checksums);
+                        goto exit;
+                    }
+
+                    runlength -= rl;
+                    index += rl;
+                } while (runlength > 0);
+
+                runlength = RtlFindNextForwardRunClear(&bmp, index, &index);
+            }
+
+            ExFreePool(bmparr);
+            ExFreePool(checksums);
+        }
+
+        le = le->Flink;
+    }
+
+exit:
+    // the queue is emptied whether or not every entry was applied
+    while (!IsListEmpty(&Vcb->sector_checksums)) {
+        le = RemoveHeadList(&Vcb->sector_checksums);
+        cs = (changed_sector*)le;
+
+        if (cs->checksums)
+            ExFreePool(cs->checksums);
+
+        ExFreePool(cs);
+    }
+}
+
+// Walks every chunk with chunk_lock held shared and, under each chunk's own lock:
+// flushes its changed extents, creates the chunk if it is flagged as newly created, and
+// rewrites its BLOCK_GROUP_ITEM when the used byte count has changed (also adjusting the
+// superblock's bytes_used). If any extent was flushed, the queued checksum changes are
+// applied at the end. Returns the first failure status, or STATUS_SUCCESS.
+static NTSTATUS update_chunk_usage(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
+    LIST_ENTRY *chunk_le = Vcb->chunks.Flink, *ext_le;
+    chunk* c;
+    KEY searchkey;
+    traverse_ptr tp;
+    BLOCK_GROUP_ITEM* bgi;
+    NTSTATUS Status;
+    BOOL flushed_extents = FALSE;
+
+    TRACE("(%p)\n", Vcb);
+
+    ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
+
+    for (; chunk_le != &Vcb->chunks; chunk_le = chunk_le->Flink) {
+        c = CONTAINING_RECORD(chunk_le, chunk, list_entry);
+
+        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+
+        ext_le = c->changed_extents.Flink;
+        while (ext_le != &c->changed_extents) {
+            // flush_changed_extent frees the entry on success, so note its successor first
+            LIST_ENTRY* next_le = ext_le->Flink;
+            changed_extent* ce = CONTAINING_RECORD(ext_le, changed_extent, list_entry);
+
+            Status = flush_changed_extent(Vcb, c, ce, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("flush_changed_extent returned %08x\n", Status);
+                goto release_chunk;
+            }
+
+            flushed_extents = TRUE;
+
+            ext_le = next_le;
+        }
+
+        // This is usually done by update_chunks, but we have to check again in case any new chunks
+        // have been allocated since.
+        if (c->created) {
+            Status = create_chunk(Vcb, c, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("create_chunk returned %08x\n", Status);
+                goto release_chunk;
+            }
+        }
+
+        if (c->used != c->oldused) {
+            searchkey.obj_id = c->offset;
+            searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
+            searchkey.offset = c->chunk_item->size;
+
+            Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+            if (!NT_SUCCESS(Status)) {
+                ERR("error - find_item returned %08x\n", Status);
+                goto release_chunk;
+            }
+
+            if (keycmp(searchkey, tp.item->key)) {
+                ERR("could not find (%llx,%x,%llx) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
+                int3;
+                Status = STATUS_INTERNAL_ERROR;
+                goto release_chunk;
+            }
+
+            if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) {
+                ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
+                Status = STATUS_INTERNAL_ERROR;
+                goto release_chunk;
+            }
+
+            // replace the item with a copy that carries the new usage figure
+            bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
+            if (!bgi) {
+                ERR("out of memory\n");
+                Status = STATUS_INSUFFICIENT_RESOURCES;
+                goto release_chunk;
+            }
+
+            RtlCopyMemory(bgi, tp.item->data, tp.item->size);
+            bgi->used = c->used;
+
+            TRACE("adjusting usage of chunk %llx to %llx\n", c->offset, c->used);
+
+            delete_tree_item(Vcb, &tp, rollback);
+
+            if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, Irp, rollback)) {
+                ERR("insert_tree_item failed\n");
+                ExFreePool(bgi);
+                Status = STATUS_INTERNAL_ERROR;
+                goto release_chunk;
+            }
+
+            TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
+
+            Vcb->superblock.bytes_used += c->used - c->oldused;
+
+            TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
+
+            c->oldused = c->used;
+        }
+
+        ExReleaseResourceLite(&c->lock);
+    }
+
+    if (flushed_extents) {
+        ExAcquireResourceExclusiveLite(&Vcb->checksum_lock, TRUE);
+        if (!IsListEmpty(&Vcb->sector_checksums)) {
+            update_checksum_tree(Vcb, Irp, rollback);
+        }
+        ExReleaseResourceLite(&Vcb->checksum_lock);
+    }
+
+    Status = STATUS_SUCCESS;
+    goto end;
+
+release_chunk:
+    // shared failure exit for code running with the current chunk's lock held
+    ExReleaseResourceLite(&c->lock);
+
+end:
+    ExReleaseResourceLite(&Vcb->chunk_lock);
+
+    return Status;
+}
+
+// Copies the key of the first entry on t's item list into *key. The entry's ignore flag
+// is not consulted, and *key is left untouched when the list is empty.
+static void get_first_item(tree* t, KEY* key) {
+    LIST_ENTRY* first = t->itemlist.Flink;
+
+    if (first != &t->itemlist)
+        *key = CONTAINING_RECORD(first, tree_data, list_entry)->key;
+}
+
+// Splits tree t in two. The items from newfirstitem to the end of t's item list are
+// moved into a newly allocated tree (nt); t keeps numitems items totalling size bytes.
+// If t has a parent, a new parent entry pointing at nt is linked in immediately after
+// t's own parent entry. Otherwise a new top-level tree (pt), one level above t, is
+// created with two entries (for t and nt) and installed as the root's tree.
+// Returns STATUS_SUCCESS on success, STATUS_INSUFFICIENT_RESOURCES if an allocation
+// fails, or STATUS_INTERNAL_ERROR if a new parent would be needed above level 255.
+// NOTE(review): the failure returns that happen after nt has been linked in do not
+// undo the changes already made to t - confirm that callers treat them as fatal.
+static NTSTATUS STDCALL split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, UINT32 numitems, UINT32 size) {
+ tree *nt, *pt;
+ tree_data* td;
+ tree_data* oldlastitem;
+// write_tree* wt2;
+// // tree_data *firsttd, *lasttd;
+// // LIST_ENTRY* le;
+// #ifdef DEBUG_PARANOID
+// KEY lastkey1, lastkey2;
+// traverse_ptr tp, next_tp;
+// ULONG numitems1, numitems2;
+// #endif
+
+ TRACE("splitting tree in %llx at (%llx,%x,%llx)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset);
+
+// #ifdef DEBUG_PARANOID
+// lastkey1.obj_id = 0xffffffffffffffff;
+// lastkey1.obj_type = 0xff;
+// lastkey1.offset = 0xffffffffffffffff;
+//
+// if (!find_item(Vcb, t->root, &tp, &lastkey1, NULL, FALSE))
+// ERR("error - find_item failed\n");
+// else {
+// lastkey1 = tp.item->key;
+// numitems1 = 0;
+// while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
+// free_traverse_ptr(&tp);
+// tp = next_tp;
+// numitems1++;
+// }
+// free_traverse_ptr(&tp);
+// }
+// #endif
+
+ // allocate the new sibling tree and start it off as a copy of t's header
+ nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
+ if (!nt) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header));
+ nt->header.address = 0;
+ nt->header.generation = Vcb->superblock.generation;
+ // nt receives every item that t is not keeping
+ nt->header.num_items = t->header.num_items - numitems;
+ nt->header.flags = HEADER_FLAG_MIXED_BACKREF;
+
+ nt->has_address = FALSE;
+ nt->Vcb = Vcb;
+ nt->parent = t->parent;
+
+#ifdef DEBUG_PARANOID
+ if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
+#endif
+
+ nt->root = t->root;
+// nt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
+ nt->new_address = 0;
+ nt->has_new_address = FALSE;
+ nt->updated_extents = FALSE;
+ nt->flags = t->flags;
+ InitializeListHead(&nt->itemlist);
+
+// ExInitializeResourceLite(&nt->nonpaged->load_tree_lock);
+
+ // the entry just before the split point becomes t's new last entry
+ oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry);
+
+// // firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
+// // lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
+// //
+// // TRACE("old tree in %x was from (%x,%x,%x) to (%x,%x,%x)\n",
+// // (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
+// // (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
+// //
+// // le = wt->tree->itemlist.Flink;
+// // while (le != &wt->tree->itemlist) {
+// // td = CONTAINING_RECORD(le, tree_data, list_entry);
+// // TRACE("old tree item was (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
+// // le = le->Flink;
+// // }
+
+ // hand the tail of t's list (newfirstitem .. old last entry) to nt by relinking
+ // the two list heads, then close t's list off at oldlastitem
+ nt->itemlist.Flink = &newfirstitem->list_entry;
+ nt->itemlist.Blink = t->itemlist.Blink;
+ nt->itemlist.Flink->Blink = &nt->itemlist;
+ nt->itemlist.Blink->Flink = &nt->itemlist;
+
+ t->itemlist.Blink = &oldlastitem->list_entry;
+ t->itemlist.Blink->Flink = &t->itemlist;
+
+// // le = wt->tree->itemlist.Flink;
+// // while (le != &wt->tree->itemlist) {
+// // td = CONTAINING_RECORD(le, tree_data, list_entry);
+// // TRACE("old tree item now (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
+// // le = le->Flink;
+// // }
+// //
+// // firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
+// // lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
+// //
+// // TRACE("old tree in %x is now from (%x,%x,%x) to (%x,%x,%x)\n",
+// // (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
+// // (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
+
+ // divide the byte and item counts between the two halves; nt must be written out
+ nt->size = t->size - size;
+ t->size = size;
+ t->header.num_items = numitems;
+ nt->write = TRUE;
+
+ InterlockedIncrement(&Vcb->open_trees);
+ InsertTailList(&Vcb->trees, &nt->list_entry);
+
+// // // TESTING
+// // td = wt->tree->items;
+// // while (td) {
+// // if (!td->ignore) {
+// // TRACE("old tree item: (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
+// // }
+// // td = td->next;
+// // }
+
+// // oldlastitem->next = NULL;
+// // wt->tree->lastitem = oldlastitem;
+
+// // TRACE("last item is now (%x,%x,%x)\n", (UINT32)oldlastitem->key.obj_id, oldlastitem->key.obj_type, (UINT32)oldlastitem->key.offset);
+
+ // for an internal node, repoint any already-loaded child trees that moved to nt
+ if (nt->header.level > 0) {
+ LIST_ENTRY* le = nt->itemlist.Flink;
+
+ while (le != &nt->itemlist) {
+ tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
+
+ if (td2->treeholder.tree) {
+ td2->treeholder.tree->parent = nt;
+#ifdef DEBUG_PARANOID
+ if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3;
+#endif
+ }
+
+ le = le->Flink;
+ }
+ }
+
+ // case 1: t already has a parent - add an entry for nt right after t's entry
+ if (nt->parent) {
+ td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
+ if (!td) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ td->key = newfirstitem->key;
+
+ InsertHeadList(&t->paritem->list_entry, &td->list_entry);
+
+ td->ignore = FALSE;
+ td->inserted = TRUE;
+ td->treeholder.tree = nt;
+// td->treeholder.nonpaged->status = tree_holder_loaded;
+ nt->paritem = td;
+
+ nt->parent->header.num_items++;
+ nt->parent->size += sizeof(internal_node);
+
+ goto end;
+ }
+
+ // case 2: t had no parent - build a new top-level tree holding entries for t and nt
+ TRACE("adding new tree parent\n");
+
+ if (nt->header.level == 255) {
+ ERR("cannot add parent to tree at level 255\n");
+ return STATUS_INTERNAL_ERROR;
+ }
+
+ pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
+ if (!pt) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header));
+ pt->header.address = 0;
+ pt->header.num_items = 2;
+ pt->header.level = nt->header.level + 1;
+ pt->header.flags = HEADER_FLAG_MIXED_BACKREF | HEADER_FLAG_WRITTEN;
+
+ pt->has_address = FALSE;
+ pt->Vcb = Vcb;
+ pt->parent = NULL;
+ pt->paritem = NULL;
+ pt->root = t->root;
+ pt->new_address = 0;
+ pt->has_new_address = FALSE;
+ pt->updated_extents = FALSE;
+// pt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
+ pt->size = pt->header.num_items * sizeof(internal_node);
+ pt->flags = t->flags;
+ InitializeListHead(&pt->itemlist);
+
+// ExInitializeResourceLite(&pt->nonpaged->load_tree_lock);
+
+ InterlockedIncrement(&Vcb->open_trees);
+ InsertTailList(&Vcb->trees, &pt->list_entry);
+
+ // first entry of the new parent: points at t, keyed by t's first item
+ td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
+ if (!td) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ get_first_item(t, &td->key);
+ td->ignore = FALSE;
+ td->inserted = FALSE;
+ td->treeholder.address = 0;
+ td->treeholder.generation = Vcb->superblock.generation;
+ td->treeholder.tree = t;
+// td->treeholder.nonpaged->status = tree_holder_loaded;
+ InsertTailList(&pt->itemlist, &td->list_entry);
+ t->paritem = td;
+
+ // second entry of the new parent: points at nt, keyed by the split point
+ td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
+ if (!td) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ td->key = newfirstitem->key;
+ td->ignore = FALSE;
+ td->inserted = FALSE;
+ td->treeholder.address = 0;
+ td->treeholder.generation = Vcb->superblock.generation;
+ td->treeholder.tree = nt;
+// td->treeholder.nonpaged->status = tree_holder_loaded;
+ InsertTailList(&pt->itemlist, &td->list_entry);
+ nt->paritem = td;
+
+ pt->write = TRUE;
+
+ // the new parent becomes the top tree of this root
+ t->root->treeholder.tree = pt;
+
+ t->parent = pt;
+ nt->parent = pt;
+
+#ifdef DEBUG_PARANOID
+ if (t->parent && t->parent->header.level <= t->header.level) int3;
+ if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
+#endif
+
+end:
+ // both successful paths add one node's worth of bytes to the root's usage
+ t->root->root_item.bytes_used += Vcb->superblock.node_size;
+
+// #ifdef DEBUG_PARANOID
+// lastkey2.obj_id = 0xffffffffffffffff;
+// lastkey2.obj_type = 0xff;
+// lastkey2.offset = 0xffffffffffffffff;
+//
+// if (!find_item(Vcb, wt->tree->root, &tp, &lastkey2, NULL, FALSE))
+// ERR("error - find_item failed\n");
+// else {
+// lastkey2 = tp.item->key;
+//
+// numitems2 = 0;
+// while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
+// free_traverse_ptr(&tp);
+// tp = next_tp;
+// numitems2++;
+// }
+// free_traverse_ptr(&tp);
+// }
+//
+// ERR("lastkey1 = %llx,%x,%llx\n", lastkey1.obj_id, lastkey1.obj_type, lastkey1.offset);
+// ERR("lastkey2 = %llx,%x,%llx\n", lastkey2.obj_id, lastkey2.obj_type, lastkey2.offset);
+// ERR("numitems1 = %u\n", numitems1);
+// ERR("numitems2 = %u\n", numitems2);
+// #endif
+
+ return STATUS_SUCCESS;
+}
+
+// Walks the non-ignored items of t, summing their on-disk sizes, and splits the tree
+// (via split_tree_at) at the first item that would push the running total past the
+// space available in one node. Returns STATUS_SUCCESS without splitting if every
+// item fits; otherwise returns whatever split_tree_at returns.
+static NTSTATUS STDCALL split_tree(device_extension* Vcb, tree* t) {
+    UINT32 used = 0, kept = 0;
+    LIST_ENTRY* entry;
+
+    // FIXME - naïve implementation: maximizes number of filled trees
+
+    for (entry = t->itemlist.Flink; entry != &t->itemlist; entry = entry->Flink) {
+        tree_data* item = CONTAINING_RECORD(entry, tree_data, list_entry);
+        UINT32 item_size;
+
+        if (item->ignore)
+            continue;
+
+        // leaves store a leaf_node header plus the payload; internal nodes a fixed-size entry
+        item_size = t->header.level == 0 ? sizeof(leaf_node) + item->size : sizeof(internal_node);
+
+        // FIXME - move back if previous item was deleted item with same key
+        if (used + item_size > Vcb->superblock.node_size - sizeof(tree_header))
+            return split_tree_at(Vcb, t, item, kept, used);
+
+        used += item_size;
+        kept++;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Looks up the extent-tree record for t's on-disk block (t->header.address).
+// Returns TRUE only if a METADATA_ITEM or EXTENT_ITEM is found at that address, it is
+// not in the EXTENT_ITEM_V0 format, its refcount is at most 1, and the byte following
+// the fixed part of the item is TYPE_TREE_BLOCK_REF. Any lookup failure gives FALSE.
+static BOOL tree_block_is_exclusive(device_extension* Vcb, tree* t, PIRP Irp) {
+    KEY searchkey;
+    traverse_ptr tp;
+    NTSTATUS Status;
+    EXTENT_ITEM* ei;
+    UINT8* type;
+
+    searchkey.obj_id = t->header.address;
+    searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
+    searchkey.offset = 0xffffffffffffffff;
+
+    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return FALSE;
+    }
+
+    if (tp.item->key.obj_id != t->header.address)
+        return FALSE;
+
+    if (tp.item->key.obj_type != TYPE_METADATA_ITEM && tp.item->key.obj_type != TYPE_EXTENT_ITEM)
+        return FALSE;
+
+    if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->size == sizeof(EXTENT_ITEM_V0))
+        return FALSE;
+
+    if (tp.item->size < sizeof(EXTENT_ITEM))
+        return FALSE;
+
+    ei = (EXTENT_ITEM*)tp.item->data;
+
+    if (ei->refcount > 1)
+        return FALSE;
+
+    // the reference type byte follows the EXTENT_ITEM, after an extra EXTENT_ITEM2
+    // when this is a non-skinny tree-block extent
+    type = (UINT8*)&ei[1];
+
+    if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) {
+        if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2))
+            return FALSE;
+
+        type += sizeof(EXTENT_ITEM2);
+    }
+
+    if (type >= tp.item->data + tp.item->size)
+        return FALSE;
+
+    if (*type != TYPE_TREE_BLOCK_REF)
+        return FALSE;
+
+    return TRUE;
+}
+
+// Returns TRUE if t and every ancestor of t pass the exclusivity check above.
+// Trees without an on-disk address (has_address clear) are skipped.
+BOOL is_tree_unique(device_extension* Vcb, tree* t, PIRP Irp) {
+    do {
+        if (t->has_address && !tree_block_is_exclusive(Vcb, t, Irp))
+            return FALSE;
+
+        t = t->parent;
+    } while (t);
+
+    return TRUE;
+}
+
+// Tries to combine tree t with the next non-ignored sibling that follows it in its
+// parent's item list. If the two fit in a single node, the sibling's items are
+// appended to t and the sibling is removed. Otherwise items are moved from the front
+// of the sibling into t until t reaches the average of the two sizes, the sibling is
+// down to one item, or the next item would not fit.
+// Returns STATUS_SUCCESS when there is nothing to do (no following sibling, or the
+// sibling fails is_tree_unique) as well as on success; otherwise returns the failing
+// status from do_load_tree or reduce_tree_extent.
+// Dereferences t->paritem and t->parent without checking them.
+static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
+ LIST_ENTRY* le;
+ tree_data* nextparitem = NULL;
+ NTSTATUS Status;
+ tree *next_tree, *par;
+ BOOL loaded;
+
+ TRACE("trying to amalgamate tree in root %llx, level %x (size %u)\n", t->root->id, t->header.level, t->size);
+
+ // FIXME - doesn't capture everything, as it doesn't ascend
+ // FIXME - write proper function and put it in treefuncs.c
+ // find the first non-ignored parent entry after t's own entry
+ le = t->paritem->list_entry.Flink;
+ while (le != &t->parent->itemlist) {
+ tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
+
+ if (!td->ignore) {
+ nextparitem = td;
+ break;
+ }
+
+ le = le->Flink;
+ }
+
+ if (!nextparitem)
+ return STATUS_SUCCESS;
+
+ // FIXME - loop, and capture more than one tree if we can
+
+ TRACE("nextparitem: key = %llx,%x,%llx\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset);
+// nextparitem = t->paritem;
+
+// ExAcquireResourceExclusiveLite(&t->parent->nonpaged->load_tree_lock, TRUE);
+
+ Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, &loaded, NULL);
+ if (!NT_SUCCESS(Status)) {
+ ERR("do_load_tree returned %08x\n", Status);
+ return Status;
+ }
+
+ // leave the sibling alone unless is_tree_unique accepts it
+ if (!is_tree_unique(Vcb, nextparitem->treeholder.tree, Irp))
+ return STATUS_SUCCESS;
+
+// ExReleaseResourceLite(&t->parent->nonpaged->load_tree_lock);
+
+ next_tree = nextparitem->treeholder.tree;
+
+ if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) {
+ // merge two trees into one
+
+ t->header.num_items += next_tree->header.num_items;
+ t->size += next_tree->size;
+
+ // for internal nodes, repoint the sibling's loaded children at t
+ if (next_tree->header.level > 0) {
+ le = next_tree->itemlist.Flink;
+
+ while (le != &next_tree->itemlist) {
+ tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
+
+ if (td2->treeholder.tree) {
+ td2->treeholder.tree->parent = t;
+#ifdef DEBUG_PARANOID
+ if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3;
+#endif
+ }
+
+ le = le->Flink;
+ }
+ }
+
+ // splice the sibling's whole item list onto the end of t's list
+ t->itemlist.Blink->Flink = next_tree->itemlist.Flink;
+ t->itemlist.Blink->Flink->Blink = t->itemlist.Blink;
+ t->itemlist.Blink = next_tree->itemlist.Blink;
+ t->itemlist.Blink->Flink = &t->itemlist;
+
+// // TESTING
+// le = t->itemlist.Flink;
+// while (le != &t->itemlist) {
+// tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
+// if (!td->ignore) {
+// ERR("key: %llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
+// }
+// le = le->Flink;
+// }
+
+ // the sibling is now empty
+ next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist;
+
+ next_tree->header.num_items = 0;
+ next_tree->size = 0;
+
+ if (next_tree->has_new_address) { // delete associated EXTENT_ITEM
+ Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, Irp, rollback);
+
+ if (!NT_SUCCESS(Status)) {
+ ERR("reduce_tree_extent returned %08x\n", Status);
+ return Status;
+ }
+ } else if (next_tree->has_address) {
+ Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, Irp, rollback);
+
+ if (!NT_SUCCESS(Status)) {
+ ERR("reduce_tree_extent returned %08x\n", Status);
+ return Status;
+ }
+ }
+
+ // take the sibling's entry out of the parent's counts
+ if (!nextparitem->ignore) {
+ nextparitem->ignore = TRUE;
+ next_tree->parent->header.num_items--;
+ next_tree->parent->size -= sizeof(internal_node);
+ }
+
+ // every ancestor of the removed sibling must be rewritten
+ par = next_tree->parent;
+ while (par) {
+ par->write = TRUE;
+ par = par->parent;
+ }
+
+ // NOTE(review): other tree_data entries in this file come from
+ // Vcb->tree_data_lookaside, but this one is released with ExFreePool - confirm
+ RemoveEntryList(&nextparitem->list_entry);
+ ExFreePool(next_tree->paritem);
+ next_tree->paritem = NULL;
+
+ next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size;
+
+ free_tree(next_tree);
+ } else {
+ // rebalance by moving items from second tree into first
+ ULONG avg_size = (t->size + next_tree->size) / 2;
+ KEY firstitem = {0, 0, 0};
+ BOOL changed = FALSE;
+
+ TRACE("attempting rebalance\n");
+
+ // always takes the current head of the sibling's list; le is re-read after each move
+ le = next_tree->itemlist.Flink;
+ while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) {
+ tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
+ ULONG size;
+
+ // ignored entries are moved too, but count as zero bytes
+ if (!td->ignore) {
+ if (next_tree->header.level == 0)
+ size = sizeof(leaf_node) + td->size;
+ else
+ size = sizeof(internal_node);
+ } else
+ size = 0;
+
+ if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) {
+ RemoveEntryList(&td->list_entry);
+ InsertTailList(&t->itemlist, &td->list_entry);
+
+ if (next_tree->header.level > 0 && td->treeholder.tree) {
+ td->treeholder.tree->parent = t;
+#ifdef DEBUG_PARANOID
+ if (td->treeholder.tree->parent && td->treeholder.tree->parent->header.level <= td->treeholder.tree->header.level) int3;
+#endif
+ }
+
+ if (!td->ignore) {
+ next_tree->size -= size;
+ t->size += size;
+ next_tree->header.num_items--;
+ t->header.num_items++;
+ }
+
+ changed = TRUE;
+ } else
+ break;
+
+ le = next_tree->itemlist.Flink;
+ }
+
+ if (changed) {
+ // the sibling's first key has changed, so refresh the key held in its parent entry
+ le = next_tree->itemlist.Flink;
+ while (le != &next_tree->itemlist) {
+ tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
+
+ if (!td->ignore) {
+ firstitem = td->key;
+ break;
+ }
+
+ le = le->Flink;
+ }
+
+ // ERR("firstitem = %llx,%x,%llx\n", firstitem.obj_id, firstitem.obj_type, firstitem.offset);
+
+ // FIXME - once ascension is working, make this work with parent's parent, etc.
+ if (next_tree->paritem)
+ next_tree->paritem->key = firstitem;
+
+ // flag the sibling and all of its ancestors for writing
+ par = next_tree;
+ while (par) {
+ par->write = TRUE;
+ par = par->parent;
+ }
+ }
+ }
+
+ return STATUS_SUCCESS;
+}
+
+// Rewrites the extent-tree record for the tree block at `address` so that it records
+// `level` as the block's level.
+//
+// With skinny metadata enabled, the METADATA_ITEM keyed (address, t->header.level) is
+// looked up first; if present it is deleted and re-inserted with `level` as the key
+// offset, carrying the same payload. Otherwise (or if that item is not found) the
+// EXTENT_ITEM for `address` is copied, its level field is set, and it is re-inserted
+// under its existing key.
+//
+// Returns STATUS_SUCCESS on success, the failing status from find_item,
+// STATUS_INSUFFICIENT_RESOURCES on allocation failure, or STATUS_INTERNAL_ERROR if the
+// item is too short, cannot be re-inserted, or no matching item exists.
+static NTSTATUS update_extent_level(device_extension* Vcb, UINT64 address, tree* t, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) {
+    KEY searchkey;
+    traverse_ptr tp;
+    NTSTATUS Status;
+
+    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
+        searchkey.obj_id = address;
+        searchkey.obj_type = TYPE_METADATA_ITEM;
+        searchkey.offset = t->header.level;
+
+        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            return Status;
+        }
+
+        if (!keycmp(tp.item->key, searchkey)) {
+            EXTENT_ITEM_SKINNY_METADATA* eism;
+
+            // a zero-length item has no payload to copy, so eism stays NULL
+            if (tp.item->size > 0) {
+                eism = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
+
+                if (!eism) {
+                    ERR("out of memory\n");
+                    return STATUS_INSUFFICIENT_RESOURCES;
+                }
+
+                RtlCopyMemory(eism, tp.item->data, tp.item->size);
+            } else
+                eism = NULL;
+
+            delete_tree_item(Vcb, &tp, rollback);
+
+            if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, tp.item->size, NULL, Irp, rollback)) {
+                ERR("insert_tree_item failed\n");
+
+                // ExFreePool is not documented to accept NULL, so only free a real buffer
+                if (eism)
+                    ExFreePool(eism);
+
+                return STATUS_INTERNAL_ERROR;
+            }
+
+            return STATUS_SUCCESS;
+        }
+    }
+
+    // fall back to the full EXTENT_ITEM for this address
+    searchkey.obj_id = address;
+    searchkey.obj_type = TYPE_EXTENT_ITEM;
+    searchkey.offset = 0xffffffffffffffff;
+
+    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return Status;
+    }
+
+    if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
+        EXTENT_ITEM_TREE* eit;
+
+        if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
+            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
+            return STATUS_INTERNAL_ERROR;
+        }
+
+        eit = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
+
+        if (!eit) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        RtlCopyMemory(eit, tp.item->data, tp.item->size);
+
+        delete_tree_item(Vcb, &tp, rollback);
+
+        eit->level = level;
+
+        if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, eit, tp.item->size, NULL, Irp, rollback)) {
+            ERR("insert_tree_item failed\n");
+            ExFreePool(eit);
+            return STATUS_INTERNAL_ERROR;
+        }
+
+        return STATUS_SUCCESS;
+    }
+
+    ERR("could not find EXTENT_ITEM for address %llx\n", address);
+
+    return STATUS_INTERNAL_ERROR;
+}
+
+// Works through the trees in Vcb->trees that are flagged for writing, in three passes:
+//  1. level by level from 0 upwards: a tree with no items is removed if it has a
+//     parent, or reset to level 0 if it does not; a tree larger than one node is
+//     split with split_tree. The pass stops at the first level with no flagged trees.
+//  2. for each level seen in pass 1: a non-empty tree that has a parent, is under half
+//     a node in size and passes is_tree_unique is handed to try_tree_amalgamate.
+//  3. only if pass 1 removed a tree: from the highest level down to 1, a parentless
+//     tree holding a single item is dropped and its child becomes the root's top tree.
+// Returns STATUS_SUCCESS, or the first failing status from a helper.
+static NTSTATUS STDCALL do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
+// LIST_ENTRY *le, *le2;
+// write_tree* wt;
+// tree_data* td;
+ UINT8 level, max_level;
+ UINT32 min_size;
+ BOOL empty, done_deletions = FALSE;
+ NTSTATUS Status;
+ tree* t;
+
+ TRACE("(%p)\n", Vcb);
+
+ max_level = 0;
+
+ // NOTE(review): level is a UINT8, so "level <= 255" looks always true - the loop
+ // appears to rely on the break below (first level with nothing to do) to terminate
+ for (level = 0; level <= 255; level++) {
+ LIST_ENTRY *le, *nextle;
+
+ empty = TRUE;
+
+ TRACE("doing level %u\n", level);
+
+ le = Vcb->trees.Flink;
+
+ while (le != &Vcb->trees) {
+ t = CONTAINING_RECORD(le, tree, list_entry);
+
+ // saved up front because t may be freed below
+ nextle = le->Flink;
+
+ if (t->write && t->header.level == level) {
+ empty = FALSE;
+
+ if (t->header.num_items == 0) {
+ if (t->parent) {
+ LIST_ENTRY* le2;
+ KEY firstitem = {0xcccccccccccccccc,0xcc,0xcccccccccccccccc};
+#ifdef __REACTOS__
+ (void)firstitem;
+#endif
+
+ done_deletions = TRUE;
+
+ // firstitem is only used for the trace message below
+ le2 = t->itemlist.Flink;
+ while (le2 != &t->itemlist) {
+ tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
+ firstitem = td->key;
+ break;
+ }
+
+ TRACE("deleting tree in root %llx (first item was %llx,%x,%llx)\n",
+ t->root->id, firstitem.obj_id, firstitem.obj_type, firstitem.offset);
+
+ t->root->root_item.bytes_used -= Vcb->superblock.node_size;
+
+ if (t->has_new_address) { // delete associated EXTENT_ITEM
+ Status = reduce_tree_extent(Vcb, t->new_address, t, Irp, rollback);
+
+ if (!NT_SUCCESS(Status)) {
+ ERR("reduce_tree_extent returned %08x\n", Status);
+ return Status;
+ }
+
+ t->has_new_address = FALSE;
+ } else if (t->has_address) {
+ Status = reduce_tree_extent(Vcb,t->header.address, t, Irp, rollback);
+
+ if (!NT_SUCCESS(Status)) {
+ ERR("reduce_tree_extent returned %08x\n", Status);
+ return Status;
+ }
+
+ t->has_address = FALSE;
+ }
+
+ // take this tree's entry out of the parent's counts, then unlink and free it
+ if (!t->paritem->ignore) {
+ t->paritem->ignore = TRUE;
+ t->parent->header.num_items--;
+ t->parent->size -= sizeof(internal_node);
+ }
+
+ RemoveEntryList(&t->paritem->list_entry);
+ ExFreePool(t->paritem);
+ t->paritem = NULL;
+
+ free_tree(t);
+ } else if (t->header.level != 0) {
+ // an empty tree with no parent is kept, but reset to level 0
+ if (t->has_new_address) {
+ Status = update_extent_level(Vcb, t->new_address, t, 0, Irp, rollback);
+
+ if (!NT_SUCCESS(Status)) {
+ ERR("update_extent_level returned %08x\n", Status);
+ return Status;
+ }
+ }
+
+ t->header.level = 0;
+ }
+ } else if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
+ TRACE("splitting overlarge tree (%x > %x)\n", t->size, Vcb->superblock.node_size - sizeof(tree_header));
+ Status = split_tree(Vcb, t);
+
+ if (!NT_SUCCESS(Status)) {
+ ERR("split_tree returned %08x\n", Status);
+ return Status;
+ }
+ }
+ }
+
+ le = nextle;
+ }
+
+ if (!empty) {
+ max_level = level;
+ } else {
+ TRACE("nothing found for level %u\n", level);
+ break;
+ }
+ }
+
+ // trees smaller than half a node's usable space are candidates for merging
+ min_size = (Vcb->superblock.node_size - sizeof(tree_header)) / 2;
+
+ for (level = 0; level <= max_level; level++) {
+ LIST_ENTRY* le;
+
+ le = Vcb->trees.Flink;
+
+ while (le != &Vcb->trees) {
+ t = CONTAINING_RECORD(le, tree, list_entry);
+
+ if (t->write && t->header.level == level && t->header.num_items > 0 && t->parent && t->size < min_size && is_tree_unique(Vcb, t, Irp)) {
+ Status = try_tree_amalgamate(Vcb, t, Irp, rollback);
+ if (!NT_SUCCESS(Status)) {
+ ERR("try_tree_amalgamate returned %08x\n", Status);
+ return Status;
+ }
+ }
+
+ le = le->Flink;
+ }
+ }
+
+ // simplify trees if top tree only has one entry
+
+ if (done_deletions) {
+ for (level = max_level; level > 0; level--) {
+ LIST_ENTRY *le, *nextle;
+
+ le = Vcb->trees.Flink;
+ while (le != &Vcb->trees) {
+ nextle = le->Flink;
+ t = CONTAINING_RECORD(le, tree, list_entry);
+
+ if (t->write && t->header.level == level) {
+ if (!t->parent && t->header.num_items == 1) {
+ LIST_ENTRY* le2 = t->itemlist.Flink;
+ tree_data* td;
+ tree* child_tree = NULL;
+
+ // find the one remaining non-ignored entry
+ // NOTE(review): td is only assigned inside this loop; the code after it
+ // relies on the list holding at least one entry
+ while (le2 != &t->itemlist) {
+ td = CONTAINING_RECORD(le2, tree_data, list_entry);
+ if (!td->ignore)
+ break;
+ le2 = le2->Flink;
+ }
+
+ TRACE("deleting top-level tree in root %llx with one item\n", t->root->id);
+
+ if (t->has_new_address) { // delete associated EXTENT_ITEM
+ Status = reduce_tree_extent(Vcb, t->new_address, t, Irp, rollback);
+
+ if (!NT_SUCCESS(Status)) {
+ ERR("reduce_tree_extent returned %08x\n", Status);
+ return Status;
+ }
+
+ t->has_new_address = FALSE;
+ } else if (t->has_address) {
+ Status = reduce_tree_extent(Vcb,t->header.address, t, Irp, rollback);
+
+ if (!NT_SUCCESS(Status)) {
+ ERR("reduce_tree_extent returned %08x\n", Status);
+ return Status;
+ }
+
+ t->has_address = FALSE;
+ }
+
+ // the result in tp is not used; presumably the search is done for its
+ // side effect of loading the child tree into td->treeholder - confirm
+ if (!td->treeholder.tree) { // load first item if not already loaded
+ KEY searchkey = {0,0,0};
+ traverse_ptr tp;
+
+ Status = find_item(Vcb, t->root, &tp, &searchkey, FALSE, Irp);
+ if (!NT_SUCCESS(Status)) {
+ ERR("error - find_item returned %08x\n", Status);
+ return Status;
+ }
+ }
+
+ child_tree = td->treeholder.tree;
+
+ // detach the child from t before t is freed
+ if (child_tree) {
+ child_tree->parent = NULL;
+ child_tree->paritem = NULL;
+ }
+
+ t->root->root_item.bytes_used -= Vcb->superblock.node_size;
+
+ free_tree(t);
+
+ // the child takes over as the root's top tree
+ if (child_tree)
+ child_tree->root->treeholder.tree = child_tree;
+ }
+ }
+
+ le = nextle;
+ }
+ }
+ }
+
+ return STATUS_SUCCESS;
+}
+
+// Recursively releases the extents of the tree referenced by th, children first.
+// When level is above 0 the tree is loaded if necessary and, if it is an internal
+// node, every non-ignored child is processed before the node itself.
+// reduce_tree_extent is then called for th->address unless a tree is loaded for th
+// and that tree has no on-disk address.
+// Returns STATUS_SUCCESS, or the first failing status from load_tree,
+// reduce_tree_extent or a recursive call.
+static NTSTATUS remove_root_extents(device_extension* Vcb, root* r, tree_holder* th, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
+
+    if (level > 0) {
+        if (!th->tree) {
+            Status = load_tree(Vcb, th->address, r, &th->tree, NULL, NULL);
+
+            if (!NT_SUCCESS(Status)) {
+                ERR("load_tree(%llx) returned %08x\n", th->address, Status);
+                return Status;
+            }
+        }
+
+        if (th->tree->header.level > 0) {
+            LIST_ENTRY* entry;
+
+            for (entry = th->tree->itemlist.Flink; entry != &th->tree->itemlist; entry = entry->Flink) {
+                tree_data* child = CONTAINING_RECORD(entry, tree_data, list_entry);
+
+                if (child->ignore)
+                    continue;
+
+                Status = remove_root_extents(Vcb, r, &child->treeholder, th->tree->header.level - 1, Irp, rollback);
+
+                if (!NT_SUCCESS(Status)) {
+                    ERR("remove_root_extents returned %08x\n", Status);
+                    return Status;
+                }
+            }
+        }
+    }
+
+    // a loaded tree without an on-disk address has no extent to release
+    if (th->tree && !th->tree->has_address)
+        return STATUS_SUCCESS;
+
+    Status = reduce_tree_extent(Vcb, th->address, NULL, Irp, rollback);
+
+    if (!NT_SUCCESS(Status)) {
+        ERR("reduce_tree_extent(%llx) returned %08x\n", th->address, Status);
+        return Status;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Removes root r from the volume: releases the extents of all its tree blocks,
+// deletes its entry from the uuid tree (when a uuid tree exists and the root has a
+// non-zero UUID), deletes its ROOT_ITEM from the root tree, and finally discards the
+// root's cached trees. Problems in the uuid-tree step, and a missing ROOT_ITEM, are
+// only logged as warnings.
+// Returns STATUS_SUCCESS, or the failing status from remove_root_extents or from the
+// ROOT_ITEM lookup.
+static NTSTATUS drop_root(device_extension* Vcb, root* r, PIRP Irp, LIST_ENTRY* rollback) {
+    KEY searchkey;
+    traverse_ptr tp;
+    NTSTATUS Status;
+
+    Status = remove_root_extents(Vcb, r, &r->treeholder, r->root_item.root_level, Irp, rollback);
+    if (!NT_SUCCESS(Status)) {
+        ERR("remove_root_extents returned %08x\n", Status);
+        return Status;
+    }
+
+    // remove entry in uuid root (tree 9)
+    if (Vcb->uuid_root) {
+        // the key is the UUID itself: first 8 bytes as obj_id, last 8 bytes as offset
+        RtlCopyMemory(&searchkey.obj_id, &r->root_item.uuid.uuid[0], sizeof(UINT64));
+        RtlCopyMemory(&searchkey.offset, &r->root_item.uuid.uuid[sizeof(UINT64)], sizeof(UINT64));
+        searchkey.obj_type = TYPE_SUBVOL_UUID;
+
+        if (!(searchkey.obj_id == 0 && searchkey.offset == 0)) {
+            Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE, Irp);
+
+            if (NT_SUCCESS(Status)) {
+                if (!keycmp(tp.item->key, searchkey))
+                    delete_tree_item(Vcb, &tp, rollback);
+                else
+                    WARN("could not find (%llx,%x,%llx) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
+            } else {
+                WARN("find_item returned %08x\n", Status);
+            }
+        }
+    }
+
+    // delete ROOT_ITEM
+
+    searchkey.obj_id = r->id;
+    searchkey.obj_type = TYPE_ROOT_ITEM;
+    searchkey.offset = 0xffffffffffffffff;
+
+    Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("find_item returned %08x\n", Status);
+        return Status;
+    }
+
+    if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type)
+        WARN("could not find (%llx,%x,%llx) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
+    else
+        delete_tree_item(Vcb, &tp, rollback);
+
+    // delete items in tree cache
+
+    free_trees_root(Vcb, r);
+
+    return STATUS_SUCCESS;
+}
+
+// Calls drop_root on every root queued in Vcb->drop_roots, stopping at and returning
+// the first failure. Returns STATUS_SUCCESS if all of them succeed.
+static NTSTATUS drop_roots(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
+    LIST_ENTRY *entry, *next;
+
+    for (entry = Vcb->drop_roots.Flink; entry != &Vcb->drop_roots; entry = next) {
+        root* r = CONTAINING_RECORD(entry, root, list_entry);
+        NTSTATUS Status;
+
+        // pick up the successor before drop_root runs
+        next = entry->Flink;
+
+        Status = drop_root(Vcb, r, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("drop_root(%llx) returned %08x\n", r->id, Status);
+            return Status;
+        }
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Replaces the DEV_ITEM stored in the chunk tree for `device` with a fresh copy of
+// device->devitem: the existing item keyed (1, TYPE_DEV_ITEM, dev_id) is deleted and
+// a newly allocated copy is inserted under the same key.
+//
+// Returns STATUS_SUCCESS on success, the failing status from find_item,
+// STATUS_INSUFFICIENT_RESOURCES on allocation failure, or STATUS_INTERNAL_ERROR if
+// the existing item is missing or the new one cannot be inserted.
+static NTSTATUS update_dev_item(device_extension* Vcb, device* device, PIRP Irp, LIST_ENTRY* rollback) {
+    KEY searchkey;
+    traverse_ptr tp;
+    DEV_ITEM* di;
+    NTSTATUS Status;
+
+    searchkey.obj_id = 1;
+    searchkey.obj_type = TYPE_DEV_ITEM;
+    searchkey.offset = device->devitem.dev_id;
+
+    Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return Status;
+    }
+
+    if (keycmp(tp.item->key, searchkey)) {
+        ERR("error - could not find DEV_ITEM for device %llx\n", device->devitem.dev_id);
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    delete_tree_item(Vcb, &tp, rollback);
+
+    di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
+    if (!di) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    RtlCopyMemory(di, &device->devitem, sizeof(DEV_ITEM));
+
+    if (!insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp, rollback)) {
+        ERR("insert_tree_item failed\n");
+        // the buffer was not taken over by the tree, so release it here
+        // (same handling as the other insert_tree_item callers in this file)
+        ExFreePool(di);
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Rebuilds Vcb->superblock.sys_chunk_array from the Vcb->sys_chunks list: for each
+// entry, in list order, the KEY is written followed by the entry's data bytes.
+static void regen_bootstrap(device_extension* Vcb) {
+    USHORT pos = 0;
+    LIST_ENTRY* entry;
+
+    for (entry = Vcb->sys_chunks.Flink; entry != &Vcb->sys_chunks; entry = entry->Flink) {
+        sys_chunk* sc = CONTAINING_RECORD(entry, sys_chunk, list_entry);
+
+        TRACE("%llx,%x,%llx\n", sc->key.obj_id, sc->key.obj_type, sc->key.offset);
+
+        RtlCopyMemory(&Vcb->superblock.sys_chunk_array[pos], &sc->key, sizeof(KEY));
+        pos += sizeof(KEY);
+
+        RtlCopyMemory(&Vcb->superblock.sys_chunk_array[pos], sc->data, sc->size);
+        pos += sc->size;
+    }
+}
+
+// Adds a (key, data) entry to the sys_chunks list, keeping the list ordered by key,
+// then grows superblock.n by the entry's size and regenerates the superblock's
+// sys_chunk_array. The data is copied, so the caller keeps ownership of `data`.
+// Returns STATUS_INTERNAL_ERROR if the array would exceed SYS_CHUNK_ARRAY_SIZE,
+// STATUS_INSUFFICIENT_RESOURCES on allocation failure, STATUS_SUCCESS otherwise.
+static NTSTATUS add_to_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset, void* data, ULONG size) {
+    sys_chunk* sc;
+    LIST_ENTRY* pos;
+
+    if (Vcb->superblock.n + sizeof(KEY) + size > SYS_CHUNK_ARRAY_SIZE) {
+        ERR("error - bootstrap is full\n");
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    sc = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG);
+    if (!sc) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    sc->size = size;
+    sc->key.obj_id = obj_id;
+    sc->key.obj_type = obj_type;
+    sc->key.offset = offset;
+
+    sc->data = ExAllocatePoolWithTag(PagedPool, sc->size, ALLOC_TAG);
+    if (!sc->data) {
+        ERR("out of memory\n");
+        ExFreePool(sc);
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    RtlCopyMemory(sc->data, data, sc->size);
+
+    // stop at the first existing entry whose key sorts after the new one
+    // (or at the list head if there is none)
+    for (pos = Vcb->sys_chunks.Flink; pos != &Vcb->sys_chunks; pos = pos->Flink) {
+        sys_chunk* existing = CONTAINING_RECORD(pos, sys_chunk, list_entry);
+
+        if (keycmp(existing->key, sc->key) == 1)
+            break;
+    }
+
+    // InsertTailList on an entry links the new node immediately before it
+    InsertTailList(pos, &sc->list_entry);
+
+    Vcb->superblock.n += sizeof(KEY) + size;
+
+    regen_bootstrap(Vcb);
+
+    return STATUS_SUCCESS;
+}
+
+// Writes the tree items describing chunk c:
+//   - a copy of its CHUNK_ITEM into the chunk tree (and, for SYSTEM chunks, into the
+//     superblock bootstrap array as well),
+//   - a BLOCK_GROUP_ITEM into the extent tree,
+//   - one DEV_EXTENT per stripe into the device tree, refreshing the DEV_ITEM of the
+//     stripe's device each time.
+// Clears c->created on success.
+//
+// Returns STATUS_SUCCESS on success, STATUS_INSUFFICIENT_RESOURCES on allocation
+// failure, STATUS_INTERNAL_ERROR if any insert_tree_item call fails, or the failing
+// status from add_to_bootstrap / update_dev_item.
+static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp, LIST_ENTRY* rollback) {
+    CHUNK_ITEM* ci;
+    CHUNK_ITEM_STRIPE* cis;
+    BLOCK_GROUP_ITEM* bgi;
+    UINT16 i, factor;
+    NTSTATUS Status;
+
+    ci = ExAllocatePoolWithTag(PagedPool, c->size, ALLOC_TAG);
+    if (!ci) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    RtlCopyMemory(ci, c->chunk_item, c->size);
+
+    if (!insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size, NULL, Irp, rollback)) {
+        ERR("insert_tree_item failed\n");
+        ExFreePool(ci);
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    if (c->chunk_item->type & BLOCK_FLAG_SYSTEM) {
+        Status = add_to_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size);
+        if (!NT_SUCCESS(Status)) {
+            ERR("add_to_bootstrap returned %08x\n", Status);
+            return Status;
+        }
+    }
+
+    // add BLOCK_GROUP_ITEM to tree 2
+
+    bgi = ExAllocatePoolWithTag(PagedPool, sizeof(BLOCK_GROUP_ITEM), ALLOC_TAG);
+    if (!bgi) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    bgi->used = c->used;
+    bgi->chunk_tree = 0x100;
+    bgi->flags = c->chunk_item->type;
+
+    if (!insert_tree_item(Vcb, Vcb->extent_root, c->offset, TYPE_BLOCK_GROUP_ITEM, c->chunk_item->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, Irp, rollback)) {
+        ERR("insert_tree_item failed\n");
+        ExFreePool(bgi);
+        // an insert failure is not an allocation failure; report it the same way as
+        // the other insert_tree_item failures in this function
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    // factor = number of stripes that carry distinct data, so that
+    // chunk size / factor is the length of each per-device extent
+    if (c->chunk_item->type & BLOCK_FLAG_RAID0)
+        factor = c->chunk_item->num_stripes;
+    else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
+        factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
+    else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
+        factor = c->chunk_item->num_stripes - 1;
+    else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
+        factor = c->chunk_item->num_stripes - 2;
+    else // SINGLE, DUPLICATE, RAID1
+        factor = 1;
+
+    // add DEV_EXTENTs to tree 4
+
+    // the stripe descriptors follow directly after the CHUNK_ITEM header
+    cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
+
+    for (i = 0; i < c->chunk_item->num_stripes; i++) {
+        DEV_EXTENT* de;
+
+        de = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_EXTENT), ALLOC_TAG);
+        if (!de) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        de->chunktree = Vcb->chunk_root->id;
+        de->objid = 0x100;
+        de->address = c->offset;
+        de->length = c->chunk_item->size / factor;
+        de->chunktree_uuid = Vcb->chunk_root->treeholder.tree->header.chunk_tree_uuid;
+
+        if (!insert_tree_item(Vcb, Vcb->dev_root, c->devices[i]->devitem.dev_id, TYPE_DEV_EXTENT, cis[i].offset, de, sizeof(DEV_EXTENT), NULL, Irp, rollback)) {
+            ERR("insert_tree_item failed\n");
+            ExFreePool(de);
+            return STATUS_INTERNAL_ERROR;
+        }
+
+        // FIXME - no point in calling this twice for the same device
+        Status = update_dev_item(Vcb, c->devices[i], Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("update_dev_item returned %08x\n", Status);
+            return Status;
+        }
+    }
+
+    c->created = FALSE;
+
+    return STATUS_SUCCESS;
+}
+
+// Removes the first sys_chunks entry whose key equals (obj_id, obj_type, offset),
+// shrinks superblock.n by the entry's size, frees it and regenerates the superblock's
+// sys_chunk_array. Does nothing if no entry matches.
+static void remove_from_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset) {
+    LIST_ENTRY* entry;
+
+    for (entry = Vcb->sys_chunks.Flink; entry != &Vcb->sys_chunks; entry = entry->Flink) {
+        sys_chunk* sc = CONTAINING_RECORD(entry, sys_chunk, list_entry);
+
+        if (sc->key.obj_id != obj_id || sc->key.obj_type != obj_type || sc->key.offset != offset)
+            continue;
+
+        RemoveEntryList(&sc->list_entry);
+
+        Vcb->superblock.n -= sizeof(KEY) + sc->size;
+
+        ExFreePool(sc->data);
+        ExFreePool(sc);
+        regen_bootstrap(Vcb);
+        return;
+    }
+}
+
+// Queues a Batch_SetXattr operation that stores `data` under the xattr `name` of `inode`.
+//
+// The item payload is a DIR_ITEM header followed by the name bytes (no terminator) and
+// then the value bytes. `crc32` is used as the key offset of the XATTR_ITEM.
+//
+// Returns STATUS_SUCCESS once the operation has been queued on `batchlist`,
+// STATUS_INSUFFICIENT_RESOURCES if the payload cannot be allocated, or
+// STATUS_INTERNAL_ERROR if queueing fails. On failure the payload is freed here,
+// because insert_tree_item_batch does not take ownership of it when it returns FALSE.
+static NTSTATUS STDCALL set_xattr(device_extension* Vcb, LIST_ENTRY* batchlist, root* subvol, UINT64 inode, char* name, UINT32 crc32,
+                                  UINT8* data, UINT16 datalen, PIRP Irp, LIST_ENTRY* rollback) {
+    ULONG xasize, namelen;
+    DIR_ITEM* xa;
+
+    TRACE("(%p, %llx, %llx, %s, %08x, %p, %u)\n", Vcb, subvol->id, inode, name, crc32, data, datalen);
+
+    // measure the name once rather than on every use
+    namelen = (ULONG)strlen(name);
+
+    xasize = sizeof(DIR_ITEM) - 1 + namelen + datalen;
+
+    xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG);
+    if (!xa) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    xa->key.obj_id = 0;
+    xa->key.obj_type = 0;
+    xa->key.offset = 0;
+    xa->transid = Vcb->superblock.generation;
+    xa->m = datalen;
+    xa->n = (UINT16)namelen;
+    xa->type = BTRFS_TYPE_EA;
+    RtlCopyMemory(xa->name, name, namelen);
+    RtlCopyMemory(xa->name + namelen, data, datalen);
+
+    if (!insert_tree_item_batch(batchlist, Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, Batch_SetXattr, Irp, rollback)) {
+        // the batch list never saw the buffer, so it is still ours to release
+        ExFreePool(xa);
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Removes the xattr called `name` from the XATTR_ITEM keyed (inode, TYPE_XATTR_ITEM, crc32).
+//
+// One item can hold several DIR_ITEM entries laid out back to back (names whose hashes
+// collide). If `name` is the only entry the item is simply deleted; otherwise the item is
+// deleted and re-inserted without the matching entry.
+//
+// Returns TRUE if the xattr was found and removed. Returns FALSE if the item or the name
+// does not exist, if the item is malformed, or if a lookup, allocation or insert fails.
+static BOOL STDCALL delete_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char* name, UINT32 crc32, PIRP Irp, LIST_ENTRY* rollback) {
+    KEY searchkey;
+    traverse_ptr tp;
+    DIR_ITEM* xa;
+    ULONG size;
+    NTSTATUS Status;
+
+    TRACE("(%p, %llx, %llx, %s, %08x)\n", Vcb, subvol->id, inode, name, crc32);
+
+    searchkey.obj_id = inode;
+    searchkey.obj_type = TYPE_XATTR_ITEM;
+    searchkey.offset = crc32;
+
+    Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return FALSE;
+    }
+
+    if (keycmp(tp.item->key, searchkey)) { // no item with this hash at all
+        WARN("xattr %s not found\n", name);
+
+        return FALSE;
+    }
+
+    if (tp.item->size < sizeof(DIR_ITEM)) {
+        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM));
+
+        return FALSE;
+    }
+
+    // size counts the bytes of the item that have not been walked yet
+    size = tp.item->size;
+    xa = (DIR_ITEM*)tp.item->data;
+
+    while (TRUE) {
+        ULONG oldxasize;
+
+        // the header must fit before m and n may be read, then the whole entry must fit
+        if (size < sizeof(DIR_ITEM) || size < sizeof(DIR_ITEM) - 1 + xa->m + xa->n) {
+            ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+
+            return FALSE;
+        }
+
+        oldxasize = sizeof(DIR_ITEM) - 1 + xa->m + xa->n;
+
+        if (xa->n == strlen(name) && RtlCompareMemory(name, xa->name, xa->n) == xa->n) {
+            ULONG newsize, before, after;
+            UINT8 *newdata, *tail;
+
+            newsize = tp.item->size - oldxasize;
+
+            if (newsize == 0) {
+                delete_tree_item(Vcb, &tp, rollback);
+
+                TRACE("xattr %s deleted\n", name);
+
+                return TRUE;
+            }
+
+            // Build the replacement before touching the tree, so that running out of
+            // memory cannot cost us the other entries sharing this item.
+            // FIXME - deleting collisions almost certainly works, but we should test it properly anyway
+            newdata = ExAllocatePoolWithTag(PagedPool, newsize, ALLOC_TAG);
+            if (!newdata) {
+                ERR("out of memory\n");
+                return FALSE;
+            }
+
+            tail = (UINT8*)&xa->name[xa->n + xa->m];
+            before = (ULONG)((UINT8*)xa - tp.item->data);
+            after = tp.item->size - (ULONG)(tail - tp.item->data);
+
+            if (before > 0)
+                RtlCopyMemory(newdata, tp.item->data, before);
+
+            if (after > 0)
+                RtlCopyMemory(newdata + before, tail, after);
+
+            delete_tree_item(Vcb, &tp, rollback);
+
+            if (!insert_tree_item(Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, newdata, newsize, NULL, Irp, rollback)) {
+                ERR("insert_tree_item failed\n");
+                ExFreePool(newdata);
+                return FALSE;
+            }
+
+            return TRUE;
+        }
+
+        // The entry just examined used up everything that was left, so it was the last one.
+        // (Comparing only m + n against size could never be true after the bounds check
+        // above, which made a missing name show up as a "truncated" error.)
+        if (oldxasize >= size) {
+            WARN("xattr %s not found\n", name);
+
+            return FALSE;
+        }
+
+        xa = (DIR_ITEM*)&xa->name[xa->m + xa->n];
+        size -= oldxasize;
+    }
+}
+
+// Inserts an EXTENT_DATA item describing a hole of `length` bytes at file offset `start`.
+// The item is a regular, uncompressed extent whose EXTENT_DATA2 has address and size 0.
+//
+// Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES if the item cannot be allocated,
+// or STATUS_INTERNAL_ERROR if insert_tree_item fails (the item is freed in that case).
+static NTSTATUS insert_sparse_extent(fcb* fcb, UINT64 start, UINT64 length, PIRP Irp, LIST_ENTRY* rollback) {
+    EXTENT_DATA* ed;
+    EXTENT_DATA2* ed2;
+    ULONG edsize = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
+
+    TRACE("((%llx, %llx), %llx, %llx)\n", fcb->subvol->id, fcb->inode, start, length);
+
+    ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
+    if (!ed) {
+        ERR("out of memory\n");
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    ed->generation = fcb->Vcb->superblock.generation;
+    ed->decoded_size = length;
+    ed->compression = BTRFS_COMPRESSION_NONE;
+    ed->encryption = BTRFS_ENCRYPTION_NONE;
+    ed->encoding = BTRFS_ENCODING_NONE;
+    ed->type = EXTENT_TYPE_REGULAR;
+
+    // address == 0 and size == 0 mark the extent as having no backing data
+    ed2 = (EXTENT_DATA2*)ed->data;
+    ed2->address = 0;
+    ed2->size = 0;
+    ed2->offset = 0;
+    ed2->num_bytes = length;
+
+    if (!insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, start, ed, edsize, NULL, Irp, rollback)) {
+        ERR("insert_tree_item failed\n");
+        // the tree did not take the buffer, so release it here
+        ExFreePool(ed);
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Queues one tree operation on `batchlist`.
+//
+// Operations are grouped per root: the batch_root for `r` is looked up, or created and
+// appended to `batchlist` if this is the first operation for that root. Within a root the
+// items are kept ordered by key; a new item goes directly after the last queued item whose
+// key compares lower, or to the front if there is none.
+//
+// `data` is stored by pointer, not copied. Returns FALSE (without freeing `data`) if an
+// allocation fails, TRUE otherwise.
+static BOOL insert_tree_item_batch(LIST_ENTRY* batchlist, device_extension* Vcb, root* r, UINT64 objid, UINT64 objtype, UINT64 offset,
+                                   void* data, UINT16 datalen, enum batch_operation operation, PIRP Irp, LIST_ENTRY* rollback) {
+    LIST_ENTRY* pos;
+    batch_root* group = NULL;
+    batch_item* item;
+
+    // look for an existing group for this root
+    for (pos = batchlist->Flink; pos != batchlist; pos = pos->Flink) {
+        batch_root* candidate = CONTAINING_RECORD(pos, batch_root, list_entry);
+
+        if (candidate->r == r) {
+            group = candidate;
+            break;
+        }
+    }
+
+    if (!group) {
+        group = ExAllocatePoolWithTag(PagedPool, sizeof(batch_root), ALLOC_TAG);
+        if (!group) {
+            ERR("out of memory\n");
+            return FALSE;
+        }
+
+        group->r = r;
+        InitializeListHead(&group->items);
+        InsertTailList(batchlist, &group->list_entry);
+    }
+
+    item = ExAllocateFromPagedLookasideList(&Vcb->batch_item_lookaside);
+    if (!item) {
+        ERR("out of memory\n");
+        return FALSE;
+    }
+
+    item->key.obj_id = objid;
+    item->key.obj_type = objtype;
+    item->key.offset = offset;
+    item->data = data;
+    item->datalen = datalen;
+    item->operation = operation;
+
+    // Scan from the tail for the last entry with a lower key. If none is found, pos ends
+    // up on the list head and the item becomes the first entry.
+    for (pos = group->items.Blink; pos != &group->items; pos = pos->Blink) {
+        batch_item* queued = CONTAINING_RECORD(pos, batch_item, list_entry);
+
+        if (keycmp(queued->key, item->key) == -1)
+            break;
+    }
+
+    InsertHeadList(pos, &item->list_entry);
+
+    return TRUE;
+}
+
+typedef struct { // one on-disk data extent referenced by the file handled in rationalize_extents
+ UINT64 address; // address of the extent, taken from EXTENT_DATA2.address
+ UINT64 length; // size of the extent in bytes, taken from EXTENT_DATA2.size
+ UINT64 offset; // file offset corresponding to the start of the extent (ext->offset - ed2->offset)
+ BOOL changed; // set when a following range has been merged into this one
+ chunk* chunk; // chunk containing address; NULL until looked up with get_chunk_from_address
+ UINT64 skip_start; // smallest ed2->offset among the file's qualifying extent items for this address
+ UINT64 skip_end; // smallest unreferenced tail (size - offset - num_bytes) among those same items
+ LIST_ENTRY list_entry; // link in the local list built by rationalize_extents, kept in ascending address order
+} extent_range;
+
+// Tidies up the on-disk extents referenced by fcb->extents.
+// Only extent items that are regular or preallocated, uncompressed, marked unique and
+// have a non-zero size take part. Two things are done:
+//  1. if none of those items refers to the first or last part of an extent, that part is
+//     cut off: reference changes are recorded, the checksums covering it are queued for
+//     deletion (unless the inode has BTRFS_INODE_NODATASUM), the chunk usage is decreased
+//     and the range is passed to space_list_add;
+//  2. a range is merged with the next one when both are in the same chunk, the next one
+//     starts exactly where this one ends on disk, and the result does not exceed
+//     MAX_EXTENT_SIZE; the extent items are then rewritten to point into the merged range.
+// Failures are logged and abandon the remaining work; nothing is reported to the caller.
+static void rationalize_extents(fcb* fcb, PIRP Irp) {
+ LIST_ENTRY* le;
+ LIST_ENTRY extent_ranges;
+ extent_range* er;
+ BOOL changed = FALSE, truncating = FALSE;
+ UINT32 num_extents = 0;
+
+ InitializeListHead(&extent_ranges);
+
+ // pass 1: build extent_ranges, one entry per distinct extent address, sorted by address
+ le = fcb->extents.Flink;
+ while (le != &fcb->extents) {
+ extent* ext = CONTAINING_RECORD(le, extent, list_entry);
+
+ if ((ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) && ext->data->compression == BTRFS_COMPRESSION_NONE && ext->unique) {
+ EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data;
+
+ if (ed2->size != 0) {
+ LIST_ENTRY* le2;
+
+ le2 = extent_ranges.Flink;
+ while (le2 != &extent_ranges) {
+ extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);
+
+ if (er2->address == ed2->address) {
+ // extent already known: keep the smallest unused head and tail seen so far
+ er2->skip_start = min(er2->skip_start, ed2->offset);
+ er2->skip_end = min(er2->skip_end, ed2->size - ed2->offset - ed2->num_bytes);
+ goto cont;
+ } else if (er2->address > ed2->address)
+ break;
+
+ le2 = le2->Flink;
+ }
+
+ er = ExAllocatePoolWithTag(PagedPool, sizeof(extent_range), ALLOC_TAG); // FIXME - should be from lookaside?
+ if (!er) {
+ ERR("out of memory\n");
+ goto end;
+ }
+
+ er->address = ed2->address;
+ er->length = ed2->size;
+ er->offset = ext->offset - ed2->offset;
+ er->changed = FALSE;
+ er->chunk = NULL;
+ er->skip_start = ed2->offset;
+ er->skip_end = ed2->size - ed2->offset - ed2->num_bytes;
+
+ if (er->skip_start != 0 || er->skip_end != 0)
+ truncating = TRUE;
+
+ // le2 is the first entry with a higher address (or the list head), so insert just before it
+ InsertHeadList(le2->Blink, &er->list_entry);
+ num_extents++;
+ }
+ }
+
+cont:
+ le = le->Flink;
+ }
+
+ // nothing to trim and nothing to merge
+ if (num_extents == 0 || (num_extents == 1 && !truncating))
+ goto end;
+
+ // pass 2: find the chunk of every range; a chunk found once is reused for later ranges lying inside it
+ le = extent_ranges.Flink;
+ while (le != &extent_ranges) {
+ er = CONTAINING_RECORD(le, extent_range, list_entry);
+
+ if (!er->chunk) {
+ LIST_ENTRY* le2;
+
+ er->chunk = get_chunk_from_address(fcb->Vcb, er->address);
+
+ if (!er->chunk) {
+ ERR("get_chunk_from_address(%llx) failed\n", er->address);
+ goto end;
+ }
+
+ le2 = le->Flink;
+ while (le2 != &extent_ranges) {
+ extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);
+
+ if (!er2->chunk && er2->address >= er->chunk->offset && er2->address < er->chunk->offset + er->chunk->chunk_item->size)
+ er2->chunk = er->chunk;
+
+ le2 = le2->Flink;
+ }
+ }
+
+ le = le->Flink;
+ }
+
+ if (truncating) {
+ // truncate beginning or end of extent if unused
+
+ le = extent_ranges.Flink;
+ while (le != &extent_ranges) {
+ er = CONTAINING_RECORD(le, extent_range, list_entry);
+
+ if (er->skip_start > 0) {
+ // move every extent item on this extent forward by skip_start: the old reference is
+ // dropped (-1) and a reference to the shortened extent is added (+1)
+ LIST_ENTRY* le2 = fcb->extents.Flink;
+ while (le2 != &fcb->extents) {
+ extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
+
+ if ((ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) && ext->data->compression == BTRFS_COMPRESSION_NONE && ext->unique) {
+ EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data;
+
+ if (ed2->size != 0 && ed2->address == er->address) {
+ NTSTATUS Status;
+
+ Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
+ -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, TRUE, Irp);
+ if (!NT_SUCCESS(Status)) {
+ ERR("update_changed_extent_ref returned %08x\n", Status);
+ goto end;
+ }
+
+ ext->data->decoded_size -= er->skip_start;
+ ed2->size -= er->skip_start;
+ ed2->address += er->skip_start;
+ ed2->offset -= er->skip_start;
+
+ add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
+ 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
+ }
+ }
+
+ le2 = le2->Flink;
+ }
+
+ if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
+ // queue deletion of the checksums covering the removed head (er->address is still the old start here)
+ LIST_ENTRY changed_sector_list;
+
+ changed_sector* sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
+ if (!sc) {
+ ERR("out of memory\n");
+ goto end;
+ }
+
+ sc->ol.key = er->address;
+ sc->checksums = NULL;
+ sc->length = er->skip_start / fcb->Vcb->superblock.sector_size;
+
+ sc->deleted = TRUE;
+
+ InitializeListHead(&changed_sector_list);
+ insert_into_ordered_list(&changed_sector_list, &sc->ol);
+
+ // NOTE(review): sc is not freed here - presumably commit_checksum_changes takes ownership; confirm
+ commit_checksum_changes(fcb->Vcb, &changed_sector_list);
+ }
+
+ decrease_chunk_usage(er->chunk, er->skip_start);
+
+ space_list_add(fcb->Vcb, er->chunk, TRUE, er->address, er->skip_start, NULL);
+
+ er->address += er->skip_start;
+ er->length -= er->skip_start;
+ }
+
+ if (er->skip_end > 0) {
+ // same procedure for the unused tail; er->address and ed2->address already include any head trim
+ LIST_ENTRY* le2 = fcb->extents.Flink;
+ while (le2 != &fcb->extents) {
+ extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
+
+ if ((ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) && ext->data->compression == BTRFS_COMPRESSION_NONE && ext->unique) {
+ EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data;
+
+ if (ed2->size != 0 && ed2->address == er->address) {
+ NTSTATUS Status;
+
+ Status = update_changed_extent_ref(fcb->Vcb, er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
+ -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, TRUE, Irp);
+ if (!NT_SUCCESS(Status)) {
+ ERR("update_changed_extent_ref returned %08x\n", Status);
+ goto end;
+ }
+
+ ext->data->decoded_size -= er->skip_end;
+ ed2->size -= er->skip_end;
+
+ add_changed_extent_ref(er->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
+ 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
+ }
+ }
+
+ le2 = le2->Flink;
+ }
+
+ if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
+ // queue deletion of the checksums covering the removed tail
+ LIST_ENTRY changed_sector_list;
+
+ changed_sector* sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
+ if (!sc) {
+ ERR("out of memory\n");
+ goto end;
+ }
+
+ sc->ol.key = er->address + er->length - er->skip_end;
+ sc->checksums = NULL;
+ sc->length = er->skip_end / fcb->Vcb->superblock.sector_size;
+
+ sc->deleted = TRUE;
+
+ InitializeListHead(&changed_sector_list);
+ insert_into_ordered_list(&changed_sector_list, &sc->ol);
+
+ commit_checksum_changes(fcb->Vcb, &changed_sector_list);
+ }
+
+ decrease_chunk_usage(er->chunk, er->skip_end);
+
+ space_list_add(fcb->Vcb, er->chunk, TRUE, er->address + er->length - er->skip_end, er->skip_end, NULL);
+
+ er->length -= er->skip_end;
+ }
+
+ le = le->Flink;
+ }
+ }
+
+ // merging needs at least two ranges
+ if (num_extents < 2)
+ goto end;
+
+ // merge together adjacent extents
+ le = extent_ranges.Flink;
+ while (le != &extent_ranges) {
+ er = CONTAINING_RECORD(le, extent_range, list_entry);
+
+ if (le->Flink != &extent_ranges && er->length < MAX_EXTENT_SIZE) {
+ extent_range* er2 = CONTAINING_RECORD(le->Flink, extent_range, list_entry);
+
+ if (er->chunk == er2->chunk) {
+ if (er2->address == er->address + er->length && er2->offset >= er->offset + er->length) {
+ if (er->length + er2->length <= MAX_EXTENT_SIZE) {
+ er->length += er2->length;
+ er->changed = TRUE;
+
+ RemoveEntryList(&er2->list_entry);
+ ExFreePool(er2);
+
+ changed = TRUE;
+ // stay on the same range so it is also tried against its new successor
+ continue;
+// } else { // FIXME - make changing of beginning of offset work
+// er2->length = er2->address + er->length - er->address - MAX_EXTENT_SIZE;
+// er2->address = er->address + MAX_EXTENT_SIZE;
+// er->length = MAX_EXTENT_SIZE;
+ }
+ }
+ }
+ }
+
+ le = le->Flink;
+ }
+
+ if (!changed)
+ goto end;
+
+ // rewrite every extent item that lies inside a merged range so that it refers to the
+ // whole merged extent, adjusting its offset by the distance from the new start
+ le = fcb->extents.Flink;
+ while (le != &fcb->extents) {
+ extent* ext = CONTAINING_RECORD(le, extent, list_entry);
+
+ if ((ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) && ext->data->compression == BTRFS_COMPRESSION_NONE && ext->unique) {
+ EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data;
+
+ if (ed2->size != 0) {
+ LIST_ENTRY* le2;
+
+ le2 = extent_ranges.Flink;
+ while (le2 != &extent_ranges) {
+ extent_range* er2 = CONTAINING_RECORD(le2, extent_range, list_entry);
+
+ if (ed2->address >= er2->address && ed2->address + ed2->size <= er2->address + er2->length && er2->changed) {
+ NTSTATUS Status;
+
+ Status = update_changed_extent_ref(fcb->Vcb, er2->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
+ -1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, TRUE, Irp);
+ if (!NT_SUCCESS(Status)) {
+ ERR("update_changed_extent_ref returned %08x\n", Status);
+ goto end;
+ }
+
+ ed2->offset += ed2->address - er2->address;
+ ed2->address = er2->address;
+ ed2->size = er2->length;
+ ext->data->decoded_size = ed2->size;
+
+ add_changed_extent_ref(er2->chunk, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset,
+ 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
+
+ break;
+ }
+
+ le2 = le2->Flink;
+ }
+ }
+ }
+
+ le = le->Flink;
+ }
+
+end:
+ // release the temporary range list on every exit path
+ while (!IsListEmpty(&extent_ranges)) {
+ le = RemoveHeadList(&extent_ranges);
+ er = CONTAINING_RECORD(le, extent_range, list_entry);
+
+ ExFreePool(er);
+ }
+}
+
+// Writes the pending in-memory changes of `fcb` into its subvolume tree.
+//
+// fcb       - file control block to flush; its dirty flag is cleared on every exit path.
+// cache     - when TRUE the INODE_ITEM is always looked up and updated in place inside the
+//             existing tree item (or inserted directly if missing) instead of being deleted
+//             and re-queued; drop_chunk passes TRUE for a chunk's free-space cache inode.
+// batchlist - list on which inserts and xattr updates are queued (insert_tree_item_batch).
+// Irp, rollback - passed through to the tree helpers.
+//
+// Steps, in order: discard the cached directory index; for an alternate data stream only
+// set or delete its xattr; rewrite the EXTENT_DATA items if the extents changed; update
+// the INODE_ITEM; for a deleted file remove its XATTR_ITEMs; otherwise flush the security
+// descriptor, DOS attributes, reparse data and EAs that are marked as changed.
+//
+// The function returns nothing: failures are logged and abandon the remaining steps.
+// Buffers allocated here are freed again if the tree refuses them.
+void flush_fcb(fcb* fcb, BOOL cache, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
+    traverse_ptr tp;
+    KEY searchkey;
+    NTSTATUS Status;
+    INODE_ITEM* ii;
+    UINT64 ii_offset;
+#ifdef DEBUG_PARANOID
+    UINT64 old_size = 0;
+    BOOL extents_changed;
+#endif
+
+//     ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE);
+
+    // throw away the cached index entries; index_loaded is cleared so they get reloaded
+    while (!IsListEmpty(&fcb->index_list)) {
+        LIST_ENTRY* le = RemoveHeadList(&fcb->index_list);
+        index_entry* ie = CONTAINING_RECORD(le, index_entry, list_entry);
+
+        if (ie->utf8.Buffer) ExFreePool(ie->utf8.Buffer);
+        if (ie->filepart_uc.Buffer) ExFreePool(ie->filepart_uc.Buffer);
+        ExFreePool(ie);
+    }
+
+    fcb->index_loaded = FALSE;
+
+    // an alternate data stream lives entirely in one xattr of the inode
+    if (fcb->ads) {
+        if (fcb->deleted)
+            delete_xattr(fcb->Vcb, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adshash, Irp, rollback);
+        else {
+            Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adshash, (UINT8*)fcb->adsdata.Buffer, fcb->adsdata.Length, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("set_xattr returned %08x\n", Status);
+                goto end;
+            }
+        }
+        goto end;
+    }
+
+#ifdef DEBUG_PARANOID
+    extents_changed = fcb->extents_changed;
+#endif
+
+    if (fcb->extents_changed) {
+        BOOL b;
+        traverse_ptr next_tp;
+        LIST_ENTRY* le;
+        BOOL prealloc = FALSE, extents_inline = FALSE;
+        UINT64 last_end;
+
+        // delete ignored extent items
+        le = fcb->extents.Flink;
+        while (le != &fcb->extents) {
+            LIST_ENTRY* le2 = le->Flink;
+            extent* ext = CONTAINING_RECORD(le, extent, list_entry);
+
+            if (ext->ignore) {
+                RemoveEntryList(&ext->list_entry);
+                ExFreePool(ext->data);
+                ExFreePool(ext);
+            }
+
+            le = le2;
+        }
+
+        if (!IsListEmpty(&fcb->extents)) {
+            rationalize_extents(fcb, Irp);
+
+            // merge together adjacent EXTENT_DATAs pointing to same extent
+
+            le = fcb->extents.Flink;
+            while (le != &fcb->extents) {
+                LIST_ENTRY* le2 = le->Flink;
+                extent* ext = CONTAINING_RECORD(le, extent, list_entry);
+
+                if ((ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) && le->Flink != &fcb->extents) {
+                    extent* nextext = CONTAINING_RECORD(le->Flink, extent, list_entry);
+
+                    if (ext->data->type == nextext->data->type) {
+                        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data;
+                        EXTENT_DATA2* ned2 = (EXTENT_DATA2*)nextext->data->data;
+
+                        // same extent, contiguous both in the file and within the extent
+                        if (ed2->size != 0 && ed2->address == ned2->address && ed2->size == ned2->size &&
+                            nextext->offset == ext->offset + ed2->num_bytes && ned2->offset == ed2->offset + ed2->num_bytes) {
+                            chunk* c;
+
+                            ext->data->generation = fcb->Vcb->superblock.generation;
+                            ed2->num_bytes += ned2->num_bytes;
+
+                            RemoveEntryList(&nextext->list_entry);
+                            ExFreePool(nextext->data);
+                            ExFreePool(nextext);
+
+                            c = get_chunk_from_address(fcb->Vcb, ed2->address);
+
+                            if (!c) {
+                                ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
+                            } else {
+                                // two items became one, so the extent loses one reference
+                                Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1,
+                                                                   fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
+                                if (!NT_SUCCESS(Status)) {
+                                    ERR("update_changed_extent_ref returned %08x\n", Status);
+                                    goto end;
+                                }
+                            }
+
+                            // revisit the same entry: it may merge with its new successor too
+                            le2 = le;
+                        }
+                    }
+                }
+
+                le = le2;
+            }
+        }
+
+        if (!fcb->created) {
+            // delete existing EXTENT_DATA items
+
+            searchkey.obj_id = fcb->inode;
+            searchkey.obj_type = TYPE_EXTENT_DATA;
+            searchkey.offset = 0;
+
+            Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
+            if (!NT_SUCCESS(Status)) {
+                ERR("error - find_item returned %08x\n", Status);
+                goto end;
+            }
+
+            do {
+                if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type)
+                    delete_tree_item(fcb->Vcb, &tp, rollback);
+
+                b = find_next_item(fcb->Vcb, &tp, &next_tp, FALSE, Irp);
+
+                if (b) {
+                    tp = next_tp;
+
+                    // stop once we are past this inode's EXTENT_DATA keys
+                    if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type))
+                        break;
+                }
+            } while (b);
+        }
+
+        if (!fcb->deleted) {
+            // add new EXTENT_DATAs
+
+            last_end = 0;
+
+            le = fcb->extents.Flink;
+            while (le != &fcb->extents) {
+                extent* ext = CONTAINING_RECORD(le, extent, list_entry);
+                EXTENT_DATA* ed;
+
+                // without NO_HOLES every gap between extents needs an explicit sparse extent
+                if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && ext->offset > last_end) {
+                    Status = insert_sparse_extent(fcb, last_end, ext->offset - last_end, Irp, rollback);
+                    if (!NT_SUCCESS(Status)) {
+                        ERR("insert_sparse_extent returned %08x\n", Status);
+                        goto end;
+                    }
+                }
+
+                ed = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG);
+                if (!ed) {
+                    ERR("out of memory\n");
+                    Status = STATUS_INSUFFICIENT_RESOURCES;
+                    goto end;
+                }
+
+                RtlCopyMemory(ed, ext->data, ext->datalen);
+
+                if (!insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, ext->offset,
+                                            ed, ext->datalen, Batch_Insert, Irp, rollback)) {
+                    ERR("insert_tree_item_batch failed\n");
+                    // the copy was not queued, so it has to be released here
+                    ExFreePool(ed);
+                    Status = STATUS_INTERNAL_ERROR;
+                    goto end;
+                }
+
+                if (ext->datalen >= sizeof(EXTENT_DATA) && ed->type == EXTENT_TYPE_PREALLOC)
+                    prealloc = TRUE;
+
+                if (ext->datalen >= sizeof(EXTENT_DATA) && ed->type == EXTENT_TYPE_INLINE)
+                    extents_inline = TRUE;
+
+                if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES)) {
+                    if (ed->type == EXTENT_TYPE_INLINE)
+                        last_end = ext->offset + ed->decoded_size;
+                    else {
+                        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
+
+                        last_end = ext->offset + ed2->num_bytes;
+                    }
+                }
+
+                le = le->Flink;
+            }
+
+            // cover the gap between the last extent and the sector-aligned end of file
+            if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && !extents_inline &&
+                sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) > last_end) {
+                Status = insert_sparse_extent(fcb, last_end, sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) - last_end, Irp, rollback);
+                if (!NT_SUCCESS(Status)) {
+                    ERR("insert_sparse_extent returned %08x\n", Status);
+                    goto end;
+                }
+            }
+
+            // update prealloc flag in INODE_ITEM
+
+            if (!prealloc)
+                fcb->inode_item.flags &= ~BTRFS_INODE_PREALLOC;
+            else
+                fcb->inode_item.flags |= BTRFS_INODE_PREALLOC;
+
+            fcb->inode_item_changed = TRUE;
+        }
+
+        fcb->extents_changed = FALSE;
+    }
+
+    if ((!fcb->created && fcb->inode_item_changed) || cache) {
+        // the highest possible offset makes find_item land on the inode's INODE_ITEM if present
+        searchkey.obj_id = fcb->inode;
+        searchkey.obj_type = TYPE_INODE_ITEM;
+        searchkey.offset = 0xffffffffffffffff;
+
+        Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            goto end;
+        }
+
+        if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
+            if (cache) {
+                // a cache inode without an INODE_ITEM gets one inserted straight away
+                ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
+                if (!ii) {
+                    ERR("out of memory\n");
+                    goto end;
+                }
+
+                RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM));
+
+                if (!insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp, rollback)) {
+                    ERR("insert_tree_item failed\n");
+                    ExFreePool(ii);
+                    goto end;
+                }
+
+                ii_offset = 0;
+            } else {
+                ERR("could not find INODE_ITEM for inode %llx in subvol %llx\n", fcb->inode, fcb->subvol->id);
+                int3;
+                goto end;
+            }
+        } else {
+#ifdef DEBUG_PARANOID
+            INODE_ITEM* ii2 = (INODE_ITEM*)tp.item->data;
+
+            old_size = ii2->st_size;
+#endif
+
+            ii_offset = tp.item->key.offset;
+        }
+
+        if (!cache)
+            delete_tree_item(fcb->Vcb, &tp, rollback);
+        else {
+            // cache inodes are overwritten in place instead of delete + insert
+            searchkey.obj_id = fcb->inode;
+            searchkey.obj_type = TYPE_INODE_ITEM;
+            searchkey.offset = ii_offset;
+
+            Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
+            if (!NT_SUCCESS(Status)) {
+                ERR("error - find_item returned %08x\n", Status);
+                goto end;
+            }
+
+            if (keycmp(tp.item->key, searchkey)) {
+                ERR("could not find INODE_ITEM for inode %llx in subvol %llx\n", fcb->inode, fcb->subvol->id);
+                int3;
+                goto end;
+            } else
+                RtlCopyMemory(tp.item->data, &fcb->inode_item, min(tp.item->size, sizeof(INODE_ITEM)));
+        }
+    } else
+        ii_offset = 0;
+
+#ifdef DEBUG_PARANOID
+    if (!extents_changed && fcb->type != BTRFS_TYPE_DIRECTORY && old_size != fcb->inode_item.st_size) {
+        ERR("error - size has changed but extents not marked as changed\n");
+        int3;
+    }
+#endif
+
+    fcb->created = FALSE;
+
+    if (fcb->deleted) {
+        traverse_ptr tp2;
+
+        // delete XATTR_ITEMs
+
+        searchkey.obj_id = fcb->inode;
+        searchkey.obj_type = TYPE_XATTR_ITEM;
+        searchkey.offset = 0;
+
+        Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            goto end;
+        }
+
+        while (find_next_item(fcb->Vcb, &tp, &tp2, FALSE, Irp)) {
+            tp = tp2;
+
+            if (tp.item->key.obj_id == fcb->inode) {
+                // FIXME - do metadata thing here too?
+                if (tp.item->key.obj_type == TYPE_XATTR_ITEM) {
+                    delete_tree_item(fcb->Vcb, &tp, rollback);
+                    TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+                }
+            } else
+                break;
+        }
+
+        goto end;
+    }
+
+    if (!cache && fcb->inode_item_changed) {
+        // queue the fresh INODE_ITEM at the offset the old one had (0 if there was none)
+        ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
+        if (!ii) {
+            ERR("out of memory\n");
+            goto end;
+        }
+
+        RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM));
+
+        if (!insert_tree_item_batch(batchlist, fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, ii_offset, ii, sizeof(INODE_ITEM),
+                                    Batch_Insert, Irp, rollback)) {
+            ERR("insert_tree_item_batch failed\n");
+            ExFreePool(ii);
+            goto end;
+        }
+
+        fcb->inode_item_changed = FALSE;
+    }
+
+    if (fcb->sd_dirty) {
+        // a failure here is only logged; the remaining attributes are still flushed
+        Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_NTACL, EA_NTACL_HASH, (UINT8*)fcb->sd, RtlLengthSecurityDescriptor(fcb->sd), Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("set_xattr returned %08x\n", Status);
+        }
+
+        fcb->sd_dirty = FALSE;
+    }
+
+    if (fcb->atts_changed) {
+        if (!fcb->atts_deleted) {
+            UINT8 val[16], *val2;
+            ULONG atts = fcb->atts;
+
+            TRACE("inserting new DOSATTRIB xattr\n");
+
+            // format atts as "0x" + lower-case hex, written right to left into the end of val
+            val2 = &val[sizeof(val) - 1];
+
+            do {
+                UINT8 c = atts % 16;
+                *val2 = (c <= 9) ? (c + '0') : (c - 0xa + 'a');
+
+                val2--;
+                atts >>= 4;
+            } while (atts != 0);
+
+            *val2 = 'x';
+            val2--;
+            *val2 = '0';
+
+            Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_DOSATTRIB, EA_DOSATTRIB_HASH, val2, val + sizeof(val) - val2, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("set_xattr returned %08x\n", Status);
+                goto end;
+            }
+        } else
+            delete_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_DOSATTRIB, EA_DOSATTRIB_HASH, Irp, rollback);
+
+        fcb->atts_changed = FALSE;
+        fcb->atts_deleted = FALSE;
+    }
+
+    if (fcb->reparse_xattr_changed) {
+        if (fcb->reparse_xattr.Buffer && fcb->reparse_xattr.Length > 0) {
+            Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_REPARSE, EA_REPARSE_HASH, (UINT8*)fcb->reparse_xattr.Buffer, fcb->reparse_xattr.Length, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("set_xattr returned %08x\n", Status);
+                goto end;
+            }
+        } else
+            delete_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_REPARSE, EA_REPARSE_HASH, Irp, rollback);
+
+        fcb->reparse_xattr_changed = FALSE;
+    }
+
+    if (fcb->ea_changed) {
+        if (fcb->ea_xattr.Buffer && fcb->ea_xattr.Length > 0) {
+            Status = set_xattr(fcb->Vcb, batchlist, fcb->subvol, fcb->inode, EA_EA, EA_EA_HASH, (UINT8*)fcb->ea_xattr.Buffer, fcb->ea_xattr.Length, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("set_xattr returned %08x\n", Status);
+                goto end;
+            }
+        } else
+            delete_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_EA, EA_EA_HASH, Irp, rollback);
+
+        fcb->ea_changed = FALSE;
+    }
+
+end:
+    fcb->dirty = FALSE;
+
+//     ExReleaseResourceLite(fcb->Header.Resource);
+    return;
+}
+
+// Removes chunk `c` from the filesystem and frees its in-memory structure.
+//
+// In order: the chunk's free-space cache inode (if any) is marked deleted and flushed and
+// its item in the root tree removed; each stripe's device space is handed back and the
+// device's bytes_used lowered; the DEV_ITEMs of the devices involved are rewritten; for a
+// chunk that already exists on disk (c->created is FALSE) the DEV_EXTENT, CHUNK_ITEM and
+// BLOCK_GROUP_ITEM items are deleted; a system chunk is removed from the bootstrap list;
+// finally c is unlinked from its lists and freed together with its space lists and locks.
+//
+// Returns STATUS_SUCCESS, or the failing status of a tree lookup / allocation, in which
+// case c has not been freed. After a successful return c must not be used again.
+static NTSTATUS drop_chunk(device_extension* Vcb, chunk* c, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
+    KEY searchkey;
+    traverse_ptr tp;
+    UINT64 i, factor;
+    CHUNK_ITEM_STRIPE* cis;
+
+    TRACE("dropping chunk %llx\n", c->offset);
+
+    // remove free space cache
+    if (c->cache) {
+        c->cache->deleted = TRUE;
+
+        flush_fcb(c->cache, TRUE, batchlist, Irp, rollback);
+
+        free_fcb(c->cache);
+
+        searchkey.obj_id = FREE_SPACE_CACHE_ID;
+        searchkey.obj_type = 0;
+        searchkey.offset = c->offset;
+
+        Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            return Status;
+        }
+
+        if (!keycmp(tp.item->key, searchkey)) {
+            delete_tree_item(Vcb, &tp, rollback);
+        }
+    }
+
+    // Number of stripes that carry distinct data; chunk size / factor is the length used
+    // on each device. This has to match the calculation in create_chunk, including the
+    // parity stripes of RAID5 (one) and RAID6 (two).
+    if (c->chunk_item->type & BLOCK_FLAG_RAID0)
+        factor = c->chunk_item->num_stripes;
+    else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
+        factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
+    else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
+        factor = c->chunk_item->num_stripes - 1;
+    else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
+        factor = c->chunk_item->num_stripes - 2;
+    else // SINGLE, DUPLICATE, RAID1
+        factor = 1;
+
+    // the stripe array follows the CHUNK_ITEM header directly
+    cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
+    for (i = 0; i < c->chunk_item->num_stripes; i++) {
+        if (!c->created) {
+            // remove DEV_EXTENTs from tree 4
+            searchkey.obj_id = cis[i].dev_id;
+            searchkey.obj_type = TYPE_DEV_EXTENT;
+            searchkey.offset = cis[i].offset;
+
+            Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, Irp);
+            if (!NT_SUCCESS(Status)) {
+                ERR("error - find_item returned %08x\n", Status);
+                return Status;
+            }
+
+            if (!keycmp(tp.item->key, searchkey)) {
+                delete_tree_item(Vcb, &tp, rollback);
+
+                if (tp.item->size >= sizeof(DEV_EXTENT)) {
+                    DEV_EXTENT* de = (DEV_EXTENT*)tp.item->data;
+
+                    c->devices[i]->devitem.bytes_used -= de->length;
+
+                    space_list_add2(Vcb, &c->devices[i]->space, NULL, cis[i].offset, de->length, rollback);
+                }
+            } else
+                WARN("could not find (%llx,%x,%llx) in dev tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
+        } else {
+            // the chunk never reached the disk, so there is no DEV_EXTENT to read the length from
+            UINT64 len = c->chunk_item->size / factor;
+
+            c->devices[i]->devitem.bytes_used -= len;
+            space_list_add2(Vcb, &c->devices[i]->space, NULL, cis[i].offset, len, rollback);
+        }
+    }
+
+    // modify DEV_ITEMs in chunk tree
+    for (i = 0; i < c->chunk_item->num_stripes; i++) {
+        if (c->devices[i]) {
+            UINT64 j;
+            DEV_ITEM* di;
+
+            searchkey.obj_id = 1;
+            searchkey.obj_type = TYPE_DEV_ITEM;
+            searchkey.offset = c->devices[i]->devitem.dev_id;
+
+            Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp);
+            if (!NT_SUCCESS(Status)) {
+                ERR("error - find_item returned %08x\n", Status);
+                return Status;
+            }
+
+            if (keycmp(tp.item->key, searchkey)) {
+                ERR("error - could not find DEV_ITEM for device %llx\n", searchkey.offset);
+                return STATUS_INTERNAL_ERROR;
+            }
+
+            delete_tree_item(Vcb, &tp, rollback);
+
+            di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
+            if (!di) {
+                ERR("out of memory\n");
+                return STATUS_INSUFFICIENT_RESOURCES;
+            }
+
+            RtlCopyMemory(di, &c->devices[i]->devitem, sizeof(DEV_ITEM));
+
+            if (!insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, c->devices[i]->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp, rollback)) {
+                ERR("insert_tree_item failed\n");
+                ExFreePool(di);
+                return STATUS_INTERNAL_ERROR;
+            }
+
+            // a device holding several stripes only needs its DEV_ITEM rewritten once
+            for (j = i + 1; j < c->chunk_item->num_stripes; j++) {
+                if (c->devices[j] == c->devices[i])
+                    c->devices[j] = NULL;
+            }
+        }
+    }
+
+    if (!c->created) {
+        // remove CHUNK_ITEM from chunk tree
+        searchkey.obj_id = 0x100;
+        searchkey.obj_type = TYPE_CHUNK_ITEM;
+        searchkey.offset = c->offset;
+
+        Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            return Status;
+        }
+
+        if (!keycmp(tp.item->key, searchkey))
+            delete_tree_item(Vcb, &tp, rollback);
+        else
+            WARN("could not find CHUNK_ITEM for chunk %llx\n", c->offset);
+
+        // remove BLOCK_GROUP_ITEM from extent tree
+        searchkey.obj_id = c->offset;
+        searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
+        searchkey.offset = 0xffffffffffffffff;
+
+        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            return Status;
+        }
+
+        if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type)
+            delete_tree_item(Vcb, &tp, rollback);
+        else
+            WARN("could not find BLOCK_GROUP_ITEM for chunk %llx\n", c->offset);
+    }
+
+    if (c->chunk_item->type & BLOCK_FLAG_SYSTEM)
+        remove_from_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset);
+
+    RemoveEntryList(&c->list_entry);
+
+    // only unlink from the changed list if the chunk is actually on it
+    if (c->list_entry_changed.Flink)
+        RemoveEntryList(&c->list_entry_changed);
+
+    ExFreePool(c->chunk_item);
+    ExFreePool(c->devices);
+
+    while (!IsListEmpty(&c->space)) {
+        space* s = CONTAINING_RECORD(c->space.Flink, space, list_entry);
+
+        RemoveEntryList(&s->list_entry);
+        ExFreePool(s);
+    }
+
+    while (!IsListEmpty(&c->deleting)) {
+        space* s = CONTAINING_RECORD(c->deleting.Flink, space, list_entry);
+
+        RemoveEntryList(&s->list_entry);
+        ExFreePool(s);
+    }
+
+    ExDeleteResourceLite(&c->lock);
+    ExDeleteResourceLite(&c->changed_extents_lock);
+
+    ExFreePool(c);
+
+    return STATUS_SUCCESS;
+}
+
+// Walks Vcb->chunks_changed and brings each chunk's on-disk state up to date.
+//
+// For every chunk the used byte count is taken, minus the extents of the chunk's own
+// free-space cache when that cache is stored inside the chunk itself. A chunk with nothing
+// else in it is dropped (drop_chunk frees it, so its lock is not released afterwards);
+// otherwise a chunk still flagged as created is written out with create_chunk.
+//
+// Vcb->chunk_lock is held exclusively for the whole walk, including the initial read of
+// the list head. Returns STATUS_SUCCESS, or the status of the first drop_chunk or
+// create_chunk failure.
+static NTSTATUS update_chunks(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
+    LIST_ENTRY *le, *le2;
+    NTSTATUS Status;
+    UINT64 used_minus_cache;
+
+    ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
+
+    // FIXME - do tree chunks before data chunks
+
+    // read the list head only once the lock is held
+    le = Vcb->chunks_changed.Flink;
+    while (le != &Vcb->chunks_changed) {
+        chunk* c = CONTAINING_RECORD(le, chunk, list_entry_changed);
+
+        // remember the successor now: c may be unlinked and freed below
+        le2 = le->Flink;
+
+        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+
+        used_minus_cache = c->used;
+
+        // subtract self-hosted cache
+        if (used_minus_cache > 0 && c->chunk_item->type & BLOCK_FLAG_DATA && c->cache && c->cache->inode_item.st_size == c->used) {
+            LIST_ENTRY* le3;
+
+            le3 = c->cache->extents.Flink;
+            while (le3 != &c->cache->extents) {
+                extent* ext = CONTAINING_RECORD(le3, extent, list_entry);
+                EXTENT_DATA* ed = ext->data;
+
+                if (!ext->ignore) {
+                    if (ext->datalen < sizeof(EXTENT_DATA)) {
+                        ERR("extent %llx was %u bytes, expected at least %u\n", ext->offset, ext->datalen, sizeof(EXTENT_DATA));
+                        break;
+                    }
+
+                    if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
+                        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
+
+                        if (ext->datalen < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
+                            ERR("extent %llx was %u bytes, expected at least %u\n", ext->offset, ext->datalen,
+                                sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
+                            break;
+                        }
+
+                        // only extents lying wholly inside this chunk count as self-hosted
+                        if (ed2->size != 0 && ed2->address >= c->offset && ed2->address + ed2->size <= c->offset + c->chunk_item->size)
+                            used_minus_cache -= ed2->size;
+                    }
+                }
+
+                le3 = le3->Flink;
+            }
+        }
+
+        if (used_minus_cache == 0) {
+            Status = drop_chunk(Vcb, c, batchlist, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("drop_chunk returned %08x\n", Status);
+                ExReleaseResourceLite(&c->lock);
+                ExReleaseResourceLite(&Vcb->chunk_lock);
+                return Status;
+            }
+        } else if (c->created) {
+            Status = create_chunk(Vcb, c, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("create_chunk returned %08x\n", Status);
+                ExReleaseResourceLite(&c->lock);
+                ExReleaseResourceLite(&Vcb->chunk_lock);
+                return Status;
+            }
+        }
+
+        // a dropped chunk no longer exists, so there is no lock left to release
+        if (used_minus_cache > 0)
+            ExReleaseResourceLite(&c->lock);
+
+        le = le2;
+    }
+
+    ExReleaseResourceLite(&Vcb->chunk_lock);
+
+    return STATUS_SUCCESS;
+}
+
+// Removes one entry from the ROOT_REF item (parsubvolid, TYPE_ROOT_REF, subvolid)
+// in the root tree.
+//
+// The item may hold several packed ROOT_REF entries. The entry whose dir equals
+// parinode and whose name equals utf8 is cut out: the old item is deleted and,
+// if any other entries remain, a new item holding them is inserted.
+//
+// Returns STATUS_NOT_FOUND if no item with that key exists, STATUS_INTERNAL_ERROR
+// if the item is shorter than a ROOT_REF or the replacement cannot be inserted,
+// STATUS_INSUFFICIENT_RESOURCES on allocation failure, or the find_item status.
+// STATUS_SUCCESS is also returned when the item exists but holds no matching
+// entry, or when a truncated entry stops the scan.
+static NTSTATUS delete_root_ref(device_extension* Vcb, UINT64 subvolid, UINT64 parsubvolid, UINT64 parinode, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback) {
+    KEY searchkey;
+    traverse_ptr tp;
+    NTSTATUS Status;
+
+    searchkey.obj_id = parsubvolid;
+    searchkey.obj_type = TYPE_ROOT_REF;
+    searchkey.offset = subvolid;
+
+    Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return Status;
+    }
+
+    if (!keycmp(searchkey, tp.item->key)) {
+        if (tp.item->size < sizeof(ROOT_REF)) {
+            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF));
+            return STATUS_INTERNAL_ERROR;
+        } else {
+            ROOT_REF* rr;
+            ULONG len;
+
+            rr = (ROOT_REF*)tp.item->data;
+            len = tp.item->size;
+
+            do {
+                ULONG itemlen;
+
+                if (len < sizeof(ROOT_REF) || len < sizeof(ROOT_REF) - 1 + rr->n) {
+                    ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+                    break;
+                }
+
+                // Size of this packed entry: header minus the 1-byte name placeholder, plus the name.
+                itemlen = sizeof(ROOT_REF) - sizeof(char) + rr->n;
+
+                if (rr->dir == parinode && rr->n == utf8->Length && RtlCompareMemory(rr->name, utf8->Buffer, rr->n) == rr->n) {
+                    ULONG newlen = tp.item->size - itemlen;
+
+                    delete_tree_item(Vcb, &tp, rollback);
+
+                    if (newlen == 0) {
+                        TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+                    } else {
+                        UINT8 *newrr = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *rroff;
+
+                        if (!newrr) {
+                            ERR("out of memory\n");
+                            return STATUS_INSUFFICIENT_RESOURCES;
+                        }
+
+                        TRACE("modifying (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+
+                        // Copy the entries that precede the one being removed...
+                        if ((UINT8*)rr > tp.item->data) {
+                            RtlCopyMemory(newrr, tp.item->data, (UINT8*)rr - tp.item->data);
+                            rroff = newrr + ((UINT8*)rr - tp.item->data);
+                        } else {
+                            rroff = newrr;
+                        }
+
+                        // ...and then the entries that follow it.
+                        if ((UINT8*)&rr->name[rr->n] - tp.item->data < tp.item->size)
+                            RtlCopyMemory(rroff, &rr->name[rr->n], tp.item->size - ((UINT8*)&rr->name[rr->n] - tp.item->data));
+
+                        // On failure the buffer was not taken over by the tree, so free it here.
+                        if (!insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newrr, newlen, NULL, Irp, rollback)) {
+                            ERR("error - failed to insert item\n");
+                            ExFreePool(newrr);
+                            return STATUS_INTERNAL_ERROR;
+                        }
+                    }
+
+                    break;
+                }
+
+                if (len > itemlen) {
+                    len -= itemlen;
+                    rr = (ROOT_REF*)&rr->name[rr->n];
+                } else
+                    break;
+            } while (len > 0);
+        }
+    } else {
+        WARN("could not find ROOT_REF entry for subvol %llx in %llx\n", searchkey.offset, searchkey.obj_id);
+        return STATUS_NOT_FOUND;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Adds the ROOT_REF entry rr to the root tree under the key
+// (parsubvolid, TYPE_ROOT_REF, subvolid).
+//
+// If an item with that key already exists, rr is appended to a copy of its
+// data and the item is replaced; otherwise rr itself becomes the item data.
+//
+// Ownership: rr is consumed on every path - it is either handed to
+// insert_tree_item or freed here - so callers must not free it afterwards.
+//
+// Returns STATUS_SUCCESS, the find_item failure status,
+// STATUS_INSUFFICIENT_RESOURCES or STATUS_INTERNAL_ERROR.
+static NTSTATUS add_root_ref(device_extension* Vcb, UINT64 subvolid, UINT64 parsubvolid, ROOT_REF* rr, PIRP Irp, LIST_ENTRY* rollback) {
+    KEY searchkey;
+    traverse_ptr tp;
+    NTSTATUS Status;
+
+    searchkey.obj_id = parsubvolid;
+    searchkey.obj_type = TYPE_ROOT_REF;
+    searchkey.offset = subvolid;
+
+    Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        ExFreePool(rr); // callers do not free rr on failure
+        return Status;
+    }
+
+    if (!keycmp(searchkey, tp.item->key)) {
+        ULONG rrsize = tp.item->size + sizeof(ROOT_REF) - 1 + rr->n;
+        UINT8* rr2;
+
+        rr2 = ExAllocatePoolWithTag(PagedPool, rrsize, ALLOC_TAG);
+        if (!rr2) {
+            ERR("out of memory\n");
+            ExFreePool(rr);
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        if (tp.item->size > 0)
+            RtlCopyMemory(rr2, tp.item->data, tp.item->size);
+
+        // Append the new entry after the existing ones; rr is no longer needed once copied.
+        RtlCopyMemory(rr2 + tp.item->size, rr, sizeof(ROOT_REF) - 1 + rr->n);
+        ExFreePool(rr);
+
+        delete_tree_item(Vcb, &tp, rollback);
+
+        if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr2, rrsize, NULL, Irp, rollback)) {
+            ERR("error - failed to insert item\n");
+            ExFreePool(rr2);
+            return STATUS_INTERNAL_ERROR;
+        }
+    } else {
+        if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr, sizeof(ROOT_REF) - 1 + rr->n, NULL, Irp, rollback)) {
+            ERR("error - failed to insert item\n");
+            ExFreePool(rr);
+            return STATUS_INTERNAL_ERROR;
+        }
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Rewrites the ROOT_BACKREF item (subvolid, TYPE_ROOT_BACKREF, parsubvolid) in
+// the root tree so that it carries a copy of the data of the ROOT_REF item
+// (parsubvolid, TYPE_ROOT_REF, subvolid).
+//
+// Any existing ROOT_BACKREF item with that key is deleted. A new one is only
+// inserted when the ROOT_REF item exists and is non-empty.
+//
+// Returns STATUS_SUCCESS, the find_item failure status,
+// STATUS_INSUFFICIENT_RESOURCES or STATUS_INTERNAL_ERROR.
+static NTSTATUS STDCALL update_root_backref(device_extension* Vcb, UINT64 subvolid, UINT64 parsubvolid, PIRP Irp, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
+    KEY refkey, backrefkey;
+    traverse_ptr tp;
+    UINT8* refcopy = NULL;
+    ULONG reflen = 0;
+
+    // Step 1: snapshot the ROOT_REF data, if there is any.
+    refkey.obj_id = parsubvolid;
+    refkey.obj_type = TYPE_ROOT_REF;
+    refkey.offset = subvolid;
+
+    Status = find_item(Vcb, Vcb->root_root, &tp, &refkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return Status;
+    }
+
+    if (!keycmp(tp.item->key, refkey) && tp.item->size > 0) {
+        reflen = tp.item->size;
+
+        refcopy = ExAllocatePoolWithTag(PagedPool, reflen, ALLOC_TAG);
+        if (!refcopy) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        RtlCopyMemory(refcopy, tp.item->data, reflen);
+    }
+
+    // Step 2: drop the stale ROOT_BACKREF item, if present.
+    backrefkey.obj_id = subvolid;
+    backrefkey.obj_type = TYPE_ROOT_BACKREF;
+    backrefkey.offset = parsubvolid;
+
+    Status = find_item(Vcb, Vcb->root_root, &tp, &backrefkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+
+        if (reflen > 0)
+            ExFreePool(refcopy);
+
+        return Status;
+    }
+
+    if (!keycmp(tp.item->key, backrefkey))
+        delete_tree_item(Vcb, &tp, rollback);
+
+    // Step 3: with no ROOT_REF data there is nothing to re-insert.
+    if (reflen == 0)
+        return STATUS_SUCCESS;
+
+    if (!insert_tree_item(Vcb, Vcb->root_root, subvolid, TYPE_ROOT_BACKREF, parsubvolid, refcopy, reflen, NULL, Irp, rollback)) {
+        ERR("error - failed to insert item\n");
+        ExFreePool(refcopy);
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Makes sure the ROOT_ITEM for tree `root` in the root tree is scheduled for
+// writing.
+//
+// The item is looked up by (root, TYPE_ROOT_ITEM, 0xffffffffffffffff) and must
+// match on obj_id and obj_type. If the stored item is shorter than ROOT_ITEM,
+// it is replaced by a full-length copy with the missing tail zeroed; otherwise
+// the tree holding it is simply flagged with write = TRUE.
+//
+// Returns STATUS_SUCCESS, the find_item failure status,
+// STATUS_INSUFFICIENT_RESOURCES or STATUS_INTERNAL_ERROR.
+static NTSTATUS add_root_item_to_cache(device_extension* Vcb, UINT64 root, PIRP Irp, LIST_ENTRY* rollback) {
+    KEY searchkey;
+    traverse_ptr tp;
+    NTSTATUS Status;
+
+    searchkey.obj_id = root;
+    searchkey.obj_type = TYPE_ROOT_ITEM;
+    searchkey.offset = 0xffffffffffffffff;
+
+    Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return Status;
+    }
+
+    if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
+        ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
+        int3;
+        return STATUS_INTERNAL_ERROR;
+    }
+
+    if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, create new entry with new bits zeroed
+        ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
+        if (!ri) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        if (tp.item->size > 0)
+            RtlCopyMemory(ri, tp.item->data, tp.item->size);
+
+        RtlZeroMemory(((UINT8*)ri) + tp.item->size, sizeof(ROOT_ITEM) - tp.item->size);
+
+        delete_tree_item(Vcb, &tp, rollback);
+
+        // Re-insert under the key offset the old item actually had.
+        if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp, rollback)) {
+            ERR("insert_tree_item failed\n");
+            ExFreePool(ri); // not taken over by the tree on failure
+            return STATUS_INTERNAL_ERROR;
+        }
+    } else {
+        tp.tree->write = TRUE;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Adds the DIR_ITEM entry di (disize bytes) to subvol under the key
+// (inode, TYPE_DIR_ITEM, crc32).
+//
+// If an item with that key already exists (hash collision), di is appended to
+// a copy of the existing data and the item is replaced, provided the combined
+// size fits in node_size - sizeof(tree_header) - sizeof(leaf_node). Otherwise
+// di itself becomes the item data.
+//
+// Ownership: di is consumed on every path - it is either handed to
+// insert_tree_item or freed here.
+// NOTE(review): the caller visible in this file never frees di after a
+// failure; confirm no other caller does before relying on this.
+//
+// Returns STATUS_SUCCESS, the find_item failure status,
+// STATUS_INSUFFICIENT_RESOURCES or STATUS_INTERNAL_ERROR.
+static NTSTATUS add_dir_item(device_extension* Vcb, root* subvol, UINT64 inode, UINT32 crc32, DIR_ITEM* di, ULONG disize, PIRP Irp, LIST_ENTRY* rollback) {
+    KEY searchkey;
+    traverse_ptr tp;
+    UINT8* di2;
+    NTSTATUS Status;
+
+    searchkey.obj_id = inode;
+    searchkey.obj_type = TYPE_DIR_ITEM;
+    searchkey.offset = crc32;
+
+    Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        ExFreePool(di);
+        return Status;
+    }
+
+    if (!keycmp(tp.item->key, searchkey)) {
+        ULONG maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node);
+
+        if (tp.item->size + disize > maxlen) {
+            WARN("DIR_ITEM was longer than maxlen (%u + %u > %u)\n", tp.item->size, disize, maxlen);
+            ExFreePool(di);
+            return STATUS_INTERNAL_ERROR;
+        }
+
+        di2 = ExAllocatePoolWithTag(PagedPool, tp.item->size + disize, ALLOC_TAG);
+        if (!di2) {
+            ERR("out of memory\n");
+            ExFreePool(di);
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        if (tp.item->size > 0)
+            RtlCopyMemory(di2, tp.item->data, tp.item->size);
+
+        RtlCopyMemory(di2 + tp.item->size, di, disize);
+
+        // The new entry has been copied into di2, so di is no longer needed.
+        ExFreePool(di);
+
+        delete_tree_item(Vcb, &tp, rollback);
+
+        if (!insert_tree_item(Vcb, subvol, inode, TYPE_DIR_ITEM, crc32, di2, tp.item->size + disize, NULL, Irp, rollback)) {
+            ERR("error - failed to insert item\n");
+            ExFreePool(di2);
+            return STATUS_INTERNAL_ERROR;
+        }
+    } else {
+        if (!insert_tree_item(Vcb, subvol, inode, TYPE_DIR_ITEM, crc32, di, disize, NULL, Irp, rollback)) {
+            ERR("error - failed to insert item\n");
+            ExFreePool(di);
+            return STATUS_INTERNAL_ERROR;
+        }
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Adds an INODE_EXTREF entry (dir = parinode, index, name = utf8) for inode in
+// subvol. The item key is (inode, TYPE_INODE_EXTREF, crc32c of the name seeded
+// with the low 32 bits of parinode).
+//
+// If an item with that key already exists, the new entry is appended to a copy
+// of its data and the item is replaced, provided the result fits in
+// node_size - sizeof(tree_header) - sizeof(leaf_node). Otherwise a fresh
+// single-entry item is inserted.
+//
+// Returns STATUS_SUCCESS, the find_item failure status,
+// STATUS_INSUFFICIENT_RESOURCES or STATUS_INTERNAL_ERROR.
+static NTSTATUS add_inode_extref(device_extension* Vcb, root* subvol, UINT64 inode, UINT64 parinode, UINT64 index, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback) {
+    KEY searchkey;
+    traverse_ptr tp;
+    INODE_EXTREF* ier;
+    NTSTATUS Status;
+
+    searchkey.obj_id = inode;
+    searchkey.obj_type = TYPE_INODE_EXTREF;
+    searchkey.offset = calc_crc32c((UINT32)parinode, (UINT8*)utf8->Buffer, utf8->Length);
+
+    Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return Status;
+    }
+
+    if (!keycmp(searchkey, tp.item->key)) {
+        ULONG iersize = tp.item->size + sizeof(INODE_EXTREF) - 1 + utf8->Length;
+        UINT8* ier2;
+        UINT32 maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node);
+
+        if (iersize > maxlen) {
+            ERR("item would be too long (%u > %u)\n", iersize, maxlen);
+            return STATUS_INTERNAL_ERROR;
+        }
+
+        ier2 = ExAllocatePoolWithTag(PagedPool, iersize, ALLOC_TAG);
+        if (!ier2) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        if (tp.item->size > 0)
+            RtlCopyMemory(ier2, tp.item->data, tp.item->size);
+
+        // Build the new entry directly after the existing ones.
+        ier = (INODE_EXTREF*)&ier2[tp.item->size];
+        ier->dir = parinode;
+        ier->index = index;
+        ier->n = utf8->Length;
+        RtlCopyMemory(ier->name, utf8->Buffer, utf8->Length);
+
+        delete_tree_item(Vcb, &tp, rollback);
+
+        if (!insert_tree_item(Vcb, subvol, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ier2, iersize, NULL, Irp, rollback)) {
+            ERR("error - failed to insert item\n");
+            ExFreePool(ier2); // not taken over by the tree on failure
+            return STATUS_INTERNAL_ERROR;
+        }
+    } else {
+        ier = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_EXTREF) - 1 + utf8->Length, ALLOC_TAG);
+        if (!ier) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        ier->dir = parinode;
+        ier->index = index;
+        ier->n = utf8->Length;
+        RtlCopyMemory(ier->name, utf8->Buffer, utf8->Length);
+
+        if (!insert_tree_item(Vcb, subvol, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ier, sizeof(INODE_EXTREF) - 1 + utf8->Length, NULL, Irp, rollback)) {
+            ERR("error - failed to insert item\n");
+            ExFreePool(ier); // not taken over by the tree on failure
+            return STATUS_INTERNAL_ERROR;
+        }
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Adds an INODE_REF entry (index, name = utf8) for inode in subvol under the
+// key (inode, TYPE_INODE_REF, parinode).
+//
+// If an item with that key already exists, the new entry is appended to a copy
+// of its data and the item is replaced. Should the combined item exceed
+// node_size - sizeof(tree_header) - sizeof(leaf_node), the entry is stored as
+// an INODE_EXTREF instead when the superblock has
+// BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF set; without that flag the call fails.
+//
+// Returns STATUS_SUCCESS, the find_item / add_inode_extref status,
+// STATUS_INSUFFICIENT_RESOURCES or STATUS_INTERNAL_ERROR.
+static NTSTATUS add_inode_ref(device_extension* Vcb, root* subvol, UINT64 inode, UINT64 parinode, UINT64 index, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback) {
+    KEY searchkey;
+    traverse_ptr tp;
+    INODE_REF* ir;
+    NTSTATUS Status;
+
+    searchkey.obj_id = inode;
+    searchkey.obj_type = TYPE_INODE_REF;
+    searchkey.offset = parinode;
+
+    Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("error - find_item returned %08x\n", Status);
+        return Status;
+    }
+
+    if (!keycmp(searchkey, tp.item->key)) {
+        ULONG irsize = tp.item->size + sizeof(INODE_REF) - 1 + utf8->Length;
+        UINT8* ir2;
+        UINT32 maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node);
+
+        if (irsize > maxlen) {
+            if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF) {
+                TRACE("INODE_REF too long, creating INODE_EXTREF\n");
+                return add_inode_extref(Vcb, subvol, inode, parinode, index, utf8, Irp, rollback);
+            } else {
+                ERR("item would be too long (%u > %u)\n", irsize, maxlen);
+                return STATUS_INTERNAL_ERROR;
+            }
+        }
+
+        ir2 = ExAllocatePoolWithTag(PagedPool, irsize, ALLOC_TAG);
+        if (!ir2) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        if (tp.item->size > 0)
+            RtlCopyMemory(ir2, tp.item->data, tp.item->size);
+
+        // Build the new entry directly after the existing ones.
+        ir = (INODE_REF*)&ir2[tp.item->size];
+        ir->index = index;
+        ir->n = utf8->Length;
+        RtlCopyMemory(ir->name, utf8->Buffer, utf8->Length);
+
+        delete_tree_item(Vcb, &tp, rollback);
+
+        if (!insert_tree_item(Vcb, subvol, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ir2, irsize, NULL, Irp, rollback)) {
+            ERR("error - failed to insert item\n");
+            ExFreePool(ir2); // not taken over by the tree on failure
+            return STATUS_INTERNAL_ERROR;
+        }
+    } else {
+        ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + utf8->Length, ALLOC_TAG);
+        if (!ir) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        ir->index = index;
+        ir->n = utf8->Length;
+        RtlCopyMemory(ir->name, utf8->Buffer, utf8->Length);
+
+        if (!insert_tree_item(Vcb, subvol, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ir, sizeof(INODE_REF) - 1 + ir->n, NULL, Irp, rollback)) {
+            ERR("error - failed to insert item\n");
+            ExFreePool(ir); // not taken over by the tree on failure
+            return STATUS_INTERNAL_ERROR;
+        }
+    }
+
+    return STATUS_SUCCESS;
+}
+
+// Writes the pending directory-entry changes for one fileref into the trees.
+//
+// Nothing is written for a fileref that was created and deleted again, or for
+// one whose fcb is an alternate data stream (fcb->ads); those just get their
+// dirty flag cleared. Otherwise one of three cases applies:
+//   - created: queue DIR_INDEX and DIR_ITEM on batchlist, plus an INODE_REF
+//     (same subvolume as the parent) or ROOT_REF/ROOT_BACKREF (subvolume);
+//   - deleted: remove DIR_ITEM, INODE_REF or ROOT_REF (+ backref update), and
+//     DIR_INDEX, using oldutf8 as the name if it is set;
+//   - otherwise (rename or type change): remove the old entries and add new
+//     ones under the current name.
+//
+// Returns STATUS_SUCCESS or the first failing status. A delete_root_ref
+// failure is only logged, not propagated.
+static NTSTATUS flush_fileref(file_ref* fileref, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
+
+    // if fileref created and then immediately deleted, do nothing
+    if (fileref->created && fileref->deleted) {
+        fileref->dirty = FALSE;
+        return STATUS_SUCCESS;
+    }
+
+    if (fileref->fcb->ads) {
+        fileref->dirty = FALSE;
+        return STATUS_SUCCESS;
+    }
+
+    if (fileref->created) {
+        ULONG disize;
+        DIR_ITEM *di, *di2;
+        UINT32 crc32;
+
+        crc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->utf8.Buffer, fileref->utf8.Length);
+
+        disize = sizeof(DIR_ITEM) - 1 + fileref->utf8.Length;
+        di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
+        if (!di) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
+            di->key.obj_id = fileref->fcb->inode;
+            di->key.obj_type = TYPE_INODE_ITEM;
+            di->key.offset = 0;
+        } else { // subvolume
+            di->key.obj_id = fileref->fcb->subvol->id;
+            di->key.obj_type = TYPE_ROOT_ITEM;
+            di->key.offset = 0xffffffffffffffff;
+        }
+
+        di->transid = fileref->fcb->Vcb->superblock.generation;
+        di->m = 0;
+        di->n = (UINT16)fileref->utf8.Length;
+        di->type = fileref->fcb->type;
+        RtlCopyMemory(di->name, fileref->utf8.Buffer, fileref->utf8.Length);
+
+        // DIR_INDEX and DIR_ITEM carry identical data but each needs its own buffer.
+        di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
+        if (!di2) {
+            ERR("out of memory\n");
+            ExFreePool(di); // nothing has taken di over yet
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        RtlCopyMemory(di2, di, disize);
+
+        if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, fileref->index,
+                                    di, disize, Batch_Insert, Irp, rollback)) {
+            ERR("insert_tree_item_batch failed\n");
+            ExFreePool(di2); // di2 has not been handed to anything yet
+            return STATUS_INTERNAL_ERROR;
+        }
+
+        if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_ITEM, crc32,
+                                    di2, disize, Batch_DirItem, Irp, rollback)) {
+            ERR("insert_tree_item_batch failed\n");
+            return STATUS_INTERNAL_ERROR;
+        }
+
+        if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
+            INODE_REF* ir;
+
+            ir = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_REF) - 1 + fileref->utf8.Length, ALLOC_TAG);
+            if (!ir) {
+                ERR("out of memory\n");
+                return STATUS_INSUFFICIENT_RESOURCES;
+            }
+
+            ir->index = fileref->index;
+            ir->n = fileref->utf8.Length;
+            RtlCopyMemory(ir->name, fileref->utf8.Buffer, ir->n);
+
+            if (!insert_tree_item_batch(batchlist, fileref->fcb->Vcb, fileref->fcb->subvol, fileref->fcb->inode, TYPE_INODE_REF, fileref->parent->fcb->inode,
+                                        ir, sizeof(INODE_REF) - 1 + ir->n, Batch_InodeRef, Irp, rollback)) {
+                ERR("insert_tree_item_batch failed\n");
+                return STATUS_INTERNAL_ERROR;
+            }
+        } else {
+            ULONG rrlen;
+            ROOT_REF* rr;
+
+            rrlen = sizeof(ROOT_REF) - 1 + fileref->utf8.Length;
+
+            rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG);
+            if (!rr) {
+                ERR("out of memory\n");
+                return STATUS_INSUFFICIENT_RESOURCES;
+            }
+
+            rr->dir = fileref->parent->fcb->inode;
+            rr->index = fileref->index;
+            rr->n = fileref->utf8.Length;
+            RtlCopyMemory(rr->name, fileref->utf8.Buffer, fileref->utf8.Length);
+
+            // rr is handed over to add_root_ref; it is not freed here.
+            Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("add_root_ref returned %08x\n", Status);
+                return Status;
+            }
+
+            Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("update_root_backref returned %08x\n", Status);
+                return Status;
+            }
+        }
+
+        fileref->created = FALSE;
+    } else if (fileref->deleted) {
+        UINT32 crc32;
+        KEY searchkey;
+        traverse_ptr tp;
+        ANSI_STRING* name;
+
+        // The on-disk entries still carry the old name if a rename is pending.
+        if (fileref->oldutf8.Buffer)
+            name = &fileref->oldutf8;
+        else
+            name = &fileref->utf8;
+
+        crc32 = calc_crc32c(0xfffffffe, (UINT8*)name->Buffer, name->Length);
+
+        TRACE("deleting %.*S\n", file_desc_fileref(fileref));
+
+        // delete DIR_ITEM (0x54)
+
+        Status = delete_dir_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, crc32, name, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("delete_dir_item returned %08x\n", Status);
+            return Status;
+        }
+
+        if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
+            // delete INODE_REF (0xc)
+
+            Status = delete_inode_ref(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, fileref->parent->fcb->inode, name, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("delete_inode_ref returned %08x\n", Status);
+                return Status;
+            }
+        } else { // subvolume
+            // A failure here is logged only; the backref is still refreshed below.
+            Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, name, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("delete_root_ref returned %08x\n", Status);
+            }
+
+            Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("update_root_backref returned %08x\n", Status);
+                return Status;
+            }
+        }
+
+        // delete DIR_INDEX (0x60)
+
+        searchkey.obj_id = fileref->parent->fcb->inode;
+        searchkey.obj_type = TYPE_DIR_INDEX;
+        searchkey.offset = fileref->index;
+
+        Status = find_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            Status = STATUS_INTERNAL_ERROR;
+            return Status;
+        }
+
+        if (!keycmp(searchkey, tp.item->key)) {
+            delete_tree_item(fileref->fcb->Vcb, &tp, rollback);
+            TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+        }
+
+        if (fileref->oldutf8.Buffer) {
+            ExFreePool(fileref->oldutf8.Buffer);
+            fileref->oldutf8.Buffer = NULL;
+        }
+    } else { // rename or change type
+        PANSI_STRING oldutf8 = fileref->oldutf8.Buffer ? &fileref->oldutf8 : &fileref->utf8;
+        UINT32 crc32, oldcrc32;
+        ULONG disize;
+        DIR_ITEM *di, *di2;
+        KEY searchkey;
+        traverse_ptr tp;
+
+        crc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->utf8.Buffer, fileref->utf8.Length);
+
+        if (!fileref->oldutf8.Buffer)
+            oldcrc32 = crc32;
+        else
+            oldcrc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->oldutf8.Buffer, fileref->oldutf8.Length);
+
+        // delete DIR_ITEM (0x54)
+
+        Status = delete_dir_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, oldcrc32, oldutf8, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("delete_dir_item returned %08x\n", Status);
+            return Status;
+        }
+
+        // add DIR_ITEM (0x54)
+
+        disize = sizeof(DIR_ITEM) - 1 + fileref->utf8.Length;
+        di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
+        if (!di) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
+        if (!di2) {
+            ERR("out of memory\n");
+            ExFreePool(di);
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
+            di->key.obj_id = fileref->fcb->inode;
+            di->key.obj_type = TYPE_INODE_ITEM;
+            di->key.offset = 0;
+        } else { // subvolume
+            di->key.obj_id = fileref->fcb->subvol->id;
+            di->key.obj_type = TYPE_ROOT_ITEM;
+            di->key.offset = 0xffffffffffffffff;
+        }
+
+        di->transid = fileref->fcb->Vcb->superblock.generation;
+        di->m = 0;
+        di->n = (UINT16)fileref->utf8.Length;
+        di->type = fileref->fcb->type;
+        RtlCopyMemory(di->name, fileref->utf8.Buffer, fileref->utf8.Length);
+
+        RtlCopyMemory(di2, di, disize);
+
+        // From here until the final DIR_INDEX insertion di2 is still owned by
+        // this function, so every early return has to free it. di is handed
+        // over to add_dir_item and is not touched again here.
+        Status = add_dir_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, crc32, di, disize, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("add_dir_item returned %08x\n", Status);
+            ExFreePool(di2);
+            return Status;
+        }
+
+        if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
+            // delete INODE_REF (0xc)
+
+            Status = delete_inode_ref(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, fileref->parent->fcb->inode, oldutf8, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("delete_inode_ref returned %08x\n", Status);
+                ExFreePool(di2);
+                return Status;
+            }
+
+            // add INODE_REF (0xc)
+
+            Status = add_inode_ref(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, fileref->parent->fcb->inode, fileref->index, &fileref->utf8, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("add_inode_ref returned %08x\n", Status);
+                ExFreePool(di2);
+                return Status;
+            }
+        } else { // subvolume
+            ULONG rrlen;
+            ROOT_REF* rr;
+
+            // FIXME - make sure this works with duff subvols within snapshots
+
+            // A failure here is logged only; the new ROOT_REF is still added below.
+            Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, oldutf8, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("delete_root_ref returned %08x\n", Status);
+            }
+
+            rrlen = sizeof(ROOT_REF) - 1 + fileref->utf8.Length;
+
+            rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG);
+            if (!rr) {
+                ERR("out of memory\n");
+                ExFreePool(di2);
+                return STATUS_INSUFFICIENT_RESOURCES;
+            }
+
+            rr->dir = fileref->parent->fcb->inode;
+            rr->index = fileref->index;
+            rr->n = fileref->utf8.Length;
+            RtlCopyMemory(rr->name, fileref->utf8.Buffer, fileref->utf8.Length);
+
+            // rr is handed over to add_root_ref; it is not freed here.
+            Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("add_root_ref returned %08x\n", Status);
+                ExFreePool(di2);
+                return Status;
+            }
+
+            Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp, rollback);
+            if (!NT_SUCCESS(Status)) {
+                ERR("update_root_backref returned %08x\n", Status);
+                ExFreePool(di2);
+                return Status;
+            }
+        }
+
+        // delete DIR_INDEX (0x60)
+
+        searchkey.obj_id = fileref->parent->fcb->inode;
+        searchkey.obj_type = TYPE_DIR_INDEX;
+        searchkey.offset = fileref->index;
+
+        Status = find_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            ExFreePool(di2);
+            Status = STATUS_INTERNAL_ERROR;
+            return Status;
+        }
+
+        if (!keycmp(searchkey, tp.item->key)) {
+            delete_tree_item(fileref->fcb->Vcb, &tp, rollback);
+            TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+        } else
+            WARN("could not find (%llx,%x,%llx) in subvol %llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, fileref->fcb->subvol->id);
+
+        // add DIR_INDEX (0x60)
+
+        if (!insert_tree_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, fileref->index, di2, disize, NULL, Irp, rollback)) {
+            ERR("insert_tree_item failed\n");
+            ExFreePool(di2);
+            Status = STATUS_INTERNAL_ERROR;
+            return Status;
+        }
+
+        if (fileref->oldutf8.Buffer) {
+            ExFreePool(fileref->oldutf8.Buffer);
+            fileref->oldutf8.Buffer = NULL;
+        }
+    }
+
+    fileref->dirty = FALSE;
+
+    return STATUS_SUCCESS;
+}
+
+// Flushes all pending changes of the volume to disk.
+//
+// Order of work as performed below:
+//   1. flush dirty filerefs, then dirty fcbs (deleted ADS fcbs first, then
+//      every fcb that is deleted or not in the root tree), committing the
+//      batch list after each stage;
+//   2. update the checksum tree, drop pending roots and process changed chunks;
+//   3. force the root tree and the extent tree's ROOT_ITEM to be rewritten;
+//   4. loop over add_parents / do_splits / allocate_tree_extents /
+//      update_chunk_usage / allocate_cache until the cache stops changing and
+//      trees_consistent reports success;
+//   5. update the root tree, write trees and superblocks, bump the generation,
+//      clear the write flag on every tree and free the dropped roots.
+//
+// Returns STATUS_SUCCESS or the status of the first failing step. Failures of
+// flush_fileref are logged but do not abort the flush.
+NTSTATUS STDCALL do_write(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
+    NTSTATUS Status;
+    LIST_ENTRY *le, batchlist;
+    BOOL cache_changed = FALSE;
+#ifdef DEBUG_FLUSH_TIMES
+    UINT64 filerefs = 0, fcbs = 0;
+    LARGE_INTEGER freq, time1, time2;
+#endif
+#ifdef DEBUG_WRITE_LOOPS
+    UINT loops = 0;
+#endif
+
+    TRACE("(%p)\n", Vcb);
+
+    InitializeListHead(&batchlist);
+
+#ifdef DEBUG_FLUSH_TIMES
+    time1 = KeQueryPerformanceCounter(&freq);
+#endif
+
+    while (!IsListEmpty(&Vcb->dirty_filerefs)) {
+        dirty_fileref* dirt;
+
+        le = RemoveHeadList(&Vcb->dirty_filerefs);
+
+        dirt = CONTAINING_RECORD(le, dirty_fileref, list_entry);
+
+        // Best effort: report the failure but keep flushing the remaining entries.
+        Status = flush_fileref(dirt->fileref, &batchlist, Irp, rollback);
+        if (!NT_SUCCESS(Status))
+            ERR("flush_fileref returned %08x\n", Status);
+
+        free_fileref(dirt->fileref);
+        ExFreePool(dirt);
+
+#ifdef DEBUG_FLUSH_TIMES
+        filerefs++;
+#endif
+    }
+
+    commit_batch_list(Vcb, &batchlist, Irp, rollback);
+
+#ifdef DEBUG_FLUSH_TIMES
+    time2 = KeQueryPerformanceCounter(NULL);
+
+    ERR("flushed %llu filerefs in %llu (freq = %llu)\n", filerefs, time2.QuadPart - time1.QuadPart, freq.QuadPart);
+
+    time1 = KeQueryPerformanceCounter(&freq);
+#endif
+
+    // We process deleted streams first, so we don't run over our xattr
+    // limit unless we absolutely have to.
+
+    le = Vcb->dirty_fcbs.Flink;
+    while (le != &Vcb->dirty_fcbs) {
+        dirty_fcb* dirt;
+        LIST_ENTRY* le2 = le->Flink;
+
+        dirt = CONTAINING_RECORD(le, dirty_fcb, list_entry);
+
+        if (dirt->fcb->deleted && dirt->fcb->ads) {
+            RemoveEntryList(le);
+
+            flush_fcb(dirt->fcb, FALSE, &batchlist, Irp, rollback);
+            free_fcb(dirt->fcb);
+            ExFreePool(dirt);
+
+#ifdef DEBUG_FLUSH_TIMES
+            fcbs++;
+#endif
+        }
+
+        le = le2;
+    }
+
+    // Second pass: everything else except live fcbs belonging to the root tree,
+    // which stay on the dirty list.
+    le = Vcb->dirty_fcbs.Flink;
+    while (le != &Vcb->dirty_fcbs) {
+        dirty_fcb* dirt;
+        LIST_ENTRY* le2 = le->Flink;
+
+        dirt = CONTAINING_RECORD(le, dirty_fcb, list_entry);
+
+        if (dirt->fcb->subvol != Vcb->root_root || dirt->fcb->deleted) {
+            RemoveEntryList(le);
+
+            flush_fcb(dirt->fcb, FALSE, &batchlist, Irp, rollback);
+            free_fcb(dirt->fcb);
+            ExFreePool(dirt);
+
+#ifdef DEBUG_FLUSH_TIMES
+            fcbs++;
+#endif
+        }
+
+        le = le2;
+    }
+
+    commit_batch_list(Vcb, &batchlist, Irp, rollback);
+
+#ifdef DEBUG_FLUSH_TIMES
+    time2 = KeQueryPerformanceCounter(NULL);
+
+    ERR("flushed %llu fcbs in %llu (freq = %llu)\n", fcbs, time2.QuadPart - time1.QuadPart, freq.QuadPart);
+#endif
+
+    ExAcquireResourceExclusiveLite(&Vcb->checksum_lock, TRUE);
+    if (!IsListEmpty(&Vcb->sector_checksums)) {
+        update_checksum_tree(Vcb, Irp, rollback);
+    }
+    ExReleaseResourceLite(&Vcb->checksum_lock);
+
+    if (!IsListEmpty(&Vcb->drop_roots)) {
+        Status = drop_roots(Vcb, Irp, rollback);
+
+        if (!NT_SUCCESS(Status)) {
+            ERR("drop_roots returned %08x\n", Status);
+            return Status;
+        }
+    }
+
+    if (!IsListEmpty(&Vcb->chunks_changed)) {
+        Status = update_chunks(Vcb, &batchlist, Irp, rollback);
+
+        if (!NT_SUCCESS(Status)) {
+            ERR("update_chunks returned %08x\n", Status);
+            return Status;
+        }
+    }
+
+    commit_batch_list(Vcb, &batchlist, Irp, rollback);
+
+    // If only changing superblock, e.g. changing label, we still need to rewrite
+    // the root tree so the generations match, otherwise you won't be able to mount on Linux.
+    if (!Vcb->root_root->treeholder.tree || !Vcb->root_root->treeholder.tree->write) {
+        KEY searchkey;
+
+        traverse_ptr tp;
+
+        searchkey.obj_id = 0;
+        searchkey.obj_type = 0;
+        searchkey.offset = 0;
+
+        // NOTE(review): the lookup result is unused; it is presumably here to
+        // make sure the root tree is loaded before it is flagged - confirm.
+        Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            return Status;
+        }
+
+        Vcb->root_root->treeholder.tree->write = TRUE;
+    }
+
+    // make sure we always update the extent tree
+    Status = add_root_item_to_cache(Vcb, BTRFS_ROOT_EXTENT, Irp, rollback);
+    if (!NT_SUCCESS(Status)) {
+        ERR("add_root_item_to_cache returned %08x\n", Status);
+        return Status;
+    }
+
+    do {
+        Status = add_parents(Vcb, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("add_parents returned %08x\n", Status);
+            goto end;
+        }
+
+        Status = do_splits(Vcb, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("do_splits returned %08x\n", Status);
+            goto end;
+        }
+
+        Status = allocate_tree_extents(Vcb, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("allocate_tree_extents returned %08x\n", Status);
+            goto end;
+        }
+
+        Status = update_chunk_usage(Vcb, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("update_chunk_usage returned %08x\n", Status);
+            goto end;
+        }
+
+        Status = allocate_cache(Vcb, &cache_changed, Irp, rollback);
+        if (!NT_SUCCESS(Status)) {
+            ERR("allocate_cache returned %08x\n", Status);
+            goto end;
+        }
+
+#ifdef DEBUG_WRITE_LOOPS
+        loops++;
+
+        if (cache_changed)
+            ERR("cache has changed, looping again\n");
+#endif
+    } while (cache_changed || !trees_consistent(Vcb, rollback));
+
+#ifdef DEBUG_WRITE_LOOPS
+    ERR("%u loops\n", loops);
+#endif
+
+    TRACE("trees consistent\n");
+
+    Status = update_root_root(Vcb, Irp, rollback);
+    if (!NT_SUCCESS(Status)) {
+        ERR("update_root_root returned %08x\n", Status);
+        goto end;
+    }
+
+    Status = write_trees(Vcb, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("write_trees returned %08x\n", Status);
+        goto end;
+    }
+
+    Vcb->superblock.cache_generation = Vcb->superblock.generation;
+
+    Status = write_superblocks(Vcb, Irp);
+    if (!NT_SUCCESS(Status)) {
+        ERR("write_superblocks returned %08x\n", Status);
+        goto end;
+    }
+
+    clean_space_cache(Vcb);
+
+    Vcb->superblock.generation++;
+
+    Status = STATUS_SUCCESS;
+
+    le = Vcb->trees.Flink;
+    while (le != &Vcb->trees) {
+        tree* t = CONTAINING_RECORD(le, tree, list_entry);
+
+#ifdef DEBUG_PARANOID
+        KEY searchkey;
+        traverse_ptr tp;
+
+        // Verify that every tree has a METADATA_ITEM or EXTENT_ITEM in the extent tree.
+        searchkey.obj_id = t->header.address;
+        searchkey.obj_type = TYPE_METADATA_ITEM;
+        searchkey.offset = 0xffffffffffffffff;
+
+        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) {
+            ERR("error - find_item returned %08x\n", Status);
+            int3;
+        }
+
+        if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
+            searchkey.obj_id = t->header.address;
+            searchkey.obj_type = TYPE_EXTENT_ITEM;
+            searchkey.offset = 0xffffffffffffffff;
+
+            Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
+            if (!NT_SUCCESS(Status)) {
+                ERR("error - find_item returned %08x\n", Status);
+                int3;
+            }
+
+            if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
+                ERR("error - could not find entry in extent tree for tree at %llx\n", t->header.address);
+                int3;
+            }
+        }
+#endif
+
+        t->write = FALSE;
+
+        le = le->Flink;
+    }
+
+    Vcb->need_write = FALSE;
+
+    // The dropped roots are no longer needed once everything is on disk.
+    while (!IsListEmpty(&Vcb->drop_roots)) {
+        root* r;
+
+        le = RemoveHeadList(&Vcb->drop_roots);
+        r = CONTAINING_RECORD(le, root, list_entry);
+
+        ExDeleteResourceLite(&r->nonpaged->load_tree_lock);
+        ExFreePool(r->nonpaged);
+        ExFreePool(r);
+    }
+
+end:
+    TRACE("do_write returning %08x\n", Status);
+
+    return Status;
+}
+
+#ifdef DEBUG_STATS
+// Logs the read statistics accumulated in Vcb->stats and then resets them to
+// zero. "other" time is the total minus checksum and disk time.
+static void print_stats(device_extension* Vcb) {
+    UINT64 total_time = Vcb->stats.read_total_time;
+    UINT64 csum_time = Vcb->stats.read_csum_time;
+    UINT64 disk_time = Vcb->stats.read_disk_time;
+    UINT64 other_time = total_time - csum_time - disk_time;
+
+    ERR("READ STATS:\n");
+    ERR("number of reads: %llu\n", Vcb->stats.num_reads);
+    ERR("data read: %llu bytes\n", Vcb->stats.data_read);
+    ERR("total time taken: %llu\n", total_time);
+    ERR("csum time taken: %llu\n", csum_time);
+    ERR("disk time taken: %llu\n", disk_time);
+    ERR("other time taken: %llu\n", other_time);
+
+    // Start the next measurement period from a clean slate.
+    RtlZeroMemory(&Vcb->stats, sizeof(debug_stats));
+}
+#endif
+
static void do_flush(device_extension* Vcb) {
LIST_ENTRY rollback;
ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
+#ifdef DEBUG_STATS
+ print_stats(Vcb);
+#endif
+
if (Vcb->need_write && !Vcb->readonly)
do_write(Vcb, NULL, &rollback);
free_trees(Vcb);
- clear_rollback(&rollback);
+ clear_rollback(Vcb, &rollback);
ExReleaseResourceLite(&Vcb->tree_lock);
// #define DEBUG_SPACE_LISTS
-static NTSTATUS remove_free_space_inode(device_extension* Vcb, UINT64 inode, PIRP Irp, LIST_ENTRY* rollback) {
+static NTSTATUS remove_free_space_inode(device_extension* Vcb, UINT64 inode, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
NTSTATUS Status;
fcb* fcb;
- Status = open_fcb(Vcb, Vcb->root_root, inode, BTRFS_TYPE_FILE, NULL, NULL, &fcb, Irp);
+ Status = open_fcb(Vcb, Vcb->root_root, inode, BTRFS_TYPE_FILE, NULL, NULL, &fcb, PagedPool, Irp);
if (!NT_SUCCESS(Status)) {
ERR("open_fcb returned %08x\n", Status);
return Status;
fcb->deleted = TRUE;
- flush_fcb(fcb, FALSE, Irp, rollback);
+ flush_fcb(fcb, FALSE, batchlist, Irp, rollback);
free_fcb(fcb);
return STATUS_SUCCESS;
}
-NTSTATUS clear_free_space_cache(device_extension* Vcb, PIRP Irp) {
+NTSTATUS clear_free_space_cache(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp) {
KEY searchkey;
traverse_ptr tp, next_tp;
NTSTATUS Status;
else {
LIST_ENTRY* le;
- Status = remove_free_space_inode(Vcb, fsi->key.obj_id, Irp, &rollback);
+ Status = remove_free_space_inode(Vcb, fsi->key.obj_id, batchlist, Irp, &rollback);
if (!NT_SUCCESS(Status)) {
ERR("remove_free_space_inode for (%llx,%x,%llx) returned %08x\n", fsi->key.obj_id, fsi->key.obj_type, fsi->key.offset, Status);
end:
if (NT_SUCCESS(Status))
- clear_rollback(&rollback);
+ clear_rollback(Vcb, &rollback);
else
do_rollback(Vcb, &rollback);
return STATUS_SUCCESS;
}
-static void load_free_space_bitmap(device_extension* Vcb, chunk* c, UINT64 offset, void* data) {
+static void load_free_space_bitmap(device_extension* Vcb, chunk* c, UINT64 offset, void* data, UINT64* total_space) {
RTL_BITMAP bmph;
UINT32 i, *dwords = data;
ULONG runlength, index;
add_space_entry(&c->space, &c->space_size, addr, length);
index += runlength;
+ *total_space += length;
runlength = RtlFindNextForwardRunClear(&bmph, index, &index);
}
InsertTailList(list_size, &s->list_entry_size);
}
+typedef struct { // one node in a list of distinct stripe numbers (filled in by add_superblock_stripe)
+ UINT64 stripe; // stripe number; get_superblock_size passes byte offsets divided by the chunk's stripe_length
+ LIST_ENTRY list_entry; // links this node into the list whose head the caller supplies
+} superblock_stripe;
+
+// Records the stripe numbers off, off + 1, ..., off + len - 1 in the list headed
+// by stripes. A number that is already in the list is not added again, so the
+// list length ends up being the count of distinct stripes.
+//
+// Entries are allocated from paged pool and are never freed here; whoever owns
+// the list must free them. If an allocation fails the error is logged and the
+// function returns early, leaving the list short by the stripes not yet added.
+static void add_superblock_stripe(LIST_ENTRY* stripes, UINT64 off, UINT64 len) {
+    UINT64 i;
+
+    for (i = 0; i < len; i++) {
+        LIST_ENTRY* le;
+        superblock_stripe* ss;
+        BOOL found = FALSE;
+
+        le = stripes->Flink;
+        while (le != stripes) {
+            ss = CONTAINING_RECORD(le, superblock_stripe, list_entry);
+
+            // Leave the search with break rather than continue: a continue here
+            // would only restart this inner loop without advancing le, and so
+            // would spin forever on the first duplicate.
+            if (ss->stripe == off + i) {
+                found = TRUE;
+                break;
+            }
+
+            le = le->Flink;
+        }
+
+        if (found)
+            continue;
+
+        ss = ExAllocatePoolWithTag(PagedPool, sizeof(superblock_stripe), ALLOC_TAG);
+        if (!ss) {
+            ERR("out of memory\n");
+            return;
+        }
+
+        ss->stripe = off + i;
+        InsertTailList(stripes, &ss->list_entry);
+    }
+}
+
+// Returns how many bytes of chunk c are covered by superblock copies, counted in
+// whole stripes: for every entry of the zero-terminated superblock_addrs array
+// and every device stripe of the chunk whose range overlaps it, the affected
+// stripe numbers are collected (each distinct number once), and the result is
+// that count multiplied by the chunk's stripe_length.
+//
+// The per-device stripe records are read from directly after the CHUNK_ITEM.
+// Returns 0 if stripe_length is zero, since nothing can be counted in stripes.
+static UINT64 get_superblock_size(chunk* c) {
+    CHUNK_ITEM* ci = c->chunk_item;
+    CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
+    UINT64 off_start, off_end, space;
+    UINT16 i, j;
+    LIST_ENTRY stripes;
+
+    // stripe_length is used as a divisor throughout; refuse a zero value
+    // instead of faulting on a divide by zero.
+    if (ci->stripe_length == 0) {
+        ERR("chunk %llx has a stripe length of zero\n", c->offset);
+        return 0;
+    }
+
+    InitializeListHead(&stripes);
+
+    for (i = 0; superblock_addrs[i] != 0; i++) {
+        if (ci->type & BLOCK_FLAG_RAID0 || ci->type & BLOCK_FLAG_RAID10) {
+            for (j = 0; j < ci->num_stripes; j++) {
+                ULONG sub_stripes = max(ci->sub_stripes, 1);
+
+                if (cis[j].offset + (ci->size * ci->num_stripes / sub_stripes) > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) {
+                    // round down to a stripe boundary on this device, then scale
+                    // up to an offset within the chunk as a whole
+                    off_start = superblock_addrs[i] - cis[j].offset;
+                    off_start -= off_start % ci->stripe_length;
+                    off_start *= ci->num_stripes / sub_stripes;
+                    off_start += (j / sub_stripes) * ci->stripe_length;
+
+                    add_superblock_stripe(&stripes, off_start / ci->stripe_length, 1);
+                }
+            }
+        } else if (ci->type & BLOCK_FLAG_RAID5) {
+            // num_stripes - 1 is used as a divisor below; skip a chunk item
+            // that does not have at least two stripes.
+            if (ci->num_stripes < 2)
+                continue;
+
+            for (j = 0; j < ci->num_stripes; j++) {
+                UINT64 stripe_size = ci->size / (ci->num_stripes - 1);
+
+                if (cis[j].offset + stripe_size > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) {
+                    off_start = superblock_addrs[i] - cis[j].offset;
+                    off_start -= off_start % (ci->stripe_length * (ci->num_stripes - 1));
+                    off_start *= ci->num_stripes - 1;
+
+                    off_end = off_start + (ci->stripe_length * (ci->num_stripes - 1));
+
+                    add_superblock_stripe(&stripes, off_start / ci->stripe_length, (off_end - off_start) / ci->stripe_length);
+                }
+            }
+        } else if (ci->type & BLOCK_FLAG_RAID6) {
+            // num_stripes - 2 is used as a divisor below; skip a chunk item
+            // that does not have at least three stripes.
+            if (ci->num_stripes < 3)
+                continue;
+
+            for (j = 0; j < ci->num_stripes; j++) {
+                UINT64 stripe_size = ci->size / (ci->num_stripes - 2);
+
+                if (cis[j].offset + stripe_size > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) {
+                    off_start = superblock_addrs[i] - cis[j].offset;
+                    off_start -= off_start % (ci->stripe_length * (ci->num_stripes - 2));
+                    off_start *= ci->num_stripes - 2;
+
+                    off_end = off_start + (ci->stripe_length * (ci->num_stripes - 2));
+
+                    add_superblock_stripe(&stripes, off_start / ci->stripe_length, (off_end - off_start) / ci->stripe_length);
+                }
+            }
+        } else { // SINGLE, DUPLICATE, RAID1
+            for (j = 0; j < ci->num_stripes; j++) {
+                if (cis[j].offset + ci->size > superblock_addrs[i] && cis[j].offset <= superblock_addrs[i] + sizeof(superblock)) {
+                    // round the start down and the end up to stripe boundaries
+                    off_start = ((superblock_addrs[i] - cis[j].offset) / ci->stripe_length) * ci->stripe_length;
+                    off_end = sector_align(superblock_addrs[i] - cis[j].offset + sizeof(superblock), ci->stripe_length);
+
+                    add_superblock_stripe(&stripes, off_start / ci->stripe_length, (off_end - off_start) / ci->stripe_length);
+                }
+            }
+        }
+    }
+
+    // count the distinct stripes collected above, freeing the list as we go
+    space = 0;
+
+    while (!IsListEmpty(&stripes)) {
+        LIST_ENTRY* le = RemoveHeadList(&stripes);
+        superblock_stripe* ss = CONTAINING_RECORD(le, superblock_stripe, list_entry);
+
+        space++;
+
+        ExFreePool(ss);
+    }
+
+    return space * ci->stripe_length;
+}
+
static NTSTATUS load_stored_free_space_cache(device_extension* Vcb, chunk* c, PIRP Irp) {
KEY searchkey;
traverse_ptr tp;
NTSTATUS Status;
UINT32 *checksums, crc32;
FREE_SPACE_ENTRY* fse;
- UINT64 size, num_entries, num_bitmaps, extent_length, bmpnum, off;
+ UINT64 size, num_entries, num_bitmaps, extent_length, bmpnum, off, total_space = 0, superblock_size;
LIST_ENTRY *le, rollback;
// FIXME - does this break if Vcb->superblock.sector_size is not 4096?
return Status;
}
- if (keycmp(&tp.item->key, &searchkey)) {
+ if (keycmp(tp.item->key, searchkey)) {
TRACE("(%llx,%x,%llx) not found\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
return STATUS_NOT_FOUND;
}
num_entries = fsi->num_entries;
num_bitmaps = fsi->num_bitmaps;
- Status = open_fcb(Vcb, Vcb->root_root, inode, BTRFS_TYPE_FILE, NULL, NULL, &c->cache, Irp);
+ Status = open_fcb(Vcb, Vcb->root_root, inode, BTRFS_TYPE_FILE, NULL, NULL, &c->cache, PagedPool, Irp);
if (!NT_SUCCESS(Status)) {
ERR("open_fcb returned %08x\n", Status);
return STATUS_NOT_FOUND;
c->cache->inode_item.flags |= BTRFS_INODE_NODATACOW;
+ if (num_entries == 0 && num_bitmaps == 0)
+ return STATUS_SUCCESS;
+
size = sector_align(c->cache->inode_item.st_size, Vcb->superblock.sector_size);
data = ExAllocatePoolWithTag(PagedPool, size, ALLOC_TAG);
ExFreePool(data);
return Status;
}
+
+ total_space += fse->size;
} else if (fse->type != FREE_SPACE_BITMAP) {
ERR("unknown free-space type %x\n", fse->type);
}
if (fse->type == FREE_SPACE_BITMAP) {
// FIXME - make sure we don't overflow the buffer here
- load_free_space_bitmap(Vcb, c, fse->offset, &data[bmpnum * Vcb->superblock.sector_size]);
+ load_free_space_bitmap(Vcb, c, fse->offset, &data[bmpnum * Vcb->superblock.sector_size], &total_space);
bmpnum++;
}
}
}
+ // do sanity check
+
+ superblock_size = get_superblock_size(c);
+ if (c->chunk_item->size - c->used != total_space + superblock_size) {
+ WARN("invalidating cache for chunk %llx: space was %llx, expected %llx\n", c->offset, total_space + superblock_size, c->chunk_item->size - c->used);
+ goto clearcache;
+ }
+
le = c->space.Flink;
while (le != &c->space) {
space* s = CONTAINING_RECORD(le, space, list_entry);
return Status;
}
- clear_rollback(&rollback);
+ clear_rollback(Vcb, &rollback);
c->cache->deleted = TRUE;
mark_fcb_dirty(c->cache);
while (le != &fcb->Vcb->chunks) {
c = CONTAINING_RECORD(le, chunk, list_entry);
- ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-
- if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
- if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, FALSE, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, length)) {
- ExReleaseResourceLite(&c->lock);
- ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
- return STATUS_SUCCESS;
+ if (!c->readonly) {
+ ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+
+ if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
+ if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, FALSE, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, length)) {
+ ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
+ return STATUS_SUCCESS;
+ }
}
+
+ ExReleaseResourceLite(&c->lock);
}
- ExReleaseResourceLite(&c->lock);
-
le = le->Flink;
}
ExAcquireResourceExclusiveLite(&c->lock, TRUE);
if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
- if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, FALSE, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, length)) {
- ExReleaseResourceLite(&c->lock);
+ if (insert_extent_chunk(fcb->Vcb, fcb, c, start, length, FALSE, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, length))
return STATUS_SUCCESS;
- }
}
ExReleaseResourceLite(&c->lock);
return STATUS_DISK_FULL;
}
-static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, BOOL* changed, PIRP Irp, LIST_ENTRY* rollback) {
+static NTSTATUS allocate_cache_chunk(device_extension* Vcb, chunk* c, BOOL* changed, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
LIST_ENTRY* le;
NTSTATUS Status;
UINT64 num_entries, new_cache_size, i;
new_cache_size = sector_align(new_cache_size, CACHE_INCREMENTS * Vcb->superblock.sector_size);
- TRACE("chunk %llx: cache_size = %llx, new_cache_size = %llx\n", c->offset, c->cache->inode_item.st_size, new_cache_size);
+ TRACE("chunk %llx: cache_size = %llx, new_cache_size = %llx\n", c->offset, c->cache ? c->cache->inode_item.st_size : 0, new_cache_size);
if (!c->cache) {
FREE_SPACE_ITEM* fsi;
c->cache->subvol = Vcb->root_root;
- if (Vcb->root_root->lastinode == 0)
- get_last_inode(Vcb, Vcb->root_root, Irp);
-
- c->cache->inode = Vcb->root_root->lastinode > 0x100 ? (Vcb->root_root->lastinode + 1) : 0x101;
+ c->cache->inode = InterlockedIncrement64(&Vcb->root_root->lastinode);
c->cache->type = BTRFS_TYPE_FILE;
c->cache->created = TRUE;
return Status;
}
- if (!keycmp(&searchkey, &tp.item->key))
+ if (!keycmp(searchkey, tp.item->key))
delete_tree_item(Vcb, &tp, rollback);
fsi->key.obj_id = c->cache->inode;
c->cache->extents_changed = TRUE;
- Vcb->root_root->lastinode = c->cache->inode;
-
- flush_fcb(c->cache, TRUE, Irp, rollback);
+ flush_fcb(c->cache, TRUE, batchlist, Irp, rollback);
*changed = TRUE;
} else if (new_cache_size > c->cache->inode_item.st_size) {
return Status;
}
- if (keycmp(&searchkey, &tp.item->key)) {
+ if (keycmp(searchkey, tp.item->key)) {
ERR("could not find (%llx,%x,%llx) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
return STATUS_INTERNAL_ERROR;
}
c->cache->inode_item.st_size = new_cache_size;
c->cache->inode_item.st_blocks = new_cache_size;
- flush_fcb(c->cache, TRUE, Irp, rollback);
+ flush_fcb(c->cache, TRUE, batchlist, Irp, rollback);
*changed = TRUE;
} else {
return Status;
}
- if (keycmp(&searchkey, &tp.item->key)) {
+ if (keycmp(searchkey, tp.item->key)) {
INODE_ITEM* ii;
ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
return Status;
}
- if (keycmp(&searchkey, &tp.item->key)) {
+ if (keycmp(searchkey, tp.item->key)) {
ERR("could not find (%llx,%x,%llx) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
+ int3;
return STATUS_INTERNAL_ERROR;
}
}
NTSTATUS allocate_cache(device_extension* Vcb, BOOL* changed, PIRP Irp, LIST_ENTRY* rollback) {
- LIST_ENTRY* le = Vcb->chunks_changed.Flink;
+ LIST_ENTRY *le = Vcb->chunks_changed.Flink, batchlist;
NTSTATUS Status;
*changed = FALSE;
+ InitializeListHead(&batchlist);
+
while (le != &Vcb->chunks_changed) {
BOOL b;
chunk* c = CONTAINING_RECORD(le, chunk, list_entry_changed);
ExAcquireResourceExclusiveLite(&c->lock, TRUE);
- Status = allocate_cache_chunk(Vcb, c, &b, Irp, rollback);
+ Status = allocate_cache_chunk(Vcb, c, &b, &batchlist, Irp, rollback);
ExReleaseResourceLite(&c->lock);
if (b)
if (!NT_SUCCESS(Status)) {
ERR("allocate_cache_chunk(%llx) returned %08x\n", c->offset, Status);
+ clear_batch_list(Vcb, &batchlist);
return Status;
}
le = le->Flink;
}
+ commit_batch_list(Vcb, &batchlist, Irp, rollback);
+
return STATUS_SUCCESS;
}
-static void add_rollback_space(LIST_ENTRY* rollback, BOOL add, LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c) {
+static void add_rollback_space(device_extension* Vcb, LIST_ENTRY* rollback, BOOL add, LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c) {
rollback_space* rs;
rs = ExAllocatePoolWithTag(PagedPool, sizeof(rollback_space), ALLOC_TAG);
rs->length = length;
rs->chunk = c;
- add_rollback(rollback, add ? ROLLBACK_ADD_SPACE : ROLLBACK_SUBTRACT_SPACE, rs);
+ add_rollback(Vcb, rollback, add ? ROLLBACK_ADD_SPACE : ROLLBACK_SUBTRACT_SPACE, rs);
}
-void _space_list_add2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback, const char* func) {
+void _space_list_add2(device_extension* Vcb, LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback, const char* func) {
LIST_ENTRY* le;
space *s, *s2;
InsertTailList(list_size, &s->list_entry_size);
if (rollback)
- add_rollback_space(rollback, TRUE, list, list_size, address, length, c);
+ add_rollback_space(Vcb, rollback, TRUE, list, list_size, address, length, c);
return;
}
if (address <= s2->address && address + length >= s2->address + s2->size) {
if (address < s2->address) {
if (rollback)
- add_rollback_space(rollback, TRUE, list, list_size, address, s2->address - address, c);
+ add_rollback_space(Vcb, rollback, TRUE, list, list_size, address, s2->address - address, c);
s2->size += s2->address - address;
s2->address = address;
if (length > s2->size) {
if (rollback)
- add_rollback_space(rollback, TRUE, list, list_size, s2->address + s2->size, address + length - s2->address - s2->size, c);
+ add_rollback_space(Vcb, rollback, TRUE, list, list_size, s2->address + s2->size, address + length - s2->address - s2->size, c);
s2->size = length;
// new entry overlaps start of old one
if (address < s2->address && address + length >= s2->address) {
if (rollback)
- add_rollback_space(rollback, TRUE, list, list_size, address, s2->address - address, c);
+ add_rollback_space(Vcb, rollback, TRUE, list, list_size, address, s2->address - address, c);
s2->size += s2->address - address;
s2->address = address;
// new entry overlaps end of old one
if (address <= s2->address + s2->size && address + length > s2->address + s2->size) {
if (rollback)
- add_rollback_space(rollback, TRUE, list, list_size, address, s2->address + s2->size - address, c);
+ add_rollback_space(Vcb, rollback, TRUE, list, list_size, address, s2->address + s2->size - address, c);
s2->size = address + length - s2->address;
}
if (rollback)
- add_rollback_space(rollback, TRUE, list, list_size, address, length, c);
+ add_rollback_space(Vcb, rollback, TRUE, list, list_size, address, length, c);
s->address = address;
s->size = length;
order_space_entry(s, list_size);
if (rollback)
- add_rollback_space(rollback, TRUE, list, list_size, address, length, c);
+ add_rollback_space(Vcb, rollback, TRUE, list, list_size, address, length, c);
}
-static void space_list_merge(LIST_ENTRY* spacelist, LIST_ENTRY* spacelist_size, LIST_ENTRY* deleting) {
+static void space_list_merge(device_extension* Vcb, LIST_ENTRY* spacelist, LIST_ENTRY* spacelist_size, LIST_ENTRY* deleting) {
LIST_ENTRY* le;
if (!IsListEmpty(deleting)) {
while (le != deleting) {
space* s = CONTAINING_RECORD(le, space, list_entry);
- space_list_add2(spacelist, spacelist_size, s->address, s->size, NULL);
+ space_list_add2(Vcb, spacelist, spacelist_size, s->address, s->size, NULL);
le = le->Flink;
}
}
}
-static NTSTATUS update_chunk_cache(device_extension* Vcb, chunk* c, BTRFS_TIME* now, PIRP Irp, LIST_ENTRY* rollback) {
+static NTSTATUS update_chunk_cache(device_extension* Vcb, chunk* c, BTRFS_TIME* now, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
NTSTATUS Status;
KEY searchkey;
traverse_ptr tp;
UINT32* checksums;
LIST_ENTRY* le;
- space_list_merge(&c->space, &c->space_size, &c->deleting);
+ space_list_merge(Vcb, &c->space, &c->space_size, &c->deleting);
data = ExAllocatePoolWithTag(NonPagedPool, c->cache->inode_item.st_size, ALLOC_TAG);
if (!data) {
c->cache->inode_item.sequence++;
c->cache->inode_item.st_ctime = *now;
- flush_fcb(c->cache, TRUE, Irp, rollback);
+ flush_fcb(c->cache, TRUE, batchlist, Irp, rollback);
// update free_space item
return Status;
}
- if (keycmp(&searchkey, &tp.item->key)) {
+ if (keycmp(searchkey, tp.item->key)) {
ERR("could not find (%llx,%x,%llx) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
return STATUS_INTERNAL_ERROR;
}
}
NTSTATUS update_chunk_caches(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
- LIST_ENTRY* le = Vcb->chunks_changed.Flink;
+ LIST_ENTRY *le = Vcb->chunks_changed.Flink, batchlist;
NTSTATUS Status;
chunk* c;
LARGE_INTEGER time;
KeQuerySystemTime(&time);
win_time_to_unix(time, &now);
+ InitializeListHead(&batchlist);
+
while (le != &Vcb->chunks_changed) {
c = CONTAINING_RECORD(le, chunk, list_entry_changed);
ExAcquireResourceExclusiveLite(&c->lock, TRUE);
- Status = update_chunk_cache(Vcb, c, &now, Irp, rollback);
+ Status = update_chunk_cache(Vcb, c, &now, &batchlist, Irp, rollback);
ExReleaseResourceLite(&c->lock);
if (!NT_SUCCESS(Status)) {
ERR("update_chunk_cache(%llx) returned %08x\n", c->offset, Status);
+ clear_batch_list(Vcb, &batchlist);
return Status;
}
le = le->Flink;
}
+ commit_batch_list(Vcb, &batchlist, Irp, rollback);
+
return STATUS_SUCCESS;
}
if (!c->list_entry_changed.Flink)
InsertTailList(&Vcb->chunks_changed, &c->list_entry_changed);
- _space_list_add2(list, deleting ? NULL : &c->space_size, address, length, c, rollback, func);
+ _space_list_add2(Vcb, list, deleting ? NULL : &c->space_size, address, length, c, rollback, func);
}
-void _space_list_subtract2(LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback, const char* func) {
+void _space_list_subtract2(device_extension* Vcb, LIST_ENTRY* list, LIST_ENTRY* list_size, UINT64 address, UINT64 length, chunk* c, LIST_ENTRY* rollback, const char* func) {
LIST_ENTRY *le, *le2;
space *s, *s2;
if (s2->address >= address && s2->address + s2->size <= address + length) { // remove entry entirely
if (rollback)
- add_rollback_space(rollback, FALSE, list, list_size, s2->address, s2->size, c);
+ add_rollback_space(Vcb, rollback, FALSE, list, list_size, s2->address, s2->size, c);
RemoveEntryList(&s2->list_entry);
} else if (address + length > s2->address && address + length < s2->address + s2->size) {
if (address > s2->address) { // cut out hole
if (rollback)
- add_rollback_space(rollback, FALSE, list, list_size, address, length, c);
+ add_rollback_space(Vcb, rollback, FALSE, list, list_size, address, length, c);
s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
return;
} else { // remove start of entry
if (rollback)
- add_rollback_space(rollback, FALSE, list, list_size, s2->address, address + length - s2->address, c);
+ add_rollback_space(Vcb, rollback, FALSE, list, list_size, s2->address, address + length - s2->address, c);
s2->size -= address + length - s2->address;
s2->address = address + length;
}
} else if (address > s2->address && address < s2->address + s2->size) { // remove end of entry
if (rollback)
- add_rollback_space(rollback, FALSE, list, list_size, address, s2->address + s2->size - address, c);
+ add_rollback_space(Vcb, rollback, FALSE, list, list_size, address, s2->address + s2->size - address, c);
s2->size = address - s2->address;
if (!c->list_entry_changed.Flink)
InsertTailList(&Vcb->chunks_changed, &c->list_entry_changed);
- _space_list_subtract2(list, deleting ? NULL : &c->space_size, address, length, c, rollback, func);
+ _space_list_subtract2(Vcb, list, deleting ? NULL : &c->space_size, address, length, c, rollback, func);
}
extern LIST_ENTRY VcbList;
extern ERESOURCE global_loading_lock;
+extern LIST_ENTRY volumes;
static NTSTATUS get_file_ids(PFILE_OBJECT FileObject, void* data, ULONG length) {
btrfs_get_file_ids* bgfi;
return STATUS_INSUFFICIENT_RESOURCES;
}
- Status = read_data(Vcb, addr, Vcb->superblock.node_size, NULL, TRUE, buf, NULL, Irp);
+ Status = read_data(Vcb, addr, Vcb->superblock.node_size, NULL, TRUE, buf, NULL, NULL, Irp);
if (!NT_SUCCESS(Status)) {
ERR("read_data returned %08x\n", Status);
goto end;
th->address = t.new_address;
th->tree_id = subvol->id;
th->generation = Vcb->superblock.generation;
+ th->fs_uuid = Vcb->superblock.uuid;
if (th->level == 0) {
UINT32 i;
if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) && ln[i].offset + ln[i].size <= Vcb->superblock.node_size - sizeof(tree_header)) {
EXTENT_DATA* ed = (EXTENT_DATA*)(((UINT8*)&th[1]) + ln[i].offset);
- // FIXME - what are we supposed to do with prealloc here? Replace it with sparse extents, or do new preallocation?
- if (ed->type == EXTENT_TYPE_REGULAR && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
+ if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
EXTENT_DATA2* ed2 = (EXTENT_DATA2*)&ed->data[0];
if (ed2->size != 0) { // not sparse
}
} else {
UINT32 i;
- UINT64 newaddr;
internal_node* in = (internal_node*)&th[1];
for (i = 0; i < th->num_items; i++) {
- Status = snapshot_tree_copy(Vcb, in[i].address, subvol, dupflags, &newaddr, Irp, rollback);
+ TREE_BLOCK_REF tbr;
+ tbr.offset = subvol->id;
+
+ Status = increase_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr, NULL, th->level - 1, Irp, rollback);
if (!NT_SUCCESS(Status)) {
- ERR("snapshot_tree_copy returned %08x\n", Status);
+ ERR("increase_extent_refcount returned %08x\n", Status);
goto end;
}
-
- in[i].generation = Vcb->superblock.generation;
- in[i].address = newaddr;
}
}
free_trees(Vcb);
- clear_rollback(&rollback);
+ clear_rollback(Vcb, &rollback);
InitializeListHead(&rollback);
// create new root
- if (Vcb->root_root->lastinode == 0)
- get_last_inode(Vcb, Vcb->root_root, Irp);
-
- id = Vcb->root_root->lastinode > 0x100 ? (Vcb->root_root->lastinode + 1) : 0x101;
+ id = InterlockedIncrement64(&Vcb->root_root->lastinode);
Status = create_root(Vcb, id, &r, TRUE, Vcb->superblock.generation, Irp, &rollback);
if (!NT_SUCCESS(Status)) {
goto end;
}
+ r->lastinode = subvol->lastinode;
+
if (!Vcb->uuid_root) {
root* uuid_root;
RtlCopyMemory(&searchkey.offset, &r->root_item.uuid.uuid[sizeof(UINT64)], sizeof(UINT64));
Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE, Irp);
- } while (NT_SUCCESS(Status) && !keycmp(&searchkey, &tp.item->key));
+ } while (NT_SUCCESS(Status) && !keycmp(searchkey, tp.item->key));
*root_num = r->id;
}
RtlCopyMemory(tp.item->data, &r->root_item, sizeof(ROOT_ITEM));
- Vcb->root_root->lastinode = r->id;
// update ROOT_ITEM of original subvol
RtlCopyMemory(fr->utf8.Buffer, utf8->Buffer, utf8->Length);
- Status = open_fcb(Vcb, r, r->root_item.objid, BTRFS_TYPE_DIRECTORY, utf8, fcb, &fr->fcb, Irp);
+ Status = open_fcb(Vcb, r, r->root_item.objid, BTRFS_TYPE_DIRECTORY, utf8, fcb, &fr->fcb, PagedPool, Irp);
if (!NT_SUCCESS(Status)) {
ERR("open_fcb returned %08x\n", Status);
free_fileref(fr);
fcb->inode_item.transid = Vcb->superblock.generation;
fcb->inode_item.sequence++;
fcb->inode_item.st_size += utf8->Length * 2;
- fcb->inode_item.st_ctime = now;
- fcb->inode_item.st_mtime = now;
+ if (!ccb->user_set_change_time)
+ fcb->inode_item.st_ctime = now;
+
+ if (!ccb->user_set_write_time)
+ fcb->inode_item.st_mtime = now;
+
+ fcb->inode_item_changed = TRUE;
mark_fcb_dirty(fcb);
fcb->subvol->root_item.ctime = now;
end:
if (NT_SUCCESS(Status))
- clear_rollback(&rollback);
+ clear_rollback(Vcb, &rollback);
else
do_rollback(Vcb, &rollback);
ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
// no need for fcb_lock as we have tree_lock exclusively
- Status = open_fileref(fcb->Vcb, &fr2, &nameus, fileref, FALSE, NULL, NULL, Irp);
+ Status = open_fileref(fcb->Vcb, &fr2, &nameus, fileref, FALSE, NULL, NULL, PagedPool, FALSE, Irp);
if (NT_SUCCESS(Status)) {
if (!fr2->deleted) {
goto end;
}
+ // clear unique flag on extents of open files in subvol
+ if (!IsListEmpty(&subvol_fcb->subvol->fcbs)) {
+ LIST_ENTRY* le = subvol_fcb->subvol->fcbs.Flink;
+
+ while (le != &subvol_fcb->subvol->fcbs) {
+ struct _fcb* openfcb = CONTAINING_RECORD(le, struct _fcb, list_entry);
+ LIST_ENTRY* le2;
+
+ ExAcquireResourceExclusiveLite(openfcb->Header.Resource, TRUE);
+
+ le2 = openfcb->extents.Flink;
+
+ while (le2 != &openfcb->extents) {
+ extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
+
+ ext->unique = FALSE;
+
+ le2 = le2->Flink;
+ }
+
+ ExReleaseResourceLite(openfcb->Header.Resource);
+
+ le = le->Flink;
+ }
+ }
+
Status = do_create_snapshot(Vcb, FileObject, subvol_fcb, &utf8, &nameus, Irp);
if (NT_SUCCESS(Status)) {
file_ref* fr;
- Status = open_fileref(Vcb, &fr, &nameus, fileref, FALSE, NULL, NULL, Irp);
+ Status = open_fileref(Vcb, &fr, &nameus, fileref, FALSE, NULL, NULL, PagedPool, FALSE, Irp);
if (!NT_SUCCESS(Status)) {
ERR("open_fileref returned %08x\n", Status);
InitializeListHead(&rollback);
// no need for fcb_lock as we have tree_lock exclusively
- Status = open_fileref(fcb->Vcb, &fr2, &nameus, fileref, FALSE, NULL, NULL, Irp);
+ Status = open_fileref(fcb->Vcb, &fr2, &nameus, fileref, FALSE, NULL, NULL, PagedPool, FALSE, Irp);
if (NT_SUCCESS(Status)) {
if (!fr2->deleted) {
goto end;
}
- if (Vcb->root_root->lastinode == 0)
- get_last_inode(Vcb, Vcb->root_root, Irp);
-
// FIXME - make sure rollback removes new roots from internal structures
- id = Vcb->root_root->lastinode > 0x100 ? (Vcb->root_root->lastinode + 1) : 0x101;
+ id = InterlockedIncrement64(&Vcb->root_root->lastinode);
Status = create_root(Vcb, id, &r, FALSE, 0, Irp, &rollback);
if (!NT_SUCCESS(Status)) {
RtlCopyMemory(&searchkey.offset, &r->root_item.uuid.uuid[sizeof(UINT64)], sizeof(UINT64));
Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE, Irp);
- } while (NT_SUCCESS(Status) && !keycmp(&searchkey, &tp.item->key));
+ } while (NT_SUCCESS(Status) && !keycmp(searchkey, tp.item->key));
*root_num = r->id;
}
rootfcb->sd_dirty = TRUE;
+ rootfcb->inode_item_changed = TRUE;
ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
InsertTailList(&r->fcbs, &rootfcb->list_entry);
rootfcb->created = TRUE;
+ r->lastinode = rootfcb->inode;
+
// add INODE_REF
irsize = sizeof(INODE_REF) - 1 + strlen(DOTDOT);
fcb->inode_item.transid = Vcb->superblock.generation;
fcb->inode_item.st_size += utf8.Length * 2;
fcb->inode_item.sequence++;
- fcb->inode_item.st_ctime = now;
- fcb->inode_item.st_mtime = now;
+ if (!ccb->user_set_change_time)
+ fcb->inode_item.st_ctime = now;
+
+ if (!ccb->user_set_write_time)
+ fcb->inode_item.st_mtime = now;
+
+ fcb->inode_item_changed = TRUE;
mark_fcb_dirty(fcb);
- Vcb->root_root->lastinode = id;
-
Status = STATUS_SUCCESS;
end:
if (!NT_SUCCESS(Status))
do_rollback(Vcb, &rollback);
else
- clear_rollback(&rollback);
+ clear_rollback(Vcb, &rollback);
ExReleaseResourceLite(&Vcb->tree_lock);
if (bsii->gid_changed)
fcb->inode_item.st_gid = bsii->st_gid;
- if (bsii->flags_changed || bsii->mode_changed || bsii->uid_changed || bsii->gid_changed)
+ if (bsii->flags_changed || bsii->mode_changed || bsii->uid_changed || bsii->gid_changed) {
+ fcb->inode_item_changed = TRUE;
mark_fcb_dirty(fcb);
+ }
Status = STATUS_SUCCESS;
return STATUS_SUCCESS;
}
-static NTSTATUS fs_get_statistics(PDEVICE_OBJECT DeviceObject, PFILE_OBJECT FileObject, void* buffer, DWORD buflen, DWORD* retlen) {
+static NTSTATUS fs_get_statistics(PDEVICE_OBJECT DeviceObject, PFILE_OBJECT FileObject, void* buffer, DWORD buflen, ULONG_PTR* retlen) {
FILESYSTEM_STATISTICS* fss;
WARN("STUB: FSCTL_FILESYSTEM_GET_STATISTICS\n");
return STATUS_INVALID_PARAMETER;
}
- if (!(ccb->access & FILE_WRITE_ATTRIBUTES)) {
+ if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_WRITE_ATTRIBUTES)) {
WARN("insufficient privileges\n");
return STATUS_ACCESS_DENIED;
}
return STATUS_INVALID_PARAMETER;
}
- if (!(ccb->access & FILE_WRITE_DATA)) {
+ if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_WRITE_DATA)) {
WARN("insufficient privileges\n");
return STATUS_ACCESS_DENIED;
}
fcb->inode_item.transid = Vcb->superblock.generation;
fcb->inode_item.sequence++;
- fcb->inode_item.st_ctime = now;
- fcb->inode_item.st_mtime = now;
+
+ if (!ccb->user_set_change_time)
+ fcb->inode_item.st_ctime = now;
+
+ if (!ccb->user_set_write_time)
+ fcb->inode_item.st_mtime = now;
fcb->extents_changed = TRUE;
+ fcb->inode_item_changed = TRUE;
mark_fcb_dirty(fcb);
send_notification_fcb(fileref, FILE_NOTIFY_CHANGE_LAST_WRITE, FILE_ACTION_MODIFIED);
if (!NT_SUCCESS(Status))
do_rollback(Vcb, &rollback);
else
- clear_rollback(&rollback);
+ clear_rollback(Vcb, &rollback);
ExReleaseResourceLite(fcb->Header.Resource);
ExReleaseResourceLite(&Vcb->tree_lock);
return Status;
}
-static NTSTATUS query_ranges(device_extension* Vcb, PFILE_OBJECT FileObject, FILE_ALLOCATED_RANGE_BUFFER* inbuf, ULONG inbuflen, void* outbuf, ULONG outbuflen, DWORD* retlen) {
+static NTSTATUS query_ranges(device_extension* Vcb, PFILE_OBJECT FileObject, FILE_ALLOCATED_RANGE_BUFFER* inbuf, ULONG inbuflen, void* outbuf, ULONG outbuflen, ULONG_PTR* retlen) {
NTSTATUS Status;
fcb* fcb;
LIST_ENTRY* le;
return Status;
}
-static NTSTATUS get_object_id(device_extension* Vcb, PFILE_OBJECT FileObject, FILE_OBJECTID_BUFFER* buf, ULONG buflen, DWORD* retlen) {
+static NTSTATUS get_object_id(device_extension* Vcb, PFILE_OBJECT FileObject, FILE_OBJECTID_BUFFER* buf, ULONG buflen, ULONG_PTR* retlen) {
fcb* fcb;
TRACE("(%p, %p, %p, %x, %p)\n", Vcb, FileObject, buf, buflen, retlen);
ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
- if (Vcb->root_fileref && Vcb->root_fileref->fcb && (Vcb->root_fileref->fcb->open_count > 0 || has_open_children(Vcb->root_fileref))) {
+ if (Vcb->root_fileref && Vcb->root_fileref->fcb && (Vcb->root_fileref->open_count > 0 || has_open_children(Vcb->root_fileref))) {
Status = STATUS_ACCESS_DENIED;
ExReleaseResourceLite(&Vcb->fcb_lock);
goto end;
free_trees(Vcb);
- clear_rollback(&rollback);
+ clear_rollback(Vcb, &rollback);
ExReleaseResourceLite(&Vcb->tree_lock);
free_trees(Vcb);
- clear_rollback(&rollback);
+ clear_rollback(Vcb, &rollback);
flush_fcb_caches(Vcb);
return STATUS_SUCCESS;
}
+// Handles FSCTL_GET_COMPRESSION by writing a USHORT compression format into
+// the caller's output buffer. This implementation always reports
+// COMPRESSION_FORMAT_NONE; Vcb is not consulted.
+//
+// Returns STATUS_INVALID_USER_BUFFER if the IRP has neither a system buffer
+// nor an MDL, STATUS_INSUFFICIENT_RESOURCES if the MDL cannot be mapped,
+// STATUS_INVALID_PARAMETER if the output buffer is smaller than a USHORT,
+// and otherwise STATUS_SUCCESS with IoStatus.Information set to sizeof(USHORT).
+static NTSTATUS get_compression(device_extension* Vcb, PIRP Irp) {
+    PIO_STACK_LOCATION stack = IoGetCurrentIrpStackLocation(Irp);
+    USHORT* out;
+
+    TRACE("FSCTL_GET_COMPRESSION\n");
+
+    out = Irp->AssociatedIrp.SystemBuffer;
+
+    if (!out) {
+        // no system buffer, so fall back to the MDL if there is one
+        if (Irp->MdlAddress == NULL)
+            return STATUS_INVALID_USER_BUFFER;
+
+        out = MmGetSystemAddressForMdlSafe(Irp->MdlAddress, LowPagePriority);
+        if (!out)
+            return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    // the length is only checked once a usable buffer pointer has been found
+    if (stack->Parameters.FileSystemControl.OutputBufferLength < sizeof(USHORT))
+        return STATUS_INVALID_PARAMETER;
+
+    *out = COMPRESSION_FORMAT_NONE;
+    Irp->IoStatus.Information = sizeof(USHORT);
+
+    return STATUS_SUCCESS;
+}
+
+// Walks the global volumes list. For every entry whose fsuuid equals this
+// filesystem's superblock UUID and whose devuuid equals the device UUID of one
+// of Vcb's devices, sets gen1 and gen2 to the superblock generation minus one.
+static void update_volumes(device_extension* Vcb) {
+    LIST_ENTRY* entry;
+
+    for (entry = volumes.Flink; entry != &volumes; entry = entry->Flink) {
+        volume* vol = CONTAINING_RECORD(entry, volume, list_entry);
+        UINT64 idx;
+
+        // entries belonging to a different filesystem are left alone
+        if (RtlCompareMemory(&Vcb->superblock.uuid, &vol->fsuuid, sizeof(BTRFS_UUID)) != sizeof(BTRFS_UUID))
+            continue;
+
+        for (idx = 0; idx < Vcb->superblock.num_devices; idx++) {
+            if (RtlCompareMemory(&Vcb->devices[idx].devitem.device_uuid, &vol->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) {
+                vol->gen1 = vol->gen2 = Vcb->superblock.generation - 1;
+                break;
+            }
+        }
+    }
+}
+
+// Handles FSCTL_DISMOUNT_VOLUME.
+//
+// Returns STATUS_SUCCESS straight away if the VPB is not flagged as mounted,
+// and STATUS_ACCESS_DENIED if Vcb->disallow_dismount is set. Otherwise it
+// flushes and writes out the volume under the exclusive tree lock, marks the
+// Vcb as removing, updates the VPB flags and returns STATUS_SUCCESS.
+static NTSTATUS dismount_volume(device_extension* Vcb, PIRP Irp) {
+ NTSTATUS Status;
+ KIRQL irql;
+ LIST_ENTRY rollback;
+
+ TRACE("FSCTL_DISMOUNT_VOLUME\n");
+
+ // nothing to do if the volume is not marked as mounted
+ if (!(Vcb->Vpb->Flags & VPB_MOUNTED))
+ return STATUS_SUCCESS;
+
+ if (Vcb->disallow_dismount) {
+ WARN("attempting to dismount boot volume or one containing a pagefile\n");
+ return STATUS_ACCESS_DENIED;
+ }
+
+ InitializeListHead(&rollback);
+
+ // a failed notification is only logged; the dismount carries on regardless
+ Status = FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_DISMOUNT);
+ if (!NT_SUCCESS(Status)) {
+ WARN("FsRtlNotifyVolumeEvent returned %08x\n", Status);
+ }
+
+ ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
+
+ flush_fcb_caches(Vcb);
+
+ // NOTE(review): the result of do_write is discarded here, so a failed write
+ // does not stop the dismount or change the returned status
+ if (Vcb->need_write && !Vcb->readonly)
+ do_write(Vcb, Irp, &rollback);
+
+ free_trees(Vcb);
+
+ clear_rollback(Vcb, &rollback);
+
+ Vcb->removing = TRUE;
+ update_volumes(Vcb);
+
+ ExReleaseResourceLite(&Vcb->tree_lock);
+
+ // the VPB flags are changed while holding the VPB spin lock
+ IoAcquireVpbSpinLock(&irql);
+ Vcb->Vpb->Flags &= ~VPB_MOUNTED;
+ Vcb->Vpb->Flags |= VPB_DIRECT_WRITES_ALLOWED;
+ IoReleaseVpbSpinLock(irql);
+
+ return STATUS_SUCCESS;
+}
+
NTSTATUS fsctl_request(PDEVICE_OBJECT DeviceObject, PIRP Irp, UINT32 type, BOOL user) {
PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
NTSTATUS Status;
break;
case FSCTL_DISMOUNT_VOLUME:
- WARN("STUB: FSCTL_DISMOUNT_VOLUME\n");
- Status = STATUS_NOT_IMPLEMENTED;
+ Status = dismount_volume(DeviceObject->DeviceExtension, Irp);
break;
case FSCTL_IS_VOLUME_MOUNTED:
break;
case FSCTL_GET_COMPRESSION:
- WARN("STUB: FSCTL_GET_COMPRESSION\n");
- Status = STATUS_NOT_IMPLEMENTED;
+ Status = get_compression(DeviceObject->DeviceExtension, Irp);
break;
case FSCTL_SET_COMPRESSION:
--- /dev/null
+/* Copyright (c) Mark Harmstone 2016
+ *
+ * This file is part of WinBtrfs.
+ *
+ * WinBtrfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public Licence as published by
+ * the Free Software Foundation, either version 3 of the Licence, or
+ * (at your option) any later version.
+ *
+ * WinBtrfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public Licence for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public Licence
+ * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "btrfs_drv.h"
+
+// Power table for GF(2^8): glog[i] is 2^i, i.e. each entry is the previous one
+// doubled as in galois_double (shift left, XOR 0x1d on overflow). Despite the
+// name it is indexed by exponent, not by value. Entry 255 repeats entry 0.
+static const UINT8 glog[] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
+ 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
+ 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
+ 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0, 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
+ 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
+ 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
+ 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
+ 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
+ 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
+ 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa, 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
+ 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
+ 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4, 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
+ 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e, 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
+ 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
+ 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
+ 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01};
+
+// Inverse of glog: for a non-zero byte x, gilog[x] is the exponent i with
+// glog[i] == x (e.g. gilog[0x01] == 0, gilog[0x02] == 1, gilog[0x1d] == 8).
+// Zero has no logarithm; gilog[0] is a placeholder, and the functions below
+// test for zero operands before indexing this table.
+static const UINT8 gilog[] = {0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6, 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
+ 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81, 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
+ 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21, 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
+ 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9, 0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
+ 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd, 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
+ 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd, 0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
+ 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e, 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
+ 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b, 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
+ 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d, 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
+ 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c, 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
+ 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd, 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61,
+ 0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e, 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2,
+ 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76, 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6,
+ 0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa, 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a,
+ 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51, 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
+ 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8, 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf};
+
+// Divides each of the len bytes in data by 2^div in GF(2^8), in place.
+// Zero bytes are left as they are.
+void galois_divpower(UINT8* data, UINT8 div, UINT32 len) {
+    UINT32 i;
+
+    for (i = 0; i < len; i++) {
+        UINT8 exponent;
+
+        if (data[i] == 0)
+            continue;
+
+        exponent = gilog[data[i]];
+
+        // subtract the exponents modulo 255, adding 255 first where the
+        // difference would otherwise not be positive
+        if (exponent > div)
+            data[i] = glog[(exponent - div) % 255];
+        else
+            data[i] = glog[(exponent + (255 - div)) % 255];
+    }
+}
+
+// Returns 2^e in GF(2^8); the exponent is reduced modulo 255 before the lookup.
+UINT8 gpow2(UINT8 e) {
+    UINT8 exponent = e % 255;
+
+    return glog[exponent];
+}
+
+// Multiplies a and b in GF(2^8) by adding their logarithms modulo 255.
+// The product is 0 if either operand is 0.
+UINT8 gmul(UINT8 a, UINT8 b) {
+    if (a != 0 && b != 0) {
+        int exponent = (gilog[a] + gilog[b]) % 255;
+
+        return glog[exponent];
+    }
+
+    return 0;
+}
+
+// Divides a by b in GF(2^8) by subtracting their logarithms modulo 255.
+// Returns 0 when a is 0, and 0xff for the invalid case of b being 0.
+UINT8 gdiv(UINT8 a, UINT8 b) {
+    int exponent;
+
+    if (b == 0)
+        return 0xff; // shouldn't happen
+
+    if (a == 0)
+        return 0;
+
+    // adding 255 before subtracting keeps the difference non-negative, so one
+    // expression covers both gilog[a] >= gilog[b] and gilog[a] < gilog[b]
+    exponent = (gilog[a] + 255 - gilog[b]) % 255;
+
+    return glog[exponent];
+}
+
+// The code from the following functions is derived from the paper
+// "The mathematics of RAID-6", by H. Peter Anvin.
+// https://www.kernel.org/pub/linux/kernel/people/hpa/raid6.pdf
+
+// Helper for galois_double, defined for 64-bit words when _AMD64_ is set and
+// for 32-bit words otherwise. Given a word v, it returns a mask holding 0xff
+// in every byte whose top bit was set in v, and 0x00 in every other byte.
+// The __REACTOS__ variants differ only in where __inline sits in the declaration.
+#ifdef _AMD64_
+#ifdef __REACTOS__
+static __inline UINT64 galois_double_mask64(UINT64 v) {
+#else
+static UINT64 __inline galois_double_mask64(UINT64 v) {
+#endif
+ // keep only the top bit of each byte
+ v &= 0x8080808080808080;
+ // per flagged byte: (0x80 << 1) - (0x80 >> 7) = 0x100 - 0x01 = 0xff
+ return (v << 1) - (v >> 7);
+}
+#else
+#ifdef __REACTOS__
+static __inline UINT32 galois_double_mask32(UINT32 v) {
+#else
+static UINT32 __inline galois_double_mask32(UINT32 v) {
+#endif
+ // keep only the top bit of each byte
+ v &= 0x80808080;
+ // per flagged byte: (0x80 << 1) - (0x80 >> 7) = 0x100 - 0x01 = 0xff
+ return (v << 1) - (v >> 7);
+}
+#endif
+
+// Multiplies each of the len bytes in data by 2 in GF(2^8), in place: every
+// byte is shifted left by one bit, and 0x1d is XORed in where its top bit was
+// set before the shift.
+//
+// Whole machine words (64-bit when _AMD64_ is defined, 32-bit otherwise) are
+// handled first, and any remaining bytes are then done one at a time.
+void galois_double(UINT8* data, UINT32 len) {
+    // FIXME - SIMD?
+
+#ifdef _AMD64_
+    // >= rather than >, so that the final whole word (or a buffer of exactly
+    // one word) is not left to the byte loop below
+    while (len >= sizeof(UINT64)) {
+        UINT64 v = *((UINT64*)data), vv;
+
+        // shift every byte left, masking off the bit carried in from its neighbour
+        vv = (v << 1) & 0xfefefefefefefefe;
+        // reduce the bytes that overflowed
+        vv ^= galois_double_mask64(v) & 0x1d1d1d1d1d1d1d1d;
+        *((UINT64*)data) = vv;
+
+        data += sizeof(UINT64);
+        len -= sizeof(UINT64);
+    }
+#else
+    // >= rather than >, so that the final whole word (or a buffer of exactly
+    // one word) is not left to the byte loop below
+    while (len >= sizeof(UINT32)) {
+        UINT32 v = *((UINT32*)data), vv;
+
+        // shift every byte left, masking off the bit carried in from its neighbour
+        vv = (v << 1) & 0xfefefefe;
+        // reduce the bytes that overflowed
+        vv ^= galois_double_mask32(v) & 0x1d1d1d1d;
+        *((UINT32*)data) = vv;
+
+        data += sizeof(UINT32);
+        len -= sizeof(UINT32);
+    }
+#endif
+
+    // tail: fewer bytes than one word remain
+    while (len > 0) {
+        data[0] = (data[0] << 1) ^ ((data[0] & 0x80) ? 0x1d : 0);
+        data++;
+        len--;
+    }
+}
ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
- if (Vcb->root_fileref && Vcb->root_fileref->fcb && (Vcb->root_fileref->fcb->open_count > 0 || has_open_children(Vcb->root_fileref))) {
+ if (Vcb->root_fileref && Vcb->root_fileref->fcb && (Vcb->root_fileref->open_count > 0 || has_open_children(Vcb->root_fileref))) {
Status = STATUS_ACCESS_DENIED;
goto end;
}
ExAcquireResourceExclusiveLite(&Vcb->fcb_lock, TRUE);
- if (Vcb->root_fileref && Vcb->root_fileref->fcb && (Vcb->root_fileref->fcb->open_count > 0 || has_open_children(Vcb->root_fileref))) {
+ if (Vcb->root_fileref && Vcb->root_fileref->fcb && (Vcb->root_fileref->open_count > 0 || has_open_children(Vcb->root_fileref))) {
Status = STATUS_ACCESS_DENIED;
goto end;
}
if (Vcb->need_write && !Vcb->readonly)
do_write(Vcb, Irp, &rollback);
- clear_rollback(&rollback);
+ clear_rollback(Vcb, &rollback);
ExReleaseResourceLite(&Vcb->tree_lock);
WARN("FsRtlNotifyVolumeEvent returned %08x\n", Status);
}
- uninit(Vcb, FALSE);
- Vcb->Vpb->Flags &= ~VPB_MOUNTED;
+ if (Vcb->open_files > 0) {
+ Vcb->removing = TRUE;
+ Vcb->Vpb->Flags &= ~VPB_MOUNTED;
+ } else
+ uninit(Vcb, FALSE);
}
return STATUS_SUCCESS;
TRACE("(%p, %p)\n", DeviceObject, Irp);
if (DeviceObject->Vpb->Flags & VPB_MOUNTED) {
- uninit(Vcb, FALSE);
- Vcb->Vpb->Flags &= ~VPB_MOUNTED;
+ if (Vcb->open_files > 0) {
+ Vcb->removing = TRUE;
+ Vcb->Vpb->Flags &= ~VPB_MOUNTED;
+ } else
+ uninit(Vcb, FALSE);
}
return STATUS_SUCCESS;
struct read_data_context* context;
UINT8* buf;
UINT16 stripenum;
+ BOOL rewrite;
PIRP Irp;
IO_STATUS_BLOCK iosb;
enum read_data_status status;
KEVENT Event;
NTSTATUS Status;
chunk* c;
+ UINT64 address;
UINT32 buflen;
UINT64 num_stripes;
LONG stripes_left;
UINT64 type;
UINT32 sector_size;
- UINT16 firstoff, startoffstripe, sectors_per_stripe;
+ UINT16 firstoff, startoffstripe, sectors_per_stripe, stripes_cancel;
UINT32* csum;
BOOL tree;
read_data_stripe* stripes;
+ KSPIN_LOCK spin_lock;
} read_data_context;
static NTSTATUS STDCALL read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
read_data_stripe* stripe = conptr;
read_data_context* context = (read_data_context*)stripe->context;
UINT64 i;
+ LONG stripes_left;
+ KIRQL irql;
- // FIXME - we definitely need a per-stripe lock here
+ KeAcquireSpinLock(&context->spin_lock, &irql);
+
+ stripes_left = InterlockedDecrement(&context->stripes_left);
if (stripe->status == ReadDataStatus_Cancelling) {
stripe->status = ReadDataStatus_Cancelled;
if (NT_SUCCESS(Irp->IoStatus.Status)) {
if (context->type == BLOCK_FLAG_DUPLICATE) {
- if (context->tree) {
- tree_header* th = (tree_header*)stripe->buf;
- UINT32 crc32;
-
- crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum));
-
- if (crc32 != *((UINT32*)th->csum))
- stripe->status = ReadDataStatus_CRCError;
- } else if (context->csum) {
- for (i = 0; i < Irp->IoStatus.Information / context->sector_size; i++) {
- UINT32 crc32 = ~calc_crc32c(0xffffffff, stripe->buf + (i * context->sector_size), context->sector_size);
-
- if (crc32 != context->csum[i]) {
- stripe->status = ReadDataStatus_CRCError;
- goto end;
- }
- }
- }
-
stripe->status = ReadDataStatus_Success;
-
- for (i = 0; i < context->num_stripes; i++) {
- if (context->stripes[i].status == ReadDataStatus_Pending) {
- context->stripes[i].status = ReadDataStatus_Cancelling;
- IoCancelIrp(context->stripes[i].Irp);
+
+ if (stripes_left > 0 && stripes_left == context->stripes_cancel) {
+ for (i = 0; i < context->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_Pending) {
+ context->stripes[i].status = ReadDataStatus_Cancelling;
+ IoCancelIrp(context->stripes[i].Irp);
+ }
}
}
} else if (context->type == BLOCK_FLAG_RAID0) {
- // no point checking the checksum here, as there's nothing we can do
stripe->status = ReadDataStatus_Success;
} else if (context->type == BLOCK_FLAG_RAID10) {
- if (context->csum) {
- UINT16 start, left;
- UINT32 j;
-
- if (context->startoffstripe == stripe->stripenum) {
- start = 0;
- left = context->sectors_per_stripe - context->firstoff;
- } else {
- UINT16 ns;
-
- if (context->startoffstripe > stripe->stripenum) {
- ns = stripe->stripenum + (context->num_stripes / 2) - context->startoffstripe;
- } else {
- ns = stripe->stripenum - context->startoffstripe;
+ stripe->status = ReadDataStatus_Success;
+
+ if (stripes_left > 0 && context->stripes_cancel != 0) {
+ for (i = 0; i < context->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_Pending && context->stripes[i].stripenum == stripe->stripenum) {
+ context->stripes[i].status = ReadDataStatus_Cancelling;
+ IoCancelIrp(context->stripes[i].Irp);
+ break;
}
-
- if (context->firstoff == 0)
- start = context->sectors_per_stripe * ns;
- else
- start = (context->sectors_per_stripe - context->firstoff) + (context->sectors_per_stripe * (ns - 1));
-
- left = context->sectors_per_stripe;
}
-
- j = start;
- for (i = 0; i < Irp->IoStatus.Information / context->sector_size; i++) {
- UINT32 crc32 = ~calc_crc32c(0xffffffff, stripe->buf + (i * context->sector_size), context->sector_size);
-
- if (crc32 != context->csum[j]) {
- int3;
- stripe->status = ReadDataStatus_CRCError;
- goto end;
- }
-
- j++;
- left--;
-
- if (left == 0) {
- j += context->sectors_per_stripe;
- left = context->sectors_per_stripe;
+ }
+ } else if (context->type == BLOCK_FLAG_RAID5) {
+ stripe->status = ReadDataStatus_Success;
+
+ if (stripes_left > 0 && stripes_left == context->stripes_cancel && (context->csum || context->tree)) {
+ for (i = 0; i < context->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_Pending) {
+ context->stripes[i].status = ReadDataStatus_Cancelling;
+ IoCancelIrp(context->stripes[i].Irp);
+ break;
}
}
}
-
+ } else if (context->type == BLOCK_FLAG_RAID6) {
stripe->status = ReadDataStatus_Success;
-
- for (i = 0; i < context->num_stripes; i++) {
- if (context->stripes[i].status == ReadDataStatus_Pending && context->stripes[i].stripenum == stripe->stripenum) {
- context->stripes[i].status = ReadDataStatus_Cancelling;
- IoCancelIrp(context->stripes[i].Irp);
+
+ if (stripes_left > 0 && stripes_left == context->stripes_cancel && (context->csum || context->tree)) {
+ for (i = 0; i < context->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_Pending) {
+ context->stripes[i].status = ReadDataStatus_Cancelling;
+ IoCancelIrp(context->stripes[i].Irp);
+ }
}
}
}
}
end:
- if (InterlockedDecrement(&context->stripes_left) == 0)
+ KeReleaseSpinLock(&context->spin_lock, irql);
+
+ if (stripes_left == 0)
KeSetEvent(&context->Event, 0, FALSE);
return STATUS_MORE_PROCESSING_REQUIRED;
}
-NTSTATUS STDCALL read_data(device_extension* Vcb, UINT64 addr, UINT32 length, UINT32* csum, BOOL is_tree, UINT8* buf, chunk** pc, PIRP Irp) {
- CHUNK_ITEM* ci;
- CHUNK_ITEM_STRIPE* cis;
- read_data_context* context;
- UINT64 i, type, offset;
- NTSTATUS Status;
- device** devices;
- UINT64 *stripestart = NULL, *stripeend = NULL;
- UINT16 startoffstripe;
+static void raid5_reconstruct(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT64 maxsize,
+ BOOL first, UINT32 firststripesize, UINT16 missing) {
+ UINT16 parity, stripe;
+ UINT32 stripelen = first ? firststripesize : ci->stripe_length;
+ UINT32 readlen;
- Status = verify_vcb(Vcb, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("verify_vcb returned %08x\n", Status);
- return Status;
- }
+ TRACE("(%llx, %x, %p, %p, %llx, %llx, %u, %x, %x)\n", off, skip, context, ci, *stripeoff, maxsize, first, firststripesize, missing);
- if (Vcb->log_to_phys_loaded) {
- chunk* c = get_chunk_from_address(Vcb, addr);
+ parity = ((off / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
+
+ readlen = min(min(ci->stripe_length - (skip % ci->stripe_length), stripelen), maxsize - *stripeoff);
+
+ if (missing != parity) {
+ UINT16 firststripe = missing == 0 ? 1 : 0;
- if (!c) {
- ERR("get_chunk_from_address failed\n");
- return STATUS_INTERNAL_ERROR;
+ RtlCopyMemory(&context->stripes[missing].buf[*stripeoff], &context->stripes[firststripe].buf[*stripeoff], readlen);
+
+ for (stripe = firststripe + 1; stripe < context->num_stripes; stripe++) {
+ if (stripe != missing)
+ do_xor(&context->stripes[missing].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
+ }
+ } else
+ TRACE("parity == missing == %x, skipping\n", parity);
+
+ *stripeoff += stripelen;
+}
+
+static void raid5_decode(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT8* buf,
+ UINT32* pos, UINT32 length, UINT32 firststripesize) {
+ UINT16 parity, stripe;
+ BOOL first = *pos == 0;
+ UINT32 stripelen = first ? firststripesize : ci->stripe_length;
+
+ parity = ((off / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
+
+ stripe = (parity + 1) % ci->num_stripes;
+
+ while (TRUE) {
+ if (stripe == parity) {
+ *stripeoff += stripelen;
+ return;
}
- ci = c->chunk_item;
- offset = c->offset;
- devices = c->devices;
-
- if (pc)
- *pc = c;
- } else {
- LIST_ENTRY* le = Vcb->sys_chunks.Flink;
+ if (skip >= ci->stripe_length) {
+ skip -= ci->stripe_length;
+ } else {
+ UINT32 copylen = min(ci->stripe_length - skip, length - *pos);
+
+ RtlCopyMemory(buf + *pos, &context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen);
+
+ *pos += copylen;
+
+ if (*pos == length)
+ return;
+
+ skip = 0;
+ }
- ci = NULL;
+ stripe = (stripe + 1) % ci->num_stripes;
+ }
+}
+
+static BOOL raid5_decode_with_checksum(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT8* buf,
+ UINT32* pos, UINT32 length, UINT32 firststripesize, UINT32* csum, UINT32 sector_size) {
+ UINT16 parity, stripe;
+ BOOL first = *pos == 0;
+ UINT32 stripelen = first ? firststripesize : ci->stripe_length;
+
+ parity = ((off / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
+
+ stripe = (parity + 1) % ci->num_stripes;
+
+ while (TRUE) {
+ if (stripe == parity) {
+ *stripeoff += stripelen;
+ return TRUE;
+ }
- while (le != &Vcb->sys_chunks) {
- sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry);
+ if (skip >= ci->stripe_length) {
+ skip -= ci->stripe_length;
+ } else {
+ UINT32 i;
+ UINT32 copylen = min(ci->stripe_length - skip, length - *pos);
- if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) {
- CHUNK_ITEM* chunk_item = sc->data;
+ RtlCopyMemory(buf + *pos, &context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen);
+
+ for (i = 0; i < copylen / sector_size; i ++) {
+ UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + *pos + (i * sector_size), sector_size);
- if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) {
- ci = chunk_item;
- offset = sc->key.offset;
- cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1];
+ if (crc32 != csum[i]) {
+ UINT16 j, firststripe = stripe == 0 ? 1 : 0;
- devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG);
- if (!devices) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
+ RtlCopyMemory(buf + *pos + (i * sector_size),
+ &context->stripes[firststripe].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], sector_size);
+
+ for (j = firststripe + 1; j < ci->num_stripes; j++) {
+ if (j != stripe) {
+ do_xor(buf + *pos + (i * sector_size), &context->stripes[j].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], sector_size);
+ }
}
- for (i = 0; i < ci->num_stripes; i++) {
- devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid);
+ crc32 = ~calc_crc32c(0xffffffff, buf + *pos + (i * sector_size), sector_size);
+
+ if (crc32 != csum[i]) {
+ ERR("unrecoverable checksum error\n");
+ return FALSE;
}
- break;
+ RtlCopyMemory(&context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size)], buf + *pos + (i * sector_size), sector_size);
+ context->stripes[stripe].rewrite = TRUE;
}
}
- le = le->Flink;
- }
-
- if (!ci) {
- ERR("could not find chunk for %llx in bootstrap\n", addr);
- return STATUS_INTERNAL_ERROR;
+ *pos += copylen;
+
+ if (*pos == length)
+ return TRUE;
+
+ skip = 0;
}
- if (pc)
- *pc = NULL;
+ stripe = (stripe + 1) % ci->num_stripes;
}
- if (ci->type & BLOCK_FLAG_DUPLICATE) {
- type = BLOCK_FLAG_DUPLICATE;
- } else if (ci->type & BLOCK_FLAG_RAID0) {
- type = BLOCK_FLAG_RAID0;
- } else if (ci->type & BLOCK_FLAG_RAID1) {
- type = BLOCK_FLAG_DUPLICATE;
- } else if (ci->type & BLOCK_FLAG_RAID10) {
- type = BLOCK_FLAG_RAID10;
- } else if (ci->type & BLOCK_FLAG_RAID5) {
- FIXME("RAID5 not yet supported\n");
- return STATUS_NOT_IMPLEMENTED;
- } else if (ci->type & BLOCK_FLAG_RAID6) {
- FIXME("RAID6 not yet supported\n");
- return STATUS_NOT_IMPLEMENTED;
- } else { // SINGLE
- type = BLOCK_FLAG_DUPLICATE;
- }
-
- cis = (CHUNK_ITEM_STRIPE*)&ci[1];
+ return FALSE;
+}
- context = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_context), ALLOC_TAG);
- if (!context) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
+static BOOL raid5_decode_with_checksum_metadata(UINT64 addr, UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT8* buf,
+ UINT32* pos, UINT32 length, UINT32 firststripesize, UINT32 node_size) {
+ UINT16 parity, stripe;
+ BOOL first = *pos == 0;
+ UINT32 stripelen = first ? firststripesize : ci->stripe_length;
- RtlZeroMemory(context, sizeof(read_data_context));
- KeInitializeEvent(&context->Event, NotificationEvent, FALSE);
+ parity = ((off / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
- context->stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG);
- if (!context->stripes) {
- ERR("out of memory\n");
- ExFreePool(context);
- return STATUS_INSUFFICIENT_RESOURCES;
+ stripe = (parity + 1) % ci->num_stripes;
+
+ while (TRUE) {
+ if (stripe == parity) {
+ *stripeoff += stripelen;
+ return TRUE;
+ }
+
+ if (skip >= ci->stripe_length) {
+ skip -= ci->stripe_length;
+ } else {
+ UINT32 copylen = min(ci->stripe_length - skip, length - *pos);
+ tree_header* th = (tree_header*)buf;
+ UINT32 crc32;
+
+ RtlCopyMemory(buf + *pos, &context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen);
+
+ crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, node_size - sizeof(th->csum));
+
+ if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
+ UINT16 j, firststripe = stripe == 0 ? 1 : 0;
+
+ RtlCopyMemory(buf + *pos, &context->stripes[firststripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen);
+
+ for (j = firststripe + 1; j < ci->num_stripes; j++) {
+ if (j != stripe) {
+ do_xor(buf + *pos, &context->stripes[j].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen);
+ }
+ }
+
+ crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, node_size - sizeof(th->csum));
+
+ if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
+ ERR("unrecoverable checksum error\n");
+ return FALSE;
+ }
+ }
+
+ RtlCopyMemory(&context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], buf + *pos, copylen);
+ context->stripes[stripe].rewrite = TRUE;
+
+ *pos += copylen;
+
+ if (*pos == length)
+ return TRUE;
+
+ skip = 0;
+ }
+
+ stripe = (stripe + 1) % ci->num_stripes;
}
- RtlZeroMemory(context->stripes, sizeof(read_data_stripe) * ci->num_stripes);
+ return FALSE;
+}
+
+static void raid6_reconstruct1(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT64 maxsize,
+ BOOL first, UINT32 firststripesize, UINT16 missing) {
+ UINT16 parity1, parity2, stripe;
+ UINT32 stripelen = first ? firststripesize : ci->stripe_length;
+ UINT32 readlen;
- context->buflen = length;
- context->num_stripes = ci->num_stripes;
- context->stripes_left = context->num_stripes;
- context->sector_size = Vcb->superblock.sector_size;
- context->csum = csum;
- context->tree = is_tree;
- context->type = type;
+ TRACE("(%llx, %x, %p, %p, %llx, %llx, %u, %x, %x)\n", off, skip, context, ci, *stripeoff, maxsize, first, firststripesize, missing);
- stripestart = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * ci->num_stripes, ALLOC_TAG);
- if (!stripestart) {
- ERR("out of memory\n");
- ExFreePool(context);
- return STATUS_INSUFFICIENT_RESOURCES;
- }
+ parity1 = ((off / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
+ parity2 = (parity1 + 1) % ci->num_stripes;
- stripeend = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * ci->num_stripes, ALLOC_TAG);
- if (!stripeend) {
- ERR("out of memory\n");
- ExFreePool(stripestart);
- ExFreePool(context);
- return STATUS_INSUFFICIENT_RESOURCES;
+ readlen = min(min(ci->stripe_length - (skip % ci->stripe_length), stripelen), maxsize - *stripeoff);
+
+ if (missing != parity1 && missing != parity2) {
+ RtlCopyMemory(&context->stripes[missing].buf[*stripeoff], &context->stripes[parity1].buf[*stripeoff], readlen);
+ stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1);
+
+ do {
+ if (stripe != missing)
+ do_xor(&context->stripes[missing].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
+
+ stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+ } while (stripe != parity2);
+ } else
+ TRACE("skipping parity stripe\n");
+
+ *stripeoff += stripelen;
+}
+
+// Rebuilds, for one stripe row of a RAID6 chunk, the contents of the two stripes that could
+// not be read (missing1 and missing2). Recovered bytes are written into those stripes' own
+// buffers at offset *stripeoff; readlen bytes are processed.
+//
+// off             - offset used to work out which stripes hold p and q for this row
+// skip            - bytes to skip; only (skip % stripe_length) is used, to bound readlen
+// context         - read context whose stripes[].buf hold the data already read
+// ci              - chunk item supplying num_stripes and stripe_length
+// stripeoff       - in/out: offset into each stripe buffer; always advanced by the row length
+// maxsize         - upper bound on *stripeoff + readlen
+// first           - TRUE if this is the first row, whose length is firststripesize
+// missing1/2      - indices of the two stripes to rebuild
+//
+// Four cases: both missing stripes are parity (nothing to do); q plus one data stripe
+// (XOR with p); p plus one data stripe (solve from q); two data stripes (solve from p and q).
+// A missing parity stripe is never itself regenerated here.
+static void raid6_reconstruct2(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT64 maxsize,
+ BOOL first, UINT32 firststripesize, UINT16 missing1, UINT16 missing2) {
+ UINT16 parity1, parity2, stripe;
+ UINT32 stripelen = first ? firststripesize : ci->stripe_length;
+ UINT32 readlen = min(min(ci->stripe_length - (skip % ci->stripe_length), stripelen), maxsize - *stripeoff);
+
+ TRACE("(%llx, %x, %p, %p, %llx, %llx, %u, %x, %x, %x)\n", off, skip, context, ci, *stripeoff, maxsize,
+ first, firststripesize, missing1, missing2);
+
+ // p (parity1) rotates with the row number derived from off; q (parity2) is the next stripe round the ring
+ parity1 = ((off / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
+ parity2 = (parity1 + 1) % ci->num_stripes;
+
+ // skip if missing stripes are p and q
+ if ((parity1 == missing1 && parity2 == missing2) || (parity1 == missing2 && parity2 == missing1)) {
+ *stripeoff += stripelen;
+ return;
}
- if (type == BLOCK_FLAG_RAID0) {
- UINT64 startoff, endoff;
- UINT16 endoffstripe;
+ if (missing1 == parity2 || missing2 == parity2) { // reconstruct from p and data
+ UINT16 missing = missing1 == parity2 ? missing2 : missing1;
- get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe);
- get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe);
+ // start from p, then XOR in every surviving data stripe, walking backwards from p until q is reached
+ RtlCopyMemory(&context->stripes[missing].buf[*stripeoff], &context->stripes[parity1].buf[*stripeoff], readlen);
+ stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1);
- for (i = 0; i < ci->num_stripes; i++) {
- if (startoffstripe > i) {
- stripestart[i] = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
- } else if (startoffstripe == i) {
- stripestart[i] = startoff;
- } else {
- stripestart[i] = startoff - (startoff % ci->stripe_length);
- }
+ do {
+ if (stripe != missing)
+ do_xor(&context->stripes[missing].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
- if (endoffstripe > i) {
- stripeend[i] = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
- } else if (endoffstripe == i) {
- stripeend[i] = endoff + 1;
- } else {
- stripeend[i] = endoff - (endoff % ci->stripe_length);
- }
- }
- } else if (type == BLOCK_FLAG_RAID10) {
- UINT64 startoff, endoff;
- UINT16 endoffstripe, j;
+ stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+ } while (stripe != parity2);
+ } else if (missing1 == parity1 || missing2 == parity1) { // reconstruct from q and data
+ UINT16 missing = missing1 == parity1 ? missing2 : missing1;
+ UINT16 i, div;
- get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe);
- get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe);
+ stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1);
- if ((ci->num_stripes % ci->sub_stripes) != 0) {
- ERR("chunk %llx: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes);
- Status = STATUS_INTERNAL_ERROR;
- goto exit;
- }
+ // i counts down from num_stripes - 3 as the walk moves backwards from the stripe before p;
+ // div records the value of i at the missing stripe (presumably its exponent in q - see galois_divpower)
+ i = ci->num_stripes - 3;
- context->firstoff = (startoff % ci->stripe_length) / Vcb->superblock.sector_size;
- context->startoffstripe = startoffstripe;
- context->sectors_per_stripe = ci->stripe_length / Vcb->superblock.sector_size;
+ if (stripe == missing) {
+ RtlZeroMemory(&context->stripes[missing].buf[*stripeoff], readlen);
+ div = i;
+ } else
+ RtlCopyMemory(&context->stripes[missing].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
- startoffstripe *= ci->sub_stripes;
- endoffstripe *= ci->sub_stripes;
+ stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
- for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
- if (startoffstripe > i) {
- stripestart[i] = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
- } else if (startoffstripe == i) {
- stripestart[i] = startoff;
- } else {
- stripestart[i] = startoff - (startoff % ci->stripe_length);
- }
+ // NOTE(review): when num_stripes == 3 (a single data stripe) stripe already equals parity2 here,
+ // so this do/while still runs its body and i-- wraps below zero - confirm whether that case can occur
+ i--;
+ do {
+ galois_double(&context->stripes[missing].buf[*stripeoff], readlen);
- if (endoffstripe > i) {
- stripeend[i] = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
- } else if (endoffstripe == i) {
- stripeend[i] = endoff + 1;
- } else {
- stripeend[i] = endoff - (endoff % ci->stripe_length);
- }
+ if (stripe != missing)
+ do_xor(&context->stripes[missing].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
+ else
+ div = i;
- for (j = 1; j < ci->sub_stripes; j++) {
- stripestart[i+j] = stripestart[i];
- stripeend[i+j] = stripeend[i];
- }
+ stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+ i--;
+ } while (stripe != parity2);
+
+ // fold in the stored q, leaving only the missing stripe's term, then divide its coefficient out
+ do_xor(&context->stripes[missing].buf[*stripeoff], &context->stripes[parity2].buf[*stripeoff], readlen);
+
+ if (div != 0)
+ galois_divpower(&context->stripes[missing].buf[*stripeoff], div, readlen);
+ } else { // reconstruct from p and q
+ UINT16 x, y, i;
+ UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
+ UINT32 j;
+
+ stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1);
+
+ // put qxy in missing1
+ // put pxy in missing2
+
+ // x and y end up holding the value of i at missing1 and missing2 respectively
+ i = ci->num_stripes - 3;
+ if (stripe == missing1 || stripe == missing2) {
+ RtlZeroMemory(&context->stripes[missing1].buf[*stripeoff], readlen);
+ RtlZeroMemory(&context->stripes[missing2].buf[*stripeoff], readlen);
+
+ if (stripe == missing1)
+ x = i;
+ else
+ y = i;
+ } else {
+ RtlCopyMemory(&context->stripes[missing1].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
+ RtlCopyMemory(&context->stripes[missing2].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
}
- } else if (type == BLOCK_FLAG_DUPLICATE) {
- for (i = 0; i < ci->num_stripes; i++) {
- stripestart[i] = addr - offset;
- stripeend[i] = stripestart[i] + length;
+
+ stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+
+ i--;
+ do {
+ // only the q accumulator (missing1) is doubled; the p accumulator (missing2) is a plain XOR
+ galois_double(&context->stripes[missing1].buf[*stripeoff], readlen);
+
+ if (stripe != missing1 && stripe != missing2) {
+ do_xor(&context->stripes[missing1].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
+ do_xor(&context->stripes[missing2].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
+ } else if (stripe == missing1)
+ x = i;
+ else if (stripe == missing2)
+ y = i;
+
+ stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+ i--;
+ } while (stripe != parity2);
+
+ // coefficients for the byte-wise solve below, derived from the two exponents
+ gyx = gpow2(y > x ? (y-x) : (255-x+y));
+ gx = gpow2(255-x);
+
+ denom = gdiv(1, gyx ^ 1);
+ a = gmul(gyx, denom);
+ b = gmul(gx, denom);
+
+ p = &context->stripes[parity1].buf[*stripeoff];
+ q = &context->stripes[parity2].buf[*stripeoff];
+ pxy = &context->stripes[missing2].buf[*stripeoff];
+ qxy = &context->stripes[missing1].buf[*stripeoff];
+
+ // overwrite qxy in place with the recovered bytes of missing1
+ for (j = 0; j < readlen; j++) {
+ *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);
+
+ p++;
+ q++;
+ pxy++;
+ qxy++;
}
+
+ // missing2 = pxy ^ missing1 ^ p
+ do_xor(&context->stripes[missing2].buf[*stripeoff], &context->stripes[missing1].buf[*stripeoff], readlen);
+ do_xor(&context->stripes[missing2].buf[*stripeoff], &context->stripes[parity1].buf[*stripeoff], readlen);
}
- // FIXME - for RAID, check beforehand whether there's enough devices to satisfy request
+ *stripeoff += stripelen;
+}
+
+// Copies the data stripes of one RAID6 stripe row out of the per-stripe read buffers into buf,
+// without any verification.
+//
+// off             - offset used to locate the p stripe for this row
+// skip            - bytes at the start of the row's data that are not wanted; whole stripes are
+//                   skipped while skip >= stripe_length
+// stripeoff       - in/out: offset into each stripe buffer; advanced by the row length only when
+//                   the whole row has been walked (not when the read completes mid-row)
+// buf, pos        - destination buffer and in/out write position within it
+// length          - total number of bytes wanted in buf
+// firststripesize - length of the first row, used when *pos == 0 on entry
+static void raid6_decode(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT8* buf,
+ UINT32* pos, UINT32 length, UINT32 firststripesize) {
+ UINT16 parity1, stripe;
+ BOOL first = *pos == 0;
+ UINT32 stripelen = first ? firststripesize : ci->stripe_length;
- for (i = 0; i < ci->num_stripes; i++) {
- PIO_STACK_LOCATION IrpSp;
+ parity1 = ((off / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
+
+ // data starts two stripes after p (i.e. just after q) and wraps round until p is reached again
+ stripe = (parity1 + 2) % ci->num_stripes;
+
+ while (TRUE) {
+ if (stripe == parity1) {
+ *stripeoff += stripelen;
+ return;
+ }
- if (!devices[i] || stripestart[i] == stripeend[i]) {
- context->stripes[i].status = ReadDataStatus_MissingDevice;
- context->stripes[i].buf = NULL;
- context->stripes_left--;
+ if (skip >= ci->stripe_length) {
+ skip -= ci->stripe_length;
} else {
- context->stripes[i].context = (struct read_data_context*)context;
- context->stripes[i].buf = ExAllocatePoolWithTag(NonPagedPool, stripeend[i] - stripestart[i], ALLOC_TAG);
+ UINT32 copylen = min(ci->stripe_length - skip, length - *pos);
- if (!context->stripes[i].buf) {
- ERR("out of memory\n");
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto exit;
+ // source index is *stripeoff + skip, less (stripe_length - stripelen) to allow for a short row
+ RtlCopyMemory(buf + *pos, &context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen);
+
+ *pos += copylen;
+
+ if (*pos == length)
+ return;
+
+ skip = 0;
+ }
+
+ stripe = (stripe + 1) % ci->num_stripes;
+ }
+}
+
+// Copies the data stripes of one RAID6 stripe row into buf, verifying each sector against its
+// CRC32C checksum and repairing sectors that fail.
+//
+// For a failing sector three repairs are tried in order:
+//   1. rebuild it from p and the other data stripes (plain XOR);
+//   2. assume p is wrong: rebuild it from q and the other data stripes, and recalculate p;
+//   3. assume one other data stripe is also wrong: for each candidate, solve for both from p and q.
+// A repaired sector is written back into the stripe buffer and the stripe is flagged for rewrite,
+// as is any other stripe (p or a second data stripe) that was corrected along the way.
+//
+// off             - offset used to locate the p and q stripes for this row
+// skip            - bytes at the start of the row's data that are not wanted
+// stripeoff       - in/out: offset into each stripe buffer; advanced only when the whole row is walked
+// buf, pos        - destination buffer and in/out write position within it
+// length          - total number of bytes wanted in buf
+// firststripesize - length of the first row, used when *pos == 0 on entry
+// csum            - expected checksums, indexed from 0 for each data stripe copied
+// sector_size     - size in bytes of the unit each checksum covers
+//
+// Returns TRUE on success, FALSE on allocation failure or if a sector cannot be repaired.
+static BOOL raid6_decode_with_checksum(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT8* buf,
+                                       UINT32* pos, UINT32 length, UINT32 firststripesize, UINT32* csum, UINT32 sector_size) {
+    UINT16 parity1, parity2, stripe;
+    BOOL first = *pos == 0;
+    UINT32 stripelen = first ? firststripesize : ci->stripe_length;
+
+    parity1 = ((off / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
+    parity2 = (parity1 + 1) % ci->num_stripes;
+    stripe = (parity1 + 2) % ci->num_stripes;
+
+    while (TRUE) {
+        if (stripe == parity1) {
+            *stripeoff += stripelen;
+            return TRUE;
+        }
+
+        if (skip >= ci->stripe_length) {
+            skip -= ci->stripe_length;
+        } else {
+            UINT32 i;
+            UINT32 copylen = min(ci->stripe_length - skip, length - *pos);
+
+            RtlCopyMemory(buf + *pos, &context->stripes[stripe].buf[*stripeoff + skip - ci->stripe_length + stripelen], copylen);
+
+            // NOTE(review): csum is indexed from 0 for every data stripe handled in this call, so
+            // the caller must account for that when a row spans more than one stripe - confirm
+            for (i = 0; i < copylen / sector_size; i ++) {
+                // offset of this sector within every stripe buffer, and its place in the output
+                UINT64 secoff = *stripeoff + skip - ci->stripe_length + stripelen + (i * sector_size);
+                UINT8* sector = buf + *pos + (i * sector_size);
+                UINT32 crc32 = ~calc_crc32c(0xffffffff, sector, sector_size);
+
+                if (crc32 != csum[i]) {
+                    UINT16 j, firststripe;
+
+                    // lowest-numbered stripe that is neither the failing stripe nor q
+                    if (parity2 == 0 && stripe == 1)
+                        firststripe = 2;
+                    else if (parity2 == 0 || stripe == 0)
+                        firststripe = 1;
+                    else
+                        firststripe = 0;
+
+                    // attempt 1: XOR p with all the other data stripes
+                    RtlCopyMemory(sector, &context->stripes[firststripe].buf[secoff], sector_size);
+
+                    for (j = firststripe + 1; j < ci->num_stripes; j++) {
+                        if (j != stripe && j != parity2)
+                            do_xor(sector, &context->stripes[j].buf[secoff], sector_size);
+                    }
+
+                    crc32 = ~calc_crc32c(0xffffffff, sector, sector_size);
+
+                    if (crc32 != csum[i]) {
+                        UINT8 *parity, *buf2;
+                        UINT16 rs, div = 0;
+
+                        // assume p is wrong
+
+                        parity = ExAllocatePoolWithTag(NonPagedPool, sector_size, ALLOC_TAG);
+                        if (!parity) {
+                            ERR("out of memory\n");
+                            return FALSE;
+                        }
+
+                        // walk backwards from the stripe before p, accumulating q's terms for the
+                        // surviving data stripes; div records the failing stripe's position
+                        rs = (parity1 + ci->num_stripes - 1) % ci->num_stripes;
+                        j = ci->num_stripes - 3;
+
+                        if (rs == stripe) {
+                            RtlZeroMemory(parity, sector_size);
+                            div = j;
+                        } else
+                            RtlCopyMemory(parity, &context->stripes[rs].buf[secoff], sector_size);
+
+                        rs = (rs + ci->num_stripes - 1) % ci->num_stripes;
+                        j--;
+                        while (rs != parity2) {
+                            galois_double(parity, sector_size);
+
+                            if (rs != stripe)
+                                do_xor(parity, &context->stripes[rs].buf[secoff], sector_size);
+                            else
+                                div = j;
+
+                            rs = (rs + ci->num_stripes - 1) % ci->num_stripes;
+                            j--;
+                        }
+
+                        do_xor(parity, &context->stripes[parity2].buf[secoff], sector_size);
+
+                        if (div != 0)
+                            galois_divpower(parity, div, sector_size);
+
+                        crc32 = ~calc_crc32c(0xffffffff, parity, sector_size);
+                        if (crc32 == csum[i]) {
+                            RtlCopyMemory(sector, parity, sector_size);
+
+                            // recalculate p
+                            RtlCopyMemory(&context->stripes[parity1].buf[secoff], parity, sector_size);
+
+                            for (j = 0; j < ci->num_stripes; j++) {
+                                if (j != stripe && j != parity1 && j != parity2)
+                                    do_xor(&context->stripes[parity1].buf[secoff], &context->stripes[j].buf[secoff], sector_size);
+                            }
+
+                            context->stripes[parity1].rewrite = TRUE;
+
+                            ExFreePool(parity);
+                            goto success;
+                        }
+
+                        // assume another of the data stripes is wrong
+
+                        buf2 = ExAllocatePoolWithTag(NonPagedPool, sector_size, ALLOC_TAG);
+                        if (!buf2) {
+                            ERR("out of memory\n");
+                            ExFreePool(parity);
+                            return FALSE;
+                        }
+
+                        j = (parity2 + 1) % ci->num_stripes;
+
+                        while (j != parity1) {
+                            if (j != stripe) {
+                                UINT16 curstripe, k;
+                                UINT16 x = 0, y = 0;
+                                UINT32 n;
+                                UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
+
+                                curstripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1);
+
+                                // put qxy in parity
+                                // put pxy in buf2
+
+                                // x and y end up holding the positions of stripe and j respectively
+                                k = ci->num_stripes - 3;
+                                if (curstripe == stripe || curstripe == j) {
+                                    RtlZeroMemory(parity, sector_size);
+                                    RtlZeroMemory(buf2, sector_size);
+
+                                    if (curstripe == stripe)
+                                        x = k;
+                                    else
+                                        y = k;
+                                } else {
+                                    RtlCopyMemory(parity, &context->stripes[curstripe].buf[secoff], sector_size);
+                                    RtlCopyMemory(buf2, &context->stripes[curstripe].buf[secoff], sector_size);
+                                }
+
+                                curstripe = curstripe == 0 ? (ci->num_stripes - 1) : (curstripe - 1);
+
+                                k--;
+                                do {
+                                    galois_double(parity, sector_size);
+
+                                    if (curstripe != stripe && curstripe != j) {
+                                        do_xor(parity, &context->stripes[curstripe].buf[secoff], sector_size);
+                                        do_xor(buf2, &context->stripes[curstripe].buf[secoff], sector_size);
+                                    } else if (curstripe == stripe)
+                                        x = k;
+                                    else if (curstripe == j)
+                                        y = k;
+
+                                    curstripe = curstripe == 0 ? (ci->num_stripes - 1) : (curstripe - 1);
+                                    k--;
+                                } while (curstripe != parity2);
+
+                                gyx = gpow2(y > x ? (y-x) : (255-x+y));
+                                gx = gpow2(255-x);
+
+                                denom = gdiv(1, gyx ^ 1);
+                                a = gmul(gyx, denom);
+                                b = gmul(gx, denom);
+
+                                p = &context->stripes[parity1].buf[secoff];
+                                q = &context->stripes[parity2].buf[secoff];
+                                pxy = buf2;
+                                qxy = parity;
+
+                                // byte counter is 32-bit: a 16-bit counter can never reach a
+                                // sector_size of 65536 and would run off the end of the buffers
+                                for (n = 0; n < sector_size; n++) {
+                                    *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);
+
+                                    p++;
+                                    q++;
+                                    pxy++;
+                                    qxy++;
+                                }
+
+                                crc32 = ~calc_crc32c(0xffffffff, parity, sector_size);
+
+                                if (crc32 == csum[i]) {
+                                    // the other bad stripe is pxy ^ (recovered sector) ^ p
+                                    do_xor(buf2, parity, sector_size);
+                                    do_xor(buf2, &context->stripes[parity1].buf[secoff], sector_size);
+
+                                    RtlCopyMemory(&context->stripes[j].buf[secoff], buf2, sector_size);
+                                    context->stripes[j].rewrite = TRUE;
+
+                                    RtlCopyMemory(sector, parity, sector_size);
+                                    ExFreePool(parity);
+                                    ExFreePool(buf2);
+                                    goto success;
+                                }
+                            }
+
+                            j = (j + 1) % ci->num_stripes;
+                        }
+
+                        ExFreePool(parity);
+                        ExFreePool(buf2);
+
+                        ERR("unrecoverable checksum error\n");
+                        return FALSE;
+                    }
+
+success:
+                    // store the repaired sector back in the stripe buffer and mark it for rewriting
+                    RtlCopyMemory(&context->stripes[stripe].buf[secoff], sector, sector_size);
+                    context->stripes[stripe].rewrite = TRUE;
+                }
+            }
+
+            *pos += copylen;
+
+            if (*pos == length)
+                return TRUE;
+
+            skip = 0;
+        }
+
+        stripe = (stripe + 1) % ci->num_stripes;
+    }
+}
+
+// Copies the data stripes of one RAID6 stripe row into buf, treating the data as a tree node:
+// the node is valid only if its header address equals addr and its CRC32C (over everything
+// after the csum field) matches the stored checksum. An invalid node is repaired if possible.
+//
+// Three repairs are tried in order:
+//   1. rebuild the node from p and the other data stripes (plain XOR);
+//   2. assume p is wrong: rebuild the node from q and the other data stripes, and recalculate p;
+//   3. assume one other data stripe is also wrong: for each candidate, solve for both from p and q.
+// A repaired node is written back into the stripe buffer and the stripe is flagged for rewrite,
+// as is any other stripe (p or a second data stripe) that was corrected along the way.
+//
+// addr            - logical address the node is expected to carry in its header
+// off             - offset used to locate the p and q stripes for this row
+// skip            - bytes at the start of the row's data that are not wanted
+// stripeoff       - in/out: offset into each stripe buffer; advanced only when the whole row is walked
+// buf, pos        - destination buffer and in/out write position within it
+// length          - total number of bytes wanted in buf
+// firststripesize - length of the first row, used when *pos == 0 on entry
+// node_size       - size in bytes of a tree node
+//
+// Returns TRUE on success, FALSE on allocation failure or if the node cannot be repaired.
+static BOOL raid6_decode_with_checksum_metadata(UINT64 addr, UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT8* buf,
+                                                UINT32* pos, UINT32 length, UINT32 firststripesize, UINT32 node_size) {
+    UINT16 parity1, parity2, stripe;
+    BOOL first = *pos == 0;
+    UINT32 stripelen = first ? firststripesize : ci->stripe_length;
+
+    parity1 = ((off / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
+    parity2 = (parity1 + 1) % ci->num_stripes;
+    stripe = (parity1 + 2) % ci->num_stripes;
+
+    while (TRUE) {
+        if (stripe == parity1) {
+            *stripeoff += stripelen;
+            return TRUE;
+        }
+
+        if (skip >= ci->stripe_length) {
+            skip -= ci->stripe_length;
+        } else {
+            UINT32 copylen = min(ci->stripe_length - skip, length - *pos);
+            // offset of the node within every stripe buffer
+            UINT64 nodeoff = *stripeoff + skip - ci->stripe_length + stripelen;
+            // NOTE(review): the header is read from buf, not buf + *pos - these only coincide
+            // when *pos is 0; confirm callers always read a single node per call
+            tree_header* th = (tree_header*)buf;
+            UINT32 crc32;
+
+            RtlCopyMemory(buf + *pos, &context->stripes[stripe].buf[nodeoff], copylen);
+
+            crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, node_size - sizeof(th->csum));
+
+            if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
+                UINT16 j, firststripe;
+
+                // lowest-numbered stripe that is neither the failing stripe nor q
+                if (parity2 == 0 && stripe == 1)
+                    firststripe = 2;
+                else if (parity2 == 0 || stripe == 0)
+                    firststripe = 1;
+                else
+                    firststripe = 0;
+
+                // attempt 1: XOR p with all the other data stripes
+                RtlCopyMemory(buf + *pos, &context->stripes[firststripe].buf[nodeoff], node_size);
+
+                for (j = firststripe + 1; j < ci->num_stripes; j++) {
+                    if (j != stripe && j != parity2)
+                        do_xor(buf + *pos, &context->stripes[j].buf[nodeoff], node_size);
+                }
+
+                crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, node_size - sizeof(th->csum));
+
+                if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
+                    UINT8 *parity, *buf2;
+                    UINT16 rs, div = 0;
+                    tree_header* th2;
+
+                    // assume p is wrong
+
+                    parity = ExAllocatePoolWithTag(NonPagedPool, node_size, ALLOC_TAG);
+                    if (!parity) {
+                        ERR("out of memory\n");
+                        return FALSE;
+                    }
+
+                    // walk backwards from the stripe before p, accumulating q's terms for the
+                    // surviving data stripes; div records the failing stripe's position
+                    rs = (parity1 + ci->num_stripes - 1) % ci->num_stripes;
+                    j = ci->num_stripes - 3;
+
+                    if (rs == stripe) {
+                        RtlZeroMemory(parity, node_size);
+                        div = j;
+                    } else
+                        RtlCopyMemory(parity, &context->stripes[rs].buf[nodeoff], node_size);
+
+                    rs = (rs + ci->num_stripes - 1) % ci->num_stripes;
+                    j--;
+                    while (rs != parity2) {
+                        galois_double(parity, node_size);
+
+                        if (rs != stripe)
+                            do_xor(parity, &context->stripes[rs].buf[nodeoff], node_size);
+                        else
+                            div = j;
+
+                        rs = (rs + ci->num_stripes - 1) % ci->num_stripes;
+                        j--;
+                    }
+
+                    do_xor(parity, &context->stripes[parity2].buf[nodeoff], node_size);
+
+                    if (div != 0)
+                        galois_divpower(parity, div, node_size);
+
+                    th2 = (tree_header*)parity;
+
+                    crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th2->fs_uuid, node_size - sizeof(th2->csum));
+
+                    // accept the candidate only if BOTH the address and the checksum are right;
+                    // accepting on an address mismatch would write a bogus node back to disk
+                    if (addr == th2->address && crc32 == *((UINT32*)th2->csum)) {
+                        RtlCopyMemory(buf + *pos, parity, node_size);
+
+                        // recalculate p
+                        RtlCopyMemory(&context->stripes[parity1].buf[nodeoff], parity, node_size);
+
+                        for (j = 0; j < ci->num_stripes; j++) {
+                            if (j != stripe && j != parity1 && j != parity2)
+                                do_xor(&context->stripes[parity1].buf[nodeoff], &context->stripes[j].buf[nodeoff], node_size);
+                        }
+
+                        context->stripes[parity1].rewrite = TRUE;
+
+                        ExFreePool(parity);
+                        goto success;
+                    }
+
+                    // assume another of the data stripes is wrong
+
+                    buf2 = ExAllocatePoolWithTag(NonPagedPool, node_size, ALLOC_TAG);
+                    if (!buf2) {
+                        ERR("out of memory\n");
+                        ExFreePool(parity);
+                        return FALSE;
+                    }
+
+                    j = (parity2 + 1) % ci->num_stripes;
+
+                    while (j != parity1) {
+                        if (j != stripe) {
+                            UINT16 curstripe, k;
+                            UINT16 x = 0, y = 0;
+                            UINT32 n;
+                            UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
+
+                            curstripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1);
+
+                            // put qxy in parity
+                            // put pxy in buf2
+
+                            // x and y end up holding the positions of stripe and j respectively
+                            k = ci->num_stripes - 3;
+                            if (curstripe == stripe || curstripe == j) {
+                                RtlZeroMemory(parity, node_size);
+                                RtlZeroMemory(buf2, node_size);
+
+                                if (curstripe == stripe)
+                                    x = k;
+                                else
+                                    y = k;
+                            } else {
+                                RtlCopyMemory(parity, &context->stripes[curstripe].buf[nodeoff], node_size);
+                                RtlCopyMemory(buf2, &context->stripes[curstripe].buf[nodeoff], node_size);
+                            }
+
+                            curstripe = curstripe == 0 ? (ci->num_stripes - 1) : (curstripe - 1);
+
+                            k--;
+                            do {
+                                galois_double(parity, node_size);
+
+                                if (curstripe != stripe && curstripe != j) {
+                                    do_xor(parity, &context->stripes[curstripe].buf[nodeoff], node_size);
+                                    do_xor(buf2, &context->stripes[curstripe].buf[nodeoff], node_size);
+                                } else if (curstripe == stripe)
+                                    x = k;
+                                else if (curstripe == j)
+                                    y = k;
+
+                                curstripe = curstripe == 0 ? (ci->num_stripes - 1) : (curstripe - 1);
+                                k--;
+                            } while (curstripe != parity2);
+
+                            gyx = gpow2(y > x ? (y-x) : (255-x+y));
+                            gx = gpow2(255-x);
+
+                            denom = gdiv(1, gyx ^ 1);
+                            a = gmul(gyx, denom);
+                            b = gmul(gx, denom);
+
+                            p = &context->stripes[parity1].buf[nodeoff];
+                            q = &context->stripes[parity2].buf[nodeoff];
+                            pxy = buf2;
+                            qxy = parity;
+
+                            // byte counter is 32-bit: a 16-bit counter can never reach a
+                            // node_size of 65536 and would run off the end of the buffers
+                            for (n = 0; n < node_size; n++) {
+                                *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);
+
+                                p++;
+                                q++;
+                                pxy++;
+                                qxy++;
+                            }
+
+                            th2 = (tree_header*)parity;
+
+                            crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th2->fs_uuid, node_size - sizeof(th2->csum));
+
+                            // as above: both the address and the checksum must match
+                            if (addr == th2->address && crc32 == *((UINT32*)th2->csum)) {
+                                // the other bad stripe is pxy ^ (recovered node) ^ p
+                                do_xor(buf2, parity, node_size);
+                                do_xor(buf2, &context->stripes[parity1].buf[nodeoff], node_size);
+
+                                RtlCopyMemory(&context->stripes[j].buf[nodeoff], buf2, node_size);
+                                context->stripes[j].rewrite = TRUE;
+
+                                RtlCopyMemory(buf + *pos, parity, node_size);
+                                ExFreePool(parity);
+                                ExFreePool(buf2);
+                                goto success;
+                            }
+                        }
+
+                        j = (j + 1) % ci->num_stripes;
+                    }
+
+                    ExFreePool(parity);
+                    ExFreePool(buf2);
+
+                    ERR("unrecoverable checksum error\n");
+                    return FALSE;
+                }
+
+success:
+                // store the repaired node back in the stripe buffer and mark it for rewriting
+                RtlCopyMemory(&context->stripes[stripe].buf[nodeoff], buf + *pos, node_size);
+                context->stripes[stripe].rewrite = TRUE;
+            }
+
+            *pos += copylen;
+
+            if (*pos == length)
+                return TRUE;
+
+            skip = 0;
+        }
+
+        stripe = (stripe + 1) % ci->num_stripes;
+    }
+}
+
+// Verifies one RAID6 stripe row for data that has no checksums, using the parity stripes alone,
+// and repairs a single bad stripe where that is possible.
+//
+// The row is consistent if p XOR all data stripes is zero. Otherwise:
+//   - q is recomputed from the data; if it matches the stored q, p was wrong and is recalculated;
+//   - failing that, each data stripe in turn is rebuilt from p and the others, and accepted if
+//     q recomputed with the rebuilt stripe matches the stored q.
+// Any stripe that is corrected is flagged for rewrite.
+//
+// off             - offset used to locate the p and q stripes for this row
+// skip            - only (skip % stripe_length) is used, to bound the number of bytes checked
+// stripeoff       - in/out: offset into each stripe buffer; advanced by the row length on success
+// maxsize         - upper bound on *stripeoff plus the number of bytes checked
+// first           - TRUE if this is the first row, whose length is firststripesize
+// scratch         - caller-supplied work buffer, at least as long as the bytes checked
+//
+// Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES if a work buffer cannot be allocated,
+// or STATUS_CRC_ERROR if the row is inconsistent and no single-stripe repair fits.
+static NTSTATUS check_raid6_nocsum_parity(UINT64 off, UINT32 skip, read_data_context* context, CHUNK_ITEM* ci, UINT64* stripeoff, UINT64 maxsize,
+                                          BOOL first, UINT32 firststripesize, UINT8* scratch) {
+    UINT16 parity1, parity2, stripe;
+    UINT32 stripelen = first ? firststripesize : ci->stripe_length;
+    UINT32 readlen, i;
+    BOOL bad = FALSE;
+
+    TRACE("(%llx, %x, %p, %p, %llx, %llx, %u, %x, %p)\n", off, skip, context, ci, *stripeoff, maxsize, first, firststripesize, scratch);
+
+    parity1 = ((off / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
+    parity2 = (parity1 + 1) % ci->num_stripes;
+
+    readlen = min(min(ci->stripe_length - (skip % ci->stripe_length), stripelen), maxsize - *stripeoff);
+
+    // p XOR every data stripe should come out as all zeroes
+    RtlCopyMemory(scratch, &context->stripes[parity1].buf[*stripeoff], readlen);
+    stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1);
+
+    while (stripe != parity2) {
+        do_xor(scratch, &context->stripes[stripe].buf[*stripeoff], readlen);
+
+        stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+    }
+
+    for (i = 0; i < readlen; i++) {
+        if (scratch[i] != 0) {
+            bad = TRUE;
+            break;
+        }
+    }
+
+    if (bad) {
+        UINT16 missing;
+        UINT8* buf2;
+
+        // assume parity is bad
+
+        // Recompute q from the data, starting with the stripe before p and walking backwards.
+        // These are while loops rather than do/while: with a single data stripe the cursor is
+        // already at q after the first step, and a do/while would wrap round the whole ring.
+        stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1);
+        RtlCopyMemory(scratch, &context->stripes[stripe].buf[*stripeoff], readlen);
+        stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+
+        while (stripe != parity2) {
+            galois_double(scratch, readlen);
+
+            do_xor(scratch, &context->stripes[stripe].buf[*stripeoff], readlen);
+
+            stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+        }
+
+        if (RtlCompareMemory(scratch, &context->stripes[parity2].buf[*stripeoff], readlen) == readlen) {
+            WARN("recovering from invalid parity stripe\n");
+
+            // recalc p
+            stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1);
+            RtlCopyMemory(&context->stripes[parity1].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
+            stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+
+            while (stripe != parity2) {
+                do_xor(&context->stripes[parity1].buf[*stripeoff], &context->stripes[stripe].buf[*stripeoff], readlen);
+
+                stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+            }
+
+            context->stripes[parity1].rewrite = TRUE;
+            goto end;
+        }
+
+        // assume one of the data stripes is bad
+
+        buf2 = ExAllocatePoolWithTag(NonPagedPool, readlen, ALLOC_TAG);
+        if (!buf2) {
+            ERR("out of memory\n");
+            return STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        missing = (parity2 + 1) % ci->num_stripes;
+        while (missing != parity1) {
+            // scratch = candidate contents of the suspect stripe, rebuilt from p and the rest
+            RtlCopyMemory(scratch, &context->stripes[parity1].buf[*stripeoff], readlen);
+            for (i = 0; i < ci->num_stripes; i++) {
+                if (i != parity1 && i != parity2 && i != missing)
+                    do_xor(scratch, &context->stripes[i].buf[*stripeoff], readlen);
+            }
+
+            // buf2 = q recomputed with the candidate standing in for the suspect stripe
+            stripe = parity1 == 0 ? (ci->num_stripes - 1) : (parity1 - 1);
+            RtlCopyMemory(buf2, stripe == missing ? scratch : &context->stripes[stripe].buf[*stripeoff], readlen);
+            stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+
+            while (stripe != parity2) {
+                galois_double(buf2, readlen);
+
+                do_xor(buf2, stripe == missing ? scratch : &context->stripes[stripe].buf[*stripeoff], readlen);
+
+                stripe = stripe == 0 ? (ci->num_stripes - 1) : (stripe - 1);
+            }
+
+            if (RtlCompareMemory(buf2, &context->stripes[parity2].buf[*stripeoff], readlen) == readlen) {
+                WARN("recovering from invalid data stripe\n");
+
+                RtlCopyMemory(&context->stripes[missing].buf[*stripeoff], scratch, readlen);
+                ExFreePool(buf2);
+
+                context->stripes[missing].rewrite = TRUE;
+                goto end;
+            }
+
+            missing = (missing + 1) % ci->num_stripes;
+        }
+
+        ExFreePool(buf2);
+
+        ERR("unrecoverable checksum error\n");
+        return STATUS_CRC_ERROR;
+    }
+
+end:
+    *stripeoff += stripelen;
+
+    return STATUS_SUCCESS;
+}
+
+NTSTATUS STDCALL read_data(device_extension* Vcb, UINT64 addr, UINT32 length, UINT32* csum, BOOL is_tree, UINT8* buf, chunk* c, chunk** pc, PIRP Irp) {
+ CHUNK_ITEM* ci;
+ CHUNK_ITEM_STRIPE* cis;
+ read_data_context* context;
+ UINT64 i, type, offset;
+ NTSTATUS Status;
+ device** devices;
+ UINT64 *stripestart = NULL, *stripeend = NULL;
+ UINT32 firststripesize;
+ UINT16 startoffstripe, allowed_missing, missing_devices = 0;
+#ifdef DEBUG_STATS
+ LARGE_INTEGER time1, time2;
+#endif
+
+ Status = verify_vcb(Vcb, Irp);
+ if (!NT_SUCCESS(Status)) {
+ ERR("verify_vcb returned %08x\n", Status);
+ return Status;
+ }
+
+ if (Vcb->log_to_phys_loaded) {
+ if (!c) {
+ c = get_chunk_from_address(Vcb, addr);
+
+ if (!c) {
+ ERR("get_chunk_from_address failed\n");
+ return STATUS_INTERNAL_ERROR;
+ }
+ }
+
+ ci = c->chunk_item;
+ offset = c->offset;
+ devices = c->devices;
+
+ if (pc)
+ *pc = c;
+ } else {
+ LIST_ENTRY* le = Vcb->sys_chunks.Flink;
+
+ ci = NULL;
+
+ while (le != &Vcb->sys_chunks) {
+ sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry);
+
+ if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) {
+ CHUNK_ITEM* chunk_item = sc->data;
+
+ if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) {
+ ci = chunk_item;
+ offset = sc->key.offset;
+ cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1];
+
+ devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG);
+ if (!devices) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ for (i = 0; i < ci->num_stripes; i++) {
+ devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid);
+ }
+
+ break;
+ }
+ }
+
+ le = le->Flink;
+ }
+
+ if (!ci) {
+ ERR("could not find chunk for %llx in bootstrap\n", addr);
+ return STATUS_INTERNAL_ERROR;
+ }
+
+ if (pc)
+ *pc = NULL;
+ }
+
+ if (ci->type & BLOCK_FLAG_DUPLICATE) {
+ type = BLOCK_FLAG_DUPLICATE;
+ allowed_missing = 0;
+ } else if (ci->type & BLOCK_FLAG_RAID0) {
+ type = BLOCK_FLAG_RAID0;
+ allowed_missing = 0;
+ } else if (ci->type & BLOCK_FLAG_RAID1) {
+ type = BLOCK_FLAG_DUPLICATE;
+ allowed_missing = 1;
+ } else if (ci->type & BLOCK_FLAG_RAID10) {
+ type = BLOCK_FLAG_RAID10;
+ allowed_missing = 1;
+ } else if (ci->type & BLOCK_FLAG_RAID5) {
+ type = BLOCK_FLAG_RAID5;
+ allowed_missing = 1;
+ } else if (ci->type & BLOCK_FLAG_RAID6) {
+ type = BLOCK_FLAG_RAID6;
+ allowed_missing = 2;
+ } else { // SINGLE
+ type = BLOCK_FLAG_DUPLICATE;
+ allowed_missing = 0;
+ }
+
+ cis = (CHUNK_ITEM_STRIPE*)&ci[1];
+
+ context = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_context), ALLOC_TAG);
+ if (!context) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ RtlZeroMemory(context, sizeof(read_data_context));
+ KeInitializeEvent(&context->Event, NotificationEvent, FALSE);
+
+ context->stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG);
+ if (!context->stripes) {
+ ERR("out of memory\n");
+ ExFreePool(context);
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ RtlZeroMemory(context->stripes, sizeof(read_data_stripe) * ci->num_stripes);
+
+ context->buflen = length;
+ context->num_stripes = ci->num_stripes;
+ context->stripes_left = context->num_stripes;
+ context->sector_size = Vcb->superblock.sector_size;
+ context->csum = csum;
+ context->tree = is_tree;
+ context->type = type;
+
+ stripestart = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT64) * ci->num_stripes, ALLOC_TAG);
+ if (!stripestart) {
+ ERR("out of memory\n");
+ ExFreePool(context);
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ stripeend = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT64) * ci->num_stripes, ALLOC_TAG);
+ if (!stripeend) {
+ ERR("out of memory\n");
+ ExFreePool(stripestart);
+ ExFreePool(context);
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ if (type == BLOCK_FLAG_RAID0) {
+ UINT64 startoff, endoff;
+ UINT16 endoffstripe;
+
+ get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe);
+ get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe);
+
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (startoffstripe > i) {
+ stripestart[i] = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
+ } else if (startoffstripe == i) {
+ stripestart[i] = startoff;
+ } else {
+ stripestart[i] = startoff - (startoff % ci->stripe_length);
+ }
+
+ if (endoffstripe > i) {
+ stripeend[i] = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
+ } else if (endoffstripe == i) {
+ stripeend[i] = endoff + 1;
+ } else {
+ stripeend[i] = endoff - (endoff % ci->stripe_length);
+ }
+ }
+ } else if (type == BLOCK_FLAG_RAID10) {
+ UINT64 startoff, endoff;
+ UINT16 endoffstripe, j;
+
+ get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe);
+ get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe);
+
+ if ((ci->num_stripes % ci->sub_stripes) != 0) {
+ ERR("chunk %llx: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes);
+ Status = STATUS_INTERNAL_ERROR;
+ goto exit;
+ }
+
+ context->firstoff = (startoff % ci->stripe_length) / Vcb->superblock.sector_size;
+ context->startoffstripe = startoffstripe;
+ context->sectors_per_stripe = ci->stripe_length / Vcb->superblock.sector_size;
+
+ startoffstripe *= ci->sub_stripes;
+ endoffstripe *= ci->sub_stripes;
+
+ for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
+ if (startoffstripe > i) {
+ stripestart[i] = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
+ } else if (startoffstripe == i) {
+ stripestart[i] = startoff;
+ } else {
+ stripestart[i] = startoff - (startoff % ci->stripe_length);
+ }
+
+ if (endoffstripe > i) {
+ stripeend[i] = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
+ } else if (endoffstripe == i) {
+ stripeend[i] = endoff + 1;
+ } else {
+ stripeend[i] = endoff - (endoff % ci->stripe_length);
+ }
+
+ for (j = 1; j < ci->sub_stripes; j++) {
+ stripestart[i+j] = stripestart[i];
+ stripeend[i+j] = stripeend[i];
+ }
+ }
+
+ context->stripes_cancel = 1;
+ } else if (type == BLOCK_FLAG_DUPLICATE) {
+ for (i = 0; i < ci->num_stripes; i++) {
+ stripestart[i] = addr - offset;
+ stripeend[i] = stripestart[i] + length;
+ }
+
+ context->stripes_cancel = ci->num_stripes - 1;
+ } else if (type == BLOCK_FLAG_RAID5) {
+ UINT64 startoff, endoff;
+ UINT16 endoffstripe;
+ UINT64 start = 0xffffffffffffffff, end = 0;
+
+ get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe);
+ get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe);
+
+ for (i = 0; i < ci->num_stripes - 1; i++) {
+ UINT64 ststart, stend;
+
+ if (startoffstripe > i) {
+ ststart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
+ } else if (startoffstripe == i) {
+ ststart = startoff;
+ } else {
+ ststart = startoff - (startoff % ci->stripe_length);
+ }
+
+ if (endoffstripe > i) {
+ stend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
+ } else if (endoffstripe == i) {
+ stend = endoff + 1;
+ } else {
+ stend = endoff - (endoff % ci->stripe_length);
+ }
+
+ if (ststart != stend) {
+ if (ststart < start) {
+ start = ststart;
+ firststripesize = ci->stripe_length - (ststart % ci->stripe_length);
+ }
+
+ if (stend > end)
+ end = stend;
+ }
+ }
+
+ for (i = 0; i < ci->num_stripes; i++) {
+ stripestart[i] = start;
+ stripeend[i] = end;
+ }
+
+ context->stripes_cancel = Vcb->options.raid5_recalculation;
+ } else if (type == BLOCK_FLAG_RAID6) {
+ UINT64 startoff, endoff;
+ UINT16 endoffstripe;
+ UINT64 start = 0xffffffffffffffff, end = 0;
+
+ get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe);
+ get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe);
+
+ for (i = 0; i < ci->num_stripes - 2; i++) {
+ UINT64 ststart, stend;
+
+ if (startoffstripe > i) {
+ ststart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
+ } else if (startoffstripe == i) {
+ ststart = startoff;
+ } else {
+ ststart = startoff - (startoff % ci->stripe_length);
+ }
+
+ if (endoffstripe > i) {
+ stend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
+ } else if (endoffstripe == i) {
+ stend = endoff + 1;
+ } else {
+ stend = endoff - (endoff % ci->stripe_length);
+ }
+
+ if (ststart != stend) {
+ if (ststart < start) {
+ start = ststart;
+ firststripesize = ci->stripe_length - (ststart % ci->stripe_length);
+ }
+
+ if (stend > end)
+ end = stend;
+ }
+ }
+
+ for (i = 0; i < ci->num_stripes; i++) {
+ stripestart[i] = start;
+ stripeend[i] = end;
+ }
+
+ context->stripes_cancel = Vcb->options.raid6_recalculation;
+ }
+
+ KeInitializeSpinLock(&context->spin_lock);
+
+ context->address = addr;
+
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (!devices[i] || stripestart[i] == stripeend[i]) {
+ context->stripes[i].status = ReadDataStatus_MissingDevice;
+ context->stripes[i].buf = NULL;
+ context->stripes_left--;
+
+ if (!devices[i])
+ missing_devices++;
+ }
+ }
+
+ if (missing_devices > allowed_missing) {
+ ERR("not enough devices to service request (%u missing)\n", missing_devices);
+ Status = STATUS_UNEXPECTED_IO_ERROR;
+ goto exit;
+ }
+
+ for (i = 0; i < ci->num_stripes; i++) {
+ PIO_STACK_LOCATION IrpSp;
+
+ if (devices[i] && stripestart[i] != stripeend[i]) {
+ context->stripes[i].context = (struct read_data_context*)context;
+ context->stripes[i].buf = ExAllocatePoolWithTag(NonPagedPool, stripeend[i] - stripestart[i], ALLOC_TAG);
+
+ if (!context->stripes[i].buf) {
+ ERR("out of memory\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto exit;
+ }
+
+ if (type == BLOCK_FLAG_RAID10) {
+ context->stripes[i].stripenum = i / ci->sub_stripes;
+ }
+
+ if (!Irp) {
+ context->stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE);
+
+ if (!context->stripes[i].Irp) {
+ ERR("IoAllocateIrp failed\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto exit;
+ }
+ } else {
+ context->stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize);
+
+ if (!context->stripes[i].Irp) {
+ ERR("IoMakeAssociatedIrp failed\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto exit;
+ }
+ }
+
+ IrpSp = IoGetNextIrpStackLocation(context->stripes[i].Irp);
+ IrpSp->MajorFunction = IRP_MJ_READ;
+
+ if (devices[i]->devobj->Flags & DO_BUFFERED_IO) {
+ FIXME("FIXME - buffered IO\n");
+ } else if (devices[i]->devobj->Flags & DO_DIRECT_IO) {
+ context->stripes[i].Irp->MdlAddress = IoAllocateMdl(context->stripes[i].buf, stripeend[i] - stripestart[i], FALSE, FALSE, NULL);
+ if (!context->stripes[i].Irp->MdlAddress) {
+ ERR("IoAllocateMdl failed\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto exit;
+ }
+
+ MmProbeAndLockPages(context->stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
+ } else {
+ context->stripes[i].Irp->UserBuffer = context->stripes[i].buf;
+ }
+
+ IrpSp->Parameters.Read.Length = stripeend[i] - stripestart[i];
+ IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[i] + cis[i].offset;
+
+ context->stripes[i].Irp->UserIosb = &context->stripes[i].iosb;
+
+ IoSetCompletionRoutine(context->stripes[i].Irp, read_data_completion, &context->stripes[i], TRUE, TRUE, TRUE);
+
+ context->stripes[i].status = ReadDataStatus_Pending;
+ }
+ }
+
+#ifdef DEBUG_STATS
+ if (!is_tree)
+ time1 = KeQueryPerformanceCounter(NULL);
+#endif
+
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].status != ReadDataStatus_MissingDevice) {
+ IoCallDriver(devices[i]->devobj, context->stripes[i].Irp);
+ }
+ }
+
+ KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
+
+#ifdef DEBUG_STATS
+ if (!is_tree) {
+ time2 = KeQueryPerformanceCounter(NULL);
+
+ Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart;
+ }
+#endif
+
+ // check if any of the devices return a "user-induced" error
+
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context->stripes[i].iosb.Status)) {
+ if (Irp && context->stripes[i].iosb.Status == STATUS_VERIFY_REQUIRED) {
+ PDEVICE_OBJECT dev;
+
+ dev = IoGetDeviceToVerify(Irp->Tail.Overlay.Thread);
+ IoSetDeviceToVerify(Irp->Tail.Overlay.Thread, NULL);
+
+ if (!dev) {
+ dev = IoGetDeviceToVerify(PsGetCurrentThread());
+ IoSetDeviceToVerify(PsGetCurrentThread(), NULL);
+ }
+
+ dev = Vcb->Vpb ? Vcb->Vpb->RealDevice : NULL;
+
+ if (dev)
+ IoVerifyVolume(dev, FALSE);
+ }
+// IoSetHardErrorOrVerifyDevice(context->stripes[i].Irp, devices[i]->devobj);
+
+ Status = context->stripes[i].iosb.Status;
+ goto exit;
+ }
+ }
+
+ if (type == BLOCK_FLAG_RAID0) {
+ UINT32 pos, *stripeoff;
+ UINT8 stripe;
+
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_Error) {
+ WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status);
+ Status = context->stripes[i].iosb.Status;
+ goto exit;
+ }
+ }
+
+ pos = 0;
+ stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG);
+ if (!stripeoff) {
+ ERR("out of memory\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto exit;
+ }
+
+ RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes);
+
+ stripe = startoffstripe;
+ while (pos < length) {
+ if (pos == 0) {
+ UINT32 readlen = min(stripeend[stripe] - stripestart[stripe], ci->stripe_length - (stripestart[stripe] % ci->stripe_length));
+
+ RtlCopyMemory(buf, context->stripes[stripe].buf, readlen);
+ stripeoff[stripe] += readlen;
+ pos += readlen;
+ } else if (length - pos < ci->stripe_length) {
+ RtlCopyMemory(buf + pos, &context->stripes[stripe].buf[stripeoff[stripe]], length - pos);
+ pos = length;
+ } else {
+ RtlCopyMemory(buf + pos, &context->stripes[stripe].buf[stripeoff[stripe]], ci->stripe_length);
+ stripeoff[stripe] += ci->stripe_length;
+ pos += ci->stripe_length;
+ }
+
+ stripe = (stripe + 1) % ci->num_stripes;
+ }
+
+ ExFreePool(stripeoff);
+
+ // FIXME - handle the case where one of the stripes doesn't read everything, i.e. Irp->IoStatus.Information is short
+
+ if (is_tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries
+ tree_header* th = (tree_header*)buf;
+ UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
+
+ if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
+ WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
+ Status = STATUS_CRC_ERROR;
+ goto exit;
+ }
+ } else if (csum) {
+#ifdef DEBUG_STATS
+ time1 = KeQueryPerformanceCounter(NULL);
+#endif
+ for (i = 0; i < length / Vcb->superblock.sector_size; i++) {
+ UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
+
+ if (crc32 != csum[i]) {
+ WARN("checksum error (%08x != %08x)\n", crc32, csum[i]);
+ Status = STATUS_CRC_ERROR;
+ goto exit;
+ }
+ }
+#ifdef DEBUG_STATS
+ time2 = KeQueryPerformanceCounter(NULL);
+
+ Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
+#endif
+ }
+
+ Status = STATUS_SUCCESS;
+ } else if (type == BLOCK_FLAG_RAID10) {
+ BOOL checksum_error = FALSE;
+ UINT32 pos, *stripeoff;
+ UINT8 stripe;
+ read_data_stripe** stripes;
+
+ stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
+ if (!stripes) {
+ ERR("out of memory\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto exit;
+ }
+
+ RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes);
+
+ for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
+ UINT16 j;
+
+ for (j = 0; j < ci->sub_stripes; j++) {
+ if (context->stripes[i+j].status == ReadDataStatus_Success) {
+ stripes[i / ci->sub_stripes] = &context->stripes[i+j];
+ break;
+ }
+ }
+
+ if (!stripes[i / ci->sub_stripes]) {
+ for (j = 0; j < ci->sub_stripes; j++) {
+ if (context->stripes[i+j].status == ReadDataStatus_Error) {
+ // both stripes must have errored if we get here
+ WARN("stripe %llu returned error %08x\n", i+j, context->stripes[i+j].iosb.Status);
+ Status = context->stripes[i].iosb.Status;
+ ExFreePool(stripes);
+ goto exit;
+ }
+ }
+ }
+ }
+
+ pos = 0;
+ stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
+ if (!stripeoff) {
+ ERR("out of memory\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ ExFreePool(stripes);
+ goto exit;
+ }
+
+ RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes);
+
+ stripe = startoffstripe / ci->sub_stripes;
+ while (pos < length) {
+ if (pos == 0) {
+ UINT32 readlen = min(stripeend[stripe * ci->sub_stripes] - stripestart[stripe * ci->sub_stripes], ci->stripe_length - (stripestart[stripe * ci->sub_stripes] % ci->stripe_length));
+
+ RtlCopyMemory(buf, stripes[stripe]->buf, readlen);
+ stripeoff[stripe] += readlen;
+ pos += readlen;
+
+ if (context->csum) {
+#ifdef DEBUG_STATS
+ time1 = KeQueryPerformanceCounter(NULL);
+#endif
+ for (i = 0; i < readlen / Vcb->superblock.sector_size; i++) {
+ UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
+
+ if (crc32 != csum[i]) {
+ checksum_error = TRUE;
+ stripes[stripe]->status = ReadDataStatus_CRCError;
+ }
+ }
+#ifdef DEBUG_STATS
+ time2 = KeQueryPerformanceCounter(NULL);
+
+ Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
+#endif
+ }
+ } else if (length - pos < ci->stripe_length) {
+ RtlCopyMemory(buf + pos, &stripes[stripe]->buf[stripeoff[stripe]], length - pos);
+
+ if (context->csum) {
+#ifdef DEBUG_STATS
+ time1 = KeQueryPerformanceCounter(NULL);
+#endif
+ for (i = 0; i < (length - pos) / Vcb->superblock.sector_size; i++) {
+ UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + pos + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
+
+ if (crc32 != csum[(pos / Vcb->superblock.sector_size) + i]) {
+ checksum_error = TRUE;
+ stripes[stripe]->status = ReadDataStatus_CRCError;
+ }
+ }
+#ifdef DEBUG_STATS
+ time2 = KeQueryPerformanceCounter(NULL);
+
+ Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
+#endif
+ }
+
+ pos = length;
+ } else {
+ RtlCopyMemory(buf + pos, &stripes[stripe]->buf[stripeoff[stripe]], ci->stripe_length);
+ stripeoff[stripe] += ci->stripe_length;
+
+ if (context->csum) {
+#ifdef DEBUG_STATS
+ time1 = KeQueryPerformanceCounter(NULL);
+#endif
+ for (i = 0; i < ci->stripe_length / Vcb->superblock.sector_size; i++) {
+ UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + pos + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
+
+ if (crc32 != csum[(pos / Vcb->superblock.sector_size) + i]) {
+ checksum_error = TRUE;
+ stripes[stripe]->status = ReadDataStatus_CRCError;
+ }
+ }
+#ifdef DEBUG_STATS
+ time2 = KeQueryPerformanceCounter(NULL);
+
+ Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
+#endif
+ }
+
+ pos += ci->stripe_length;
+ }
+
+ stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes);
+ }
+
+ if (is_tree) {
+ tree_header* th = (tree_header*)buf;
+ UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
+
+ if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
+ WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
+ checksum_error = TRUE;
+ stripes[startoffstripe]->status = ReadDataStatus_CRCError;
+ }
+ }
+
+ if (checksum_error) {
+ // FIXME - update dev stats
+
+ WARN("checksum error\n");
+
+ context->stripes_left = 0;
+
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_CRCError) {
+ UINT16 other_stripe = (i % 1) ? (i - 1) : (i + 1);
+
+ if (context->stripes[other_stripe].status == ReadDataStatus_Cancelled) {
+ PIO_STACK_LOCATION IrpSp;
+
+ // re-run Irp that we cancelled
+
+ if (context->stripes[other_stripe].Irp) {
+ if (devices[other_stripe]->devobj->Flags & DO_DIRECT_IO) {
+ MmUnlockPages(context->stripes[other_stripe].Irp->MdlAddress);
+ IoFreeMdl(context->stripes[other_stripe].Irp->MdlAddress);
+ }
+ IoFreeIrp(context->stripes[other_stripe].Irp);
+ }
+
+ if (!Irp) {
+ context->stripes[other_stripe].Irp = IoAllocateIrp(devices[other_stripe]->devobj->StackSize, FALSE);
+
+ if (!context->stripes[other_stripe].Irp) {
+ ERR("IoAllocateIrp failed\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto exit;
+ }
+ } else {
+ context->stripes[other_stripe].Irp = IoMakeAssociatedIrp(Irp, devices[other_stripe]->devobj->StackSize);
+
+ if (!context->stripes[other_stripe].Irp) {
+ ERR("IoMakeAssociatedIrp failed\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto exit;
+ }
+ }
+
+ IrpSp = IoGetNextIrpStackLocation(context->stripes[other_stripe].Irp);
+ IrpSp->MajorFunction = IRP_MJ_READ;
+
+ if (devices[other_stripe]->devobj->Flags & DO_BUFFERED_IO) {
+ FIXME("FIXME - buffered IO\n");
+ } else if (devices[other_stripe]->devobj->Flags & DO_DIRECT_IO) {
+ context->stripes[other_stripe].Irp->MdlAddress = IoAllocateMdl(context->stripes[other_stripe].buf, stripeend[other_stripe] - stripestart[other_stripe], FALSE, FALSE, NULL);
+ if (!context->stripes[other_stripe].Irp->MdlAddress) {
+ ERR("IoAllocateMdl failed\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto exit;
+ }
+
+ MmProbeAndLockPages(context->stripes[other_stripe].Irp->MdlAddress, KernelMode, IoWriteAccess);
+ } else {
+ context->stripes[other_stripe].Irp->UserBuffer = context->stripes[other_stripe].buf;
+ }
+
+ IrpSp->Parameters.Read.Length = stripeend[other_stripe] - stripestart[other_stripe];
+ IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[other_stripe] + cis[other_stripe].offset;
+
+ context->stripes[other_stripe].Irp->UserIosb = &context->stripes[other_stripe].iosb;
+
+ IoSetCompletionRoutine(context->stripes[other_stripe].Irp, read_data_completion, &context->stripes[other_stripe], TRUE, TRUE, TRUE);
+
+ context->stripes_left++;
+ context->stripes[other_stripe].status = ReadDataStatus_Pending;
+ }
+ }
+ }
+
+ if (context->stripes_left == 0) {
+ WARN("could not recover from checksum error\n");
+ ExFreePool(stripes);
+ ExFreePool(stripeoff);
+ Status = STATUS_CRC_ERROR;
+ goto exit;
+ }
+
+ context->stripes_cancel = 0;
+ KeClearEvent(&context->Event);
+
+#ifdef DEBUG_STATS
+ if (!is_tree)
+ time1 = KeQueryPerformanceCounter(NULL);
+#endif
+
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_Pending) {
+ IoCallDriver(devices[i]->devobj, context->stripes[i].Irp);
+ }
+ }
+
+ KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
+
+#ifdef DEBUG_STATS
+ if (!is_tree) {
+ time2 = KeQueryPerformanceCounter(NULL);
+
+ Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart;
+ }
+#endif
+
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_CRCError) {
+ UINT16 other_stripe = (i % 1) ? (i - 1) : (i + 1);
+
+ if (context->stripes[other_stripe].status != ReadDataStatus_Success) {
+ WARN("could not recover from checksum error\n");
+ ExFreePool(stripes);
+ ExFreePool(stripeoff);
+ Status = STATUS_CRC_ERROR;
+ goto exit;
+ }
+ }
+ }
+
+ RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes);
+
+ pos = 0;
+ stripe = startoffstripe / ci->sub_stripes;
+ while (pos < length) {
+ if (pos == 0) {
+ UINT32 readlen = min(stripeend[stripe * ci->sub_stripes] - stripestart[stripe * ci->sub_stripes], ci->stripe_length - (stripestart[stripe * ci->sub_stripes] % ci->stripe_length));
+
+ stripeoff[stripe] += readlen;
+ pos += readlen;
+
+ if (context->csum && stripes[stripe]->status == ReadDataStatus_CRCError) {
+ for (i = 0; i < readlen / Vcb->superblock.sector_size; i++) {
+ UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
+
+ if (crc32 != csum[i]) {
+ UINT16 other_stripe = (stripe * ci->sub_stripes) + (context->stripes[stripe * ci->sub_stripes].status == ReadDataStatus_CRCError ? 1 : 0);
+ UINT32 crc32b = ~calc_crc32c(0xffffffff, context->stripes[other_stripe].buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
+
+ if (crc32b == csum[i]) {
+ RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), context->stripes[other_stripe].buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
+ RtlCopyMemory(stripes[stripe]->buf + (i * Vcb->superblock.sector_size), context->stripes[other_stripe].buf + (i * Vcb->superblock.sector_size),
+ Vcb->superblock.sector_size);
+ stripes[stripe]->rewrite = TRUE;
+ } else {
+ WARN("could not recover from checksum error\n");
+ ExFreePool(stripes);
+ ExFreePool(stripeoff);
+ Status = STATUS_CRC_ERROR;
+ goto exit;
+ }
+ }
+ }
+ } else if (is_tree) {
+ UINT16 other_stripe = (stripe * ci->sub_stripes) + (context->stripes[stripe * ci->sub_stripes].status == ReadDataStatus_CRCError ? 1 : 0);
+ tree_header* th = (tree_header*)buf;
+ UINT32 crc32;
+
+ RtlCopyMemory(buf, context->stripes[other_stripe].buf, readlen);
+
+ crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
+
+ if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
+ WARN("could not recover from checksum error\n");
+ ExFreePool(stripes);
+ ExFreePool(stripeoff);
+ Status = STATUS_CRC_ERROR;
+ goto exit;
+ }
+
+ RtlCopyMemory(stripes[stripe]->buf, buf, readlen);
+ stripes[stripe]->rewrite = TRUE;
+ }
+ } else if (length - pos < ci->stripe_length) {
+ if (context->csum && stripes[stripe]->status == ReadDataStatus_CRCError) {
+ for (i = 0; i < (length - pos) / Vcb->superblock.sector_size; i++) {
+ UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + pos + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
+
+ if (crc32 != csum[(pos / Vcb->superblock.sector_size) + i]) {
+ UINT16 other_stripe = (stripe * ci->sub_stripes) + (context->stripes[stripe * ci->sub_stripes].status == ReadDataStatus_CRCError ? 1 : 0);
+ UINT32 crc32b = ~calc_crc32c(0xffffffff, &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)],
+ Vcb->superblock.sector_size);
+
+ if (crc32b == csum[i]) {
+ RtlCopyMemory(buf + pos + (i * Vcb->superblock.sector_size),
+ &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], Vcb->superblock.sector_size);
+ RtlCopyMemory(&stripes[stripe]->buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)],
+ &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)],
+ Vcb->superblock.sector_size);
+ stripes[stripe]->rewrite = TRUE;
+ } else {
+ WARN("could not recover from checksum error\n");
+ ExFreePool(stripes);
+ ExFreePool(stripeoff);
+ Status = STATUS_CRC_ERROR;
+ goto exit;
+ }
+ }
+ }
+ }
+
+ pos = length;
+ } else {
+ if (context->csum && stripes[stripe]->status == ReadDataStatus_CRCError) {
+ for (i = 0; i < ci->stripe_length / Vcb->superblock.sector_size; i++) {
+ UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + pos + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
+
+ if (crc32 != csum[(pos / Vcb->superblock.sector_size) + i]) {
+ UINT16 other_stripe = (stripe * ci->sub_stripes) + (context->stripes[stripe * ci->sub_stripes].status == ReadDataStatus_CRCError ? 1 : 0);
+ UINT32 crc32b = ~calc_crc32c(0xffffffff, &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)],
+ Vcb->superblock.sector_size);
+
+ if (crc32b == csum[i]) {
+ RtlCopyMemory(buf + pos + (i * Vcb->superblock.sector_size),
+ &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)], Vcb->superblock.sector_size);
+ RtlCopyMemory(&stripes[stripe]->buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)],
+ &context->stripes[other_stripe].buf[stripeoff[stripe] + (i * Vcb->superblock.sector_size)],
+ Vcb->superblock.sector_size);
+ stripes[stripe]->rewrite = TRUE;
+ } else {
+ WARN("could not recover from checksum error\n");
+ ExFreePool(stripes);
+ ExFreePool(stripeoff);
+ Status = STATUS_CRC_ERROR;
+ goto exit;
+ }
+ }
+ }
+ }
+
+ stripeoff[stripe] += ci->stripe_length;
+ pos += ci->stripe_length;
+ }
+
+ stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes);
+ }
+
+ // write good data over bad
+
+ if (!Vcb->readonly) {
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].rewrite && devices[i] && !devices[i]->readonly) {
+ Status = write_data_phys(devices[i]->devobj, cis[i].offset + stripestart[i], context->stripes[i].buf, stripeend[i] - stripestart[i]);
+
+ if (!NT_SUCCESS(Status))
+ WARN("write_data_phys returned %08x\n", Status);
+ }
+ }
+ }
+ }
+
+ ExFreePool(stripes);
+ ExFreePool(stripeoff);
+
+ // FIXME - handle the case where one of the stripes doesn't read everything, i.e. Irp->IoStatus.Information is short
+
+ Status = STATUS_SUCCESS;
+ } else if (type == BLOCK_FLAG_DUPLICATE) {
+ BOOL checksum_error = FALSE;
+ UINT16 cancelled = 0;
+
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_Success) {
+ if (context->tree) {
+ tree_header* th = (tree_header*)context->stripes[i].buf;
+ UINT32 crc32;
+
+ crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum));
+
+ if (th->address != context->address || crc32 != *((UINT32*)th->csum)) {
+ context->stripes[i].status = ReadDataStatus_CRCError;
+ checksum_error = TRUE;
+ }
+ } else if (context->csum) {
+ UINT32 j;
+
+#ifdef DEBUG_STATS
+ time1 = KeQueryPerformanceCounter(NULL);
+#endif
+
+ for (j = 0; j < context->stripes[i].Irp->IoStatus.Information / context->sector_size; j++) {
+ UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[i].buf + (j * context->sector_size), context->sector_size);
+
+ if (crc32 != context->csum[j]) {
+ context->stripes[i].status = ReadDataStatus_CRCError;
+ checksum_error = TRUE;
+ break;
+ }
+ }
+#ifdef DEBUG_STATS
+ time2 = KeQueryPerformanceCounter(NULL);
+
+ Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
+#endif
+ }
+ } else if (context->stripes[i].status == ReadDataStatus_Cancelled) {
+ cancelled++;
}
+ }
+
+ if (checksum_error) {
+ // FIXME - update dev stats
- if (type == BLOCK_FLAG_RAID10) {
- context->stripes[i].stripenum = i / ci->sub_stripes;
- }
+ if (cancelled > 0) {
+ context->stripes_left = 0;
+
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_Cancelled) {
+ PIO_STACK_LOCATION IrpSp;
+
+ // re-run Irp that we cancelled
+
+ if (context->stripes[i].Irp) {
+ if (devices[i]->devobj->Flags & DO_DIRECT_IO) {
+ MmUnlockPages(context->stripes[i].Irp->MdlAddress);
+ IoFreeMdl(context->stripes[i].Irp->MdlAddress);
+ }
+ IoFreeIrp(context->stripes[i].Irp);
+ }
+
+ if (!Irp) {
+ context->stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE);
+
+ if (!context->stripes[i].Irp) {
+ ERR("IoAllocateIrp failed\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto exit;
+ }
+ } else {
+ context->stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize);
+
+ if (!context->stripes[i].Irp) {
+ ERR("IoMakeAssociatedIrp failed\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto exit;
+ }
+ }
+
+ IrpSp = IoGetNextIrpStackLocation(context->stripes[i].Irp);
+ IrpSp->MajorFunction = IRP_MJ_READ;
+
+ if (devices[i]->devobj->Flags & DO_BUFFERED_IO) {
+ FIXME("FIXME - buffered IO\n");
+ } else if (devices[i]->devobj->Flags & DO_DIRECT_IO) {
+ context->stripes[i].Irp->MdlAddress = IoAllocateMdl(context->stripes[i].buf, stripeend[i] - stripestart[i], FALSE, FALSE, NULL);
+ if (!context->stripes[i].Irp->MdlAddress) {
+ ERR("IoAllocateMdl failed\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto exit;
+ }
+
+ MmProbeAndLockPages(context->stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
+ } else {
+ context->stripes[i].Irp->UserBuffer = context->stripes[i].buf;
+ }
- if (!Irp) {
- context->stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE);
+ IrpSp->Parameters.Read.Length = stripeend[i] - stripestart[i];
+ IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[i] + cis[i].offset;
+
+ context->stripes[i].Irp->UserIosb = &context->stripes[i].iosb;
+
+ IoSetCompletionRoutine(context->stripes[i].Irp, read_data_completion, &context->stripes[i], TRUE, TRUE, TRUE);
+
+ context->stripes_left++;
+ context->stripes[i].status = ReadDataStatus_Pending;
+ }
+ }
- if (!context->stripes[i].Irp) {
- ERR("IoAllocateIrp failed\n");
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto exit;
+ context->stripes_cancel = 0;
+ KeClearEvent(&context->Event);
+
+#ifdef DEBUG_STATS
+ if (!is_tree)
+ time1 = KeQueryPerformanceCounter(NULL);
+#endif
+
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_Pending) {
+ IoCallDriver(devices[i]->devobj, context->stripes[i].Irp);
+ }
}
- } else {
- context->stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize);
- if (!context->stripes[i].Irp) {
- ERR("IoMakeAssociatedIrp failed\n");
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto exit;
+ KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
+
+#ifdef DEBUG_STATS
+ if (!is_tree) {
+ time2 = KeQueryPerformanceCounter(NULL);
+
+ Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart;
+ }
+#endif
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_Success) {
+ if (context->tree) {
+ tree_header* th = (tree_header*)context->stripes[i].buf;
+ UINT32 crc32;
+
+ crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum));
+
+ if (th->address != context->address || crc32 != *((UINT32*)th->csum)) {
+ context->stripes[i].status = ReadDataStatus_CRCError;
+ checksum_error = TRUE;
+ }
+ } else if (context->csum) {
+ UINT32 j;
+
+#ifdef DEBUG_STATS
+ time1 = KeQueryPerformanceCounter(NULL);
+#endif
+ for (j = 0; j < context->stripes[i].Irp->IoStatus.Information / context->sector_size; j++) {
+ UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[i].buf + (j * context->sector_size), context->sector_size);
+
+ if (crc32 != context->csum[j]) {
+ context->stripes[i].status = ReadDataStatus_CRCError;
+ checksum_error = TRUE;
+ break;
+ }
+ }
+#ifdef DEBUG_STATS
+ time2 = KeQueryPerformanceCounter(NULL);
+
+ Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
+#endif
+ }
+ }
}
}
- IrpSp = IoGetNextIrpStackLocation(context->stripes[i].Irp);
- IrpSp->MajorFunction = IRP_MJ_READ;
-
- if (devices[i]->devobj->Flags & DO_BUFFERED_IO) {
- FIXME("FIXME - buffered IO\n");
- } else if (devices[i]->devobj->Flags & DO_DIRECT_IO) {
- context->stripes[i].Irp->MdlAddress = IoAllocateMdl(context->stripes[i].buf, stripeend[i] - stripestart[i], FALSE, FALSE, NULL);
- if (!context->stripes[i].Irp->MdlAddress) {
- ERR("IoAllocateMdl failed\n");
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto exit;
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_Success) {
+ RtlCopyMemory(buf, context->stripes[i].buf, length);
+ goto raid1write;
}
+ }
+
+ if (context->tree || ci->num_stripes == 1) { // unable to recover from checksum error
+ ERR("unrecoverable checksum error at %llx\n", addr);
- MmProbeAndLockPages(context->stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
- } else {
- context->stripes[i].Irp->UserBuffer = context->stripes[i].buf;
+#ifdef _DEBUG
+ if (context->tree) {
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_CRCError) {
+ tree_header* th = (tree_header*)context->stripes[i].buf;
+ UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum));
+
+ WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
+ }
+ }
+ }
+#endif
+ Status = STATUS_CRC_ERROR;
+ goto exit;
}
-
- IrpSp->Parameters.Read.Length = stripeend[i] - stripestart[i];
- IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[i] + cis[i].offset;
- context->stripes[i].Irp->UserIosb = &context->stripes[i].iosb;
+ // checksum errors on both stripes - we need to check sector by sector
- IoSetCompletionRoutine(context->stripes[i].Irp, read_data_completion, &context->stripes[i], TRUE, TRUE, TRUE);
-
- context->stripes[i].status = ReadDataStatus_Pending;
- }
- }
-
- for (i = 0; i < ci->num_stripes; i++) {
- if (context->stripes[i].status != ReadDataStatus_MissingDevice) {
- IoCallDriver(devices[i]->devobj, context->stripes[i].Irp);
- }
- }
-
- KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
-
- // FIXME - if checksum error, write good data over bad
-
- // check if any of the devices return a "user-induced" error
-
- for (i = 0; i < ci->num_stripes; i++) {
- if (context->stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context->stripes[i].iosb.Status)) {
- if (Irp && context->stripes[i].iosb.Status == STATUS_VERIFY_REQUIRED) {
- PDEVICE_OBJECT dev;
+ for (i = 0; i < (stripeend[0] - stripestart[0]) / context->sector_size; i++) {
+ UINT16 j;
+ BOOL success = FALSE;
- dev = IoGetDeviceToVerify(Irp->Tail.Overlay.Thread);
- IoSetDeviceToVerify(Irp->Tail.Overlay.Thread, NULL);
+#ifdef DEBUG_STATS
+ time1 = KeQueryPerformanceCounter(NULL);
+#endif
- if (!dev) {
- dev = IoGetDeviceToVerify(PsGetCurrentThread());
- IoSetDeviceToVerify(PsGetCurrentThread(), NULL);
+ for (j = 0; j < ci->num_stripes; j++) {
+ if (context->stripes[j].status == ReadDataStatus_CRCError) {
+ UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[j].buf + (i * context->sector_size), context->sector_size);
+
+ if (crc32 == context->csum[i]) {
+ RtlCopyMemory(buf + (i * context->sector_size), context->stripes[j].buf + (i * context->sector_size), context->sector_size);
+ success = TRUE;
+ break;
+ }
+ }
}
- dev = Vcb->Vpb ? Vcb->Vpb->RealDevice : NULL;
-
- if (dev)
- IoVerifyVolume(dev, FALSE);
+#ifdef DEBUG_STATS
+ time2 = KeQueryPerformanceCounter(NULL);
+
+ Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
+#endif
+ if (!success) {
+ ERR("unrecoverable checksum error at %llx\n", addr + (i * context->sector_size));
+ Status = STATUS_CRC_ERROR;
+ goto exit;
+ }
}
-// IoSetHardErrorOrVerifyDevice(context->stripes[i].Irp, devices[i]->devobj);
- Status = context->stripes[i].iosb.Status;
+raid1write:
+ // write good data over bad
+
+ if (!Vcb->readonly) {
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_CRCError && devices[i] && !devices[i]->readonly) {
+ Status = write_data_phys(devices[i]->devobj, cis[i].offset + stripestart[i], buf, length);
+
+ if (!NT_SUCCESS(Status))
+ WARN("write_data_phys returned %08x\n", Status);
+ }
+ }
+ }
+
+ Status = STATUS_SUCCESS;
goto exit;
}
- }
-
- if (type == BLOCK_FLAG_RAID0) {
- UINT32 pos, *stripeoff;
- UINT8 stripe;
+
+ // check if any of the stripes succeeded
+
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_Success) {
+ RtlCopyMemory(buf, context->stripes[i].buf, length);
+ Status = STATUS_SUCCESS;
+ goto exit;
+ }
+ }
+
+ // failing that, return the first error we encountered
for (i = 0; i < ci->num_stripes; i++) {
if (context->stripes[i].status == ReadDataStatus_Error) {
- WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status);
Status = context->stripes[i].iosb.Status;
goto exit;
}
}
- pos = 0;
- stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG);
- if (!stripeoff) {
- ERR("out of memory\n");
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto exit;
+ // if we somehow get here, return STATUS_INTERNAL_ERROR
+
+ Status = STATUS_INTERNAL_ERROR;
+ } else if (type == BLOCK_FLAG_RAID5) {
+ UINT32 pos, skip;
+ int num_errors = 0;
+ UINT64 off, stripeoff, origoff;
+ BOOL needs_reconstruct = FALSE;
+ UINT64 reconstruct_stripe;
+ BOOL checksum_error = FALSE;
+
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_Error) {
+ num_errors++;
+ if (num_errors > 1)
+ break;
+ }
}
- RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes);
+ if (num_errors > 1) {
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_Error) {
+ WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status);
+ Status = context->stripes[i].iosb.Status;
+ goto exit;
+ }
+ }
+ }
+
+ off = addr - offset;
+ off -= off % ((ci->num_stripes - 1) * ci->stripe_length);
+ skip = addr - offset - off;
+ origoff = off;
+
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_Cancelled) {
+ if (needs_reconstruct) {
+ ERR("more than one stripe needs reconstruction\n");
+ Status = STATUS_INTERNAL_ERROR;
+ goto exit;
+ } else {
+ needs_reconstruct = TRUE;
+ reconstruct_stripe = i;
+ }
+ }
+ }
+
+ if (needs_reconstruct) {
+ TRACE("reconstructing stripe %u\n", reconstruct_stripe);
+
+ stripeoff = 0;
+
+ raid5_reconstruct(off, skip, context, ci, &stripeoff, stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe], TRUE, firststripesize, reconstruct_stripe);
+
+ while (stripeoff < stripeend[0] - stripestart[0]) {
+ off += (ci->num_stripes - 1) * ci->stripe_length;
+ raid5_reconstruct(off, 0, context, ci, &stripeoff, stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe], FALSE, 0, reconstruct_stripe);
+ }
+
+ off = addr - offset;
+ off -= off % ((ci->num_stripes - 1) * ci->stripe_length);
+ }
+
+ pos = 0;
+ stripeoff = 0;
+ raid5_decode(off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize);
- stripe = startoffstripe;
while (pos < length) {
- if (pos == 0) {
- UINT32 readlen = min(stripeend[stripe] - stripestart[stripe], ci->stripe_length - (stripestart[stripe] % ci->stripe_length));
+ off += (ci->num_stripes - 1) * ci->stripe_length;
+ raid5_decode(off, 0, context, ci, &stripeoff, buf, &pos, length, 0);
+ }
+
+ if (is_tree) {
+ tree_header* th = (tree_header*)buf;
+ UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
+
+ if (addr != th->address || crc32 != *((UINT32*)th->csum))
+ checksum_error = TRUE;
+ } else if (csum) {
+#ifdef DEBUG_STATS
+ time1 = KeQueryPerformanceCounter(NULL);
+#endif
+ for (i = 0; i < length / Vcb->superblock.sector_size; i++) {
+ UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
- RtlCopyMemory(buf, context->stripes[stripe].buf, readlen);
- stripeoff[stripe] += readlen;
- pos += readlen;
- } else if (length - pos < ci->stripe_length) {
- RtlCopyMemory(buf + pos, &context->stripes[stripe].buf[stripeoff[stripe]], length - pos);
- pos = length;
+ if (crc32 != csum[i]) {
+ checksum_error = TRUE;
+ break;
+ }
+ }
+#ifdef DEBUG_STATS
+ time2 = KeQueryPerformanceCounter(NULL);
+
+ Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
+#endif
+ }
+
+ if (checksum_error) {
+ if (needs_reconstruct) {
+ PIO_STACK_LOCATION IrpSp;
+
+ // re-run Irp that we cancelled
+
+ if (context->stripes[reconstruct_stripe].Irp) {
+ if (devices[reconstruct_stripe]->devobj->Flags & DO_DIRECT_IO) {
+ MmUnlockPages(context->stripes[reconstruct_stripe].Irp->MdlAddress);
+ IoFreeMdl(context->stripes[reconstruct_stripe].Irp->MdlAddress);
+ }
+ IoFreeIrp(context->stripes[reconstruct_stripe].Irp);
+ }
+
+ if (!Irp) {
+ context->stripes[reconstruct_stripe].Irp = IoAllocateIrp(devices[reconstruct_stripe]->devobj->StackSize, FALSE);
+
+ if (!context->stripes[reconstruct_stripe].Irp) {
+ ERR("IoAllocateIrp failed\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto exit;
+ }
+ } else {
+ context->stripes[reconstruct_stripe].Irp = IoMakeAssociatedIrp(Irp, devices[reconstruct_stripe]->devobj->StackSize);
+
+ if (!context->stripes[reconstruct_stripe].Irp) {
+ ERR("IoMakeAssociatedIrp failed\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto exit;
+ }
+ }
+
+ IrpSp = IoGetNextIrpStackLocation(context->stripes[reconstruct_stripe].Irp);
+ IrpSp->MajorFunction = IRP_MJ_READ;
+
+ if (devices[reconstruct_stripe]->devobj->Flags & DO_BUFFERED_IO) {
+ FIXME("FIXME - buffered IO\n");
+ } else if (devices[reconstruct_stripe]->devobj->Flags & DO_DIRECT_IO) {
+ context->stripes[reconstruct_stripe].Irp->MdlAddress = IoAllocateMdl(context->stripes[reconstruct_stripe].buf,
+ stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe], FALSE, FALSE, NULL);
+ if (!context->stripes[reconstruct_stripe].Irp->MdlAddress) {
+ ERR("IoAllocateMdl failed\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto exit;
+ }
+
+ MmProbeAndLockPages(context->stripes[reconstruct_stripe].Irp->MdlAddress, KernelMode, IoWriteAccess);
+ } else {
+ context->stripes[reconstruct_stripe].Irp->UserBuffer = context->stripes[reconstruct_stripe].buf;
+ }
+
+ IrpSp->Parameters.Read.Length = stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe];
+ IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[reconstruct_stripe] + cis[reconstruct_stripe].offset;
+
+ context->stripes[reconstruct_stripe].Irp->UserIosb = &context->stripes[reconstruct_stripe].iosb;
+
+ IoSetCompletionRoutine(context->stripes[reconstruct_stripe].Irp, read_data_completion, &context->stripes[reconstruct_stripe], TRUE, TRUE, TRUE);
+
+ context->stripes[reconstruct_stripe].status = ReadDataStatus_Pending;
+
+ context->stripes_left = 1;
+ KeClearEvent(&context->Event);
+
+#ifdef DEBUG_STATS
+ if (!is_tree)
+ time1 = KeQueryPerformanceCounter(NULL);
+#endif
+
+ IoCallDriver(devices[reconstruct_stripe]->devobj, context->stripes[reconstruct_stripe].Irp);
+
+ KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
+
+#ifdef DEBUG_STATS
+ if (!is_tree) {
+ time2 = KeQueryPerformanceCounter(NULL);
+
+ Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart;
+ }
+#endif
+
+ if (context->stripes[reconstruct_stripe].status != ReadDataStatus_Success) {
+ ERR("unrecoverable checksum error\n");
+ Status = STATUS_CRC_ERROR;
+ goto exit;
+ }
+ }
+
+ if (context->tree) {
+ off = origoff;
+ pos = 0;
+ stripeoff = 0;
+ if (!raid5_decode_with_checksum_metadata(addr, off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize, Vcb->superblock.node_size)) {
+ ERR("unrecoverable metadata checksum error\n");
+ Status = STATUS_CRC_ERROR;
+ goto exit;
+ }
} else {
- RtlCopyMemory(buf + pos, &context->stripes[stripe].buf[stripeoff[stripe]], ci->stripe_length);
- stripeoff[stripe] += ci->stripe_length;
- pos += ci->stripe_length;
+ off = origoff;
+ pos = 0;
+ stripeoff = 0;
+ if (!raid5_decode_with_checksum(off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize, csum, Vcb->superblock.sector_size)) {
+ Status = STATUS_CRC_ERROR;
+ goto exit;
+ }
+
+ while (pos < length) {
+ off += (ci->num_stripes - 1) * ci->stripe_length;
+ if (!raid5_decode_with_checksum(off, 0, context, ci, &stripeoff, buf, &pos, length, 0, csum, Vcb->superblock.sector_size)) {
+ Status = STATUS_CRC_ERROR;
+ goto exit;
+ }
+ }
}
- stripe = (stripe + 1) % ci->num_stripes;
+ // write good data over bad
+
+ if (!Vcb->readonly) {
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].rewrite && devices[i] && !devices[i]->readonly) {
+ Status = write_data_phys(devices[i]->devobj, cis[i].offset + stripestart[i], context->stripes[i].buf, stripeend[i] - stripestart[i]);
+
+ if (!NT_SUCCESS(Status))
+ WARN("write_data_phys returned %08x\n", Status);
+ }
+ }
+ }
}
- ExFreePool(stripeoff);
-
- // FIXME - handle the case where one of the stripes doesn't read everything, i.e. Irp->IoStatus.Information is short
-
- if (is_tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries
- tree_header* th = (tree_header*)buf;
- UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
+ if (!context->tree && !context->csum) {
+ UINT32* parity_buf;
- if (crc32 != *((UINT32*)th->csum)) {
- WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
- Status = STATUS_CRC_ERROR;
+ // We are reading a nodatacsum extent. Even though there's no checksum, we
+ // can still identify errors by checking if the parity is consistent.
+
+ parity_buf = ExAllocatePoolWithTag(NonPagedPool, stripeend[0] - stripestart[0], ALLOC_TAG);
+
+ if (!parity_buf) {
+ ERR("out of memory\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
goto exit;
}
- } else if (csum) {
- for (i = 0; i < length / Vcb->superblock.sector_size; i++) {
- UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
-
- if (crc32 != csum[i]) {
- WARN("checksum error (%08x != %08x)\n", crc32, csum[i]);
+
+ // Seed the accumulator with stripe 0, then fold in the remaining stripes.
+ // The loop must start at 1: starting at 0 would XOR stripe 0 in a second
+ // time and cancel it out, so the zero test below would fail on good data.
+ // NOTE(review): assumes do_xor(dst, src, len) XORs src into dst - confirm.
+ RtlCopyMemory(parity_buf, context->stripes[0].buf, stripeend[0] - stripestart[0]);
+
+ for (i = 1; i < ci->num_stripes; i++) {
+ do_xor((UINT8*)parity_buf, context->stripes[i].buf, stripeend[0] - stripestart[0]);
+ }
+
+ for (i = 0; i < (stripeend[0] - stripestart[0]) / sizeof(UINT32); i++) {
+ if (parity_buf[i] != 0) {
+ ERR("parity error on nodatacsum inode\n");
+ ExFreePool(parity_buf);
Status = STATUS_CRC_ERROR;
goto exit;
}
}
+
+ ExFreePool(parity_buf);
}
Status = STATUS_SUCCESS;
- } else if (type == BLOCK_FLAG_RAID10) {
- UINT32 pos, *stripeoff;
- UINT8 stripe;
- read_data_stripe** stripes;
+ } else if (type == BLOCK_FLAG_RAID6) {
+ UINT32 pos, skip;
+ int num_errors = 0;
+ UINT64 off, stripeoff, origoff;
+ UINT8 needs_reconstruct = 0;
+ UINT16 missing1, missing2;
+ BOOL checksum_error = FALSE;
- stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
- if (!stripes) {
- ERR("out of memory\n");
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto exit;
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_Error) {
+ num_errors++;
+ if (num_errors > 2)
+ break;
+ }
}
- RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes);
+ if (num_errors > 2) {
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_Error) {
+ WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status);
+ Status = context->stripes[i].iosb.Status;
+ goto exit;
+ }
+ }
+ }
- for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
- UINT16 j;
-
- for (j = 0; j < ci->sub_stripes; j++) {
- if (context->stripes[i+j].status == ReadDataStatus_Success) {
- stripes[i / ci->sub_stripes] = &context->stripes[i+j];
- break;
+ off = addr - offset;
+ off -= off % ((ci->num_stripes - 2) * ci->stripe_length);
+ skip = addr - offset - off;
+ origoff = off;
+
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].status == ReadDataStatus_Cancelled) {
+ if (needs_reconstruct == 2) {
+ ERR("more than two stripes need reconstruction\n");
+ Status = STATUS_INTERNAL_ERROR;
+ goto exit;
+ } else if (needs_reconstruct == 1) {
+ needs_reconstruct++;
+ missing2 = i;
+ } else {
+ needs_reconstruct++;
+ missing1 = i;
}
}
+ }
+
+ if (needs_reconstruct > 0) {
+ stripeoff = 0;
- if (!stripes[i / ci->sub_stripes]) {
- for (j = 0; j < ci->sub_stripes; j++) {
- if (context->stripes[i+j].status == ReadDataStatus_CRCError) {
- WARN("stripe %llu had a checksum error\n", i+j);
- Status = STATUS_CRC_ERROR;
- goto exit;
- }
+ if (needs_reconstruct == 2) {
+ TRACE("reconstructing stripes %u and %u\n", missing1, missing2);
+
+ raid6_reconstruct2(off, skip, context, ci, &stripeoff, stripeend[missing1] - stripestart[missing1],
+ TRUE, firststripesize, missing1, missing2);
+
+ while (stripeoff < stripeend[0] - stripestart[0]) {
+ off += (ci->num_stripes - 2) * ci->stripe_length;
+ raid6_reconstruct2(off, 0, context, ci, &stripeoff, stripeend[missing1] - stripestart[missing1],
+ FALSE, 0, missing1, missing2);
}
+ } else {
+ TRACE("reconstructing stripe %u\n", missing1);
- for (j = 0; j < ci->sub_stripes; j++) {
- if (context->stripes[i+j].status == ReadDataStatus_Error) {
- WARN("stripe %llu returned error %08x\n", i+j, context->stripes[i+j].iosb.Status);
- Status = context->stripes[i].iosb.Status;
- goto exit;
- }
+ raid6_reconstruct1(off, skip, context, ci, &stripeoff, stripeend[missing1] - stripestart[missing1], TRUE, firststripesize, missing1);
+
+ while (stripeoff < stripeend[0] - stripestart[0]) {
+ off += (ci->num_stripes - 2) * ci->stripe_length;
+ raid6_reconstruct1(off, 0, context, ci, &stripeoff, stripeend[missing1] - stripestart[missing1], FALSE, 0, missing1);
}
}
+
+ off = origoff;
}
- pos = 0;
- stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
- if (!stripeoff) {
- ERR("out of memory\n");
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto exit;
- }
-
- RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes);
-
- stripe = startoffstripe / ci->sub_stripes;
- while (pos < length) {
- if (pos == 0) {
- UINT32 readlen = min(stripeend[stripe * ci->sub_stripes] - stripestart[stripe * ci->sub_stripes], ci->stripe_length - (stripestart[stripe * ci->sub_stripes] % ci->stripe_length));
+ if (!context->tree && !context->csum) {
+ UINT8* scratch;
+
+ scratch = ExAllocatePoolWithTag(NonPagedPool, ci->stripe_length, ALLOC_TAG);
+ if (!scratch) {
+ ERR("out of memory\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto exit;
+ }
+
+ stripeoff = 0;
+ Status = check_raid6_nocsum_parity(off, skip, context, ci, &stripeoff, stripeend[0] - stripestart[0], TRUE, firststripesize, scratch);
+ if (!NT_SUCCESS(Status)) {
+ ERR("check_raid6_nocsum_parity returned %08x\n", Status);
+ ExFreePool(scratch);
+ goto exit;
+ }
- RtlCopyMemory(buf, stripes[stripe]->buf, readlen);
- stripeoff[stripe] += readlen;
- pos += readlen;
- } else if (length - pos < ci->stripe_length) {
- RtlCopyMemory(buf + pos, &stripes[stripe]->buf[stripeoff[stripe]], length - pos);
- pos = length;
- } else {
- RtlCopyMemory(buf + pos, &stripes[stripe]->buf[stripeoff[stripe]], ci->stripe_length);
- stripeoff[stripe] += ci->stripe_length;
- pos += ci->stripe_length;
+ while (stripeoff < stripeend[0] - stripestart[0]) {
+ off += (ci->num_stripes - 2) * ci->stripe_length;
+ Status = check_raid6_nocsum_parity(off, 0, context, ci, &stripeoff, stripeend[0] - stripestart[0], FALSE, 0, scratch);
+
+ if (!NT_SUCCESS(Status)) {
+ ERR("check_raid6_nocsum_parity returned %08x\n", Status);
+ ExFreePool(scratch);
+ goto exit;
+ }
}
- stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes);
+ ExFreePool(scratch);
+
+ off = origoff;
}
- ExFreePool(stripes);
- ExFreePool(stripeoff);
+ pos = 0;
+ stripeoff = 0;
+ raid6_decode(off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize);
- // FIXME - handle the case where one of the stripes doesn't read everything, i.e. Irp->IoStatus.Information is short
+ while (pos < length) {
+ off += (ci->num_stripes - 2) * ci->stripe_length;
+ raid6_decode(off, 0, context, ci, &stripeoff, buf, &pos, length, 0);
+ }
if (is_tree) {
tree_header* th = (tree_header*)buf;
UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
- if (crc32 != *((UINT32*)th->csum)) {
- WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
- Status = STATUS_CRC_ERROR;
- goto exit;
+ if (addr != th->address || crc32 != *((UINT32*)th->csum))
+ checksum_error = TRUE;
+ } else if (csum) {
+#ifdef DEBUG_STATS
+ time1 = KeQueryPerformanceCounter(NULL);
+#endif
+ for (i = 0; i < length / Vcb->superblock.sector_size; i++) {
+ UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
+
+ if (crc32 != csum[i]) {
+ checksum_error = TRUE;
+ break;
+ }
}
+#ifdef DEBUG_STATS
+ time2 = KeQueryPerformanceCounter(NULL);
+
+ Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
+#endif
}
- Status = STATUS_SUCCESS;
- } else if (type == BLOCK_FLAG_DUPLICATE) {
- // check if any of the stripes succeeded
-
- for (i = 0; i < ci->num_stripes; i++) {
- if (context->stripes[i].status == ReadDataStatus_Success) {
- RtlCopyMemory(buf, context->stripes[i].buf, length);
- Status = STATUS_SUCCESS;
- goto exit;
+ if (checksum_error) {
+ for (i = 0; i < needs_reconstruct; i++) {
+ PIO_STACK_LOCATION IrpSp;
+ UINT16 reconstruct_stripe = i == 0 ? missing1 : missing2;
+
+ // re-run Irps that we cancelled
+
+ if (context->stripes[reconstruct_stripe].Irp) {
+ if (devices[reconstruct_stripe]->devobj->Flags & DO_DIRECT_IO) {
+ MmUnlockPages(context->stripes[reconstruct_stripe].Irp->MdlAddress);
+ IoFreeMdl(context->stripes[reconstruct_stripe].Irp->MdlAddress);
+ }
+ IoFreeIrp(context->stripes[reconstruct_stripe].Irp);
+ }
+
+ if (!Irp) {
+ context->stripes[reconstruct_stripe].Irp = IoAllocateIrp(devices[reconstruct_stripe]->devobj->StackSize, FALSE);
+
+ if (!context->stripes[reconstruct_stripe].Irp) {
+ ERR("IoAllocateIrp failed\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto exit;
+ }
+ } else {
+ context->stripes[reconstruct_stripe].Irp = IoMakeAssociatedIrp(Irp, devices[reconstruct_stripe]->devobj->StackSize);
+
+ if (!context->stripes[reconstruct_stripe].Irp) {
+ ERR("IoMakeAssociatedIrp failed\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto exit;
+ }
+ }
+
+ IrpSp = IoGetNextIrpStackLocation(context->stripes[reconstruct_stripe].Irp);
+ IrpSp->MajorFunction = IRP_MJ_READ;
+
+ if (devices[reconstruct_stripe]->devobj->Flags & DO_BUFFERED_IO) {
+ FIXME("FIXME - buffered IO\n");
+ } else if (devices[reconstruct_stripe]->devobj->Flags & DO_DIRECT_IO) {
+ context->stripes[reconstruct_stripe].Irp->MdlAddress = IoAllocateMdl(context->stripes[reconstruct_stripe].buf,
+ stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe], FALSE, FALSE, NULL);
+ if (!context->stripes[reconstruct_stripe].Irp->MdlAddress) {
+ ERR("IoAllocateMdl failed\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto exit;
+ }
+
+ MmProbeAndLockPages(context->stripes[reconstruct_stripe].Irp->MdlAddress, KernelMode, IoWriteAccess);
+ } else {
+ context->stripes[reconstruct_stripe].Irp->UserBuffer = context->stripes[reconstruct_stripe].buf;
+ }
+
+ IrpSp->Parameters.Read.Length = stripeend[reconstruct_stripe] - stripestart[reconstruct_stripe];
+ IrpSp->Parameters.Read.ByteOffset.QuadPart = stripestart[reconstruct_stripe] + cis[reconstruct_stripe].offset;
+
+ context->stripes[reconstruct_stripe].Irp->UserIosb = &context->stripes[reconstruct_stripe].iosb;
+
+ IoSetCompletionRoutine(context->stripes[reconstruct_stripe].Irp, read_data_completion, &context->stripes[reconstruct_stripe], TRUE, TRUE, TRUE);
+
+ context->stripes[reconstruct_stripe].status = ReadDataStatus_Pending;
}
- }
-
- // if not, see if we got a checksum error
-
- for (i = 0; i < ci->num_stripes; i++) {
- if (context->stripes[i].status == ReadDataStatus_CRCError) {
-#ifdef _DEBUG
- WARN("stripe %llu had a checksum error\n", i);
+
+ if (needs_reconstruct > 0) {
+ context->stripes_left = needs_reconstruct;
+ KeClearEvent(&context->Event);
- if (context->tree) {
- tree_header* th = (tree_header*)context->stripes[i].buf;
- UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum));
+#ifdef DEBUG_STATS
+ if (!is_tree)
+ time1 = KeQueryPerformanceCounter(NULL);
+#endif
+
+ for (i = 0; i < needs_reconstruct; i++) {
+ UINT16 reconstruct_stripe = i == 0 ? missing1 : missing2;
+
+ IoCallDriver(devices[reconstruct_stripe]->devobj, context->stripes[reconstruct_stripe].Irp);
+ }
+
+ KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
+
+#ifdef DEBUG_STATS
+ if (!is_tree) {
+ time2 = KeQueryPerformanceCounter(NULL);
- WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
+ Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart;
}
#endif
+
+ for (i = 0; i < needs_reconstruct; i++) {
+ UINT16 reconstruct_stripe = i == 0 ? missing1 : missing2;
+
+ if (context->stripes[reconstruct_stripe].status != ReadDataStatus_Success) {
+ ERR("unrecoverable checksum error\n");
+ Status = STATUS_CRC_ERROR;
+ goto exit;
+ }
+ }
+ }
+
+ off = origoff;
+
+ if (context->tree) {
+ pos = 0;
+ stripeoff = 0;
+ if (!raid6_decode_with_checksum_metadata(addr, off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize, Vcb->superblock.node_size)) {
+ ERR("unrecoverable metadata checksum error\n");
+ Status = STATUS_CRC_ERROR;
+ goto exit;
+ }
+ } else {
+ pos = 0;
+ stripeoff = 0;
+ if (!raid6_decode_with_checksum(off, skip, context, ci, &stripeoff, buf, &pos, length, firststripesize, csum, Vcb->superblock.sector_size)) {
+ Status = STATUS_CRC_ERROR;
+ goto exit;
+ }
- Status = STATUS_CRC_ERROR;
- goto exit;
+ // Keep decoding until the whole request is filled, stepping one row of
+ // data stripes at a time. The stride is (num_stripes - 2), matching the
+ // other RAID6 decode/reconstruct loops in this branch; (num_stripes - 1)
+ // is the RAID5 stride and would skip past the data being verified.
+ while (pos < length) {
+ off += (ci->num_stripes - 2) * ci->stripe_length;
+ if (!raid6_decode_with_checksum(off, 0, context, ci, &stripeoff, buf, &pos, length, 0, csum, Vcb->superblock.sector_size)) {
+ Status = STATUS_CRC_ERROR;
+ goto exit;
+ }
+ }
}
}
- // failing that, return the first error we encountered
+ // write good data over bad
- for (i = 0; i < ci->num_stripes; i++) {
- if (context->stripes[i].status == ReadDataStatus_Error) {
- Status = context->stripes[i].iosb.Status;
- goto exit;
+ if (!Vcb->readonly) {
+ for (i = 0; i < ci->num_stripes; i++) {
+ if (context->stripes[i].rewrite && devices[i] && !devices[i]->readonly) {
+ Status = write_data_phys(devices[i]->devobj, cis[i].offset + stripestart[i], context->stripes[i].buf, stripeend[i] - stripestart[i]);
+
+ if (!NT_SUCCESS(Status))
+ WARN("write_data_phys returned %08x\n", Status);
+ }
}
}
- // if we somehow get here, return STATUS_INTERNAL_ERROR
-
- Status = STATUS_INTERNAL_ERROR;
+ Status = STATUS_SUCCESS;
}
exit:
UINT64 bytes_read = 0;
UINT64 last_end;
LIST_ENTRY* le;
+#ifdef DEBUG_STATS
+ LARGE_INTEGER time1, time2;
+#endif
TRACE("(%p, %p, %llx, %llx, %p)\n", fcb, data, start, length, pbr);
Status = STATUS_END_OF_FILE;
goto exit;
}
+
+#ifdef DEBUG_STATS
+ time1 = KeQueryPerformanceCounter(NULL);
+#endif
le = fcb->extents.Flink;
UINT64 off = start + bytes_read - ext->offset;
UINT32 to_read, read;
UINT8* buf;
+ BOOL buf_free;
UINT32 *csum, bumpoff = 0;
- UINT64 addr;
+ UINT64 addr, lockaddr, locklen;
+ chunk* c;
read = len - off;
if (read > length) read = length;
to_read = sector_align(ed2->size, fcb->Vcb->superblock.sector_size);
}
- buf = ExAllocatePoolWithTag(PagedPool, to_read, ALLOC_TAG);
-
- if (!buf) {
- ERR("out of memory\n");
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto exit;
+ if (ed->compression == BTRFS_COMPRESSION_NONE && start % fcb->Vcb->superblock.sector_size == 0 &&
+ length % fcb->Vcb->superblock.sector_size == 0) {
+ buf = data + bytes_read;
+ buf_free = FALSE;
+ } else {
+ buf = ExAllocatePoolWithTag(PagedPool, to_read, ALLOC_TAG);
+ buf_free = TRUE;
+
+ if (!buf) {
+ ERR("out of memory\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto exit;
+ }
}
if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
if (!NT_SUCCESS(Status)) {
ERR("load_csum returned %08x\n", Status);
- ExFreePool(buf);
+
+ if (buf_free)
+ ExFreePool(buf);
+
goto exit;
}
} else
csum = NULL;
- Status = read_data(fcb->Vcb, addr, to_read, csum, FALSE, buf, NULL, Irp);
+ c = get_chunk_from_address(fcb->Vcb, addr);
+
+ if (!c) {
+ ERR("get_chunk_from_address(%llx) failed\n", addr);
+
+ // Status still holds the result of the previous call at this point, so
+ // set an explicit failure code before bailing out.
+ Status = STATUS_INTERNAL_ERROR;
+
+ if (buf_free)
+ ExFreePool(buf);
+
+ // The exit label only returns, so the checksum array has to be
+ // released here.
+ if (csum)
+ ExFreePool(csum);
+
+ goto exit;
+ }
+
+ if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) {
+ get_raid56_lock_range(c, addr, to_read, &lockaddr, &locklen);
+ chunk_lock_range(fcb->Vcb, c, lockaddr, locklen);
+ }
+
+
+ Status = read_data(fcb->Vcb, addr, to_read, csum, FALSE, buf, c, NULL, Irp);
if (!NT_SUCCESS(Status)) {
ERR("read_data returned %08x\n", Status);
- ExFreePool(buf);
+
+ // Drop the RAID5/6 range lock taken before the read.
+ if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
+ chunk_unlock_range(fcb->Vcb, c, lockaddr, locklen);
+
+ // buf is only ours to free when it was allocated as a bounce buffer.
+ if (buf_free)
+ ExFreePool(buf);
+
+ // The exit label only returns, so the checksum array has to be
+ // released here to avoid leaking it on a failed read.
+ if (csum)
+ ExFreePool(csum);
+
goto exit;
}
+ if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
+ chunk_unlock_range(fcb->Vcb, c, lockaddr, locklen);
+
if (ed->compression == BTRFS_COMPRESSION_NONE) {
- RtlCopyMemory(data + bytes_read, buf + bumpoff, read);
+ if (buf_free)
+ RtlCopyMemory(data + bytes_read, buf + bumpoff, read);
} else {
UINT8* decomp = NULL;
ExFreePool(decomp);
}
- ExFreePool(buf);
+ if (buf_free)
+ ExFreePool(buf);
if (csum)
ExFreePool(csum);
if (pbr)
*pbr = bytes_read;
+#ifdef DEBUG_STATS
+ time2 = KeQueryPerformanceCounter(NULL);
+
+ fcb->Vcb->stats.num_reads++;
+ fcb->Vcb->stats.data_read += bytes_read;
+ fcb->Vcb->stats.read_total_time += time2.QuadPart - time1.QuadPart;
+#endif
+
exit:
return Status;
}
TRACE("offset = %llx, length = %x\n", start, length);
TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "TRUE" : "FALSE", Irp->Flags & IRP_NOCACHE ? "TRUE" : "FALSE");
- if (fcb->type == BTRFS_TYPE_DIRECTORY)
+ if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY)
return STATUS_INVALID_DEVICE_REQUEST;
if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) {
tree_lock = TRUE;
}
+ }
+ if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) {
if (!ExAcquireResourceSharedLite(fcb->Header.Resource, IoIsOperationSynchronous(Irp))) {
Status = STATUS_PENDING;
IoMarkIrpPending(Irp);
BTRFS_UUID* uuid = &Vcb->superblock.uuid;
mount_options* options = &Vcb->options;
UNICODE_STRING path, ignoreus, compressus, compressforceus, compresstypeus, readonlyus, zliblevelus, flushintervalus,
- maxinlineus, subvolidus;
+ maxinlineus, subvolidus, raid5recalcus, raid6recalcus;
OBJECT_ATTRIBUTES oa;
NTSTATUS Status;
ULONG i, j, kvfilen, index, retlen;
options->zlib_level = mount_zlib_level;
options->flush_interval = mount_flush_interval;
options->max_inline = min(mount_max_inline, Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node) - sizeof(EXTENT_DATA) + 1);
+ options->raid5_recalculation = mount_raid5_recalculation;
+ options->raid6_recalculation = mount_raid6_recalculation;
options->subvol_id = 0;
path.Length = path.MaximumLength = registry_path.Length + (37 * sizeof(WCHAR));
RtlInitUnicodeString(&flushintervalus, L"FlushInterval");
RtlInitUnicodeString(&maxinlineus, L"MaxInline");
RtlInitUnicodeString(&subvolidus, L"SubvolId");
+ RtlInitUnicodeString(&raid5recalcus, L"Raid5Recalculation");
+ RtlInitUnicodeString(&raid6recalcus, L"Raid6Recalculation");
do {
Status = ZwEnumerateValueKey(h, index, KeyValueFullInformation, kvfi, kvfilen, &retlen);
UINT64* val = (UINT64*)((UINT8*)kvfi + kvfi->DataOffset);
options->subvol_id = *val;
+ } else if (FsRtlAreNamesEqual(&raid5recalcus, &us, TRUE, NULL) && kvfi->DataOffset > 0 && kvfi->DataLength > 0 && kvfi->Type == REG_DWORD) {
+ DWORD* val = (DWORD*)((UINT8*)kvfi + kvfi->DataOffset);
+
+ options->raid5_recalculation = *val;
+ } else if (FsRtlAreNamesEqual(&raid6recalcus, &us, TRUE, NULL) && kvfi->DataOffset > 0 && kvfi->DataLength > 0 && kvfi->Type == REG_DWORD) {
+ DWORD* val = (DWORD*)((UINT8*)kvfi + kvfi->DataOffset);
+
+ options->raid6_recalculation = *val;
}
} else if (Status != STATUS_NO_MORE_ENTRIES) {
ERR("ZwEnumerateValueKey returned %08x\n", Status);
if (options->flush_interval == 0)
options->flush_interval = mount_flush_interval;
+
+ if (options->raid5_recalculation > 1)
+ options->raid5_recalculation = 1;
+
+ if (options->raid6_recalculation > 2)
+ options->raid6_recalculation = 2;
Status = STATUS_SUCCESS;
get_registry_value(h, L"ZlibLevel", REG_DWORD, &mount_zlib_level, sizeof(mount_zlib_level));
get_registry_value(h, L"FlushInterval", REG_DWORD, &mount_flush_interval, sizeof(mount_flush_interval));
get_registry_value(h, L"MaxInline", REG_DWORD, &mount_max_inline, sizeof(mount_max_inline));
+ get_registry_value(h, L"Raid5Recalculation", REG_DWORD, &mount_raid5_recalculation, sizeof(mount_raid5_recalculation));
+ get_registry_value(h, L"Raid6Recalculation", REG_DWORD, &mount_raid6_recalculation, sizeof(mount_raid6_recalculation));
if (mount_flush_interval == 0)
mount_flush_interval = 1;
+ if (mount_raid5_recalculation > 1)
+ mount_raid5_recalculation = 1;
+
+ if (mount_raid6_recalculation > 2)
+ mount_raid6_recalculation = 2;
+
#ifdef _DEBUG
get_registry_value(h, L"DebugLogLevel", REG_DWORD, &debug_log_level, sizeof(debug_log_level));
#include "btrfs_drv.h"
-NTSTATUS get_reparse_point(PDEVICE_OBJECT DeviceObject, PFILE_OBJECT FileObject, void* buffer, DWORD buflen, DWORD* retlen) {
+NTSTATUS get_reparse_point(PDEVICE_OBJECT DeviceObject, PFILE_OBJECT FileObject, void* buffer, DWORD buflen, ULONG_PTR* retlen) {
USHORT subnamelen, printnamelen, i;
ULONG stringlen;
DWORD reqlen;
ExAcquireResourceSharedLite(fcb->Header.Resource, TRUE);
if (fcb->type == BTRFS_TYPE_SYMLINK) {
- data = ExAllocatePoolWithTag(PagedPool, fcb->inode_item.st_size, ALLOC_TAG);
- if (!data) {
- ERR("out of memory\n");
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto end;
- }
-
- TRACE("data = %p, size = %x\n", data, fcb->inode_item.st_size);
- Status = read_file(fcb, (UINT8*)data, 0, fcb->inode_item.st_size, NULL, NULL);
-
- if (!NT_SUCCESS(Status)) {
- ERR("read_file returned %08x\n", Status);
- ExFreePool(data);
- goto end;
- }
-
- Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, data, fcb->inode_item.st_size);
- if (!NT_SUCCESS(Status)) {
- ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status);
- ExFreePool(data);
- goto end;
- }
-
- subnamelen = stringlen;
- printnamelen = stringlen;
-
- reqlen = offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer.PathBuffer) + subnamelen + printnamelen;
-
- if (buflen < reqlen) {
- Status = STATUS_BUFFER_OVERFLOW;
- goto end;
- }
-
- rdb->ReparseTag = IO_REPARSE_TAG_SYMLINK;
- rdb->ReparseDataLength = reqlen - offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer);
- rdb->Reserved = 0;
-
- rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset = 0;
- rdb->SymbolicLinkReparseBuffer.SubstituteNameLength = subnamelen;
- rdb->SymbolicLinkReparseBuffer.PrintNameOffset = subnamelen;
- rdb->SymbolicLinkReparseBuffer.PrintNameLength = printnamelen;
- rdb->SymbolicLinkReparseBuffer.Flags = SYMLINK_FLAG_RELATIVE;
-
- Status = RtlUTF8ToUnicodeN(&rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)],
- stringlen, &stringlen, data, fcb->inode_item.st_size);
+ if (called_from_lxss()) {
+ reqlen = offsetof(REPARSE_DATA_BUFFER, GenericReparseBuffer.DataBuffer) + sizeof(UINT32);
+
+ if (buflen < reqlen) {
+ Status = STATUS_BUFFER_OVERFLOW;
+ goto end;
+ }
+
+ rdb->ReparseTag = IO_REPARSE_TAG_LXSS_SYMLINK;
+ rdb->ReparseDataLength = offsetof(REPARSE_DATA_BUFFER, GenericReparseBuffer.DataBuffer) + sizeof(UINT32);
+ rdb->Reserved = 0;
+
+ *((UINT32*)rdb->GenericReparseBuffer.DataBuffer) = 1;
+
+ *retlen = reqlen;
+ } else {
+ data = ExAllocatePoolWithTag(PagedPool, fcb->inode_item.st_size, ALLOC_TAG);
+ if (!data) {
+ ERR("out of memory\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto end;
+ }
+
+ TRACE("data = %p, size = %x\n", data, fcb->inode_item.st_size);
+ Status = read_file(fcb, (UINT8*)data, 0, fcb->inode_item.st_size, NULL, NULL);
+
+ if (!NT_SUCCESS(Status)) {
+ ERR("read_file returned %08x\n", Status);
+ ExFreePool(data);
+ goto end;
+ }
+
+ Status = RtlUTF8ToUnicodeN(NULL, 0, &stringlen, data, fcb->inode_item.st_size);
+ if (!NT_SUCCESS(Status)) {
+ ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status);
+ ExFreePool(data);
+ goto end;
+ }
+
+ subnamelen = stringlen;
+ printnamelen = stringlen;
+
+ reqlen = offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer.PathBuffer) + subnamelen + printnamelen;
+
+ if (buflen < reqlen) {
+ Status = STATUS_BUFFER_OVERFLOW;
+ goto end;
+ }
+
+ rdb->ReparseTag = IO_REPARSE_TAG_SYMLINK;
+ rdb->ReparseDataLength = reqlen - offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer);
+ rdb->Reserved = 0;
+
+ rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset = 0;
+ rdb->SymbolicLinkReparseBuffer.SubstituteNameLength = subnamelen;
+ rdb->SymbolicLinkReparseBuffer.PrintNameOffset = subnamelen;
+ rdb->SymbolicLinkReparseBuffer.PrintNameLength = printnamelen;
+ rdb->SymbolicLinkReparseBuffer.Flags = SYMLINK_FLAG_RELATIVE;
+
+ Status = RtlUTF8ToUnicodeN(&rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)],
+ stringlen, &stringlen, data, fcb->inode_item.st_size);
- if (!NT_SUCCESS(Status)) {
- ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status);
+ if (!NT_SUCCESS(Status)) {
+ ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status);
+ ExFreePool(data);
+ goto end;
+ }
+
+ for (i = 0; i < stringlen / sizeof(WCHAR); i++) {
+ if (rdb->SymbolicLinkReparseBuffer.PathBuffer[(rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)) + i] == '/')
+ rdb->SymbolicLinkReparseBuffer.PathBuffer[(rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)) + i] = '\\';
+ }
+
+ RtlCopyMemory(&rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.PrintNameOffset / sizeof(WCHAR)],
+ &rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)],
+ rdb->SymbolicLinkReparseBuffer.SubstituteNameLength);
+
+ *retlen = reqlen;
+
ExFreePool(data);
- goto end;
- }
-
- for (i = 0; i < stringlen / sizeof(WCHAR); i++) {
- if (rdb->SymbolicLinkReparseBuffer.PathBuffer[(rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)) + i] == '/')
- rdb->SymbolicLinkReparseBuffer.PathBuffer[(rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)) + i] = '\\';
}
- RtlCopyMemory(&rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.PrintNameOffset / sizeof(WCHAR)],
- &rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)],
- rdb->SymbolicLinkReparseBuffer.SubstituteNameLength);
-
- *retlen = reqlen;
-
- ExFreePool(data);
-
Status = STATUS_SUCCESS;
} else if (fcb->atts & FILE_ATTRIBUTE_REPARSE_POINT) {
if (fcb->type == BTRFS_TYPE_FILE) {
- Status = read_file(fcb, buffer, 0, buflen, retlen, NULL);
+ ULONG len;
+
+ Status = read_file(fcb, buffer, 0, buflen, &len, NULL);
if (!NT_SUCCESS(Status)) {
ERR("read_file returned %08x\n", Status);
}
+
+ *retlen = len;
} else if (fcb->type == BTRFS_TYPE_DIRECTORY) {
if (!fcb->reparse_xattr.Buffer || fcb->reparse_xattr.Length < sizeof(ULONG)) {
Status = STATUS_NOT_A_REPARSE_POINT;
return Status;
}
-static NTSTATUS set_symlink(PIRP Irp, file_ref* fileref, REPARSE_DATA_BUFFER* rdb, ULONG buflen, LIST_ENTRY* rollback) {
+static NTSTATUS set_symlink(PIRP Irp, file_ref* fileref, ccb* ccb, REPARSE_DATA_BUFFER* rdb, ULONG buflen, BOOL write, LIST_ENTRY* rollback) {
NTSTATUS Status;
ULONG minlen;
ULONG tlength;
BTRFS_TIME now;
USHORT i;
- minlen = offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer.PathBuffer) + sizeof(WCHAR);
- if (buflen < minlen) {
- WARN("buffer was less than minimum length (%u < %u)\n", buflen, minlen);
- return STATUS_INVALID_PARAMETER;
+ if (write) {
+ minlen = offsetof(REPARSE_DATA_BUFFER, SymbolicLinkReparseBuffer.PathBuffer) + sizeof(WCHAR);
+ if (buflen < minlen) {
+ WARN("buffer was less than minimum length (%u < %u)\n", buflen, minlen);
+ return STATUS_INVALID_PARAMETER;
+ }
+
+ subname.Buffer = &rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)];
+ subname.MaximumLength = subname.Length = rdb->SymbolicLinkReparseBuffer.SubstituteNameLength;
+
+ TRACE("substitute name = %.*S\n", subname.Length / sizeof(WCHAR), subname.Buffer);
}
- subname.Buffer = &rdb->SymbolicLinkReparseBuffer.PathBuffer[rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset / sizeof(WCHAR)];
- subname.MaximumLength = subname.Length = rdb->SymbolicLinkReparseBuffer.SubstituteNameLength;
-
- TRACE("substitute name = %.*S\n", subname.Length / sizeof(WCHAR), subname.Buffer);
-
fileref->fcb->type = BTRFS_TYPE_SYMLINK;
fileref->fcb->inode_item.st_mode |= __S_IFLNK;
- Status = truncate_file(fileref->fcb, 0, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("truncate_file returned %08x\n", Status);
- return Status;
- }
-
- Status = RtlUnicodeToUTF8N(NULL, 0, (PULONG)&target.Length, subname.Buffer, subname.Length);
- if (!NT_SUCCESS(Status)) {
- ERR("RtlUnicodeToUTF8N 1 failed with error %08x\n", Status);
- return Status;
- }
-
- target.MaximumLength = target.Length;
- target.Buffer = ExAllocatePoolWithTag(PagedPool, target.MaximumLength, ALLOC_TAG);
- if (!target.Buffer) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- Status = RtlUnicodeToUTF8N(target.Buffer, target.Length, (PULONG)&target.Length, subname.Buffer, subname.Length);
- if (!NT_SUCCESS(Status)) {
- ERR("RtlUnicodeToUTF8N 2 failed with error %08x\n", Status);
+ if (write) {
+ Status = truncate_file(fileref->fcb, 0, Irp, rollback);
+ if (!NT_SUCCESS(Status)) {
+ ERR("truncate_file returned %08x\n", Status);
+ return Status;
+ }
+
+ Status = RtlUnicodeToUTF8N(NULL, 0, (PULONG)&target.Length, subname.Buffer, subname.Length);
+ if (!NT_SUCCESS(Status)) {
+ ERR("RtlUnicodeToUTF8N 1 failed with error %08x\n", Status);
+ return Status;
+ }
+
+ target.MaximumLength = target.Length;
+ target.Buffer = ExAllocatePoolWithTag(PagedPool, target.MaximumLength, ALLOC_TAG);
+ if (!target.Buffer) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ Status = RtlUnicodeToUTF8N(target.Buffer, target.Length, (PULONG)&target.Length, subname.Buffer, subname.Length);
+ if (!NT_SUCCESS(Status)) {
+ ERR("RtlUnicodeToUTF8N 2 failed with error %08x\n", Status);
+ ExFreePool(target.Buffer);
+ return Status;
+ }
+
+ for (i = 0; i < target.Length; i++) {
+ if (target.Buffer[i] == '\\')
+ target.Buffer[i] = '/';
+ }
+
+ offset.QuadPart = 0;
+ tlength = target.Length;
+ Status = write_file2(fileref->fcb->Vcb, Irp, offset, target.Buffer, &tlength, FALSE, TRUE,
+ TRUE, FALSE, rollback);
ExFreePool(target.Buffer);
- return Status;
- }
-
- for (i = 0; i < target.Length; i++) {
- if (target.Buffer[i] == '\\')
- target.Buffer[i] = '/';
- }
-
- offset.QuadPart = 0;
- tlength = target.Length;
- Status = write_file2(fileref->fcb->Vcb, Irp, offset, target.Buffer, &tlength, FALSE, TRUE,
- TRUE, FALSE, rollback);
- ExFreePool(target.Buffer);
+ } else
+ Status = STATUS_SUCCESS;
KeQuerySystemTime(&time);
win_time_to_unix(time, &now);
fileref->fcb->inode_item.transid = fileref->fcb->Vcb->superblock.generation;
fileref->fcb->inode_item.sequence++;
- fileref->fcb->inode_item.st_ctime = now;
- fileref->fcb->inode_item.st_mtime = now;
+
+ if (!ccb->user_set_change_time)
+ fileref->fcb->inode_item.st_ctime = now;
+
+ if (!ccb->user_set_write_time)
+ fileref->fcb->inode_item.st_mtime = now;
fileref->fcb->subvol->root_item.ctransid = fileref->fcb->Vcb->superblock.generation;
fileref->fcb->subvol->root_item.ctime = now;
+ fileref->fcb->inode_item_changed = TRUE;
mark_fcb_dirty(fileref->fcb);
+
mark_fileref_dirty(fileref);
return Status;
// It isn't documented what permissions FSCTL_SET_REPARSE_POINT needs, but CreateSymbolicLinkW
// creates a file with FILE_WRITE_ATTRIBUTES | DELETE | SYNCHRONIZE.
- if (!(ccb->access & FILE_WRITE_ATTRIBUTES)) {
+ if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_WRITE_ATTRIBUTES)) {
WARN("insufficient privileges\n");
return STATUS_ACCESS_DENIED;
}
RtlCopyMemory(&tag, buffer, sizeof(ULONG));
- if (fcb->type == BTRFS_TYPE_FILE && tag == IO_REPARSE_TAG_SYMLINK && rdb->SymbolicLinkReparseBuffer.Flags & SYMLINK_FLAG_RELATIVE) {
- Status = set_symlink(Irp, fileref, rdb, buflen, &rollback);
+ if (fcb->type == BTRFS_TYPE_FILE &&
+ ((tag == IO_REPARSE_TAG_SYMLINK && rdb->SymbolicLinkReparseBuffer.Flags & SYMLINK_FLAG_RELATIVE) || tag == IO_REPARSE_TAG_LXSS_SYMLINK)) {
+ Status = set_symlink(Irp, fileref, ccb, rdb, buflen, tag == IO_REPARSE_TAG_SYMLINK, &rollback);
fcb->atts |= FILE_ATTRIBUTE_REPARSE_POINT;
} else {
LARGE_INTEGER offset, time;
fcb->inode_item.transid = fcb->Vcb->superblock.generation;
fcb->inode_item.sequence++;
- fcb->inode_item.st_ctime = now;
- fcb->inode_item.st_mtime = now;
+
+ if (!ccb->user_set_change_time)
+ fcb->inode_item.st_ctime = now;
+
+ if (!ccb->user_set_write_time)
+ fcb->inode_item.st_mtime = now;
+
fcb->atts |= FILE_ATTRIBUTE_REPARSE_POINT;
fcb->atts_changed = TRUE;
fcb->subvol->root_item.ctransid = fcb->Vcb->superblock.generation;
fcb->subvol->root_item.ctime = now;
+ fcb->inode_item_changed = TRUE;
mark_fcb_dirty(fcb);
}
end:
if (NT_SUCCESS(Status))
- clear_rollback(&rollback);
+ clear_rollback(fcb->Vcb, &rollback);
else
do_rollback(fcb->Vcb, &rollback);
return STATUS_INVALID_PARAMETER;
}
- if (!(ccb->access & FILE_WRITE_ATTRIBUTES)) {
+ if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_WRITE_ATTRIBUTES)) {
WARN("insufficient privileges\n");
return STATUS_ACCESS_DENIED;
}
fileref->fcb->inode_item.st_mode |= __S_IFREG;
fileref->fcb->inode_item.transid = fileref->fcb->Vcb->superblock.generation;
fileref->fcb->inode_item.sequence++;
- fileref->fcb->inode_item.st_ctime = now;
- fileref->fcb->inode_item.st_mtime = now;
+
+ if (!ccb->user_set_change_time)
+ fileref->fcb->inode_item.st_ctime = now;
+
+ if (!ccb->user_set_write_time)
+ fileref->fcb->inode_item.st_mtime = now;
+
fileref->fcb->atts &= ~FILE_ATTRIBUTE_REPARSE_POINT;
mark_fileref_dirty(fileref);
+
+ fileref->fcb->inode_item_changed = TRUE;
mark_fcb_dirty(fileref->fcb);
fileref->fcb->subvol->root_item.ctransid = fcb->Vcb->superblock.generation;
fcb->inode_item.transid = fcb->Vcb->superblock.generation;
fcb->inode_item.sequence++;
- fcb->inode_item.st_ctime = now;
- fcb->inode_item.st_mtime = now;
+
+ if (!ccb->user_set_change_time)
+ fcb->inode_item.st_ctime = now;
+
+ if (!ccb->user_set_write_time)
+ fcb->inode_item.st_mtime = now;
+ fcb->inode_item_changed = TRUE;
mark_fcb_dirty(fcb);
fcb->subvol->root_item.ctransid = fcb->Vcb->superblock.generation;
fcb->inode_item.transid = fcb->Vcb->superblock.generation;
fcb->inode_item.sequence++;
- fcb->inode_item.st_ctime = now;
- fcb->inode_item.st_mtime = now;
+
+ if (!ccb->user_set_change_time)
+ fcb->inode_item.st_ctime = now;
+
+ if (!ccb->user_set_write_time)
+ fcb->inode_item.st_mtime = now;
+ fcb->inode_item_changed = TRUE;
mark_fcb_dirty(fcb);
fcb->subvol->root_item.ctransid = fcb->Vcb->superblock.generation;
end:
if (NT_SUCCESS(Status))
- clear_rollback(&rollback);
+ clear_rollback(fcb->Vcb, &rollback);
else
do_rollback(fcb->Vcb, &rollback);
v->length = gli.Length.QuadPart;
v->gen1 = sb->generation;
v->gen2 = 0;
+ v->seeding = sb->flags & BTRFS_SUPERBLOCK_FLAGS_SEEDING ? TRUE : FALSE;
InsertTailList(volumes, &v->list_entry);
i = 1;
win_time_to_unix(time, &now);
fcb->inode_item.transid = Vcb->superblock.generation;
- fcb->inode_item.st_ctime = now;
+
+ if (!ccb->user_set_change_time)
+ fcb->inode_item.st_ctime = now;
+
fcb->inode_item.sequence++;
if (flags & OWNER_SECURITY_INFORMATION) {
}
fcb->sd_dirty = TRUE;
+ fcb->inode_item_changed = TRUE;
fcb->subvol->root_item.ctransid = Vcb->superblock.generation;
fcb->subvol->root_item.ctime = now;
// #define DEBUG_TREE_LOCKS
-typedef struct {
- enum rollback_type type;
- void* ptr;
- LIST_ENTRY list_entry;
-} rollback_item;
-
NTSTATUS STDCALL _load_tree(device_extension* Vcb, UINT64 addr, root* r, tree** pt, tree* parent, PIRP Irp, const char* func, const char* file, unsigned int line) {
UINT8* buf;
NTSTATUS Status;
tree* t;
tree_data* td;
chunk* c;
- shared_data* sd;
TRACE("(%p, %llx)\n", Vcb, addr);
return STATUS_INSUFFICIENT_RESOURCES;
}
- Status = read_data(Vcb, addr, Vcb->superblock.node_size, NULL, TRUE, buf, &c, Irp);
+ Status = read_data(Vcb, addr, Vcb->superblock.node_size, NULL, TRUE, buf, NULL, &c, Irp);
if (!NT_SUCCESS(Status)) {
ERR("read_data returned 0x%08x\n", Status);
ExFreePool(buf);
t->size = 0;
t->new_address = 0;
t->has_new_address = FALSE;
+ t->updated_extents = FALSE;
t->write = FALSE;
if (c)
// t->items = ExAllocatePoolWithTag(PagedPool, num_items * sizeof(tree_data), ALLOC_TAG);
InitializeListHead(&t->itemlist);
- if (t->header.flags & HEADER_FLAG_SHARED_BACKREF || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
- sd = ExAllocatePoolWithTag(NonPagedPool, sizeof(shared_data), ALLOC_TAG);
- if (!sd) {
- ERR("out of memory\n");
- ExFreePool(buf);
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- sd->address = addr;
- sd->parent = parent ? parent->header.address : addr;
- InitializeListHead(&sd->entries);
-
- ExInterlockedInsertTailList(&Vcb->shared_extents, &sd->list_entry, &Vcb->shared_extents_lock);
- }
-
if (t->header.level == 0) { // leaf node
leaf_node* ln = (leaf_node*)(buf + sizeof(tree_header));
unsigned int i;
}
for (i = 0; i < t->header.num_items; i++) {
- td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
+ td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
if (!td) {
ERR("out of memory\n");
ExFreePool(buf);
} else
td->data = NULL;
- if ((t->header.flags & HEADER_FLAG_SHARED_BACKREF || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) &&
- ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA)) {
- EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
-
- if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
- EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
-
- if (ed2->size != 0) {
- LIST_ENTRY* le;
- BOOL found = FALSE;
-
- TRACE("shared extent %llx,%llx\n", ed2->address, ed2->size);
-
- le = sd->entries.Flink;
- while (le != &sd->entries) {
- shared_data_entry* sde = CONTAINING_RECORD(le, shared_data_entry, list_entry);
-
- if (sde->address == ed2->address && sde->size == ed2->size && sde->edr.root == t->header.tree_id &&
- sde->edr.objid == ln[i].key.obj_id && sde->edr.offset == ln[i].key.offset - ed2->offset) {
- sde->edr.count++;
- found = TRUE;
- break;
- }
-
- le = le->Flink;
- }
-
- if (!found) {
- shared_data_entry* sde = ExAllocatePoolWithTag(PagedPool, sizeof(shared_data_entry), ALLOC_TAG);
-
- if (!sde) {
- ERR("out of memory\n");
- ExFreePool(buf);
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- sde->address = ed2->address;
- sde->size = ed2->size;
- sde->edr.root = t->header.tree_id;
- sde->edr.objid = ln[i].key.obj_id;
- sde->edr.offset = ln[i].key.offset - ed2->offset;
- sde->edr.count = 1;
-
- InsertTailList(&sd->entries, &sde->list_entry);
- }
- }
- }
- }
-
td->size = ln[i].size;
td->ignore = FALSE;
td->inserted = FALSE;
}
for (i = 0; i < t->header.num_items; i++) {
- td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
+ td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
if (!td) {
ERR("out of memory\n");
ExFreePool(buf);
if (t->header.level == 0 && td->data)
ExFreePool(td->data);
- ExFreePool(td);
+ ExFreeToPagedLookasideList(&t->Vcb->tree_data_lookaside, td);
}
InterlockedDecrement(&t->Vcb->open_trees);
}
th->tree->parent = t;
+
+#ifdef DEBUG_PARANOID
+ if (t && t->header.level <= th->tree->header.level) int3;
+#endif
+
th->tree->paritem = td;
ret = TRUE;
const char* func, const char* file, unsigned int line) {
int cmp;
tree_data *td, *lasttd;
+ KEY key2;
TRACE("(%p, %p, %p, %p, %u)\n", Vcb, t, tp, searchkey, ignore);
if (!td) return STATUS_NOT_FOUND;
+ key2 = *searchkey;
+
do {
- cmp = keycmp(searchkey, &td->key);
+ cmp = keycmp(key2, td->key);
// TRACE("(%u) comparing (%x,%x,%x) to (%x,%x,%x) - %i (ignore = %s)\n", t->header.level, (UINT32)searchkey->obj_id, searchkey->obj_type, (UINT32)searchkey->offset, (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset, cmp, td->ignore ? "TRUE" : "FALSE");
if (cmp == 1) {
lasttd = td;
td = next_item(t, td);
if (td) {
- cmp = keycmp(searchkey, &td->key);
+ cmp = keycmp(key2, td->key);
if (cmp != 0) {
td = origtd;
td = prev_item(t->parent, t->paritem);
- Status = _do_load_tree(Vcb, &td->treeholder, t->parent->root, t, td, &loaded, Irp, func, file, line);
+ Status = _do_load_tree(Vcb, &td->treeholder, t->parent->root, t->parent, td, &loaded, Irp, func, file, line);
if (!NT_SUCCESS(Status)) {
ERR("do_load_tree returned %08x\n", Status);
return FALSE;
r->treeholder.tree = NULL;
if (IsListEmpty(&Vcb->trees))
- goto free_shared;
+ return;
} else if (t->header.level > level)
empty = FALSE;
if (empty)
break;
}
-
-free_shared:
- while (!IsListEmpty(&Vcb->shared_extents)) {
- shared_data* sd;
-
- le = RemoveHeadList(&Vcb->shared_extents);
- sd = CONTAINING_RECORD(le, shared_data, list_entry);
-
- while (!IsListEmpty(&sd->entries)) {
- LIST_ENTRY* le2 = RemoveHeadList(&sd->entries);
- shared_data_entry* sde = CONTAINING_RECORD(le2, shared_data_entry, list_entry);
-
- ExFreePool(sde);
- }
-
- ExFreePool(sd);
- }
}
-void add_rollback(LIST_ENTRY* rollback, enum rollback_type type, void* ptr) {
+void add_rollback(device_extension* Vcb, LIST_ENTRY* rollback, enum rollback_type type, void* ptr) {
rollback_item* ri;
ri = ExAllocatePoolWithTag(PagedPool, sizeof(rollback_item), ALLOC_TAG);
TRACE("(%p, %p, %llx, %x, %llx, %p, %x, %p, %p)\n", Vcb, r, obj_id, obj_type, offset, data, size, ptp, rollback);
-#ifdef DEBUG_PARANOID
- if (!ExIsResourceAcquiredExclusiveLite(&Vcb->tree_lock)) {
- ERR("ERROR - tree_lock not held exclusively\n");
- int3;
- }
-#endif
+// #ifdef DEBUG_PARANOID
+// if (!ExIsResourceAcquiredExclusiveLite(&Vcb->tree_lock)) {
+// ERR("ERROR - tree_lock not held exclusively\n");
+// int3;
+// }
+// #endif
searchkey.obj_id = obj_id;
searchkey.obj_type = obj_type;
if (tp.item) {
TRACE("tp.item->key = %p\n", &tp.item->key);
- cmp = keycmp(&searchkey, &tp.item->key);
+ cmp = keycmp(searchkey, tp.item->key);
if (cmp == 0 && !tp.item->ignore) { // FIXME - look for all items of the same key to make sure none are non-ignored
ERR("error: key (%llx,%x,%llx) already present\n", obj_id, obj_type, offset);
} else
cmp = -1;
- td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
+ td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
if (!td) {
ERR("out of memory\n");
goto end;
paritem = tp.tree->paritem;
while (paritem) {
// ERR("paritem = %llx,%x,%llx, tp.item->key = %llx,%x,%llx\n", paritem->key.obj_id, paritem->key.obj_type, paritem->key.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
- if (!keycmp(&paritem->key, &tp.item->key)) {
+ if (!keycmp(paritem->key, tp.item->key)) {
paritem->key = searchkey;
} else
break;
paritem = paritem->treeholder.tree->paritem;
}
-
- } else {
- InsertAfter(&tp.tree->itemlist, &td->list_entry, &tp.item->list_entry); // FIXME - we don't need this
- }
+ } else if (cmp == 0)
+ InsertHeadList(tp.item->list_entry.Blink, &td->list_entry); // make sure non-deleted item is before deleted ones
+ else
+ InsertHeadList(&tp.item->list_entry, &td->list_entry);
tp.tree->header.num_items++;
tp.tree->size += size + sizeof(leaf_node);
// FIXME - free this correctly
- tp2 = ExAllocatePoolWithTag(PagedPool, sizeof(traverse_ptr), ALLOC_TAG);
+ tp2 = ExAllocateFromPagedLookasideList(&Vcb->traverse_ptr_lookaside);
if (!tp2) {
ERR("out of memory\n");
goto end;
tp2->tree = tp.tree;
tp2->item = td;
- add_rollback(rollback, ROLLBACK_INSERT_ITEM, tp2);
+ add_rollback(Vcb, rollback, ROLLBACK_INSERT_ITEM, tp2);
success = TRUE;
t = t->parent;
}
- tp2 = ExAllocatePoolWithTag(PagedPool, sizeof(traverse_ptr), ALLOC_TAG);
+ tp2 = ExAllocateFromPagedLookasideList(&Vcb->traverse_ptr_lookaside);
if (!tp2) {
ERR("out of memory\n");
return;
tp2->tree = tp->tree;
tp2->item = tp->item;
- add_rollback(rollback, ROLLBACK_DELETE_ITEM, tp2);
+ add_rollback(Vcb, rollback, ROLLBACK_DELETE_ITEM, tp2);
}
-void clear_rollback(LIST_ENTRY* rollback) {
+void clear_rollback(device_extension* Vcb, LIST_ENTRY* rollback) {
rollback_item* ri;
while (!IsListEmpty(rollback)) {
switch (ri->type) {
case ROLLBACK_INSERT_ITEM:
case ROLLBACK_DELETE_ITEM:
+ ExFreeToPagedLookasideList(&Vcb->traverse_ptr_lookaside, ri->ptr);
+ break;
+
case ROLLBACK_ADD_SPACE:
case ROLLBACK_SUBTRACT_SPACE:
case ROLLBACK_INSERT_EXTENT:
tp->tree->size -= sizeof(internal_node);
}
- ExFreePool(tp);
+ ExFreeToPagedLookasideList(&Vcb->traverse_ptr_lookaside, tp);
break;
}
tp->tree->size += sizeof(internal_node);
}
- ExFreePool(tp);
+ ExFreeToPagedLookasideList(&Vcb->traverse_ptr_lookaside, tp);
break;
}
if (c) {
Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, re->fcb->subvol->id,
re->fcb->inode, re->ext->offset - ed2->offset, -1,
- re->fcb->inode_item.flags & BTRFS_INODE_NODATASUM, ed2->size, NULL);
+ re->fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, NULL);
if (!NT_SUCCESS(Status))
ERR("update_changed_extent_ref returned %08x\n", Status);
if (c) {
Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, re->fcb->subvol->id,
re->fcb->inode, re->ext->offset - ed2->offset, 1,
- re->fcb->inode_item.flags & BTRFS_INODE_NODATASUM, ed2->size, NULL);
+ re->fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, NULL);
if (!NT_SUCCESS(Status))
ERR("update_changed_extent_ref returned %08x\n", Status);
ExAcquireResourceExclusiveLite(&rs->chunk->lock, TRUE);
if (ri->type == ROLLBACK_ADD_SPACE)
- space_list_subtract2(rs->list, rs->list_size, rs->address, rs->length, NULL);
+ space_list_subtract2(Vcb, rs->list, rs->list_size, rs->address, rs->length, NULL);
else
- space_list_add2(rs->list, rs->list_size, rs->address, rs->length, NULL);
+ space_list_add2(Vcb, rs->list, rs->list_size, rs->address, rs->length, NULL);
if (rs->chunk) {
LIST_ENTRY* le2 = le->Blink;
if (rs2->chunk == rs->chunk) {
if (ri2->type == ROLLBACK_ADD_SPACE)
- space_list_subtract2(rs2->list, rs2->list_size, rs2->address, rs2->length, NULL);
+ space_list_subtract2(Vcb, rs2->list, rs2->list_size, rs2->address, rs2->length, NULL);
else
- space_list_add2(rs2->list, rs2->list_size, rs2->address, rs2->length, NULL);
+ space_list_add2(Vcb, rs2->list, rs2->list_size, rs2->address, rs2->length, NULL);
ExFreePool(rs2);
RemoveEntryList(&ri2->list_entry);
ExFreePool(ri);
}
}
+
+// Walks from tree t up through its ancestors looking for the first level at
+// which the item pointing down to the current tree has a following sibling in
+// the parent's item list. If one is found, *tree_end receives that sibling's
+// key and *no_end is set to FALSE. If the root (a tree with no parent) is
+// reached first, *no_end is set to TRUE and *tree_end is left untouched.
+static void find_tree_end(tree* t, KEY* tree_end, BOOL* no_end) {
+    tree* cur = t;
+
+    while (TRUE) {
+        LIST_ENTRY* next;
+
+        // reached the top without finding a right-hand sibling
+        if (!cur->parent) {
+            *no_end = TRUE;
+            return;
+        }
+
+        next = cur->paritem->list_entry.Flink;
+
+        // our parent item is the last one in its tree: try one level higher
+        if (next == &cur->parent->itemlist) {
+            cur = cur->parent;
+            continue;
+        }
+
+        *tree_end = CONTAINING_RECORD(next, tree_data, list_entry)->key;
+        *no_end = FALSE;
+        return;
+    }
+}
+
+// Empties batchlist: every batch_root is unlinked, each of its batch_item
+// entries is handed back to Vcb->batch_item_lookaside, and the batch_root
+// itself is released with ExFreePool. The items' data pointers are not
+// touched here.
+void clear_batch_list(device_extension* Vcb, LIST_ENTRY* batchlist) {
+    LIST_ENTRY* entry;
+    batch_root* root;
+    batch_item* item;
+
+    for (;;) {
+        if (IsListEmpty(batchlist))
+            return;
+
+        entry = RemoveHeadList(batchlist);
+        root = CONTAINING_RECORD(entry, batch_root, list_entry);
+
+        // drain this root's items before freeing the root itself
+        for (;;) {
+            if (IsListEmpty(&root->items))
+                break;
+
+            entry = RemoveHeadList(&root->items);
+            item = CONTAINING_RECORD(entry, batch_item, list_entry);
+
+            ExFreeToPagedLookasideList(&Vcb->batch_item_lookaside, item);
+        }
+
+        ExFreePool(root);
+    }
+}
+
+// Handles a batch item (bi) whose key matches an existing tree item (td) in
+// tree t.
+//
+// For Batch_SetXattr, Batch_DirItem, Batch_InodeRef and Batch_InodeExtRef the
+// new payload is combined with td's existing data in a freshly allocated
+// buffer, which replaces bi->data and is attached to newtd. td is then marked
+// as ignored (recording a ROLLBACK_DELETE_ITEM entry when rollback is
+// non-NULL) and newtd is linked immediately after td.
+//
+// If an INODE_REF would exceed the maximum item size and the volume has the
+// EXTENDED_IREF incompat flag, a new Batch_InodeExtRef item is queued in key
+// order on listhead (after bi) instead.
+//
+// Any other operation is logged as an error and triggers int3.
+//
+// Returns TRUE when newtd was not populated (out of memory, size limit
+// exceeded, or the INODE_REF was re-queued as an INODE_EXTREF). Returns FALSE
+// otherwise, including the case where the rollback entry could not be
+// allocated, in which newtd is NOT linked into the tree.
+// NOTE(review): the caller presumably treats TRUE as "skip this item" -
+// confirm against commit_batch_list_root.
+static BOOL handle_batch_collision(device_extension* Vcb, batch_item* bi, tree* t, tree_data* td, tree_data* newtd, LIST_ENTRY* listhead, LIST_ENTRY* rollback) {
+    if (bi->operation == Batch_SetXattr || bi->operation == Batch_DirItem || bi->operation == Batch_InodeRef || bi->operation == Batch_InodeExtRef) {
+        UINT16 maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node);
+
+        if (bi->operation == Batch_SetXattr) {
+            if (td->size < sizeof(DIR_ITEM)) {
+                ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", bi->key.obj_id, bi->key.obj_type, bi->key.offset, td->size, sizeof(DIR_ITEM));
+            } else {
+                UINT8* newdata;
+                ULONG size = td->size;
+                DIR_ITEM* newxa = (DIR_ITEM*)bi->data;
+                DIR_ITEM* xa = (DIR_ITEM*)td->data;
+
+                // walk the packed DIR_ITEM entries in td looking for one with the same name
+                while (TRUE) {
+                    ULONG oldxasize;
+
+                    if (size < sizeof(DIR_ITEM) || size < sizeof(DIR_ITEM) - 1 + xa->m + xa->n) {
+                        ERR("(%llx,%x,%llx) was truncated\n", bi->key.obj_id, bi->key.obj_type, bi->key.offset);
+                        break;
+                    }
+
+                    oldxasize = sizeof(DIR_ITEM) - 1 + xa->m + xa->n;
+
+                    if (xa->n == newxa->n && RtlCompareMemory(newxa->name, xa->name, xa->n) == xa->n) {
+                        UINT64 pos;
+
+                        // replace
+
+                        if (td->size + bi->datalen - oldxasize > maxlen)
+                            ERR("DIR_ITEM would be over maximum size, truncating (%u + %u - %u > %u)\n", td->size, bi->datalen, oldxasize, maxlen);
+
+                        newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen - oldxasize, ALLOC_TAG);
+                        if (!newdata) {
+                            ERR("out of memory\n");
+                            return TRUE;
+                        }
+
+                        pos = (UINT8*)xa - td->data;
+                        if (pos + oldxasize < td->size) { // copy after changed xattr
+                            RtlCopyMemory(newdata + pos + bi->datalen, td->data + pos + oldxasize, td->size - pos - oldxasize);
+                        }
+
+                        if (pos > 0) { // copy before changed xattr
+                            RtlCopyMemory(newdata, td->data, pos);
+                            xa = (DIR_ITEM*)(newdata + pos);
+                        } else
+                            xa = (DIR_ITEM*)newdata;
+
+                        RtlCopyMemory(xa, bi->data, bi->datalen);
+
+                        bi->datalen = min(td->size + bi->datalen - oldxasize, maxlen);
+
+                        ExFreePool(bi->data);
+                        bi->data = newdata;
+
+                        break;
+                    }
+
+                    if ((UINT8*)xa - (UINT8*)td->data + oldxasize >= size) {
+                        // not found, add to end of data
+
+                        if (td->size + bi->datalen > maxlen)
+                            ERR("DIR_ITEM would be over maximum size, truncating (%u + %u > %u)\n", td->size, bi->datalen, maxlen);
+
+                        newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen, ALLOC_TAG);
+                        if (!newdata) {
+                            ERR("out of memory\n");
+                            return TRUE;
+                        }
+
+                        RtlCopyMemory(newdata, td->data, td->size);
+
+                        xa = (DIR_ITEM*)((UINT8*)newdata + td->size);
+                        RtlCopyMemory(xa, bi->data, bi->datalen);
+
+                        bi->datalen = min(bi->datalen + td->size, maxlen);
+
+                        ExFreePool(bi->data);
+                        bi->data = newdata;
+
+                        break;
+                    } else {
+                        xa = (DIR_ITEM*)&xa->name[xa->m + xa->n];
+                        size -= oldxasize;
+                    }
+                }
+            }
+        } else if (bi->operation == Batch_DirItem) {
+            UINT8* newdata;
+
+            if (td->size + bi->datalen > maxlen) {
+                ERR("DIR_ITEM would be over maximum size (%u + %u > %u)\n", td->size, bi->datalen, maxlen);
+                return TRUE;
+            }
+
+            newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen, ALLOC_TAG);
+            if (!newdata) {
+                ERR("out of memory\n");
+                return TRUE;
+            }
+
+            // existing entries first, new entry appended
+            RtlCopyMemory(newdata, td->data, td->size);
+
+            RtlCopyMemory(newdata + td->size, bi->data, bi->datalen);
+
+            bi->datalen += td->size;
+
+            ExFreePool(bi->data);
+            bi->data = newdata;
+        } else if (bi->operation == Batch_InodeRef) {
+            UINT8* newdata;
+
+            if (td->size + bi->datalen > maxlen) {
+                if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF) {
+                    INODE_REF* ir = (INODE_REF*)bi->data;
+                    INODE_EXTREF* ier;
+                    ULONG ierlen;
+                    batch_item* bi2;
+                    LIST_ENTRY* le;
+                    BOOL inserted = FALSE;
+
+                    TRACE("INODE_REF would be too long, adding INODE_EXTREF instead\n");
+
+                    ierlen = sizeof(INODE_EXTREF) - 1 + ir->n;
+
+                    ier = ExAllocatePoolWithTag(PagedPool, ierlen, ALLOC_TAG);
+                    if (!ier) {
+                        ERR("out of memory\n");
+                        return TRUE;
+                    }
+
+                    ier->dir = bi->key.offset;
+                    ier->index = ir->index;
+                    ier->n = ir->n;
+                    RtlCopyMemory(ier->name, ir->name, ier->n);
+
+                    bi2 = ExAllocateFromPagedLookasideList(&Vcb->batch_item_lookaside);
+                    if (!bi2) {
+                        ERR("out of memory\n");
+                        ExFreePool(ier);
+                        return TRUE;
+                    }
+
+                    bi2->key.obj_id = bi->key.obj_id;
+                    bi2->key.obj_type = TYPE_INODE_EXTREF;
+                    bi2->key.offset = calc_crc32c((UINT32)ier->dir, (UINT8*)ier->name, ier->n);
+                    bi2->data = ier;
+                    bi2->datalen = ierlen;
+                    bi2->operation = Batch_InodeExtRef;
+
+                    // queue the new item before the first later entry whose key is not lower
+                    le = bi->list_entry.Flink;
+                    while (le != listhead) {
+                        batch_item* bi3 = CONTAINING_RECORD(le, batch_item, list_entry);
+
+                        if (keycmp(bi3->key, bi2->key) != -1) {
+                            InsertHeadList(le->Blink, &bi2->list_entry);
+                            inserted = TRUE;
+                            // stop here: linking bi2 a second time while it is
+                            // already on the list would corrupt the list
+                            break;
+                        }
+
+                        le = le->Flink;
+                    }
+
+                    if (!inserted)
+                        InsertTailList(listhead, &bi2->list_entry);
+
+                    return TRUE;
+                } else {
+                    ERR("INODE_REF would be over maximum size (%u + %u > %u)\n", td->size, bi->datalen, maxlen);
+                    return TRUE;
+                }
+            }
+
+            newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen, ALLOC_TAG);
+            if (!newdata) {
+                ERR("out of memory\n");
+                return TRUE;
+            }
+
+            RtlCopyMemory(newdata, td->data, td->size);
+
+            RtlCopyMemory(newdata + td->size, bi->data, bi->datalen);
+
+            bi->datalen += td->size;
+
+            ExFreePool(bi->data);
+            bi->data = newdata;
+        } else if (bi->operation == Batch_InodeExtRef) {
+            UINT8* newdata;
+
+            if (td->size + bi->datalen > maxlen) {
+                ERR("INODE_EXTREF would be over maximum size (%u + %u > %u)\n", td->size, bi->datalen, maxlen);
+                return TRUE;
+            }
+
+            newdata = ExAllocatePoolWithTag(PagedPool, td->size + bi->datalen, ALLOC_TAG);
+            if (!newdata) {
+                ERR("out of memory\n");
+                return TRUE;
+            }
+
+            RtlCopyMemory(newdata, td->data, td->size);
+
+            RtlCopyMemory(newdata + td->size, bi->data, bi->datalen);
+
+            bi->datalen += td->size;
+
+            ExFreePool(bi->data);
+            bi->data = newdata;
+        }
+
+        // newtd takes over the (possibly merged) payload
+        newtd->data = bi->data;
+        newtd->size = bi->datalen;
+
+        // delete old item
+        if (!td->ignore) {
+            traverse_ptr* tp2;
+
+            td->ignore = TRUE;
+
+            t->header.num_items--;
+            t->size -= sizeof(leaf_node) + td->size;
+
+            if (rollback) {
+                tp2 = ExAllocateFromPagedLookasideList(&Vcb->traverse_ptr_lookaside);
+                if (!tp2) {
+                    ERR("out of memory\n");
+                    return FALSE;
+                }
+
+                tp2->tree = t;
+                tp2->item = td;
+
+                add_rollback(Vcb, rollback, ROLLBACK_DELETE_ITEM, tp2);
+            }
+        }
+
+        // link newtd directly after the item it supersedes
+        InsertHeadList(&td->list_entry, &newtd->list_entry);
+    } else {
+        ERR("(%llx,%x,%llx) already exists\n", bi->key.obj_id, bi->key.obj_type, bi->key.offset);
+        int3;
+    }
+
+    return FALSE;
+}
+
+// Applies every pending batch_item queued for one root (br->r) to the in-memory tree.
+//
+// For each item, find_item() locates the leaf and a neighbouring entry; a new tree_data
+// wrapping the item's key and payload is then linked into that leaf's itemlist:
+//   * at the head of the leaf if the key sorts before the entry found (parent keys
+//     equal to the old first key are rewritten to the new key),
+//   * through handle_batch_collision() if the key already exists,
+//   * directly after the entry found otherwise.
+// Following batch items are merged into the same leaf without another find_item()
+// call for as long as their keys sort before the key reported by find_tree_end()
+// (or that function reports no end). The touched leaf and all its ancestors get the
+// current superblock generation, and parent items flagged `ignore` are re-enabled.
+//
+// Vcb      - volume; supplies the lookaside lists and superblock generation.
+// br       - batch for a single root; its item list is always emptied before returning.
+// Irp      - passed through to find_item().
+// rollback - optional; when non-NULL a ROLLBACK_INSERT_ITEM entry is recorded for
+//            every item that is inserted.
+//
+// Failures are logged and stop processing of this root; nothing is returned to the
+// caller. All exits, including the error ones, run the final cleanup loop so that the
+// batch_item structures go back to the lookaside list instead of leaking when the
+// caller frees br.
+// FIXME - payloads (bi->data) of items that were never applied are still not released
+// on the error paths.
+static void commit_batch_list_root(device_extension* Vcb, batch_root* br, PIRP Irp, LIST_ENTRY* rollback) {
+    LIST_ENTRY* le;
+    NTSTATUS Status;
+
+    TRACE("root: %llx\n", br->r->id);
+
+    le = br->items.Flink;
+    while (le != &br->items) {
+        batch_item* bi = CONTAINING_RECORD(le, batch_item, list_entry);
+        LIST_ENTRY *le2, *listhead;
+        traverse_ptr tp, *tp2;
+        KEY tree_end;
+        BOOL no_end;
+        tree_data* td;
+        int cmp;
+        tree* t;
+        BOOL ignore = FALSE;
+
+        TRACE("(%llx,%x,%llx)\n", bi->key.obj_id, bi->key.obj_type, bi->key.offset);
+
+        Status = find_item(Vcb, br->r, &tp, &bi->key, FALSE, Irp);
+        if (!NT_SUCCESS(Status)) { // FIXME - handle STATUS_NOT_FOUND
+            ERR("find_item returned %08x\n", Status);
+            goto cleanup;
+        }
+
+        find_tree_end(tp.tree, &tree_end, &no_end);
+
+        td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
+        if (!td) {
+            ERR("out of memory\n");
+            goto cleanup;
+        }
+
+        // the tree item takes over the batch item's payload pointer
+        td->key = bi->key;
+        td->size = bi->datalen;
+        td->data = bi->data;
+        td->ignore = FALSE;
+        td->inserted = TRUE;
+
+        cmp = keycmp(bi->key, tp.item->key);
+
+        if (cmp == -1) { // very first key in root
+            tree_data* paritem;
+
+            InsertHeadList(&tp.tree->itemlist, &td->list_entry);
+
+            // walk up while the parent item still carries the old first key
+            paritem = tp.tree->paritem;
+            while (paritem) {
+                if (!keycmp(paritem->key, tp.item->key)) {
+                    paritem->key = bi->key;
+                } else
+                    break;
+
+                paritem = paritem->treeholder.tree->paritem;
+            }
+        } else if (cmp == 0) { // item already exists
+            ignore = handle_batch_collision(Vcb, bi, tp.tree, tp.item, td, &br->items, rollback);
+        } else {
+            // InsertHeadList links td immediately after tp.item
+            InsertHeadList(&tp.item->list_entry, &td->list_entry);
+        }
+
+        if (!ignore) {
+            tp.tree->header.num_items++;
+            tp.tree->size += bi->datalen + sizeof(leaf_node);
+            tp.tree->write = TRUE;
+
+            if (rollback) {
+                // FIXME - free this correctly
+                tp2 = ExAllocateFromPagedLookasideList(&Vcb->traverse_ptr_lookaside);
+                if (!tp2) {
+                    ERR("out of memory\n");
+                    goto cleanup;
+                }
+
+                tp2->tree = tp.tree;
+                tp2->item = td;
+
+                add_rollback(Vcb, rollback, ROLLBACK_INSERT_ITEM, tp2);
+            }
+
+            listhead = &td->list_entry;
+        } else
+            listhead = &tp.item->list_entry;
+
+        // absorb the following batch items that belong in the same leaf
+        le2 = le->Flink;
+        while (le2 != &br->items) {
+            batch_item* bi2 = CONTAINING_RECORD(le2, batch_item, list_entry);
+
+            if (no_end || keycmp(bi2->key, tree_end) == -1) {
+                LIST_ENTRY* le3;
+                BOOL inserted = FALSE;
+
+                ignore = FALSE;
+
+                td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
+                if (!td) {
+                    ERR("out of memory\n");
+                    goto cleanup;
+                }
+
+                td->key = bi2->key;
+                td->size = bi2->datalen;
+                td->data = bi2->data;
+                td->ignore = FALSE;
+                td->inserted = TRUE;
+
+                // scan forward from the last insertion point for the slot of this key
+                le3 = listhead;
+                while (le3 != &tp.tree->itemlist) {
+                    tree_data* td2 = CONTAINING_RECORD(le3, tree_data, list_entry);
+
+                    if (!td2->ignore) {
+                        cmp = keycmp(bi2->key, td2->key);
+
+                        if (cmp == 0) {
+                            ignore = handle_batch_collision(Vcb, bi2, tp.tree, td2, td, &br->items, rollback);
+                            inserted = TRUE;
+                            break;
+                        } else if (cmp == -1) {
+                            // link td just before td2
+                            InsertHeadList(le3->Blink, &td->list_entry);
+                            inserted = TRUE;
+                            break;
+                        }
+                    }
+
+                    le3 = le3->Flink;
+                }
+
+                if (!inserted)
+                    InsertTailList(&tp.tree->itemlist, &td->list_entry);
+
+                if (!ignore) {
+                    tp.tree->header.num_items++;
+                    tp.tree->size += bi2->datalen + sizeof(leaf_node);
+
+                    if (rollback) {
+                        // FIXME - free this correctly
+                        tp2 = ExAllocateFromPagedLookasideList(&Vcb->traverse_ptr_lookaside);
+                        if (!tp2) {
+                            ERR("out of memory\n");
+                            goto cleanup;
+                        }
+
+                        tp2->tree = tp.tree;
+                        tp2->item = td;
+
+                        add_rollback(Vcb, rollback, ROLLBACK_INSERT_ITEM, tp2);
+                    }
+
+                    listhead = &td->list_entry;
+                }
+
+                // the outer loop resumes after the last item handled here
+                le = le2;
+            } else
+                break;
+
+            le2 = le2->Flink;
+        }
+
+        // stamp the leaf and every ancestor, reviving parent items marked as ignored
+        t = tp.tree;
+        while (t) {
+            if (t->paritem && t->paritem->ignore) {
+                t->paritem->ignore = FALSE;
+                t->parent->header.num_items++;
+                t->parent->size += sizeof(internal_node);
+
+                // FIXME - do we need to add a rollback entry here?
+            }
+
+            t->header.generation = Vcb->superblock.generation;
+            t = t->parent;
+        }
+
+        le = le->Flink;
+    }
+
+cleanup:
+    // FIXME - remove as we are going along
+    while (!IsListEmpty(&br->items)) {
+        LIST_ENTRY* le = RemoveHeadList(&br->items);
+        batch_item* bi = CONTAINING_RECORD(le, batch_item, list_entry);
+
+        ExFreeToPagedLookasideList(&Vcb->batch_item_lookaside, bi);
+    }
+}
+
+// Drains batchlist: each batch_root is unlinked, committed through
+// commit_batch_list_root(), and then freed. The list is empty on return.
+void commit_batch_list(device_extension* Vcb, LIST_ENTRY* batchlist, PIRP Irp, LIST_ENTRY* rollback) {
+    for (;;) {
+        LIST_ENTRY* head_entry;
+        batch_root* root_batch;
+
+        if (IsListEmpty(batchlist))
+            break;
+
+        head_entry = RemoveHeadList(batchlist);
+        root_batch = CONTAINING_RECORD(head_entry, batch_root, list_entry);
+
+        commit_batch_list_root(Vcb, root_batch, Irp, rollback);
+
+        ExFreePool(root_batch);
+    }
+}
#include "btrfs_drv.h"
+typedef struct { // context handed to do_job() for one queued read/write IRP
+ device_extension* Vcb; // volume the request targets; forwarded to do_write_job()
+ PIRP Irp; // the IRP to process; do_job() dispatches on its major function
+ WORK_QUEUE_ITEM item; // work item initialised and queued by add_thread_job()
+} job_info; // allocated in add_thread_job(), freed by do_job()
+
void do_read_job(PIRP Irp) {
NTSTATUS Status;
ULONG bytes_read;
BOOL top_level = is_top_level(Irp);
+ PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
+ PFILE_OBJECT FileObject = IrpSp->FileObject;
+ fcb* fcb = FileObject->FsContext;
+ BOOL fcb_lock = FALSE;
Irp->IoStatus.Information = 0;
+ if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) {
+ ExAcquireResourceSharedLite(fcb->Header.Resource, TRUE);
+ fcb_lock = TRUE;
+ }
+
Status = do_read(Irp, TRUE, &bytes_read);
+
+ if (fcb_lock)
+ ExReleaseResourceLite(fcb->Header.Resource);
Irp->IoStatus.Status = Status;
Status = _SEH2_GetExceptionCode();
} _SEH2_END;
+ if (!NT_SUCCESS(Status))
+ ERR("write_file returned %08x\n", Status);
+
Irp->IoStatus.Status = Status;
TRACE("wrote %u bytes\n", Irp->IoStatus.Information);
TRACE("returning %08x\n", Status);
}
-static void do_job(drv_thread* thread, LIST_ENTRY* le) {
- thread_job* tj = CONTAINING_RECORD(le, thread_job, list_entry);
- PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(tj->Irp);
+// Work-item routine queued by add_thread_job(). Dispatches the stored IRP to
+// do_read_job() or do_write_job() according to its major function, then frees
+// the job_info context it was given.
+#ifdef __REACTOS__
+static void NTAPI do_job(void* context) {
+#else
+static void do_job(void* context) {
+#endif
+ job_info* ji = context;
+ PIO_STACK_LOCATION IrpSp = ji->Irp ? IoGetCurrentIrpStackLocation(ji->Irp) : NULL;
+
+ // IrpSp is NULL when the job carries no IRP; bail out before it is dereferenced below
+ if (!IrpSp) {
+ ERR("job has no IRP\n");
+ ExFreePool(ji);
+ return;
+ }
+
if (IrpSp->MajorFunction == IRP_MJ_READ) {
- do_read_job(tj->Irp);
+ do_read_job(ji->Irp);
} else if (IrpSp->MajorFunction == IRP_MJ_WRITE) {
- do_write_job(thread->DeviceObject->DeviceExtension, tj->Irp);
- } else {
- ERR("unsupported major function %x\n", IrpSp->MajorFunction);
- tj->Irp->IoStatus.Status = STATUS_INTERNAL_ERROR;
- tj->Irp->IoStatus.Information = 0;
- IoCompleteRequest(tj->Irp, IO_NO_INCREMENT);
+ do_write_job(ji->Vcb, ji->Irp);
}
- ExFreePool(tj);
+ ExFreePool(ji);
}
-void STDCALL worker_thread(void* context) {
- drv_thread* thread = context;
- KIRQL irql;
+// Queues a read or write IRP for deferred processing by do_job() on the system
+// delayed work queue.
+//
+// If the IRP has no MDL yet, one is built over Irp->UserBuffer and its pages are
+// probed and locked (write access for reads, read access for writes) before queuing.
+//
+// Returns TRUE once the work item has been queued; from then on do_job() owns and
+// frees the job_info. Returns FALSE on allocation failure, an unexpected major
+// function, or if locking the buffer raises an exception - in every such case the
+// job_info is released here and nothing is queued.
+BOOL add_thread_job(device_extension* Vcb, PIRP Irp) {
+ job_info* ji;
- ObReferenceObject(thread->DeviceObject);
+ ji = ExAllocatePoolWithTag(NonPagedPool, sizeof(job_info), ALLOC_TAG);
+ if (!ji) {
+ ERR("out of memory\n");
+ return FALSE;
+ }
- while (TRUE) {
- KeWaitForSingleObject(&thread->event, Executive, KernelMode, FALSE, NULL);
-
- FsRtlEnterFileSystem();
+ ji->Vcb = Vcb;
+ ji->Irp = Irp;
+
+ if (!Irp->MdlAddress) {
+ PMDL Mdl;
+ LOCK_OPERATION op;
+ ULONG len;
+ PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
- while (TRUE) {
- LIST_ENTRY* le;
- device_extension* Vcb = thread->DeviceObject->DeviceExtension;
-
- KeAcquireSpinLock(&thread->spin_lock, &irql);
-
- if (IsListEmpty(&thread->jobs)) {
- KeReleaseSpinLock(&thread->spin_lock, irql);
- break;
- }
-
- le = thread->jobs.Flink;
- RemoveEntryList(le);
-
- KeReleaseSpinLock(&thread->spin_lock, irql);
-
- InterlockedDecrement(&Vcb->threads.pending_jobs);
- do_job(thread, le);
+ // a read stores into the caller's buffer, a write only reads from it
+ if (IrpSp->MajorFunction == IRP_MJ_READ) {
+ op = IoWriteAccess;
+ len = IrpSp->Parameters.Read.Length;
+ } else if (IrpSp->MajorFunction == IRP_MJ_WRITE) {
+ op = IoReadAccess;
+ len = IrpSp->Parameters.Write.Length;
+ } else {
+ ERR("unexpected major function %u\n", IrpSp->MajorFunction);
+ ExFreePool(ji); // not queued, so do_job() will never free it
+ return FALSE;
}
- FsRtlExitFileSystem();
+ // passing Irp makes IoAllocateMdl attach the new MDL to the IRP
+ Mdl = IoAllocateMdl(Irp->UserBuffer, len, FALSE, FALSE, Irp);
+
+ if (!Mdl) {
+ ERR("out of memory\n");
+ ExFreePool(ji);
+ return FALSE;
+ }
- if (thread->quit)
- break;
+ _SEH2_TRY {
+ MmProbeAndLockPages(Mdl, Irp->RequestorMode, op);
+ } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
+ ERR("MmProbeAndLockPages raised status %08x\n", _SEH2_GetExceptionCode());
+
+ IoFreeMdl(Mdl);
+ Irp->MdlAddress = NULL;
+ ExFreePool(ji);
+
+ _SEH2_YIELD(return FALSE);
+ } _SEH2_END;
}
- ObDereferenceObject(thread->DeviceObject);
-
- KeSetEvent(&thread->finished, 0, FALSE);
+ ExInitializeWorkItem(&ji->item, do_job, ji);
+ ExQueueWorkItem(&ji->item, DelayedWorkQueue);
- PsTerminateSystemThread(STATUS_SUCCESS);
+ return TRUE;
}
#include "btrfs_drv.h"
-#define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - sizeof(leaf_node))
-
-// #define DEBUG_WRITE_LOOPS
-
// BOOL did_split;
BOOL chunk_test = FALSE;
typedef struct {
- KEVENT Event;
- IO_STATUS_BLOCK iosb;
-} write_context;
-
-typedef struct {
- EXTENT_ITEM ei;
- UINT8 type;
- EXTENT_DATA_REF edr;
-} EXTENT_ITEM_DATA_REF;
+ UINT64 start; // first offset written on this stripe, as computed by the prepare_*_write helpers
+ UINT64 end; // one past the last offset written; equal to start when the stripe is untouched
+ UINT8* data; // buffer of (end - start) bytes holding this stripe's share of the write
+ UINT32 skip_start; // NOTE(review): not used in the code visible here - meaning to be confirmed
+ UINT32 skip_end; // NOTE(review): not used in the code visible here - meaning to be confirmed
+} write_stripe; // per-stripe description of one write, filled in by prepare_raid0_write() and friends
typedef struct {
- EXTENT_ITEM_TREE eit;
- UINT8 type;
- TREE_BLOCK_REF tbr;
-} EXTENT_ITEM_TREE2;
+ LONG stripes_left; // NOTE(review): presumably the count of stripe reads still outstanding - confirm against users
+ KEVENT event; // NOTE(review): presumably signalled when stripes_left reaches zero - confirm against users
+} read_stripe_master; // state referenced by read_stripe entries through their master pointer
typedef struct {
- EXTENT_ITEM ei;
- UINT8 type;
- TREE_BLOCK_REF tbr;
-} EXTENT_ITEM_SKINNY_METADATA;
+ PIRP Irp; // NOTE(review): presumably the IRP issued for this stripe's read - confirm against users
+ PDEVICE_OBJECT devobj; // NOTE(review): presumably the device the read is sent to - confirm against users
+ IO_STATUS_BLOCK iosb; // NOTE(review): presumably receives the completion status of Irp - confirm
+ read_stripe_master* master; // the read_stripe_master this stripe belongs to
+} read_stripe;
// static BOOL extent_item_is_shared(EXTENT_ITEM* ei, ULONG len);
static NTSTATUS STDCALL write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr);
-static void update_checksum_tree(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback);
static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback);
-static NTSTATUS STDCALL write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
- write_context* context = conptr;
-
- context->iosb = Irp->IoStatus;
- KeSetEvent(&context->Event, 0, FALSE);
-
-// return STATUS_SUCCESS;
- return STATUS_MORE_PROCESSING_REQUIRED;
-}
-
-static NTSTATUS STDCALL write_data_phys(PDEVICE_OBJECT device, UINT64 address, void* data, UINT32 length) {
- NTSTATUS Status;
- LARGE_INTEGER offset;
- PIRP Irp;
- PIO_STACK_LOCATION IrpSp;
- write_context* context = NULL;
-
- TRACE("(%p, %llx, %p, %x)\n", device, address, data, length);
-
- context = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_context), ALLOC_TAG);
- if (!context) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- RtlZeroMemory(context, sizeof(write_context));
-
- KeInitializeEvent(&context->Event, NotificationEvent, FALSE);
-
- offset.QuadPart = address;
-
-// Irp = IoBuildSynchronousFsdRequest(IRP_MJ_WRITE, Vcb->device, data, length, &offset, NULL, &context->iosb);
-
- Irp = IoAllocateIrp(device->StackSize, FALSE);
-
- if (!Irp) {
- ERR("IoAllocateIrp failed\n");
- Status = STATUS_INTERNAL_ERROR;
- goto exit2;
- }
-
- IrpSp = IoGetNextIrpStackLocation(Irp);
- IrpSp->MajorFunction = IRP_MJ_WRITE;
-
- if (device->Flags & DO_BUFFERED_IO) {
- Irp->AssociatedIrp.SystemBuffer = data;
-
- Irp->Flags = IRP_BUFFERED_IO;
- } else if (device->Flags & DO_DIRECT_IO) {
- Irp->MdlAddress = IoAllocateMdl(data, length, FALSE, FALSE, NULL);
- if (!Irp->MdlAddress) {
- DbgPrint("IoAllocateMdl failed\n");
- goto exit;
- }
-
- MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoWriteAccess);
- } else {
- Irp->UserBuffer = data;
- }
-
- IrpSp->Parameters.Write.Length = length;
- IrpSp->Parameters.Write.ByteOffset = offset;
-
- Irp->UserIosb = &context->iosb;
-
- Irp->UserEvent = &context->Event;
-
- IoSetCompletionRoutine(Irp, write_completion, context, TRUE, TRUE, TRUE);
-
- // FIXME - support multiple devices
- Status = IoCallDriver(device, Irp);
-
- if (Status == STATUS_PENDING) {
- KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
- Status = context->iosb.Status;
- }
-
- if (!NT_SUCCESS(Status)) {
- ERR("IoCallDriver returned %08x\n", Status);
- }
-
- if (device->Flags & DO_DIRECT_IO) {
- MmUnlockPages(Irp->MdlAddress);
- IoFreeMdl(Irp->MdlAddress);
- }
-
-exit:
- IoFreeIrp(Irp);
-
-exit2:
- if (context)
- ExFreePool(context);
-
- return Status;
-}
-
-static NTSTATUS STDCALL write_superblock(device_extension* Vcb, device* device) {
- NTSTATUS Status;
- unsigned int i = 0;
- UINT32 crc32;
-
-#ifdef __REACTOS__
- Status = STATUS_INTERNAL_ERROR;
-#endif
-
- RtlCopyMemory(&Vcb->superblock.dev_item, &device->devitem, sizeof(DEV_ITEM));
-
- // FIXME - only write one superblock if on SSD (?)
- while (superblock_addrs[i] > 0 && device->length >= superblock_addrs[i] + sizeof(superblock)) {
- TRACE("writing superblock %u\n", i);
-
- Vcb->superblock.sb_phys_addr = superblock_addrs[i];
-
- crc32 = calc_crc32c(0xffffffff, (UINT8*)&Vcb->superblock.uuid, (ULONG)sizeof(superblock) - sizeof(Vcb->superblock.checksum));
- crc32 = ~crc32;
- TRACE("crc32 is %08x\n", crc32);
- RtlCopyMemory(&Vcb->superblock.checksum, &crc32, sizeof(UINT32));
-
- Status = write_data_phys(device->devobj, superblock_addrs[i], &Vcb->superblock, sizeof(superblock));
-
- if (!NT_SUCCESS(Status))
- break;
-
- i++;
- }
-
- if (i == 0) {
- ERR("no superblocks written!\n");
- }
-
- return Status;
-}
-
-static BOOL find_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address) {
+BOOL find_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address) {
LIST_ENTRY* le;
space* s;
return lastaddr;
}
-static NTSTATUS update_dev_item(device_extension* Vcb, device* device, PIRP Irp, LIST_ENTRY* rollback) {
- KEY searchkey;
- traverse_ptr tp;
- DEV_ITEM* di;
- NTSTATUS Status;
-
- searchkey.obj_id = 1;
- searchkey.obj_type = TYPE_DEV_ITEM;
- searchkey.offset = device->devitem.dev_id;
-
- Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return Status;
- }
-
- if (keycmp(&tp.item->key, &searchkey)) {
- ERR("error - could not find DEV_ITEM for device %llx\n", device->devitem.dev_id);
- return STATUS_INTERNAL_ERROR;
- }
-
- delete_tree_item(Vcb, &tp, rollback);
-
- di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
- if (!di) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- RtlCopyMemory(di, &device->devitem, sizeof(DEV_ITEM));
-
- if (!insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp, rollback)) {
- ERR("insert_tree_item failed\n");
- return STATUS_INTERNAL_ERROR;
- }
-
- return STATUS_SUCCESS;
-}
-
-static void regen_bootstrap(device_extension* Vcb) {
- sys_chunk* sc2;
- USHORT i = 0;
- LIST_ENTRY* le;
-
- i = 0;
- le = Vcb->sys_chunks.Flink;
- while (le != &Vcb->sys_chunks) {
- sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
-
- TRACE("%llx,%x,%llx\n", sc2->key.obj_id, sc2->key.obj_type, sc2->key.offset);
-
- RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], &sc2->key, sizeof(KEY));
- i += sizeof(KEY);
-
- RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], sc2->data, sc2->size);
- i += sc2->size;
-
- le = le->Flink;
- }
-}
-
-static NTSTATUS add_to_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset, void* data, ULONG size) {
- sys_chunk *sc, *sc2;
- LIST_ENTRY* le;
-
- if (Vcb->superblock.n + sizeof(KEY) + size > SYS_CHUNK_ARRAY_SIZE) {
- ERR("error - bootstrap is full\n");
- return STATUS_INTERNAL_ERROR;
- }
-
- sc = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG);
- if (!sc) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- sc->key.obj_id = obj_id;
- sc->key.obj_type = obj_type;
- sc->key.offset = offset;
- sc->size = size;
- sc->data = ExAllocatePoolWithTag(PagedPool, sc->size, ALLOC_TAG);
- if (!sc->data) {
- ERR("out of memory\n");
- ExFreePool(sc);
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- RtlCopyMemory(sc->data, data, sc->size);
-
- le = Vcb->sys_chunks.Flink;
- while (le != &Vcb->sys_chunks) {
- sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
-
- if (keycmp(&sc2->key, &sc->key) == 1)
- break;
-
- le = le->Flink;
- }
- InsertTailList(le, &sc->list_entry);
-
- Vcb->superblock.n += sizeof(KEY) + size;
-
- regen_bootstrap(Vcb);
-
- return STATUS_SUCCESS;
-}
-
static BOOL find_new_dup_stripes(device_extension* Vcb, stripe* stripes, UINT64 max_stripe_size) {
UINT64 j, devnum, devusage = 0xffffffffffffffff;
space *devdh1 = NULL, *devdh2 = NULL;
for (j = 0; j < Vcb->superblock.num_devices; j++) {
- UINT64 usage;
-
- usage = (Vcb->devices[j].devitem.bytes_used * 4096) / Vcb->devices[j].devitem.num_bytes;
-
- // favour devices which have been used the least
- if (usage < devusage) {
- if (!IsListEmpty(&Vcb->devices[j].space)) {
- LIST_ENTRY* le;
- space *dh1 = NULL, *dh2 = NULL;
-
- le = Vcb->devices[j].space.Flink;
- while (le != &Vcb->devices[j].space) {
- space* dh = CONTAINING_RECORD(le, space, list_entry);
+ if (!Vcb->devices[j].readonly) {
+ UINT64 usage = (Vcb->devices[j].devitem.bytes_used * 4096) / Vcb->devices[j].devitem.num_bytes;
+
+ // favour devices which have been used the least
+ if (usage < devusage) {
+ if (!IsListEmpty(&Vcb->devices[j].space)) {
+ LIST_ENTRY* le;
+ space *dh1 = NULL, *dh2 = NULL;
- if (dh->size >= max_stripe_size && (!dh1 || dh->size < dh1->size)) {
- dh2 = dh1;
- dh1 = dh;
- }
+ le = Vcb->devices[j].space.Flink;
+ while (le != &Vcb->devices[j].space) {
+ space* dh = CONTAINING_RECORD(le, space, list_entry);
+
+ if (dh->size >= max_stripe_size && (!dh1 || dh->size < dh1->size)) {
+ dh2 = dh1;
+ dh1 = dh;
+ }
- le = le->Flink;
- }
-
- if (dh1 && (dh2 || dh1->size >= 2 * max_stripe_size)) {
- devnum = j;
- devusage = usage;
- devdh1 = dh1;
- devdh2 = dh2 ? dh2 : dh1;
+ le = le->Flink;
+ }
+
+ if (dh1 && (dh2 || dh1->size >= 2 * max_stripe_size)) {
+ devnum = j;
+ devusage = usage;
+ devdh1 = dh1;
+ devdh2 = dh2 ? dh2 : dh1;
+ }
}
}
}
UINT64 usage;
BOOL skip = FALSE;
+ if (Vcb->devices[j].readonly)
+ continue;
+
// skip this device if it already has a stripe
if (i > 0) {
for (k = 0; k < i; k++) {
sub_stripes = 2;
type = BLOCK_FLAG_RAID10;
} else if (flags & BLOCK_FLAG_RAID5) {
- FIXME("RAID5 not yet supported\n");
- goto end;
+ min_stripes = 3;
+ max_stripes = Vcb->superblock.num_devices;
+ sub_stripes = 1;
+ type = BLOCK_FLAG_RAID5;
} else if (flags & BLOCK_FLAG_RAID6) {
- FIXME("RAID6 not yet supported\n");
- goto end;
+ min_stripes = 4;
+ max_stripes = 257;
+ sub_stripes = 1;
+ type = BLOCK_FLAG_RAID6;
} else { // SINGLE
min_stripes = 1;
max_stripes = 1;
factor = num_stripes;
else if (type == BLOCK_FLAG_RAID10)
factor = num_stripes / sub_stripes;
+ else if (type == BLOCK_FLAG_RAID5)
+ factor = num_stripes - 1;
+ else if (type == BLOCK_FLAG_RAID6)
+ factor = num_stripes - 2;
if (stripe_size * factor > max_chunk_size)
stripe_size = max_chunk_size / factor;
c->offset = logaddr;
c->used = c->oldused = 0;
c->cache = NULL;
+ c->readonly = FALSE;
InitializeListHead(&c->space);
InitializeListHead(&c->space_size);
InitializeListHead(&c->deleting);
InitializeListHead(&c->changed_extents);
+ InitializeListHead(&c->range_locks);
+ KeInitializeSpinLock(&c->range_locks_spinlock);
+ KeInitializeEvent(&c->range_locks_event, NotificationEvent, FALSE);
+
ExInitializeResourceLite(&c->lock);
ExInitializeResourceLite(&c->changed_extents_lock);
for (i = 0; i < num_stripes; i++) {
stripes[i].device->devitem.bytes_used += stripe_size;
- space_list_subtract2(&stripes[i].device->space, NULL, cis[i].offset, stripe_size, NULL);
+ space_list_subtract2(Vcb, &stripes[i].device->space, NULL, cis[i].offset, stripe_size, NULL);
}
success = TRUE;
return success ? c : NULL;
}
-NTSTATUS STDCALL write_data(device_extension* Vcb, UINT64 address, void* data, BOOL need_free, UINT32 length, write_data_context* wtc, PIRP Irp, chunk* c) {
- NTSTATUS Status;
+static NTSTATUS prepare_raid0_write(chunk* c, UINT64 address, void* data, UINT32 length, write_stripe* stripes) { // splits a RAID0 write of `length` bytes at `address` into per-stripe ranges and buffers in stripes[]
+ UINT64 startoff, endoff;
+ UINT16 startoffstripe, endoffstripe, stripenum;
+ UINT64 pos, *stripeoff;
UINT32 i;
- CHUNK_ITEM_STRIPE* cis;
- write_data_stripe* stripe;
- UINT64 *stripestart = NULL, *stripeend = NULL;
- UINT8** stripedata = NULL;
- BOOL need_free2;
-
- TRACE("(%p, %llx, %p, %x)\n", Vcb, address, data, length);
- if (!c) {
- c = get_chunk_from_address(Vcb, address);
- if (!c) {
- ERR("could not get chunk for address %llx\n", address);
- return STATUS_INTERNAL_ERROR;
- }
+ stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes, ALLOC_TAG); // per-stripe count of bytes already copied into stripes[n].data
+ if (!stripeoff) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
}
+
+ get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe); // position of the first byte (chunk-relative input)
+ get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe); // position of the last byte, hence the +1 below
- if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
- FIXME("RAID5 not yet supported\n");
- return STATUS_NOT_IMPLEMENTED;
- } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
- FIXME("RAID6 not yet supported\n");
- return STATUS_NOT_IMPLEMENTED;
+ for (i = 0; i < c->chunk_item->num_stripes; i++) { // work out [start, end) for every stripe
+ if (startoffstripe > i) { // stripe lies before the starting stripe: begins at the next stripe_length boundary
+ stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
+ } else if (startoffstripe == i) {
+ stripes[i].start = startoff;
+ } else { // stripe lies after the starting stripe: begins at the current boundary
+ stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
+ }
+
+ if (endoffstripe > i) { // stripe lies before the final stripe: runs to the end of the last row
+ stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
+ } else if (endoffstripe == i) {
+ stripes[i].end = endoff + 1; // end is exclusive
+ } else {
+ stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length);
+ }
+
+ if (stripes[i].start != stripes[i].end) { // only stripes that receive data get a buffer
+ stripes[i].data = ExAllocatePoolWithTag(NonPagedPool, stripes[i].end - stripes[i].start, ALLOC_TAG);
+
+ if (!stripes[i].data) {
+ ERR("out of memory\n");
+ ExFreePool(stripeoff);
+ return STATUS_INSUFFICIENT_RESOURCES; // NOTE(review): buffers already allocated for earlier stripes are not freed here - presumably the caller does; verify
+ }
+ }
}
- stripestart = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes, ALLOC_TAG);
- if (!stripestart) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
+ pos = 0; // bytes of `data` distributed so far
+ RtlZeroMemory(stripeoff, sizeof(UINT64) * c->chunk_item->num_stripes);
- stripeend = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes, ALLOC_TAG);
- if (!stripeend) {
- ERR("out of memory\n");
- ExFreePool(stripestart);
- return STATUS_INSUFFICIENT_RESOURCES;
+ stripenum = startoffstripe;
+ while (pos < length) { // deal the source buffer out to the stripes round-robin
+ if (pos == 0) { // first piece: limited to the rest of the current stripe_length unit
+ UINT32 writelen = min(stripes[stripenum].end - stripes[stripenum].start,
+ c->chunk_item->stripe_length - (stripes[stripenum].start % c->chunk_item->stripe_length));
+
+ RtlCopyMemory(stripes[stripenum].data, data, writelen);
+ stripeoff[stripenum] += writelen;
+ pos += writelen;
+ } else if (length - pos < c->chunk_item->stripe_length) { // final partial piece
+ RtlCopyMemory(stripes[stripenum].data + stripeoff[stripenum], (UINT8*)data + pos, length - pos);
+ break;
+ } else { // a whole stripe_length unit
+ RtlCopyMemory(stripes[stripenum].data + stripeoff[stripenum], (UINT8*)data + pos, c->chunk_item->stripe_length);
+ stripeoff[stripenum] += c->chunk_item->stripe_length;
+ pos += c->chunk_item->stripe_length;
+ }
+
+ stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
}
+
+ ExFreePool(stripeoff);
- stripedata = ExAllocatePoolWithTag(PagedPool, sizeof(UINT8*) * c->chunk_item->num_stripes, ALLOC_TAG);
- if (!stripedata) {
+ return STATUS_SUCCESS;
+}
+
+static NTSTATUS prepare_raid10_write(chunk* c, UINT64 address, void* data, UINT32 length, write_stripe* stripes) {
+ UINT64 startoff, endoff;
+ UINT16 startoffstripe, endoffstripe, stripenum;
+ UINT64 pos, *stripeoff;
+ UINT32 i;
+
+ stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes, ALLOC_TAG);
+ if (!stripeoff) {
ERR("out of memory\n");
- ExFreePool(stripeend);
- ExFreePool(stripestart);
return STATUS_INSUFFICIENT_RESOURCES;
}
- RtlZeroMemory(stripedata, sizeof(UINT8*) * c->chunk_item->num_stripes);
-
- cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
-
- if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
- UINT64 startoff, endoff;
- UINT16 startoffstripe, endoffstripe, stripenum;
- UINT64 pos, *stripeoff;
+
+ get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &startoff, &startoffstripe);
+ get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &endoff, &endoffstripe);
+
+ startoffstripe *= c->chunk_item->sub_stripes;
+ endoffstripe *= c->chunk_item->sub_stripes;
+
+ for (i = 0; i < c->chunk_item->num_stripes; i += c->chunk_item->sub_stripes) {
+ UINT16 j;
- stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes, ALLOC_TAG);
- if (!stripeoff) {
- ERR("out of memory\n");
- ExFreePool(stripedata);
- ExFreePool(stripeend);
- ExFreePool(stripestart);
- return STATUS_INSUFFICIENT_RESOURCES;
+ if (startoffstripe > i) {
+ stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
+ } else if (startoffstripe == i) {
+ stripes[i].start = startoff;
+ } else {
+ stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
}
-
- get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
- get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
- for (i = 0; i < c->chunk_item->num_stripes; i++) {
- if (startoffstripe > i) {
- stripestart[i] = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
- } else if (startoffstripe == i) {
- stripestart[i] = startoff;
- } else {
- stripestart[i] = startoff - (startoff % c->chunk_item->stripe_length);
- }
-
- if (endoffstripe > i) {
- stripeend[i] = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
- } else if (endoffstripe == i) {
- stripeend[i] = endoff + 1;
- } else {
- stripeend[i] = endoff - (endoff % c->chunk_item->stripe_length);
- }
+ if (endoffstripe > i) {
+ stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
+ } else if (endoffstripe == i) {
+ stripes[i].end = endoff + 1;
+ } else {
+ stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length);
+ }
+
+ if (stripes[i].start != stripes[i].end) {
+ stripes[i].data = ExAllocatePoolWithTag(NonPagedPool, stripes[i].end - stripes[i].start, ALLOC_TAG);
- if (stripestart[i] != stripeend[i]) {
- stripedata[i] = ExAllocatePoolWithTag(NonPagedPool, stripeend[i] - stripestart[i], ALLOC_TAG);
-
- if (!stripedata[i]) {
- ERR("out of memory\n");
- ExFreePool(stripeoff);
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto end;
- }
+ if (!stripes[i].data) {
+ ERR("out of memory\n");
+ ExFreePool(stripeoff);
+ return STATUS_INSUFFICIENT_RESOURCES;
}
}
- pos = 0;
- RtlZeroMemory(stripeoff, sizeof(UINT64) * c->chunk_item->num_stripes);
-
- stripenum = startoffstripe;
- while (pos < length) {
- if (pos == 0) {
- UINT32 writelen = min(stripeend[stripenum] - stripestart[stripenum],
- c->chunk_item->stripe_length - (stripestart[stripenum] % c->chunk_item->stripe_length));
-
- RtlCopyMemory(stripedata[stripenum], data, writelen);
- stripeoff[stripenum] += writelen;
- pos += writelen;
- } else if (length - pos < c->chunk_item->stripe_length) {
- RtlCopyMemory(stripedata[stripenum] + stripeoff[stripenum], (UINT8*)data + pos, length - pos);
- break;
- } else {
- RtlCopyMemory(stripedata[stripenum] + stripeoff[stripenum], (UINT8*)data + pos, c->chunk_item->stripe_length);
- stripeoff[stripenum] += c->chunk_item->stripe_length;
- pos += c->chunk_item->stripe_length;
- }
-
- stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
+ for (j = 1; j < c->chunk_item->sub_stripes; j++) {
+ stripes[i+j].start = stripes[i].start;
+ stripes[i+j].end = stripes[i].end;
+ stripes[i+j].data = stripes[i].data;
}
+ }
- ExFreePool(stripeoff);
-
- if (need_free)
- ExFreePool(data);
+ pos = 0;
+ RtlZeroMemory(stripeoff, sizeof(UINT64) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
- need_free2 = TRUE;
- } else if (c->chunk_item->type & BLOCK_FLAG_RAID10) {
- UINT64 startoff, endoff;
- UINT16 startoffstripe, endoffstripe, stripenum;
- UINT64 pos, *stripeoff;
+ stripenum = startoffstripe / c->chunk_item->sub_stripes;
+ while (pos < length) {
+ if (pos == 0) {
+ UINT32 writelen = min(stripes[stripenum * c->chunk_item->sub_stripes].end - stripes[stripenum * c->chunk_item->sub_stripes].start,
+ c->chunk_item->stripe_length - (stripes[stripenum * c->chunk_item->sub_stripes].start % c->chunk_item->stripe_length));
+
+ RtlCopyMemory(stripes[stripenum * c->chunk_item->sub_stripes].data, data, writelen);
+ stripeoff[stripenum] += writelen;
+ pos += writelen;
+ } else if (length - pos < c->chunk_item->stripe_length) {
+ RtlCopyMemory(stripes[stripenum * c->chunk_item->sub_stripes].data + stripeoff[stripenum], (UINT8*)data + pos, length - pos);
+ break;
+ } else {
+ RtlCopyMemory(stripes[stripenum * c->chunk_item->sub_stripes].data + stripeoff[stripenum], (UINT8*)data + pos, c->chunk_item->stripe_length);
+ stripeoff[stripenum] += c->chunk_item->stripe_length;
+ pos += c->chunk_item->stripe_length;
+ }
- stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes, ALLOC_TAG);
- if (!stripeoff) {
- ERR("out of memory\n");
- ExFreePool(stripedata);
- ExFreePool(stripeend);
- ExFreePool(stripestart);
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &startoff, &startoffstripe);
- get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &endoff, &endoffstripe);
-
- startoffstripe *= c->chunk_item->sub_stripes;
- endoffstripe *= c->chunk_item->sub_stripes;
-
- for (i = 0; i < c->chunk_item->num_stripes; i += c->chunk_item->sub_stripes) {
- UINT16 j;
-
- if (startoffstripe > i) {
- stripestart[i] = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
- } else if (startoffstripe == i) {
- stripestart[i] = startoff;
- } else {
- stripestart[i] = startoff - (startoff % c->chunk_item->stripe_length);
- }
-
- if (endoffstripe > i) {
- stripeend[i] = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
- } else if (endoffstripe == i) {
- stripeend[i] = endoff + 1;
- } else {
- stripeend[i] = endoff - (endoff % c->chunk_item->stripe_length);
- }
-
- if (stripestart[i] != stripeend[i]) {
- stripedata[i] = ExAllocatePoolWithTag(NonPagedPool, stripeend[i] - stripestart[i], ALLOC_TAG);
-
- if (!stripedata[i]) {
- ERR("out of memory\n");
- ExFreePool(stripeoff);
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto end;
- }
- }
-
- for (j = 1; j < c->chunk_item->sub_stripes; j++) {
- stripestart[i+j] = stripestart[i];
- stripeend[i+j] = stripeend[i];
- stripedata[i+j] = stripedata[i];
- }
- }
-
- pos = 0;
- RtlZeroMemory(stripeoff, sizeof(UINT64) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
-
- stripenum = startoffstripe / c->chunk_item->sub_stripes;
- while (pos < length) {
- if (pos == 0) {
- UINT32 writelen = min(stripeend[stripenum * c->chunk_item->sub_stripes] - stripestart[stripenum * c->chunk_item->sub_stripes],
- c->chunk_item->stripe_length - (stripestart[stripenum * c->chunk_item->sub_stripes] % c->chunk_item->stripe_length));
-
- RtlCopyMemory(stripedata[stripenum * c->chunk_item->sub_stripes], data, writelen);
- stripeoff[stripenum] += writelen;
- pos += writelen;
- } else if (length - pos < c->chunk_item->stripe_length) {
- RtlCopyMemory(stripedata[stripenum * c->chunk_item->sub_stripes] + stripeoff[stripenum], (UINT8*)data + pos, length - pos);
- break;
- } else {
- RtlCopyMemory(stripedata[stripenum * c->chunk_item->sub_stripes] + stripeoff[stripenum], (UINT8*)data + pos, c->chunk_item->stripe_length);
- stripeoff[stripenum] += c->chunk_item->stripe_length;
- pos += c->chunk_item->stripe_length;
- }
-
- stripenum = (stripenum + 1) % (c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
- }
-
- ExFreePool(stripeoff);
-
- if (need_free)
- ExFreePool(data);
-
- need_free2 = TRUE;
- } else {
- for (i = 0; i < c->chunk_item->num_stripes; i++) {
- stripestart[i] = address - c->offset;
- stripeend[i] = stripestart[i] + length;
- stripedata[i] = data;
- }
- need_free2 = need_free;
- }
-
- for (i = 0; i < c->chunk_item->num_stripes; i++) {
- PIO_STACK_LOCATION IrpSp;
-
- // FIXME - handle missing devices
-
- stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_stripe), ALLOC_TAG);
- if (!stripe) {
- ERR("out of memory\n");
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto end;
- }
-
- if (stripestart[i] == stripeend[i]) {
- stripe->status = WriteDataStatus_Ignore;
- stripe->Irp = NULL;
- stripe->buf = NULL;
- } else {
- stripe->context = (struct _write_data_context*)wtc;
- stripe->buf = stripedata[i];
- stripe->need_free = need_free2;
- stripe->device = c->devices[i];
- RtlZeroMemory(&stripe->iosb, sizeof(IO_STATUS_BLOCK));
- stripe->status = WriteDataStatus_Pending;
-
- if (!Irp) {
- stripe->Irp = IoAllocateIrp(stripe->device->devobj->StackSize, FALSE);
-
- if (!stripe->Irp) {
- ERR("IoAllocateIrp failed\n");
- Status = STATUS_INTERNAL_ERROR;
- goto end;
- }
- } else {
- stripe->Irp = IoMakeAssociatedIrp(Irp, stripe->device->devobj->StackSize);
-
- if (!stripe->Irp) {
- ERR("IoMakeAssociatedIrp failed\n");
- Status = STATUS_INTERNAL_ERROR;
- goto end;
- }
- }
-
- IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
- IrpSp->MajorFunction = IRP_MJ_WRITE;
-
- if (stripe->device->devobj->Flags & DO_BUFFERED_IO) {
- stripe->Irp->AssociatedIrp.SystemBuffer = stripedata[i];
-
- stripe->Irp->Flags = IRP_BUFFERED_IO;
- } else if (stripe->device->devobj->Flags & DO_DIRECT_IO) {
- stripe->Irp->MdlAddress = IoAllocateMdl(stripedata[i], stripeend[i] - stripestart[i], FALSE, FALSE, NULL);
- if (!stripe->Irp->MdlAddress) {
- ERR("IoAllocateMdl failed\n");
- Status = STATUS_INTERNAL_ERROR;
- goto end;
- }
-
- MmProbeAndLockPages(stripe->Irp->MdlAddress, KernelMode, IoWriteAccess);
- } else {
- stripe->Irp->UserBuffer = stripedata[i];
- }
-
- IrpSp->Parameters.Write.Length = stripeend[i] - stripestart[i];
- IrpSp->Parameters.Write.ByteOffset.QuadPart = stripestart[i] + cis[i].offset;
-
- stripe->Irp->UserIosb = &stripe->iosb;
- wtc->stripes_left++;
-
- IoSetCompletionRoutine(stripe->Irp, write_data_completion, stripe, TRUE, TRUE, TRUE);
- }
-
- InsertTailList(&wtc->stripes, &stripe->list_entry);
- }
-
- Status = STATUS_SUCCESS;
-
-end:
-
- if (stripestart) ExFreePool(stripestart);
- if (stripeend) ExFreePool(stripeend);
- if (stripedata) ExFreePool(stripedata);
-
- if (!NT_SUCCESS(Status)) {
- free_write_data_stripes(wtc);
- ExFreePool(wtc);
- }
-
- return Status;
-}
-
-// Writes `length` bytes from `data` to logical address `address` and blocks
-// until every stripe write has finished.
-//
-// A write_data_context is allocated and handed to write_data(), which queues
-// one write_data_stripe per device on wtc->stripes. Every stripe that is not
-// marked WriteDataStatus_Ignore is then sent down with IoCallDriver, and the
-// caller waits on wtc->Event (set by write_data_completion once stripes_left
-// drops to zero).
-//
-// Returns STATUS_INSUFFICIENT_RESOURCES if the context cannot be allocated,
-// the status of write_data() if building the stripes failed, the status of
-// the first stripe whose write failed, or otherwise the success status
-// returned by write_data().
-NTSTATUS STDCALL write_data_complete(device_extension* Vcb, UINT64 address, void* data, UINT32 length, PIRP Irp, chunk* c) {
-    write_data_context* wtc;
-    NTSTATUS Status;
-
-    wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context), ALLOC_TAG);
-    if (!wtc) {
-        ERR("out of memory\n");
-        return STATUS_INSUFFICIENT_RESOURCES;
-    }
-
-    KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE);
-    InitializeListHead(&wtc->stripes);
-    wtc->tree = FALSE;
-    wtc->stripes_left = 0;
-
-    Status = write_data(Vcb, address, data, FALSE, length, wtc, Irp, c);
-    if (!NT_SUCCESS(Status)) {
-        ERR("write_data returned %08x\n", Status);
-        // NOTE(review): some failure paths of write_data appear to free the
-        // stripes and wtc themselves before returning - confirm ownership,
-        // otherwise the two calls below release the context a second time.
-        free_write_data_stripes(wtc);
-        ExFreePool(wtc);
-        return Status;
-    }
-
-    if (wtc->stripes.Flink != &wtc->stripes) {
-        // launch writes and wait
-        LIST_ENTRY* le = wtc->stripes.Flink;
-        while (le != &wtc->stripes) {
-            write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
-
-            if (stripe->status != WriteDataStatus_Ignore)
-                IoCallDriver(stripe->device->devobj, stripe->Irp);
-
-            le = le->Flink;
-        }
-
-        KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL);
-
-        // Report the first failed stripe to the caller.
-        le = wtc->stripes.Flink;
-        while (le != &wtc->stripes) {
-            write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
-
-            if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
-                Status = stripe->iosb.Status;
-                break;
-            }
-
-            le = le->Flink;
-        }
-
-        free_write_data_stripes(wtc);
-    }
-
-    ExFreePool(wtc);
-
-    // Previously this returned STATUS_SUCCESS unconditionally, which hid
-    // failed stripe writes from the caller.
-    return Status;
-}
-
-// Drains c->deleting: each queued space entry is unlinked and freed.
-static void clean_space_cache_chunk(device_extension* Vcb, chunk* c) {
-    LIST_ENTRY* head = &c->deleting;
-
-    // FIXME - loop through c->deleting and do TRIM if device supports it
-    // FIXME - also find way of doing TRIM of dropped chunks
-
-    for (;;) {
-        space* doomed;
-
-        if (IsListEmpty(head))
-            break;
-
-        doomed = CONTAINING_RECORD(head->Flink, space, list_entry);
-
-        RemoveEntryList(&doomed->list_entry);
-        ExFreePool(doomed);
-    }
-}
-
-// Walks Vcb->chunks_changed until it is empty. For each chunk, under the
-// chunk's exclusive lock, its deleting list is drained and the chunk is
-// unlinked from the changed list (Flink is reset to NULL afterwards).
-static void clean_space_cache(device_extension* Vcb) {
-    LIST_ENTRY* changed = &Vcb->chunks_changed;
-
-    TRACE("(%p)\n", Vcb);
-
-    for (;;) {
-        chunk* cur;
-
-        if (IsListEmpty(changed))
-            break;
-
-        cur = CONTAINING_RECORD(changed->Flink, chunk, list_entry_changed);
-
-        ExAcquireResourceExclusiveLite(&cur->lock, TRUE);
-
-        clean_space_cache_chunk(Vcb, cur);
-        RemoveEntryList(&cur->list_entry_changed);
-        cur->list_entry_changed.Flink = NULL;
-
-        ExReleaseResourceLite(&cur->lock);
-    }
-}
-
-// Returns TRUE when every tree flagged for writing is ready to be written:
-// it is not an empty non-root tree, its size fits in a node body, and it has
-// been given a new address. Returns FALSE at the first tree that fails one
-// of these checks.
-static BOOL trees_consistent(device_extension* Vcb, LIST_ENTRY* rollback) {
-    ULONG body_limit = Vcb->superblock.node_size - sizeof(tree_header);
-    LIST_ENTRY* entry;
-
-    for (entry = Vcb->trees.Flink; entry != &Vcb->trees; entry = entry->Flink) {
-        tree* cur = CONTAINING_RECORD(entry, tree, list_entry);
-
-        if (!cur->write)
-            continue;
-
-        if (cur->header.num_items == 0 && cur->parent) {
-#ifdef DEBUG_WRITE_LOOPS
-            ERR("empty tree found, looping again\n");
-#endif
-            return FALSE;
-        }
-
-        if (cur->size > body_limit) {
-#ifdef DEBUG_WRITE_LOOPS
-            ERR("overlarge tree found (%u > %u), looping again\n", cur->size, body_limit);
-#endif
-            return FALSE;
-        }
-
-        if (!cur->has_new_address) {
-#ifdef DEBUG_WRITE_LOOPS
-            ERR("tree found without new address, looping again\n");
-#endif
-            return FALSE;
-        }
-    }
-
-    return TRUE;
-}
-
-// Propagates the write flag upwards: working level by level from the leaves,
-// every tree that is flagged for writing also flags its parent. The scan
-// stops at the first level that contains no flagged tree.
-//
-// Always returns STATUS_SUCCESS.
-static NTSTATUS add_parents(device_extension* Vcb, LIST_ENTRY* rollback) {
-    // Wider than the on-disk UINT8 level so that the loop bound below is a
-    // real bound; with a UINT8 counter "level <= 255" could never be false.
-    unsigned int level;
-    LIST_ENTRY* le;
-
-    for (level = 0; level <= 255; level++) {
-        BOOL nothing_found = TRUE;
-
-        TRACE("level = %u\n", level);
-
-        le = Vcb->trees.Flink;
-        while (le != &Vcb->trees) {
-            tree* t = CONTAINING_RECORD(le, tree, list_entry);
-
-            if (t->write && t->header.level == level) {
-                TRACE("tree %p: root = %llx, level = %x, parent = %p\n", t, t->header.tree_id, t->header.level, t->parent);
-
-                nothing_found = FALSE;
-
-                if (t->parent) {
-                    if (!t->parent->write)
-                        TRACE("adding tree %p (level %x)\n", t->parent, t->header.level);
-
-                    t->parent->write = TRUE;
-                }
-            }
-
-            le = le->Flink;
-        }
-
-        if (nothing_found)
-            break;
-    }
-
-    return STATUS_SUCCESS;
-}
-
-// Flags every ancestor of t (but not t itself) for writing.
-static void add_parents_to_cache(device_extension* Vcb, tree* t) {
-    tree* ancestor;
-
-    for (ancestor = t->parent; ancestor; ancestor = ancestor->parent)
-        ancestor->write = TRUE;
-}
-
-// Inserts a METADATA_ITEM (keyed by address, with the level as offset) into
-// the extent root for a tree block at `address`, removes the block's range
-// from chunk c's space list and flags the ancestors of the modified extent
-// tree node for writing.
-//
-// Returns FALSE on allocation or insertion failure, TRUE otherwise.
-static BOOL insert_tree_extent_skinny(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64 address, PIRP Irp, LIST_ENTRY* rollback) {
-    traverse_ptr inserted;
-    EXTENT_ITEM_SKINNY_METADATA* item = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
-
-    if (item == NULL) {
-        ERR("out of memory\n");
-        return FALSE;
-    }
-
-    item->ei.refcount = 1;
-    item->ei.generation = Vcb->superblock.generation;
-    item->ei.flags = EXTENT_ITEM_TREE_BLOCK;
-    item->type = TYPE_TREE_BLOCK_REF;
-    item->tbr.offset = root_id;
-
-    if (insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, item, sizeof(EXTENT_ITEM_SKINNY_METADATA), &inserted, Irp, rollback)) {
-        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-        space_list_subtract(Vcb, c, FALSE, address, Vcb->superblock.node_size, rollback);
-        ExReleaseResourceLite(&c->lock);
-
-        add_parents_to_cache(Vcb, inserted.tree);
-
-        return TRUE;
-    }
-
-    ERR("insert_tree_item failed\n");
-    ExFreePool(item);
-
-    return FALSE;
-}
-
-// Reserves node_size bytes in chunk c for a tree block and records the
-// reservation in the extent root.
-//
-// On filesystems with the SKINNY_METADATA incompat flag the work is delegated
-// to insert_tree_extent_skinny; otherwise an EXTENT_ITEM keyed by
-// (address, node_size) is inserted, the range is removed from the chunk's
-// space list and the ancestors of the touched extent tree node are flagged
-// for writing.
-//
-// On success returns TRUE and stores the chosen address in *new_address.
-// Returns FALSE if no space is found in the chunk, on allocation failure or
-// if the insertion fails; *new_address is left untouched in that case.
-static BOOL insert_tree_extent(device_extension* Vcb, UINT8 level, UINT64 root_id, chunk* c, UINT64* new_address, PIRP Irp, LIST_ENTRY* rollback) {
-    UINT64 address;
-    EXTENT_ITEM_TREE2* eit2;
-    traverse_ptr insert_tp;
-
-    // One argument per conversion specifier: Irp was missing here, leaving
-    // the last %p to read an indeterminate value.
-    TRACE("(%p, %x, %llx, %p, %p, %p, %p)\n", Vcb, level, root_id, c, new_address, Irp, rollback);
-
-    if (!find_address_in_chunk(Vcb, c, Vcb->superblock.node_size, &address))
-        return FALSE;
-
-    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
-        BOOL b = insert_tree_extent_skinny(Vcb, level, root_id, c, address, Irp, rollback);
-
-        if (b)
-            *new_address = address;
-
-        return b;
-    }
-
-    eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
-    if (!eit2) {
-        ERR("out of memory\n");
-        return FALSE;
-    }
-
-    // eit.firstitem is deliberately not set here.
-    // NOTE(review): it appears to be filled in later, when the tree is
-    // written out - confirm against write_trees.
-    eit2->eit.extent_item.refcount = 1;
-    eit2->eit.extent_item.generation = Vcb->superblock.generation;
-    eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
-    eit2->eit.level = level;
-    eit2->type = TYPE_TREE_BLOCK_REF;
-    eit2->tbr.offset = root_id;
-
-    if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, Irp, rollback)) {
-        ERR("insert_tree_item failed\n");
-        ExFreePool(eit2);
-        return FALSE;
-    }
-
-    ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-
-    space_list_subtract(Vcb, c, FALSE, address, Vcb->superblock.node_size, rollback);
-
-    ExReleaseResourceLite(&c->lock);
-
-    add_parents_to_cache(Vcb, insert_tp.tree);
-
-    *new_address = address;
-
-    return TRUE;
-}
-
-// Finds somewhere to write tree t and records it in t->new_address /
-// t->has_new_address.
-//
-// Candidates are tried in this order:
-//   1. the chunk that holds the tree's current address, if it has one;
-//   2. every other chunk whose type equals the wanted flags and which has at
-//      least node_size bytes unused;
-//   3. a freshly allocated chunk.
-//
-// If t->flags is zero the wanted flags default to SYSTEM|DUPLICATE for the
-// chunk root, DATA|METADATA on mixed-group filesystems and
-// METADATA|DUPLICATE otherwise.
-//
-// Returns STATUS_SUCCESS once an extent has been inserted, or
-// STATUS_DISK_FULL if no candidate worked.
-NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
-    chunk *origchunk = NULL, *c;
-    LIST_ENTRY* le;
-    UINT64 flags = t->flags, addr;
-
-    if (flags == 0) {
-        if (t->root->id == BTRFS_ROOT_CHUNK)
-            flags = BLOCK_FLAG_SYSTEM | BLOCK_FLAG_DUPLICATE;
-        else if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS)
-            flags = BLOCK_FLAG_DATA | BLOCK_FLAG_METADATA;
-        else
-            flags = BLOCK_FLAG_METADATA | BLOCK_FLAG_DUPLICATE;
-    }
-
-    if (t->has_address) {
-        origchunk = get_chunk_from_address(Vcb, t->header.address);
-
-        // The lookup can come back empty (other callers check for that), so
-        // only try the original chunk when there actually is one. With a
-        // NULL origchunk the scan below simply considers every chunk.
-        if (origchunk && insert_tree_extent(Vcb, t->header.level, t->header.tree_id, origchunk, &addr, Irp, rollback)) {
-            t->new_address = addr;
-            t->has_new_address = TRUE;
-            return STATUS_SUCCESS;
-        }
-    }
-
-    ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
-
-    le = Vcb->chunks.Flink;
-    while (le != &Vcb->chunks) {
-        c = CONTAINING_RECORD(le, chunk, list_entry);
-
-        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-
-        if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
-            if (insert_tree_extent(Vcb, t->header.level, t->header.tree_id, c, &addr, Irp, rollback)) {
-                ExReleaseResourceLite(&c->lock);
-                ExReleaseResourceLite(&Vcb->chunk_lock);
-                t->new_address = addr;
-                t->has_new_address = TRUE;
-                return STATUS_SUCCESS;
-            }
-        }
-
-        ExReleaseResourceLite(&c->lock);
-
-        le = le->Flink;
-    }
-
-    // allocate new chunk if necessary
-    if ((c = alloc_chunk(Vcb, flags))) {
-        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-
-        if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
-            if (insert_tree_extent(Vcb, t->header.level, t->header.tree_id, c, &addr, Irp, rollback)) {
-                ExReleaseResourceLite(&c->lock);
-                ExReleaseResourceLite(&Vcb->chunk_lock);
-                t->new_address = addr;
-                t->has_new_address = TRUE;
-                return STATUS_SUCCESS;
-            }
-        }
-
-        ExReleaseResourceLite(&c->lock);
-    }
-
-    ExReleaseResourceLite(&Vcb->chunk_lock);
-
-    ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size);
-
-    return STATUS_DISK_FULL;
-}
-
-// Looks up the METADATA_ITEM for the tree block at `address` in the extent
-// root and deletes it, then hands node_size bytes back to the owning chunk
-// (usage counter and space list).
-//
-// Returns FALSE if the lookup fails, no METADATA_ITEM exists for the address
-// or the item is too short; TRUE once the item has been deleted, even if the
-// owning chunk could not be found (that case is only logged).
-static BOOL reduce_tree_extent_skinny(device_extension* Vcb, UINT64 address, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
-    NTSTATUS Status;
-    traverse_ptr found;
-    chunk* owner;
-    KEY wanted;
-
-    wanted.obj_id = address;
-    wanted.obj_type = TYPE_METADATA_ITEM;
-    wanted.offset = 0xffffffffffffffff;
-
-    Status = find_item(Vcb, Vcb->extent_root, &found, &wanted, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return FALSE;
-    }
-
-    if (!(found.item->key.obj_id == wanted.obj_id && found.item->key.obj_type == wanted.obj_type)) {
-        TRACE("could not find %llx,%x,%llx in extent_root\n", wanted.obj_id, wanted.obj_type, wanted.offset);
-        return FALSE;
-    }
-
-    if (found.item->size < sizeof(EXTENT_ITEM_SKINNY_METADATA)) {
-        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", found.item->key.obj_id, found.item->key.obj_type, found.item->key.offset, found.item->size, sizeof(EXTENT_ITEM_SKINNY_METADATA));
-        return FALSE;
-    }
-
-    delete_tree_item(Vcb, &found, rollback);
-
-    owner = get_chunk_from_address(Vcb, address);
-
-    if (!owner) {
-        ERR("could not find chunk for address %llx\n", address);
-        return TRUE;
-    }
-
-    ExAcquireResourceExclusiveLite(&owner->lock, TRUE);
-
-    decrease_chunk_usage(owner, Vcb->superblock.node_size);
-
-    space_list_add(Vcb, owner, TRUE, address, Vcb->superblock.node_size, rollback);
-
-    ExReleaseResourceLite(&owner->lock);
-
-    return TRUE;
-}
-
-// TESTING
-// static void check_tree_num_items(tree* t) {
-// LIST_ENTRY* le2;
-// UINT32 ni;
-//
-// le2 = t->itemlist.Flink;
-// ni = 0;
-// while (le2 != &t->itemlist) {
-// tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
-// if (!td->ignore)
-// ni++;
-// le2 = le2->Flink;
-// }
-//
-// if (t->header.num_items != ni) {
-// ERR("tree %p not okay: num_items was %x, expecting %x\n", t, ni, t->header.num_items);
-// int3;
-// } else {
-// ERR("tree %p okay\n", t);
-// }
-// }
-//
-// static void check_trees_num_items(LIST_ENTRY* tc) {
-// LIST_ENTRY* le = tc->Flink;
-// while (le != tc) {
-// tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
-//
-// check_tree_num_items(tc2->tree);
-//
-// le = le->Flink;
-// }
-// }
-
-// Converts the extent bookkeeping of an old-style child tree block (the one
-// referenced by td, a child of t) to the current format.
-//
-// The block's EXTENT_REF_V0 and its EXTENT_ITEM are looked up in the extent
-// root and deleted, and a replacement is inserted: a METADATA_ITEM when the
-// filesystem uses skinny metadata, an EXTENT_ITEM with an inline
-// TREE_BLOCK_REF otherwise. The generation is carried over from the V0 ref.
-// Finally the ancestors of every extent tree node touched are flagged for
-// writing.
-//
-// Failures are logged and the function returns without reporting them; if no
-// EXTENT_REF_V0 exists for the block nothing is changed.
-static void convert_old_tree_extent(device_extension* Vcb, tree_data* td, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
-    KEY searchkey;
-    traverse_ptr tp, tp2, insert_tp;
-    EXTENT_REF_V0* erv0;
-    NTSTATUS Status;
-
-    TRACE("(%p, %p, %p)\n", Vcb, td, t);
-
-    searchkey.obj_id = td->treeholder.address;
-    searchkey.obj_type = TYPE_EXTENT_REF_V0;
-    searchkey.offset = 0xffffffffffffffff;
-
-    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return;
-    }
-
-    if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
-        TRACE("could not find EXTENT_REF_V0 for %llx\n", searchkey.obj_id);
-        return;
-    }
-
-    searchkey.obj_id = td->treeholder.address;
-    searchkey.obj_type = TYPE_EXTENT_ITEM;
-    searchkey.offset = Vcb->superblock.node_size;
-
-    Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return;
-    }
-
-    if (keycmp(&searchkey, &tp2.item->key)) {
-        ERR("could not find %llx,%x,%llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
-        return;
-    }
-
-    if (tp.item->size < sizeof(EXTENT_REF_V0)) {
-        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_REF_V0));
-        return;
-    }
-
-    erv0 = (EXTENT_REF_V0*)tp.item->data;
-
-    delete_tree_item(Vcb, &tp, rollback);
-    delete_tree_item(Vcb, &tp2, rollback);
-
-    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
-        EXTENT_ITEM_SKINNY_METADATA* eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
-
-        if (!eism) {
-            ERR("out of memory\n");
-            return;
-        }
-
-        eism->ei.refcount = 1;
-        eism->ei.generation = erv0->gen;
-        eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
-        eism->type = TYPE_TREE_BLOCK_REF;
-        eism->tbr.offset = t->header.tree_id;
-
-        if (!insert_tree_item(Vcb, Vcb->extent_root, td->treeholder.address, TYPE_METADATA_ITEM, t->header.level -1, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, Irp, rollback)) {
-            ERR("insert_tree_item failed\n");
-            // The item was not taken over by the tree, so release it here,
-            // as the other insert_tree_item callers in this file do.
-            ExFreePool(eism);
-            return;
-        }
-    } else {
-        EXTENT_ITEM_TREE2* eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
-
-        if (!eit2) {
-            ERR("out of memory\n");
-            return;
-        }
-
-        eit2->eit.extent_item.refcount = 1;
-        eit2->eit.extent_item.generation = erv0->gen;
-        eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
-        eit2->eit.firstitem = td->key;
-        eit2->eit.level = t->header.level - 1;
-        eit2->type = TYPE_TREE_BLOCK_REF;
-        eit2->tbr.offset = t->header.tree_id;
-
-        if (!insert_tree_item(Vcb, Vcb->extent_root, td->treeholder.address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, Irp, rollback)) {
-            ERR("insert_tree_item failed\n");
-            // Same ownership rule as above: free what the tree did not take.
-            ExFreePool(eit2);
-            return;
-        }
-    }
-
-    add_parents_to_cache(Vcb, insert_tp.tree);
-    add_parents_to_cache(Vcb, tp.tree);
-    add_parents_to_cache(Vcb, tp2.tree);
-}
-
-// Drops the extent-root entry for the tree block at `address` and returns
-// its space to the owning chunk.
-//
-// With skinny metadata the METADATA_ITEM path is tried first; if that does
-// not succeed the function falls back to the EXTENT_ITEM keyed by
-// (address, node_size). Items with a refcount above 1 are rejected. For an
-// EXTENT_ITEM_V0 the matching EXTENT_REF_V0 is deleted as well. If t is
-// given and predates mixed backrefs, the extents referenced by its
-// pre-existing items are converted too.
-//
-// Returns STATUS_SUCCESS, the find_item status on lookup failure, or
-// STATUS_INTERNAL_ERROR for a missing/short item or a refcount above 1.
-static NTSTATUS reduce_tree_extent(device_extension* Vcb, UINT64 address, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
-    NTSTATUS Status;
-    KEY searchkey;
-    traverse_ptr tp;
-    chunk* owner;
-
-    // FIXME - deal with refcounts > 1
-
-    TRACE("(%p, %llx, %p)\n", Vcb, address, t);
-
-    if ((Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) &&
-        reduce_tree_extent_skinny(Vcb, address, t, Irp, rollback))
-        return STATUS_SUCCESS;
-
-    searchkey.obj_id = address;
-    searchkey.obj_type = TYPE_EXTENT_ITEM;
-    searchkey.offset = Vcb->superblock.node_size;
-
-    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
-    }
-
-    if (keycmp(&tp.item->key, &searchkey) != 0) {
-        ERR("could not find %llx,%x,%llx in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
-        int3;
-        return STATUS_INTERNAL_ERROR;
-    }
-
-    if (tp.item->size != sizeof(EXTENT_ITEM_V0)) {
-        EXTENT_ITEM* ei;
-
-        if (tp.item->size < sizeof(EXTENT_ITEM)) {
-            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
-            return STATUS_INTERNAL_ERROR;
-        }
-
-        ei = (EXTENT_ITEM*)tp.item->data;
-
-        if (ei->refcount > 1) {
-            FIXME("FIXME - cannot deal with refcounts larger than 1 at present (ei->refcount == %llx)\n", ei->refcount);
-            return STATUS_INTERNAL_ERROR;
-        }
-    } else {
-        EXTENT_ITEM_V0* eiv0 = (EXTENT_ITEM_V0*)tp.item->data;
-
-        if (eiv0->refcount > 1) {
-            FIXME("FIXME - cannot deal with refcounts larger than 1 at present (eiv0->refcount == %llx)\n", eiv0->refcount);
-            return STATUS_INTERNAL_ERROR;
-        }
-    }
-
-    delete_tree_item(Vcb, &tp, rollback);
-
-    // if EXTENT_ITEM_V0, delete corresponding B4 item
-    if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
-        traverse_ptr ref_tp;
-
-        searchkey.obj_id = address;
-        searchkey.obj_type = TYPE_EXTENT_REF_V0;
-        searchkey.offset = 0xffffffffffffffff;
-
-        Status = find_item(Vcb, Vcb->extent_root, &ref_tp, &searchkey, FALSE, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("error - find_item returned %08x\n", Status);
-            return Status;
-        }
-
-        if (ref_tp.item->key.obj_id == searchkey.obj_id && ref_tp.item->key.obj_type == searchkey.obj_type)
-            delete_tree_item(Vcb, &ref_tp, rollback);
-    }
-
-    if (t && !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
-        LIST_ENTRY* le;
-
-        // when writing old internal trees, convert related extents
-
-        for (le = t->itemlist.Flink; le != &t->itemlist; le = le->Flink) {
-            tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
-            EXTENT_DATA* ed;
-            EXTENT_DATA2* ed2;
-
-            if (td->ignore || td->inserted)
-                continue;
-
-            if (t->header.level > 0) {
-                convert_old_tree_extent(Vcb, td, t, Irp, rollback);
-                continue;
-            }
-
-            if (td->key.obj_type != TYPE_EXTENT_DATA || td->size < sizeof(EXTENT_DATA))
-                continue;
-
-            ed = (EXTENT_DATA*)td->data;
-
-            if (ed->type != EXTENT_TYPE_REGULAR && ed->type != EXTENT_TYPE_PREALLOC)
-                continue;
-
-            if (td->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2))
-                continue;
-
-            ed2 = (EXTENT_DATA2*)ed->data;
-
-            if (ed2->address != 0) {
-                TRACE("trying to convert old data extent %llx,%llx\n", ed2->address, ed2->size);
-                convert_old_data_extent(Vcb, ed2->address, ed2->size, Irp, rollback);
-            }
-        }
-    }
-
-    owner = get_chunk_from_address(Vcb, address);
-
-    if (!owner) {
-        ERR("could not find chunk for address %llx\n", address);
-        return STATUS_SUCCESS;
-    }
-
-    ExAcquireResourceExclusiveLite(&owner->lock, TRUE);
-
-    decrease_chunk_usage(owner, tp.item->key.offset);
-
-    space_list_add(Vcb, owner, TRUE, address, tp.item->key.offset, rollback);
-
-    ExReleaseResourceLite(&owner->lock);
-
-    return STATUS_SUCCESS;
-}
-
-// Gives every tree that is flagged for writing and has no new address yet a
-// freshly allocated extent: a new address is obtained, the extent at the old
-// address (if the tree had one) is released, and the usage counter of the
-// chunk holding the new address is raised by node_size.
-//
-// Returns the first failure status encountered, STATUS_INTERNAL_ERROR if the
-// chunk for a new address cannot be found, or STATUS_SUCCESS.
-static NTSTATUS allocate_tree_extents(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
-    NTSTATUS Status;
-    LIST_ENTRY* entry;
-
-    TRACE("(%p)\n", Vcb);
-
-    for (entry = Vcb->trees.Flink; entry != &Vcb->trees; entry = entry->Flink) {
-        tree* cur = CONTAINING_RECORD(entry, tree, list_entry);
-        chunk* dest;
-
-        if (!cur->write || cur->has_new_address)
-            continue;
-
-        Status = get_tree_new_address(Vcb, cur, Irp, rollback);
-        if (!NT_SUCCESS(Status)) {
-            ERR("get_tree_new_address returned %08x\n", Status);
-            return Status;
-        }
-
-        TRACE("allocated extent %llx\n", cur->new_address);
-
-        if (cur->has_address) {
-            Status = reduce_tree_extent(Vcb, cur->header.address, cur, Irp, rollback);
-
-            if (!NT_SUCCESS(Status)) {
-                ERR("reduce_tree_extent returned %08x\n", Status);
-                return Status;
-            }
-        }
-
-        dest = get_chunk_from_address(Vcb, cur->new_address);
-
-        if (!dest) {
-            ERR("could not find chunk for address %llx\n", cur->new_address);
-            return STATUS_INTERNAL_ERROR;
-        }
-
-        increase_chunk_usage(dest, Vcb->superblock.node_size);
-    }
-
-    return STATUS_SUCCESS;
-}
-
-// For every root-level tree (no parent) that is flagged for writing, copies
-// the tree's new address, level and the current generation into the owning
-// root's ROOT_ITEM and updates the root's treeholder address.
-//
-// Trees belonging to the root tree or the chunk tree only get the treeholder
-// update. For all others the ROOT_ITEM in the root tree is refreshed: an
-// item shorter than ROOT_ITEM is deleted and re-inserted at full length,
-// otherwise its data is overwritten in place.
-//
-// Afterwards the chunk caches are updated. Returns the first failure status,
-// STATUS_INTERNAL_ERROR if a ROOT_ITEM is missing or cannot be re-inserted,
-// or STATUS_SUCCESS.
-static NTSTATUS update_root_root(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
-    LIST_ENTRY* le;
-    NTSTATUS Status;
-
-    TRACE("(%p)\n", Vcb);
-
-    le = Vcb->trees.Flink;
-    while (le != &Vcb->trees) {
-        tree* t = CONTAINING_RECORD(le, tree, list_entry);
-
-        if (t->write && !t->parent) {
-            if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
-                KEY searchkey;
-                traverse_ptr tp;
-
-                searchkey.obj_id = t->root->id;
-                searchkey.obj_type = TYPE_ROOT_ITEM;
-                searchkey.offset = 0xffffffffffffffff;
-
-                Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
-                if (!NT_SUCCESS(Status)) {
-                    ERR("error - find_item returned %08x\n", Status);
-                    return Status;
-                }
-
-                if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
-                    ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
-                    int3;
-                    return STATUS_INTERNAL_ERROR;
-                }
-
-                TRACE("updating the address for root %llx to %llx\n", searchkey.obj_id, t->new_address);
-
-                t->root->root_item.block_number = t->new_address;
-                t->root->root_item.root_level = t->header.level;
-                t->root->root_item.generation = Vcb->superblock.generation;
-                t->root->root_item.generation2 = Vcb->superblock.generation;
-
-                if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry
-                    ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
-
-                    if (!ri) {
-                        ERR("out of memory\n");
-                        return STATUS_INSUFFICIENT_RESOURCES;
-                    }
-
-                    RtlCopyMemory(ri, &t->root->root_item, sizeof(ROOT_ITEM));
-
-                    delete_tree_item(Vcb, &tp, rollback);
-
-                    if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, 0, ri, sizeof(ROOT_ITEM), NULL, Irp, rollback)) {
-                        ERR("insert_tree_item failed\n");
-                        // The tree did not take the buffer; free it here as
-                        // the other insert_tree_item callers in this file do.
-                        ExFreePool(ri);
-                        return STATUS_INTERNAL_ERROR;
-                    }
-                } else
-                    RtlCopyMemory(tp.item->data, &t->root->root_item, sizeof(ROOT_ITEM));
-            }
-
-            t->root->treeholder.address = t->new_address;
-        }
-
-        le = le->Flink;
-    }
-
-    Status = update_chunk_caches(Vcb, Irp, rollback);
-    if (!NT_SUCCESS(Status)) {
-        ERR("update_chunk_caches returned %08x\n", Status);
-        return Status;
-    }
-
-    return STATUS_SUCCESS;
-}
-
-// Completion routine for the per-stripe write IRPs; conptr is the
-// write_data_stripe that was registered with IoSetCompletionRoutine.
-//
-// Records the outcome of the write in the stripe. If the write failed, every
-// sibling stripe of the same context that is still pending is marked as
-// cancelling and its IRP is cancelled. In all cases the context's
-// stripes_left counter is decremented, and the context event is set when it
-// reaches zero.
-//
-// Always returns STATUS_MORE_PROCESSING_REQUIRED.
-// NOTE(review): per the WDK this should leave the IRP owned by this driver -
-// confirm where the IRP is subsequently released.
-static NTSTATUS STDCALL write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
- write_data_stripe* stripe = conptr;
- write_data_context* context = (write_data_context*)stripe->context;
- LIST_ENTRY* le;
-
- // FIXME - we need a lock here
-
- // A stripe that a failing sibling already asked to cancel: just
- // acknowledge the cancellation, its iosb is not recorded.
- if (stripe->status == WriteDataStatus_Cancelling) {
- stripe->status = WriteDataStatus_Cancelled;
- goto end;
- }
-
- stripe->iosb = Irp->IoStatus;
-
- if (NT_SUCCESS(Irp->IoStatus.Status)) {
- stripe->status = WriteDataStatus_Success;
- } else {
- le = context->stripes.Flink;
-
- // Marked as failed before the walk, so the loop below (which only
- // touches pending stripes) skips this stripe.
- stripe->status = WriteDataStatus_Error;
-
- // Cancel every other write of this context that has not completed yet.
- while (le != &context->stripes) {
- write_data_stripe* s2 = CONTAINING_RECORD(le, write_data_stripe, list_entry);
-
- if (s2->status == WriteDataStatus_Pending) {
- s2->status = WriteDataStatus_Cancelling;
- IoCancelIrp(s2->Irp);
- }
-
- le = le->Flink;
- }
- }
-
-end:
- // The last stripe to complete signals the context event.
- if (InterlockedDecrement(&context->stripes_left) == 0)
- KeSetEvent(&context->Event, 0, FALSE);
-
- return STATUS_MORE_PROCESSING_REQUIRED;
-}
-
-// Releases everything queued on wtc->stripes and leaves the list empty.
-//
-// First pass: for stripes on direct-I/O devices, unlock and free the MDL
-// attached to the stripe's IRP. Second pass: free each stripe's data buffer
-// when the stripe owns it (need_free), then the stripe record itself. A
-// buffer can be shared by several stripes, so once it has been freed the
-// pointer is cleared in every later stripe to avoid freeing it twice.
-//
-// wtc itself is not freed.
-// NOTE(review): the IRPs are not released here - confirm where that happens.
-void free_write_data_stripes(write_data_context* wtc) {
-    LIST_ENTRY *le, *le2, *nextle;
-
-    le = wtc->stripes.Flink;
-    while (le != &wtc->stripes) {
-        write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
-
-        // The device flag alone does not prove an MDL was attached (a device
-        // that also sets DO_BUFFERED_IO is given a system buffer instead),
-        // so check the pointer before unlocking.
-        if (stripe->Irp && (stripe->device->devobj->Flags & DO_DIRECT_IO) && stripe->Irp->MdlAddress) {
-            MmUnlockPages(stripe->Irp->MdlAddress);
-            IoFreeMdl(stripe->Irp->MdlAddress);
-            stripe->Irp->MdlAddress = NULL;
-        }
-
-        le = le->Flink;
-    }
-
-    le = wtc->stripes.Flink;
-    while (le != &wtc->stripes) {
-        write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
-
-        nextle = le->Flink;
-
-        if (stripe->buf && stripe->need_free) {
-            ExFreePool(stripe->buf);
-
-            le2 = le->Flink;
-            while (le2 != &wtc->stripes) {
-                write_data_stripe* s2 = CONTAINING_RECORD(le2, write_data_stripe, list_entry);
-
-                if (s2->buf == stripe->buf)
-                    s2->buf = NULL;
-
-                le2 = le2->Flink;
-            }
-        }
-
-        ExFreePool(stripe);
-
-        le = nextle;
-    }
-
-    // Every entry has been freed; reset the head so it no longer points at
-    // released memory and a repeated call is a harmless no-op.
-    InitializeListHead(&wtc->stripes);
-}
-
-static NTSTATUS write_trees(device_extension* Vcb, PIRP Irp) {
- UINT8 level;
- UINT8 *data, *body;
- UINT32 crc32;
- NTSTATUS Status;
- LIST_ENTRY* le;
- write_data_context* wtc;
-
- TRACE("(%p)\n", Vcb);
-
- for (level = 0; level <= 255; level++) {
- BOOL nothing_found = TRUE;
-
- TRACE("level = %u\n", level);
-
- le = Vcb->trees.Flink;
- while (le != &Vcb->trees) {
- tree* t = CONTAINING_RECORD(le, tree, list_entry);
-
- if (t->write && t->header.level == level) {
- KEY firstitem, searchkey;
- LIST_ENTRY* le2;
- traverse_ptr tp;
- EXTENT_ITEM_TREE* eit;
-
- if (!t->has_new_address) {
- ERR("error - tried to write tree with no new address\n");
- int3;
- }
-
- le2 = t->itemlist.Flink;
- while (le2 != &t->itemlist) {
- tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
- if (!td->ignore) {
- firstitem = td->key;
- break;
- }
- le2 = le2->Flink;
- }
-
- if (t->parent) {
- t->paritem->key = firstitem;
- t->paritem->treeholder.address = t->new_address;
- t->paritem->treeholder.generation = Vcb->superblock.generation;
- }
-
- if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
- searchkey.obj_id = t->new_address;
- searchkey.obj_type = TYPE_EXTENT_ITEM;
- searchkey.offset = Vcb->superblock.node_size;
-
- Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return Status;
- }
-
- if (keycmp(&searchkey, &tp.item->key)) {
-// traverse_ptr next_tp;
-// BOOL b;
-// tree_data* paritem;
-
- ERR("could not find %llx,%x,%llx in extent_root (found %llx,%x,%llx instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-
-// searchkey.obj_id = 0;
-// searchkey.obj_type = 0;
-// searchkey.offset = 0;
-//
-// find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
-//
-// paritem = NULL;
-// do {
-// if (tp.tree->paritem != paritem) {
-// paritem = tp.tree->paritem;
-// ERR("paritem: %llx,%x,%llx\n", paritem->key.obj_id, paritem->key.obj_type, paritem->key.offset);
-// }
-//
-// ERR("%llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-//
-// b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
-// if (b) {
-// free_traverse_ptr(&tp);
-// tp = next_tp;
-// }
-// } while (b);
-//
-// free_traverse_ptr(&tp);
-
- return STATUS_INTERNAL_ERROR;
- }
-
- if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
- ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
- return STATUS_INTERNAL_ERROR;
- }
-
- eit = (EXTENT_ITEM_TREE*)tp.item->data;
- eit->firstitem = firstitem;
- }
-
- nothing_found = FALSE;
- }
-
- le = le->Flink;
- }
-
- if (nothing_found)
- break;
- }
-
- TRACE("allocated tree extents\n");
-
- wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context), ALLOC_TAG);
- if (!wtc) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE);
- InitializeListHead(&wtc->stripes);
- wtc->tree = TRUE;
- wtc->stripes_left = 0;
-
- le = Vcb->trees.Flink;
- while (le != &Vcb->trees) {
- tree* t = CONTAINING_RECORD(le, tree, list_entry);
-#ifdef DEBUG_PARANOID
- UINT32 num_items = 0, size = 0;
- LIST_ENTRY* le2;
- BOOL crash = FALSE;
-#endif
-
- if (t->write) {
-#ifdef DEBUG_PARANOID
- le2 = t->itemlist.Flink;
- while (le2 != &t->itemlist) {
- tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
- if (!td->ignore) {
- num_items++;
-
- if (t->header.level == 0)
- size += td->size;
- }
- le2 = le2->Flink;
- }
-
- if (t->header.level == 0)
- size += num_items * sizeof(leaf_node);
- else
- size += num_items * sizeof(internal_node);
-
- if (num_items != t->header.num_items) {
- ERR("tree %llx, level %x: num_items was %x, expected %x\n", t->root->id, t->header.level, num_items, t->header.num_items);
- crash = TRUE;
- }
-
- if (size != t->size) {
- ERR("tree %llx, level %x: size was %x, expected %x\n", t->root->id, t->header.level, size, t->size);
- crash = TRUE;
- }
-
- if (t->header.num_items == 0 && t->parent) {
- ERR("tree %llx, level %x: tried to write empty tree with parent\n", t->root->id, t->header.level);
- crash = TRUE;
- }
-
- if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
- ERR("tree %llx, level %x: tried to write overlarge tree (%x > %x)\n", t->root->id, t->header.level, t->size, Vcb->superblock.node_size - sizeof(tree_header));
- crash = TRUE;
- }
-
- if (crash) {
- ERR("tree %p\n", t);
- le2 = t->itemlist.Flink;
- while (le2 != &t->itemlist) {
- tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
- if (!td->ignore) {
- ERR("%llx,%x,%llx inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted);
- }
- le2 = le2->Flink;
- }
- int3;
- }
-#endif
- t->header.address = t->new_address;
- t->header.generation = Vcb->superblock.generation;
- t->header.flags |= HEADER_FLAG_MIXED_BACKREF;
- t->has_address = TRUE;
-
- data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
- if (!data) {
- ERR("out of memory\n");
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto end;
- }
-
- body = data + sizeof(tree_header);
-
- RtlCopyMemory(data, &t->header, sizeof(tree_header));
- RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header));
-
- if (t->header.level == 0) {
- leaf_node* itemptr = (leaf_node*)body;
- int i = 0;
- LIST_ENTRY* le2;
- UINT8* dataptr = data + Vcb->superblock.node_size;
-
- le2 = t->itemlist.Flink;
- while (le2 != &t->itemlist) {
- tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
- if (!td->ignore) {
- dataptr = dataptr - td->size;
-
- itemptr[i].key = td->key;
- itemptr[i].offset = (UINT8*)dataptr - (UINT8*)body;
- itemptr[i].size = td->size;
- i++;
-
- if (td->size > 0)
- RtlCopyMemory(dataptr, td->data, td->size);
- }
-
- le2 = le2->Flink;
- }
- } else {
- internal_node* itemptr = (internal_node*)body;
- int i = 0;
- LIST_ENTRY* le2;
-
- le2 = t->itemlist.Flink;
- while (le2 != &t->itemlist) {
- tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
- if (!td->ignore) {
- itemptr[i].key = td->key;
- itemptr[i].address = td->treeholder.address;
- itemptr[i].generation = td->treeholder.generation;
- i++;
- }
-
- le2 = le2->Flink;
- }
- }
-
- crc32 = calc_crc32c(0xffffffff, (UINT8*)&((tree_header*)data)->fs_uuid, Vcb->superblock.node_size - sizeof(((tree_header*)data)->csum));
- crc32 = ~crc32;
- *((UINT32*)data) = crc32;
- TRACE("setting crc32 to %08x\n", crc32);
-
- Status = write_data(Vcb, t->new_address, data, TRUE, Vcb->superblock.node_size, wtc, NULL, NULL);
- if (!NT_SUCCESS(Status)) {
- ERR("write_data returned %08x\n", Status);
- goto end;
- }
- }
-
- le = le->Flink;
- }
-
- Status = STATUS_SUCCESS;
-
- if (wtc->stripes.Flink != &wtc->stripes) {
- // launch writes and wait
- le = wtc->stripes.Flink;
- while (le != &wtc->stripes) {
- write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
-
- if (stripe->status != WriteDataStatus_Ignore)
- IoCallDriver(stripe->device->devobj, stripe->Irp);
-
- le = le->Flink;
- }
-
- KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL);
-
- le = wtc->stripes.Flink;
- while (le != &wtc->stripes) {
- write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
-
- if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
- Status = stripe->iosb.Status;
- break;
- }
-
- le = le->Flink;
- }
-
- free_write_data_stripes(wtc);
- }
-
-end:
- ExFreePool(wtc);
-
- return Status;
-}
-
-// Looks up the ROOT_ITEM of root `root_id` in the root tree. Returns a pointer to
-// the item's data, or NULL if the search fails, the item found has a different
-// object id or type, or the item is smaller than a ROOT_ITEM.
-static ROOT_ITEM* find_root_item_for_backup(device_extension* Vcb, UINT64 root_id, PIRP Irp) {
-    KEY key;
-    traverse_ptr tp;
-
-    key.obj_id = root_id;
-    key.obj_type = TYPE_ROOT_ITEM;
-    key.offset = 0xffffffffffffffff;
-
-    if (!NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &key, FALSE, Irp)))
-        return NULL;
-
-    if (tp.item->key.obj_id != key.obj_id || tp.item->key.obj_type != key.obj_type)
-        return NULL;
-
-    if (tp.item->size < sizeof(ROOT_ITEM))
-        return NULL;
-
-    return (ROOT_ITEM*)tp.item->data;
-}
-
-// Fills in one backup-root slot of the superblock: the slot is zeroed, then given
-// the current root-tree and chunk-tree locations from Vcb->superblock, the
-// location/generation/level of the extent, fs, dev and checksum roots as recorded
-// in their ROOT_ITEMs (fields stay zero for any root whose item cannot be found),
-// and finally the byte and device totals.
-static void update_backup_superblock(device_extension* Vcb, superblock_backup* sb, PIRP Irp) {
-    ROOT_ITEM* ri;
-
-    RtlZeroMemory(sb, sizeof(superblock_backup));
-
-    sb->root_tree_addr = Vcb->superblock.root_tree_addr;
-    sb->root_tree_generation = Vcb->superblock.generation;
-    sb->root_level = Vcb->superblock.root_level;
-
-    sb->chunk_tree_addr = Vcb->superblock.chunk_tree_addr;
-    sb->chunk_tree_generation = Vcb->superblock.chunk_root_generation;
-    sb->chunk_root_level = Vcb->superblock.chunk_root_level;
-
-    ri = find_root_item_for_backup(Vcb, BTRFS_ROOT_EXTENT, Irp);
-    if (ri) {
-        sb->extent_tree_addr = ri->block_number;
-        sb->extent_tree_generation = ri->generation;
-        sb->extent_root_level = ri->root_level;
-    }
-
-    ri = find_root_item_for_backup(Vcb, BTRFS_ROOT_FSTREE, Irp);
-    if (ri) {
-        sb->fs_tree_addr = ri->block_number;
-        sb->fs_tree_generation = ri->generation;
-        sb->fs_root_level = ri->root_level;
-    }
-
-    ri = find_root_item_for_backup(Vcb, BTRFS_ROOT_DEVTREE, Irp);
-    if (ri) {
-        sb->dev_root_addr = ri->block_number;
-        sb->dev_root_generation = ri->generation;
-        sb->dev_root_level = ri->root_level;
-    }
-
-    ri = find_root_item_for_backup(Vcb, BTRFS_ROOT_CHECKSUM, Irp);
-    if (ri) {
-        sb->csum_root_addr = ri->block_number;
-        sb->csum_root_generation = ri->generation;
-        sb->csum_root_level = ri->root_level;
-    }
-
-    sb->total_bytes = Vcb->superblock.total_bytes;
-    sb->bytes_used = Vcb->superblock.bytes_used;
-    sb->num_devices = Vcb->superblock.num_devices;
-}
-
-// Refreshes the in-memory superblock and writes it to every device that has a
-// device object.
-//
-// The root-tree and chunk-tree addresses/levels are taken from any parentless tree
-// that is marked for writing, the backup-root array is shifted down by one slot and
-// its last slot refilled from the current state, and write_superblock is then
-// called per device. Returns the first failing status, or STATUS_SUCCESS.
-static NTSTATUS write_superblocks(device_extension* Vcb, PIRP Irp) {
-    UINT64 slot, dev;
-    LIST_ENTRY* entry;
-
-    TRACE("(%p)\n", Vcb);
-
-    for (entry = Vcb->trees.Flink; entry != &Vcb->trees; entry = entry->Flink) {
-        tree* t = CONTAINING_RECORD(entry, tree, list_entry);
-
-        // only top-level trees that are being rewritten matter here
-        if (!t->write || t->parent)
-            continue;
-
-        if (t->root == Vcb->root_root) {
-            Vcb->superblock.root_tree_addr = t->new_address;
-            Vcb->superblock.root_level = t->header.level;
-        } else if (t->root == Vcb->chunk_root) {
-            Vcb->superblock.chunk_tree_addr = t->new_address;
-            Vcb->superblock.chunk_root_generation = t->header.generation;
-            Vcb->superblock.chunk_root_level = t->header.level;
-        }
-    }
-
-    // drop the oldest backup by moving every slot one position towards the front
-    for (slot = 1; slot < BTRFS_NUM_BACKUP_ROOTS; slot++)
-        RtlCopyMemory(&Vcb->superblock.backup[slot - 1], &Vcb->superblock.backup[slot], sizeof(superblock_backup));
-
-    update_backup_superblock(Vcb, &Vcb->superblock.backup[BTRFS_NUM_BACKUP_ROOTS - 1], Irp);
-
-    for (dev = 0; dev < Vcb->superblock.num_devices; dev++) {
-        NTSTATUS Status;
-
-        if (!Vcb->devices[dev].devobj)
-            continue;
-
-        Status = write_superblock(Vcb, &Vcb->devices[dev]);
-        if (!NT_SUCCESS(Status)) {
-            ERR("write_superblock returned %08x\n", Status);
-            return Status;
-        }
-    }
-
-    return STATUS_SUCCESS;
-}
-
-// Applies one pending changed_extent of chunk c to the extent tree, then unlinks
-// and frees it.
-//
-// For every entry in ce->refs, the entry in ce->old_refs with the same root, objid
-// and offset (if any) supplies the previous reference count; the difference is
-// applied through increase_extent_refcount_data / decrease_extent_refcount_data.
-// If the extent's size changed and it had references before, its EXTENT_ITEM is
-// re-inserted under the new size and the old item deleted. Once ce->count is zero,
-// a "deleted" changed_sector covering the extent is handed to
-// commit_checksum_changes (unless ce->no_csum), the chunk's usage is decreased and
-// the space is added back to the chunk's space list.
-//
-// Returns STATUS_SUCCESS, or the first failing status. On failure ce stays linked
-// in its list and is not freed.
-static NTSTATUS flush_changed_extent(device_extension* Vcb, chunk* c, changed_extent* ce, PIRP Irp, LIST_ENTRY* rollback) {
-    LIST_ENTRY *le, *le2;
-    NTSTATUS Status;
-    UINT64 old_size;
-
-    le = ce->refs.Flink;
-    while (le != &ce->refs) {
-        changed_extent_ref* cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
-        LIST_ENTRY* le3 = le->Flink; // fetched up front: cer is freed at the end of this iteration
-        UINT64 old_count = 0;
-
-        // find (and consume) the matching old reference, if there is one
-        le2 = ce->old_refs.Flink;
-        while (le2 != &ce->old_refs) {
-            changed_extent_ref* cer2 = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
-
-            if (cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
-                old_count = cer2->edr.count;
-
-                RemoveEntryList(&cer2->list_entry);
-                ExFreePool(cer2);
-                break;
-            }
-
-            le2 = le2->Flink;
-        }
-
-        old_size = ce->old_count > 0 ? ce->old_size : ce->size;
-
-        if (cer->edr.count > old_count) {
-            Status = increase_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset, cer->edr.count - old_count, Irp, rollback);
-
-            if (!NT_SUCCESS(Status)) {
-                ERR("increase_extent_refcount_data returned %08x\n", Status);
-                return Status;
-            }
-        } else if (cer->edr.count < old_count) {
-            Status = decrease_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset,
-                                                   old_count - cer->edr.count, Irp, rollback);
-
-            if (!NT_SUCCESS(Status)) {
-                ERR("decrease_extent_refcount_data returned %08x\n", Status);
-                return Status;
-            }
-        }
-
-        // NOTE(review): this re-keying runs once per ref while ce->old_size is never
-        // updated - confirm a second pass can still find the old-size item.
-        if (ce->size != ce->old_size && ce->old_count > 0) {
-            KEY searchkey;
-            traverse_ptr tp;
-            void* data;
-
-            searchkey.obj_id = ce->address;
-            searchkey.obj_type = TYPE_EXTENT_ITEM;
-            searchkey.offset = ce->old_size;
-
-            Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-            if (!NT_SUCCESS(Status)) {
-                ERR("error - find_item returned %08x\n", Status);
-                return Status;
-            }
-
-            if (keycmp(&searchkey, &tp.item->key)) {
-                ERR("could not find (%llx,%x,%llx) in extent tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
-                return STATUS_INTERNAL_ERROR;
-            }
-
-            // the new item gets its own copy of the old item's payload
-            if (tp.item->size > 0) {
-                data = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
-
-                if (!data) {
-                    ERR("out of memory\n");
-                    return STATUS_INSUFFICIENT_RESOURCES;
-                }
-
-                RtlCopyMemory(data, tp.item->data, tp.item->size);
-            } else
-                data = NULL;
-
-            if (!insert_tree_item(Vcb, Vcb->extent_root, ce->address, TYPE_EXTENT_ITEM, ce->size, data, tp.item->size, NULL, Irp, rollback)) {
-                ERR("insert_tree_item failed\n");
-
-                // the tree did not take the copy, so release it here instead of leaking it
-                if (data)
-                    ExFreePool(data);
-
-                return STATUS_INTERNAL_ERROR;
-            }
-
-            delete_tree_item(Vcb, &tp, rollback);
-        }
-
-        RemoveEntryList(&cer->list_entry);
-        ExFreePool(cer);
-
-        le = le3;
-    }
-
-#ifdef DEBUG_PARANOID
-    if (!IsListEmpty(&ce->old_refs))
-        WARN("old_refs not empty\n");
-#endif
-
-    if (ce->count == 0) {
-        if (!ce->no_csum) {
-            LIST_ENTRY changed_sector_list;
-
-            changed_sector* sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
-            if (!sc) {
-                ERR("out of memory\n");
-                return STATUS_INSUFFICIENT_RESOURCES;
-            }
-
-            sc->ol.key = ce->address;
-            sc->checksums = NULL;
-            sc->length = ce->size / Vcb->superblock.sector_size;
-
-            sc->deleted = TRUE;
-
-            InitializeListHead(&changed_sector_list);
-            insert_into_ordered_list(&changed_sector_list, &sc->ol);
-
-            ExAcquireResourceExclusiveLite(&Vcb->checksum_lock, TRUE);
-            commit_checksum_changes(Vcb, &changed_sector_list);
-            ExReleaseResourceLite(&Vcb->checksum_lock);
-        }
-
-        decrease_chunk_usage(c, ce->size);
-
-        space_list_add(Vcb, c, TRUE, ce->address, ce->size, rollback);
-    }
-
-    RemoveEntryList(&ce->list_entry);
-    ExFreePool(ce);
-
-    return STATUS_SUCCESS;
-}
-
-// Walks every chunk (holding Vcb->chunk_lock shared and each chunk's lock
-// exclusively in turn), flushes its pending changed extents and, when the chunk's
-// `used` value differs from `oldused`, replaces its BLOCK_GROUP_ITEM in the extent
-// tree with an updated copy and adjusts superblock.bytes_used by the change
-// (doubled for RAID1, DUP and RAID10 chunks). If any extents were flushed and
-// Vcb->sector_checksums is non-empty, the checksum tree is updated afterwards.
-//
-// Returns STATUS_SUCCESS or the first failing status. RAID5 and RAID6 chunks whose
-// usage changed are rejected with STATUS_INTERNAL_ERROR.
-static NTSTATUS update_chunk_usage(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
-    LIST_ENTRY *le = Vcb->chunks.Flink, *le2;
-    chunk* c;
-    KEY searchkey;
-    traverse_ptr tp;
-    BLOCK_GROUP_ITEM* bgi;
-    NTSTATUS Status;
-    BOOL flushed_extents = FALSE;
-
-    TRACE("(%p)\n", Vcb);
-
-    ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
-
-    while (le != &Vcb->chunks) {
-        c = CONTAINING_RECORD(le, chunk, list_entry);
-
-        ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-
-        le2 = c->changed_extents.Flink;
-        while (le2 != &c->changed_extents) {
-            LIST_ENTRY* le3 = le2->Flink; // fetched up front: flush_changed_extent frees ce on success
-            changed_extent* ce = CONTAINING_RECORD(le2, changed_extent, list_entry);
-
-            Status = flush_changed_extent(Vcb, c, ce, Irp, rollback);
-            if (!NT_SUCCESS(Status)) {
-                ERR("flush_changed_extent returned %08x\n", Status);
-                ExReleaseResourceLite(&c->lock);
-                goto end;
-            }
-
-            flushed_extents = TRUE;
-
-            le2 = le3;
-        }
-
-        if (c->used != c->oldused) {
-            searchkey.obj_id = c->offset;
-            searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
-            searchkey.offset = c->chunk_item->size;
-
-            Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-            if (!NT_SUCCESS(Status)) {
-                ERR("error - find_item returned %08x\n", Status);
-                ExReleaseResourceLite(&c->lock);
-                goto end;
-            }
-
-            if (keycmp(&searchkey, &tp.item->key)) {
-                ERR("could not find (%llx,%x,%llx) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
-                int3;
-                Status = STATUS_INTERNAL_ERROR;
-                ExReleaseResourceLite(&c->lock);
-                goto end;
-            }
-
-            if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) {
-                ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
-                Status = STATUS_INTERNAL_ERROR;
-                ExReleaseResourceLite(&c->lock);
-                goto end;
-            }
-
-            // build the replacement item: a copy of the old one with the new usage
-            bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
-            if (!bgi) {
-                ERR("out of memory\n");
-                Status = STATUS_INSUFFICIENT_RESOURCES;
-                ExReleaseResourceLite(&c->lock);
-                goto end;
-            }
-
-            RtlCopyMemory(bgi, tp.item->data, tp.item->size);
-            bgi->used = c->used;
-
-            TRACE("adjusting usage of chunk %llx to %llx\n", c->offset, c->used);
-
-            delete_tree_item(Vcb, &tp, rollback);
-
-            if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, Irp, rollback)) {
-                ERR("insert_tree_item failed\n");
-                ExFreePool(bgi);
-                Status = STATUS_INTERNAL_ERROR;
-                ExReleaseResourceLite(&c->lock);
-                goto end;
-            }
-
-            // From here on bgi is referenced by the newly inserted tree item, so it
-            // must not be freed on the error paths below.
-
-            TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
-            TRACE("chunk_item type = %llx\n", c->chunk_item->type);
-
-            if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
-                Vcb->superblock.bytes_used += c->used - c->oldused;
-            } else if (c->chunk_item->type & BLOCK_FLAG_RAID1 || c->chunk_item->type & BLOCK_FLAG_DUPLICATE || c->chunk_item->type & BLOCK_FLAG_RAID10) {
-                Vcb->superblock.bytes_used += 2 * (c->used - c->oldused);
-            } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
-                FIXME("RAID5 not yet supported\n");
-                Status = STATUS_INTERNAL_ERROR;
-                ExReleaseResourceLite(&c->lock);
-                goto end;
-            } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
-                FIXME("RAID6 not yet supported\n");
-                Status = STATUS_INTERNAL_ERROR;
-                ExReleaseResourceLite(&c->lock);
-                goto end;
-            } else { // SINGLE
-                Vcb->superblock.bytes_used += c->used - c->oldused;
-            }
-
-            TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
-
-            c->oldused = c->used;
-        }
-
-        ExReleaseResourceLite(&c->lock);
-
-        le = le->Flink;
-    }
-
-    if (flushed_extents) {
-        ExAcquireResourceExclusiveLite(&Vcb->checksum_lock, TRUE);
-        if (!IsListEmpty(&Vcb->sector_checksums)) {
-            update_checksum_tree(Vcb, Irp, rollback);
-        }
-        ExReleaseResourceLite(&Vcb->checksum_lock);
-    }
-
-    Status = STATUS_SUCCESS;
-
-end:
-    ExReleaseResourceLite(&Vcb->chunk_lock);
-
-    return Status;
-}
-
-// Copies the key of the first entry in t's item list into *key (whether or not
-// that entry is flagged as ignored). If the list is empty, *key is left untouched.
-static void get_first_item(tree* t, KEY* key) {
-    LIST_ENTRY* head = &t->itemlist;
-
-    if (head->Flink != head)
-        *key = CONTAINING_RECORD(head->Flink, tree_data, list_entry)->key;
-}
-
-static NTSTATUS STDCALL split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, UINT32 numitems, UINT32 size) {
- tree *nt, *pt; // nt: new tree that receives the tail of t; pt: new parent, only created when t has no parent
- tree_data* td;
- tree_data* oldlastitem;
-// write_tree* wt2;
-// // tree_data *firsttd, *lasttd;
-// // LIST_ENTRY* le;
-// #ifdef DEBUG_PARANOID
-// KEY lastkey1, lastkey2;
-// traverse_ptr tp, next_tp;
-// ULONG numitems1, numitems2;
-// #endif
- // Splits t at newfirstitem: that item and everything after it move to a new tree (nt); t keeps numitems items with accounted size `size`.
- TRACE("splitting tree in %llx at (%llx,%x,%llx)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset);
- // nt is linked into t's parent right after t's entry; if t has no parent, a new tree (pt) one level up is created with entries for t and nt.
-// #ifdef DEBUG_PARANOID
-// lastkey1.obj_id = 0xffffffffffffffff;
-// lastkey1.obj_type = 0xff;
-// lastkey1.offset = 0xffffffffffffffff;
-//
-// if (!find_item(Vcb, t->root, &tp, &lastkey1, NULL, FALSE))
-// ERR("error - find_item failed\n");
-// else {
-// lastkey1 = tp.item->key;
-// numitems1 = 0;
-// while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
-// free_traverse_ptr(&tp);
-// tp = next_tp;
-// numitems1++;
-// }
-// free_traverse_ptr(&tp);
-// }
-// #endif
-
- nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
- if (!nt) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header)); // nt starts from a copy of t's header, so it has the same level
- nt->header.address = 0;
- nt->header.generation = Vcb->superblock.generation;
- nt->header.num_items = t->header.num_items - numitems; // everything t does not keep
- nt->header.flags = HEADER_FLAG_MIXED_BACKREF;
-
- nt->has_address = FALSE;
- nt->Vcb = Vcb;
- nt->parent = t->parent;
- nt->root = t->root;
-// nt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
- nt->new_address = 0;
- nt->has_new_address = FALSE;
- nt->flags = t->flags;
- InitializeListHead(&nt->itemlist);
-
-// ExInitializeResourceLite(&nt->nonpaged->load_tree_lock);
-
- oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry); // entry just before the split point; becomes t's last entry
-
-// // firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
-// // lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
-// //
-// // TRACE("old tree in %x was from (%x,%x,%x) to (%x,%x,%x)\n",
-// // (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
-// // (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
-// //
-// // le = wt->tree->itemlist.Flink;
-// // while (le != &wt->tree->itemlist) {
-// // td = CONTAINING_RECORD(le, tree_data, list_entry);
-// // TRACE("old tree item was (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
-// // le = le->Flink;
-// // }
- // hand the entries from newfirstitem up to t's old last entry over to nt's list head
- nt->itemlist.Flink = &newfirstitem->list_entry;
- nt->itemlist.Blink = t->itemlist.Blink;
- nt->itemlist.Flink->Blink = &nt->itemlist;
- nt->itemlist.Blink->Flink = &nt->itemlist;
- // close t's list again so that it ends at oldlastitem
- t->itemlist.Blink = &oldlastitem->list_entry;
- t->itemlist.Blink->Flink = &t->itemlist;
-
-// // le = wt->tree->itemlist.Flink;
-// // while (le != &wt->tree->itemlist) {
-// // td = CONTAINING_RECORD(le, tree_data, list_entry);
-// // TRACE("old tree item now (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
-// // le = le->Flink;
-// // }
-// //
-// // firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
-// // lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
-// //
-// // TRACE("old tree in %x is now from (%x,%x,%x) to (%x,%x,%x)\n",
-// // (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
-// // (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
-
- nt->size = t->size - size; // nt takes whatever part of the old size t does not keep
- t->size = size;
- t->header.num_items = numitems;
- nt->write = TRUE;
-
- InterlockedIncrement(&Vcb->open_trees);
- InsertTailList(&Vcb->trees, &nt->list_entry);
-
-// // // TESTING
-// // td = wt->tree->items;
-// // while (td) {
-// // if (!td->ignore) {
-// // TRACE("old tree item: (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
-// // }
-// // td = td->next;
-// // }
-
-// // oldlastitem->next = NULL;
-// // wt->tree->lastitem = oldlastitem;
-
-// // TRACE("last item is now (%x,%x,%x)\n", (UINT32)oldlastitem->key.obj_id, oldlastitem->key.obj_type, (UINT32)oldlastitem->key.offset);
-
- if (nt->header.level > 0) { // internal node: child trees already in memory that moved over must point at nt
- LIST_ENTRY* le = nt->itemlist.Flink;
-
- while (le != &nt->itemlist) {
- tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
-
- if (td2->treeholder.tree)
- td2->treeholder.tree->parent = nt;
-
- le = le->Flink;
- }
- }
-
- if (nt->parent) { // t already has a parent: just add an entry for nt to it
- td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
- if (!td) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- td->key = newfirstitem->key;
-
- InsertHeadList(&t->paritem->list_entry, &td->list_entry); // places nt's entry directly after t's entry in the parent's list
-
- td->ignore = FALSE;
- td->inserted = TRUE;
- td->treeholder.tree = nt;
-// td->treeholder.nonpaged->status = tree_holder_loaded;
- nt->paritem = td;
-
- nt->parent->header.num_items++;
- nt->parent->size += sizeof(internal_node);
-
- goto end;
- }
-
- TRACE("adding new tree parent\n");
-
- if (nt->header.level == 255) { // the new parent's level is nt's level + 1, which has to fit the level field
- ERR("cannot add parent to tree at level 255\n");
- return STATUS_INTERNAL_ERROR;
- }
-
- pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
- if (!pt) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header));
- pt->header.address = 0;
- pt->header.num_items = 2; // one entry for t and one for nt
- pt->header.level = nt->header.level + 1;
- pt->header.flags = HEADER_FLAG_MIXED_BACKREF;
-
- pt->has_address = FALSE;
- pt->Vcb = Vcb;
- pt->parent = NULL;
- pt->paritem = NULL;
- pt->root = t->root;
- pt->new_address = 0;
- pt->has_new_address = FALSE;
-// pt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
- pt->size = pt->header.num_items * sizeof(internal_node);
- pt->flags = t->flags;
- InitializeListHead(&pt->itemlist);
-
-// ExInitializeResourceLite(&pt->nonpaged->load_tree_lock);
-
- InterlockedIncrement(&Vcb->open_trees);
- InsertTailList(&Vcb->trees, &pt->list_entry);
- // first entry of the new parent: refers to t, keyed by t's first item
- td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
- if (!td) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- get_first_item(t, &td->key);
- td->ignore = FALSE;
- td->inserted = FALSE;
- td->treeholder.address = 0;
- td->treeholder.generation = Vcb->superblock.generation;
- td->treeholder.tree = t;
-// td->treeholder.nonpaged->status = tree_holder_loaded;
- InsertTailList(&pt->itemlist, &td->list_entry);
- t->paritem = td;
- // second entry of the new parent: refers to nt, keyed by the split item
- td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
- if (!td) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- td->key = newfirstitem->key;
- td->ignore = FALSE;
- td->inserted = FALSE;
- td->treeholder.address = 0;
- td->treeholder.generation = Vcb->superblock.generation;
- td->treeholder.tree = nt;
-// td->treeholder.nonpaged->status = tree_holder_loaded;
- InsertTailList(&pt->itemlist, &td->list_entry);
- nt->paritem = td;
-
- pt->write = TRUE;
-
- t->root->treeholder.tree = pt; // the new parent is now the tree the root points at
-
- t->parent = pt;
- nt->parent = pt;
-
-end:
- t->root->root_item.bytes_used += Vcb->superblock.node_size; // one node's worth added to the root's usage (not added on the early error returns above)
-
-// #ifdef DEBUG_PARANOID
-// lastkey2.obj_id = 0xffffffffffffffff;
-// lastkey2.obj_type = 0xff;
-// lastkey2.offset = 0xffffffffffffffff;
-//
-// if (!find_item(Vcb, wt->tree->root, &tp, &lastkey2, NULL, FALSE))
-// ERR("error - find_item failed\n");
-// else {
-// lastkey2 = tp.item->key;
-//
-// numitems2 = 0;
-// while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
-// free_traverse_ptr(&tp);
-// tp = next_tp;
-// numitems2++;
-// }
-// free_traverse_ptr(&tp);
-// }
-//
-// ERR("lastkey1 = %llx,%x,%llx\n", lastkey1.obj_id, lastkey1.obj_type, lastkey1.offset);
-// ERR("lastkey2 = %llx,%x,%llx\n", lastkey2.obj_id, lastkey2.obj_type, lastkey2.offset);
-// ERR("numitems1 = %u\n", numitems1);
-// ERR("numitems2 = %u\n", numitems2);
-// #endif
-
- return STATUS_SUCCESS;
-}
-
-// Walks t's non-ignored items, adding up the space each one needs in a node, and
-// splits t (via split_tree_at) in front of the first item that would push the
-// running total past the usable node size. Returns STATUS_SUCCESS without doing
-// anything if every item fits.
-static NTSTATUS STDCALL split_tree(device_extension* Vcb, tree* t) {
-    LIST_ENTRY* entry;
-    UINT32 used = 0, kept = 0, item_size;
-
-    // FIXME - naive implementation: maximizes number of filled trees
-
-    for (entry = t->itemlist.Flink; entry != &t->itemlist; entry = entry->Flink) {
-        tree_data* td = CONTAINING_RECORD(entry, tree_data, list_entry);
-
-        if (td->ignore)
-            continue;
-
-        // leaf items cost a leaf_node header plus their payload, internal items a fixed amount
-        if (t->header.level == 0)
-            item_size = sizeof(leaf_node) + td->size;
-        else
-            item_size = sizeof(internal_node);
-
-        // FIXME - move back if previous item was deleted item with same key
-        if (used + item_size > Vcb->superblock.node_size - sizeof(tree_header))
-            return split_tree_at(Vcb, t, td, kept, used);
-
-        used += item_size;
-        kept++;
-    }
-
-    return STATUS_SUCCESS;
-}
-
-static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, PIRP Irp, LIST_ENTRY* rollback) {
- LIST_ENTRY* le;
- tree_data* nextparitem = NULL; // parent's entry for the next non-ignored sibling after t, if any
- NTSTATUS Status;
- tree *next_tree, *par;
- BOOL loaded;
- // Combines t with the next non-ignored sibling under the same parent: merges the two if their sizes fit in one node, otherwise moves items from the sibling into t.
- TRACE("trying to amalgamate tree in root %llx, level %x (size %u)\n", t->root->id, t->header.level, t->size);
- // Returns STATUS_SUCCESS (also when t has no following sibling), or the failing status of do_load_tree / reduce_tree_extent.
- // FIXME - doesn't capture everything, as it doesn't ascend
- // FIXME - write proper function and put it in treefuncs.c
- le = t->paritem->list_entry.Flink;
- while (le != &t->parent->itemlist) {
- tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
-
- if (!td->ignore) {
- nextparitem = td;
- break;
- }
-
- le = le->Flink;
- }
-
- if (!nextparitem)
- return STATUS_SUCCESS;
-
- // FIXME - loop, and capture more than one tree if we can
-
- TRACE("nextparitem: key = %llx,%x,%llx\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset);
-// nextparitem = t->paritem;
-
-// ExAcquireResourceExclusiveLite(&t->parent->nonpaged->load_tree_lock, TRUE);
-
- Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, &loaded, NULL);
- if (!NT_SUCCESS(Status)) {
- ERR("do_load_tree returned %08x\n", Status);
- return Status;
- }
-
-// ExReleaseResourceLite(&t->parent->nonpaged->load_tree_lock);
-
- next_tree = nextparitem->treeholder.tree;
-
- if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) {
- // merge two trees into one
-
- t->header.num_items += next_tree->header.num_items;
- t->size += next_tree->size;
-
- if (next_tree->header.level > 0) { // internal node: child trees already in memory get t as their new parent
- le = next_tree->itemlist.Flink;
-
- while (le != &next_tree->itemlist) {
- tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
-
- if (td2->treeholder.tree)
- td2->treeholder.tree->parent = t;
-
- le = le->Flink;
- }
- }
- // append next_tree's whole item list to the end of t's list
- t->itemlist.Blink->Flink = next_tree->itemlist.Flink;
- t->itemlist.Blink->Flink->Blink = t->itemlist.Blink;
- t->itemlist.Blink = next_tree->itemlist.Blink;
- t->itemlist.Blink->Flink = &t->itemlist;
-
-// // TESTING
-// le = t->itemlist.Flink;
-// while (le != &t->itemlist) {
-// tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
-// if (!td->ignore) {
-// ERR("key: %llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
-// }
-// le = le->Flink;
-// }
-
- next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist; // next_tree's list is now empty
-
- next_tree->header.num_items = 0;
- next_tree->size = 0;
-
- if (next_tree->has_new_address) { // delete associated EXTENT_ITEM
- Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, Irp, rollback);
-
- if (!NT_SUCCESS(Status)) {
- ERR("reduce_tree_extent returned %08x\n", Status);
- return Status;
- }
- } else if (next_tree->has_address) {
- Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, Irp, rollback);
-
- if (!NT_SUCCESS(Status)) {
- ERR("reduce_tree_extent returned %08x\n", Status);
- return Status;
- }
- }
-
- if (!nextparitem->ignore) { // stop counting the sibling's entry in the parent
- nextparitem->ignore = TRUE;
- next_tree->parent->header.num_items--;
- next_tree->parent->size -= sizeof(internal_node);
- }
-
- par = next_tree->parent;
- while (par) { // flag the parent and every ancestor for writing
- par->write = TRUE;
- par = par->parent;
- }
-
- RemoveEntryList(&nextparitem->list_entry);
- ExFreePool(next_tree->paritem);
- next_tree->paritem = NULL;
-
- next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size;
-
- free_tree(next_tree);
- } else {
- // rebalance by moving items from second tree into first
- ULONG avg_size = (t->size + next_tree->size) / 2;
- KEY firstitem = {0, 0, 0};
-
- TRACE("attempting rebalance\n");
-
- le = next_tree->itemlist.Flink;
- while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) { // stops once t reaches the average size or next_tree is down to one counted item
- tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
- ULONG size;
-
- if (!td->ignore) {
- if (next_tree->header.level == 0)
- size = sizeof(leaf_node) + td->size;
- else
- size = sizeof(internal_node);
- } else
- size = 0; // ignored entries are moved too, but take no accounted space
-
- if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) {
- RemoveEntryList(&td->list_entry);
- InsertTailList(&t->itemlist, &td->list_entry);
-
- if (next_tree->header.level > 0 && td->treeholder.tree)
- td->treeholder.tree->parent = t;
-
- if (!td->ignore) {
- next_tree->size -= size;
- t->size += size;
- next_tree->header.num_items--;
- t->header.num_items++;
- }
- } else
- break;
-
- le = next_tree->itemlist.Flink; // td has left next_tree's list, so continue from its new first entry
- }
- // find the key of next_tree's first non-ignored item after the move
- le = next_tree->itemlist.Flink;
- while (le != &next_tree->itemlist) {
- tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
-
- if (!td->ignore) {
- firstitem = td->key;
- break;
- }
-
- le = le->Flink;
- }
-
-// ERR("firstitem = %llx,%x,%llx\n", firstitem.obj_id, firstitem.obj_type, firstitem.offset);
-
- // FIXME - once ascension is working, make this work with parent's parent, etc.
- if (next_tree->paritem)
- next_tree->paritem->key = firstitem; // the parent's entry for next_tree takes that key
-
- par = next_tree;
- while (par) { // flag next_tree and every ancestor for writing
- par->write = TRUE;
- par = par->parent;
- }
- }
-
- return STATUS_SUCCESS;
-}
-
-// Changes the level recorded in the extent tree for the tree block at `address`
-// to `level`.
-//
-// On filesystems with the skinny-metadata incompat flag, the METADATA_ITEM keyed
-// (address, TYPE_METADATA_ITEM, t->header.level) is tried first; if present it is
-// re-inserted with `level` as its key offset. Otherwise (or if that item is not
-// found) the EXTENT_ITEM for `address` is replaced by a copy whose `level` field
-// is updated.
-//
-// Returns STATUS_SUCCESS, a find_item failure status, STATUS_INSUFFICIENT_RESOURCES
-// on allocation failure, or STATUS_INTERNAL_ERROR if no suitable item exists, the
-// item is too small, or the insert fails.
-static NTSTATUS update_extent_level(device_extension* Vcb, UINT64 address, tree* t, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) {
-    KEY searchkey;
-    traverse_ptr tp;
-    NTSTATUS Status;
-
-    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
-        searchkey.obj_id = address;
-        searchkey.obj_type = TYPE_METADATA_ITEM;
-        searchkey.offset = t->header.level;
-
-        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-        if (!NT_SUCCESS(Status)) {
-            ERR("error - find_item returned %08x\n", Status);
-            return Status;
-        }
-
-        if (!keycmp(&tp.item->key, &searchkey)) {
-            EXTENT_ITEM_SKINNY_METADATA* eism;
-
-            // the level is part of the key here, so the payload is copied unchanged
-            if (tp.item->size > 0) {
-                eism = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
-
-                if (!eism) {
-                    ERR("out of memory\n");
-                    return STATUS_INSUFFICIENT_RESOURCES;
-                }
-
-                RtlCopyMemory(eism, tp.item->data, tp.item->size);
-            } else
-                eism = NULL;
-
-            delete_tree_item(Vcb, &tp, rollback);
-
-            if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, tp.item->size, NULL, Irp, rollback)) {
-                ERR("insert_tree_item failed\n");
-
-                // eism is NULL for a zero-sized item; never hand NULL to ExFreePool
-                if (eism)
-                    ExFreePool(eism);
-
-                return STATUS_INTERNAL_ERROR;
-            }
-
-            return STATUS_SUCCESS;
-        }
-    }
-
-    searchkey.obj_id = address;
-    searchkey.obj_type = TYPE_EXTENT_ITEM;
-    searchkey.offset = 0xffffffffffffffff;
-
-    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
-    if (!NT_SUCCESS(Status)) {
-        ERR("error - find_item returned %08x\n", Status);
-        return Status;
-    }
-
-    if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
-        EXTENT_ITEM_TREE* eit;
-
-        if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
-            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
-            return STATUS_INTERNAL_ERROR;
-        }
-
-        eit = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
-
-        if (!eit) {
-            ERR("out of memory\n");
-            return STATUS_INSUFFICIENT_RESOURCES;
-        }
-
-        RtlCopyMemory(eit, tp.item->data, tp.item->size);
-
-        delete_tree_item(Vcb, &tp, rollback);
-
-        // here the level lives in the item body, so the key stays the same
-        eit->level = level;
-
-        if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, eit, tp.item->size, NULL, Irp, rollback)) {
-            ERR("insert_tree_item failed\n");
-            ExFreePool(eit);
-            return STATUS_INTERNAL_ERROR;
-        }
-
-        return STATUS_SUCCESS;
-    }
-
-    ERR("could not find EXTENT_ITEM for address %llx\n", address);
-
-    return STATUS_INTERNAL_ERROR;
-}
-
-static NTSTATUS STDCALL do_splits(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
-// LIST_ENTRY *le, *le2;
-// write_tree* wt;
-// tree_data* td;
- UINT8 level, max_level;
- UINT32 min_size;
- BOOL empty, done_deletions = FALSE;
- NTSTATUS Status;
- tree* t;
-
- TRACE("(%p)\n", Vcb);
-
- max_level = 0;
-
- for (level = 0; level <= 255; level++) {
- LIST_ENTRY *le, *nextle;
-
- empty = TRUE;
-
- TRACE("doing level %u\n", level);
-
- le = Vcb->trees.Flink;
-
- while (le != &Vcb->trees) {
- t = CONTAINING_RECORD(le, tree, list_entry);
-
- nextle = le->Flink;
-
- if (t->write && t->header.level == level) {
- empty = FALSE;
-
- if (t->header.num_items == 0) {
- if (t->parent) {
- LIST_ENTRY* le2;
- KEY firstitem = {0xcccccccccccccccc,0xcc,0xcccccccccccccccc};
-#ifdef __REACTOS__
- (void)firstitem;
-#endif
-
- done_deletions = TRUE;
-
- le2 = t->itemlist.Flink;
- while (le2 != &t->itemlist) {
- tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
- firstitem = td->key;
- break;
- }
-
- TRACE("deleting tree in root %llx (first item was %llx,%x,%llx)\n",
- t->root->id, firstitem.obj_id, firstitem.obj_type, firstitem.offset);
-
- t->root->root_item.bytes_used -= Vcb->superblock.node_size;
-
- if (t->has_new_address) { // delete associated EXTENT_ITEM
- Status = reduce_tree_extent(Vcb, t->new_address, t, Irp, rollback);
-
- if (!NT_SUCCESS(Status)) {
- ERR("reduce_tree_extent returned %08x\n", Status);
- return Status;
- }
-
- t->has_new_address = FALSE;
- } else if (t->has_address) {
- Status = reduce_tree_extent(Vcb,t->header.address, t, Irp, rollback);
-
- if (!NT_SUCCESS(Status)) {
- ERR("reduce_tree_extent returned %08x\n", Status);
- return Status;
- }
-
- t->has_address = FALSE;
- }
-
- if (!t->paritem->ignore) {
- t->paritem->ignore = TRUE;
- t->parent->header.num_items--;
- t->parent->size -= sizeof(internal_node);
- }
-
- RemoveEntryList(&t->paritem->list_entry);
- ExFreePool(t->paritem);
- t->paritem = NULL;
-
- free_tree(t);
- } else if (t->header.level != 0) {
- if (t->has_new_address) {
- Status = update_extent_level(Vcb, t->new_address, t, 0, Irp, rollback);
-
- if (!NT_SUCCESS(Status)) {
- ERR("update_extent_level returned %08x\n", Status);
- return Status;
- }
- }
-
- t->header.level = 0;
- }
- } else if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
- TRACE("splitting overlarge tree (%x > %x)\n", t->size, Vcb->superblock.node_size - sizeof(tree_header));
- Status = split_tree(Vcb, t);
-
- if (!NT_SUCCESS(Status)) {
- ERR("split_tree returned %08x\n", Status);
- return Status;
- }
- }
- }
-
- le = nextle;
- }
-
- if (!empty) {
- max_level = level;
- } else {
- TRACE("nothing found for level %u\n", level);
- break;
- }
- }
-
- min_size = (Vcb->superblock.node_size - sizeof(tree_header)) / 2;
-
- for (level = 0; level <= max_level; level++) {
- LIST_ENTRY* le;
-
- le = Vcb->trees.Flink;
-
- while (le != &Vcb->trees) {
- t = CONTAINING_RECORD(le, tree, list_entry);
-
- if (t->write && t->header.level == level && t->header.num_items > 0 && t->parent && t->size < min_size) {
- Status = try_tree_amalgamate(Vcb, t, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("try_tree_amalgamate returned %08x\n", Status);
- return Status;
- }
- }
-
- le = le->Flink;
- }
- }
-
- // simplify trees if top tree only has one entry
-
- if (done_deletions) {
- for (level = max_level; level > 0; level--) {
- LIST_ENTRY *le, *nextle;
-
- le = Vcb->trees.Flink;
- while (le != &Vcb->trees) {
- nextle = le->Flink;
- t = CONTAINING_RECORD(le, tree, list_entry);
-
- if (t->write && t->header.level == level) {
- if (!t->parent && t->header.num_items == 1) {
- LIST_ENTRY* le2 = t->itemlist.Flink;
- tree_data* td;
- tree* child_tree = NULL;
-
- while (le2 != &t->itemlist) {
- td = CONTAINING_RECORD(le2, tree_data, list_entry);
- if (!td->ignore)
- break;
- le2 = le2->Flink;
- }
-
- TRACE("deleting top-level tree in root %llx with one item\n", t->root->id);
-
- if (t->has_new_address) { // delete associated EXTENT_ITEM
- Status = reduce_tree_extent(Vcb, t->new_address, t, Irp, rollback);
-
- if (!NT_SUCCESS(Status)) {
- ERR("reduce_tree_extent returned %08x\n", Status);
- return Status;
- }
-
- t->has_new_address = FALSE;
- } else if (t->has_address) {
- Status = reduce_tree_extent(Vcb,t->header.address, t, Irp, rollback);
-
- if (!NT_SUCCESS(Status)) {
- ERR("reduce_tree_extent returned %08x\n", Status);
- return Status;
- }
-
- t->has_address = FALSE;
- }
-
- if (!td->treeholder.tree) { // load first item if not already loaded
- KEY searchkey = {0,0,0};
- traverse_ptr tp;
-
- Status = find_item(Vcb, t->root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return Status;
- }
- }
-
- child_tree = td->treeholder.tree;
-
- if (child_tree) {
- child_tree->parent = NULL;
- child_tree->paritem = NULL;
- }
-
- t->root->root_item.bytes_used -= Vcb->superblock.node_size;
-
- free_tree(t);
-
- if (child_tree)
- child_tree->root->treeholder.tree = child_tree;
- }
- }
-
- le = nextle;
- }
- }
- }
-
- return STATUS_SUCCESS;
-}
-
-static NTSTATUS remove_root_extents(device_extension* Vcb, root* r, tree_holder* th, UINT8 level, PIRP Irp, LIST_ENTRY* rollback) {
- NTSTATUS Status;
-
- if (level > 0) {
- if (!th->tree) {
- Status = load_tree(Vcb, th->address, r, &th->tree, NULL, NULL);
-
- if (!NT_SUCCESS(Status)) {
- ERR("load_tree(%llx) returned %08x\n", th->address, Status);
- return Status;
- }
- }
-
- if (th->tree->header.level > 0) {
- LIST_ENTRY* le = th->tree->itemlist.Flink;
-
- while (le != &th->tree->itemlist) {
- tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
-
- if (!td->ignore) {
- Status = remove_root_extents(Vcb, r, &td->treeholder, th->tree->header.level - 1, Irp, rollback);
-
- if (!NT_SUCCESS(Status)) {
- ERR("remove_root_extents returned %08x\n", Status);
- return Status;
- }
- }
-
- le = le->Flink;
- }
- }
- }
-
- if (!th->tree || th->tree->has_address) {
- Status = reduce_tree_extent(Vcb, th->address, NULL, Irp, rollback);
-
- if (!NT_SUCCESS(Status)) {
- ERR("reduce_tree_extent(%llx) returned %08x\n", th->address, Status);
- return Status;
- }
- }
-
- return STATUS_SUCCESS;
-}
-
-static NTSTATUS drop_root(device_extension* Vcb, root* r, PIRP Irp, LIST_ENTRY* rollback) {
- NTSTATUS Status;
- KEY searchkey;
- traverse_ptr tp;
-
- Status = remove_root_extents(Vcb, r, &r->treeholder, r->root_item.root_level, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("remove_root_extents returned %08x\n", Status);
- return Status;
- }
-
- // remove entry in uuid root (tree 9)
- if (Vcb->uuid_root) {
- RtlCopyMemory(&searchkey.obj_id, &r->root_item.uuid.uuid[0], sizeof(UINT64));
- searchkey.obj_type = TYPE_SUBVOL_UUID;
- RtlCopyMemory(&searchkey.offset, &r->root_item.uuid.uuid[sizeof(UINT64)], sizeof(UINT64));
-
- if (searchkey.obj_id != 0 || searchkey.offset != 0) {
- Status = find_item(Vcb, Vcb->uuid_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- WARN("find_item returned %08x\n", Status);
- } else {
- if (!keycmp(&tp.item->key, &searchkey))
- delete_tree_item(Vcb, &tp, rollback);
- else
- WARN("could not find (%llx,%x,%llx) in uuid tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
- }
- }
- }
-
- // delete ROOT_ITEM
-
- searchkey.obj_id = r->id;
- searchkey.obj_type = TYPE_ROOT_ITEM;
- searchkey.offset = 0xffffffffffffffff;
-
- Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("find_item returned %08x\n", Status);
- return Status;
- }
-
- if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type)
- delete_tree_item(Vcb, &tp, rollback);
- else
- WARN("could not find (%llx,%x,%llx) in root_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
-
- // delete items in tree cache
-
- free_trees_root(Vcb, r);
-
- return STATUS_SUCCESS;
-}
-
-static NTSTATUS drop_roots(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
- LIST_ENTRY *le = Vcb->drop_roots.Flink, *le2;
- NTSTATUS Status;
-
- while (le != &Vcb->drop_roots) {
- root* r = CONTAINING_RECORD(le, root, list_entry);
-
- le2 = le->Flink;
-
- Status = drop_root(Vcb, r, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("drop_root(%llx) returned %08x\n", r->id, Status);
- return Status;
- }
-
- le = le2;
- }
-
- return STATUS_SUCCESS;
-}
-
-static NTSTATUS create_chunk(device_extension* Vcb, chunk* c, PIRP Irp, LIST_ENTRY* rollback) {
- CHUNK_ITEM* ci;
- CHUNK_ITEM_STRIPE* cis;
- BLOCK_GROUP_ITEM* bgi;
- UINT16 i, factor;
- NTSTATUS Status;
-
- ci = ExAllocatePoolWithTag(PagedPool, c->size, ALLOC_TAG);
- if (!ci) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- RtlCopyMemory(ci, c->chunk_item, c->size);
-
- if (!insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size, NULL, Irp, rollback)) {
- ERR("insert_tree_item failed\n");
- ExFreePool(ci);
- return STATUS_INTERNAL_ERROR;
- }
-
- if (c->chunk_item->type & BLOCK_FLAG_SYSTEM) {
- Status = add_to_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset, ci, c->size);
- if (!NT_SUCCESS(Status)) {
- ERR("add_to_bootstrap returned %08x\n", Status);
- return Status;
- }
- }
-
- // add BLOCK_GROUP_ITEM to tree 2
-
- bgi = ExAllocatePoolWithTag(PagedPool, sizeof(BLOCK_GROUP_ITEM), ALLOC_TAG);
- if (!bgi) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- bgi->used = c->used;
- bgi->chunk_tree = 0x100;
- bgi->flags = c->chunk_item->type;
-
- if (!insert_tree_item(Vcb, Vcb->extent_root, c->offset, TYPE_BLOCK_GROUP_ITEM, c->chunk_item->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, Irp, rollback)) {
- ERR("insert_tree_item failed\n");
- ExFreePool(bgi);
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- if (c->chunk_item->type & BLOCK_FLAG_RAID0)
- factor = c->chunk_item->num_stripes;
- else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
- factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
- else // SINGLE, DUPLICATE, RAID1
- factor = 1;
-
- // add DEV_EXTENTs to tree 4
-
- cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
-
- for (i = 0; i < c->chunk_item->num_stripes; i++) {
- DEV_EXTENT* de;
-
- de = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_EXTENT), ALLOC_TAG);
- if (!de) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- de->chunktree = Vcb->chunk_root->id;
- de->objid = 0x100;
- de->address = c->offset;
- de->length = c->chunk_item->size / factor;
- de->chunktree_uuid = Vcb->chunk_root->treeholder.tree->header.chunk_tree_uuid;
-
- if (!insert_tree_item(Vcb, Vcb->dev_root, c->devices[i]->devitem.dev_id, TYPE_DEV_EXTENT, cis[i].offset, de, sizeof(DEV_EXTENT), NULL, Irp, rollback)) {
- ERR("insert_tree_item failed\n");
- ExFreePool(de);
- return STATUS_INTERNAL_ERROR;
- }
-
- // FIXME - no point in calling this twice for the same device
- Status = update_dev_item(Vcb, c->devices[i], Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("update_dev_item returned %08x\n", Status);
- return Status;
- }
- }
-
- c->created = FALSE;
-
- return STATUS_SUCCESS;
-}
-
-static void remove_from_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset) {
- sys_chunk* sc2;
- LIST_ENTRY* le;
-
- le = Vcb->sys_chunks.Flink;
- while (le != &Vcb->sys_chunks) {
- sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
-
- if (sc2->key.obj_id == obj_id && sc2->key.obj_type == obj_type && sc2->key.offset == offset) {
- RemoveEntryList(&sc2->list_entry);
-
- Vcb->superblock.n -= sizeof(KEY) + sc2->size;
-
- ExFreePool(sc2->data);
- ExFreePool(sc2);
- regen_bootstrap(Vcb);
- return;
- }
-
- le = le->Flink;
- }
-}
-
-static NTSTATUS drop_chunk(device_extension* Vcb, chunk* c, PIRP Irp, LIST_ENTRY* rollback) {
- NTSTATUS Status;
- KEY searchkey;
- traverse_ptr tp;
- UINT64 i, factor;
- CHUNK_ITEM_STRIPE* cis;
-
- TRACE("dropping chunk %llx\n", c->offset);
-
- // remove free space cache
- if (c->cache) {
- c->cache->deleted = TRUE;
-
- flush_fcb(c->cache, TRUE, Irp, rollback);
-
- free_fcb(c->cache);
-
- searchkey.obj_id = FREE_SPACE_CACHE_ID;
- searchkey.obj_type = 0;
- searchkey.offset = c->offset;
-
- Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return Status;
- }
-
- if (!keycmp(&tp.item->key, &searchkey)) {
- delete_tree_item(Vcb, &tp, rollback);
- }
- }
-
- if (c->chunk_item->type & BLOCK_FLAG_RAID0)
- factor = c->chunk_item->num_stripes;
- else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
- factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
- else // SINGLE, DUPLICATE, RAID1
- factor = 1;
-
- cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
- for (i = 0; i < c->chunk_item->num_stripes; i++) {
- if (!c->created) {
- // remove DEV_EXTENTs from tree 4
- searchkey.obj_id = cis[i].dev_id;
- searchkey.obj_type = TYPE_DEV_EXTENT;
- searchkey.offset = cis[i].offset;
-
- Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return Status;
- }
-
- if (!keycmp(&tp.item->key, &searchkey)) {
- delete_tree_item(Vcb, &tp, rollback);
-
- if (tp.item->size >= sizeof(DEV_EXTENT)) {
- DEV_EXTENT* de = (DEV_EXTENT*)tp.item->data;
-
- c->devices[i]->devitem.bytes_used -= de->length;
-
- space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, de->length, rollback);
- }
- } else
- WARN("could not find (%llx,%x,%llx) in dev tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
- } else {
- UINT64 len = c->chunk_item->size / factor;
-
- c->devices[i]->devitem.bytes_used -= len;
- space_list_add2(&c->devices[i]->space, NULL, cis[i].offset, len, rollback);
- }
- }
-
- // modify DEV_ITEMs in chunk tree
- for (i = 0; i < c->chunk_item->num_stripes; i++) {
- if (c->devices[i]) {
- UINT64 j;
- DEV_ITEM* di;
-
- searchkey.obj_id = 1;
- searchkey.obj_type = TYPE_DEV_ITEM;
- searchkey.offset = c->devices[i]->devitem.dev_id;
-
- Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return Status;
- }
-
- if (keycmp(&tp.item->key, &searchkey)) {
- ERR("error - could not find DEV_ITEM for device %llx\n", searchkey.offset);
- return STATUS_INTERNAL_ERROR;
- }
-
- delete_tree_item(Vcb, &tp, rollback);
-
- di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
- if (!di) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- RtlCopyMemory(di, &c->devices[i]->devitem, sizeof(DEV_ITEM));
-
- if (!insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, c->devices[i]->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, Irp, rollback)) {
- ERR("insert_tree_item failed\n");
- return STATUS_INTERNAL_ERROR;
- }
-
- for (j = i + 1; j < c->chunk_item->num_stripes; j++) {
- if (c->devices[j] == c->devices[i])
- c->devices[j] = NULL;
- }
- }
- }
-
- if (!c->created) {
- // remove CHUNK_ITEM from chunk tree
- searchkey.obj_id = 0x100;
- searchkey.obj_type = TYPE_CHUNK_ITEM;
- searchkey.offset = c->offset;
-
- Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return Status;
- }
-
- if (!keycmp(&tp.item->key, &searchkey))
- delete_tree_item(Vcb, &tp, rollback);
- else
- WARN("could not find CHUNK_ITEM for chunk %llx\n", c->offset);
-
- // remove BLOCK_GROUP_ITEM from extent tree
- searchkey.obj_id = c->offset;
- searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
- searchkey.offset = 0xffffffffffffffff;
-
- Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return Status;
- }
-
- if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type)
- delete_tree_item(Vcb, &tp, rollback);
- else
- WARN("could not find BLOCK_GROUP_ITEM for chunk %llx\n", c->offset);
- }
-
- if (c->chunk_item->type & BLOCK_FLAG_SYSTEM)
- remove_from_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, c->offset);
-
- RemoveEntryList(&c->list_entry);
-
- if (c->list_entry_changed.Flink)
- RemoveEntryList(&c->list_entry_changed);
-
- ExFreePool(c->chunk_item);
- ExFreePool(c->devices);
-
- while (!IsListEmpty(&c->space)) {
- space* s = CONTAINING_RECORD(c->space.Flink, space, list_entry);
-
- RemoveEntryList(&s->list_entry);
- ExFreePool(s);
- }
-
- while (!IsListEmpty(&c->deleting)) {
- space* s = CONTAINING_RECORD(c->deleting.Flink, space, list_entry);
-
- RemoveEntryList(&s->list_entry);
- ExFreePool(s);
- }
-
- ExDeleteResourceLite(&c->lock);
- ExDeleteResourceLite(&c->changed_extents_lock);
-
- ExFreePool(c);
-
- return STATUS_SUCCESS;
-}
-
-static NTSTATUS update_chunks(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
- LIST_ENTRY *le = Vcb->chunks_changed.Flink, *le2;
- NTSTATUS Status;
- UINT64 used_minus_cache;
-
- ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
-
- // FIXME - do tree chunks before data chunks
-
- while (le != &Vcb->chunks_changed) {
- chunk* c = CONTAINING_RECORD(le, chunk, list_entry_changed);
-
- le2 = le->Flink;
-
- ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-
- used_minus_cache = c->used;
-
- // subtract self-hosted cache
- if (used_minus_cache > 0 && c->chunk_item->type & BLOCK_FLAG_DATA && c->cache && c->cache->inode_item.st_size == c->used) {
- LIST_ENTRY* le3;
-
- le3 = c->cache->extents.Flink;
- while (le3 != &c->cache->extents) {
- extent* ext = CONTAINING_RECORD(le3, extent, list_entry);
- EXTENT_DATA* ed = ext->data;
-
- if (!ext->ignore) {
- if (ext->datalen < sizeof(EXTENT_DATA)) {
- ERR("extent %llx was %u bytes, expected at least %u\n", ext->offset, ext->datalen, sizeof(EXTENT_DATA));
- break;
- }
-
- if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
- EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
-
- if (ext->datalen < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
- ERR("extent %llx was %u bytes, expected at least %u\n", ext->offset, ext->datalen,
- sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
- break;
- }
-
- if (ed2->size != 0 && ed2->address >= c->offset && ed2->address + ed2->size <= c->offset + c->chunk_item->size)
- used_minus_cache -= ed2->size;
- }
- }
-
- le3 = le3->Flink;
- }
- }
-
- if (used_minus_cache == 0) {
- Status = drop_chunk(Vcb, c, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("drop_chunk returned %08x\n", Status);
- ExReleaseResourceLite(&c->lock);
- ExReleaseResourceLite(&Vcb->chunk_lock);
- return Status;
- }
- } else if (c->created) {
- Status = create_chunk(Vcb, c, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("create_chunk returned %08x\n", Status);
- ExReleaseResourceLite(&c->lock);
- ExReleaseResourceLite(&Vcb->chunk_lock);
- return Status;
- }
- }
-
- if (used_minus_cache > 0)
- ExReleaseResourceLite(&c->lock);
-
- le = le2;
- }
-
- ExReleaseResourceLite(&Vcb->chunk_lock);
-
- return STATUS_SUCCESS;
-}
-
-static NTSTATUS STDCALL set_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char* name, UINT32 crc32, UINT8* data, UINT16 datalen, PIRP Irp, LIST_ENTRY* rollback) {
- KEY searchkey;
- traverse_ptr tp;
- ULONG xasize, maxlen;
- DIR_ITEM* xa;
- NTSTATUS Status;
-
- TRACE("(%p, %llx, %llx, %s, %08x, %p, %u)\n", Vcb, subvol->id, inode, name, crc32, data, datalen);
-
- searchkey.obj_id = inode;
- searchkey.obj_type = TYPE_XATTR_ITEM;
- searchkey.offset = crc32;
-
- Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return Status;
- }
-
- xasize = sizeof(DIR_ITEM) - 1 + (ULONG)strlen(name) + datalen;
- maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node);
-
- if (!keycmp(&tp.item->key, &searchkey)) { // key exists
- UINT8* newdata;
- ULONG size = tp.item->size;
-
- xa = (DIR_ITEM*)tp.item->data;
-
- if (tp.item->size < sizeof(DIR_ITEM)) {
- ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM));
- } else {
- while (TRUE) {
- ULONG oldxasize;
-
- if (size < sizeof(DIR_ITEM) || size < sizeof(DIR_ITEM) - 1 + xa->m + xa->n) {
- ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
- break;
- }
-
- oldxasize = sizeof(DIR_ITEM) - 1 + xa->m + xa->n;
-
- if (xa->n == strlen(name) && RtlCompareMemory(name, xa->name, xa->n) == xa->n) {
- UINT64 pos;
-
- // replace
-
- if (tp.item->size + xasize - oldxasize > maxlen) {
- ERR("DIR_ITEM would be over maximum size (%u + %u - %u > %u)\n", tp.item->size, xasize, oldxasize, maxlen);
- return STATUS_INTERNAL_ERROR;
- }
-
- newdata = ExAllocatePoolWithTag(PagedPool, tp.item->size + xasize - oldxasize, ALLOC_TAG);
- if (!newdata) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- pos = (UINT8*)xa - tp.item->data;
- if (pos + oldxasize < tp.item->size) { // copy after changed xattr
- RtlCopyMemory(newdata + pos + xasize, tp.item->data + pos + oldxasize, tp.item->size - pos - oldxasize);
- }
-
- if (pos > 0) { // copy before changed xattr
- RtlCopyMemory(newdata, tp.item->data, pos);
- xa = (DIR_ITEM*)(newdata + pos);
- } else
- xa = (DIR_ITEM*)newdata;
-
- xa->key.obj_id = 0;
- xa->key.obj_type = 0;
- xa->key.offset = 0;
- xa->transid = Vcb->superblock.generation;
- xa->m = datalen;
- xa->n = (UINT16)strlen(name);
- xa->type = BTRFS_TYPE_EA;
- RtlCopyMemory(xa->name, name, strlen(name));
- RtlCopyMemory(xa->name + strlen(name), data, datalen);
-
- delete_tree_item(Vcb, &tp, rollback);
- insert_tree_item(Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, newdata, tp.item->size + xasize - oldxasize, NULL, Irp, rollback);
-
- break;
- }
-
- if ((UINT8*)xa - (UINT8*)tp.item->data + oldxasize >= size) {
- // not found, add to end of data
-
- if (tp.item->size + xasize > maxlen) {
- ERR("DIR_ITEM would be over maximum size (%u + %u > %u)\n", tp.item->size, xasize, maxlen);
- return STATUS_INTERNAL_ERROR;
- }
-
- newdata = ExAllocatePoolWithTag(PagedPool, tp.item->size + xasize, ALLOC_TAG);
- if (!newdata) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- RtlCopyMemory(newdata, tp.item->data, tp.item->size);
-
- xa = (DIR_ITEM*)((UINT8*)newdata + tp.item->size);
- xa->key.obj_id = 0;
- xa->key.obj_type = 0;
- xa->key.offset = 0;
- xa->transid = Vcb->superblock.generation;
- xa->m = datalen;
- xa->n = (UINT16)strlen(name);
- xa->type = BTRFS_TYPE_EA;
- RtlCopyMemory(xa->name, name, strlen(name));
- RtlCopyMemory(xa->name + strlen(name), data, datalen);
-
- delete_tree_item(Vcb, &tp, rollback);
- insert_tree_item(Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, newdata, tp.item->size + xasize, NULL, Irp, rollback);
-
- break;
- } else {
- xa = (DIR_ITEM*)&xa->name[xa->m + xa->n];
- size -= oldxasize;
- }
- }
- }
- } else {
- if (xasize > maxlen) {
- ERR("DIR_ITEM would be over maximum size (%u > %u)\n", xasize, maxlen);
- return STATUS_INTERNAL_ERROR;
- }
-
- xa = ExAllocatePoolWithTag(PagedPool, xasize, ALLOC_TAG);
- if (!xa) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- xa->key.obj_id = 0;
- xa->key.obj_type = 0;
- xa->key.offset = 0;
- xa->transid = Vcb->superblock.generation;
- xa->m = datalen;
- xa->n = (UINT16)strlen(name);
- xa->type = BTRFS_TYPE_EA;
- RtlCopyMemory(xa->name, name, strlen(name));
- RtlCopyMemory(xa->name + strlen(name), data, datalen);
-
- insert_tree_item(Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, xa, xasize, NULL, Irp, rollback);
- }
-
- return STATUS_SUCCESS;
-}
-
-static BOOL STDCALL delete_xattr(device_extension* Vcb, root* subvol, UINT64 inode, char* name, UINT32 crc32, PIRP Irp, LIST_ENTRY* rollback) {
- KEY searchkey;
- traverse_ptr tp;
- DIR_ITEM* xa;
- NTSTATUS Status;
-
- TRACE("(%p, %llx, %llx, %s, %08x)\n", Vcb, subvol->id, inode, name, crc32);
-
- searchkey.obj_id = inode;
- searchkey.obj_type = TYPE_XATTR_ITEM;
- searchkey.offset = crc32;
-
- Status = find_item(Vcb, subvol, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return FALSE;
- }
-
- if (!keycmp(&tp.item->key, &searchkey)) { // key exists
- ULONG size = tp.item->size;
-
- if (tp.item->size < sizeof(DIR_ITEM)) {
- ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DIR_ITEM));
-
- return FALSE;
- } else {
- xa = (DIR_ITEM*)tp.item->data;
-
- while (TRUE) {
- ULONG oldxasize;
-
- if (size < sizeof(DIR_ITEM) || size < sizeof(DIR_ITEM) - 1 + xa->m + xa->n) {
- ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-
- return FALSE;
- }
-
- oldxasize = sizeof(DIR_ITEM) - 1 + xa->m + xa->n;
-
- if (xa->n == strlen(name) && RtlCompareMemory(name, xa->name, xa->n) == xa->n) {
- ULONG newsize;
- UINT8 *newdata, *dioff;
-
- newsize = tp.item->size - (sizeof(DIR_ITEM) - 1 + xa->n + xa->m);
-
- delete_tree_item(Vcb, &tp, rollback);
-
- if (newsize == 0) {
- TRACE("xattr %s deleted\n", name);
-
- return TRUE;
- }
-
- // FIXME - deleting collisions almost certainly works, but we should test it properly anyway
- newdata = ExAllocatePoolWithTag(PagedPool, newsize, ALLOC_TAG);
- if (!newdata) {
- ERR("out of memory\n");
- return FALSE;
- }
-
- if ((UINT8*)xa > tp.item->data) {
- RtlCopyMemory(newdata, tp.item->data, (UINT8*)xa - tp.item->data);
- dioff = newdata + ((UINT8*)xa - tp.item->data);
- } else {
- dioff = newdata;
- }
-
- if ((UINT8*)&xa->name[xa->n+xa->m] - tp.item->data < tp.item->size)
- RtlCopyMemory(dioff, &xa->name[xa->n+xa->m], tp.item->size - ((UINT8*)&xa->name[xa->n+xa->m] - tp.item->data));
-
- insert_tree_item(Vcb, subvol, inode, TYPE_XATTR_ITEM, crc32, newdata, newsize, NULL, Irp, rollback);
-
-
- return TRUE;
- }
-
- if (xa->m + xa->n >= size) { // FIXME - test this works
- WARN("xattr %s not found\n", name);
-
- return FALSE;
- } else {
- xa = (DIR_ITEM*)&xa->name[xa->m + xa->n];
- size -= oldxasize;
- }
- }
- }
- } else {
- WARN("xattr %s not found\n", name);
-
- return FALSE;
- }
-}
-
-static NTSTATUS insert_sparse_extent(fcb* fcb, UINT64 start, UINT64 length, PIRP Irp, LIST_ENTRY* rollback) {
- EXTENT_DATA* ed;
- EXTENT_DATA2* ed2;
-
- TRACE("((%llx, %llx), %llx, %llx)\n", fcb->subvol->id, fcb->inode, start, length);
-
- ed = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
- if (!ed) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- ed->generation = fcb->Vcb->superblock.generation;
- ed->decoded_size = length;
- ed->compression = BTRFS_COMPRESSION_NONE;
- ed->encryption = BTRFS_ENCRYPTION_NONE;
- ed->encoding = BTRFS_ENCODING_NONE;
- ed->type = EXTENT_TYPE_REGULAR;
-
- ed2 = (EXTENT_DATA2*)ed->data;
- ed2->address = 0;
- ed2->size = 0;
- ed2->offset = 0;
- ed2->num_bytes = length;
-
- if (!insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, start, ed, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, Irp, rollback)) {
- ERR("insert_tree_item failed\n");
- return STATUS_INTERNAL_ERROR;
- }
-
- return STATUS_SUCCESS;
-}
-
-void flush_fcb(fcb* fcb, BOOL cache, PIRP Irp, LIST_ENTRY* rollback) {
- traverse_ptr tp;
- KEY searchkey;
- NTSTATUS Status;
- INODE_ITEM* ii;
- UINT64 ii_offset;
-#ifdef DEBUG_PARANOID
- UINT64 old_size = 0;
- BOOL extents_changed;
-#endif
-
-// ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE);
-
- while (!IsListEmpty(&fcb->index_list)) {
- LIST_ENTRY* le = RemoveHeadList(&fcb->index_list);
- index_entry* ie = CONTAINING_RECORD(le, index_entry, list_entry);
-
- if (ie->utf8.Buffer) ExFreePool(ie->utf8.Buffer);
- if (ie->filepart_uc.Buffer) ExFreePool(ie->filepart_uc.Buffer);
- ExFreePool(ie);
- }
-
- fcb->index_loaded = FALSE;
-
- if (fcb->ads) {
- if (fcb->deleted)
- delete_xattr(fcb->Vcb, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adshash, Irp, rollback);
- else {
- Status = set_xattr(fcb->Vcb, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adshash, (UINT8*)fcb->adsdata.Buffer, fcb->adsdata.Length, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("set_xattr returned %08x\n", Status);
- goto end;
- }
- }
- goto end;
- }
-
-#ifdef DEBUG_PARANOID
- extents_changed = fcb->extents_changed;
-#endif
-
- if (fcb->extents_changed) {
- BOOL b;
- traverse_ptr next_tp;
- LIST_ENTRY* le;
- BOOL prealloc = FALSE, extents_inline = FALSE;
- UINT64 last_end;
-
- // delete ignored extent items
- le = fcb->extents.Flink;
- while (le != &fcb->extents) {
- LIST_ENTRY* le2 = le->Flink;
- extent* ext = CONTAINING_RECORD(le, extent, list_entry);
-
- if (ext->ignore) {
- RemoveEntryList(&ext->list_entry);
- ExFreePool(ext->data);
- ExFreePool(ext);
- }
-
- le = le2;
- }
-
- le = fcb->extents.Flink;
- while (le != &fcb->extents) {
- LIST_ENTRY* le2 = le->Flink;
- extent* ext = CONTAINING_RECORD(le, extent, list_entry);
-
- if ((ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) && le->Flink != &fcb->extents) {
- extent* nextext = CONTAINING_RECORD(le->Flink, extent, list_entry);
-
- if (ext->data->type == nextext->data->type) {
- EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data;
- EXTENT_DATA2* ned2 = (EXTENT_DATA2*)nextext->data->data;
-
- if (ed2->size != 0 && ed2->address == ned2->address && ed2->size == ned2->size &&
- nextext->offset == ext->offset + ed2->num_bytes && ned2->offset == ed2->offset + ed2->num_bytes) {
- chunk* c;
-
- ext->data->generation = fcb->Vcb->superblock.generation;
- ed2->num_bytes += ned2->num_bytes;
-
- RemoveEntryList(&nextext->list_entry);
- ExFreePool(nextext->data);
- ExFreePool(nextext);
-
- c = get_chunk_from_address(fcb->Vcb, ed2->address);
-
- if (!c) {
- ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
- } else {
- Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1,
- fcb->inode_item.flags & BTRFS_INODE_NODATASUM, ed2->size, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("update_changed_extent_ref returned %08x\n", Status);
- goto end;
- }
- }
-
- le2 = le;
- }
- }
- }
-
- le = le2;
- }
-
- // delete existing EXTENT_DATA items
-
- searchkey.obj_id = fcb->inode;
- searchkey.obj_type = TYPE_EXTENT_DATA;
- searchkey.offset = 0;
-
- Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- goto end;
- }
-
- do {
- if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type)
- delete_tree_item(fcb->Vcb, &tp, rollback);
-
- b = find_next_item(fcb->Vcb, &tp, &next_tp, FALSE, Irp);
-
- if (b) {
- tp = next_tp;
-
- if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type))
- break;
- }
- } while (b);
-
- if (!fcb->deleted) {
- // add new EXTENT_DATAs
-
- last_end = 0;
-
- le = fcb->extents.Flink;
- while (le != &fcb->extents) {
- extent* ext = CONTAINING_RECORD(le, extent, list_entry);
- EXTENT_DATA* ed;
-
- if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && ext->offset > last_end) {
- Status = insert_sparse_extent(fcb, last_end, ext->offset - last_end, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("insert_sparse_extent returned %08x\n", Status);
- goto end;
- }
- }
-
- ed = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG);
- if (!ed) {
- ERR("out of memory\n");
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto end;
- }
-
- RtlCopyMemory(ed, ext->data, ext->datalen);
-
- if (!insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, ext->offset, ed, ext->datalen, NULL, Irp, rollback)) {
- ERR("insert_tree_item failed\n");
- goto end;
- }
-
- if (ext->datalen >= sizeof(EXTENT_DATA) && ed->type == EXTENT_TYPE_PREALLOC)
- prealloc = TRUE;
-
- if (ext->datalen >= sizeof(EXTENT_DATA) && ed->type == EXTENT_TYPE_INLINE)
- extents_inline = TRUE;
-
- if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES)) {
- if (ed->type == EXTENT_TYPE_INLINE)
- last_end = ext->offset + ed->decoded_size;
- else {
- EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
-
- last_end = ext->offset + ed2->num_bytes;
- }
- }
-
- le = le->Flink;
- }
-
- if (!(fcb->Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_NO_HOLES) && !extents_inline &&
- sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) > last_end) {
- Status = insert_sparse_extent(fcb, last_end, sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size) - last_end, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("insert_sparse_extent returned %08x\n", Status);
- goto end;
- }
- }
-
- // update prealloc flag in INODE_ITEM
-
- if (!prealloc)
- fcb->inode_item.flags &= ~BTRFS_INODE_PREALLOC;
- else
- fcb->inode_item.flags |= BTRFS_INODE_PREALLOC;
- }
-
- fcb->extents_changed = FALSE;
+ stripenum = (stripenum + 1) % (c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
}
+
+ ExFreePool(stripeoff);
- if (!fcb->created || cache) {
- searchkey.obj_id = fcb->inode;
- searchkey.obj_type = TYPE_INODE_ITEM;
- searchkey.offset = 0xffffffffffffffff;
-
- Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- goto end;
- }
-
- if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
- if (cache) {
- ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
- if (!ii) {
- ERR("out of memory\n");
- goto end;
- }
-
- RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM));
-
- if (!insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, Irp, rollback)) {
- ERR("insert_tree_item failed\n");
- goto end;
- }
-
- ii_offset = 0;
- } else {
- ERR("could not find INODE_ITEM for inode %llx in subvol %llx\n", fcb->inode, fcb->subvol->id);
- goto end;
- }
- } else {
-#ifdef DEBUG_PARANOID
- INODE_ITEM* ii2 = (INODE_ITEM*)tp.item->data;
-
- old_size = ii2->st_size;
-#endif
-
- ii_offset = tp.item->key.offset;
- }
-
- if (!cache)
- delete_tree_item(fcb->Vcb, &tp, rollback);
- else {
- searchkey.obj_id = fcb->inode;
- searchkey.obj_type = TYPE_INODE_ITEM;
- searchkey.offset = ii_offset;
-
- Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- goto end;
- }
-
- if (keycmp(&tp.item->key, &searchkey)) {
- ERR("could not find INODE_ITEM for inode %llx in subvol %llx\n", fcb->inode, fcb->subvol->id);
- goto end;
- } else
- RtlCopyMemory(tp.item->data, &fcb->inode_item, min(tp.item->size, sizeof(INODE_ITEM)));
- }
- } else
- ii_offset = 0;
+ return STATUS_SUCCESS;
+}
+
+// Completion routine attached by make_read_irp to each per-stripe read IRP.
+// ptr is the read_stripe that the IRP was built for. Decrements the shared
+// stripes_left counter on the stripe's master, copies the IRP's final status
+// block into stripe->iosb, and signals master->event once the counter has
+// reached zero. Always returns STATUS_MORE_PROCESSING_REQUIRED; the IRP is
+// freed later by the code that issued it (see the readend cleanup in
+// prepare_raid5_write). DeviceObject is unused.
+static NTSTATUS STDCALL read_stripe_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID ptr) {
+ read_stripe* stripe = ptr;
+ read_stripe_master* master = stripe->master;
+ ULONG stripes_left = InterlockedDecrement(&master->stripes_left);
-#ifdef DEBUG_PARANOID
- if (!extents_changed && fcb->type != BTRFS_TYPE_DIRECTORY && old_size != fcb->inode_item.st_size) {
- ERR("error - size has changed but extents not marked as changed\n");
- int3;
- }
-#endif
+ // record the result before the waiter can be woken by the KeSetEvent below
+ stripe->iosb = Irp->IoStatus;
- fcb->created = FALSE;
-
- if (fcb->deleted) {
- traverse_ptr tp2;
-
- // delete XATTR_ITEMs
-
- searchkey.obj_id = fcb->inode;
- searchkey.obj_type = TYPE_XATTR_ITEM;
- searchkey.offset = 0;
-
- Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- goto end;
- }
+ // this was the last outstanding read for the master
+ if (stripes_left == 0)
+ KeSetEvent(&master->event, 0, FALSE);
- while (find_next_item(fcb->Vcb, &tp, &tp2, FALSE, Irp)) {
- tp = tp2;
-
- if (tp.item->key.obj_id == fcb->inode) {
- // FIXME - do metadata thing here too?
- if (tp.item->key.obj_type == TYPE_XATTR_ITEM) {
- delete_tree_item(fcb->Vcb, &tp, rollback);
- TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
- }
- } else
- break;
- }
-
- goto end;
- }
+ return STATUS_MORE_PROCESSING_REQUIRED;
+}
+
+// Builds, but does not send, an IRP_MJ_READ request for `length` bytes at byte
+// offset `offset` on stripe->devobj, with `data` as the destination buffer.
+// If old_irp is NULL a standalone IRP is allocated with IoAllocateIrp;
+// otherwise the new IRP is created with IoMakeAssociatedIrp(old_irp, ...).
+// On success the IRP is stored in stripe->Irp, its UserIosb points at
+// stripe->iosb and read_stripe_completion is installed as its completion
+// routine (invoked on success, error and cancel).
+// Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES if the IRP or MDL
+// cannot be allocated, or STATUS_INTERNAL_ERROR for DO_BUFFERED_IO devices,
+// which are not handled yet. On failure the IRP is freed and stripe->Irp is
+// left untouched.
+static NTSTATUS make_read_irp(PIRP old_irp, read_stripe* stripe, UINT64 offset, void* data, UINT32 length) {
+ PIO_STACK_LOCATION IrpSp;
+ PIRP Irp;
- if (!cache) {
- ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
- if (!ii) {
- ERR("out of memory\n");
- goto end;
- }
-
- RtlCopyMemory(ii, &fcb->inode_item, sizeof(INODE_ITEM));
+ if (!old_irp) {
+ Irp = IoAllocateIrp(stripe->devobj->StackSize, FALSE);
- if (!insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, ii_offset, ii, sizeof(INODE_ITEM), NULL, Irp, rollback)) {
- ERR("insert_tree_item failed\n");
- goto end;
- }
- }
-
- if (fcb->sd_dirty) {
- Status = set_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_NTACL, EA_NTACL_HASH, (UINT8*)fcb->sd, RtlLengthSecurityDescriptor(fcb->sd), Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("set_xattr returned %08x\n", Status);
+ if (!Irp) {
+ ERR("IoAllocateIrp failed\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
}
+ } else {
+ Irp = IoMakeAssociatedIrp(old_irp, stripe->devobj->StackSize);
- fcb->sd_dirty = FALSE;
+ if (!Irp) {
+ ERR("IoMakeAssociatedIrp failed\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
}
- if (fcb->atts_changed) {
- if (!fcb->atts_deleted) {
- char val[64];
-
- TRACE("inserting new DOSATTRIB xattr\n");
- sprintf(val, "0x%lx", fcb->atts);
-
- Status = set_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_DOSATTRIB, EA_DOSATTRIB_HASH, (UINT8*)val, strlen(val), Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("set_xattr returned %08x\n", Status);
- goto end;
- }
- } else
- delete_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_DOSATTRIB, EA_DOSATTRIB_HASH, Irp, rollback);
-
- fcb->atts_changed = FALSE;
- fcb->atts_deleted = FALSE;
- }
+ // the request is described in the stack location of the driver below us
+ IrpSp = IoGetNextIrpStackLocation(Irp);
+ IrpSp->MajorFunction = IRP_MJ_READ;
- if (fcb->reparse_xattr_changed) {
- if (fcb->reparse_xattr.Buffer && fcb->reparse_xattr.Length > 0) {
- Status = set_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_REPARSE, EA_REPARSE_HASH, (UINT8*)fcb->reparse_xattr.Buffer, fcb->reparse_xattr.Length, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("set_xattr returned %08x\n", Status);
- goto end;
- }
- } else
- delete_xattr(fcb->Vcb, fcb->subvol, fcb->inode, EA_REPARSE, EA_REPARSE_HASH, Irp, rollback);
+ // how the buffer is handed over depends on the target device's I/O method
+ if (stripe->devobj->Flags & DO_BUFFERED_IO) {
+ FIXME("FIXME - buffered IO\n");
+ IoFreeIrp(Irp);
+ return STATUS_INTERNAL_ERROR;
+ } else if (stripe->devobj->Flags & DO_DIRECT_IO) {
+ Irp->MdlAddress = IoAllocateMdl(data, length, FALSE, FALSE, NULL);
+ if (!Irp->MdlAddress) {
+ ERR("IoAllocateMdl failed\n");
+ IoFreeIrp(Irp);
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
- fcb->reparse_xattr_changed = FALSE;
+ // NOTE(review): MmProbeAndLockPages reports failure by raising an exception
+ // and is not wrapped in an exception handler here - confirm that `data` is
+ // always valid, resident memory, or add a guard. The caller is responsible
+ // for the matching MmUnlockPages / IoFreeMdl.
+ MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoWriteAccess);
+ } else {
+ Irp->UserBuffer = data;
}
-
-end:
- fcb->dirty = FALSE;
-
-// ExReleaseResourceLite(fcb->Header.Resource);
- return;
-}
-static NTSTATUS delete_root_ref(device_extension* Vcb, UINT64 subvolid, UINT64 parsubvolid, UINT64 parinode, PANSI_STRING utf8, PIRP Irp, LIST_ENTRY* rollback) {
- KEY searchkey;
- traverse_ptr tp;
- NTSTATUS Status;
+ IrpSp->Parameters.Read.Length = length;
+ IrpSp->Parameters.Read.ByteOffset.QuadPart = offset;
- searchkey.obj_id = parsubvolid;
- searchkey.obj_type = TYPE_ROOT_REF;
- searchkey.offset = subvolid;
+ Irp->UserIosb = &stripe->iosb;
- Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return Status;
- }
+ IoSetCompletionRoutine(Irp, read_stripe_completion, stripe, TRUE, TRUE, TRUE);
- if (!keycmp(&searchkey, &tp.item->key)) {
- if (tp.item->size < sizeof(ROOT_REF)) {
- ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF));
- return STATUS_INTERNAL_ERROR;
- } else {
- ROOT_REF* rr;
- ULONG len;
-
- rr = (ROOT_REF*)tp.item->data;
- len = tp.item->size;
-
- do {
- ULONG itemlen;
-
- if (len < sizeof(ROOT_REF) || len < sizeof(ROOT_REF) - 1 + rr->n) {
- ERR("(%llx,%x,%llx) was truncated\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
- break;
- }
-
- itemlen = sizeof(ROOT_REF) - sizeof(char) + rr->n;
-
- if (rr->dir == parinode && rr->n == utf8->Length && RtlCompareMemory(rr->name, utf8->Buffer, rr->n) == rr->n) {
- ULONG newlen = tp.item->size - itemlen;
-
- delete_tree_item(Vcb, &tp, rollback);
-
- if (newlen == 0) {
- TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
- } else {
- UINT8 *newrr = ExAllocatePoolWithTag(PagedPool, newlen, ALLOC_TAG), *rroff;
-
- if (!newrr) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
-
- TRACE("modifying (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
-
- if ((UINT8*)rr > tp.item->data) {
- RtlCopyMemory(newrr, tp.item->data, (UINT8*)rr - tp.item->data);
- rroff = newrr + ((UINT8*)rr - tp.item->data);
- } else {
- rroff = newrr;
- }
-
- if ((UINT8*)&rr->name[rr->n] - tp.item->data < tp.item->size)
- RtlCopyMemory(rroff, &rr->name[rr->n], tp.item->size - ((UINT8*)&rr->name[rr->n] - tp.item->data));
-
- insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, newrr, newlen, NULL, Irp, rollback);
- }
-
- break;
- }
-
- if (len > itemlen) {
- len -= itemlen;
- rr = (ROOT_REF*)&rr->name[rr->n];
- } else
- break;
- } while (len > 0);
- }
- } else {
- WARN("could not find ROOT_REF entry for subvol %llx in %llx\n", searchkey.offset, searchkey.obj_id);
- return STATUS_NOT_FOUND;
- }
+ stripe->Irp = Irp;
return STATUS_SUCCESS;
}
-static NTSTATUS add_root_ref(device_extension* Vcb, UINT64 subvolid, UINT64 parsubvolid, ROOT_REF* rr, PIRP Irp, LIST_ENTRY* rollback) {
- KEY searchkey;
- traverse_ptr tp;
- NTSTATUS Status;
-
- searchkey.obj_id = parsubvolid;
- searchkey.obj_type = TYPE_ROOT_REF;
- searchkey.offset = subvolid;
+// Prepares the per-device buffers for writing `length` bytes from `data` at
+// logical address `address` inside chunk c, where each row of the chunk holds
+// num_stripes - 1 data stripes plus one parity stripe whose device index
+// changes from row to row.
+// Steps: (1) work out the range each data stripe is touched over and the
+// overall [start, end) range; (2) allocate stripes[i].data (end - start bytes)
+// for every device; (3) read back from disk the parts of that range the new
+// data does not cover; (4) copy `data` into the buffers and XOR the data
+// buffers together into the parity buffer.
+// On success every stripes[i].start / stripes[i].end is set to the common
+// start / end, and skip_start / skip_end record the lengths that were read
+// back rather than supplied by the caller.
+// Irp is passed on to make_read_irp as the parent IRP for the read-back.
+// NOTE(review): the stripes[i].data buffers allocated here are not freed on
+// any of this function's failure paths - presumably the caller releases
+// them; confirm.
+static NTSTATUS prepare_raid5_write(PIRP Irp, chunk* c, UINT64 address, void* data, UINT32 length, write_stripe* stripes) {
+ UINT64 startoff, endoff;
+ UINT16 startoffstripe, endoffstripe, stripenum, parity, logstripe;
+ UINT64 start = 0xffffffffffffffff, end = 0;
+ UINT64 pos, stripepos;
+ UINT32 firststripesize, laststripesize;
+ UINT32 i;
+ UINT8* data2 = (UINT8*)data;
+ UINT32 num_reads;
+ BOOL same_stripe = FALSE, multiple_stripes;
- Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return Status;
- }
+ // locate the first and last byte of the write, treating the num_stripes - 1
+ // data stripes as a RAID0 set (presumably yielding a per-stripe offset and a
+ // logical stripe index - confirm against get_raid0_offset)
+ get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes - 1, &startoff, &startoffstripe);
+ get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes - 1, &endoff, &endoffstripe);
- if (!keycmp(&searchkey, &tp.item->key)) {
- ULONG rrsize = tp.item->size + sizeof(ROOT_REF) - 1 + rr->n;
- UINT8* rr2;
+ // for each logical data stripe, compute the range [ststart, stend) this write
+ // touches, and track the lowest start and highest end seen
+ for (i = 0; i < c->chunk_item->num_stripes - 1; i++) {
+ UINT64 ststart, stend;
- rr2 = ExAllocatePoolWithTag(PagedPool, rrsize, ALLOC_TAG);
- if (!rr2) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
+ if (startoffstripe > i) {
+ ststart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
+ } else if (startoffstripe == i) {
+ ststart = startoff;
+ } else {
+ ststart = startoff - (startoff % c->chunk_item->stripe_length);
}
-
- if (tp.item->size > 0)
- RtlCopyMemory(rr2, tp.item->data, tp.item->size);
-
- RtlCopyMemory(rr2 + tp.item->size, rr, sizeof(ROOT_REF) - 1 + rr->n);
- ExFreePool(rr);
-
- delete_tree_item(Vcb, &tp, rollback);
-
- if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr2, rrsize, NULL, Irp, rollback)) {
- ERR("error - failed to insert item\n");
- ExFreePool(rr2);
- return STATUS_INTERNAL_ERROR;
+
+ if (endoffstripe > i) {
+ stend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
+ } else if (endoffstripe == i) {
+ stend = endoff + 1;
+ } else {
+ stend = endoff - (endoff % c->chunk_item->stripe_length);
}
- } else {
- if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, rr, sizeof(ROOT_REF) - 1 + rr->n, NULL, Irp, rollback)) {
- ERR("error - failed to insert item\n");
- ExFreePool(rr);
- return STATUS_INTERNAL_ERROR;
+
+ if (ststart != stend) {
+ stripes[i].start = ststart;
+ stripes[i].end = stend;
+
+ if (ststart < start) {
+ start = ststart;
+ firststripesize = c->chunk_item->stripe_length - (ststart % c->chunk_item->stripe_length);
+ }
+
+ if (stend > end) {
+ end = stend;
+ laststripesize = stend % c->chunk_item->stripe_length;
+ if (laststripesize == 0)
+ laststripesize = c->chunk_item->stripe_length;
+ }
}
}
- return STATUS_SUCCESS;
-}
-
-static NTSTATUS STDCALL update_root_backref(device_extension* Vcb, UINT64 subvolid, UINT64 parsubvolid, PIRP Irp, LIST_ENTRY* rollback) {
- KEY searchkey;
- traverse_ptr tp;
- UINT8* data;
- ULONG datalen;
- NTSTATUS Status;
-
- searchkey.obj_id = parsubvolid;
- searchkey.obj_type = TYPE_ROOT_REF;
- searchkey.offset = subvolid;
-
- Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return Status;
+ if (start == end) {
+ ERR("error: start == end (%llx)\n", start);
+ return STATUS_INTERNAL_ERROR;
}
- if (!keycmp(&tp.item->key, &searchkey) && tp.item->size > 0) {
- datalen = tp.item->size;
-
- data = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG);
- if (!data) {
+ // whole write starts and ends in the same logical stripe and the same
+ // stripe_length-sized block: first and last piece are one and the same
+ if (startoffstripe == endoffstripe && start / c->chunk_item->stripe_length == end / c->chunk_item->stripe_length) {
+ firststripesize = end - start;
+ laststripesize = firststripesize;
+ }
+
+ // one buffer of end - start bytes per device, parity included
+ for (i = 0; i < c->chunk_item->num_stripes; i++) {
+ stripes[i].data = ExAllocatePoolWithTag(NonPagedPool, end - start, ALLOC_TAG);
+ if (!stripes[i].data) {
ERR("out of memory\n");
return STATUS_INSUFFICIENT_RESOURCES;
}
- RtlCopyMemory(data, tp.item->data, datalen);
- } else {
- datalen = 0;
- }
-
- searchkey.obj_id = subvolid;
- searchkey.obj_type = TYPE_ROOT_BACKREF;
- searchkey.offset = parsubvolid;
-
- Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
-
- if (datalen > 0)
- ExFreePool(data);
-
- return Status;
- }
-
- if (!keycmp(&tp.item->key, &searchkey))
- delete_tree_item(Vcb, &tp, rollback);
-
- if (datalen > 0) {
- if (!insert_tree_item(Vcb, Vcb->root_root, subvolid, TYPE_ROOT_BACKREF, parsubvolid, data, datalen, NULL, Irp, rollback)) {
- ERR("error - failed to insert item\n");
- ExFreePool(data);
- return STATUS_INTERNAL_ERROR;
+ // data stripes the write does not touch are marked as the empty range
+ // [start, start) (assumes the caller zeroed stripes[] beforehand - confirm)
+ if (i < c->chunk_item->num_stripes - 1) {
+ if (stripes[i].start == 0 && stripes[i].end == 0)
+ stripes[i].start = stripes[i].end = start;
}
}
- return STATUS_SUCCESS;
-}
-
-static NTSTATUS flush_fileref(file_ref* fileref, PIRP Irp, LIST_ENTRY* rollback) {
- NTSTATUS Status;
-
- // if fileref created and then immediately deleted, do nothing
- if (fileref->created && fileref->deleted) {
- fileref->dirty = FALSE;
- return STATUS_SUCCESS;
- }
+ // Count the read-backs needed. An untouched stripe needs firststripesize
+ // bytes read, plus a second read if the range spans more than one
+ // stripe_length block; a touched stripe needs a read for any gap before its
+ // start and any gap after its end.
+ num_reads = 0;
+ multiple_stripes = (end - 1) / c->chunk_item->stripe_length != start / c->chunk_item->stripe_length;
- if (fileref->fcb->ads) {
- fileref->dirty = FALSE;
- return STATUS_SUCCESS;
+ for (i = 0; i < c->chunk_item->num_stripes - 1; i++) {
+ if (stripes[i].start == stripes[i].end) {
+ num_reads++;
+
+ if (multiple_stripes)
+ num_reads++;
+ } else {
+ if (stripes[i].start > start)
+ num_reads++;
+
+ if (stripes[i].end < end)
+ num_reads++;
+ }
}
- if (fileref->created) {
- ULONG disize;
- DIR_ITEM *di, *di2;
- UINT32 crc32;
-
- crc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->utf8.Buffer, fileref->utf8.Length);
+ if (num_reads > 0) {
+ UINT32 j;
+ read_stripe_master* master;
+ read_stripe* read_stripes;
+ CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
+ NTSTATUS Status;
- disize = sizeof(DIR_ITEM) - 1 + fileref->utf8.Length;
- di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
- if (!di) {
+ master = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_stripe_master), ALLOC_TAG);
+ if (!master) {
ERR("out of memory\n");
return STATUS_INSUFFICIENT_RESOURCES;
}
- if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
- di->key.obj_id = fileref->fcb->inode;
- di->key.obj_type = TYPE_INODE_ITEM;
- di->key.offset = 0;
- } else { // subvolume
- di->key.obj_id = fileref->fcb->subvol->id;
- di->key.obj_type = TYPE_ROOT_ITEM;
- di->key.offset = 0xffffffffffffffff;
- }
-
- di->transid = fileref->fcb->Vcb->superblock.generation;
- di->m = 0;
- di->n = (UINT16)fileref->utf8.Length;
- di->type = fileref->fcb->type;
- RtlCopyMemory(di->name, fileref->utf8.Buffer, fileref->utf8.Length);
-
- di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
- if (!di2) {
+ read_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_stripe) * num_reads, ALLOC_TAG);
+ if (!read_stripes) {
ERR("out of memory\n");
+ ExFreePool(master);
return STATUS_INSUFFICIENT_RESOURCES;
}
- RtlCopyMemory(di2, di, disize);
-
- if (!insert_tree_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, fileref->index, di, disize, NULL, Irp, rollback)) {
- ERR("insert_tree_item failed\n");
- Status = STATUS_INTERNAL_ERROR;
- return Status;
- }
-
- Status = add_dir_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, crc32, di2, disize, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("add_dir_item returned %08x\n", Status);
- return Status;
- }
+ // parity device for the row holding the first byte of the write; logical
+ // data stripe 0 lives on the device after it, wrapping round
+ parity = (((address - c->offset) / ((c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
+ stripenum = (parity + 1) % c->chunk_item->num_stripes;
- if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
- Status = add_inode_ref(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, fileref->parent->fcb->inode, fileref->index, &fileref->utf8, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("add_inode_ref returned %08x\n", Status);
- return Status;
- }
- } else {
- ULONG rrlen;
- ROOT_REF* rr;
-
- rrlen = sizeof(ROOT_REF) - 1 + fileref->utf8.Length;
+ // first pass: reads at the front of the range (i is the logical stripe,
+ // stripenum the physical device it maps to in the first row)
+ j = 0;
+ for (i = 0; i < c->chunk_item->num_stripes - 1; i++) {
+ if (stripes[i].start > start || stripes[i].start == stripes[i].end) {
+ ULONG readlen;
- rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG);
- if (!rr) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
+ read_stripes[j].Irp = NULL;
+ read_stripes[j].devobj = c->devices[stripenum]->devobj;
+ read_stripes[j].master = master;
+
+ if (stripes[i].start != stripes[i].end)
+ readlen = stripes[i].start - start;
+ else
+ readlen = firststripesize;
+
+ Status = make_read_irp(Irp, &read_stripes[j], start + cis[stripenum].offset, stripes[stripenum].data, readlen);
+
+ if (!NT_SUCCESS(Status)) {
+ ERR("make_read_irp returned %08x\n", Status);
+ // count the failed slot so readend walks it; its Irp is still NULL
+ j++;
+ goto readend;
+ }
+
+ stripes[stripenum].skip_start = readlen;
+
+ j++;
+ if (j == num_reads) break;
}
- rr->dir = fileref->parent->fcb->inode;
- rr->index = fileref->index;
- rr->n = fileref->utf8.Length;
- RtlCopyMemory(rr->name, fileref->utf8.Buffer, fileref->utf8.Length);
+ stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
+ }
+
+ // second pass: reads at the tail of the range, using the parity position
+ // of the row holding the last byte of the write
+ if (j < num_reads) {
+ parity = (((address + length - 1 - c->offset) / ((c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
+ stripenum = (parity + 1) % c->chunk_item->num_stripes;
- Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("add_root_ref returned %08x\n", Status);
- return Status;
+ for (i = 0; i < c->chunk_item->num_stripes - 1; i++) {
+ if ((stripes[i].start != stripes[i].end && stripes[i].end < end) || (stripes[i].start == stripes[i].end && multiple_stripes)) {
+ read_stripes[j].Irp = NULL;
+ read_stripes[j].devobj = c->devices[stripenum]->devobj;
+ read_stripes[j].master = master;
+
+ if (stripes[i].start == stripes[i].end) {
+ Status = make_read_irp(Irp, &read_stripes[j], start + firststripesize + cis[stripenum].offset, &stripes[stripenum].data[firststripesize], laststripesize);
+ stripes[stripenum].skip_end = laststripesize;
+ } else {
+ Status = make_read_irp(Irp, &read_stripes[j], stripes[i].end + cis[stripenum].offset, &stripes[stripenum].data[stripes[i].end - start], end - stripes[i].end);
+ stripes[stripenum].skip_end = end - stripes[i].end;
+ }
+
+ if (!NT_SUCCESS(Status)) {
+ ERR("make_read_irp returned %08x\n", Status);
+ j++;
+ goto readend;
+ }
+
+ j++;
+ if (j == num_reads) break;
+ }
+
+ stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
}
-
- Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp, rollback);
+ }
+
+ // read_stripe_completion decrements stripes_left and sets the event at zero
+ master->stripes_left = j;
+ KeInitializeEvent(&master->event, NotificationEvent, FALSE);
+
+ // NOTE(review): a failing IoCallDriver jumps straight to readend without
+ // waiting for the reads already sent (see the FIXME there)
+ for (i = 0; i < j; i++) {
+ Status = IoCallDriver(read_stripes[i].devobj, read_stripes[i].Irp);
if (!NT_SUCCESS(Status)) {
- ERR("update_root_backref returned %08x\n", Status);
- return Status;
+ ERR("IoCallDriver returned %08x\n", Status);
+ goto readend;
}
}
- fileref->created = FALSE;
- } else if (fileref->deleted) {
- UINT32 crc32;
- KEY searchkey;
- traverse_ptr tp;
- ANSI_STRING* name;
+ KeWaitForSingleObject(&master->event, Executive, KernelMode, FALSE, NULL);
- if (fileref->oldutf8.Buffer)
- name = &fileref->oldutf8;
- else
- name = &fileref->utf8;
-
- crc32 = calc_crc32c(0xfffffffe, (UINT8*)name->Buffer, name->Length);
+ // the first failed read decides the overall status
+ for (i = 0; i < j; i++) {
+ if (!NT_SUCCESS(read_stripes[i].iosb.Status)) {
+ Status = read_stripes[i].iosb.Status;
+ goto readend;
+ }
+ }
+
+ Status = STATUS_SUCCESS;
- TRACE("deleting %.*S\n", file_desc_fileref(fileref));
+readend:
+ // release the MDLs and IRPs for every slot that got as far as having an IRP
+ for (i = 0; i < j; i++) {
+ if (read_stripes[i].Irp) {
+ if (read_stripes[i].devobj->Flags & DO_DIRECT_IO) {
+ MmUnlockPages(read_stripes[i].Irp->MdlAddress);
+ IoFreeMdl(read_stripes[i].Irp->MdlAddress);
+ }
+
+ IoFreeIrp(read_stripes[i].Irp); // FIXME - what if IoCallDriver fails and other Irps are still running?
+ }
+ }
- // delete DIR_ITEM (0x54)
+ ExFreePool(read_stripes);
+ ExFreePool(master);
- Status = delete_dir_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, crc32, name, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("delete_dir_item returned %08x\n", Status);
+ if (!NT_SUCCESS(Status))
return Status;
- }
+ }
+
+ // Now copy the caller's data into the buffers and build the parity.
+ // pos is the offset into the caller's data; stripepos is the offset into
+ // each stripes[].data buffer.
+ pos = 0;
+
+ parity = (((address - c->offset) / ((c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
+ stripepos = 0;
+
+ // the write does not begin on a full-row boundary: handle the partial first row
+ if ((address - c->offset) % (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 1)) > 0) {
+ UINT16 firstdata;
+ BOOL first = TRUE;
- if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
- // delete INODE_REF (0xc)
+ stripenum = (parity + 1) % c->chunk_item->num_stripes;
+
+ for (logstripe = 0; logstripe < c->chunk_item->num_stripes - 1; logstripe++) {
+ ULONG copylen;
- Status = delete_inode_ref(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, fileref->parent->fcb->inode, name, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("delete_inode_ref returned %08x\n", Status);
- return Status;
- }
- } else { // subvolume
- Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, name, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("delete_root_ref returned %08x\n", Status);
- }
+ if (pos >= length)
+ break;
- Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("update_root_backref returned %08x\n", Status);
- return Status;
+ if (stripes[logstripe].start < start + firststripesize && stripes[logstripe].start != stripes[logstripe].end) {
+ copylen = min(start + firststripesize - stripes[logstripe].start, length - pos);
+
+ // a short piece after the first one is left for the trailing-row
+ // code further down, which resumes from logstripe / stripenum
+ if (!first && copylen < c->chunk_item->stripe_length) {
+ same_stripe = TRUE;
+ break;
+ }
+
+ RtlCopyMemory(&stripes[stripenum].data[firststripesize - copylen], &data2[pos], copylen);
+
+ pos += copylen;
+ first = FALSE;
}
+
+ stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
}
- // delete DIR_INDEX (0x60)
+ // seed the parity buffer with the first non-parity buffer, then XOR in the rest
+ firstdata = parity == 0 ? 1 : 0;
- searchkey.obj_id = fileref->parent->fcb->inode;
- searchkey.obj_type = TYPE_DIR_INDEX;
- searchkey.offset = fileref->index;
-
- Status = find_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- Status = STATUS_INTERNAL_ERROR;
- return Status;
- }
+ RtlCopyMemory(stripes[parity].data, stripes[firstdata].data, firststripesize);
- if (!keycmp(&searchkey, &tp.item->key)) {
- delete_tree_item(fileref->fcb->Vcb, &tp, rollback);
- TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
+ for (i = firstdata + 1; i < c->chunk_item->num_stripes; i++) {
+ if (i != parity)
+ do_xor(&stripes[parity].data[0], &stripes[i].data[0], firststripesize);
}
- if (fileref->oldutf8.Buffer) {
- ExFreePool(fileref->oldutf8.Buffer);
- fileref->oldutf8.Buffer = NULL;
+ // move on to the next row, unless the rest of the data belongs to this one
+ if (!same_stripe) {
+ stripepos = firststripesize;
+ parity = (parity + 1) % c->chunk_item->num_stripes;
}
- } else { // rename or change type
- PANSI_STRING oldutf8 = fileref->oldutf8.Buffer ? &fileref->oldutf8 : &fileref->utf8;
- UINT32 crc32, oldcrc32;
- ULONG disize;
- DIR_ITEM *di, *di2;
- KEY searchkey;
- traverse_ptr tp;
-
- crc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->utf8.Buffer, fileref->utf8.Length);
-
- if (!fileref->oldutf8.Buffer)
- oldcrc32 = crc32;
- else
- oldcrc32 = calc_crc32c(0xfffffffe, (UINT8*)fileref->oldutf8.Buffer, fileref->oldutf8.Length);
-
- // delete DIR_ITEM (0x54)
+ }
+
+ // whole rows: every data stripe is completely overwritten
+ while (length >= pos + c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 1)) {
+ UINT16 firstdata;
- Status = delete_dir_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, oldcrc32, oldutf8, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("delete_dir_item returned %08x\n", Status);
- return Status;
+ stripenum = (parity + 1) % c->chunk_item->num_stripes;
+
+ for (i = 0; i < c->chunk_item->num_stripes - 1; i++) {
+ RtlCopyMemory(&stripes[stripenum].data[stripepos], &data2[pos], c->chunk_item->stripe_length);
+
+ pos += c->chunk_item->stripe_length;
+ stripenum = (stripenum +1) % c->chunk_item->num_stripes;
}
- // add DIR_ITEM (0x54)
+ firstdata = parity == 0 ? 1 : 0;
- disize = sizeof(DIR_ITEM) - 1 + fileref->utf8.Length;
- di = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
- if (!di) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
+ RtlCopyMemory(&stripes[parity].data[stripepos], &stripes[firstdata].data[stripepos], c->chunk_item->stripe_length);
- di2 = ExAllocatePoolWithTag(PagedPool, disize, ALLOC_TAG);
- if (!di2) {
- ERR("out of memory\n");
- ExFreePool(di);
- return STATUS_INSUFFICIENT_RESOURCES;
+ for (i = firstdata + 1; i < c->chunk_item->num_stripes; i++) {
+ if (i != parity)
+ do_xor(&stripes[parity].data[stripepos], &stripes[i].data[stripepos], c->chunk_item->stripe_length);
}
- if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
- di->key.obj_id = fileref->fcb->inode;
- di->key.obj_type = TYPE_INODE_ITEM;
- di->key.offset = 0;
- } else { // subvolume
- di->key.obj_id = fileref->fcb->subvol->id;
- di->key.obj_type = TYPE_ROOT_ITEM;
- di->key.offset = 0xffffffffffffffff;
+ parity = (parity + 1) % c->chunk_item->num_stripes;
+ stripepos += c->chunk_item->stripe_length;
+ }
+
+ // trailing partial row
+ if (pos < length) {
+ UINT16 firstdata;
+
+ // if the first-row loop stopped early (same_stripe), carry on from the
+ // logical stripe and device it had reached
+ if (!same_stripe) {
+ stripenum = (parity + 1) % c->chunk_item->num_stripes;
+ i = 0;
+ } else
+ i = logstripe;
+
+ while (pos < length) {
+ ULONG copylen;
+
+ copylen = min(stripes[i].end - start - stripepos, length - pos);
+
+ RtlCopyMemory(&stripes[stripenum].data[stripepos], &data2[pos], copylen);
+
+ pos += copylen;
+ stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
+ i++;
}
- di->transid = fileref->fcb->Vcb->superblock.generation;
- di->m = 0;
- di->n = (UINT16)fileref->utf8.Length;
- di->type = fileref->fcb->type;
- RtlCopyMemory(di->name, fileref->utf8.Buffer, fileref->utf8.Length);
+ firstdata = parity == 0 ? 1 : 0;
- RtlCopyMemory(di2, di, disize);
+ RtlCopyMemory(&stripes[parity].data[stripepos], &stripes[firstdata].data[stripepos], laststripesize);
- Status = add_dir_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, crc32, di, disize, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("add_dir_item returned %08x\n", Status);
- return Status;
+ for (i = firstdata + 1; i < c->chunk_item->num_stripes; i++) {
+ if (i != parity)
+ do_xor(&stripes[parity].data[stripepos], &stripes[i].data[stripepos], laststripesize);
}
+ }
+
+ // every device, parity included, is now written over the same range
+ for (i = 0; i < c->chunk_item->num_stripes; i++) {
+ stripes[i].start = start;
+ stripes[i].end = end;
+ }
+
+ return STATUS_SUCCESS;
+}
+
+static NTSTATUS prepare_raid6_write(PIRP Irp, chunk* c, UINT64 address, void* data, UINT32 length, write_stripe* stripes) {
+ UINT64 startoff, endoff;
+ UINT16 startoffstripe, endoffstripe, stripenum, parity1, parity2, logstripe;
+ UINT64 start = 0xffffffffffffffff, end = 0;
+ UINT64 pos, stripepos;
+ UINT32 firststripesize, laststripesize;
+ UINT32 i;
+ UINT8* data2 = (UINT8*)data;
+ UINT32 num_reads;
+ BOOL same_stripe = FALSE, multiple_stripes;
+
+ get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes - 2, &startoff, &startoffstripe);
+ get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes - 2, &endoff, &endoffstripe);
+
+ for (i = 0; i < c->chunk_item->num_stripes - 2; i++) {
+ UINT64 ststart, stend;
- if (fileref->parent->fcb->subvol == fileref->fcb->subvol) {
- // delete INODE_REF (0xc)
-
- Status = delete_inode_ref(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, fileref->parent->fcb->inode, oldutf8, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("delete_inode_ref returned %08x\n", Status);
- return Status;
- }
-
- // add INODE_REF (0xc)
+ if (startoffstripe > i) {
+ ststart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
+ } else if (startoffstripe == i) {
+ ststart = startoff;
+ } else {
+ ststart = startoff - (startoff % c->chunk_item->stripe_length);
+ }
+
+ if (endoffstripe > i) {
+ stend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
+ } else if (endoffstripe == i) {
+ stend = endoff + 1;
+ } else {
+ stend = endoff - (endoff % c->chunk_item->stripe_length);
+ }
+
+ if (ststart != stend) {
+ stripes[i].start = ststart;
+ stripes[i].end = stend;
- Status = add_inode_ref(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->fcb->inode, fileref->parent->fcb->inode, fileref->index, &fileref->utf8, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("add_inode_ref returned %08x\n", Status);
- return Status;
+ if (ststart < start) {
+ start = ststart;
+ firststripesize = c->chunk_item->stripe_length - (ststart % c->chunk_item->stripe_length);
}
- } else { // subvolume
- ULONG rrlen;
- ROOT_REF* rr;
-
- // FIXME - make sure this works with duff subvols within snapshots
-
- Status = delete_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, fileref->parent->fcb->inode, oldutf8, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("delete_root_ref returned %08x\n", Status);
+
+ if (stend > end) {
+ end = stend;
+ laststripesize = stend % c->chunk_item->stripe_length;
+ if (laststripesize == 0)
+ laststripesize = c->chunk_item->stripe_length;
}
+ }
+ }
+
+ if (start == end) {
+ ERR("error: start == end (%llx)\n", start);
+ return STATUS_INTERNAL_ERROR;
+ }
+
+ if (startoffstripe == endoffstripe && start / c->chunk_item->stripe_length == end / c->chunk_item->stripe_length) {
+ firststripesize = end - start;
+ laststripesize = firststripesize;
+ }
+
+ for (i = 0; i < c->chunk_item->num_stripes; i++) {
+ stripes[i].data = ExAllocatePoolWithTag(NonPagedPool, end - start, ALLOC_TAG);
+ if (!stripes[i].data) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ if (i < c->chunk_item->num_stripes - 2) {
+ if (stripes[i].start == 0 && stripes[i].end == 0)
+ stripes[i].start = stripes[i].end = start;
+ }
+ }
+
+ num_reads = 0;
+ multiple_stripes = (end - 1) / c->chunk_item->stripe_length != start / c->chunk_item->stripe_length;
+
+ for (i = 0; i < c->chunk_item->num_stripes - 2; i++) {
+ if (stripes[i].start == stripes[i].end) {
+ num_reads++;
- rrlen = sizeof(ROOT_REF) - 1 + fileref->utf8.Length;
+ if (multiple_stripes)
+ num_reads++;
+ } else {
+ if (stripes[i].start > start)
+ num_reads++;
- rr = ExAllocatePoolWithTag(PagedPool, rrlen, ALLOC_TAG);
- if (!rr) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
+ if (stripes[i].end < end)
+ num_reads++;
+ }
+ }
+
+ if (num_reads > 0) {
+ UINT32 j;
+ read_stripe_master* master;
+ read_stripe* read_stripes;
+ CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
+ NTSTATUS Status;
+
+ master = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_stripe_master), ALLOC_TAG);
+ if (!master) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ read_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_stripe) * num_reads, ALLOC_TAG);
+ if (!read_stripes) {
+ ERR("out of memory\n");
+ ExFreePool(master);
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ parity1 = (((address - c->offset) / ((c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes;
+ stripenum = (parity1 + 2) % c->chunk_item->num_stripes;
+
+ j = 0;
+ for (i = 0; i < c->chunk_item->num_stripes - 2; i++) {
+ if (stripes[i].start > start || stripes[i].start == stripes[i].end) {
+ ULONG readlen;
+
+ read_stripes[j].Irp = NULL;
+ read_stripes[j].devobj = c->devices[stripenum]->devobj;
+ read_stripes[j].master = master;
+
+ if (stripes[i].start != stripes[i].end)
+ readlen = stripes[i].start - start;
+ else
+ readlen = firststripesize;
+
+ Status = make_read_irp(Irp, &read_stripes[j], start + cis[stripenum].offset, stripes[stripenum].data, readlen);
+
+ if (!NT_SUCCESS(Status)) {
+ ERR("make_read_irp returned %08x\n", Status);
+ j++;
+ goto readend;
+ }
+
+ stripes[stripenum].skip_start = readlen;
+
+ j++;
+ if (j == num_reads) break;
}
- rr->dir = fileref->parent->fcb->inode;
- rr->index = fileref->index;
- rr->n = fileref->utf8.Length;
- RtlCopyMemory(rr->name, fileref->utf8.Buffer, fileref->utf8.Length);
+ stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
+ }
+
+ if (j < num_reads) {
+ parity1 = (((address + length - 1 - c->offset) / ((c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes;
+ stripenum = (parity1 + 2) % c->chunk_item->num_stripes;
- Status = add_root_ref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, rr, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("add_root_ref returned %08x\n", Status);
- return Status;
+ for (i = 0; i < c->chunk_item->num_stripes - 2; i++) {
+ if ((stripes[i].start != stripes[i].end && stripes[i].end < end) || (stripes[i].start == stripes[i].end && multiple_stripes)) {
+ read_stripes[j].Irp = NULL;
+ read_stripes[j].devobj = c->devices[stripenum]->devobj;
+ read_stripes[j].master = master;
+
+ if (stripes[i].start == stripes[i].end) {
+ Status = make_read_irp(Irp, &read_stripes[j], start + firststripesize + cis[stripenum].offset, &stripes[stripenum].data[firststripesize], laststripesize);
+ stripes[stripenum].skip_end = laststripesize;
+ } else {
+ Status = make_read_irp(Irp, &read_stripes[j], stripes[i].end + cis[stripenum].offset, &stripes[stripenum].data[stripes[i].end - start], end - stripes[i].end);
+ stripes[stripenum].skip_end = end - stripes[i].end;
+ }
+
+ if (!NT_SUCCESS(Status)) {
+ ERR("make_read_irp returned %08x\n", Status);
+ j++;
+ goto readend;
+ }
+
+ j++;
+ if (j == num_reads) break;
+ }
+
+ stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
}
-
- Status = update_root_backref(fileref->fcb->Vcb, fileref->fcb->subvol->id, fileref->parent->fcb->subvol->id, Irp, rollback);
+ }
+
+ master->stripes_left = j;
+ KeInitializeEvent(&master->event, NotificationEvent, FALSE);
+
+ for (i = 0; i < j; i++) {
+ Status = IoCallDriver(read_stripes[i].devobj, read_stripes[i].Irp);
if (!NT_SUCCESS(Status)) {
- ERR("update_root_backref returned %08x\n", Status);
- return Status;
+ ERR("IoCallDriver returned %08x\n", Status);
+ goto readend;
}
}
- // delete DIR_INDEX (0x60)
+ KeWaitForSingleObject(&master->event, Executive, KernelMode, FALSE, NULL);
- searchkey.obj_id = fileref->parent->fcb->inode;
- searchkey.obj_type = TYPE_DIR_INDEX;
- searchkey.offset = fileref->index;
-
- Status = find_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- Status = STATUS_INTERNAL_ERROR;
- return Status;
+ for (i = 0; i < j; i++) {
+ if (!NT_SUCCESS(read_stripes[i].iosb.Status)) {
+ Status = read_stripes[i].iosb.Status;
+ goto readend;
+ }
}
- if (!keycmp(&searchkey, &tp.item->key)) {
- delete_tree_item(fileref->fcb->Vcb, &tp, rollback);
- TRACE("deleting (%llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
- } else
- WARN("could not find (%llx,%x,%llx) in subvol %llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, fileref->fcb->subvol->id);
+ Status = STATUS_SUCCESS;
+
+readend:
+ for (i = 0; i < j; i++) {
+ if (read_stripes[i].Irp) {
+ if (read_stripes[i].devobj->Flags & DO_DIRECT_IO) {
+ MmUnlockPages(read_stripes[i].Irp->MdlAddress);
+ IoFreeMdl(read_stripes[i].Irp->MdlAddress);
+ }
+
+ IoFreeIrp(read_stripes[i].Irp); // FIXME - what if IoCallDriver fails and other Irps are still running?
+ }
+ }
- // add DIR_INDEX (0x60)
+ ExFreePool(read_stripes);
+ ExFreePool(master);
- if (!insert_tree_item(fileref->fcb->Vcb, fileref->parent->fcb->subvol, fileref->parent->fcb->inode, TYPE_DIR_INDEX, fileref->index, di2, disize, NULL, Irp, rollback)) {
- ERR("insert_tree_item failed\n");
- Status = STATUS_INTERNAL_ERROR;
+ if (!NT_SUCCESS(Status))
return Status;
- }
-
- if (fileref->oldutf8.Buffer) {
- ExFreePool(fileref->oldutf8.Buffer);
- fileref->oldutf8.Buffer = NULL;
- }
}
-
- fileref->dirty = FALSE;
- return STATUS_SUCCESS;
-}
-
-static void convert_shared_data_refs(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
- LIST_ENTRY* le;
- NTSTATUS Status;
+ pos = 0;
+
+ parity1 = (((address - c->offset) / ((c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes;
+ parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
+ stripepos = 0;
- le = Vcb->trees.Flink;
- while (le != &Vcb->trees) {
- tree* t = CONTAINING_RECORD(le, tree, list_entry);
+ if ((address - c->offset) % (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 2)) > 0) {
+ BOOL first = TRUE;
- if (t->write && t->header.level == 0 &&
- (t->header.flags & HEADER_FLAG_SHARED_BACKREF || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF))) {
- LIST_ENTRY* le2;
- BOOL old = !(t->header.flags & HEADER_FLAG_MIXED_BACKREF);
+ stripenum = (parity2 + 1) % c->chunk_item->num_stripes;
+
+ for (logstripe = 0; logstripe < c->chunk_item->num_stripes - 2; logstripe++) {
+ ULONG copylen;
+
+ if (pos >= length)
+ break;
- le2 = Vcb->shared_extents.Flink;
- while (le2 != &Vcb->shared_extents) {
- shared_data* sd = CONTAINING_RECORD(le2, shared_data, list_entry);
+ if (stripes[logstripe].start < start + firststripesize && stripes[logstripe].start != stripes[logstripe].end) {
+ copylen = min(start + firststripesize - stripes[logstripe].start, length - pos);
- if (sd->address == t->header.address) {
- LIST_ENTRY* le3 = sd->entries.Flink;
- while (le3 != &sd->entries) {
- shared_data_entry* sde = CONTAINING_RECORD(le3, shared_data_entry, list_entry);
-
- TRACE("tree %llx; root %llx, objid %llx, offset %llx, count %x\n",
- t->header.address, sde->edr.root, sde->edr.objid, sde->edr.offset, sde->edr.count);
-
- Status = increase_extent_refcount_data(Vcb, sde->address, sde->size, sde->edr.root, sde->edr.objid, sde->edr.offset, sde->edr.count, Irp, rollback);
-
- if (!NT_SUCCESS(Status))
- WARN("increase_extent_refcount_data returned %08x\n", Status);
-
- if (old) {
- Status = decrease_extent_refcount_old(Vcb, sde->address, sde->size, sd->address, Irp, rollback);
-
- if (!NT_SUCCESS(Status))
- WARN("decrease_extent_refcount_old returned %08x\n", Status);
- } else {
- Status = decrease_extent_refcount_shared_data(Vcb, sde->address, sde->size, sd->address, sd->parent, Irp, rollback);
-
- if (!NT_SUCCESS(Status))
- WARN("decrease_extent_refcount_shared_data returned %08x\n", Status);
- }
-
- le3 = le3->Flink;
- }
+ if (!first && copylen < c->chunk_item->stripe_length) {
+ same_stripe = TRUE;
break;
}
+
+ RtlCopyMemory(&stripes[stripenum].data[firststripesize - copylen], &data2[pos], copylen);
- le2 = le2->Flink;
+ pos += copylen;
+ first = FALSE;
}
- t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
- t->header.flags |= HEADER_FLAG_MIXED_BACKREF;
+ stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
}
- le = le->Flink;
- }
-}
-
-static NTSTATUS add_root_item_to_cache(device_extension* Vcb, UINT64 root, PIRP Irp, LIST_ENTRY* rollback) {
- KEY searchkey;
- traverse_ptr tp;
- NTSTATUS Status;
-
- searchkey.obj_id = root;
- searchkey.obj_type = TYPE_ROOT_ITEM;
- searchkey.offset = 0xffffffffffffffff;
-
- Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- return Status;
- }
-
- if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
- ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
- int3;
- return STATUS_INTERNAL_ERROR;
+ i = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
+ RtlCopyMemory(stripes[parity1].data, stripes[i].data, firststripesize);
+ RtlCopyMemory(stripes[parity2].data, stripes[i].data, firststripesize);
+ i = i == 0 ? (c->chunk_item->num_stripes - 1) : (i - 1);
+
+ do {
+ do_xor(stripes[parity1].data, stripes[i].data, firststripesize);
+
+ galois_double(stripes[parity2].data, firststripesize);
+ do_xor(stripes[parity2].data, stripes[i].data, firststripesize);
+
+ i = i == 0 ? (c->chunk_item->num_stripes - 1) : (i - 1);
+ } while (i != parity2);
+
+ if (!same_stripe) {
+ stripepos = firststripesize;
+ parity1 = parity2;
+ parity2 = (parity2 + 1) % c->chunk_item->num_stripes;
+ }
}
- if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, create new entry with new bits zeroed
- ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
- if (!ri) {
- ERR("out of memory\n");
- return STATUS_INSUFFICIENT_RESOURCES;
- }
+ while (length >= pos + c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 2)) {
+ stripenum = (parity2 + 1) % c->chunk_item->num_stripes;
- if (tp.item->size > 0)
- RtlCopyMemory(ri, tp.item->data, tp.item->size);
-
- RtlZeroMemory(((UINT8*)ri) + tp.item->size, sizeof(ROOT_ITEM) - tp.item->size);
+ for (i = 0; i < c->chunk_item->num_stripes - 2; i++) {
+ RtlCopyMemory(&stripes[stripenum].data[stripepos], &data2[pos], c->chunk_item->stripe_length);
+
+ pos += c->chunk_item->stripe_length;
+ stripenum = (stripenum +1) % c->chunk_item->num_stripes;
+ }
- delete_tree_item(Vcb, &tp, rollback);
+ i = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
+ RtlCopyMemory(&stripes[parity1].data[stripepos], &stripes[i].data[stripepos], c->chunk_item->stripe_length);
+ RtlCopyMemory(&stripes[parity2].data[stripepos], &stripes[i].data[stripepos], c->chunk_item->stripe_length);
+ i = i == 0 ? (c->chunk_item->num_stripes - 1) : (i - 1);
+
+ do {
+ do_xor(&stripes[parity1].data[stripepos], &stripes[i].data[stripepos], c->chunk_item->stripe_length);
+
+ galois_double(&stripes[parity2].data[stripepos], c->chunk_item->stripe_length);
+ do_xor(&stripes[parity2].data[stripepos], &stripes[i].data[stripepos], c->chunk_item->stripe_length);
+
+ i = i == 0 ? (c->chunk_item->num_stripes - 1) : (i - 1);
+ } while (i != parity2);
- if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, Irp, rollback)) {
- ERR("insert_tree_item failed\n");
- return STATUS_INTERNAL_ERROR;
- }
- } else {
- tp.tree->write = TRUE;
+ parity1 = parity2;
+ parity2 = (parity2 + 1) % c->chunk_item->num_stripes;
+ stripepos += c->chunk_item->stripe_length;
}
- return STATUS_SUCCESS;
-}
-
-static NTSTATUS add_root_items_to_cache(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
- LIST_ENTRY* le;
- NTSTATUS Status;
-
- le = Vcb->trees.Flink;
- while (le != &Vcb->trees) {
- tree* t = CONTAINING_RECORD(le, tree, list_entry);
+ if (pos < length) {
+ if (!same_stripe) {
+ stripenum = (parity2 + 1) % c->chunk_item->num_stripes;
+ i = 0;
+ } else
+ i = logstripe;
- if (t->write && t->root != Vcb->chunk_root && t->root != Vcb->root_root) {
- Status = add_root_item_to_cache(Vcb, t->root->id, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("add_root_item_to_cache returned %08x\n", Status);
- return Status;
- }
+ while (pos < length) {
+ ULONG copylen;
+
+ copylen = min(stripes[i].end - start - stripepos, length - pos);
+
+ RtlCopyMemory(&stripes[stripenum].data[stripepos], &data2[pos], copylen);
+
+ pos += copylen;
+ stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
+ i++;
}
- le = le->Flink;
+ i = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
+ RtlCopyMemory(&stripes[parity1].data[stripepos], &stripes[i].data[stripepos], laststripesize);
+ RtlCopyMemory(&stripes[parity2].data[stripepos], &stripes[i].data[stripepos], laststripesize);
+ i = i == 0 ? (c->chunk_item->num_stripes - 1) : (i - 1);
+
+ do {
+ do_xor(&stripes[parity1].data[stripepos], &stripes[i].data[stripepos], laststripesize);
+
+ galois_double(&stripes[parity2].data[stripepos], laststripesize);
+ do_xor(&stripes[parity2].data[stripepos], &stripes[i].data[stripepos], laststripesize);
+
+ i = i == 0 ? (c->chunk_item->num_stripes - 1) : (i - 1);
+ } while (i != parity2);
}
- // make sure we always update the extent tree
- Status = add_root_item_to_cache(Vcb, BTRFS_ROOT_EXTENT, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("add_root_item_to_cache returned %08x\n", Status);
- return Status;
+ for (i = 0; i < c->chunk_item->num_stripes; i++) {
+ stripes[i].start = start;
+ stripes[i].end = end;
}
return STATUS_SUCCESS;
}
-NTSTATUS STDCALL do_write(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
+// Prepares (but does not send) the per-device write IRPs for `length` bytes of `data` at
+// logical address `address`, appending one write_data_stripe per chunk stripe to wtc->stripes.
+// If c is NULL the chunk is looked up from the address.
+// For RAID0/RAID10/RAID5/RAID6 chunks the stripe buffers come from the prepare_*_write helpers
+// and are marked for freeing; `data` itself is released here when need_free is set. For any
+// other chunk type the caller's buffer is written to every stripe and need_free is passed on.
+// wtc always stays owned by the caller: the early failure returns never touched it, so the
+// late failure path must not free it either. When a failure happens after stripes have been
+// queued, every stripe on wtc->stripes is released and the list is left empty.
+// Returns STATUS_SUCCESS, or the failing NTSTATUS.
+NTSTATUS STDCALL write_data(device_extension* Vcb, UINT64 address, void* data, BOOL need_free, UINT32 length, write_data_context* wtc, PIRP Irp, chunk* c) {
NTSTATUS Status;
- LIST_ENTRY* le;
- BOOL cache_changed = FALSE;
-
-#ifdef DEBUG_WRITE_LOOPS
- UINT loops = 0;
-#endif
-
- TRACE("(%p)\n", Vcb);
-
- while (!IsListEmpty(&Vcb->dirty_filerefs)) {
- dirty_fileref* dirt;
-
- le = RemoveHeadList(&Vcb->dirty_filerefs);
-
- dirt = CONTAINING_RECORD(le, dirty_fileref, list_entry);
-
- flush_fileref(dirt->fileref, Irp, rollback);
- free_fileref(dirt->fileref);
- ExFreePool(dirt);
- }
+ UINT32 i;
+ CHUNK_ITEM_STRIPE* cis;
+ write_data_stripe* stripe;
+ write_stripe* stripes = NULL;
+ BOOL need_free2;
- // We process deleted streams first, so we don't run over our xattr
- // limit unless we absolutely have to.
+ TRACE("(%p, %llx, %p, %x)\n", Vcb, address, data, length);
- le = Vcb->dirty_fcbs.Flink;
- while (le != &Vcb->dirty_fcbs) {
- dirty_fcb* dirt;
- LIST_ENTRY* le2 = le->Flink;
-
- dirt = CONTAINING_RECORD(le, dirty_fcb, list_entry);
-
- if (dirt->fcb->deleted && dirt->fcb->ads) {
- RemoveEntryList(le);
-
- flush_fcb(dirt->fcb, FALSE, Irp, rollback);
- free_fcb(dirt->fcb);
- ExFreePool(dirt);
+ if (!c) {
+ c = get_chunk_from_address(Vcb, address);
+ if (!c) {
+ ERR("could not get chunk for address %llx\n", address);
+ return STATUS_INTERNAL_ERROR;
}
-
- le = le2;
}
- le = Vcb->dirty_fcbs.Flink;
- while (le != &Vcb->dirty_fcbs) {
- dirty_fcb* dirt;
- LIST_ENTRY* le2 = le->Flink;
-
- dirt = CONTAINING_RECORD(le, dirty_fcb, list_entry);
-
- if (dirt->fcb->subvol != Vcb->root_root || dirt->fcb->deleted) {
- RemoveEntryList(le);
-
- flush_fcb(dirt->fcb, FALSE, Irp, rollback);
- free_fcb(dirt->fcb);
- ExFreePool(dirt);
- }
-
- le = le2;
+ // scratch array describing what to write to each stripe; freed before returning
+ stripes = ExAllocatePoolWithTag(PagedPool, sizeof(write_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
+ if (!stripes) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
}
- convert_shared_data_refs(Vcb, Irp, rollback);
+ RtlZeroMemory(stripes, sizeof(write_stripe) * c->chunk_item->num_stripes);
- ExAcquireResourceExclusiveLite(&Vcb->checksum_lock, TRUE);
- if (!IsListEmpty(&Vcb->sector_checksums)) {
- update_checksum_tree(Vcb, Irp, rollback);
- }
- ExReleaseResourceLite(&Vcb->checksum_lock);
+ // the CHUNK_ITEM_STRIPE array directly follows the CHUNK_ITEM header
+ cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
- if (!IsListEmpty(&Vcb->drop_roots)) {
- Status = drop_roots(Vcb, Irp, rollback);
-
+ if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
+ Status = prepare_raid0_write(c, address, data, length, stripes);
if (!NT_SUCCESS(Status)) {
- ERR("drop_roots returned %08x\n", Status);
+ ERR("prepare_raid0_write returned %08x\n", Status);
+ ExFreePool(stripes);
return Status;
}
- }
-
- if (!IsListEmpty(&Vcb->chunks_changed)) {
- Status = update_chunks(Vcb, Irp, rollback);
+ if (need_free)
+ ExFreePool(data);
+
+ need_free2 = TRUE;
+ } else if (c->chunk_item->type & BLOCK_FLAG_RAID10) {
+ Status = prepare_raid10_write(c, address, data, length, stripes);
if (!NT_SUCCESS(Status)) {
- ERR("update_chunks returned %08x\n", Status);
+ ERR("prepare_raid10_write returned %08x\n", Status);
+ ExFreePool(stripes);
return Status;
}
- }
-
- // If only changing superblock, e.g. changing label, we still need to rewrite
- // the root tree so the generations match, otherwise you won't be able to mount on Linux.
- if (!Vcb->root_root->treeholder.tree || !Vcb->root_root->treeholder.tree->write) {
- KEY searchkey;
-
- traverse_ptr tp;
-
- searchkey.obj_id = 0;
- searchkey.obj_type = 0;
- searchkey.offset = 0;
- Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, Irp);
+ if (need_free)
+ ExFreePool(data);
+
+ need_free2 = TRUE;
+ } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
+ Status = prepare_raid5_write(Irp, c, address, data, length, stripes);
if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
+ ERR("prepare_raid5_write returned %08x\n", Status);
+ ExFreePool(stripes);
return Status;
}
- Vcb->root_root->treeholder.tree->write = TRUE;
- }
-
- Status = add_root_items_to_cache(Vcb, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("add_root_items_to_cache returned %08x\n", Status);
- return Status;
- }
-
- do {
- Status = add_parents(Vcb, rollback);
+ if (need_free)
+ ExFreePool(data);
+
+ need_free2 = TRUE;
+ } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
+ Status = prepare_raid6_write(Irp, c, address, data, length, stripes);
if (!NT_SUCCESS(Status)) {
- ERR("add_parents returned %08x\n", Status);
- goto end;
+ ERR("prepare_raid6_write returned %08x\n", Status);
+ ExFreePool(stripes);
+ return Status;
}
- Status = do_splits(Vcb, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("do_splits returned %08x\n", Status);
- goto end;
+ if (need_free)
+ ExFreePool(data);
+
+ need_free2 = TRUE;
+ } else { // write same data to every location - SINGLE, DUP, RAID1
+ for (i = 0; i < c->chunk_item->num_stripes; i++) {
+ stripes[i].start = address - c->offset;
+ stripes[i].end = stripes[i].start + length;
+ stripes[i].data = data;
}
+ need_free2 = need_free;
+ }
+
+ for (i = 0; i < c->chunk_item->num_stripes; i++) {
+ PIO_STACK_LOCATION IrpSp;
- Status = allocate_tree_extents(Vcb, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("add_parents returned %08x\n", Status);
- goto end;
- }
+ // FIXME - handle missing devices
- Status = update_chunk_usage(Vcb, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("update_chunk_usage returned %08x\n", Status);
+ stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_stripe), ALLOC_TAG);
+ if (!stripe) {
+ ERR("out of memory\n");
+ Status = STATUS_INSUFFICIENT_RESOURCES;
goto end;
}
- Status = allocate_cache(Vcb, &cache_changed, Irp, rollback);
- if (!NT_SUCCESS(Status)) {
- ERR("allocate_cache returned %08x\n", Status);
- goto end;
+ // nothing left to write once the skipped head and tail are taken off: queue a
+ // placeholder so the buffer is still tracked, but build no IRP for it
+ if (stripes[i].start + stripes[i].skip_start == stripes[i].end - stripes[i].skip_end || stripes[i].start == stripes[i].end) {
+ stripe->status = WriteDataStatus_Ignore;
+ stripe->Irp = NULL;
+ stripe->buf = stripes[i].data;
+ stripe->need_free = need_free2;
+ } else {
+ stripe->context = (struct _write_data_context*)wtc;
+ stripe->buf = stripes[i].data;
+ stripe->need_free = need_free2;
+ stripe->device = c->devices[i];
+ RtlZeroMemory(&stripe->iosb, sizeof(IO_STATUS_BLOCK));
+ stripe->status = WriteDataStatus_Pending;
+
+ // Until InsertTailList below, `stripe` is not reachable from wtc->stripes, so each
+ // failure exit in this branch has to release it (and its IRP) by hand.
+ if (!Irp) {
+ stripe->Irp = IoAllocateIrp(stripe->device->devobj->StackSize, FALSE);
+
+ if (!stripe->Irp) {
+ ERR("IoAllocateIrp failed\n");
+ ExFreePool(stripe);
+ Status = STATUS_INTERNAL_ERROR;
+ goto end;
+ }
+ } else {
+ stripe->Irp = IoMakeAssociatedIrp(Irp, stripe->device->devobj->StackSize);
+
+ if (!stripe->Irp) {
+ ERR("IoMakeAssociatedIrp failed\n");
+ ExFreePool(stripe);
+ Status = STATUS_INTERNAL_ERROR;
+ goto end;
+ }
+ }
+
+ IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
+ IrpSp->MajorFunction = IRP_MJ_WRITE;
+
+ if (stripe->device->devobj->Flags & DO_BUFFERED_IO) {
+ stripe->Irp->AssociatedIrp.SystemBuffer = stripes[i].data + stripes[i].skip_start;
+
+ stripe->Irp->Flags = IRP_BUFFERED_IO;
+ } else if (stripe->device->devobj->Flags & DO_DIRECT_IO) {
+ stripe->Irp->MdlAddress = IoAllocateMdl(stripes[i].data + stripes[i].skip_start,
+ stripes[i].end - stripes[i].start - stripes[i].skip_start - stripes[i].skip_end, FALSE, FALSE, NULL);
+ if (!stripe->Irp->MdlAddress) {
+ ERR("IoAllocateMdl failed\n");
+ IoFreeIrp(stripe->Irp);
+ ExFreePool(stripe);
+ Status = STATUS_INTERNAL_ERROR;
+ goto end;
+ }
+
+ MmProbeAndLockPages(stripe->Irp->MdlAddress, KernelMode, IoWriteAccess);
+ } else {
+ stripe->Irp->UserBuffer = stripes[i].data + stripes[i].skip_start;
+ }
+
+#ifdef DEBUG_PARANOID
+ if (stripes[i].end < stripes[i].start + stripes[i].skip_start + stripes[i].skip_end) {
+ ERR("trying to write stripe with negative length (%llx < %llx + %x + %x)\n",
+ stripes[i].end, stripes[i].start, stripes[i].skip_start, stripes[i].skip_end);
+ int3;
+ }
+#endif
+
+ IrpSp->Parameters.Write.Length = stripes[i].end - stripes[i].start - stripes[i].skip_start - stripes[i].skip_end;
+ IrpSp->Parameters.Write.ByteOffset.QuadPart = stripes[i].start + cis[i].offset + stripes[i].skip_start;
+
+ stripe->Irp->UserIosb = &stripe->iosb;
+ wtc->stripes_left++;
+
+ IoSetCompletionRoutine(stripe->Irp, write_data_completion, stripe, TRUE, TRUE, TRUE);
}
-#ifdef DEBUG_WRITE_LOOPS
- loops++;
-
- if (cache_changed)
- ERR("cache has changed, looping again\n");
-#endif
- } while (cache_changed || !trees_consistent(Vcb, rollback));
+ InsertTailList(&wtc->stripes, &stripe->list_entry);
+ }
-#ifdef DEBUG_WRITE_LOOPS
- ERR("%u loops\n", loops);
-#endif
+ Status = STATUS_SUCCESS;
- TRACE("trees consistent\n");
+end:
+
+ if (stripes) ExFreePool(stripes);
- Status = update_root_root(Vcb, Irp, rollback);
if (!NT_SUCCESS(Status)) {
- ERR("update_root_root returned %08x\n", Status);
- goto end;
+ // Release what was queued, then leave an empty list behind so that a caller which
+ // runs its own free_write_data_stripes(wtc) on failure does not walk freed entries.
+ // wtc itself is not freed here: it belongs to the caller.
+ free_write_data_stripes(wtc);
+ InitializeListHead(&wtc->stripes);
}
- Status = write_trees(Vcb, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("write_trees returned %08x\n", Status);
- goto end;
- }
+ return Status;
+}
+
+// Computes the logical range (*lockaddr, *locklen) that write_data_complete hands to
+// chunk_lock_range/chunk_unlock_range around a RAID5/RAID6 write of `length` bytes at `address`.
+// The lowest start offset and highest end offset touched on any data stripe are found first,
+// then mapped back to logical addresses inside chunk c.
+void get_raid56_lock_range(chunk* c, UINT64 address, UINT64 length, UINT64* lockaddr, UINT64* locklen) {
+ UINT64 first_off, last_off, first_row, last_row;
+ UINT16 first_stripe, last_stripe, datastripes;
+ UINT64 lo = 0xffffffffffffffff, hi = 0, logend, tail;
+ UINT16 n;
- Vcb->superblock.cache_generation = Vcb->superblock.generation;
+ // num_stripes less one for RAID5 chunks, less two otherwise
+ datastripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2);
- Status = write_superblocks(Vcb, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("write_superblocks returned %08x\n", Status);
- goto end;
+ // locate the first and the last byte of the write
+ // (presumably: offset within the stripe plus stripe index - confirm against get_raid0_offset)
+ get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, datastripes, &first_off, &first_stripe);
+ get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, datastripes, &last_off, &last_stripe);
+
+ // the same two offsets rounded down to a multiple of stripe_length
+ first_row = first_off - (first_off % c->chunk_item->stripe_length);
+ last_row = last_off - (last_off % c->chunk_item->stripe_length);
+
+ for (n = 0; n < datastripes; n++) {
+ UINT64 ststart = first_row, stend = last_row;
+
+ // stripes before the one holding the first byte start one stripe_length later
+ if (n == first_stripe)
+ ststart = first_off;
+ else if (n < first_stripe)
+ ststart += c->chunk_item->stripe_length;
+
+ // stripes before the one holding the last byte run to the end of that unit
+ if (n == last_stripe)
+ stend = last_off + 1;
+ else if (n < last_stripe)
+ stend += c->chunk_item->stripe_length;
+
+ // a stripe with an empty range does not affect the bounds
+ if (ststart == stend)
+ continue;
+
+ if (ststart < lo)
+ lo = ststart;
+
+ if (stend > hi)
+ hi = stend;
}
- clean_space_cache(Vcb);
+ *lockaddr = c->offset + ((lo / c->chunk_item->stripe_length) * c->chunk_item->stripe_length * datastripes) +
+ lo % c->chunk_item->stripe_length;
+
+ // NOTE(review): when hi is an exact multiple of stripe_length, hi / stripe_length already
+ // names the following row, so the range appears to extend one full row past the write.
+ // Confirm whether that is intended.
+ tail = hi % c->chunk_item->stripe_length;
+ if (tail == 0)
+ tail = c->chunk_item->stripe_length;
+
+ logend = c->offset + ((hi / c->chunk_item->stripe_length) * c->chunk_item->stripe_length * datastripes);
+ logend += c->chunk_item->stripe_length * (datastripes - 1);
+ logend += tail;
+ *locklen = logend - *lockaddr;
+}
+
+// Synchronously writes `length` bytes of `data` to logical address `address`.
+// Allocates a write_data_context, has write_data build the per-stripe IRPs, sends them and
+// waits on the context's event, which write_data_completion sets when the last stripe finishes.
+// For RAID5/RAID6 chunks the range given by get_raid56_lock_range is locked for the duration.
+// If c is NULL the chunk is looked up from the address.
+// Returns write_data's error, the status of the first failed stripe, or STATUS_SUCCESS.
+NTSTATUS STDCALL write_data_complete(device_extension* Vcb, UINT64 address, void* data, UINT32 length, PIRP Irp, chunk* c) {
+ write_data_context* wtc;
+ NTSTATUS Status;
+ UINT64 lockaddr, locklen;
+// #ifdef DEBUG_PARANOID
+// UINT8* buf2;
+// #endif
- Vcb->superblock.generation++;
+ wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context), ALLOC_TAG);
+ if (!wtc) {
+ ERR("out of memory\n");
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE);
+ InitializeListHead(&wtc->stripes);
+ wtc->tree = FALSE;
+ wtc->stripes_left = 0;
- Status = STATUS_SUCCESS;
+ if (!c) {
+ c = get_chunk_from_address(Vcb, address);
+ if (!c) {
+ ERR("could not get chunk for address %llx\n", address);
+ ExFreePool(wtc); // nothing has been queued on it yet, so the context alone is released
+ return STATUS_INTERNAL_ERROR;
+ }
+ }
- le = Vcb->trees.Flink;
- while (le != &Vcb->trees) {
- tree* t = CONTAINING_RECORD(le, tree, list_entry);
-
-#ifdef DEBUG_PARANOID
- KEY searchkey;
- traverse_ptr tp;
+ if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) {
+ get_raid56_lock_range(c, address, length, &lockaddr, &locklen);
+ chunk_lock_range(Vcb, c, lockaddr, locklen);
+ }
+
+ Status = write_data(Vcb, address, data, FALSE, length, wtc, Irp, c);
+ if (!NT_SUCCESS(Status)) {
+ ERR("write_data returned %08x\n", Status);
- searchkey.obj_id = t->header.address;
- searchkey.obj_type = TYPE_METADATA_ITEM;
- searchkey.offset = 0xffffffffffffffff;
+ if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
+ chunk_unlock_range(Vcb, c, lockaddr, locklen);
- Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- int3;
+ // NOTE(review): this assumes write_data leaves wtc allocated when it fails - confirm,
+ // otherwise the two calls below touch freed memory.
+ free_write_data_stripes(wtc);
+ ExFreePool(wtc);
+ return Status;
+ }
+
+ if (wtc->stripes.Flink != &wtc->stripes) {
+ // launch writes and wait
+ LIST_ENTRY* le = wtc->stripes.Flink;
+ while (le != &wtc->stripes) {
+ write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
+
+ if (stripe->status != WriteDataStatus_Ignore)
+ IoCallDriver(stripe->device->devobj, stripe->Irp);
+
+ le = le->Flink;
}
- if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
- searchkey.obj_id = t->header.address;
- searchkey.obj_type = TYPE_EXTENT_ITEM;
- searchkey.offset = 0xffffffffffffffff;
+ KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL);
+
+ // report the first stripe that failed; Status still holds write_data's success otherwise
+ le = wtc->stripes.Flink;
+ while (le != &wtc->stripes) {
+ write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
- Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- int3;
+ if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
+ Status = stripe->iosb.Status;
+ break;
}
- if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
- ERR("error - could not find entry in extent tree for tree at %llx\n", t->header.address);
- int3;
- }
+ le = le->Flink;
}
-#endif
- t->write = FALSE;
-
- le = le->Flink;
+ free_write_data_stripes(wtc);
}
- Vcb->need_write = FALSE;
-
- while (!IsListEmpty(&Vcb->drop_roots)) {
- LIST_ENTRY* le = RemoveHeadList(&Vcb->drop_roots);
- root* r = CONTAINING_RECORD(le, root, list_entry);
+ if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
+ chunk_unlock_range(Vcb, c, lockaddr, locklen);
- ExDeleteResourceLite(&r->nonpaged->load_tree_lock);
- ExFreePool(r->nonpaged);
- ExFreePool(r);
- }
-
-end:
- TRACE("do_write returning %08x\n", Status);
-
- return Status;
-}
+ ExFreePool(wtc);
-static __inline BOOL entry_in_ordered_list(LIST_ENTRY* list, UINT64 value) {
- LIST_ENTRY* le = list->Flink;
- ordered_list* ol;
-
- while (le != list) {
- ol = (ordered_list*)le;
-
- if (ol->key > value)
- return FALSE;
- else if (ol->key == value)
- return TRUE;
-
- le = le->Flink;
- }
-
- return FALSE;
-}
+// #ifdef DEBUG_PARANOID
+// buf2 = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
+// Status = read_data(Vcb, address, length, NULL, FALSE, buf2, NULL, Irp);
+//
+// if (!NT_SUCCESS(Status) || RtlCompareMemory(buf2, data, length) != length)
+// int3;
+//
+// ExFreePool(buf2);
+// #endif
-static changed_extent* get_changed_extent_item(chunk* c, UINT64 address, UINT64 size, BOOL no_csum) {
- LIST_ENTRY* le;
- changed_extent* ce;
-
- le = c->changed_extents.Flink;
- while (le != &c->changed_extents) {
- ce = CONTAINING_RECORD(le, changed_extent, list_entry);
-
- if (ce->address == address && ce->size == size)
- return ce;
-
- le = le->Flink;
- }
-
- ce = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent), ALLOC_TAG);
- if (!ce) {
- ERR("out of memory\n");
- return NULL;
- }
-
- ce->address = address;
- ce->size = size;
- ce->old_size = size;
- ce->count = 0;
- ce->old_count = 0;
- ce->no_csum = no_csum;
- InitializeListHead(&ce->refs);
- InitializeListHead(&ce->old_refs);
-
- InsertTailList(&c->changed_extents, &ce->list_entry);
-
- return ce;
+ // a failed stripe must be reported to the caller rather than masked as success
+ return Status;
}
-NTSTATUS update_changed_extent_ref(device_extension* Vcb, chunk* c, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, signed long long count,
- BOOL no_csum, UINT64 new_size, PIRP Irp) {
+static NTSTATUS STDCALL write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) { // completion routine installed on each stripe IRP by write_data; conptr is that IRP's write_data_stripe
+ write_data_stripe* stripe = conptr;
+ write_data_context* context = (write_data_context*)stripe->context; // context shared by every stripe of the same write
LIST_ENTRY* le;
- changed_extent* ce;
- changed_extent_ref* cer;
- NTSTATUS Status;
- KEY searchkey;
- traverse_ptr tp;
- UINT64 old_count;
- ExAcquireResourceExclusiveLite(&c->changed_extents_lock, TRUE);
-
- ce = get_changed_extent_item(c, address, size, no_csum);
+ // FIXME - we need a lock here
- if (!ce) {
- ERR("get_changed_extent_item failed\n");
- Status = STATUS_INTERNAL_ERROR;
+ if (stripe->status == WriteDataStatus_Cancelling) { // a sibling stripe failed and asked for this IRP to be cancelled (see the loop below)
+ stripe->status = WriteDataStatus_Cancelled; // iosb is not recorded on this path, so it keeps whatever it held before
goto end;
}
- if (IsListEmpty(&ce->refs) && IsListEmpty(&ce->old_refs)) { // new entry
- searchkey.obj_id = address;
- searchkey.obj_type = TYPE_EXTENT_ITEM;
- searchkey.offset = 0xffffffffffffffff;
-
- Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- goto end;
- }
-
- if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
- ERR("could not find address %llx in extent tree\n", address);
- Status = STATUS_INTERNAL_ERROR;
- goto end;
- }
+ stripe->iosb = Irp->IoStatus; // keep the result so the waiter can inspect it after the event fires
+
+ if (NT_SUCCESS(Irp->IoStatus.Status)) {
+ stripe->status = WriteDataStatus_Success;
+ } else {
+ le = context->stripes.Flink;
- if (tp.item->key.offset != size) {
- ERR("extent %llx had size %llx, not %llx as expected\n", address, tp.item->key.offset, size);
- Status = STATUS_INTERNAL_ERROR;
- goto end;
- }
+ stripe->status = WriteDataStatus_Error;
- if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
- EXTENT_ITEM_V0* eiv0 = (EXTENT_ITEM_V0*)tp.item->data;
+ while (le != &context->stripes) { // this write failed: cancel every sibling stripe that is still pending
+ write_data_stripe* s2 = CONTAINING_RECORD(le, write_data_stripe, list_entry);
- ce->count = ce->old_count = eiv0->refcount;
- } else if (tp.item->size >= sizeof(EXTENT_ITEM)) {
- EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
+ if (s2->status == WriteDataStatus_Pending) {
+ s2->status = WriteDataStatus_Cancelling; // marked before IoCancelIrp so that stripe's own completion takes the early-out above
+ IoCancelIrp(s2->Irp);
+ }
- ce->count = ce->old_count = ei->refcount;
- } else {
- ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
- Status = STATUS_INTERNAL_ERROR;
- goto end;
+ le = le->Flink;
}
}
- ce->size = new_size;
+end:
+ if (InterlockedDecrement(&context->stripes_left) == 0) // the last stripe to complete signals the context's event
+ KeSetEvent(&context->Event, 0, FALSE);
+
+ return STATUS_MORE_PROCESSING_REQUIRED; // per the IoCompletion routine contract this stops further completion processing of the IRP
+}
+
+void free_write_data_stripes(write_data_context* wtc) { // Releases what the stripes on wtc->stripes hold: pass 1 unlocks and frees the MDLs of direct-I/O IRPs, pass 2 frees need_free buffers and the stripe structures. NOTE(review): the IRPs and wtc itself are not freed here, and wtc->stripes is left pointing at freed entries - confirm callers deal with both.
+ LIST_ENTRY *le, *le2, *nextle;
- le = ce->refs.Flink;
- while (le != &ce->refs) {
- cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
+ le = wtc->stripes.Flink;
+ while (le != &wtc->stripes) { // pass 1: MDL teardown only, nothing is unlinked or freed from the list yet
+ write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
- if (cer->edr.root == root && cer->edr.objid == objid && cer->edr.offset == offset) {
- ce->count += count;
- cer->edr.count += count;
- Status = STATUS_SUCCESS;
- goto end;
+ if (stripe->Irp) {
+ if (stripe->device->devobj->Flags & DO_DIRECT_IO) { // only IRPs aimed at direct-I/O devices are touched; presumably those are the ones whose MDL was built and locked when the IRP was set up - TODO confirm
+ MmUnlockPages(stripe->Irp->MdlAddress);
+ IoFreeMdl(stripe->Irp->MdlAddress);
+ }
}
le = le->Flink;
}
- old_count = find_extent_data_refcount(Vcb, address, size, root, objid, offset, Irp);
-
- if (old_count > 0) {
- cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
-
- if (!cer) {
- ERR("out of memory\n");
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto end;
+ le = wtc->stripes.Flink;
+ while (le != &wtc->stripes) { // pass 2: free buffers and the stripes themselves
+ write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
+
+ nextle = le->Flink; // fetch the successor now: the stripe, and with it this list_entry, is freed below
+
+ if (stripe->buf && stripe->need_free) {
+ ExFreePool(stripe->buf);
+
+ le2 = le->Flink;
+ while (le2 != &wtc->stripes) { // clear the same pointer in later stripes so a buffer shared between stripes is freed only once
+ write_data_stripe* s2 = CONTAINING_RECORD(le2, write_data_stripe, list_entry);
+
+ if (s2->buf == stripe->buf) // compares the pointer value only; the freed buffer is not dereferenced
+ s2->buf = NULL;
+
+ le2 = le2->Flink;
+ }
+
}
- cer->edr.root = root;
- cer->edr.objid = objid;
- cer->edr.offset = offset;
- cer->edr.count = old_count;
+ ExFreePool(stripe); // entries are freed in place without being unlinked from wtc->stripes
- InsertTailList(&ce->old_refs, &cer->list_entry);
- }
-
- cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
-
- if (!cer) {
- ERR("out of memory\n");
- Status = STATUS_INSUFFICIENT_RESOURCES;
- goto end;
+ le = nextle;
}
-
- cer->edr.root = root;
- cer->edr.objid = objid;
- cer->edr.offset = offset;
- cer->edr.count = old_count + count;
-
- InsertTailList(&ce->refs, &cer->list_entry);
-
- ce->count += count;
-
- Status = STATUS_SUCCESS;
-
-end:
- ExReleaseResourceLite(&c->changed_extents_lock);
-
- return Status;
}
NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 end_data, PIRP Irp, LIST_ENTRY* rollback) {
remove_fcb_extent(fcb, ext, rollback);
fcb->inode_item.st_blocks -= len;
+ fcb->inode_item_changed = TRUE;
} else if (start_data <= ext->offset && end_data < ext->offset + len) { // remove beginning
EXTENT_DATA* ned;
UINT64 size;
remove_fcb_extent(fcb, ext, rollback);
fcb->inode_item.st_blocks -= end_data - ext->offset;
+ fcb->inode_item_changed = TRUE;
} else if (start_data > ext->offset && end_data >= ext->offset + len) { // remove end
EXTENT_DATA* ned;
UINT64 size;
remove_fcb_extent(fcb, ext, rollback);
fcb->inode_item.st_blocks -= ext->offset + len - start_data;
+ fcb->inode_item_changed = TRUE;
} else if (start_data > ext->offset && end_data < ext->offset + len) { // remove middle
EXTENT_DATA *ned1, *ned2;
UINT64 size;
newext1->offset = ext->offset;
newext1->data = ned1;
newext1->datalen = sizeof(EXTENT_DATA) - 1 + size;
- newext1->unique = FALSE;
+ newext1->unique = ext->unique;
newext1->ignore = FALSE;
size = ext->offset + len - end_data;
newext2->offset = end_data;
newext2->data = ned2;
newext2->datalen = sizeof(EXTENT_DATA) - 1 + size;
- newext2->unique = FALSE;
+ newext2->unique = ext->unique;
newext2->ignore = FALSE;
InsertHeadList(&ext->list_entry, &newext1->list_entry);
remove_fcb_extent(fcb, ext, rollback);
fcb->inode_item.st_blocks -= end_data - start_data;
+ fcb->inode_item_changed = TRUE;
}
} else if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
if (start_data <= ext->offset && end_data >= ext->offset + len) { // remove all
- if (ed2->address != 0) {
+ if (ed2->size != 0) {
chunk* c;
fcb->inode_item.st_blocks -= len;
+ fcb->inode_item_changed = TRUE;
c = get_chunk_from_address(Vcb, ed2->address);
ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
} else {
Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1,
- fcb->inode_item.flags & BTRFS_INODE_NODATASUM, ed2->size, Irp);
+ fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
if (!NT_SUCCESS(Status)) {
ERR("update_changed_extent_ref returned %08x\n", Status);
goto end;
EXTENT_DATA2* ned2;
extent* newext;
- if (ed2->address != 0)
+ if (ed2->size != 0) {
fcb->inode_item.st_blocks -= end_data - ext->offset;
+ fcb->inode_item_changed = TRUE;
+ }
ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
if (!ned) {
ned->type = ed->type;
ned2->address = ed2->address;
ned2->size = ed2->size;
- ned2->offset = ed2->address == 0 ? 0 : (ed2->offset + (end_data - ext->offset));
+ ned2->offset = ed2->offset + (end_data - ext->offset);
ned2->num_bytes = ed2->num_bytes - (end_data - ext->offset);
newext->offset = end_data;
EXTENT_DATA2* ned2;
extent* newext;
- if (ed2->address != 0)
+ if (ed2->size != 0) {
fcb->inode_item.st_blocks -= ext->offset + len - start_data;
+ fcb->inode_item_changed = TRUE;
+ }
ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
if (!ned) {
ned->type = ed->type;
ned2->address = ed2->address;
ned2->size = ed2->size;
- ned2->offset = ed2->address == 0 ? 0 : ed2->offset;
+ ned2->offset = ed2->offset;
ned2->num_bytes = start_data - ext->offset;
newext->offset = ext->offset;
EXTENT_DATA2 *neda2, *nedb2;
extent *newext1, *newext2;
- if (ed2->address != 0) {
+ if (ed2->size != 0) {
chunk* c;
fcb->inode_item.st_blocks -= end_data - start_data;
+ fcb->inode_item_changed = TRUE;
c = get_chunk_from_address(Vcb, ed2->address);
ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
} else {
Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
- fcb->inode_item.flags & BTRFS_INODE_NODATASUM, ed2->size, Irp);
+ fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
if (!NT_SUCCESS(Status)) {
ERR("update_changed_extent_ref returned %08x\n", Status);
goto end;
neda->type = ed->type;
neda2->address = ed2->address;
neda2->size = ed2->size;
- neda2->offset = ed2->address == 0 ? 0 : ed2->offset;
+ neda2->offset = ed2->offset;
neda2->num_bytes = start_data - ext->offset;
nedb2 = (EXTENT_DATA2*)&nedb->data[0];
nedb->type = ed->type;
nedb2->address = ed2->address;
nedb2->size = ed2->size;
- nedb2->offset = ed2->address == 0 ? 0 : (ed2->offset + (end_data - ext->offset));
+ nedb2->offset = ed2->offset + (end_data - ext->offset);
nedb2->num_bytes = ext->offset + len - end_data;
newext1->offset = ext->offset;
newext1->data = neda;
newext1->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
- newext1->unique = FALSE;
+ newext1->unique = ext->unique;
newext1->ignore = FALSE;
newext2->offset = end_data;
newext2->data = nedb;
newext2->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
- newext2->unique = FALSE;
+ newext2->unique = ext->unique;
newext2->ignore = FALSE;
InsertHeadList(&ext->list_entry, &newext1->list_entry);
le = le2;
}
- // FIXME - do bitmap analysis of changed extents, and free what we can
-
Status = STATUS_SUCCESS;
end:
re->fcb = fcb;
re->ext = ext;
- add_rollback(rollback, ROLLBACK_INSERT_EXTENT, re);
+ add_rollback(fcb->Vcb, rollback, ROLLBACK_INSERT_EXTENT, re);
}
static BOOL add_extent_to_fcb(fcb* fcb, UINT64 offset, EXTENT_DATA* ed, ULONG edsize, BOOL unique, LIST_ENTRY* rollback) {
re->fcb = fcb;
re->ext = ext;
- add_rollback(rollback, ROLLBACK_DELETE_EXTENT, re);
- }
-}
-
-static void add_changed_extent_ref(chunk* c, UINT64 address, UINT64 size, UINT64 root, UINT64 objid, UINT64 offset, UINT32 count, BOOL no_csum) {
- changed_extent* ce;
- changed_extent_ref* cer;
- LIST_ENTRY* le;
-
- ce = get_changed_extent_item(c, address, size, no_csum);
-
- if (!ce) {
- ERR("get_changed_extent_item failed\n");
- return;
- }
-
- le = ce->refs.Flink;
- while (le != &ce->refs) {
- cer = CONTAINING_RECORD(le, changed_extent_ref, list_entry);
-
- if (cer->edr.root == root && cer->edr.objid == objid && cer->edr.offset == offset) {
- ce->count += count;
- cer->edr.count += count;
- return;
- }
-
- le = le->Flink;
- }
-
- cer = ExAllocatePoolWithTag(PagedPool, sizeof(changed_extent_ref), ALLOC_TAG);
-
- if (!cer) {
- ERR("out of memory\n");
- return;
+ add_rollback(fcb->Vcb, rollback, ROLLBACK_DELETE_EXTENT, re);
}
-
- cer->edr.root = root;
- cer->edr.objid = objid;
- cer->edr.offset = offset;
- cer->edr.count = count;
-
- InsertTailList(&ce->refs, &cer->list_entry);
-
- ce->count += count;
}
BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start_data, UINT64 length, BOOL prealloc, void* data,
// }
// #endif
- if (data) {
- Status = do_write_data(Vcb, address, data, length, changed_sector_list, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("do_write_data returned %08x\n", Status);
- return FALSE;
- }
- }
-
// add extent data to inode
ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
if (!ed) {
if (!add_extent_to_fcb(fcb, start_data, ed, edsize, TRUE, rollback)) {
ERR("add_extent_to_fcb failed\n");
- ExFreePool(ed);
- return FALSE;
- }
-
- increase_chunk_usage(c, length);
- space_list_subtract(Vcb, c, FALSE, address, length, rollback);
-
- fcb->inode_item.st_blocks += decoded_size;
-
- fcb->extents_changed = TRUE;
- mark_fcb_dirty(fcb);
-
- ExAcquireResourceExclusiveLite(&c->changed_extents_lock, TRUE);
-
- add_changed_extent_ref(c, address, length, fcb->subvol->id, fcb->inode, start_data, 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
-
- ExReleaseResourceLite(&c->changed_extents_lock);
-
- return TRUE;
-}
-
-static BOOL extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data,
- LIST_ENTRY* changed_sector_list, extent* ext, chunk* c, PIRP Irp, LIST_ENTRY* rollback) {
- EXTENT_DATA* ed;
- EXTENT_DATA2 *ed2, *ed2orig;
- extent* newext;
- UINT64 addr, origsize;
- NTSTATUS Status;
- LIST_ENTRY* le;
-
- TRACE("(%p, (%llx, %llx), %llx, %llx, %p, %p, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data,
- length, data, changed_sector_list, ext, c, rollback);
-
- ed2orig = (EXTENT_DATA2*)ext->data->data;
-
- origsize = ed2orig->size;
- addr = ed2orig->address + ed2orig->size;
-
- Status = write_data_complete(Vcb, addr, data, length, Irp, c);
- if (!NT_SUCCESS(Status)) {
- ERR("write_data returned %08x\n", Status);
- return FALSE;
- }
-
- le = fcb->extents.Flink;
- while (le != &fcb->extents) {
- extent* ext2 = CONTAINING_RECORD(le, extent, list_entry);
-
- if (!ext2->ignore && (ext2->data->type == EXTENT_TYPE_REGULAR || ext2->data->type == EXTENT_TYPE_PREALLOC)) {
- EXTENT_DATA2* ed2b = (EXTENT_DATA2*)ext2->data->data;
-
- if (ed2b->address == ed2orig->address) {
- ed2b->size = origsize + length;
- ext2->data->decoded_size = origsize + length;
- }
- }
-
- le = le->Flink;
- }
-
- ed = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG);
- if (!ed) {
- ERR("out of memory\n");
- return FALSE;
- }
-
- newext = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG);
- if (!newext) {
- ERR("out of memory\n");
- ExFreePool(ed);
- return FALSE;
- }
-
- RtlCopyMemory(ed, ext->data, ext->datalen);
-
- ed2 = (EXTENT_DATA2*)ed->data;
- ed2->offset = ed2orig->offset + ed2orig->num_bytes;
- ed2->num_bytes = length;
-
- RtlCopyMemory(newext, ext, sizeof(extent));
- newext->offset = ext->offset + ed2orig->num_bytes;
- newext->data = ed;
-
- InsertHeadList(&ext->list_entry, &newext->list_entry);
-
- add_insert_extent_rollback(rollback, fcb, newext);
-
- Status = update_changed_extent_ref(Vcb, c, ed2orig->address, origsize, fcb->subvol->id, fcb->inode, newext->offset - ed2->offset,
- 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM, ed2->size, Irp);
-
- if (!NT_SUCCESS(Status)) {
- ERR("update_changed_extent_ref returned %08x\n", Status);
- return FALSE;
- }
-
- if (changed_sector_list) {
- int i;
- changed_sector* sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
- if (!sc) {
- ERR("out of memory\n");
- return FALSE;
- }
-
- sc->ol.key = addr;
- sc->length = length / Vcb->superblock.sector_size;
- sc->deleted = FALSE;
-
- sc->checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sc->length, ALLOC_TAG);
- if (!sc->checksums) {
- ERR("out of memory\n");
- ExFreePool(sc);
- return FALSE;
- }
-
- for (i = 0; i < sc->length; i++) {
- sc->checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
- }
- insert_into_ordered_list(changed_sector_list, &sc->ol);
+ ExFreePool(ed);
+ return FALSE;
}
increase_chunk_usage(c, length);
-
- space_list_subtract(Vcb, c, FALSE, addr, length, NULL); // no rollback as we don't reverse extending the extent
-
- fcb->inode_item.st_blocks += length;
+ space_list_subtract(Vcb, c, FALSE, address, length, rollback);
+
+ fcb->inode_item.st_blocks += decoded_size;
+
+ fcb->extents_changed = TRUE;
+ fcb->inode_item_changed = TRUE;
+ mark_fcb_dirty(fcb);
+
+ ExAcquireResourceExclusiveLite(&c->changed_extents_lock, TRUE);
+
+ add_changed_extent_ref(c, address, length, fcb->subvol->id, fcb->inode, start_data, 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
+
+ ExReleaseResourceLite(&c->changed_extents_lock);
+ ExReleaseResourceLite(&c->lock);
+
+ if (data) {
+ Status = do_write_data(Vcb, address, data, length, changed_sector_list, Irp);
+ if (!NT_SUCCESS(Status))
+ ERR("do_write_data returned %08x\n", Status);
+ }
+
return TRUE;
}
if (!ext)
return FALSE;
- if (!ext->unique) {
- TRACE("extent was not unique\n");
- return FALSE;
- }
-
ed = ext->data;
if (ext->datalen < sizeof(EXTENT_DATA)) {
return FALSE;
}
- if (ed->type != EXTENT_TYPE_REGULAR) {
- TRACE("not extending extent which is not EXTENT_TYPE_REGULAR\n");
+ if (ed->type != EXTENT_TYPE_REGULAR && ed->type != EXTENT_TYPE_PREALLOC) {
+ TRACE("not extending extent which is not regular or prealloc\n");
return FALSE;
}
return FALSE;
}
- if (ed->compression != BTRFS_COMPRESSION_NONE) {
- TRACE("not extending a compressed extent\n");
- return FALSE;
- }
-
- if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
- WARN("encryption not supported\n");
- return FALSE;
- }
-
- if (ed->encoding != BTRFS_ENCODING_NONE) {
- WARN("other encodings not supported\n");
- return FALSE;
- }
-
- if (ed2->size - ed2->offset != ed2->num_bytes) {
- TRACE("last EXTENT_DATA does not run all the way to the end of the extent\n");
- return FALSE;
- }
-
- if (ed2->size >= MAX_EXTENT_SIZE) {
- TRACE("extent size was too large to extend (%llx >= %llx)\n", ed2->size, (UINT64)MAX_EXTENT_SIZE);
- return FALSE;
- }
-
c = get_chunk_from_address(Vcb, ed2->address);
ExAcquireResourceExclusiveLite(&c->lock, TRUE);
s = CONTAINING_RECORD(le, space, list_entry);
if (s->address == ed2->address + ed2->size) {
- UINT64 newlen = min(min(s->size, length), MAX_EXTENT_SIZE - ed2->size);
+ UINT64 newlen = min(min(s->size, length), MAX_EXTENT_SIZE);
- success = extend_data(Vcb, fcb, start_data, newlen, data, changed_sector_list, ext, c, Irp, rollback);
+ success = insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, changed_sector_list, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen);
if (success)
*written += newlen;
- break;
+ return success;
} else if (s->address > ed2->address + ed2->size)
break;
ExReleaseResourceLite(&c->lock);
- return success;
+ return FALSE;
}
static NTSTATUS insert_prealloc_extent(fcb* fcb, UINT64 start, UINT64 length, LIST_ENTRY* rollback) {
while (le != &fcb->Vcb->chunks) {
c = CONTAINING_RECORD(le, chunk, list_entry);
- ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-
- if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) {
- if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen)) {
- ExReleaseResourceLite(&c->lock);
- ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
- goto cont;
+ if (!c->readonly) {
+ ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+
+ if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) {
+ if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen)) {
+ ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
+ goto cont;
+ }
}
+
+ ExReleaseResourceLite(&c->lock);
}
-
- ExReleaseResourceLite(&c->lock);
le = le->Flink;
}
ExAcquireResourceExclusiveLite(&c->lock, TRUE);
if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) {
- if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen)) {
- ExReleaseResourceLite(&c->lock);
+ if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen))
goto cont;
- }
}
ExReleaseResourceLite(&c->lock);
TRACE("(%p, (%llx, %llx), %llx, %llx, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data, changed_sector_list);
- // FIXME - split data up if not enough space for just one extent
-
if (start_data > 0) {
try_extend_data(Vcb, fcb, start_data, length, data, changed_sector_list, Irp, &written, rollback);
while (le != &Vcb->chunks) {
c = CONTAINING_RECORD(le, chunk, list_entry);
- ExAcquireResourceExclusiveLite(&c->lock, TRUE);
-
- if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen) {
- if (insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, changed_sector_list, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen)) {
+ if (!c->readonly) {
+ ExAcquireResourceExclusiveLite(&c->lock, TRUE);
+
+ if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen &&
+ insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, changed_sector_list, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen)) {
written += newlen;
if (written == orig_length) {
- ExReleaseResourceLite(&c->lock);
ExReleaseResourceLite(&Vcb->chunk_lock);
return STATUS_SUCCESS;
} else {
data = &((UINT8*)data)[newlen];
break;
}
- }
+ } else
+ ExReleaseResourceLite(&c->lock);
}
-
- ExReleaseResourceLite(&c->lock);
le = le->Flink;
}
ExAcquireResourceExclusiveLite(&c->lock, TRUE);
- if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen) {
- if (insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, changed_sector_list, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen)) {
- written += newlen;
-
- if (written == orig_length) {
- ExReleaseResourceLite(&c->lock);
- return STATUS_SUCCESS;
- } else {
- done = TRUE;
- start_data += newlen;
- length -= newlen;
- data = &((UINT8*)data)[newlen];
- }
+ if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen &&
+ insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, changed_sector_list, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen)) {
+ written += newlen;
+
+ if (written == orig_length)
+ return STATUS_SUCCESS;
+ else {
+ done = TRUE;
+ start_data += newlen;
+ length -= newlen;
+ data = &((UINT8*)data)[newlen];
}
- }
-
- ExReleaseResourceLite(&c->lock);
+ } else
+ ExReleaseResourceLite(&c->lock);
} else
ExReleaseResourceLite(&Vcb->chunk_lock);
return STATUS_DISK_FULL;
}
-static void update_checksum_tree(device_extension* Vcb, PIRP Irp, LIST_ENTRY* rollback) {
- LIST_ENTRY* le = Vcb->sector_checksums.Flink;
- changed_sector* cs;
- traverse_ptr tp, next_tp;
- KEY searchkey;
- UINT32* data;
- NTSTATUS Status;
-
- if (!Vcb->checksum_root) {
- ERR("no checksum root\n");
- goto exit;
- }
-
- while (le != &Vcb->sector_checksums) {
- UINT64 startaddr, endaddr;
- ULONG len;
- UINT32* checksums;
- RTL_BITMAP bmp;
- ULONG* bmparr;
- ULONG runlength, index;
-
- cs = (changed_sector*)le;
-
- searchkey.obj_id = EXTENT_CSUM_ID;
- searchkey.obj_type = TYPE_EXTENT_CSUM;
- searchkey.offset = cs->ol.key;
-
- // FIXME - create checksum_root if it doesn't exist at all
-
- Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) { // tree is completely empty
- // FIXME - do proper check here that tree is empty
- if (!cs->deleted) {
- checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * cs->length, ALLOC_TAG);
- if (!checksums) {
- ERR("out of memory\n");
- goto exit;
- }
-
- RtlCopyMemory(checksums, cs->checksums, sizeof(UINT32) * cs->length);
-
- if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, cs->ol.key, checksums, sizeof(UINT32) * cs->length, NULL, Irp, rollback)) {
- ERR("insert_tree_item failed\n");
- ExFreePool(checksums);
- goto exit;
- }
- }
- } else {
- UINT32 tplen;
-
- // FIXME - check entry is TYPE_EXTENT_CSUM?
-
- if (tp.item->key.offset < cs->ol.key && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= cs->ol.key)
- startaddr = tp.item->key.offset;
- else
- startaddr = cs->ol.key;
-
- searchkey.obj_id = EXTENT_CSUM_ID;
- searchkey.obj_type = TYPE_EXTENT_CSUM;
- searchkey.offset = cs->ol.key + (cs->length * Vcb->superblock.sector_size);
-
- Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- goto exit;
- }
-
- tplen = tp.item->size / sizeof(UINT32);
-
- if (tp.item->key.offset + (tplen * Vcb->superblock.sector_size) >= cs->ol.key + (cs->length * Vcb->superblock.sector_size))
- endaddr = tp.item->key.offset + (tplen * Vcb->superblock.sector_size);
- else
- endaddr = cs->ol.key + (cs->length * Vcb->superblock.sector_size);
-
- TRACE("cs starts at %llx (%x sectors)\n", cs->ol.key, cs->length);
- TRACE("startaddr = %llx\n", startaddr);
- TRACE("endaddr = %llx\n", endaddr);
-
- len = (endaddr - startaddr) / Vcb->superblock.sector_size;
-
- checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * len, ALLOC_TAG);
- if (!checksums) {
- ERR("out of memory\n");
- goto exit;
- }
-
- bmparr = ExAllocatePoolWithTag(PagedPool, sizeof(ULONG) * ((len/8)+1), ALLOC_TAG);
- if (!bmparr) {
- ERR("out of memory\n");
- ExFreePool(checksums);
- goto exit;
- }
-
- RtlInitializeBitMap(&bmp, bmparr, len);
- RtlSetAllBits(&bmp);
-
- searchkey.obj_id = EXTENT_CSUM_ID;
- searchkey.obj_type = TYPE_EXTENT_CSUM;
- searchkey.offset = cs->ol.key;
-
- Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, Irp);
- if (!NT_SUCCESS(Status)) {
- ERR("error - find_item returned %08x\n", Status);
- goto exit;
- }
-
- // set bit = free space, cleared bit = allocated sector
-
- // ERR("start loop\n");
- while (tp.item->key.offset < endaddr) {
- // ERR("%llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
- if (tp.item->key.offset >= startaddr) {
- if (tp.item->size > 0) {
- RtlCopyMemory(&checksums[(tp.item->key.offset - startaddr) / Vcb->superblock.sector_size], tp.item->data, tp.item->size);
- RtlClearBits(&bmp, (tp.item->key.offset - startaddr) / Vcb->superblock.sector_size, tp.item->size / sizeof(UINT32));
- }
-
- delete_tree_item(Vcb, &tp, rollback);
- }
-
- if (find_next_item(Vcb, &tp, &next_tp, FALSE, Irp)) {
- tp = next_tp;
- } else
- break;
- }
- // ERR("end loop\n");
-
- if (cs->deleted) {
- RtlSetBits(&bmp, (cs->ol.key - startaddr) / Vcb->superblock.sector_size, cs->length);
- } else {
- RtlCopyMemory(&checksums[(cs->ol.key - startaddr) / Vcb->superblock.sector_size], cs->checksums, cs->length * sizeof(UINT32));
- RtlClearBits(&bmp, (cs->ol.key - startaddr) / Vcb->superblock.sector_size, cs->length);
- }
-
- runlength = RtlFindFirstRunClear(&bmp, &index);
-
- while (runlength != 0) {
- do {
- ULONG rl;
-
- if (runlength * sizeof(UINT32) > MAX_CSUM_SIZE)
- rl = MAX_CSUM_SIZE / sizeof(UINT32);
- else
- rl = runlength;
-
- data = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * rl, ALLOC_TAG);
- if (!data) {
- ERR("out of memory\n");
- ExFreePool(bmparr);
- ExFreePool(checksums);
- goto exit;
- }
-
- RtlCopyMemory(data, &checksums[index], sizeof(UINT32) * rl);
-
- if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, startaddr + (index * Vcb->superblock.sector_size), data, sizeof(UINT32) * rl, NULL, Irp, rollback)) {
- ERR("insert_tree_item failed\n");
- ExFreePool(data);
- ExFreePool(bmparr);
- ExFreePool(checksums);
- goto exit;
- }
-
- runlength -= rl;
- index += rl;
- } while (runlength > 0);
-
- runlength = RtlFindNextForwardRunClear(&bmp, index, &index);
- }
-
- ExFreePool(bmparr);
- ExFreePool(checksums);
- }
-
- le = le->Flink;
- }
-
-exit:
- while (!IsListEmpty(&Vcb->sector_checksums)) {
- le = RemoveHeadList(&Vcb->sector_checksums);
- cs = (changed_sector*)le;
-
- if (cs->checksums)
- ExFreePool(cs->checksums);
-
- ExFreePool(cs);
- }
-}
-
void commit_checksum_changes(device_extension* Vcb, LIST_ENTRY* changed_sector_list) {
while (!IsListEmpty(changed_sector_list)) {
LIST_ENTRY* le = RemoveHeadList(changed_sector_list);
}
fcb->inode_item.st_size = end;
+ fcb->inode_item_changed = TRUE;
TRACE("setting st_size to %llx\n", end);
fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size);
RtlCopyMemory(data, ed->data, origlength);
fcb->inode_item.st_blocks -= origlength;
+ fcb->inode_item_changed = TRUE;
+ mark_fcb_dirty(fcb);
remove_fcb_extent(fcb, ext, rollback);
}
fcb->extents_changed = TRUE;
- mark_fcb_dirty(fcb);
}
fcb->inode_item.st_size = end;
+ fcb->inode_item_changed = TRUE;
+ mark_fcb_dirty(fcb);
+
TRACE("setting st_size to %llx\n", end);
TRACE("newalloc = %llx\n", newalloc);
}
fcb->extents_changed = TRUE;
+ fcb->inode_item_changed = TRUE;
mark_fcb_dirty(fcb);
fcb->inode_item.st_size = end;
}
fcb->extents_changed = TRUE;
+ fcb->inode_item_changed = TRUE;
mark_fcb_dirty(fcb);
fcb->inode_item.st_size = end;
return STATUS_SUCCESS;
}
-// #ifdef DEBUG_PARANOID
-// static void print_loaded_trees(tree* t, int spaces) {
-// char pref[10];
-// int i;
-// LIST_ENTRY* le;
-//
-// for (i = 0; i < spaces; i++) {
-// pref[i] = ' ';
-// }
-// pref[spaces] = 0;
-//
-// if (!t) {
-// ERR("%s(not loaded)\n", pref);
-// return;
-// }
-//
-// le = t->itemlist.Flink;
-// while (le != &t->itemlist) {
-// tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
-//
-// ERR("%s%llx,%x,%llx ignore=%s\n", pref, td->key.obj_id, td->key.obj_type, td->key.offset, td->ignore ? "TRUE" : "FALSE");
-//
-// if (t->header.level > 0) {
-// print_loaded_trees(td->treeholder.tree, spaces+1);
-// }
-//
-// le = le->Flink;
-// }
-// }
-
-// static void check_extents_consistent(device_extension* Vcb, fcb* fcb) {
-// KEY searchkey;
-// traverse_ptr tp, next_tp;
-// UINT64 length, oldlength, lastoff, alloc;
-// NTSTATUS Status;
-// EXTENT_DATA* ed;
-// EXTENT_DATA2* ed2;
-//
-// if (fcb->ads || fcb->inode_item.st_size == 0 || fcb->deleted)
-// return;
-//
-// TRACE("inode = %llx, subvol = %llx\n", fcb->inode, fcb->subvol->id);
-//
-// searchkey.obj_id = fcb->inode;
-// searchkey.obj_type = TYPE_EXTENT_DATA;
-// searchkey.offset = 0;
-//
-// Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
-// if (!NT_SUCCESS(Status)) {
-// ERR("error - find_item returned %08x\n", Status);
-// goto failure;
-// }
-//
-// if (keycmp(&searchkey, &tp.item->key)) {
-// ERR("could not find EXTENT_DATA at offset 0\n");
-// goto failure;
-// }
-//
-// if (tp.item->size < sizeof(EXTENT_DATA)) {
-// ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
-// goto failure;
-// }
-//
-// ed = (EXTENT_DATA*)tp.item->data;
-// ed2 = (EXTENT_DATA2*)&ed->data[0];
-//
-// length = oldlength = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
-// lastoff = tp.item->key.offset;
-//
-// TRACE("(%llx,%x,%llx) length = %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, length);
-//
-// alloc = 0;
-// if (ed->type != EXTENT_TYPE_REGULAR || ed2->address != 0) {
-// alloc += length;
-// }
-//
-// while (find_next_item(Vcb, &tp, &next_tp, FALSE)) {
-// if (next_tp.item->key.obj_id != searchkey.obj_id || next_tp.item->key.obj_type != searchkey.obj_type)
-// break;
-//
-// tp = next_tp;
-//
-// if (tp.item->size < sizeof(EXTENT_DATA)) {
-// ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
-// goto failure;
-// }
-//
-// ed = (EXTENT_DATA*)tp.item->data;
-// ed2 = (EXTENT_DATA2*)&ed->data[0];
-//
-// length = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
-//
-// TRACE("(%llx,%x,%llx) length = %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, length);
-//
-// if (tp.item->key.offset != lastoff + oldlength) {
-// ERR("EXTENT_DATA in %llx,%llx was at %llx, expected %llx\n", fcb->subvol->id, fcb->inode, tp.item->key.offset, lastoff + oldlength);
-// goto failure;
-// }
-//
-// if (ed->type != EXTENT_TYPE_REGULAR || ed2->address != 0) {
-// alloc += length;
-// }
-//
-// oldlength = length;
-// lastoff = tp.item->key.offset;
-// }
-//
-// if (alloc != fcb->inode_item.st_blocks) {
-// ERR("allocation size was %llx, expected %llx\n", alloc, fcb->inode_item.st_blocks);
-// goto failure;
-// }
-//
-// // if (fcb->inode_item.st_blocks != lastoff + oldlength) {
-// // ERR("extents finished at %x, expected %x\n", (UINT32)(lastoff + oldlength), (UINT32)fcb->inode_item.st_blocks);
-// // goto failure;
-// // }
-//
-// return;
-//
-// failure:
-// if (fcb->subvol->treeholder.tree)
-// print_loaded_trees(fcb->subvol->treeholder.tree, 0);
-//
-// int3;
-// }
-
-// static void check_extent_tree_consistent(device_extension* Vcb) {
-// KEY searchkey;
-// traverse_ptr tp, next_tp;
-// UINT64 lastaddr;
-// BOOL b, inconsistency;
-//
-// searchkey.obj_id = 0;
-// searchkey.obj_type = 0;
-// searchkey.offset = 0;
-//
-// if (!find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE)) {
-// ERR("error - could not find any entries in extent_root\n");
-// int3;
-// }
-//
-// lastaddr = 0;
-// inconsistency = FALSE;
-//
-// do {
-// if (tp.item->key.obj_type == TYPE_EXTENT_ITEM) {
-// // ERR("%x,%x,%x\n", (UINT32)tp.item->key.obj_id, tp.item->key.obj_type, (UINT32)tp.item->key.offset);
-//
-// if (tp.item->key.obj_id < lastaddr) {
-// // ERR("inconsistency!\n");
-// // int3;
-// inconsistency = TRUE;
-// }
-//
-// lastaddr = tp.item->key.obj_id + tp.item->key.offset;
-// }
-//
-// b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
-// if (b) {
-// free_traverse_ptr(&tp);
-// tp = next_tp;
-// }
-// } while (b);
-//
-// free_traverse_ptr(&tp);
-//
-// if (!inconsistency)
-// return;
-//
-// ERR("Inconsistency detected:\n");
-//
-// if (!find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE)) {
-// ERR("error - could not find any entries in extent_root\n");
-// int3;
-// }
-//
-// do {
-// if (tp.item->key.obj_type == TYPE_EXTENT_ITEM) {
-// ERR("%x,%x,%x\n", (UINT32)tp.item->key.obj_id, tp.item->key.obj_type, (UINT32)tp.item->key.offset);
-//
-// if (tp.item->key.obj_id < lastaddr) {
-// ERR("inconsistency!\n");
-// }
-//
-// lastaddr = tp.item->key.obj_id + tp.item->key.offset;
-// }
-//
-// b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
-// if (b) {
-// free_traverse_ptr(&tp);
-// tp = next_tp;
-// }
-// } while (b);
-//
-// free_traverse_ptr(&tp);
-//
-// int3;
-// }
-// #endif
-
static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, UINT64 end_data, void* data, UINT64* written,
LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback) {
EXTENT_DATA* ed = ext->data;
newext1->offset = ext->offset;
newext1->data = ned;
newext1->datalen = ext->datalen;
- newext1->unique = FALSE;
+ newext1->unique = ext->unique;
newext1->ignore = FALSE;
InsertHeadList(&ext->list_entry, &newext1->list_entry);
newext2->offset = end_data;
newext2->data = nedb;
newext2->datalen = ext->datalen;
- newext2->unique = FALSE;
+ newext2->unique = ext->unique;
newext2->ignore = FALSE;
InsertHeadList(&newext1->list_entry, &newext2->list_entry);
ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
else {
Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
- fcb->inode_item.flags & BTRFS_INODE_NODATASUM, ed2->size, Irp);
+ fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
if (!NT_SUCCESS(Status)) {
ERR("update_changed_extent_ref returned %08x\n", Status);
newext1->offset = ext->offset;
newext1->data = ned;
newext1->datalen = ext->datalen;
- newext1->unique = FALSE;
+ newext1->unique = ext->unique;
newext1->ignore = FALSE;
InsertHeadList(&ext->list_entry, &newext1->list_entry);
newext2->offset = start_data;
newext2->data = nedb;
newext2->datalen = ext->datalen;
- newext2->unique = FALSE;
+ newext2->unique = ext->unique;
newext2->ignore = FALSE;
InsertHeadList(&newext1->list_entry, &newext2->list_entry);
ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
else {
Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
- fcb->inode_item.flags & BTRFS_INODE_NODATASUM, ed2->size, Irp);
+ fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
if (!NT_SUCCESS(Status)) {
ERR("update_changed_extent_ref returned %08x\n", Status);
newext1->offset = ext->offset;
newext1->data = ned;
newext1->datalen = ext->datalen;
- newext1->unique = FALSE;
+ newext1->unique = ext->unique;
newext1->ignore = FALSE;
InsertHeadList(&ext->list_entry, &newext1->list_entry);
newext2->offset = start_data;
newext2->data = nedb;
newext2->datalen = ext->datalen;
- newext2->unique = FALSE;
+ newext2->unique = ext->unique;
newext2->ignore = FALSE;
InsertHeadList(&newext1->list_entry, &newext2->list_entry);
newext3->offset = end_data;
newext3->data = nedc;
newext3->datalen = ext->datalen;
- newext3->unique = FALSE;
+ newext3->unique = ext->unique;
newext3->ignore = FALSE;
InsertHeadList(&newext2->list_entry, &newext3->list_entry);
ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
else {
Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 2,
- fcb->inode_item.flags & BTRFS_INODE_NODATASUM, ed2->size, Irp);
+ fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
if (!NT_SUCCESS(Status)) {
ERR("update_changed_extent_ref returned %08x\n", Status);
EXTENT_DATA* ed = ext->data;
EXTENT_DATA2* ed2 = ed->type == EXTENT_TYPE_INLINE ? NULL : (EXTENT_DATA2*)ed->data;
UINT64 len;
- BOOL nocow;
len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
if (ext->offset > start + written + length)
break;
- nocow = (ext->unique && fcb->inode_item.flags & BTRFS_INODE_NODATACOW) || ed->type == EXTENT_TYPE_PREALLOC;
-
- if (nocow) {
+ if ((fcb->inode_item.flags & BTRFS_INODE_NODATACOW || ed->type == EXTENT_TYPE_PREALLOC) && ext->unique) {
if (max(last_cow_start, start + written) < ext->offset) {
UINT64 start_write = max(last_cow_start, start + written);
}
}
- // FIXME - make extending work again (here?)
- // FIXME - make maximum extent size 128 MB again (here?)
-
#ifdef DEBUG_PARANOID
last_off = 0xffffffffffffffff;
// bother with the rest of it.
if (s2 == 0 && e2 == COMPRESSED_EXTENT_SIZE && !compressed && !fcb->Vcb->options.compress_force) {
fcb->inode_item.flags |= BTRFS_INODE_NOCOMPRESS;
+ fcb->inode_item_changed = TRUE;
mark_fcb_dirty(fcb);
// write subsequent data non-compressed
ccb = FileObject->FsContext2;
fileref = ccb ? ccb->fileref : NULL;
- if (fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK) {
+ if (!fcb->ads && fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK) {
WARN("tried to write to something other than a file or symlink (inode %llx, type %u, %p, %p)\n", fcb->inode, fcb->type, &fcb->type, fcb);
return STATUS_INVALID_DEVICE_REQUEST;
}
tree_lock = TRUE;
}
- if (no_cache && !ExIsResourceAcquiredExclusiveLite(fcb->Header.Resource)) {
- if (!ExAcquireResourceExclusiveLite(fcb->Header.Resource, wait)) {
- Status = STATUS_PENDING;
- goto end;
- } else
- fcb_lock = TRUE;
+ if (no_cache) {
+ if (pagefile) {
+ if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) {
+ Status = STATUS_PENDING;
+ goto end;
+ } else
+ fcb_lock = TRUE;
+ } else if (!ExIsResourceAcquiredExclusiveLite(fcb->Header.Resource)) {
+ if (!ExAcquireResourceExclusiveLite(fcb->Header.Resource, wait)) {
+ Status = STATUS_PENDING;
+ goto end;
+ } else
+ fcb_lock = TRUE;
+ }
}
nocsum = fcb->ads ? TRUE : fcb->inode_item.flags & BTRFS_INODE_NODATASUM;
origii->transid = Vcb->superblock.generation;
origii->sequence++;
- origii->st_ctime = now;
+
+ if (!ccb->user_set_change_time)
+ origii->st_ctime = now;
if (!fcb->ads) {
if (changed_length) {
filter |= FILE_NOTIFY_CHANGE_SIZE;
}
- origii->st_mtime = now;
- filter |= FILE_NOTIFY_CHANGE_LAST_WRITE;
- }
+ if (!ccb->user_set_write_time) {
+ origii->st_mtime = now;
+ filter |= FILE_NOTIFY_CHANGE_LAST_WRITE;
+ }
+
+ fcb->inode_item_changed = TRUE;
+ } else
+ fileref->parent->fcb->inode_item_changed = TRUE;
mark_fcb_dirty(fcb->ads ? fileref->parent->fcb : fcb);
}
exit:
// if (locked) {
if (NT_SUCCESS(Status))
- clear_rollback(&rollback);
+ clear_rollback(Vcb, &rollback);
else
do_rollback(Vcb, &rollback);
//