/* Copyright (c) Mark Harmstone 2016-17
 *
 * This file is part of WinBtrfs.
 *
 * WinBtrfs is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public Licence as published by
 * the Free Software Foundation, either version 3 of the Licence, or
 * (at your option) any later version.
 *
 * WinBtrfs is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public Licence for more details.
 *
 * You should have received a copy of the GNU Lesser General Public Licence
 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
18 #include "btrfs_drv.h"
/* Forward declarations of two file-local (static) helpers: the I/O completion
 * routine write_data_completion and remove_fcb_extent. */
28 _Function_class_(IO_COMPLETION_ROUTINE
)
29 static NTSTATUS __stdcall
write_data_completion(PDEVICE_OBJECT DeviceObject
, PIRP Irp
, PVOID conptr
);
31 static void remove_fcb_extent(fcb
* fcb
, extent
* ext
, LIST_ENTRY
* rollback
);
/* Function pointers defined in another translation unit (extern).
 * NOTE(review): presumably optional kernel routines resolved at run time, in
 * which case callers must check for NULL before use -- confirm against the
 * definitions. */
33 extern tPsUpdateDiskCounters fPsUpdateDiskCounters
;
34 extern tCcCopyWriteEx fCcCopyWriteEx
;
35 extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters
;
/* Looks in chunk c for a free region able to hold `length` bytes and stores
 * the start of the chosen region in *address.
 *
 * Vcb     - volume the chunk belongs to (passed on to load_cache_chunk)
 * c       - chunk to search; its free-space cache is loaded on demand
 * length  - number of bytes wanted
 * address - receives s->address of the selected free-space entry
 *
 * The search walks c->space_size (entries linked through
 * space.list_entry_size).
 * NOTE(review): the selection logic only makes sense if that list is ordered
 * by decreasing size -- confirm where the list is maintained.
 * NOTE(review): presumably returns true when *address was written and false
 * otherwise -- confirm. */
38 bool find_data_address_in_chunk(device_extension
* Vcb
, chunk
* c
, uint64_t length
, uint64_t* address
) {
42 TRACE("(%p, %I64x, %I64x, %p)\n", Vcb
, c
->offset
, length
, address
);
// request is larger than the chunk's unused byte count (size - used)
44 if (length
> c
->chunk_item
->size
- c
->used
)
// free-space cache not loaded yet: load it before consulting the lists
47 if (!c
->cache_loaded
) {
48 NTSTATUS Status
= load_cache_chunk(Vcb
, c
, NULL
);
50 if (!NT_SUCCESS(Status
)) {
51 ERR("load_cache_chunk returned %08x\n", Status
);
// no free-space entries at all
56 if (IsListEmpty(&c
->space_size
))
59 le
= c
->space_size
.Flink
;
60 while (le
!= &c
->space_size
) {
61 s
= CONTAINING_RECORD(le
, space
, list_entry_size
);
// exact fit
63 if (s
->size
== length
) {
64 *address
= s
->address
;
// current entry is too small
66 } else if (s
->size
< length
) {
// it is the first entry, so there is no earlier entry to fall back to
67 if (le
== c
->space_size
.Flink
)
// otherwise use the entry just before this one
70 s
= CONTAINING_RECORD(le
->Blink
, space
, list_entry_size
);
72 *address
= s
->address
;
// loop finished without choosing: examine the last entry of the list
79 s
= CONTAINING_RECORD(c
->space_size
.Blink
, space
, list_entry_size
);
81 if (s
->size
> length
) {
82 *address
= s
->address
;
/* Searches Vcb->chunks for the chunk whose logical range
 * [c->offset, c->offset + c->chunk_item->size) contains `address`.
 *
 * Vcb->chunk_lock is acquired shared (waiting if necessary) for the scan and
 * released both when a match is found and after an unsuccessful scan.
 * NOTE(review): presumably returns the matching chunk, or NULL when no chunk
 * covers the address -- confirm. */
89 chunk
* get_chunk_from_address(device_extension
* Vcb
, uint64_t address
) {
92 ExAcquireResourceSharedLite(&Vcb
->chunk_lock
, true);
94 le2
= Vcb
->chunks
.Flink
;
95 while (le2
!= &Vcb
->chunks
) {
96 chunk
* c
= CONTAINING_RECORD(le2
, chunk
, list_entry
);
// half-open range test: offset <= address < offset + size
98 if (address
>= c
->offset
&& address
< c
->offset
+ c
->chunk_item
->size
) {
// match: drop the lock before leaving
99 ExReleaseResourceLite(&Vcb
->chunk_lock
);
// no chunk matched
106 ExReleaseResourceLite(&Vcb
->chunk_lock
);
/* Scans Vcb->chunks for a gap in the logical address space able to hold a new
 * chunk of `size` bytes.
 *
 * lastaddr tracks the end (offset + chunk_item->size) of the chunk visited
 * most recently; a gap exists before chunk c when
 * c->offset >= lastaddr + size.
 * NOTE(review): this relies on Vcb->chunks being ordered by offset -- confirm.
 * NOTE(review): presumably returns lastaddr, i.e. the start of the first
 * suitable gap or the end of the last chunk -- confirm. */
116 static uint64_t find_new_chunk_address(device_extension
* Vcb
, uint64_t size
) {
122 le
= Vcb
->chunks
.Flink
;
123 while (le
!= &Vcb
->chunks
) {
124 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
// enough room between the previous chunk's end and this chunk's start
126 if (c
->offset
>= lastaddr
+ size
)
// otherwise continue the search from the end of this chunk
129 lastaddr
= c
->offset
+ c
->chunk_item
->size
;
/* Chooses the free-space holes for the two stripes of a DUP chunk. Both
 * stripes are placed on the same device (stripes[0].device and
 * stripes[1].device are set to the same value); stripes[0].dh and
 * stripes[1].dh receive the chosen holes, which may be one and the same hole.
 *
 * Vcb             - volume whose device list (Vcb->devices) is searched
 * stripes         - output array; entries 0 and 1 are filled in
 * max_stripe_size - preferred size of each stripe
 * full_size       - NOTE(review): its use is not visible here; presumably
 *                   rejects results smaller than max_stripe_size -- confirm
 *
 * First pass: among devices that are not read-only, not being relocated and
 * have a device object, prefer the lowest usage ratio that offers either two
 * holes of at least max_stripe_size or a single hole of at least twice that.
 * Second pass (only when the first found nothing): look for the largest
 * usable holes instead.
 * NOTE(review): presumably returns true when a placement was found. */
137 static bool find_new_dup_stripes(device_extension
* Vcb
, stripe
* stripes
, uint64_t max_stripe_size
, bool full_size
) {
// lowest usage ratio seen so far; starts at the maximum so any device beats it
138 uint64_t devusage
= 0xffffffffffffffff;
139 space
*devdh1
= NULL
, *devdh2
= NULL
;
143 le
= Vcb
->devices
.Flink
;
145 while (le
!= &Vcb
->devices
) {
146 device
* dev
= CONTAINING_RECORD(le
, device
, list_entry
);
148 if (!dev
->readonly
&& !dev
->reloc
&& dev
->devobj
) {
// fraction of the device in use, scaled by 4096
// NOTE(review): divides by devitem.num_bytes -- confirm it cannot be zero here
149 uint64_t usage
= (dev
->devitem
.bytes_used
* 4096) / dev
->devitem
.num_bytes
;
151 // favour devices which have been used the least
152 if (usage
< devusage
) {
153 if (!IsListEmpty(&dev
->space
)) {
155 space
*dh1
= NULL
, *dh2
= NULL
;
157 le2
= dev
->space
.Flink
;
158 while (le2
!= &dev
->space
) {
159 space
* dh
= CONTAINING_RECORD(le2
, space
, list_entry
);
// hole can hold one stripe, and either a slot is still empty or it is smaller than dh1
161 if (dh
->size
>= max_stripe_size
&& (!dh1
|| !dh2
|| dh
->size
< dh1
->size
)) {
// usable if two holes were found, or if dh1 alone can hold both stripes
169 if (dh1
&& (dh2
|| dh1
->size
>= 2 * max_stripe_size
)) {
// with a single hole, both stripes come out of dh1
173 devdh2
= dh2
? dh2
: dh1
;
185 // Can't find hole of at least max_stripe_size; look for the largest one we can find
190 le
= Vcb
->devices
.Flink
;
191 while (le
!= &Vcb
->devices
) {
192 device
* dev
= CONTAINING_RECORD(le
, device
, list_entry
);
// NOTE(review): unlike the first pass, this test does not require dev->devobj -- confirm that is intended
194 if (!dev
->readonly
&& !dev
->reloc
) {
195 if (!IsListEmpty(&dev
->space
)) {
197 space
*dh1
= NULL
, *dh2
= NULL
;
199 le2
= dev
->space
.Flink
;
200 while (le2
!= &dev
->space
) {
201 space
* dh
= CONTAINING_RECORD(le2
, space
, list_entry
);
203 if (!dh1
|| !dh2
|| dh
->size
< dh1
->size
) {
// two holes: per-stripe size is the better of splitting dh1 in half or using the smaller hole
215 devsize
= max(dh1
->size
/ 2, min(dh1
->size
, dh2
->size
));
// one hole: it has to be split in half
217 devsize
= dh1
->size
/ 2;
// best per-stripe size seen so far across devices
219 if (devsize
> size
) {
// using two separate holes beats halving dh1
223 if (dh2
&& min(dh1
->size
, dh2
->size
) > dh1
->size
/ 2)
// DUP: both stripes live on the same device
241 stripes
[0].device
= stripes
[1].device
= dev2
;
242 stripes
[0].dh
= devdh1
;
243 stripes
[1].dh
= devdh2
;
/* Selects a device and free-space hole for stripe number i of a new chunk and
 * records them in stripes[i].device and stripes[i].dh.
 *
 * Vcb             - volume whose device list (Vcb->devices) is searched
 * stripes         - stripe array; entries 0..i-1 are already chosen
 * i               - index of the stripe to fill in
 * max_stripe_size - preferred size of the stripe
 * allow_missing   - when true, devices without a device object (dev->devobj
 *                   is NULL) may be chosen as well
 * full_size       - NOTE(review): its use is not visible here; presumably
 *                   rejects results smaller than max_stripe_size -- confirm
 *
 * Devices that are read-only, being relocated, or already used by an earlier
 * stripe are never candidates. The first pass prefers the least-used device
 * having a hole of at least max_stripe_size; the fallback pass takes the
 * largest hole available.
 * NOTE(review): presumably returns true when a placement was found. */
248 static bool find_new_stripe(device_extension
* Vcb
, stripe
* stripes
, uint16_t i
, uint64_t max_stripe_size
, bool allow_missing
, bool full_size
) {
// devusage: lowest usage ratio seen so far; starts at the maximum
249 uint64_t k
, devusage
= 0xffffffffffffffff;
254 le
= Vcb
->devices
.Flink
;
255 while (le
!= &Vcb
->devices
) {
256 device
* dev
= CONTAINING_RECORD(le
, device
, list_entry
);
// device is not eligible
260 if (dev
->readonly
|| dev
->reloc
|| (!dev
->devobj
&& !allow_missing
)) {
265 // skip this device if it already has a stripe
267 for (k
= 0; k
< i
; k
++) {
268 if (stripes
[k
].device
== dev
) {
// fraction of the device in use, scaled by 4096
// NOTE(review): divides by devitem.num_bytes -- confirm it cannot be zero here
276 usage
= (dev
->devitem
.bytes_used
* 4096) / dev
->devitem
.num_bytes
;
278 // favour devices which have been used the least
279 if (usage
< devusage
) {
280 if (!IsListEmpty(&dev
->space
)) {
283 le2
= dev
->space
.Flink
;
284 while (le2
!= &dev
->space
) {
285 space
* dh
= CONTAINING_RECORD(le2
, space
, list_entry
);
// accept the first big-enough hole on a newly considered device, or a
// smaller big-enough hole on the device that is already the current choice
287 if ((dev2
!= dev
&& dh
->size
>= max_stripe_size
) ||
288 (dev2
== dev
&& dh
->size
>= max_stripe_size
&& dh
->size
< devdh
->size
)
305 // Can't find hole of at least max_stripe_size; look for the largest one we can find
310 le
= Vcb
->devices
.Flink
;
311 while (le
!= &Vcb
->devices
) {
312 device
* dev
= CONTAINING_RECORD(le
, device
, list_entry
);
315 if (dev
->readonly
|| dev
->reloc
|| (!dev
->devobj
&& !allow_missing
)) {
320 // skip this device if it already has a stripe
322 for (k
= 0; k
< i
; k
++) {
323 if (stripes
[k
].device
== dev
) {
331 if (!IsListEmpty(&dev
->space
)) {
334 le2
= dev
->space
.Flink
;
335 while (le2
!= &dev
->space
) {
336 space
* dh
= CONTAINING_RECORD(le2
, space
, list_entry
);
// keep the largest hole encountered
338 if (!devdh
|| devdh
->size
< dh
->size
) {
// publish the selection for stripe i
355 stripes
[i
].dh
= devdh
;
356 stripes
[i
].device
= dev2
;
/* Allocates a new chunk of the block-group type and RAID profile given in
 * `flags`, builds its in-memory CHUNK_ITEM, and links it into Vcb->chunks.
 *
 * Vcb       - volume to allocate on
 * flags     - BLOCK_FLAG_* bits: block type (DATA / METADATA / SYSTEM) plus
 *             an optional profile (DUPLICATE, RAID0, RAID1, RAID10, RAID5,
 *             RAID6); stored verbatim in chunk_item->type
 * pc        - NOTE(review): presumably receives the new chunk on success;
 *             the assignment is not visible here -- confirm
 * full_size - forwarded to find_new_dup_stripes / find_new_stripe
 *
 * Visible status codes: STATUS_INTERNAL_ERROR for an unknown block type,
 * STATUS_INSUFFICIENT_RESOURCES on allocation failure, STATUS_DISK_FULL when
 * not enough stripes or space can be found, STATUS_SUCCESS otherwise.
 *
 * Outline: total the device sizes; choose stripe and chunk size ceilings from
 * the block type; derive the stripe-count limits from the profile; cap the
 * chunk at 10% of total size; choose a device hole for each stripe; size the
 * stripes; fill in the CHUNK_ITEM and its stripe descriptors; initialise the
 * chunk's lists and locks; record the whole chunk as free space; charge the
 * devices for the space used; insert the chunk into Vcb->chunks in offset
 * order. */
361 NTSTATUS
alloc_chunk(device_extension
* Vcb
, uint64_t flags
, chunk
** pc
, bool full_size
) {
363 uint64_t max_stripe_size
, max_chunk_size
, stripe_size
, stripe_length
, factor
;
364 uint64_t total_size
= 0, logaddr
;
365 uint16_t i
, type
, num_stripes
, sub_stripes
, max_stripes
, min_stripes
, allowed_missing
;
366 stripe
* stripes
= NULL
;
368 CHUNK_ITEM_STRIPE
* cis
;
// add up the size of every device in the volume
373 le
= Vcb
->devices
.Flink
;
374 while (le
!= &Vcb
->devices
) {
375 device
* dev
= CONTAINING_RECORD(le
, device
, list_entry
);
376 total_size
+= dev
->devitem
.num_bytes
;
381 TRACE("total_size = %I64x\n", total_size
);
383 // We purposely check for DATA first - mixed blocks have the same size
385 if (flags
& BLOCK_FLAG_DATA
) {
386 max_stripe_size
= 0x40000000; // 1 GB
387 max_chunk_size
= 10 * max_stripe_size
;
388 } else if (flags
& BLOCK_FLAG_METADATA
) {
// larger metadata stripes on volumes above 50 GB
389 if (total_size
> 0xC80000000) // 50 GB
390 max_stripe_size
= 0x40000000; // 1 GB
392 max_stripe_size
= 0x10000000; // 256 MB
394 max_chunk_size
= max_stripe_size
;
395 } else if (flags
& BLOCK_FLAG_SYSTEM
) {
396 max_stripe_size
= 0x2000000; // 32 MB
397 max_chunk_size
= 2 * max_stripe_size
;
399 ERR("unknown chunk type\n");
400 return STATUS_INTERNAL_ERROR
;
// map the profile bit in flags to `type`; the stripe-count limits are set in the same branches
403 if (flags
& BLOCK_FLAG_DUPLICATE
) {
407 type
= BLOCK_FLAG_DUPLICATE
;
409 } else if (flags
& BLOCK_FLAG_RAID0
) {
// striped profiles may use every device, clamped to what fits in a uint16_t
411 max_stripes
= (uint16_t)min(0xffff, Vcb
->superblock
.num_devices
);
413 type
= BLOCK_FLAG_RAID0
;
415 } else if (flags
& BLOCK_FLAG_RAID1
) {
419 type
= BLOCK_FLAG_RAID1
;
421 } else if (flags
& BLOCK_FLAG_RAID10
) {
423 max_stripes
= (uint16_t)min(0xffff, Vcb
->superblock
.num_devices
);
425 type
= BLOCK_FLAG_RAID10
;
427 } else if (flags
& BLOCK_FLAG_RAID5
) {
429 max_stripes
= (uint16_t)min(0xffff, Vcb
->superblock
.num_devices
);
431 type
= BLOCK_FLAG_RAID5
;
433 } else if (flags
& BLOCK_FLAG_RAID6
) {
437 type
= BLOCK_FLAG_RAID6
;
447 if (max_chunk_size
> total_size
/ 10) { // cap at 10%
448 max_chunk_size
= total_size
/ 10;
449 max_stripe_size
= max_chunk_size
/ min_stripes
;
452 TRACE("would allocate a new chunk of %I64x bytes and stripe %I64x\n", max_chunk_size
, max_stripe_size
);
// scratch array holding one candidate device/hole per stripe
454 stripes
= ExAllocatePoolWithTag(PagedPool
, sizeof(stripe
) * max_stripes
, ALLOC_TAG
);
456 ERR("out of memory\n");
457 Status
= STATUS_INSUFFICIENT_RESOURCES
;
// DUP puts both stripes on one device; other profiles choose stripe by stripe
463 if (type
== BLOCK_FLAG_DUPLICATE
) {
464 if (!find_new_dup_stripes(Vcb
, stripes
, max_stripe_size
, full_size
)) {
465 Status
= STATUS_DISK_FULL
;
469 num_stripes
= max_stripes
;
// first attempt: devices with a missing device object are not allowed
471 for (i
= 0; i
< max_stripes
; i
++) {
472 if (!find_new_stripe(Vcb
, stripes
, i
, max_stripe_size
, false, full_size
))
// too few stripes: when degraded allocation is permitted, retry allowing
// missing devices, up to allowed_missing of them
479 if (num_stripes
< min_stripes
&& Vcb
->options
.allow_degraded
&& allowed_missing
> 0) {
480 uint16_t added_missing
= 0;
482 for (i
= num_stripes
; i
< max_stripes
; i
++) {
483 if (!find_new_stripe(Vcb
, stripes
, i
, max_stripe_size
, true, full_size
))
487 if (added_missing
>= allowed_missing
)
492 num_stripes
+= added_missing
;
495 // for RAID10, round down to an even number of stripes
496 if (type
== BLOCK_FLAG_RAID10
&& (num_stripes
% sub_stripes
) != 0) {
497 num_stripes
-= num_stripes
% sub_stripes
;
500 if (num_stripes
< min_stripes
) {
501 WARN("found %u stripes, needed at least %u\n", num_stripes
, min_stripes
);
502 Status
= STATUS_DISK_FULL
;
506 c
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(chunk
), ALLOC_TAG
);
508 ERR("out of memory\n");
509 Status
= STATUS_INSUFFICIENT_RESOURCES
;
// CHUNK_ITEM header followed by one CHUNK_ITEM_STRIPE per stripe
515 cisize
= sizeof(CHUNK_ITEM
) + (num_stripes
* sizeof(CHUNK_ITEM_STRIPE
));
516 c
->chunk_item
= ExAllocatePoolWithTag(NonPagedPool
, cisize
, ALLOC_TAG
);
517 if (!c
->chunk_item
) {
518 ERR("out of memory\n");
519 Status
= STATUS_INSUFFICIENT_RESOURCES
;
523 stripe_length
= 0x10000; // FIXME? BTRFS_STRIPE_LEN in kernel
// DUP with both stripes carved from one hole: each stripe gets at most half of it
525 if (type
== BLOCK_FLAG_DUPLICATE
&& stripes
[1].dh
== stripes
[0].dh
)
526 stripe_size
= min(stripes
[0].dh
->size
/ 2, max_stripe_size
);
// otherwise: the smallest chosen hole, but no more than max_stripe_size
528 stripe_size
= max_stripe_size
;
529 for (i
= 0; i
< num_stripes
; i
++) {
530 if (stripes
[i
].dh
->size
< stripe_size
)
531 stripe_size
= stripes
[i
].dh
->size
;
// factor: number of stripes' worth of usable data in the chunk (chunk size = stripe_size * factor)
535 if (type
== 0 || type
== BLOCK_FLAG_DUPLICATE
|| type
== BLOCK_FLAG_RAID1
)
537 else if (type
== BLOCK_FLAG_RAID0
)
538 factor
= num_stripes
;
539 else if (type
== BLOCK_FLAG_RAID10
)
540 factor
= num_stripes
/ sub_stripes
;
541 else if (type
== BLOCK_FLAG_RAID5
)
542 factor
= num_stripes
- 1;
543 else if (type
== BLOCK_FLAG_RAID6
)
544 factor
= num_stripes
- 2;
// shrink the stripes so that the chunk does not exceed max_chunk_size
546 if (stripe_size
* factor
> max_chunk_size
)
547 stripe_size
= max_chunk_size
/ factor
;
// round stripe_size down to a multiple of stripe_length
549 if (stripe_size
% stripe_length
> 0)
550 stripe_size
-= stripe_size
% stripe_length
;
552 if (stripe_size
== 0) {
553 ERR("not enough free space found (stripe_size == 0)\n");
554 Status
= STATUS_DISK_FULL
;
558 c
->chunk_item
->size
= stripe_size
* factor
;
559 c
->chunk_item
->root_id
= Vcb
->extent_root
->id
;
560 c
->chunk_item
->stripe_length
= stripe_length
;
561 c
->chunk_item
->type
= flags
;
562 c
->chunk_item
->opt_io_alignment
= (uint32_t)c
->chunk_item
->stripe_length
;
563 c
->chunk_item
->opt_io_width
= (uint32_t)c
->chunk_item
->stripe_length
;
564 c
->chunk_item
->sector_size
= stripes
[0].device
->devitem
.minimal_io_size
;
565 c
->chunk_item
->num_stripes
= num_stripes
;
566 c
->chunk_item
->sub_stripes
= sub_stripes
;
// per-stripe device pointers kept alongside the on-disk item
568 c
->devices
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(device
*) * num_stripes
, ALLOC_TAG
);
570 ERR("out of memory\n");
571 Status
= STATUS_INSUFFICIENT_RESOURCES
;
// the stripe descriptors are stored immediately after the CHUNK_ITEM header
575 cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
576 for (i
= 0; i
< num_stripes
; i
++) {
577 cis
[i
].dev_id
= stripes
[i
].device
->devitem
.dev_id
;
// second DUP stripe sharing the first stripe's hole starts right after the first stripe
579 if (type
== BLOCK_FLAG_DUPLICATE
&& i
== 1 && stripes
[i
].dh
== stripes
[0].dh
)
580 cis
[i
].offset
= stripes
[0].dh
->address
+ stripe_size
;
582 cis
[i
].offset
= stripes
[i
].dh
->address
;
584 cis
[i
].dev_uuid
= stripes
[i
].device
->devitem
.device_uuid
;
586 c
->devices
[i
] = stripes
[i
].device
;
// choose the chunk's logical address
589 logaddr
= find_new_chunk_address(Vcb
, c
->chunk_item
->size
);
591 Vcb
->superblock
.chunk_root_generation
= Vcb
->superblock
.generation
;
// fresh chunk: nothing used, no cache objects yet
595 c
->used
= c
->oldused
= 0;
596 c
->cache
= c
->old_cache
= NULL
;
599 c
->last_alloc_set
= false;
// marked as loaded so that find_data_address_in_chunk does not call load_cache_chunk for it
601 c
->cache_loaded
= true;
603 c
->space_changed
= false;
606 InitializeListHead(&c
->space
);
607 InitializeListHead(&c
->space_size
);
608 InitializeListHead(&c
->deleting
);
609 InitializeListHead(&c
->changed_extents
);
611 InitializeListHead(&c
->range_locks
);
612 ExInitializeResourceLite(&c
->range_locks_lock
);
613 KeInitializeEvent(&c
->range_locks_event
, NotificationEvent
, false);
615 InitializeListHead(&c
->partial_stripes
);
616 ExInitializeResourceLite(&c
->partial_stripes_lock
);
618 ExInitializeResourceLite(&c
->lock
);
619 ExInitializeResourceLite(&c
->changed_extents_lock
);
// the new chunk starts out as a single free-space entry covering its whole range
621 s
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(space
), ALLOC_TAG
);
623 ERR("out of memory\n");
624 Status
= STATUS_INSUFFICIENT_RESOURCES
;
628 s
->address
= c
->offset
;
629 s
->size
= c
->chunk_item
->size
;
630 InsertTailList(&c
->space
, &s
->list_entry
);
631 InsertTailList(&c
->space_size
, &s
->list_entry_size
);
633 protect_superblocks(c
);
// charge each device for its stripe and remove the range from the device's free-space list
635 for (i
= 0; i
< num_stripes
; i
++) {
636 stripes
[i
].device
->devitem
.bytes_used
+= stripe_size
;
638 space_list_subtract2(&stripes
[i
].device
->space
, NULL
, cis
[i
].offset
, stripe_size
, NULL
, NULL
);
641 Status
= STATUS_SUCCESS
;
// RAID5/6 chunks require the RAID56 incompat flag in the superblock
643 if (flags
& BLOCK_FLAG_RAID5
|| flags
& BLOCK_FLAG_RAID6
)
644 Vcb
->superblock
.incompat_flags
|= BTRFS_INCOMPAT_FLAGS_RAID56
;
// failure path: release whatever was allocated for the chunk
650 if (!NT_SUCCESS(Status
)) {
653 ExFreePool(c
->devices
);
656 ExFreePool(c
->chunk_item
);
661 if (s
) ExFreePool(s
);
// keep Vcb->chunks ordered by offset: insert before the first chunk with a larger offset
665 le
= Vcb
->chunks
.Flink
;
666 while (le
!= &Vcb
->chunks
) {
667 chunk
* c2
= CONTAINING_RECORD(le
, chunk
, list_entry
);
669 if (c2
->offset
> c
->offset
) {
670 InsertHeadList(le
->Blink
, &c
->list_entry
);
// append at the tail
// NOTE(review): presumably only reached when no chunk has a larger offset -- confirm
679 InsertTailList(&Vcb
->chunks
, &c
->list_entry
);
683 c
->space_changed
= true;
// a NULL Flink marks the chunk as not linked through list_entry_balance
684 c
->list_entry_balance
.Flink
= NULL
;
/* Prepares the per-stripe MDLs for a RAID0 write of `length` bytes at logical
 * address `address` in chunk c.
 *
 * c          - target chunk; its chunk_item must have num_stripes > 0
 * address    - logical start address of the write
 * data       - source buffer of `length` bytes
 * length     - number of bytes to write
 * stripes    - array with one write_stripe per chunk stripe; start, end and
 *              mdl are filled in here
 * Irp        - originating IRP; its MDL is reused as the master MDL when the
 *              MDL's ByteOffset is 0 (see file_write)
 * irp_offset - byte offset of this write within the IRP's buffer
 * wtc        - write context; receives the scratch buffer and master MDL
 *              when this function has to create them
 *
 * A master MDL describing the source pages is obtained in one of three ways
 * (IRP MDL, non-paged scratch copy of an unaligned buffer, or probe-and-lock
 * of an aligned buffer). Each stripe that receives data then gets its own MDL
 * whose page-frame-number array is filled by copying runs of PFNs from the
 * master MDL, one stripe element (stripe_length bytes) at a time, rotating
 * through the stripes.
 *
 * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES on allocation
 * failure, or presumably the exception code raised by MmProbeAndLockPages. */
692 static NTSTATUS
prepare_raid0_write(_Pre_satisfies_(_Curr_
->chunk_item
->num_stripes
>0) _In_ chunk
* c
, _In_
uint64_t address
, _In_reads_bytes_(length
) void* data
,
693 _In_
uint32_t length
, _In_ write_stripe
* stripes
, _In_ PIRP Irp
, _In_
uint64_t irp_offset
, _In_ write_data_context
* wtc
) {
694 uint64_t startoff
, endoff
;
695 uint16_t startoffstripe
, endoffstripe
, stripenum
;
696 uint64_t pos
, *stripeoff
;
// true when the IRP carries an MDL whose ByteOffset is 0
698 bool file_write
= Irp
&& Irp
->MdlAddress
&& (Irp
->MdlAddress
->ByteOffset
== 0);
// per-stripe count of bytes whose PFNs have already been placed in that stripe's MDL
702 stripeoff
= ExAllocatePoolWithTag(PagedPool
, sizeof(uint64_t) * c
->chunk_item
->num_stripes
, ALLOC_TAG
);
704 ERR("out of memory\n");
705 return STATUS_INSUFFICIENT_RESOURCES
;
// locate the first and the last byte of the write within the stripe layout
708 get_raid0_offset(address
- c
->offset
, c
->chunk_item
->stripe_length
, c
->chunk_item
->num_stripes
, &startoff
, &startoffstripe
);
709 get_raid0_offset(address
+ length
- c
->offset
- 1, c
->chunk_item
->stripe_length
, c
->chunk_item
->num_stripes
, &endoff
, &endoffstripe
);
// reuse the IRP's MDL; the PFN array follows the MDL header, skip to the page holding irp_offset
712 master_mdl
= Irp
->MdlAddress
;
714 pfns
= (PFN_NUMBER
*)(Irp
->MdlAddress
+ 1);
715 pfns
= &pfns
[irp_offset
>> PAGE_SHIFT
];
// buffer is not page-aligned: copy it into a non-paged scratch buffer and describe that instead
716 } else if (((ULONG_PTR
)data
% PAGE_SIZE
) != 0) {
717 wtc
->scratch
= ExAllocatePoolWithTag(NonPagedPool
, length
, ALLOC_TAG
);
// NOTE(review): stripeoff was allocated above and no ExFreePool(stripeoff) is visible before this return -- possible leak; confirm
719 ERR("out of memory\n");
720 return STATUS_INSUFFICIENT_RESOURCES
;
723 RtlCopyMemory(wtc
->scratch
, data
, length
);
725 master_mdl
= IoAllocateMdl(wtc
->scratch
, length
, false, false, NULL
);
// NOTE(review): same concern -- stripeoff does not appear to be freed on this path; confirm
727 ERR("out of memory\n");
728 return STATUS_INSUFFICIENT_RESOURCES
;
731 MmBuildMdlForNonPagedPool(master_mdl
);
733 wtc
->mdl
= master_mdl
;
735 pfns
= (PFN_NUMBER
*)(master_mdl
+ 1);
// page-aligned buffer: build an MDL directly over data and lock its pages
737 NTSTATUS Status
= STATUS_SUCCESS
;
739 master_mdl
= IoAllocateMdl(data
, length
, false, false, NULL
);
// NOTE(review): same concern -- stripeoff does not appear to be freed on this path; confirm
741 ERR("out of memory\n");
742 return STATUS_INSUFFICIENT_RESOURCES
;
// MmProbeAndLockPages reports failure by raising an exception, hence the SEH handler
746 MmProbeAndLockPages(master_mdl
, KernelMode
, IoReadAccess
);
747 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
748 Status
= _SEH2_GetExceptionCode();
751 if (!NT_SUCCESS(Status
)) {
752 ERR("MmProbeAndLockPages threw exception %08x\n", Status
);
753 IoFreeMdl(master_mdl
);
757 wtc
->mdl
= master_mdl
;
759 pfns
= (PFN_NUMBER
*)(master_mdl
+ 1);
// work out each stripe's byte range [start, end)
762 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
// stripes before the starting stripe begin at the next stripe-element boundary
763 if (startoffstripe
> i
)
764 stripes
[i
].start
= startoff
- (startoff
% c
->chunk_item
->stripe_length
) + c
->chunk_item
->stripe_length
;
765 else if (startoffstripe
== i
)
766 stripes
[i
].start
= startoff
;
// stripes after the starting stripe begin at the start of the current element
768 stripes
[i
].start
= startoff
- (startoff
% c
->chunk_item
->stripe_length
);
// stripes before the ending stripe run to the end of the last element
770 if (endoffstripe
> i
)
771 stripes
[i
].end
= endoff
- (endoff
% c
->chunk_item
->stripe_length
) + c
->chunk_item
->stripe_length
;
// endoff is the offset of the last byte, so the exclusive end is one past it
772 else if (endoffstripe
== i
)
773 stripes
[i
].end
= endoff
+ 1;
775 stripes
[i
].end
= endoff
- (endoff
% c
->chunk_item
->stripe_length
);
// only stripes that receive data get an MDL; its PFN array is filled in below
777 if (stripes
[i
].start
!= stripes
[i
].end
) {
778 stripes
[i
].mdl
= IoAllocateMdl(NULL
, (ULONG
)(stripes
[i
].end
- stripes
[i
].start
), false, false, NULL
);
779 if (!stripes
[i
].mdl
) {
780 ERR("IoAllocateMdl failed\n");
781 ExFreePool(stripeoff
);
782 return STATUS_INSUFFICIENT_RESOURCES
;
788 RtlZeroMemory(stripeoff
, sizeof(uint64_t) * c
->chunk_item
->num_stripes
);
790 stripenum
= startoffstripe
;
// distribute the master MDL's page frame numbers over the stripe MDLs
792 while (pos
< length
) {
793 PFN_NUMBER
* stripe_pfns
= (PFN_NUMBER
*)(stripes
[stripenum
].mdl
+ 1);
// bytes that fit in the first, possibly partial, stripe element
796 uint32_t writelen
= (uint32_t)min(stripes
[stripenum
].end
- stripes
[stripenum
].start
,
797 c
->chunk_item
->stripe_length
- (stripes
[stripenum
].start
% c
->chunk_item
->stripe_length
));
// one PFN per page: the byte count is converted to pages by >> PAGE_SHIFT
799 RtlCopyMemory(stripe_pfns
, pfns
, writelen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
801 stripeoff
[stripenum
] += writelen
;
// final run: less than a whole stripe element remains
803 } else if (length
- pos
< c
->chunk_item
->stripe_length
) {
804 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripenum
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (ULONG
)((length
- pos
) * sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
// full stripe element
807 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripenum
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (ULONG
)(c
->chunk_item
->stripe_length
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
809 stripeoff
[stripenum
] += c
->chunk_item
->stripe_length
;
810 pos
+= c
->chunk_item
->stripe_length
;
// move on to the next stripe, wrapping round
813 stripenum
= (stripenum
+ 1) % c
->chunk_item
->num_stripes
;
816 ExFreePool(stripeoff
);
818 return STATUS_SUCCESS
;
/* Prepares the per-stripe MDLs for a RAID10 write of `length` bytes at
 * logical address `address` in chunk c.
 *
 * c          - target chunk; sub_stripes must be > 0 and num_stripes must be
 *              >= sub_stripes
 * address    - logical start address of the write
 * data       - source buffer of `length` bytes
 * length     - number of bytes to write
 * stripes    - array with one write_stripe per chunk stripe
 * Irp        - originating IRP; its MDL is reused as the master MDL when the
 *              MDL's ByteOffset is 0 (see file_write)
 * irp_offset - byte offset of this write within the IRP's buffer
 * wtc        - write context; receives the scratch buffer and master MDL
 *              when this function has to create them
 *
 * The stripes are handled in groups of sub_stripes consecutive entries. The
 * layout is computed as RAID0 over num_stripes / sub_stripes groups; the
 * first entry of each group gets an MDL, and the remaining entries of the
 * group share that entry's start, end, data and mdl, so every member of a
 * group writes the same pages.
 *
 * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES on allocation
 * failure, or presumably the exception code raised by MmProbeAndLockPages.
 * NOTE(review): the early returns below do not visibly release resources
 * acquired earlier in the function -- confirm the caller cleans up. */
821 static NTSTATUS
prepare_raid10_write(_Pre_satisfies_(_Curr_
->chunk_item
->sub_stripes
>0&&_Curr_
->chunk_item
->num_stripes
>=_Curr_
->chunk_item
->sub_stripes
) _In_ chunk
* c
,
822 _In_
uint64_t address
, _In_reads_bytes_(length
) void* data
, _In_
uint32_t length
, _In_ write_stripe
* stripes
,
823 _In_ PIRP Irp
, _In_
uint64_t irp_offset
, _In_ write_data_context
* wtc
) {
824 uint64_t startoff
, endoff
;
825 uint16_t startoffstripe
, endoffstripe
, stripenum
;
826 uint64_t pos
, *stripeoff
;
// true when the IRP carries an MDL whose ByteOffset is 0
828 bool file_write
= Irp
&& Irp
->MdlAddress
&& (Irp
->MdlAddress
->ByteOffset
== 0);
// locate the first and the last byte of the write, treating each group as one RAID0 stripe
832 get_raid0_offset(address
- c
->offset
, c
->chunk_item
->stripe_length
, c
->chunk_item
->num_stripes
/ c
->chunk_item
->sub_stripes
, &startoff
, &startoffstripe
);
833 get_raid0_offset(address
+ length
- c
->offset
- 1, c
->chunk_item
->stripe_length
, c
->chunk_item
->num_stripes
/ c
->chunk_item
->sub_stripes
, &endoff
, &endoffstripe
);
// stripenum stays a group index; the start/end values become indices into stripes[]
835 stripenum
= startoffstripe
;
836 startoffstripe
*= c
->chunk_item
->sub_stripes
;
837 endoffstripe
*= c
->chunk_item
->sub_stripes
;
// reuse the IRP's MDL; the PFN array follows the MDL header, skip to the page holding irp_offset
840 master_mdl
= Irp
->MdlAddress
;
842 pfns
= (PFN_NUMBER
*)(Irp
->MdlAddress
+ 1);
843 pfns
= &pfns
[irp_offset
>> PAGE_SHIFT
];
// buffer is not page-aligned: copy it into a non-paged scratch buffer and describe that instead
844 } else if (((ULONG_PTR
)data
% PAGE_SIZE
) != 0) {
845 wtc
->scratch
= ExAllocatePoolWithTag(NonPagedPool
, length
, ALLOC_TAG
);
847 ERR("out of memory\n");
848 return STATUS_INSUFFICIENT_RESOURCES
;
851 RtlCopyMemory(wtc
->scratch
, data
, length
);
853 master_mdl
= IoAllocateMdl(wtc
->scratch
, length
, false, false, NULL
);
855 ERR("out of memory\n");
856 return STATUS_INSUFFICIENT_RESOURCES
;
859 MmBuildMdlForNonPagedPool(master_mdl
);
861 wtc
->mdl
= master_mdl
;
863 pfns
= (PFN_NUMBER
*)(master_mdl
+ 1);
// page-aligned buffer: build an MDL directly over data and lock its pages
865 NTSTATUS Status
= STATUS_SUCCESS
;
867 master_mdl
= IoAllocateMdl(data
, length
, false, false, NULL
);
869 ERR("out of memory\n");
870 return STATUS_INSUFFICIENT_RESOURCES
;
// MmProbeAndLockPages reports failure by raising an exception, hence the SEH handler
874 MmProbeAndLockPages(master_mdl
, KernelMode
, IoReadAccess
);
875 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
876 Status
= _SEH2_GetExceptionCode();
879 if (!NT_SUCCESS(Status
)) {
880 ERR("MmProbeAndLockPages threw exception %08x\n", Status
);
881 IoFreeMdl(master_mdl
);
885 wtc
->mdl
= master_mdl
;
887 pfns
= (PFN_NUMBER
*)(master_mdl
+ 1);
// visit the first stripe of every group
890 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
+= c
->chunk_item
->sub_stripes
) {
// groups before the starting group begin at the next stripe-element boundary
893 if (startoffstripe
> i
)
894 stripes
[i
].start
= startoff
- (startoff
% c
->chunk_item
->stripe_length
) + c
->chunk_item
->stripe_length
;
895 else if (startoffstripe
== i
)
896 stripes
[i
].start
= startoff
;
898 stripes
[i
].start
= startoff
- (startoff
% c
->chunk_item
->stripe_length
);
// groups before the ending group run to the end of the last element
900 if (endoffstripe
> i
)
901 stripes
[i
].end
= endoff
- (endoff
% c
->chunk_item
->stripe_length
) + c
->chunk_item
->stripe_length
;
// endoff is the offset of the last byte, so the exclusive end is one past it
902 else if (endoffstripe
== i
)
903 stripes
[i
].end
= endoff
+ 1;
905 stripes
[i
].end
= endoff
- (endoff
% c
->chunk_item
->stripe_length
);
// MDL for the group's first stripe; its PFN array is filled in below
907 stripes
[i
].mdl
= IoAllocateMdl(NULL
, (ULONG
)(stripes
[i
].end
- stripes
[i
].start
), false, false, NULL
);
908 if (!stripes
[i
].mdl
) {
909 ERR("IoAllocateMdl failed\n");
910 return STATUS_INSUFFICIENT_RESOURCES
;
// the other stripes of the group share the first stripe's range, data and MDL
913 for (j
= 1; j
< c
->chunk_item
->sub_stripes
; j
++) {
914 stripes
[i
+j
].start
= stripes
[i
].start
;
915 stripes
[i
+j
].end
= stripes
[i
].end
;
916 stripes
[i
+j
].data
= stripes
[i
].data
;
917 stripes
[i
+j
].mdl
= stripes
[i
].mdl
;
// per-group count of bytes whose PFNs have already been placed in the group's MDL
923 stripeoff
= ExAllocatePoolWithTag(PagedPool
, sizeof(uint64_t) * c
->chunk_item
->num_stripes
/ c
->chunk_item
->sub_stripes
, ALLOC_TAG
);
925 ERR("out of memory\n");
926 return STATUS_INSUFFICIENT_RESOURCES
;
929 RtlZeroMemory(stripeoff
, sizeof(uint64_t) * c
->chunk_item
->num_stripes
/ c
->chunk_item
->sub_stripes
);
// distribute the master MDL's page frame numbers over the group MDLs
931 while (pos
< length
) {
// stripenum is a group index; multiply by sub_stripes for the index into stripes[]
932 PFN_NUMBER
* stripe_pfns
= (PFN_NUMBER
*)(stripes
[stripenum
* c
->chunk_item
->sub_stripes
].mdl
+ 1);
// bytes that fit in the first, possibly partial, stripe element
935 uint32_t writelen
= (uint32_t)min(stripes
[stripenum
* c
->chunk_item
->sub_stripes
].end
- stripes
[stripenum
* c
->chunk_item
->sub_stripes
].start
,
936 c
->chunk_item
->stripe_length
- (stripes
[stripenum
* c
->chunk_item
->sub_stripes
].start
% c
->chunk_item
->stripe_length
));
// one PFN per page: the byte count is converted to pages by >> PAGE_SHIFT
938 RtlCopyMemory(stripe_pfns
, pfns
, writelen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
940 stripeoff
[stripenum
] += writelen
;
// final run: less than a whole stripe element remains
942 } else if (length
- pos
< c
->chunk_item
->stripe_length
) {
943 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripenum
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (ULONG
)((length
- pos
) * sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
// full stripe element
946 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripenum
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (ULONG
)(c
->chunk_item
->stripe_length
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
948 stripeoff
[stripenum
] += c
->chunk_item
->stripe_length
;
949 pos
+= c
->chunk_item
->stripe_length
;
// move on to the next group, wrapping round
952 stripenum
= (stripenum
+ 1) % (c
->chunk_item
->num_stripes
/ c
->chunk_item
->sub_stripes
);
955 ExFreePool(stripeoff
);
957 return STATUS_SUCCESS
;
/* Buffers `length` bytes of `data`, destined for logical address `address`,
 * in the chunk's list of partially written full stripes (c->partial_stripes).
 *
 * Vcb     - volume; supplies the sector size used for the bitmap granularity
 * c       - RAID5/RAID6 chunk (one parity stripe when the RAID5 flag is set
 *           in chunk_item->type, two otherwise)
 * address - logical address of the data
 * length  - number of bytes in data
 * data    - bytes to buffer
 *
 * Each partial_stripe covers one full stripe (num_data_stripes *
 * stripe_length bytes) starting at stripe_addr and carries a bitmap with one
 * bit per sector; a set bit marks a sector not yet supplied. When an entry
 * for stripe_addr already exists the data is merged into it and, once no bits
 * remain set, the stripe is flushed with flush_partial_stripe and the entry
 * is removed. Otherwise a new entry is created and inserted in front of the
 * list position at which the search stopped. The whole operation runs under
 * an exclusive acquisition of c->partial_stripes_lock.
 *
 * Status is STATUS_SUCCESS or STATUS_INSUFFICIENT_RESOURCES, or comes from
 * flush_partial_stripe.
 * NOTE(review): the return statement is not visible; presumably returns Status. */
960 static NTSTATUS
add_partial_stripe(device_extension
* Vcb
, chunk
*c
, uint64_t address
, uint32_t length
, void* data
) {
964 uint64_t stripe_addr
;
965 uint16_t num_data_stripes
;
// RAID5 has one parity stripe, otherwise two are assumed
967 num_data_stripes
= c
->chunk_item
->num_stripes
- (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
? 1 : 2);
// round address down to the start of its full stripe (relative to the chunk start)
968 stripe_addr
= address
- ((address
- c
->offset
) % (num_data_stripes
* c
->chunk_item
->stripe_length
));
970 ExAcquireResourceExclusiveLite(&c
->partial_stripes_lock
, true);
972 le
= c
->partial_stripes
.Flink
;
973 while (le
!= &c
->partial_stripes
) {
974 ps
= CONTAINING_RECORD(le
, partial_stripe
, list_entry
);
976 if (ps
->address
== stripe_addr
) {
977 // update existing entry
979 RtlCopyMemory(ps
->data
+ address
- stripe_addr
, data
, length
);
// mark the sectors just supplied as present
980 RtlClearBits(&ps
->bmp
, (ULONG
)((address
- stripe_addr
) / Vcb
->superblock
.sector_size
), length
/ Vcb
->superblock
.sector_size
);
982 // if now filled, flush
983 if (RtlAreBitsClear(&ps
->bmp
, 0, (ULONG
)((num_data_stripes
* c
->chunk_item
->stripe_length
) / Vcb
->superblock
.sector_size
))) {
984 Status
= flush_partial_stripe(Vcb
, c
, ps
);
985 if (!NT_SUCCESS(Status
)) {
986 ERR("flush_partial_stripe returned %08x\n", Status
);
// flushed: drop the entry from the list and free its bitmap storage
990 RemoveEntryList(&ps
->list_entry
);
993 ExFreePool(ps
->bmparr
);
998 Status
= STATUS_SUCCESS
;
// NOTE(review): presumably ends the search here because the list is kept in ascending address order -- confirm
1000 } else if (ps
->address
> stripe_addr
)
// no existing entry: allocate the header plus one full stripe of data
1008 ps
= ExAllocatePoolWithTag(NonPagedPool
, offsetof(partial_stripe
, data
[0]) + (ULONG
)(num_data_stripes
* c
->chunk_item
->stripe_length
), ALLOC_TAG
);
1010 ERR("out of memory\n");
1011 Status
= STATUS_INSUFFICIENT_RESOURCES
;
// number of sectors in a full stripe = number of bits in the bitmap
1015 ps
->bmplen
= (ULONG
)(num_data_stripes
* c
->chunk_item
->stripe_length
) / Vcb
->superblock
.sector_size
;
1017 ps
->address
= stripe_addr
;
// bitmap storage: bmplen / 8 + 1 bytes, rounded up to a multiple of sizeof(ULONG)
1018 ps
->bmparr
= ExAllocatePoolWithTag(NonPagedPool
, (size_t)sector_align(((ps
->bmplen
/ 8) + 1), sizeof(ULONG
)), ALLOC_TAG
);
1020 ERR("out of memory\n");
1022 Status
= STATUS_INSUFFICIENT_RESOURCES
;
// start with every sector marked as missing
1026 RtlInitializeBitMap(&ps
->bmp
, ps
->bmparr
, ps
->bmplen
);
1027 RtlSetAllBits(&ps
->bmp
);
1029 RtlCopyMemory(ps
->data
+ address
- stripe_addr
, data
, length
);
1030 RtlClearBits(&ps
->bmp
, (ULONG
)((address
- stripe_addr
) / Vcb
->superblock
.sector_size
), length
/ Vcb
->superblock
.sector_size
);
// link the new entry immediately before le
1032 InsertHeadList(le
->Blink
, &ps
->list_entry
);
1034 Status
= STATUS_SUCCESS
;
1037 ExReleaseResourceLite(&c
->partial_stripes_lock
);
1047 static NTSTATUS
prepare_raid5_write(device_extension
* Vcb
, chunk
* c
, uint64_t address
, void* data
, uint32_t length
, write_stripe
* stripes
, PIRP Irp
,
1048 uint64_t irp_offset
, ULONG priority
, write_data_context
* wtc
) {
1049 uint64_t startoff
, endoff
, parity_start
, parity_end
;
1050 uint16_t startoffstripe
, endoffstripe
, parity
, num_data_stripes
= c
->chunk_item
->num_stripes
- 1;
1051 uint64_t pos
, parity_pos
, *stripeoff
= NULL
;
1053 bool file_write
= Irp
&& Irp
->MdlAddress
&& (Irp
->MdlAddress
->ByteOffset
== 0);
1056 PFN_NUMBER
*pfns
, *parity_pfns
;
1057 log_stripe
* log_stripes
= NULL
;
1059 if ((address
+ length
- c
->offset
) % (num_data_stripes
* c
->chunk_item
->stripe_length
) > 0) {
1060 uint64_t delta
= (address
+ length
- c
->offset
) % (num_data_stripes
* c
->chunk_item
->stripe_length
);
1062 delta
= min(irp_offset
+ length
, delta
);
1063 Status
= add_partial_stripe(Vcb
, c
, address
+ length
- delta
, (uint32_t)delta
, (uint8_t*)data
+ irp_offset
+ length
- delta
);
1064 if (!NT_SUCCESS(Status
)) {
1065 ERR("add_partial_stripe returned %08x\n", Status
);
1069 length
-= (uint32_t)delta
;
1072 if (length
> 0 && (address
- c
->offset
) % (num_data_stripes
* c
->chunk_item
->stripe_length
) > 0) {
1073 uint64_t delta
= (num_data_stripes
* c
->chunk_item
->stripe_length
) - ((address
- c
->offset
) % (num_data_stripes
* c
->chunk_item
->stripe_length
));
1075 Status
= add_partial_stripe(Vcb
, c
, address
, (uint32_t)delta
, (uint8_t*)data
+ irp_offset
);
1076 if (!NT_SUCCESS(Status
)) {
1077 ERR("add_partial_stripe returned %08x\n", Status
);
1082 length
-= (uint32_t)delta
;
1083 irp_offset
+= delta
;
1087 Status
= STATUS_SUCCESS
;
1091 get_raid0_offset(address
- c
->offset
, c
->chunk_item
->stripe_length
, num_data_stripes
, &startoff
, &startoffstripe
);
1092 get_raid0_offset(address
+ length
- c
->offset
- 1, c
->chunk_item
->stripe_length
, num_data_stripes
, &endoff
, &endoffstripe
);
1095 while (pos
< length
) {
1096 parity
= (((address
- c
->offset
+ pos
) / (num_data_stripes
* c
->chunk_item
->stripe_length
)) + num_data_stripes
) % c
->chunk_item
->num_stripes
;
1099 uint16_t stripe
= (parity
+ startoffstripe
+ 1) % c
->chunk_item
->num_stripes
;
1100 ULONG skip
, writelen
;
1103 while (stripe
!= parity
) {
1104 if (i
== startoffstripe
) {
1105 writelen
= (ULONG
)min(length
, c
->chunk_item
->stripe_length
- (startoff
% c
->chunk_item
->stripe_length
));
1107 stripes
[stripe
].start
= startoff
;
1108 stripes
[stripe
].end
= startoff
+ writelen
;
1115 writelen
= (ULONG
)min(length
- pos
, c
->chunk_item
->stripe_length
);
1117 stripes
[stripe
].start
= startoff
- (startoff
% c
->chunk_item
->stripe_length
);
1118 stripes
[stripe
].end
= stripes
[stripe
].start
+ writelen
;
1127 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
1133 for (i
= 0; i
< startoffstripe
; i
++) {
1134 stripe
= (parity
+ i
+ 1) % c
->chunk_item
->num_stripes
;
1136 stripes
[stripe
].start
= stripes
[stripe
].end
= startoff
- (startoff
% c
->chunk_item
->stripe_length
) + c
->chunk_item
->stripe_length
;
1139 stripes
[parity
].start
= stripes
[parity
].end
= startoff
- (startoff
% c
->chunk_item
->stripe_length
) + c
->chunk_item
->stripe_length
;
1141 if (length
- pos
> c
->chunk_item
->num_stripes
* num_data_stripes
* c
->chunk_item
->stripe_length
) {
1142 skip
= (ULONG
)(((length
- pos
) / (c
->chunk_item
->num_stripes
* num_data_stripes
* c
->chunk_item
->stripe_length
)) - 1);
1144 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
1145 stripes
[i
].end
+= skip
* c
->chunk_item
->num_stripes
* c
->chunk_item
->stripe_length
;
1148 pos
+= skip
* num_data_stripes
* c
->chunk_item
->num_stripes
* c
->chunk_item
->stripe_length
;
1150 } else if (length
- pos
>= c
->chunk_item
->stripe_length
* num_data_stripes
) {
1151 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
1152 stripes
[i
].end
+= c
->chunk_item
->stripe_length
;
1155 pos
+= c
->chunk_item
->stripe_length
* num_data_stripes
;
1157 uint16_t stripe
= (parity
+ 1) % c
->chunk_item
->num_stripes
;
1160 while (stripe
!= parity
) {
1161 if (endoffstripe
== i
) {
1162 stripes
[stripe
].end
= endoff
+ 1;
1164 } else if (endoffstripe
> i
)
1165 stripes
[stripe
].end
= endoff
- (endoff
% c
->chunk_item
->stripe_length
) + c
->chunk_item
->stripe_length
;
1168 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
1175 parity_start
= 0xffffffffffffffff;
1178 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
1179 if (stripes
[i
].start
!= 0 || stripes
[i
].end
!= 0) {
1180 parity_start
= min(stripes
[i
].start
, parity_start
);
1181 parity_end
= max(stripes
[i
].end
, parity_end
);
1185 if (parity_end
== parity_start
) {
1186 Status
= STATUS_SUCCESS
;
1190 parity
= (((address
- c
->offset
) / (num_data_stripes
* c
->chunk_item
->stripe_length
)) + num_data_stripes
) % c
->chunk_item
->num_stripes
;
1191 stripes
[parity
].start
= parity_start
;
1193 parity
= (((address
- c
->offset
+ length
- 1) / (num_data_stripes
* c
->chunk_item
->stripe_length
)) + num_data_stripes
) % c
->chunk_item
->num_stripes
;
1194 stripes
[parity
].end
= parity_end
;
1196 log_stripes
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(log_stripe
) * num_data_stripes
, ALLOC_TAG
);
1198 ERR("out of memory\n");
1199 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1203 RtlZeroMemory(log_stripes
, sizeof(log_stripe
) * num_data_stripes
);
1205 for (i
= 0; i
< num_data_stripes
; i
++) {
1206 log_stripes
[i
].mdl
= IoAllocateMdl(NULL
, (ULONG
)(parity_end
- parity_start
), false, false, NULL
);
1207 if (!log_stripes
[i
].mdl
) {
1208 ERR("out of memory\n");
1209 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1213 log_stripes
[i
].mdl
->MdlFlags
|= MDL_PARTIAL
;
1214 log_stripes
[i
].pfns
= (PFN_NUMBER
*)(log_stripes
[i
].mdl
+ 1);
1217 wtc
->parity1
= ExAllocatePoolWithTag(NonPagedPool
, (ULONG
)(parity_end
- parity_start
), ALLOC_TAG
);
1218 if (!wtc
->parity1
) {
1219 ERR("out of memory\n");
1220 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1224 wtc
->parity1_mdl
= IoAllocateMdl(wtc
->parity1
, (ULONG
)(parity_end
- parity_start
), false, false, NULL
);
1225 if (!wtc
->parity1_mdl
) {
1226 ERR("out of memory\n");
1227 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1231 MmBuildMdlForNonPagedPool(wtc
->parity1_mdl
);
1234 master_mdl
= Irp
->MdlAddress
;
1235 else if (((ULONG_PTR
)data
% PAGE_SIZE
) != 0) {
1236 wtc
->scratch
= ExAllocatePoolWithTag(NonPagedPool
, length
, ALLOC_TAG
);
1237 if (!wtc
->scratch
) {
1238 ERR("out of memory\n");
1239 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1243 RtlCopyMemory(wtc
->scratch
, (uint8_t*)data
+ irp_offset
, length
);
1245 master_mdl
= IoAllocateMdl(wtc
->scratch
, length
, false, false, NULL
);
1247 ERR("out of memory\n");
1248 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1252 MmBuildMdlForNonPagedPool(master_mdl
);
1254 wtc
->mdl
= master_mdl
;
1256 master_mdl
= IoAllocateMdl((uint8_t*)data
+ irp_offset
, length
, false, false, NULL
);
1258 ERR("out of memory\n");
1259 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1263 Status
= STATUS_SUCCESS
;
1266 MmProbeAndLockPages(master_mdl
, KernelMode
, IoReadAccess
);
1267 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
1268 Status
= _SEH2_GetExceptionCode();
1271 if (!NT_SUCCESS(Status
)) {
1272 ERR("MmProbeAndLockPages threw exception %08x\n", Status
);
1273 IoFreeMdl(master_mdl
);
1277 wtc
->mdl
= master_mdl
;
1280 pfns
= (PFN_NUMBER
*)(master_mdl
+ 1);
1281 parity_pfns
= (PFN_NUMBER
*)(wtc
->parity1_mdl
+ 1);
1284 pfns
= &pfns
[irp_offset
>> PAGE_SHIFT
];
1286 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
1287 if (stripes
[i
].start
!= stripes
[i
].end
) {
1288 stripes
[i
].mdl
= IoAllocateMdl((uint8_t*)MmGetMdlVirtualAddress(master_mdl
) + irp_offset
, (ULONG
)(stripes
[i
].end
- stripes
[i
].start
), false, false, NULL
);
1289 if (!stripes
[i
].mdl
) {
1290 ERR("IoAllocateMdl failed\n");
1291 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1297 stripeoff
= ExAllocatePoolWithTag(PagedPool
, sizeof(uint64_t) * c
->chunk_item
->num_stripes
, ALLOC_TAG
);
1299 ERR("out of memory\n");
1300 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1304 RtlZeroMemory(stripeoff
, sizeof(uint64_t) * c
->chunk_item
->num_stripes
);
1309 while (pos
< length
) {
1310 PFN_NUMBER
* stripe_pfns
;
1312 parity
= (((address
- c
->offset
+ pos
) / (num_data_stripes
* c
->chunk_item
->stripe_length
)) + num_data_stripes
) % c
->chunk_item
->num_stripes
;
1315 uint16_t stripe
= (parity
+ startoffstripe
+ 1) % c
->chunk_item
->num_stripes
;
1316 uint32_t writelen
= (uint32_t)min(length
- pos
, min(stripes
[stripe
].end
- stripes
[stripe
].start
,
1317 c
->chunk_item
->stripe_length
- (stripes
[stripe
].start
% c
->chunk_item
->stripe_length
)));
1318 uint32_t maxwritelen
= writelen
;
1320 stripe_pfns
= (PFN_NUMBER
*)(stripes
[stripe
].mdl
+ 1);
1322 RtlCopyMemory(stripe_pfns
, pfns
, writelen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1324 RtlCopyMemory(log_stripes
[startoffstripe
].pfns
, pfns
, writelen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1325 log_stripes
[startoffstripe
].pfns
+= writelen
>> PAGE_SHIFT
;
1327 stripeoff
[stripe
] = writelen
;
1330 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
1331 i
= startoffstripe
+ 1;
1333 while (stripe
!= parity
) {
1334 stripe_pfns
= (PFN_NUMBER
*)(stripes
[stripe
].mdl
+ 1);
1335 writelen
= (uint32_t)min(length
- pos
, min(stripes
[stripe
].end
- stripes
[stripe
].start
, c
->chunk_item
->stripe_length
));
1340 if (writelen
> maxwritelen
)
1341 maxwritelen
= writelen
;
1343 RtlCopyMemory(stripe_pfns
, &pfns
[pos
>> PAGE_SHIFT
], writelen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1345 RtlCopyMemory(log_stripes
[i
].pfns
, &pfns
[pos
>> PAGE_SHIFT
], writelen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1346 log_stripes
[i
].pfns
+= writelen
>> PAGE_SHIFT
;
1348 stripeoff
[stripe
] = writelen
;
1351 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
1355 stripe_pfns
= (PFN_NUMBER
*)(stripes
[parity
].mdl
+ 1);
1357 RtlCopyMemory(stripe_pfns
, parity_pfns
, maxwritelen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1358 stripeoff
[parity
] = maxwritelen
;
1359 parity_pos
= maxwritelen
;
1360 } else if (length
- pos
>= c
->chunk_item
->stripe_length
* num_data_stripes
) {
1361 uint16_t stripe
= (parity
+ 1) % c
->chunk_item
->num_stripes
;
1364 while (stripe
!= parity
) {
1365 stripe_pfns
= (PFN_NUMBER
*)(stripes
[stripe
].mdl
+ 1);
1367 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (ULONG
)(c
->chunk_item
->stripe_length
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
1369 RtlCopyMemory(log_stripes
[i
].pfns
, &pfns
[pos
>> PAGE_SHIFT
], (ULONG
)(c
->chunk_item
->stripe_length
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
1370 log_stripes
[i
].pfns
+= c
->chunk_item
->stripe_length
>> PAGE_SHIFT
;
1372 stripeoff
[stripe
] += c
->chunk_item
->stripe_length
;
1373 pos
+= c
->chunk_item
->stripe_length
;
1375 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
1379 stripe_pfns
= (PFN_NUMBER
*)(stripes
[parity
].mdl
+ 1);
1381 RtlCopyMemory(&stripe_pfns
[stripeoff
[parity
] >> PAGE_SHIFT
], &parity_pfns
[parity_pos
>> PAGE_SHIFT
], (ULONG
)(c
->chunk_item
->stripe_length
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
1382 stripeoff
[parity
] += c
->chunk_item
->stripe_length
;
1383 parity_pos
+= c
->chunk_item
->stripe_length
;
1385 uint16_t stripe
= (parity
+ 1) % c
->chunk_item
->num_stripes
;
1386 uint32_t writelen
, maxwritelen
= 0;
1389 while (pos
< length
) {
1390 stripe_pfns
= (PFN_NUMBER
*)(stripes
[stripe
].mdl
+ 1);
1391 writelen
= (uint32_t)min(length
- pos
, min(stripes
[stripe
].end
- stripes
[stripe
].start
, c
->chunk_item
->stripe_length
));
1396 if (writelen
> maxwritelen
)
1397 maxwritelen
= writelen
;
1399 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], writelen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1401 RtlCopyMemory(log_stripes
[i
].pfns
, &pfns
[pos
>> PAGE_SHIFT
], writelen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1402 log_stripes
[i
].pfns
+= writelen
>> PAGE_SHIFT
;
1404 stripeoff
[stripe
] += writelen
;
1407 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
1411 stripe_pfns
= (PFN_NUMBER
*)(stripes
[parity
].mdl
+ 1);
1413 RtlCopyMemory(&stripe_pfns
[stripeoff
[parity
] >> PAGE_SHIFT
], &parity_pfns
[parity_pos
>> PAGE_SHIFT
], maxwritelen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1417 for (i
= 0; i
< num_data_stripes
; i
++) {
1418 uint8_t* ss
= MmGetSystemAddressForMdlSafe(log_stripes
[i
].mdl
, priority
);
1421 RtlCopyMemory(wtc
->parity1
, ss
, (uint32_t)(parity_end
- parity_start
));
1423 do_xor(wtc
->parity1
, ss
, (uint32_t)(parity_end
- parity_start
));
1426 Status
= STATUS_SUCCESS
;
1430 for (i
= 0; i
< num_data_stripes
; i
++) {
1431 if (log_stripes
[i
].mdl
)
1432 IoFreeMdl(log_stripes
[i
].mdl
);
1435 ExFreePool(log_stripes
);
1439 ExFreePool(stripeoff
);
// prepare_raid6_write: plans a write into chunk c, which is laid out with two
// parity stripes per row (parity1 and parity2 = parity1 + 1, modulo num_stripes).
// It fills in stripes[i].start / .end / .mdl for the stripes of the chunk, hands
// any head or tail of the request that does not cover a whole row of data
// stripes to add_partial_stripe, allocates wtc->parity1 / wtc->parity2 together
// with their MDLs, and computes both parity buffers from the data using do_xor
// and galois_double. Status is set to STATUS_INSUFFICIENT_RESOURCES whenever an
// allocation fails.
1444 static NTSTATUS
prepare_raid6_write(device_extension
* Vcb
, chunk
* c
, uint64_t address
, void* data
, uint32_t length
, write_stripe
* stripes
, PIRP Irp
,
1445 uint64_t irp_offset
, ULONG priority
, write_data_context
* wtc
) {
1446 uint64_t startoff
, endoff
, parity_start
, parity_end
;
// Two of the chunk's stripes hold parity, the rest hold data.
1447 uint16_t startoffstripe
, endoffstripe
, parity1
, num_data_stripes
= c
->chunk_item
->num_stripes
- 2;
1448 uint64_t pos
, parity_pos
, *stripeoff
= NULL
;
// Only treated as a file write when an IRP was supplied whose MDL has a ByteOffset of 0.
1450 bool file_write
= Irp
&& Irp
->MdlAddress
&& (Irp
->MdlAddress
->ByteOffset
== 0);
1453 PFN_NUMBER
*pfns
, *parity1_pfns
, *parity2_pfns
;
1454 log_stripe
* log_stripes
= NULL
;
// If the request does not end on a full-row boundary (num_data_stripes * stripe_length),
// the trailing remainder is passed to add_partial_stripe and removed from length.
1456 if ((address
+ length
- c
->offset
) % (num_data_stripes
* c
->chunk_item
->stripe_length
) > 0) {
1457 uint64_t delta
= (address
+ length
- c
->offset
) % (num_data_stripes
* c
->chunk_item
->stripe_length
);
1459 delta
= min(irp_offset
+ length
, delta
);
1460 Status
= add_partial_stripe(Vcb
, c
, address
+ length
- delta
, (uint32_t)delta
, (uint8_t*)data
+ irp_offset
+ length
- delta
);
1461 if (!NT_SUCCESS(Status
)) {
1462 ERR("add_partial_stripe returned %08x\n", Status
);
1466 length
-= (uint32_t)delta
;
// Likewise for a request that does not start on a full-row boundary: the leading
// part up to the next row goes to add_partial_stripe, and length / irp_offset skip past it.
1469 if (length
> 0 && (address
- c
->offset
) % (num_data_stripes
* c
->chunk_item
->stripe_length
) > 0) {
1470 uint64_t delta
= (num_data_stripes
* c
->chunk_item
->stripe_length
) - ((address
- c
->offset
) % (num_data_stripes
* c
->chunk_item
->stripe_length
));
1472 Status
= add_partial_stripe(Vcb
, c
, address
, (uint32_t)delta
, (uint8_t*)data
+ irp_offset
);
1473 if (!NT_SUCCESS(Status
)) {
1474 ERR("add_partial_stripe returned %08x\n", Status
);
1479 length
-= (uint32_t)delta
;
1480 irp_offset
+= delta
;
1484 Status
= STATUS_SUCCESS
;
// Translate the first and the last byte of the remaining range into an offset plus a
// data-stripe index (presumably offset within the stripe device - confirm in get_raid0_offset).
1488 get_raid0_offset(address
- c
->offset
, c
->chunk_item
->stripe_length
, num_data_stripes
, &startoff
, &startoffstripe
);
1489 get_raid0_offset(address
+ length
- c
->offset
- 1, c
->chunk_item
->stripe_length
, num_data_stripes
, &endoff
, &endoffstripe
);
// First pass: walk the range to work out the start and end of every stripe.
1492 while (pos
< length
) {
// Index of the first parity stripe for the row containing the current position.
1493 parity1
= (((address
- c
->offset
+ pos
) / (num_data_stripes
* c
->chunk_item
->stripe_length
)) + num_data_stripes
) % c
->chunk_item
->num_stripes
;
1496 uint16_t stripe
= (parity1
+ startoffstripe
+ 2) % c
->chunk_item
->num_stripes
;
1497 uint16_t parity2
= (parity1
+ 1) % c
->chunk_item
->num_stripes
;
1498 ULONG skip
, writelen
;
1501 while (stripe
!= parity1
) {
1502 if (i
== startoffstripe
) {
1503 writelen
= (ULONG
)min(length
, c
->chunk_item
->stripe_length
- (startoff
% c
->chunk_item
->stripe_length
));
1505 stripes
[stripe
].start
= startoff
;
1506 stripes
[stripe
].end
= startoff
+ writelen
;
1513 writelen
= (ULONG
)min(length
- pos
, c
->chunk_item
->stripe_length
);
1515 stripes
[stripe
].start
= startoff
- (startoff
% c
->chunk_item
->stripe_length
);
1516 stripes
[stripe
].end
= stripes
[stripe
].start
+ writelen
;
1525 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
// Data stripes before the starting one get an empty range positioned at the next stripe boundary.
1531 for (i
= 0; i
< startoffstripe
; i
++) {
1532 stripe
= (parity1
+ i
+ 2) % c
->chunk_item
->num_stripes
;
1534 stripes
[stripe
].start
= stripes
[stripe
].end
= startoff
- (startoff
% c
->chunk_item
->stripe_length
) + c
->chunk_item
->stripe_length
;
// Both parity stripes of the first row also start out as empty ranges at the next stripe boundary.
1537 stripes
[parity1
].start
= stripes
[parity1
].end
= stripes
[parity2
].start
= stripes
[parity2
].end
=
1538 startoff
- (startoff
% c
->chunk_item
->stripe_length
) + c
->chunk_item
->stripe_length
;
// Fast path: when more than num_stripes full rows remain, extend every stripe's end in one step.
1540 if (length
- pos
> c
->chunk_item
->num_stripes
* num_data_stripes
* c
->chunk_item
->stripe_length
) {
1541 skip
= (ULONG
)(((length
- pos
) / (c
->chunk_item
->num_stripes
* num_data_stripes
* c
->chunk_item
->stripe_length
)) - 1);
1543 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
1544 stripes
[i
].end
+= skip
* c
->chunk_item
->num_stripes
* c
->chunk_item
->stripe_length
;
1547 pos
+= skip
* num_data_stripes
* c
->chunk_item
->num_stripes
* c
->chunk_item
->stripe_length
;
// A whole row remains: every stripe (data and parity) grows by one stripe_length.
1549 } else if (length
- pos
>= c
->chunk_item
->stripe_length
* num_data_stripes
) {
1550 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
1551 stripes
[i
].end
+= c
->chunk_item
->stripe_length
;
1554 pos
+= c
->chunk_item
->stripe_length
* num_data_stripes
;
// Final, partial row: set the end of each data stripe up to and including endoffstripe.
1556 uint16_t stripe
= (parity1
+ 2) % c
->chunk_item
->num_stripes
;
1559 while (stripe
!= parity1
) {
1560 if (endoffstripe
== i
) {
1561 stripes
[stripe
].end
= endoff
+ 1;
1563 } else if (endoffstripe
> i
)
1564 stripes
[stripe
].end
= endoff
- (endoff
% c
->chunk_item
->stripe_length
) + c
->chunk_item
->stripe_length
;
1567 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
// The parity range runs from the smallest start to the largest end of any stripe in use
// (stripes whose start and end are both 0 are ignored).
1574 parity_start
= 0xffffffffffffffff;
1577 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
1578 if (stripes
[i
].start
!= 0 || stripes
[i
].end
!= 0) {
1579 parity_start
= min(stripes
[i
].start
, parity_start
);
1580 parity_end
= max(stripes
[i
].end
, parity_end
);
// An empty parity range means there is nothing to write here; Status is set to success.
1584 if (parity_end
== parity_start
) {
1585 Status
= STATUS_SUCCESS
;
// The two parity stripes of the first row start at parity_start ...
1589 parity1
= (((address
- c
->offset
) / (num_data_stripes
* c
->chunk_item
->stripe_length
)) + num_data_stripes
) % c
->chunk_item
->num_stripes
;
1590 stripes
[parity1
].start
= stripes
[(parity1
+ 1) % c
->chunk_item
->num_stripes
].start
= parity_start
;
// ... and the two parity stripes of the last row end at parity_end.
1592 parity1
= (((address
- c
->offset
+ length
- 1) / (num_data_stripes
* c
->chunk_item
->stripe_length
)) + num_data_stripes
) % c
->chunk_item
->num_stripes
;
1593 stripes
[parity1
].end
= stripes
[(parity1
+ 1) % c
->chunk_item
->num_stripes
].end
= parity_end
;
// log_stripes: one entry per data stripe, each with a partial MDL sized to the parity range.
1595 log_stripes
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(log_stripe
) * num_data_stripes
, ALLOC_TAG
);
1597 ERR("out of memory\n");
1598 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1602 RtlZeroMemory(log_stripes
, sizeof(log_stripe
) * num_data_stripes
);
1604 for (i
= 0; i
< num_data_stripes
; i
++) {
1605 log_stripes
[i
].mdl
= IoAllocateMdl(NULL
, (ULONG
)(parity_end
- parity_start
), false, false, NULL
);
1606 if (!log_stripes
[i
].mdl
) {
1607 ERR("out of memory\n");
1608 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1612 log_stripes
[i
].mdl
->MdlFlags
|= MDL_PARTIAL
;
// pfns points just past the MDL structure, where the page frame numbers are written below.
1613 log_stripes
[i
].pfns
= (PFN_NUMBER
*)(log_stripes
[i
].mdl
+ 1);
// Non-paged buffers for the two parity blocks, each covering the whole parity range.
1616 wtc
->parity1
= ExAllocatePoolWithTag(NonPagedPool
, (ULONG
)(parity_end
- parity_start
), ALLOC_TAG
);
1617 if (!wtc
->parity1
) {
1618 ERR("out of memory\n");
1619 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1623 wtc
->parity2
= ExAllocatePoolWithTag(NonPagedPool
, (ULONG
)(parity_end
- parity_start
), ALLOC_TAG
);
1624 if (!wtc
->parity2
) {
1625 ERR("out of memory\n");
1626 Status
= STATUS_INSUFFICIENT_RESOURCES
;
// MDLs describing the two parity buffers.
1630 wtc
->parity1_mdl
= IoAllocateMdl(wtc
->parity1
, (ULONG
)(parity_end
- parity_start
), false, false, NULL
);
1631 if (!wtc
->parity1_mdl
) {
1632 ERR("out of memory\n");
1633 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1637 MmBuildMdlForNonPagedPool(wtc
->parity1_mdl
);
1639 wtc
->parity2_mdl
= IoAllocateMdl(wtc
->parity2
, (ULONG
)(parity_end
- parity_start
), false, false, NULL
);
1640 if (!wtc
->parity2_mdl
) {
1641 ERR("out of memory\n");
1642 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1646 MmBuildMdlForNonPagedPool(wtc
->parity2_mdl
);
// Pick the master MDL describing the caller's data: the IRP's own MDL (the guarding
// condition is not visible here - presumably file_write), or, when data is not
// page-aligned, an MDL over a non-paged scratch copy, or otherwise an MDL built directly
// over data whose pages are probed and locked.
1649 master_mdl
= Irp
->MdlAddress
;
1650 else if (((ULONG_PTR
)data
% PAGE_SIZE
) != 0) {
1651 wtc
->scratch
= ExAllocatePoolWithTag(NonPagedPool
, length
, ALLOC_TAG
);
1652 if (!wtc
->scratch
) {
1653 ERR("out of memory\n");
1654 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1658 RtlCopyMemory(wtc
->scratch
, (uint8_t*)data
+ irp_offset
, length
);
1660 master_mdl
= IoAllocateMdl(wtc
->scratch
, length
, false, false, NULL
);
1662 ERR("out of memory\n");
1663 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1667 MmBuildMdlForNonPagedPool(master_mdl
);
1669 wtc
->mdl
= master_mdl
;
1671 master_mdl
= IoAllocateMdl((uint8_t*)data
+ irp_offset
, length
, false, false, NULL
);
1673 ERR("out of memory\n");
1674 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1678 Status
= STATUS_SUCCESS
;
// MmProbeAndLockPages reports failure by raising an exception, which is turned into Status.
1681 MmProbeAndLockPages(master_mdl
, KernelMode
, IoReadAccess
);
1682 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
1683 Status
= _SEH2_GetExceptionCode();
1686 if (!NT_SUCCESS(Status
)) {
1687 ERR("MmProbeAndLockPages threw exception %08x\n", Status
);
1688 IoFreeMdl(master_mdl
);
1692 wtc
->mdl
= master_mdl
;
// Page-frame arrays of the master MDL and of the two parity MDLs.
1695 pfns
= (PFN_NUMBER
*)(master_mdl
+ 1);
1696 parity1_pfns
= (PFN_NUMBER
*)(wtc
->parity1_mdl
+ 1);
1697 parity2_pfns
= (PFN_NUMBER
*)(wtc
->parity2_mdl
+ 1);
1700 pfns
= &pfns
[irp_offset
>> PAGE_SHIFT
];
// Allocate an MDL for every stripe that has a non-empty range.
1702 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
1703 if (stripes
[i
].start
!= stripes
[i
].end
) {
1704 stripes
[i
].mdl
= IoAllocateMdl((uint8_t*)MmGetMdlVirtualAddress(master_mdl
) + irp_offset
, (ULONG
)(stripes
[i
].end
- stripes
[i
].start
), false, false, NULL
);
1705 if (!stripes
[i
].mdl
) {
1706 ERR("IoAllocateMdl failed\n");
1707 Status
= STATUS_INSUFFICIENT_RESOURCES
;
// stripeoff[n] records how many bytes of stripe n's MDL have been filled in so far.
1713 stripeoff
= ExAllocatePoolWithTag(PagedPool
, sizeof(uint64_t) * c
->chunk_item
->num_stripes
, ALLOC_TAG
);
1715 ERR("out of memory\n");
1716 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1720 RtlZeroMemory(stripeoff
, sizeof(uint64_t) * c
->chunk_item
->num_stripes
);
// Second pass: copy page frame numbers from the master MDL into each data stripe's MDL and
// into log_stripes, and from the parity MDLs into the two parity stripes' MDLs.
1725 while (pos
< length
) {
1726 PFN_NUMBER
* stripe_pfns
;
1728 parity1
= (((address
- c
->offset
+ pos
) / (num_data_stripes
* c
->chunk_item
->stripe_length
)) + num_data_stripes
) % c
->chunk_item
->num_stripes
;
// First row: starts at startoffstripe and may begin part-way through a stripe.
1731 uint16_t stripe
= (parity1
+ startoffstripe
+ 2) % c
->chunk_item
->num_stripes
, parity2
;
1732 uint32_t writelen
= (uint32_t)min(length
- pos
, min(stripes
[stripe
].end
- stripes
[stripe
].start
,
1733 c
->chunk_item
->stripe_length
- (stripes
[stripe
].start
% c
->chunk_item
->stripe_length
)));
1734 uint32_t maxwritelen
= writelen
;
1736 stripe_pfns
= (PFN_NUMBER
*)(stripes
[stripe
].mdl
+ 1);
// Byte length converted to a count of PFN_NUMBER entries (one per page).
1738 RtlCopyMemory(stripe_pfns
, pfns
, writelen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1740 RtlCopyMemory(log_stripes
[startoffstripe
].pfns
, pfns
, writelen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1741 log_stripes
[startoffstripe
].pfns
+= writelen
>> PAGE_SHIFT
;
1743 stripeoff
[stripe
] = writelen
;
1746 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
1747 i
= startoffstripe
+ 1;
1749 while (stripe
!= parity1
) {
1750 stripe_pfns
= (PFN_NUMBER
*)(stripes
[stripe
].mdl
+ 1);
1751 writelen
= (uint32_t)min(length
- pos
, min(stripes
[stripe
].end
- stripes
[stripe
].start
, c
->chunk_item
->stripe_length
));
// Track the longest data write in this row; the parity stripes get that many bytes.
1756 if (writelen
> maxwritelen
)
1757 maxwritelen
= writelen
;
1759 RtlCopyMemory(stripe_pfns
, &pfns
[pos
>> PAGE_SHIFT
], writelen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1761 RtlCopyMemory(log_stripes
[i
].pfns
, &pfns
[pos
>> PAGE_SHIFT
], writelen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1762 log_stripes
[i
].pfns
+= writelen
>> PAGE_SHIFT
;
1764 stripeoff
[stripe
] = writelen
;
1767 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
1771 stripe_pfns
= (PFN_NUMBER
*)(stripes
[parity1
].mdl
+ 1);
1772 RtlCopyMemory(stripe_pfns
, parity1_pfns
, maxwritelen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1773 stripeoff
[parity1
] = maxwritelen
;
1775 parity2
= (parity1
+ 1) % c
->chunk_item
->num_stripes
;
1777 stripe_pfns
= (PFN_NUMBER
*)(stripes
[parity2
].mdl
+ 1);
1778 RtlCopyMemory(stripe_pfns
, parity2_pfns
, maxwritelen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1779 stripeoff
[parity2
] = maxwritelen
;
1781 parity_pos
= maxwritelen
;
// Full row: every data stripe and both parity stripes receive exactly stripe_length bytes.
1782 } else if (length
- pos
>= c
->chunk_item
->stripe_length
* num_data_stripes
) {
1783 uint16_t stripe
= (parity1
+ 2) % c
->chunk_item
->num_stripes
, parity2
;
1786 while (stripe
!= parity1
) {
1787 stripe_pfns
= (PFN_NUMBER
*)(stripes
[stripe
].mdl
+ 1);
1789 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (ULONG
)(c
->chunk_item
->stripe_length
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
1791 RtlCopyMemory(log_stripes
[i
].pfns
, &pfns
[pos
>> PAGE_SHIFT
], (ULONG
)(c
->chunk_item
->stripe_length
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
1792 log_stripes
[i
].pfns
+= c
->chunk_item
->stripe_length
>> PAGE_SHIFT
;
1794 stripeoff
[stripe
] += c
->chunk_item
->stripe_length
;
1795 pos
+= c
->chunk_item
->stripe_length
;
1797 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
1801 stripe_pfns
= (PFN_NUMBER
*)(stripes
[parity1
].mdl
+ 1);
1802 RtlCopyMemory(&stripe_pfns
[stripeoff
[parity1
] >> PAGE_SHIFT
], &parity1_pfns
[parity_pos
>> PAGE_SHIFT
], (ULONG
)(c
->chunk_item
->stripe_length
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
1803 stripeoff
[parity1
] += c
->chunk_item
->stripe_length
;
1805 parity2
= (parity1
+ 1) % c
->chunk_item
->num_stripes
;
1807 stripe_pfns
= (PFN_NUMBER
*)(stripes
[parity2
].mdl
+ 1);
1808 RtlCopyMemory(&stripe_pfns
[stripeoff
[parity2
] >> PAGE_SHIFT
], &parity2_pfns
[parity_pos
>> PAGE_SHIFT
], (ULONG
)(c
->chunk_item
->stripe_length
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
1809 stripeoff
[parity2
] += c
->chunk_item
->stripe_length
;
1811 parity_pos
+= c
->chunk_item
->stripe_length
;
// Last, partial row: consume whatever is left, stripe by stripe.
1813 uint16_t stripe
= (parity1
+ 2) % c
->chunk_item
->num_stripes
, parity2
;
1814 uint32_t writelen
, maxwritelen
= 0;
1817 while (pos
< length
) {
1818 stripe_pfns
= (PFN_NUMBER
*)(stripes
[stripe
].mdl
+ 1);
1819 writelen
= (uint32_t)min(length
- pos
, min(stripes
[stripe
].end
- stripes
[stripe
].start
, c
->chunk_item
->stripe_length
));
1824 if (writelen
> maxwritelen
)
1825 maxwritelen
= writelen
;
1827 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], writelen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1829 RtlCopyMemory(log_stripes
[i
].pfns
, &pfns
[pos
>> PAGE_SHIFT
], writelen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1830 log_stripes
[i
].pfns
+= writelen
>> PAGE_SHIFT
;
1832 stripeoff
[stripe
] += writelen
;
1835 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
1839 stripe_pfns
= (PFN_NUMBER
*)(stripes
[parity1
].mdl
+ 1);
1840 RtlCopyMemory(&stripe_pfns
[stripeoff
[parity1
] >> PAGE_SHIFT
], &parity1_pfns
[parity_pos
>> PAGE_SHIFT
], maxwritelen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1842 parity2
= (parity1
+ 1) % c
->chunk_item
->num_stripes
;
1844 stripe_pfns
= (PFN_NUMBER
*)(stripes
[parity2
].mdl
+ 1);
1845 RtlCopyMemory(&stripe_pfns
[stripeoff
[parity2
] >> PAGE_SHIFT
], &parity2_pfns
[parity_pos
>> PAGE_SHIFT
], maxwritelen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
// Compute the parity buffers. The data stripes are visited from the last
// (index num_stripes - 3) down to the first: parity1 accumulates the XOR of the data,
// while parity2 is passed through galois_double before each further stripe is XORed in.
// The copies below seed both buffers (presumably for the first stripe visited only -
// the selecting condition is not visible here).
1849 for (i
= 0; i
< num_data_stripes
; i
++) {
1850 uint8_t* ss
= MmGetSystemAddressForMdlSafe(log_stripes
[c
->chunk_item
->num_stripes
- 3 - i
].mdl
, priority
);
1853 RtlCopyMemory(wtc
->parity1
, ss
, (ULONG
)(parity_end
- parity_start
));
1854 RtlCopyMemory(wtc
->parity2
, ss
, (ULONG
)(parity_end
- parity_start
));
1856 do_xor(wtc
->parity1
, ss
, (uint32_t)(parity_end
- parity_start
));
1858 galois_double(wtc
->parity2
, (uint32_t)(parity_end
- parity_start
));
1859 do_xor(wtc
->parity2
, ss
, (uint32_t)(parity_end
- parity_start
));
1863 Status
= STATUS_SUCCESS
;
// Cleanup of the temporaries: the log_stripes MDLs and array, and stripeoff.
1867 for (i
= 0; i
< num_data_stripes
; i
++) {
1868 if (log_stripes
[i
].mdl
)
1869 IoFreeMdl(log_stripes
[i
].mdl
);
1872 ExFreePool(log_stripes
);
1876 ExFreePool(stripeoff
);
// write_data: builds the per-device write requests needed to write length bytes
// of data at logical address. It dispatches on the chunk's RAID flags to fill
// in the stripes array, checks that no more devices are missing than the
// profile tolerates, then creates one write_data_stripe (with its own IRP,
// unless the stripe is to be ignored) per stripe and appends it to
// wtc->stripes. No IRP is sent from here: this function only prepares them.
// Status is set to STATUS_INSUFFICIENT_RESOURCES on allocation failure and
// to STATUS_DEVICE_NOT_READY when too many devices are missing.
1881 NTSTATUS
write_data(_In_ device_extension
* Vcb
, _In_
uint64_t address
, _In_reads_bytes_(length
) void* data
, _In_
uint32_t length
, _In_ write_data_context
* wtc
,
1882 _In_opt_ PIRP Irp
, _In_opt_ chunk
* c
, _In_
bool file_write
, _In_
uint64_t irp_offset
, _In_ ULONG priority
) {
1885 CHUNK_ITEM_STRIPE
* cis
;
1886 write_stripe
* stripes
= NULL
;
1887 uint64_t total_writing
= 0;
1888 ULONG allowed_missing
, missing
;
1890 TRACE("(%p, %I64x, %p, %x)\n", Vcb
, address
, data
, length
);
// c is optional: the chunk is looked up from the address (the guarding condition is
// not visible here - presumably only when c was not supplied).
1893 c
= get_chunk_from_address(Vcb
, address
);
1895 ERR("could not get chunk for address %I64x\n", address
);
1896 return STATUS_INTERNAL_ERROR
;
// Working array with one write_stripe per stripe of the chunk, zeroed below.
1900 stripes
= ExAllocatePoolWithTag(PagedPool
, sizeof(write_stripe
) * c
->chunk_item
->num_stripes
, ALLOC_TAG
);
1902 ERR("out of memory\n");
1903 return STATUS_INSUFFICIENT_RESOURCES
;
1906 RtlZeroMemory(stripes
, sizeof(write_stripe
) * c
->chunk_item
->num_stripes
);
// The CHUNK_ITEM_STRIPE array is read from directly after the CHUNK_ITEM structure.
1908 cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
// Per-profile preparation. The IRP is only passed down for file writes; allowed_missing
// is the number of absent devices each profile is allowed to have.
1910 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID0
) {
1911 Status
= prepare_raid0_write(c
, address
, data
, length
, stripes
, file_write
? Irp
: NULL
, irp_offset
, wtc
);
1912 if (!NT_SUCCESS(Status
)) {
1913 ERR("prepare_raid0_write returned %08x\n", Status
);
1914 goto prepare_failed
;
1917 allowed_missing
= 0;
1918 } else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID10
) {
1919 Status
= prepare_raid10_write(c
, address
, data
, length
, stripes
, file_write
? Irp
: NULL
, irp_offset
, wtc
);
1920 if (!NT_SUCCESS(Status
)) {
1921 ERR("prepare_raid10_write returned %08x\n", Status
);
1922 goto prepare_failed
;
1925 allowed_missing
= 1;
1926 } else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
) {
1927 Status
= prepare_raid5_write(Vcb
, c
, address
, data
, length
, stripes
, file_write
? Irp
: NULL
, irp_offset
, priority
, wtc
);
1928 if (!NT_SUCCESS(Status
)) {
1929 ERR("prepare_raid5_write returned %08x\n", Status
);
1930 goto prepare_failed
;
1933 allowed_missing
= 1;
1934 } else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
) {
1935 Status
= prepare_raid6_write(Vcb
, c
, address
, data
, length
, stripes
, file_write
? Irp
: NULL
, irp_offset
, priority
, wtc
);
1936 if (!NT_SUCCESS(Status
)) {
1937 ERR("prepare_raid6_write returned %08x\n", Status
);
1938 goto prepare_failed
;
1941 allowed_missing
= 2;
1942 } else { // write same data to every location - SINGLE, DUP, RAID1
1943 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
1944 stripes
[i
].start
= address
- c
->offset
;
1945 stripes
[i
].end
= stripes
[i
].start
+ length
;
1946 stripes
[i
].data
= data
;
1947 stripes
[i
].irp_offset
= irp_offset
;
// An MDL is only built for stripes whose device is present.
1949 if (c
->devices
[i
]->devobj
) {
// First variant: a partial MDL carved out of the IRP's own MDL at irp_offset.
1952 ULONG writelen
= (ULONG
)(stripes
[i
].end
- stripes
[i
].start
);
1954 va
= (uint8_t*)MmGetMdlVirtualAddress(Irp
->MdlAddress
) + stripes
[i
].irp_offset
;
1956 stripes
[i
].mdl
= IoAllocateMdl(va
, writelen
, false, false, NULL
);
1957 if (!stripes
[i
].mdl
) {
1958 ERR("IoAllocateMdl failed\n");
1959 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1960 goto prepare_failed
;
1963 IoBuildPartialMdl(Irp
->MdlAddress
, stripes
[i
].mdl
, va
, writelen
);
// Second variant: an MDL built over the data buffer itself, with its pages probed and locked.
1965 stripes
[i
].mdl
= IoAllocateMdl(stripes
[i
].data
, (ULONG
)(stripes
[i
].end
- stripes
[i
].start
), false, false, NULL
);
1966 if (!stripes
[i
].mdl
) {
1967 ERR("IoAllocateMdl failed\n");
1968 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1969 goto prepare_failed
;
1972 Status
= STATUS_SUCCESS
;
1975 MmProbeAndLockPages(stripes
[i
].mdl
, KernelMode
, IoReadAccess
);
1976 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
1977 Status
= _SEH2_GetExceptionCode();
1980 if (!NT_SUCCESS(Status
)) {
1981 ERR("MmProbeAndLockPages threw exception %08x\n", Status
);
1982 IoFreeMdl(stripes
[i
].mdl
);
1983 stripes
[i
].mdl
= NULL
;
1984 goto prepare_failed
;
// Every copy holds the same data, so all but one device may be absent.
1990 allowed_missing
= c
->chunk_item
->num_stripes
- 1;
// Check the stripes whose device has no devobj against the profile's tolerance.
1994 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
1995 if (!c
->devices
[i
]->devobj
)
1999 if (missing
> allowed_missing
) {
2000 ERR("cannot write as %u missing devices (maximum %u)\n", missing
, allowed_missing
);
2001 Status
= STATUS_DEVICE_NOT_READY
;
2002 goto prepare_failed
;
// Create one write_data_stripe per stripe of the chunk.
2005 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
2006 write_data_stripe
* stripe
;
2007 PIO_STACK_LOCATION IrpSp
;
2009 stripe
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(write_data_stripe
), ALLOC_TAG
);
2011 ERR("out of memory\n");
2012 Status
= STATUS_INSUFFICIENT_RESOURCES
;
// Stripes with an empty range, or whose device is absent, are marked to be ignored.
2016 if (stripes
[i
].start
== stripes
[i
].end
|| !c
->devices
[i
]->devobj
) {
2017 stripe
->status
= WriteDataStatus_Ignore
;
2019 stripe
->buf
= stripes
[i
].data
;
2022 stripe
->context
= (struct _write_data_context
*)wtc
;
2023 stripe
->buf
= stripes
[i
].data
;
2024 stripe
->device
= c
->devices
[i
];
2025 RtlZeroMemory(&stripe
->iosb
, sizeof(IO_STATUS_BLOCK
));
2026 stripe
->status
= WriteDataStatus_Pending
;
2027 stripe
->mdl
= stripes
[i
].mdl
;
// The stripe's IRP is either a stand-alone IRP or one associated with the caller's Irp
// (the selecting condition is not visible here - presumably whether Irp is NULL).
2030 stripe
->Irp
= IoAllocateIrp(stripe
->device
->devobj
->StackSize
, false);
2033 ERR("IoAllocateIrp failed\n");
2035 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2039 stripe
->Irp
= IoMakeAssociatedIrp(Irp
, stripe
->device
->devobj
->StackSize
);
2042 ERR("IoMakeAssociatedIrp failed\n");
2044 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2049 IrpSp
= IoGetNextIrpStackLocation(stripe
->Irp
);
2050 IrpSp
->MajorFunction
= IRP_MJ_WRITE
;
2051 IrpSp
->FileObject
= stripe
->device
->fileobj
;
// Hand the data over in the form the target device's flags ask for:
// a system buffer, the MDL itself, or a user buffer pointer.
2053 if (stripe
->device
->devobj
->Flags
& DO_BUFFERED_IO
) {
2054 stripe
->Irp
->AssociatedIrp
.SystemBuffer
= MmGetSystemAddressForMdlSafe(stripes
[i
].mdl
, priority
);
2056 stripe
->Irp
->Flags
= IRP_BUFFERED_IO
;
2057 } else if (stripe
->device
->devobj
->Flags
& DO_DIRECT_IO
)
2058 stripe
->Irp
->MdlAddress
= stripe
->mdl
;
2060 stripe
->Irp
->UserBuffer
= MmGetSystemAddressForMdlSafe(stripes
[i
].mdl
, priority
);
2062 #ifdef DEBUG_PARANOID
2063 if (stripes
[i
].end
< stripes
[i
].start
) {
2064 ERR("trying to write stripe with negative length (%I64x < %I64x)\n", stripes
[i
].end
, stripes
[i
].start
);
// The device offset is the stripe-relative start plus the stripe's offset from the chunk item.
2069 IrpSp
->Parameters
.Write
.Length
= (ULONG
)(stripes
[i
].end
- stripes
[i
].start
);
2070 IrpSp
->Parameters
.Write
.ByteOffset
.QuadPart
= stripes
[i
].start
+ cis
[i
].offset
;
2072 total_writing
+= IrpSp
->Parameters
.Write
.Length
;
2074 stripe
->Irp
->UserIosb
= &stripe
->iosb
;
// One more completion to wait for; write_data_completion decrements this counter.
2075 wtc
->stripes_left
++;
2077 IoSetCompletionRoutine(stripe
->Irp
, write_data_completion
, stripe
, true, true, true);
2080 InsertTailList(&wtc
->stripes
, &stripe
->list_entry
);
// Pass the total number of bytes queued for writing to the disk counters callback.
2084 fFsRtlUpdateDiskCounters(0, total_writing
);
2086 Status
= STATUS_SUCCESS
;
// Common exit: the working array is released, and on failure so are the queued stripes.
2090 if (stripes
) ExFreePool(stripes
);
2092 if (!NT_SUCCESS(Status
))
2093 free_write_data_stripes(wtc
);
// Failure cleanup (reached via prepare_failed): unlock and free each stripe MDL,
// skipping an MDL that is the same as the previous stripe's so it is not freed twice.
2098 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
2099 if (stripes
[i
].mdl
&& (i
== 0 || stripes
[i
].mdl
!= stripes
[i
-1].mdl
)) {
2100 if (stripes
[i
].mdl
->MdlFlags
& MDL_PAGES_LOCKED
)
2101 MmUnlockPages(stripes
[i
].mdl
);
2103 IoFreeMdl(stripes
[i
].mdl
);
// Release whatever the prepare_* helpers stored in wtc, resetting each pointer to NULL.
2107 if (wtc
->parity1_mdl
) {
2108 if (wtc
->parity1_mdl
->MdlFlags
& MDL_PAGES_LOCKED
)
2109 MmUnlockPages(wtc
->parity1_mdl
);
2111 IoFreeMdl(wtc
->parity1_mdl
);
2112 wtc
->parity1_mdl
= NULL
;
2115 if (wtc
->parity2_mdl
) {
2116 if (wtc
->parity2_mdl
->MdlFlags
& MDL_PAGES_LOCKED
)
2117 MmUnlockPages(wtc
->parity2_mdl
);
2119 IoFreeMdl(wtc
->parity2_mdl
);
2120 wtc
->parity2_mdl
= NULL
;
2124 if (wtc
->mdl
->MdlFlags
& MDL_PAGES_LOCKED
)
2125 MmUnlockPages(wtc
->mdl
);
2127 IoFreeMdl(wtc
->mdl
);
2132 ExFreePool(wtc
->parity1
);
2133 wtc
->parity1
= NULL
;
2137 ExFreePool(wtc
->parity2
);
2138 wtc
->parity2
= NULL
;
2142 ExFreePool(wtc
->scratch
);
2143 wtc
->scratch
= NULL
;
2146 ExFreePool(stripes
);
2150 void get_raid56_lock_range(chunk
* c
, uint64_t address
, uint64_t length
, uint64_t* lockaddr
, uint64_t* locklen
) {
2151 uint64_t startoff
, endoff
;
2152 uint16_t startoffstripe
, endoffstripe
, datastripes
;
2154 datastripes
= c
->chunk_item
->num_stripes
- (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
? 1 : 2);
2156 get_raid0_offset(address
- c
->offset
, c
->chunk_item
->stripe_length
, datastripes
, &startoff
, &startoffstripe
);
2157 get_raid0_offset(address
+ length
- c
->offset
- 1, c
->chunk_item
->stripe_length
, datastripes
, &endoff
, &endoffstripe
);
2159 startoff
-= startoff
% c
->chunk_item
->stripe_length
;
2160 endoff
= sector_align(endoff
, c
->chunk_item
->stripe_length
);
2162 *lockaddr
= c
->offset
+ (startoff
* datastripes
);
2163 *locklen
= (endoff
- startoff
) * datastripes
;
2166 NTSTATUS
write_data_complete(device_extension
* Vcb
, uint64_t address
, void* data
, uint32_t length
, PIRP Irp
, chunk
* c
, bool file_write
, uint64_t irp_offset
, ULONG priority
) {
2167 write_data_context wtc
;
2169 uint64_t lockaddr
, locklen
;
2171 KeInitializeEvent(&wtc
.Event
, NotificationEvent
, false);
2172 InitializeListHead(&wtc
.stripes
);
2173 wtc
.stripes_left
= 0;
2174 wtc
.parity1
= wtc
.parity2
= wtc
.scratch
= NULL
;
2175 wtc
.mdl
= wtc
.parity1_mdl
= wtc
.parity2_mdl
= NULL
;
2178 c
= get_chunk_from_address(Vcb
, address
);
2180 ERR("could not get chunk for address %I64x\n", address
);
2181 return STATUS_INTERNAL_ERROR
;
2185 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
|| c
->chunk_item
->type
& BLOCK_FLAG_RAID6
) {
2186 get_raid56_lock_range(c
, address
, length
, &lockaddr
, &locklen
);
2187 chunk_lock_range(Vcb
, c
, lockaddr
, locklen
);
2191 Status
= write_data(Vcb
, address
, data
, length
, &wtc
, Irp
, c
, file_write
, irp_offset
, priority
);
2192 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
2193 Status
= _SEH2_GetExceptionCode();
2196 if (!NT_SUCCESS(Status
)) {
2197 ERR("write_data returned %08x\n", Status
);
2199 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
|| c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)
2200 chunk_unlock_range(Vcb
, c
, lockaddr
, locklen
);
2202 free_write_data_stripes(&wtc
);
2206 if (wtc
.stripes
.Flink
!= &wtc
.stripes
) {
2207 // launch writes and wait
2208 LIST_ENTRY
* le
= wtc
.stripes
.Flink
;
2209 bool no_wait
= true;
2211 while (le
!= &wtc
.stripes
) {
2212 write_data_stripe
* stripe
= CONTAINING_RECORD(le
, write_data_stripe
, list_entry
);
2214 if (stripe
->status
!= WriteDataStatus_Ignore
) {
2215 IoCallDriver(stripe
->device
->devobj
, stripe
->Irp
);
2223 KeWaitForSingleObject(&wtc
.Event
, Executive
, KernelMode
, false, NULL
);
2225 le
= wtc
.stripes
.Flink
;
2226 while (le
!= &wtc
.stripes
) {
2227 write_data_stripe
* stripe
= CONTAINING_RECORD(le
, write_data_stripe
, list_entry
);
2229 if (stripe
->status
!= WriteDataStatus_Ignore
&& !NT_SUCCESS(stripe
->iosb
.Status
)) {
2230 Status
= stripe
->iosb
.Status
;
2232 log_device_error(Vcb
, stripe
->device
, BTRFS_DEV_STAT_WRITE_ERRORS
);
2239 free_write_data_stripes(&wtc
);
2242 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
|| c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)
2243 chunk_unlock_range(Vcb
, c
, lockaddr
, locklen
);
2248 _Function_class_(IO_COMPLETION_ROUTINE
)
2249 static NTSTATUS __stdcall
write_data_completion(PDEVICE_OBJECT DeviceObject
, PIRP Irp
, PVOID conptr
) {
2250 write_data_stripe
* stripe
= conptr
;
2251 write_data_context
* context
= (write_data_context
*)stripe
->context
;
2254 UNUSED(DeviceObject
);
2256 // FIXME - we need a lock here
2258 if (stripe
->status
== WriteDataStatus_Cancelling
) {
2259 stripe
->status
= WriteDataStatus_Cancelled
;
2263 stripe
->iosb
= Irp
->IoStatus
;
2265 if (NT_SUCCESS(Irp
->IoStatus
.Status
)) {
2266 stripe
->status
= WriteDataStatus_Success
;
2268 le
= context
->stripes
.Flink
;
2270 stripe
->status
= WriteDataStatus_Error
;
2272 while (le
!= &context
->stripes
) {
2273 write_data_stripe
* s2
= CONTAINING_RECORD(le
, write_data_stripe
, list_entry
);
2275 if (s2
->status
== WriteDataStatus_Pending
) {
2276 s2
->status
= WriteDataStatus_Cancelling
;
2277 IoCancelIrp(s2
->Irp
);
2285 if (InterlockedDecrement(&context
->stripes_left
) == 0)
2286 KeSetEvent(&context
->Event
, 0, false);
2288 return STATUS_MORE_PROCESSING_REQUIRED
;
2291 void free_write_data_stripes(write_data_context
* wtc
) {
2293 PMDL last_mdl
= NULL
;
2295 if (wtc
->parity1_mdl
) {
2296 if (wtc
->parity1_mdl
->MdlFlags
& MDL_PAGES_LOCKED
)
2297 MmUnlockPages(wtc
->parity1_mdl
);
2299 IoFreeMdl(wtc
->parity1_mdl
);
2302 if (wtc
->parity2_mdl
) {
2303 if (wtc
->parity2_mdl
->MdlFlags
& MDL_PAGES_LOCKED
)
2304 MmUnlockPages(wtc
->parity2_mdl
);
2306 IoFreeMdl(wtc
->parity2_mdl
);
2310 if (wtc
->mdl
->MdlFlags
& MDL_PAGES_LOCKED
)
2311 MmUnlockPages(wtc
->mdl
);
2313 IoFreeMdl(wtc
->mdl
);
2317 ExFreePool(wtc
->parity1
);
2320 ExFreePool(wtc
->parity2
);
2323 ExFreePool(wtc
->scratch
);
2325 le
= wtc
->stripes
.Flink
;
2326 while (le
!= &wtc
->stripes
) {
2327 write_data_stripe
* stripe
= CONTAINING_RECORD(le
, write_data_stripe
, list_entry
);
2329 if (stripe
->mdl
&& stripe
->mdl
!= last_mdl
) {
2330 if (stripe
->mdl
->MdlFlags
& MDL_PAGES_LOCKED
)
2331 MmUnlockPages(stripe
->mdl
);
2333 IoFreeMdl(stripe
->mdl
);
2336 last_mdl
= stripe
->mdl
;
2339 IoFreeIrp(stripe
->Irp
);
2344 while (!IsListEmpty(&wtc
->stripes
)) {
2345 write_data_stripe
* stripe
= CONTAINING_RECORD(RemoveHeadList(&wtc
->stripes
), write_data_stripe
, list_entry
);
2351 void add_extent(_In_ fcb
* fcb
, _In_ LIST_ENTRY
* prevextle
, _In_ __drv_aliasesMem extent
* newext
) {
2352 LIST_ENTRY
* le
= prevextle
->Flink
;
2354 while (le
!= &fcb
->extents
) {
2355 extent
* ext
= CONTAINING_RECORD(le
, extent
, list_entry
);
2357 if (ext
->offset
>= newext
->offset
) {
2358 InsertHeadList(ext
->list_entry
.Blink
, &newext
->list_entry
);
2365 InsertTailList(&fcb
->extents
, &newext
->list_entry
);
2368 NTSTATUS
excise_extents(device_extension
* Vcb
, fcb
* fcb
, uint64_t start_data
, uint64_t end_data
, PIRP Irp
, LIST_ENTRY
* rollback
) {
2372 le
= fcb
->extents
.Flink
;
2374 while (le
!= &fcb
->extents
) {
2375 LIST_ENTRY
* le2
= le
->Flink
;
2376 extent
* ext
= CONTAINING_RECORD(le
, extent
, list_entry
);
2377 EXTENT_DATA
* ed
= &ext
->extent_data
;
2378 EXTENT_DATA2
* ed2
= NULL
;
2382 if (ed
->type
!= EXTENT_TYPE_INLINE
)
2383 ed2
= (EXTENT_DATA2
*)ed
->data
;
2385 len
= ed
->type
== EXTENT_TYPE_INLINE
? ed
->decoded_size
: ed2
->num_bytes
;
2387 if (ext
->offset
< end_data
&& ext
->offset
+ len
> start_data
) {
2388 if (ed
->type
== EXTENT_TYPE_INLINE
) {
2389 if (start_data
<= ext
->offset
&& end_data
>= ext
->offset
+ len
) { // remove all
2390 remove_fcb_extent(fcb
, ext
, rollback
);
2392 fcb
->inode_item
.st_blocks
-= len
;
2393 fcb
->inode_item_changed
= true;
2395 ERR("trying to split inline extent\n");
2396 #ifdef DEBUG_PARANOID
2399 return STATUS_INTERNAL_ERROR
;
2401 } else if (ed
->type
!= EXTENT_TYPE_INLINE
) {
2402 if (start_data
<= ext
->offset
&& end_data
>= ext
->offset
+ len
) { // remove all
2403 if (ed2
->size
!= 0) {
2406 fcb
->inode_item
.st_blocks
-= len
;
2407 fcb
->inode_item_changed
= true;
2409 c
= get_chunk_from_address(Vcb
, ed2
->address
);
2412 ERR("get_chunk_from_address(%I64x) failed\n", ed2
->address
);
2414 Status
= update_changed_extent_ref(Vcb
, c
, ed2
->address
, ed2
->size
, fcb
->subvol
->id
, fcb
->inode
, ext
->offset
- ed2
->offset
, -1,
2415 fcb
->inode_item
.flags
& BTRFS_INODE_NODATASUM
, false, Irp
);
2416 if (!NT_SUCCESS(Status
)) {
2417 ERR("update_changed_extent_ref returned %08x\n", Status
);
2423 remove_fcb_extent(fcb
, ext
, rollback
);
2424 } else if (start_data
<= ext
->offset
&& end_data
< ext
->offset
+ len
) { // remove beginning
2428 if (ed2
->size
!= 0) {
2429 fcb
->inode_item
.st_blocks
-= end_data
- ext
->offset
;
2430 fcb
->inode_item_changed
= true;
2433 newext
= ExAllocatePoolWithTag(PagedPool
, offsetof(extent
, extent_data
) + sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
), ALLOC_TAG
);
2435 ERR("out of memory\n");
2436 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2440 ned2
= (EXTENT_DATA2
*)newext
->extent_data
.data
;
2442 newext
->extent_data
.generation
= Vcb
->superblock
.generation
;
2443 newext
->extent_data
.decoded_size
= ed
->decoded_size
;
2444 newext
->extent_data
.compression
= ed
->compression
;
2445 newext
->extent_data
.encryption
= ed
->encryption
;
2446 newext
->extent_data
.encoding
= ed
->encoding
;
2447 newext
->extent_data
.type
= ed
->type
;
2448 ned2
->address
= ed2
->address
;
2449 ned2
->size
= ed2
->size
;
2450 ned2
->offset
= ed2
->offset
+ (end_data
- ext
->offset
);
2451 ned2
->num_bytes
= ed2
->num_bytes
- (end_data
- ext
->offset
);
2453 newext
->offset
= end_data
;
2454 newext
->datalen
= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
);
2455 newext
->unique
= ext
->unique
;
2456 newext
->ignore
= false;
2457 newext
->inserted
= true;
2460 if (ed
->compression
== BTRFS_COMPRESSION_NONE
) {
2461 newext
->csum
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)(ned2
->num_bytes
* sizeof(uint32_t) / Vcb
->superblock
.sector_size
), ALLOC_TAG
);
2462 if (!newext
->csum
) {
2463 ERR("out of memory\n");
2464 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2469 RtlCopyMemory(newext
->csum
, &ext
->csum
[(end_data
- ext
->offset
) / Vcb
->superblock
.sector_size
],
2470 (ULONG
)(ned2
->num_bytes
* sizeof(uint32_t) / Vcb
->superblock
.sector_size
));
2472 newext
->csum
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)(ed2
->size
* sizeof(uint32_t) / Vcb
->superblock
.sector_size
), ALLOC_TAG
);
2473 if (!newext
->csum
) {
2474 ERR("out of memory\n");
2475 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2480 RtlCopyMemory(newext
->csum
, ext
->csum
, (ULONG
)(ed2
->size
* sizeof(uint32_t) / Vcb
->superblock
.sector_size
));
2483 newext
->csum
= NULL
;
2485 add_extent(fcb
, &ext
->list_entry
, newext
);
2487 remove_fcb_extent(fcb
, ext
, rollback
);
2488 } else if (start_data
> ext
->offset
&& end_data
>= ext
->offset
+ len
) { // remove end
2492 if (ed2
->size
!= 0) {
2493 fcb
->inode_item
.st_blocks
-= ext
->offset
+ len
- start_data
;
2494 fcb
->inode_item_changed
= true;
2497 newext
= ExAllocatePoolWithTag(PagedPool
, offsetof(extent
, extent_data
) + sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
), ALLOC_TAG
);
2499 ERR("out of memory\n");
2500 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2504 ned2
= (EXTENT_DATA2
*)newext
->extent_data
.data
;
2506 newext
->extent_data
.generation
= Vcb
->superblock
.generation
;
2507 newext
->extent_data
.decoded_size
= ed
->decoded_size
;
2508 newext
->extent_data
.compression
= ed
->compression
;
2509 newext
->extent_data
.encryption
= ed
->encryption
;
2510 newext
->extent_data
.encoding
= ed
->encoding
;
2511 newext
->extent_data
.type
= ed
->type
;
2512 ned2
->address
= ed2
->address
;
2513 ned2
->size
= ed2
->size
;
2514 ned2
->offset
= ed2
->offset
;
2515 ned2
->num_bytes
= start_data
- ext
->offset
;
2517 newext
->offset
= ext
->offset
;
2518 newext
->datalen
= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
);
2519 newext
->unique
= ext
->unique
;
2520 newext
->ignore
= false;
2521 newext
->inserted
= true;
2524 if (ed
->compression
== BTRFS_COMPRESSION_NONE
) {
2525 newext
->csum
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)(ned2
->num_bytes
* sizeof(uint32_t) / Vcb
->superblock
.sector_size
), ALLOC_TAG
);
2526 if (!newext
->csum
) {
2527 ERR("out of memory\n");
2528 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2533 RtlCopyMemory(newext
->csum
, ext
->csum
, (ULONG
)(ned2
->num_bytes
* sizeof(uint32_t) / Vcb
->superblock
.sector_size
));
2535 newext
->csum
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)(ed2
->size
* sizeof(uint32_t) / Vcb
->superblock
.sector_size
), ALLOC_TAG
);
2536 if (!newext
->csum
) {
2537 ERR("out of memory\n");
2538 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2543 RtlCopyMemory(newext
->csum
, ext
->csum
, (ULONG
)(ed2
->size
* sizeof(uint32_t) / Vcb
->superblock
.sector_size
));
2546 newext
->csum
= NULL
;
2548 InsertHeadList(&ext
->list_entry
, &newext
->list_entry
);
2550 remove_fcb_extent(fcb
, ext
, rollback
);
2551 } else if (start_data
> ext
->offset
&& end_data
< ext
->offset
+ len
) { // remove middle
2552 EXTENT_DATA2
*neda2
, *nedb2
;
2553 extent
*newext1
, *newext2
;
2555 if (ed2
->size
!= 0) {
2558 fcb
->inode_item
.st_blocks
-= end_data
- start_data
;
2559 fcb
->inode_item_changed
= true;
2561 c
= get_chunk_from_address(Vcb
, ed2
->address
);
2564 ERR("get_chunk_from_address(%I64x) failed\n", ed2
->address
);
2566 Status
= update_changed_extent_ref(Vcb
, c
, ed2
->address
, ed2
->size
, fcb
->subvol
->id
, fcb
->inode
, ext
->offset
- ed2
->offset
, 1,
2567 fcb
->inode_item
.flags
& BTRFS_INODE_NODATASUM
, false, Irp
);
2568 if (!NT_SUCCESS(Status
)) {
2569 ERR("update_changed_extent_ref returned %08x\n", Status
);
2575 newext1
= ExAllocatePoolWithTag(PagedPool
, offsetof(extent
, extent_data
) + sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
), ALLOC_TAG
);
2577 ERR("out of memory\n");
2578 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2582 newext2
= ExAllocatePoolWithTag(PagedPool
, offsetof(extent
, extent_data
) + sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
), ALLOC_TAG
);
2584 ERR("out of memory\n");
2585 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2586 ExFreePool(newext1
);
2590 neda2
= (EXTENT_DATA2
*)newext1
->extent_data
.data
;
2592 newext1
->extent_data
.generation
= Vcb
->superblock
.generation
;
2593 newext1
->extent_data
.decoded_size
= ed
->decoded_size
;
2594 newext1
->extent_data
.compression
= ed
->compression
;
2595 newext1
->extent_data
.encryption
= ed
->encryption
;
2596 newext1
->extent_data
.encoding
= ed
->encoding
;
2597 newext1
->extent_data
.type
= ed
->type
;
2598 neda2
->address
= ed2
->address
;
2599 neda2
->size
= ed2
->size
;
2600 neda2
->offset
= ed2
->offset
;
2601 neda2
->num_bytes
= start_data
- ext
->offset
;
2603 nedb2
= (EXTENT_DATA2
*)newext2
->extent_data
.data
;
2605 newext2
->extent_data
.generation
= Vcb
->superblock
.generation
;
2606 newext2
->extent_data
.decoded_size
= ed
->decoded_size
;
2607 newext2
->extent_data
.compression
= ed
->compression
;
2608 newext2
->extent_data
.encryption
= ed
->encryption
;
2609 newext2
->extent_data
.encoding
= ed
->encoding
;
2610 newext2
->extent_data
.type
= ed
->type
;
2611 nedb2
->address
= ed2
->address
;
2612 nedb2
->size
= ed2
->size
;
2613 nedb2
->offset
= ed2
->offset
+ (end_data
- ext
->offset
);
2614 nedb2
->num_bytes
= ext
->offset
+ len
- end_data
;
2616 newext1
->offset
= ext
->offset
;
2617 newext1
->datalen
= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
);
2618 newext1
->unique
= ext
->unique
;
2619 newext1
->ignore
= false;
2620 newext1
->inserted
= true;
2622 newext2
->offset
= end_data
;
2623 newext2
->datalen
= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
);
2624 newext2
->unique
= ext
->unique
;
2625 newext2
->ignore
= false;
2626 newext2
->inserted
= true;
2629 if (ed
->compression
== BTRFS_COMPRESSION_NONE
) {
2630 newext1
->csum
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)(neda2
->num_bytes
* sizeof(uint32_t) / Vcb
->superblock
.sector_size
), ALLOC_TAG
);
2631 if (!newext1
->csum
) {
2632 ERR("out of memory\n");
2633 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2634 ExFreePool(newext1
);
2635 ExFreePool(newext2
);
2639 newext2
->csum
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)(nedb2
->num_bytes
* sizeof(uint32_t) / Vcb
->superblock
.sector_size
), ALLOC_TAG
);
2640 if (!newext2
->csum
) {
2641 ERR("out of memory\n");
2642 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2643 ExFreePool(newext1
->csum
);
2644 ExFreePool(newext1
);
2645 ExFreePool(newext2
);
2649 RtlCopyMemory(newext1
->csum
, ext
->csum
, (ULONG
)(neda2
->num_bytes
* sizeof(uint32_t) / Vcb
->superblock
.sector_size
));
2650 RtlCopyMemory(newext2
->csum
, &ext
->csum
[(end_data
- ext
->offset
) / Vcb
->superblock
.sector_size
],
2651 (ULONG
)(nedb2
->num_bytes
* sizeof(uint32_t) / Vcb
->superblock
.sector_size
));
2653 newext1
->csum
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)(ed2
->size
* sizeof(uint32_t) / Vcb
->superblock
.sector_size
), ALLOC_TAG
);
2654 if (!newext1
->csum
) {
2655 ERR("out of memory\n");
2656 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2657 ExFreePool(newext1
);
2658 ExFreePool(newext2
);
2662 newext2
->csum
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)(ed2
->size
* sizeof(uint32_t) / Vcb
->superblock
.sector_size
), ALLOC_TAG
);
2663 if (!newext2
->csum
) {
2664 ERR("out of memory\n");
2665 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2666 ExFreePool(newext1
->csum
);
2667 ExFreePool(newext1
);
2668 ExFreePool(newext2
);
2672 RtlCopyMemory(newext1
->csum
, ext
->csum
, (ULONG
)(ed2
->size
* sizeof(uint32_t) / Vcb
->superblock
.sector_size
));
2673 RtlCopyMemory(newext2
->csum
, ext
->csum
, (ULONG
)(ed2
->size
* sizeof(uint32_t) / Vcb
->superblock
.sector_size
));
2676 newext1
->csum
= NULL
;
2677 newext2
->csum
= NULL
;
2680 InsertHeadList(&ext
->list_entry
, &newext1
->list_entry
);
2681 add_extent(fcb
, &newext1
->list_entry
, newext2
);
2683 remove_fcb_extent(fcb
, ext
, rollback
);
2692 Status
= STATUS_SUCCESS
;
2695 fcb
->extents_changed
= true;
2696 mark_fcb_dirty(fcb
);
2701 void add_insert_extent_rollback(LIST_ENTRY
* rollback
, fcb
* fcb
, extent
* ext
) {
2702 rollback_extent
* re
;
2704 re
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(rollback_extent
), ALLOC_TAG
);
2706 ERR("out of memory\n");
2713 add_rollback(rollback
, ROLLBACK_INSERT_EXTENT
, re
);
2717 #pragma warning(push)
2718 #pragma warning(suppress: 28194)
2720 NTSTATUS
add_extent_to_fcb(_In_ fcb
* fcb
, _In_
uint64_t offset
, _In_reads_bytes_(edsize
) EXTENT_DATA
* ed
, _In_
uint16_t edsize
,
2721 _In_
bool unique
, _In_opt_
_When_(return >= 0, __drv_aliasesMem
) uint32_t* csum
, _In_ LIST_ENTRY
* rollback
) {
2725 ext
= ExAllocatePoolWithTag(PagedPool
, offsetof(extent
, extent_data
) + edsize
, ALLOC_TAG
);
2727 ERR("out of memory\n");
2728 return STATUS_INSUFFICIENT_RESOURCES
;
2731 ext
->offset
= offset
;
2732 ext
->datalen
= edsize
;
2733 ext
->unique
= unique
;
2734 ext
->ignore
= false;
2735 ext
->inserted
= true;
2738 RtlCopyMemory(&ext
->extent_data
, ed
, edsize
);
2740 le
= fcb
->extents
.Flink
;
2741 while (le
!= &fcb
->extents
) {
2742 extent
* oldext
= CONTAINING_RECORD(le
, extent
, list_entry
);
2744 if (oldext
->offset
>= offset
) {
2745 InsertHeadList(le
->Blink
, &ext
->list_entry
);
2752 InsertTailList(&fcb
->extents
, &ext
->list_entry
);
2755 add_insert_extent_rollback(rollback
, fcb
, ext
);
2757 return STATUS_SUCCESS
;
2760 #pragma warning(pop)
2763 static void remove_fcb_extent(fcb
* fcb
, extent
* ext
, LIST_ENTRY
* rollback
) {
2765 rollback_extent
* re
;
2769 re
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(rollback_extent
), ALLOC_TAG
);
2771 ERR("out of memory\n");
2778 add_rollback(rollback
, ROLLBACK_DELETE_EXTENT
, re
);
2782 NTSTATUS
calc_csum(_In_ device_extension
* Vcb
, _In_reads_bytes_(sectors
*Vcb
->superblock
.sector_size
) uint8_t* data
,
2783 _In_
uint32_t sectors
, _Out_writes_bytes_(sectors
*sizeof(uint32_t)) uint32_t* csum
) {
2787 // From experimenting, it seems that 40 sectors is roughly the crossover
2788 // point where offloading the crc32 calculation becomes worth it.
2790 if (sectors
< 40 || get_num_of_processors() < 2) {
2793 for (j
= 0; j
< sectors
; j
++) {
2794 csum
[j
] = ~calc_crc32c(0xffffffff, data
+ (j
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
2797 return STATUS_SUCCESS
;
2800 Status
= add_calc_job(Vcb
, data
, sectors
, csum
, &cj
);
2801 if (!NT_SUCCESS(Status
)) {
2802 ERR("add_calc_job returned %08x\n", Status
);
2806 KeWaitForSingleObject(&cj
->event
, Executive
, KernelMode
, false, NULL
);
2809 return STATUS_SUCCESS
;
2812 _Requires_lock_held_(c
->lock
)
2813 _When_(return != 0, _Releases_lock_(c
->lock
))
2814 bool insert_extent_chunk(_In_ device_extension
* Vcb
, _In_ fcb
* fcb
, _In_ chunk
* c
, _In_
uint64_t start_data
, _In_
uint64_t length
, _In_
bool prealloc
, _In_opt_
void* data
,
2815 _In_opt_ PIRP Irp
, _In_ LIST_ENTRY
* rollback
, _In_
uint8_t compression
, _In_
uint64_t decoded_size
, _In_
bool file_write
, _In_
uint64_t irp_offset
) {
2820 uint16_t edsize
= (uint16_t)(offsetof(EXTENT_DATA
, data
[0]) + sizeof(EXTENT_DATA2
));
2821 uint32_t* csum
= NULL
;
2823 TRACE("(%p, (%I64x, %I64x), %I64x, %I64x, %I64x, %u, %p, %p)\n", Vcb
, fcb
->subvol
->id
, fcb
->inode
, c
->offset
, start_data
, length
, prealloc
, data
, rollback
);
2825 if (!find_data_address_in_chunk(Vcb
, c
, length
, &address
))
2828 // add extent data to inode
2829 ed
= ExAllocatePoolWithTag(PagedPool
, edsize
, ALLOC_TAG
);
2831 ERR("out of memory\n");
2835 ed
->generation
= Vcb
->superblock
.generation
;
2836 ed
->decoded_size
= decoded_size
;
2837 ed
->compression
= compression
;
2838 ed
->encryption
= BTRFS_ENCRYPTION_NONE
;
2839 ed
->encoding
= BTRFS_ENCODING_NONE
;
2840 ed
->type
= prealloc
? EXTENT_TYPE_PREALLOC
: EXTENT_TYPE_REGULAR
;
2842 ed2
= (EXTENT_DATA2
*)ed
->data
;
2843 ed2
->address
= address
;
2846 ed2
->num_bytes
= decoded_size
;
2848 if (!prealloc
&& data
&& !(fcb
->inode_item
.flags
& BTRFS_INODE_NODATASUM
)) {
2849 ULONG sl
= (ULONG
)(length
/ Vcb
->superblock
.sector_size
);
2851 csum
= ExAllocatePoolWithTag(PagedPool
, sl
* sizeof(uint32_t), ALLOC_TAG
);
2853 ERR("out of memory\n");
2858 Status
= calc_csum(Vcb
, data
, sl
, csum
);
2859 if (!NT_SUCCESS(Status
)) {
2860 ERR("calc_csum returned %08x\n", Status
);
2867 Status
= add_extent_to_fcb(fcb
, start_data
, ed
, edsize
, true, csum
, rollback
);
2868 if (!NT_SUCCESS(Status
)) {
2869 ERR("add_extent_to_fcb returned %08x\n", Status
);
2870 if (csum
) ExFreePool(csum
);
2878 space_list_subtract(c
, false, address
, length
, rollback
);
2880 fcb
->inode_item
.st_blocks
+= decoded_size
;
2882 fcb
->extents_changed
= true;
2883 fcb
->inode_item_changed
= true;
2884 mark_fcb_dirty(fcb
);
2886 ExAcquireResourceExclusiveLite(&c
->changed_extents_lock
, true);
2888 add_changed_extent_ref(c
, address
, length
, fcb
->subvol
->id
, fcb
->inode
, start_data
, 1, fcb
->inode_item
.flags
& BTRFS_INODE_NODATASUM
);
2890 ExReleaseResourceLite(&c
->changed_extents_lock
);
2892 release_chunk_lock(c
, Vcb
);
2895 Status
= write_data_complete(Vcb
, address
, data
, (uint32_t)length
, Irp
, NULL
, file_write
, irp_offset
,
2896 fcb
->Header
.Flags2
& FSRTL_FLAG2_IS_PAGING_FILE
? HighPagePriority
: NormalPagePriority
);
2897 if (!NT_SUCCESS(Status
))
2898 ERR("write_data_complete returned %08x\n", Status
);
2904 static bool try_extend_data(device_extension
* Vcb
, fcb
* fcb
, uint64_t start_data
, uint64_t length
, void* data
,
2905 PIRP Irp
, uint64_t* written
, bool file_write
, uint64_t irp_offset
, LIST_ENTRY
* rollback
) {
2906 bool success
= false;
2913 le
= fcb
->extents
.Flink
;
2915 while (le
!= &fcb
->extents
) {
2916 extent
* nextext
= CONTAINING_RECORD(le
, extent
, list_entry
);
2918 if (!nextext
->ignore
) {
2919 if (nextext
->offset
== start_data
) {
2922 } else if (nextext
->offset
> start_data
)
2934 ed
= &ext
->extent_data
;
2936 if (ed
->type
!= EXTENT_TYPE_REGULAR
&& ed
->type
!= EXTENT_TYPE_PREALLOC
) {
2937 TRACE("not extending extent which is not regular or prealloc\n");
2941 ed2
= (EXTENT_DATA2
*)ed
->data
;
2943 if (ext
->offset
+ ed2
->num_bytes
!= start_data
) {
2944 TRACE("last EXTENT_DATA does not run up to start_data (%I64x + %I64x != %I64x)\n", ext
->offset
, ed2
->num_bytes
, start_data
);
2948 c
= get_chunk_from_address(Vcb
, ed2
->address
);
2950 if (c
->reloc
|| c
->readonly
|| c
->chunk_item
->type
!= Vcb
->data_flags
)
2953 acquire_chunk_lock(c
, Vcb
);
2955 if (length
> c
->chunk_item
->size
- c
->used
) {
2956 release_chunk_lock(c
, Vcb
);
2960 if (!c
->cache_loaded
) {
2961 NTSTATUS Status
= load_cache_chunk(Vcb
, c
, NULL
);
2963 if (!NT_SUCCESS(Status
)) {
2964 ERR("load_cache_chunk returned %08x\n", Status
);
2965 release_chunk_lock(c
, Vcb
);
2970 le
= c
->space
.Flink
;
2971 while (le
!= &c
->space
) {
2972 space
* s
= CONTAINING_RECORD(le
, space
, list_entry
);
2974 if (s
->address
== ed2
->address
+ ed2
->size
) {
2975 uint64_t newlen
= min(min(s
->size
, length
), MAX_EXTENT_SIZE
);
2977 success
= insert_extent_chunk(Vcb
, fcb
, c
, start_data
, newlen
, false, data
, Irp
, rollback
, BTRFS_COMPRESSION_NONE
, newlen
, file_write
, irp_offset
);
2982 release_chunk_lock(c
, Vcb
);
2985 } else if (s
->address
> ed2
->address
+ ed2
->size
)
2991 release_chunk_lock(c
, Vcb
);
2996 static NTSTATUS
insert_chunk_fragmented(fcb
* fcb
, uint64_t start
, uint64_t length
, uint8_t* data
, bool prealloc
, LIST_ENTRY
* rollback
) {
2998 uint64_t flags
= fcb
->Vcb
->data_flags
;
2999 bool page_file
= fcb
->Header
.Flags2
& FSRTL_FLAG2_IS_PAGING_FILE
;
3003 ExAcquireResourceSharedLite(&fcb
->Vcb
->chunk_lock
, true);
3005 // first create as many chunks as we can
3007 Status
= alloc_chunk(fcb
->Vcb
, flags
, &c
, false);
3008 } while (NT_SUCCESS(Status
));
3010 if (Status
!= STATUS_DISK_FULL
) {
3011 ERR("alloc_chunk returned %08x\n", Status
);
3012 ExReleaseResourceLite(&fcb
->Vcb
->chunk_lock
);
3016 le
= fcb
->Vcb
->chunks
.Flink
;
3017 while (le
!= &fcb
->Vcb
->chunks
) {
3018 c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
3020 if (!c
->readonly
&& !c
->reloc
) {
3021 acquire_chunk_lock(c
, fcb
->Vcb
);
3023 if (c
->chunk_item
->type
== flags
) {
3024 while (!IsListEmpty(&c
->space_size
) && length
> 0) {
3025 space
* s
= CONTAINING_RECORD(c
->space_size
.Flink
, space
, list_entry_size
);
3026 uint64_t extlen
= min(length
, s
->size
);
3028 if (insert_extent_chunk(fcb
->Vcb
, fcb
, c
, start
, extlen
, prealloc
&& !page_file
, data
, NULL
, rollback
, BTRFS_COMPRESSION_NONE
, extlen
, false, 0)) {
3031 if (data
) data
+= extlen
;
3033 acquire_chunk_lock(c
, fcb
->Vcb
);
3038 release_chunk_lock(c
, fcb
->Vcb
);
3047 ExReleaseResourceLite(&fcb
->Vcb
->chunk_lock
);
3049 return length
== 0 ? STATUS_SUCCESS
: STATUS_DISK_FULL
;
3052 static NTSTATUS
insert_prealloc_extent(fcb
* fcb
, uint64_t start
, uint64_t length
, LIST_ENTRY
* rollback
) {
3057 bool page_file
= fcb
->Header
.Flags2
& FSRTL_FLAG2_IS_PAGING_FILE
;
3059 flags
= fcb
->Vcb
->data_flags
;
3062 uint64_t extlen
= min(MAX_EXTENT_SIZE
, length
);
3064 ExAcquireResourceSharedLite(&fcb
->Vcb
->chunk_lock
, true);
3066 le
= fcb
->Vcb
->chunks
.Flink
;
3067 while (le
!= &fcb
->Vcb
->chunks
) {
3068 c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
3070 if (!c
->readonly
&& !c
->reloc
) {
3071 acquire_chunk_lock(c
, fcb
->Vcb
);
3073 if (c
->chunk_item
->type
== flags
&& (c
->chunk_item
->size
- c
->used
) >= extlen
) {
3074 if (insert_extent_chunk(fcb
->Vcb
, fcb
, c
, start
, extlen
, !page_file
, NULL
, NULL
, rollback
, BTRFS_COMPRESSION_NONE
, extlen
, false, 0)) {
3075 ExReleaseResourceLite(&fcb
->Vcb
->chunk_lock
);
3080 release_chunk_lock(c
, fcb
->Vcb
);
3086 ExReleaseResourceLite(&fcb
->Vcb
->chunk_lock
);
3088 ExAcquireResourceExclusiveLite(&fcb
->Vcb
->chunk_lock
, true);
3090 Status
= alloc_chunk(fcb
->Vcb
, flags
, &c
, false);
3092 ExReleaseResourceLite(&fcb
->Vcb
->chunk_lock
);
3094 if (!NT_SUCCESS(Status
)) {
3095 ERR("alloc_chunk returned %08x\n", Status
);
3099 acquire_chunk_lock(c
, fcb
->Vcb
);
3101 if (c
->chunk_item
->type
== flags
&& (c
->chunk_item
->size
- c
->used
) >= extlen
) {
3102 if (insert_extent_chunk(fcb
->Vcb
, fcb
, c
, start
, extlen
, !page_file
, NULL
, NULL
, rollback
, BTRFS_COMPRESSION_NONE
, extlen
, false, 0))
3106 release_chunk_lock(c
, fcb
->Vcb
);
3108 Status
= insert_chunk_fragmented(fcb
, start
, length
, NULL
, true, rollback
);
3109 if (!NT_SUCCESS(Status
))
3110 ERR("insert_chunk_fragmented returned %08x\n", Status
);
3117 } while (length
> 0);
3119 Status
= STATUS_SUCCESS
;
3125 static NTSTATUS
insert_extent(device_extension
* Vcb
, fcb
* fcb
, uint64_t start_data
, uint64_t length
, void* data
,
3126 PIRP Irp
, bool file_write
, uint64_t irp_offset
, LIST_ENTRY
* rollback
) {
3130 uint64_t flags
, orig_length
= length
, written
= 0;
3132 TRACE("(%p, (%I64x, %I64x), %I64x, %I64x, %p)\n", Vcb
, fcb
->subvol
->id
, fcb
->inode
, start_data
, length
, data
);
3134 if (start_data
> 0) {
3135 try_extend_data(Vcb
, fcb
, start_data
, length
, data
, Irp
, &written
, file_write
, irp_offset
, rollback
);
3137 if (written
== length
)
3138 return STATUS_SUCCESS
;
3139 else if (written
> 0) {
3140 start_data
+= written
;
3141 irp_offset
+= written
;
3143 data
= &((uint8_t*)data
)[written
];
3147 flags
= Vcb
->data_flags
;
3149 while (written
< orig_length
) {
3150 uint64_t newlen
= min(length
, MAX_EXTENT_SIZE
);
3153 // Rather than necessarily writing the whole extent at once, we deal with it in blocks of 128 MB.
3154 // First, see if we can write the extent part to an existing chunk.
3156 ExAcquireResourceSharedLite(&Vcb
->chunk_lock
, true);
3158 le
= Vcb
->chunks
.Flink
;
3159 while (le
!= &Vcb
->chunks
) {
3160 c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
3162 if (!c
->readonly
&& !c
->reloc
) {
3163 acquire_chunk_lock(c
, Vcb
);
3165 if (c
->chunk_item
->type
== flags
&& (c
->chunk_item
->size
- c
->used
) >= newlen
&&
3166 insert_extent_chunk(Vcb
, fcb
, c
, start_data
, newlen
, false, data
, Irp
, rollback
, BTRFS_COMPRESSION_NONE
, newlen
, file_write
, irp_offset
)) {
3169 if (written
== orig_length
) {
3170 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3171 return STATUS_SUCCESS
;
3174 start_data
+= newlen
;
3175 irp_offset
+= newlen
;
3177 data
= &((uint8_t*)data
)[newlen
];
3181 release_chunk_lock(c
, Vcb
);
3187 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3191 // Otherwise, see if we can put it in a new chunk.
3193 ExAcquireResourceExclusiveLite(&Vcb
->chunk_lock
, true);
3195 Status
= alloc_chunk(Vcb
, flags
, &c
, false);
3197 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3199 if (!NT_SUCCESS(Status
)) {
3200 ERR("alloc_chunk returned %08x\n", Status
);
3205 acquire_chunk_lock(c
, Vcb
);
3207 if (c
->chunk_item
->type
== flags
&& (c
->chunk_item
->size
- c
->used
) >= newlen
&&
3208 insert_extent_chunk(Vcb
, fcb
, c
, start_data
, newlen
, false, data
, Irp
, rollback
, BTRFS_COMPRESSION_NONE
, newlen
, file_write
, irp_offset
)) {
3211 if (written
== orig_length
)
3212 return STATUS_SUCCESS
;
3215 start_data
+= newlen
;
3216 irp_offset
+= newlen
;
3218 data
= &((uint8_t*)data
)[newlen
];
3221 release_chunk_lock(c
, Vcb
);
3225 Status
= insert_chunk_fragmented(fcb
, start_data
, length
, data
, false, rollback
);
3226 if (!NT_SUCCESS(Status
))
3227 ERR("insert_chunk_fragmented returned %08x\n", Status
);
3233 return STATUS_DISK_FULL
;
3236 NTSTATUS
truncate_file(fcb
* fcb
, uint64_t end
, PIRP Irp
, LIST_ENTRY
* rollback
) {
3239 // FIXME - convert into inline extent if short enough
3241 if (end
> 0 && fcb_is_inline(fcb
)) {
3243 bool make_inline
= end
<= fcb
->Vcb
->options
.max_inline
;
3245 buf
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)(make_inline
? (offsetof(EXTENT_DATA
, data
[0]) + end
) : sector_align(end
, fcb
->Vcb
->superblock
.sector_size
)), ALLOC_TAG
);
3247 ERR("out of memory\n");
3248 return STATUS_INSUFFICIENT_RESOURCES
;
3251 Status
= read_file(fcb
, make_inline
? (buf
+ offsetof(EXTENT_DATA
, data
[0])) : buf
, 0, end
, NULL
, Irp
);
3252 if (!NT_SUCCESS(Status
)) {
3253 ERR("read_file returned %08x\n", Status
);
3258 Status
= excise_extents(fcb
->Vcb
, fcb
, 0, fcb
->inode_item
.st_size
, Irp
, rollback
);
3259 if (!NT_SUCCESS(Status
)) {
3260 ERR("excise_extents returned %08x\n", Status
);
3266 RtlZeroMemory(buf
+ end
, (ULONG
)(sector_align(end
, fcb
->Vcb
->superblock
.sector_size
) - end
));
3268 Status
= do_write_file(fcb
, 0, sector_align(end
, fcb
->Vcb
->superblock
.sector_size
), buf
, Irp
, false, 0, rollback
);
3269 if (!NT_SUCCESS(Status
)) {
3270 ERR("do_write_file returned %08x\n", Status
);
3275 EXTENT_DATA
* ed
= (EXTENT_DATA
*)buf
;
3277 ed
->generation
= fcb
->Vcb
->superblock
.generation
;
3278 ed
->decoded_size
= end
;
3279 ed
->compression
= BTRFS_COMPRESSION_NONE
;
3280 ed
->encryption
= BTRFS_ENCRYPTION_NONE
;
3281 ed
->encoding
= BTRFS_ENCODING_NONE
;
3282 ed
->type
= EXTENT_TYPE_INLINE
;
3284 Status
= add_extent_to_fcb(fcb
, 0, ed
, (uint16_t)(offsetof(EXTENT_DATA
, data
[0]) + end
), false, NULL
, rollback
);
3285 if (!NT_SUCCESS(Status
)) {
3286 ERR("add_extent_to_fcb returned %08x\n", Status
);
3291 fcb
->inode_item
.st_blocks
+= end
;
3295 return STATUS_SUCCESS
;
3298 Status
= excise_extents(fcb
->Vcb
, fcb
, sector_align(end
, fcb
->Vcb
->superblock
.sector_size
),
3299 sector_align(fcb
->inode_item
.st_size
, fcb
->Vcb
->superblock
.sector_size
), Irp
, rollback
);
3300 if (!NT_SUCCESS(Status
)) {
3301 ERR("excise_extents returned %08x\n", Status
);
3305 fcb
->inode_item
.st_size
= end
;
3306 fcb
->inode_item_changed
= true;
3307 TRACE("setting st_size to %I64x\n", end
);
3309 fcb
->Header
.AllocationSize
.QuadPart
= sector_align(fcb
->inode_item
.st_size
, fcb
->Vcb
->superblock
.sector_size
);
3310 fcb
->Header
.FileSize
.QuadPart
= fcb
->inode_item
.st_size
;
3311 fcb
->Header
.ValidDataLength
.QuadPart
= fcb
->inode_item
.st_size
;
3312 // FIXME - inform cache manager of this
3314 TRACE("fcb %p FileSize = %I64x\n", fcb
, fcb
->Header
.FileSize
.QuadPart
);
3316 return STATUS_SUCCESS
;
3319 NTSTATUS
extend_file(fcb
* fcb
, file_ref
* fileref
, uint64_t end
, bool prealloc
, PIRP Irp
, LIST_ENTRY
* rollback
) {
3320 uint64_t oldalloc
, newalloc
;
3324 TRACE("(%p, %p, %x, %u)\n", fcb
, fileref
, end
, prealloc
);
3328 return STATUS_DISK_FULL
;
3330 return stream_set_end_of_file_information(fcb
->Vcb
, (uint16_t)end
, fcb
, fileref
, false);
3335 le
= fcb
->extents
.Blink
;
3336 while (le
!= &fcb
->extents
) {
3337 extent
* ext2
= CONTAINING_RECORD(le
, extent
, list_entry
);
3339 if (!ext2
->ignore
) {
3349 EXTENT_DATA
* ed
= &ext
->extent_data
;
3350 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
3352 oldalloc
= ext
->offset
+ (ed
->type
== EXTENT_TYPE_INLINE
? ed
->decoded_size
: ed2
->num_bytes
);
3353 cur_inline
= ed
->type
== EXTENT_TYPE_INLINE
;
3355 if (cur_inline
&& end
> fcb
->Vcb
->options
.max_inline
) {
3356 uint64_t origlength
, length
;
3359 TRACE("giving inline file proper extents\n");
3361 origlength
= ed
->decoded_size
;
3365 length
= sector_align(origlength
, fcb
->Vcb
->superblock
.sector_size
);
3367 data
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)length
, ALLOC_TAG
);
3369 ERR("could not allocate %I64x bytes for data\n", length
);
3370 return STATUS_INSUFFICIENT_RESOURCES
;
3373 Status
= read_file(fcb
, data
, 0, origlength
, NULL
, Irp
);
3374 if (!NT_SUCCESS(Status
)) {
3375 ERR("read_file returned %08x\n", Status
);
3380 RtlZeroMemory(data
+ origlength
, (ULONG
)(length
- origlength
));
3382 Status
= excise_extents(fcb
->Vcb
, fcb
, 0, fcb
->inode_item
.st_size
, Irp
, rollback
);
3383 if (!NT_SUCCESS(Status
)) {
3384 ERR("excise_extents returned %08x\n", Status
);
3389 Status
= do_write_file(fcb
, 0, length
, data
, Irp
, false, 0, rollback
);
3390 if (!NT_SUCCESS(Status
)) {
3391 ERR("do_write_file returned %08x\n", Status
);
3396 oldalloc
= ext
->offset
+ length
;
3404 if (end
> oldalloc
) {
3405 edsize
= (uint16_t)(offsetof(EXTENT_DATA
, data
[0]) + end
- ext
->offset
);
3406 ed
= ExAllocatePoolWithTag(PagedPool
, edsize
, ALLOC_TAG
);
3409 ERR("out of memory\n");
3410 return STATUS_INSUFFICIENT_RESOURCES
;
3413 ed
->generation
= fcb
->Vcb
->superblock
.generation
;
3414 ed
->decoded_size
= end
- ext
->offset
;
3415 ed
->compression
= BTRFS_COMPRESSION_NONE
;
3416 ed
->encryption
= BTRFS_ENCRYPTION_NONE
;
3417 ed
->encoding
= BTRFS_ENCODING_NONE
;
3418 ed
->type
= EXTENT_TYPE_INLINE
;
3420 Status
= read_file(fcb
, ed
->data
, ext
->offset
, oldalloc
, NULL
, Irp
);
3421 if (!NT_SUCCESS(Status
)) {
3422 ERR("read_file returned %08x\n", Status
);
3427 RtlZeroMemory(ed
->data
+ oldalloc
- ext
->offset
, (ULONG
)(end
- oldalloc
));
3429 remove_fcb_extent(fcb
, ext
, rollback
);
3431 Status
= add_extent_to_fcb(fcb
, ext
->offset
, ed
, edsize
, ext
->unique
, NULL
, rollback
);
3432 if (!NT_SUCCESS(Status
)) {
3433 ERR("add_extent_to_fcb returned %08x\n", Status
);
3440 fcb
->extents_changed
= true;
3441 mark_fcb_dirty(fcb
);
3444 TRACE("extending inline file (oldalloc = %I64x, end = %I64x)\n", oldalloc
, end
);
3446 fcb
->inode_item
.st_size
= end
;
3447 TRACE("setting st_size to %I64x\n", end
);
3449 fcb
->inode_item
.st_blocks
= end
;
3451 fcb
->Header
.AllocationSize
.QuadPart
= fcb
->Header
.FileSize
.QuadPart
= fcb
->Header
.ValidDataLength
.QuadPart
= end
;
3453 newalloc
= sector_align(end
, fcb
->Vcb
->superblock
.sector_size
);
3455 if (newalloc
> oldalloc
) {
3457 // FIXME - try and extend previous extent first
3459 Status
= insert_prealloc_extent(fcb
, oldalloc
, newalloc
- oldalloc
, rollback
);
3461 if (!NT_SUCCESS(Status
)) {
3462 ERR("insert_prealloc_extent returned %08x\n", Status
);
3467 fcb
->extents_changed
= true;
3470 fcb
->inode_item
.st_size
= end
;
3471 fcb
->inode_item_changed
= true;
3472 mark_fcb_dirty(fcb
);
3474 TRACE("setting st_size to %I64x\n", end
);
3476 TRACE("newalloc = %I64x\n", newalloc
);
3478 fcb
->Header
.AllocationSize
.QuadPart
= newalloc
;
3479 fcb
->Header
.FileSize
.QuadPart
= fcb
->Header
.ValidDataLength
.QuadPart
= end
;
3482 if (end
> fcb
->Vcb
->options
.max_inline
) {
3483 newalloc
= sector_align(end
, fcb
->Vcb
->superblock
.sector_size
);
3486 Status
= insert_prealloc_extent(fcb
, 0, newalloc
, rollback
);
3488 if (!NT_SUCCESS(Status
)) {
3489 ERR("insert_prealloc_extent returned %08x\n", Status
);
3494 fcb
->extents_changed
= true;
3495 fcb
->inode_item_changed
= true;
3496 mark_fcb_dirty(fcb
);
3498 fcb
->inode_item
.st_size
= end
;
3499 TRACE("setting st_size to %I64x\n", end
);
3501 TRACE("newalloc = %I64x\n", newalloc
);
3503 fcb
->Header
.AllocationSize
.QuadPart
= newalloc
;
3504 fcb
->Header
.FileSize
.QuadPart
= fcb
->Header
.ValidDataLength
.QuadPart
= end
;
3509 edsize
= (uint16_t)(offsetof(EXTENT_DATA
, data
[0]) + end
);
3510 ed
= ExAllocatePoolWithTag(PagedPool
, edsize
, ALLOC_TAG
);
3513 ERR("out of memory\n");
3514 return STATUS_INSUFFICIENT_RESOURCES
;
3517 ed
->generation
= fcb
->Vcb
->superblock
.generation
;
3518 ed
->decoded_size
= end
;
3519 ed
->compression
= BTRFS_COMPRESSION_NONE
;
3520 ed
->encryption
= BTRFS_ENCRYPTION_NONE
;
3521 ed
->encoding
= BTRFS_ENCODING_NONE
;
3522 ed
->type
= EXTENT_TYPE_INLINE
;
3524 RtlZeroMemory(ed
->data
, (ULONG
)end
);
3526 Status
= add_extent_to_fcb(fcb
, 0, ed
, edsize
, false, NULL
, rollback
);
3527 if (!NT_SUCCESS(Status
)) {
3528 ERR("add_extent_to_fcb returned %08x\n", Status
);
3535 fcb
->extents_changed
= true;
3536 fcb
->inode_item_changed
= true;
3537 mark_fcb_dirty(fcb
);
3539 fcb
->inode_item
.st_size
= end
;
3540 TRACE("setting st_size to %I64x\n", end
);
3542 fcb
->inode_item
.st_blocks
= end
;
3544 fcb
->Header
.AllocationSize
.QuadPart
= fcb
->Header
.FileSize
.QuadPart
= fcb
->Header
.ValidDataLength
.QuadPart
= end
;
3549 return STATUS_SUCCESS
;
3552 static NTSTATUS
do_write_file_prealloc(fcb
* fcb
, extent
* ext
, uint64_t start_data
, uint64_t end_data
, void* data
, uint64_t* written
,
3553 PIRP Irp
, bool file_write
, uint64_t irp_offset
, ULONG priority
, LIST_ENTRY
* rollback
) {
3554 EXTENT_DATA
* ed
= &ext
->extent_data
;
3555 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
3559 if (start_data
<= ext
->offset
&& end_data
>= ext
->offset
+ ed2
->num_bytes
) { // replace all
3562 newext
= ExAllocatePoolWithTag(PagedPool
, offsetof(extent
, extent_data
) + ext
->datalen
, ALLOC_TAG
);
3564 ERR("out of memory\n");
3565 return STATUS_INSUFFICIENT_RESOURCES
;
3568 RtlCopyMemory(&newext
->extent_data
, &ext
->extent_data
, ext
->datalen
);
3570 newext
->extent_data
.type
= EXTENT_TYPE_REGULAR
;
3572 Status
= write_data_complete(fcb
->Vcb
, ed2
->address
+ ed2
->offset
, (uint8_t*)data
+ ext
->offset
- start_data
, (uint32_t)ed2
->num_bytes
, Irp
,
3573 NULL
, file_write
, irp_offset
+ ext
->offset
- start_data
, priority
);
3574 if (!NT_SUCCESS(Status
)) {
3575 ERR("write_data_complete returned %08x\n", Status
);
3579 if (!(fcb
->inode_item
.flags
& BTRFS_INODE_NODATASUM
)) {
3580 ULONG sl
= (ULONG
)(ed2
->num_bytes
/ fcb
->Vcb
->superblock
.sector_size
);
3581 uint32_t* csum
= ExAllocatePoolWithTag(PagedPool
, sl
* sizeof(uint32_t), ALLOC_TAG
);
3584 ERR("out of memory\n");
3586 return STATUS_INSUFFICIENT_RESOURCES
;
3589 Status
= calc_csum(fcb
->Vcb
, (uint8_t*)data
+ ext
->offset
- start_data
, sl
, csum
);
3590 if (!NT_SUCCESS(Status
)) {
3591 ERR("calc_csum returned %08x\n", Status
);
3597 newext
->csum
= csum
;
3599 newext
->csum
= NULL
;
3601 *written
= ed2
->num_bytes
;
3603 newext
->offset
= ext
->offset
;
3604 newext
->datalen
= ext
->datalen
;
3605 newext
->unique
= ext
->unique
;
3606 newext
->ignore
= false;
3607 newext
->inserted
= true;
3608 InsertHeadList(&ext
->list_entry
, &newext
->list_entry
);
3610 add_insert_extent_rollback(rollback
, fcb
, newext
);
3612 remove_fcb_extent(fcb
, ext
, rollback
);
3614 c
= get_chunk_from_address(fcb
->Vcb
, ed2
->address
);
3615 } else if (start_data
<= ext
->offset
&& end_data
< ext
->offset
+ ed2
->num_bytes
) { // replace beginning
3617 extent
*newext1
, *newext2
;
3619 newext1
= ExAllocatePoolWithTag(PagedPool
, offsetof(extent
, extent_data
) + ext
->datalen
, ALLOC_TAG
);
3621 ERR("out of memory\n");
3622 return STATUS_INSUFFICIENT_RESOURCES
;
3625 newext2
= ExAllocatePoolWithTag(PagedPool
, offsetof(extent
, extent_data
) + ext
->datalen
, ALLOC_TAG
);
3627 ERR("out of memory\n");
3628 ExFreePool(newext1
);
3629 return STATUS_INSUFFICIENT_RESOURCES
;
3632 RtlCopyMemory(&newext1
->extent_data
, &ext
->extent_data
, ext
->datalen
);
3633 newext1
->extent_data
.type
= EXTENT_TYPE_REGULAR
;
3634 ned2
= (EXTENT_DATA2
*)newext1
->extent_data
.data
;
3635 ned2
->num_bytes
= end_data
- ext
->offset
;
3637 RtlCopyMemory(&newext2
->extent_data
, &ext
->extent_data
, ext
->datalen
);
3638 ned2
= (EXTENT_DATA2
*)newext2
->extent_data
.data
;
3639 ned2
->offset
+= end_data
- ext
->offset
;
3640 ned2
->num_bytes
-= end_data
- ext
->offset
;
3642 Status
= write_data_complete(fcb
->Vcb
, ed2
->address
+ ed2
->offset
, (uint8_t*)data
+ ext
->offset
- start_data
, (uint32_t)(end_data
- ext
->offset
),
3643 Irp
, NULL
, file_write
, irp_offset
+ ext
->offset
- start_data
, priority
);
3644 if (!NT_SUCCESS(Status
)) {
3645 ERR("write_data_complete returned %08x\n", Status
);
3646 ExFreePool(newext1
);
3647 ExFreePool(newext2
);
3651 if (!(fcb
->inode_item
.flags
& BTRFS_INODE_NODATASUM
)) {
3652 ULONG sl
= (ULONG
)((end_data
- ext
->offset
) / fcb
->Vcb
->superblock
.sector_size
);
3653 uint32_t* csum
= ExAllocatePoolWithTag(PagedPool
, sl
* sizeof(uint32_t), ALLOC_TAG
);
3656 ERR("out of memory\n");
3657 ExFreePool(newext1
);
3658 ExFreePool(newext2
);
3659 return STATUS_INSUFFICIENT_RESOURCES
;
3662 Status
= calc_csum(fcb
->Vcb
, (uint8_t*)data
+ ext
->offset
- start_data
, sl
, csum
);
3663 if (!NT_SUCCESS(Status
)) {
3664 ERR("calc_csum returned %08x\n", Status
);
3665 ExFreePool(newext1
);
3666 ExFreePool(newext2
);
3671 newext1
->csum
= csum
;
3673 newext1
->csum
= NULL
;
3675 *written
= end_data
- ext
->offset
;
3677 newext1
->offset
= ext
->offset
;
3678 newext1
->datalen
= ext
->datalen
;
3679 newext1
->unique
= ext
->unique
;
3680 newext1
->ignore
= false;
3681 newext1
->inserted
= true;
3682 InsertHeadList(&ext
->list_entry
, &newext1
->list_entry
);
3684 add_insert_extent_rollback(rollback
, fcb
, newext1
);
3686 newext2
->offset
= end_data
;
3687 newext2
->datalen
= ext
->datalen
;
3688 newext2
->unique
= ext
->unique
;
3689 newext2
->ignore
= false;
3690 newext2
->inserted
= true;
3691 newext2
->csum
= NULL
;
3692 add_extent(fcb
, &newext1
->list_entry
, newext2
);
3694 add_insert_extent_rollback(rollback
, fcb
, newext2
);
3696 c
= get_chunk_from_address(fcb
->Vcb
, ed2
->address
);
3699 ERR("get_chunk_from_address(%I64x) failed\n", ed2
->address
);
3701 Status
= update_changed_extent_ref(fcb
->Vcb
, c
, ed2
->address
, ed2
->size
, fcb
->subvol
->id
, fcb
->inode
, ext
->offset
- ed2
->offset
, 1,
3702 fcb
->inode_item
.flags
& BTRFS_INODE_NODATASUM
, false, Irp
);
3704 if (!NT_SUCCESS(Status
)) {
3705 ERR("update_changed_extent_ref returned %08x\n", Status
);
3710 remove_fcb_extent(fcb
, ext
, rollback
);
3711 } else if (start_data
> ext
->offset
&& end_data
>= ext
->offset
+ ed2
->num_bytes
) { // replace end
3713 extent
*newext1
, *newext2
;
3715 newext1
= ExAllocatePoolWithTag(PagedPool
, offsetof(extent
, extent_data
) + ext
->datalen
, ALLOC_TAG
);
3717 ERR("out of memory\n");
3718 return STATUS_INSUFFICIENT_RESOURCES
;
3721 newext2
= ExAllocatePoolWithTag(PagedPool
, offsetof(extent
, extent_data
) + ext
->datalen
, ALLOC_TAG
);
3723 ERR("out of memory\n");
3724 ExFreePool(newext1
);
3725 return STATUS_INSUFFICIENT_RESOURCES
;
3728 RtlCopyMemory(&newext1
->extent_data
, &ext
->extent_data
, ext
->datalen
);
3730 ned2
= (EXTENT_DATA2
*)newext1
->extent_data
.data
;
3731 ned2
->num_bytes
= start_data
- ext
->offset
;
3733 RtlCopyMemory(&newext2
->extent_data
, &ext
->extent_data
, ext
->datalen
);
3735 newext2
->extent_data
.type
= EXTENT_TYPE_REGULAR
;
3736 ned2
= (EXTENT_DATA2
*)newext2
->extent_data
.data
;
3737 ned2
->offset
+= start_data
- ext
->offset
;
3738 ned2
->num_bytes
= ext
->offset
+ ed2
->num_bytes
- start_data
;
3740 Status
= write_data_complete(fcb
->Vcb
, ed2
->address
+ ned2
->offset
, data
, (uint32_t)ned2
->num_bytes
, Irp
, NULL
, file_write
, irp_offset
, priority
);
3741 if (!NT_SUCCESS(Status
)) {
3742 ERR("write_data_complete returned %08x\n", Status
);
3743 ExFreePool(newext1
);
3744 ExFreePool(newext2
);
3748 if (!(fcb
->inode_item
.flags
& BTRFS_INODE_NODATASUM
)) {
3749 ULONG sl
= (ULONG
)(ned2
->num_bytes
/ fcb
->Vcb
->superblock
.sector_size
);
3750 uint32_t* csum
= ExAllocatePoolWithTag(PagedPool
, sl
* sizeof(uint32_t), ALLOC_TAG
);
3753 ERR("out of memory\n");
3754 ExFreePool(newext1
);
3755 ExFreePool(newext2
);
3756 return STATUS_INSUFFICIENT_RESOURCES
;
3759 Status
= calc_csum(fcb
->Vcb
, data
, sl
, csum
);
3760 if (!NT_SUCCESS(Status
)) {
3761 ERR("calc_csum returned %08x\n", Status
);
3762 ExFreePool(newext1
);
3763 ExFreePool(newext2
);
3768 newext2
->csum
= csum
;
3770 newext2
->csum
= NULL
;
3772 *written
= ned2
->num_bytes
;
3774 newext1
->offset
= ext
->offset
;
3775 newext1
->datalen
= ext
->datalen
;
3776 newext1
->unique
= ext
->unique
;
3777 newext1
->ignore
= false;
3778 newext1
->inserted
= true;
3779 newext1
->csum
= NULL
;
3780 InsertHeadList(&ext
->list_entry
, &newext1
->list_entry
);
3782 add_insert_extent_rollback(rollback
, fcb
, newext1
);
3784 newext2
->offset
= start_data
;
3785 newext2
->datalen
= ext
->datalen
;
3786 newext2
->unique
= ext
->unique
;
3787 newext2
->ignore
= false;
3788 newext2
->inserted
= true;
3789 add_extent(fcb
, &newext1
->list_entry
, newext2
);
3791 add_insert_extent_rollback(rollback
, fcb
, newext2
);
3793 c
= get_chunk_from_address(fcb
->Vcb
, ed2
->address
);
3796 ERR("get_chunk_from_address(%I64x) failed\n", ed2
->address
);
3798 Status
= update_changed_extent_ref(fcb
->Vcb
, c
, ed2
->address
, ed2
->size
, fcb
->subvol
->id
, fcb
->inode
, ext
->offset
- ed2
->offset
, 1,
3799 fcb
->inode_item
.flags
& BTRFS_INODE_NODATASUM
, false, Irp
);
3801 if (!NT_SUCCESS(Status
)) {
3802 ERR("update_changed_extent_ref returned %08x\n", Status
);
3807 remove_fcb_extent(fcb
, ext
, rollback
);
3808 } else if (start_data
> ext
->offset
&& end_data
< ext
->offset
+ ed2
->num_bytes
) { // replace middle
3810 extent
*newext1
, *newext2
, *newext3
;
3812 newext1
= ExAllocatePoolWithTag(PagedPool
, offsetof(extent
, extent_data
) + ext
->datalen
, ALLOC_TAG
);
3814 ERR("out of memory\n");
3815 return STATUS_INSUFFICIENT_RESOURCES
;
3818 newext2
= ExAllocatePoolWithTag(PagedPool
, offsetof(extent
, extent_data
) + ext
->datalen
, ALLOC_TAG
);
3820 ERR("out of memory\n");
3821 ExFreePool(newext1
);
3822 return STATUS_INSUFFICIENT_RESOURCES
;
3825 newext3
= ExAllocatePoolWithTag(PagedPool
, offsetof(extent
, extent_data
) + ext
->datalen
, ALLOC_TAG
);
3827 ERR("out of memory\n");
3828 ExFreePool(newext1
);
3829 ExFreePool(newext2
);
3830 return STATUS_INSUFFICIENT_RESOURCES
;
3833 RtlCopyMemory(&newext1
->extent_data
, &ext
->extent_data
, ext
->datalen
);
3834 RtlCopyMemory(&newext2
->extent_data
, &ext
->extent_data
, ext
->datalen
);
3835 RtlCopyMemory(&newext3
->extent_data
, &ext
->extent_data
, ext
->datalen
);
3837 ned2
= (EXTENT_DATA2
*)newext1
->extent_data
.data
;
3838 ned2
->num_bytes
= start_data
- ext
->offset
;
3840 newext2
->extent_data
.type
= EXTENT_TYPE_REGULAR
;
3841 ned2
= (EXTENT_DATA2
*)newext2
->extent_data
.data
;
3842 ned2
->offset
+= start_data
- ext
->offset
;
3843 ned2
->num_bytes
= end_data
- start_data
;
3845 ned2
= (EXTENT_DATA2
*)newext3
->extent_data
.data
;
3846 ned2
->offset
+= end_data
- ext
->offset
;
3847 ned2
->num_bytes
-= end_data
- ext
->offset
;
3849 ned2
= (EXTENT_DATA2
*)newext2
->extent_data
.data
;
3850 Status
= write_data_complete(fcb
->Vcb
, ed2
->address
+ ned2
->offset
, data
, (uint32_t)(end_data
- start_data
), Irp
, NULL
, file_write
, irp_offset
, priority
);
3851 if (!NT_SUCCESS(Status
)) {
3852 ERR("write_data_complete returned %08x\n", Status
);
3853 ExFreePool(newext1
);
3854 ExFreePool(newext2
);
3855 ExFreePool(newext3
);
3859 if (!(fcb
->inode_item
.flags
& BTRFS_INODE_NODATASUM
)) {
3860 ULONG sl
= (ULONG
)((end_data
- start_data
) / fcb
->Vcb
->superblock
.sector_size
);
3861 uint32_t* csum
= ExAllocatePoolWithTag(PagedPool
, sl
* sizeof(uint32_t), ALLOC_TAG
);
3864 ERR("out of memory\n");
3865 ExFreePool(newext1
);
3866 ExFreePool(newext2
);
3867 ExFreePool(newext3
);
3868 return STATUS_INSUFFICIENT_RESOURCES
;
3871 Status
= calc_csum(fcb
->Vcb
, data
, sl
, csum
);
3872 if (!NT_SUCCESS(Status
)) {
3873 ERR("calc_csum returned %08x\n", Status
);
3874 ExFreePool(newext1
);
3875 ExFreePool(newext2
);
3876 ExFreePool(newext3
);
3881 newext2
->csum
= csum
;
3883 newext2
->csum
= NULL
;
3885 *written
= end_data
- start_data
;
3887 newext1
->offset
= ext
->offset
;
3888 newext1
->datalen
= ext
->datalen
;
3889 newext1
->unique
= ext
->unique
;
3890 newext1
->ignore
= false;
3891 newext1
->inserted
= true;
3892 newext1
->csum
= NULL
;
3893 InsertHeadList(&ext
->list_entry
, &newext1
->list_entry
);
3895 add_insert_extent_rollback(rollback
, fcb
, newext1
);
3897 newext2
->offset
= start_data
;
3898 newext2
->datalen
= ext
->datalen
;
3899 newext2
->unique
= ext
->unique
;
3900 newext2
->ignore
= false;
3901 newext2
->inserted
= true;
3902 add_extent(fcb
, &newext1
->list_entry
, newext2
);
3904 add_insert_extent_rollback(rollback
, fcb
, newext2
);
3906 newext3
->offset
= end_data
;
3907 newext3
->datalen
= ext
->datalen
;
3908 newext3
->unique
= ext
->unique
;
3909 newext3
->ignore
= false;
3910 newext3
->inserted
= true;
3911 newext3
->csum
= NULL
;
3912 add_extent(fcb
, &newext2
->list_entry
, newext3
);
3914 add_insert_extent_rollback(rollback
, fcb
, newext3
);
3916 c
= get_chunk_from_address(fcb
->Vcb
, ed2
->address
);
3919 ERR("get_chunk_from_address(%I64x) failed\n", ed2
->address
);
3921 Status
= update_changed_extent_ref(fcb
->Vcb
, c
, ed2
->address
, ed2
->size
, fcb
->subvol
->id
, fcb
->inode
, ext
->offset
- ed2
->offset
, 2,
3922 fcb
->inode_item
.flags
& BTRFS_INODE_NODATASUM
, false, Irp
);
3924 if (!NT_SUCCESS(Status
)) {
3925 ERR("update_changed_extent_ref returned %08x\n", Status
);
3930 remove_fcb_extent(fcb
, ext
, rollback
);
3936 return STATUS_SUCCESS
;
3939 NTSTATUS
do_write_file(fcb
* fcb
, uint64_t start
, uint64_t end_data
, void* data
, PIRP Irp
, bool file_write
, uint32_t irp_offset
, LIST_ENTRY
* rollback
) {
3941 LIST_ENTRY
*le
, *le2
;
3942 uint64_t written
= 0, length
= end_data
- start
;
3943 uint64_t last_cow_start
;
3944 ULONG priority
= fcb
->Header
.Flags2
& FSRTL_FLAG2_IS_PAGING_FILE
? HighPagePriority
: NormalPagePriority
;
3945 #ifdef DEBUG_PARANOID
3948 bool extents_changed
= false;
3952 le
= fcb
->extents
.Flink
;
3953 while (le
!= &fcb
->extents
) {
3954 extent
* ext
= CONTAINING_RECORD(le
, extent
, list_entry
);
3959 EXTENT_DATA
* ed
= &ext
->extent_data
;
3960 EXTENT_DATA2
* ed2
= ed
->type
== EXTENT_TYPE_INLINE
? NULL
: (EXTENT_DATA2
*)ed
->data
;
3963 len
= ed
->type
== EXTENT_TYPE_INLINE
? ed
->decoded_size
: ed2
->num_bytes
;
3965 if (ext
->offset
+ len
<= start
)
3968 if (ext
->offset
> start
+ written
+ length
)
3971 if ((fcb
->inode_item
.flags
& BTRFS_INODE_NODATACOW
|| ed
->type
== EXTENT_TYPE_PREALLOC
) && ext
->unique
&& ed
->compression
== BTRFS_COMPRESSION_NONE
) {
3972 if (max(last_cow_start
, start
+ written
) < ext
->offset
) {
3973 uint64_t start_write
= max(last_cow_start
, start
+ written
);
3975 extents_changed
= true;
3977 Status
= excise_extents(fcb
->Vcb
, fcb
, start_write
, ext
->offset
, Irp
, rollback
);
3978 if (!NT_SUCCESS(Status
)) {
3979 ERR("excise_extents returned %08x\n", Status
);
3983 Status
= insert_extent(fcb
->Vcb
, fcb
, start_write
, ext
->offset
- start_write
, (uint8_t*)data
+ written
, Irp
, file_write
, irp_offset
+ written
, rollback
);
3984 if (!NT_SUCCESS(Status
)) {
3985 ERR("insert_extent returned %08x\n", Status
);
3989 written
+= ext
->offset
- start_write
;
3990 length
-= ext
->offset
- start_write
;
3996 if (ed
->type
== EXTENT_TYPE_REGULAR
) {
3997 uint64_t writeaddr
= ed2
->address
+ ed2
->offset
+ start
+ written
- ext
->offset
;
3998 uint64_t write_len
= min(len
, length
);
4001 TRACE("doing non-COW write to %I64x\n", writeaddr
);
4003 Status
= write_data_complete(fcb
->Vcb
, writeaddr
, (uint8_t*)data
+ written
, (uint32_t)write_len
, Irp
, NULL
, file_write
, irp_offset
+ written
, priority
);
4004 if (!NT_SUCCESS(Status
)) {
4005 ERR("write_data_complete returned %08x\n", Status
);
4009 c
= get_chunk_from_address(fcb
->Vcb
, writeaddr
);
4013 // This shouldn't ever get called - nocow files should always also be nosum.
4014 if (!(fcb
->inode_item
.flags
& BTRFS_INODE_NODATASUM
)) {
4015 calc_csum(fcb
->Vcb
, (uint8_t*)data
+ written
, (uint32_t)(write_len
/ fcb
->Vcb
->superblock
.sector_size
),
4016 &ext
->csum
[(start
+ written
- ext
->offset
) / fcb
->Vcb
->superblock
.sector_size
]);
4018 ext
->inserted
= true;
4019 extents_changed
= true;
4022 written
+= write_len
;
4023 length
-= write_len
;
4027 } else if (ed
->type
== EXTENT_TYPE_PREALLOC
) {
4030 Status
= do_write_file_prealloc(fcb
, ext
, start
+ written
, end_data
, (uint8_t*)data
+ written
, &write_len
,
4031 Irp
, file_write
, irp_offset
+ written
, priority
, rollback
);
4032 if (!NT_SUCCESS(Status
)) {
4033 ERR("do_write_file_prealloc returned %08x\n", Status
);
4037 extents_changed
= true;
4039 written
+= write_len
;
4040 length
-= write_len
;
4046 last_cow_start
= ext
->offset
+ len
;
4055 uint64_t start_write
= max(last_cow_start
, start
+ written
);
4057 extents_changed
= true;
4059 Status
= excise_extents(fcb
->Vcb
, fcb
, start_write
, end_data
, Irp
, rollback
);
4060 if (!NT_SUCCESS(Status
)) {
4061 ERR("excise_extents returned %08x\n", Status
);
4065 Status
= insert_extent(fcb
->Vcb
, fcb
, start_write
, end_data
- start_write
, (uint8_t*)data
+ written
, Irp
, file_write
, irp_offset
+ written
, rollback
);
4066 if (!NT_SUCCESS(Status
)) {
4067 ERR("insert_extent returned %08x\n", Status
);
4072 #ifdef DEBUG_PARANOID
4073 last_off
= 0xffffffffffffffff;
4075 le
= fcb
->extents
.Flink
;
4076 while (le
!= &fcb
->extents
) {
4077 extent
* ext
= CONTAINING_RECORD(le
, extent
, list_entry
);
4080 if (ext
->offset
== last_off
) {
4081 ERR("offset %I64x duplicated\n", ext
->offset
);
4083 } else if (ext
->offset
< last_off
&& last_off
!= 0xffffffffffffffff) {
4084 ERR("offsets out of order\n");
4088 last_off
= ext
->offset
;
4095 if (extents_changed
) {
4096 fcb
->extents_changed
= true;
4097 mark_fcb_dirty(fcb
);
4100 return STATUS_SUCCESS
;
4103 NTSTATUS
write_compressed(fcb
* fcb
, uint64_t start_data
, uint64_t end_data
, void* data
, PIRP Irp
, LIST_ENTRY
* rollback
) {
4107 for (i
= 0; i
< sector_align(end_data
- start_data
, COMPRESSED_EXTENT_SIZE
) / COMPRESSED_EXTENT_SIZE
; i
++) {
4111 s2
= start_data
+ (i
* COMPRESSED_EXTENT_SIZE
);
4112 e2
= min(s2
+ COMPRESSED_EXTENT_SIZE
, end_data
);
4114 Status
= write_compressed_bit(fcb
, s2
, e2
, (uint8_t*)data
+ (i
* COMPRESSED_EXTENT_SIZE
), &compressed
, Irp
, rollback
);
4116 if (!NT_SUCCESS(Status
)) {
4117 ERR("write_compressed_bit returned %08x\n", Status
);
4121 // If the first 128 KB of a file is incompressible, we set the nocompress flag so we don't
4122 // bother with the rest of it.
4123 if (s2
== 0 && e2
== COMPRESSED_EXTENT_SIZE
&& !compressed
&& !fcb
->Vcb
->options
.compress_force
) {
4124 fcb
->inode_item
.flags
|= BTRFS_INODE_NOCOMPRESS
;
4125 fcb
->inode_item_changed
= true;
4126 mark_fcb_dirty(fcb
);
4128 // write subsequent data non-compressed
4129 if (e2
< end_data
) {
4130 Status
= do_write_file(fcb
, e2
, end_data
, (uint8_t*)data
+ e2
, Irp
, false, 0, rollback
);
4132 if (!NT_SUCCESS(Status
)) {
4133 ERR("do_write_file returned %08x\n", Status
);
4138 return STATUS_SUCCESS
;
4142 return STATUS_SUCCESS
;
4145 NTSTATUS
write_file2(device_extension
* Vcb
, PIRP Irp
, LARGE_INTEGER offset
, void* buf
, ULONG
* length
, bool paging_io
, bool no_cache
,
4146 bool wait
, bool deferred_write
, bool write_irp
, LIST_ENTRY
* rollback
) {
4147 PIO_STACK_LOCATION IrpSp
= IoGetCurrentIrpStackLocation(Irp
);
4148 PFILE_OBJECT FileObject
= IrpSp
->FileObject
;
4150 uint64_t off64
, newlength
, start_data
, end_data
;
4154 bool changed_length
= false;
4161 bool paging_lock
= false, acquired_fcb_lock
= false, acquired_tree_lock
= false, pagefile
;
4164 TRACE("(%p, %p, %I64x, %p, %x, %u, %u)\n", Vcb
, FileObject
, offset
.QuadPart
, buf
, *length
, paging_io
, no_cache
);
4167 TRACE("returning success for zero-length write\n");
4168 return STATUS_SUCCESS
;
4172 ERR("error - FileObject was NULL\n");
4173 return STATUS_ACCESS_DENIED
;
4176 fcb
= FileObject
->FsContext
;
4177 ccb
= FileObject
->FsContext2
;
4178 fileref
= ccb
? ccb
->fileref
: NULL
;
4180 if (!fcb
->ads
&& fcb
->type
!= BTRFS_TYPE_FILE
&& fcb
->type
!= BTRFS_TYPE_SYMLINK
) {
4181 WARN("tried to write to something other than a file or symlink (inode %I64x, type %u, %p, %p)\n", fcb
->inode
, fcb
->type
, &fcb
->type
, fcb
);
4182 return STATUS_INVALID_DEVICE_REQUEST
;
4185 if (offset
.LowPart
== FILE_WRITE_TO_END_OF_FILE
&& offset
.HighPart
== -1)
4186 offset
= fcb
->Header
.FileSize
;
4188 off64
= offset
.QuadPart
;
4190 TRACE("fcb->Header.Flags = %x\n", fcb
->Header
.Flags
);
4192 if (!no_cache
&& !CcCanIWrite(FileObject
, *length
, wait
, deferred_write
))
4193 return STATUS_PENDING
;
4195 if (!wait
&& no_cache
)
4196 return STATUS_PENDING
;
4198 if (no_cache
&& !paging_io
&& FileObject
->SectionObjectPointer
->DataSectionObject
) {
4199 IO_STATUS_BLOCK iosb
;
4201 ExAcquireResourceExclusiveLite(fcb
->Header
.PagingIoResource
, true);
4203 CcFlushCache(FileObject
->SectionObjectPointer
, &offset
, *length
, &iosb
);
4205 if (!NT_SUCCESS(iosb
.Status
)) {
4206 ExReleaseResourceLite(fcb
->Header
.PagingIoResource
);
4207 ERR("CcFlushCache returned %08x\n", iosb
.Status
);
4213 CcPurgeCacheSection(FileObject
->SectionObjectPointer
, &offset
, *length
, false);
4217 if (!ExAcquireResourceSharedLite(fcb
->Header
.PagingIoResource
, wait
)) {
4218 Status
= STATUS_PENDING
;
4224 pagefile
= fcb
->Header
.Flags2
& FSRTL_FLAG2_IS_PAGING_FILE
&& paging_io
;
4226 if (!pagefile
&& !ExIsResourceAcquiredExclusiveLite(&Vcb
->tree_lock
)) {
4227 if (!ExAcquireResourceSharedLite(&Vcb
->tree_lock
, wait
)) {
4228 Status
= STATUS_PENDING
;
4231 acquired_tree_lock
= true;
4235 if (!ExAcquireResourceSharedLite(fcb
->Header
.Resource
, wait
)) {
4236 Status
= STATUS_PENDING
;
4239 acquired_fcb_lock
= true;
4240 } else if (!ExIsResourceAcquiredExclusiveLite(fcb
->Header
.Resource
)) {
4241 if (!ExAcquireResourceExclusiveLite(fcb
->Header
.Resource
, wait
)) {
4242 Status
= STATUS_PENDING
;
4245 acquired_fcb_lock
= true;
4248 newlength
= fcb
->ads
? fcb
->adsdata
.Length
: fcb
->inode_item
.st_size
;
4253 TRACE("newlength = %I64x\n", newlength
);
4255 if (off64
+ *length
> newlength
) {
4257 if (off64
>= newlength
) {
4258 TRACE("paging IO tried to write beyond end of file (file size = %I64x, offset = %I64x, length = %x)\n", newlength
, off64
, *length
);
4259 TRACE("FileObject: AllocationSize = %I64x, FileSize = %I64x, ValidDataLength = %I64x\n",
4260 fcb
->Header
.AllocationSize
.QuadPart
, fcb
->Header
.FileSize
.QuadPart
, fcb
->Header
.ValidDataLength
.QuadPart
);
4261 Status
= STATUS_SUCCESS
;
4265 *length
= (ULONG
)(newlength
- off64
);
4267 newlength
= off64
+ *length
;
4268 changed_length
= true;
4270 TRACE("extending length to %I64x\n", newlength
);
4275 make_inline
= false;
4276 else if (fcb
->type
== BTRFS_TYPE_SYMLINK
)
4277 make_inline
= newlength
<= (Vcb
->superblock
.node_size
- sizeof(tree_header
) - sizeof(leaf_node
) - offsetof(EXTENT_DATA
, data
[0]));
4279 make_inline
= newlength
<= fcb
->Vcb
->options
.max_inline
;
4281 if (changed_length
) {
4282 if (newlength
> (uint64_t)fcb
->Header
.AllocationSize
.QuadPart
) {
4283 if (!acquired_tree_lock
) {
4284 // We need to acquire the tree lock if we don't have it already -
4285 // we can't give an inline file proper extents at the same time as we're
4287 if (!ExAcquireResourceSharedLite(&Vcb
->tree_lock
, wait
)) {
4288 Status
= STATUS_PENDING
;
4291 acquired_tree_lock
= true;
4294 Status
= extend_file(fcb
, fileref
, newlength
, false, Irp
, rollback
);
4295 if (!NT_SUCCESS(Status
)) {
4296 ERR("extend_file returned %08x\n", Status
);
4299 } else if (!fcb
->ads
)
4300 fcb
->inode_item
.st_size
= newlength
;
4302 fcb
->Header
.FileSize
.QuadPart
= newlength
;
4303 fcb
->Header
.ValidDataLength
.QuadPart
= newlength
;
4305 TRACE("AllocationSize = %I64x\n", fcb
->Header
.AllocationSize
.QuadPart
);
4306 TRACE("FileSize = %I64x\n", fcb
->Header
.FileSize
.QuadPart
);
4307 TRACE("ValidDataLength = %I64x\n", fcb
->Header
.ValidDataLength
.QuadPart
);
4311 Status
= STATUS_SUCCESS
;
4314 if (!FileObject
->PrivateCacheMap
|| changed_length
) {
4317 ccfs
.AllocationSize
= fcb
->Header
.AllocationSize
;
4318 ccfs
.FileSize
= fcb
->Header
.FileSize
;
4319 ccfs
.ValidDataLength
= fcb
->Header
.ValidDataLength
;
4321 if (!FileObject
->PrivateCacheMap
)
4322 init_file_cache(FileObject
, &ccfs
);
4324 CcSetFileSizes(FileObject
, &ccfs
);
4327 if (IrpSp
->MinorFunction
& IRP_MN_MDL
) {
4328 CcPrepareMdlWrite(FileObject
, &offset
, *length
, &Irp
->MdlAddress
, &Irp
->IoStatus
);
4330 Status
= Irp
->IoStatus
.Status
;
4333 if (fCcCopyWriteEx
) {
4334 TRACE("CcCopyWriteEx(%p, %I64x, %x, %u, %p, %p)\n", FileObject
, off64
, *length
, wait
, buf
, Irp
->Tail
.Overlay
.Thread
);
4335 if (!fCcCopyWriteEx(FileObject
, &offset
, *length
, wait
, buf
, Irp
->Tail
.Overlay
.Thread
)) {
4336 Status
= STATUS_PENDING
;
4339 TRACE("CcCopyWriteEx finished\n");
4341 TRACE("CcCopyWrite(%p, %I64x, %x, %u, %p)\n", FileObject
, off64
, *length
, wait
, buf
);
4342 if (!CcCopyWrite(FileObject
, &offset
, *length
, wait
, buf
)) {
4343 Status
= STATUS_PENDING
;
4346 TRACE("CcCopyWrite finished\n");
4349 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
4350 Status
= _SEH2_GetExceptionCode();
4353 if (changed_length
) {
4354 queue_notification_fcb(fcb
->ads
? fileref
->parent
: fileref
, fcb
->ads
? FILE_NOTIFY_CHANGE_STREAM_SIZE
: FILE_NOTIFY_CHANGE_SIZE
,
4355 fcb
->ads
? FILE_ACTION_MODIFIED_STREAM
: FILE_ACTION_MODIFIED
, fcb
->ads
&& fileref
->dc
? &fileref
->dc
->name
: NULL
);
4362 if (changed_length
) {
4365 if (newlength
> fcb
->adsmaxlen
) {
4366 ERR("error - xattr too long (%I64u > %u)\n", newlength
, fcb
->adsmaxlen
);
4367 Status
= STATUS_DISK_FULL
;
4371 data2
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)newlength
, ALLOC_TAG
);
4373 ERR("out of memory\n");
4374 Status
= STATUS_INSUFFICIENT_RESOURCES
;
4378 if (fcb
->adsdata
.Buffer
) {
4379 RtlCopyMemory(data2
, fcb
->adsdata
.Buffer
, fcb
->adsdata
.Length
);
4380 ExFreePool(fcb
->adsdata
.Buffer
);
4383 if (newlength
> fcb
->adsdata
.Length
)
4384 RtlZeroMemory(&data2
[fcb
->adsdata
.Length
], (ULONG
)(newlength
- fcb
->adsdata
.Length
));
4387 fcb
->adsdata
.Buffer
= data2
;
4388 fcb
->adsdata
.Length
= fcb
->adsdata
.MaximumLength
= (USHORT
)newlength
;
4390 fcb
->Header
.AllocationSize
.QuadPart
= newlength
;
4391 fcb
->Header
.FileSize
.QuadPart
= newlength
;
4392 fcb
->Header
.ValidDataLength
.QuadPart
= newlength
;
4396 RtlCopyMemory(&fcb
->adsdata
.Buffer
[off64
], buf
, *length
);
4398 fcb
->Header
.ValidDataLength
.QuadPart
= newlength
;
4400 mark_fcb_dirty(fcb
);
4403 mark_fileref_dirty(fileref
);
4405 bool compress
= write_fcb_compressed(fcb
), no_buf
= false;
4410 end_data
= sector_align(newlength
, fcb
->Vcb
->superblock
.sector_size
);
4411 bufhead
= sizeof(EXTENT_DATA
) - 1;
4412 } else if (compress
) {
4413 start_data
= off64
& ~(uint64_t)(COMPRESSED_EXTENT_SIZE
- 1);
4414 end_data
= min(sector_align(off64
+ *length
, COMPRESSED_EXTENT_SIZE
),
4415 sector_align(newlength
, fcb
->Vcb
->superblock
.sector_size
));
4418 start_data
= off64
& ~(uint64_t)(fcb
->Vcb
->superblock
.sector_size
- 1);
4419 end_data
= sector_align(off64
+ *length
, fcb
->Vcb
->superblock
.sector_size
);
4423 if (fcb_is_inline(fcb
))
4424 end_data
= max(end_data
, sector_align(fcb
->inode_item
.st_size
, Vcb
->superblock
.sector_size
));
4426 fcb
->Header
.ValidDataLength
.QuadPart
= newlength
;
4427 TRACE("fcb %p FileSize = %I64x\n", fcb
, fcb
->Header
.FileSize
.QuadPart
);
4429 if (!make_inline
&& !compress
&& off64
== start_data
&& off64
+ *length
== end_data
) {
4433 data
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)(end_data
- start_data
+ bufhead
), ALLOC_TAG
);
4435 ERR("out of memory\n");
4436 Status
= STATUS_INSUFFICIENT_RESOURCES
;
4440 RtlZeroMemory(data
+ bufhead
, (ULONG
)(end_data
- start_data
));
4442 TRACE("start_data = %I64x\n", start_data
);
4443 TRACE("end_data = %I64x\n", end_data
);
4445 if (off64
> start_data
|| off64
+ *length
< end_data
) {
4446 if (changed_length
) {
4447 if (fcb
->inode_item
.st_size
> start_data
)
4448 Status
= read_file(fcb
, data
+ bufhead
, start_data
, fcb
->inode_item
.st_size
- start_data
, NULL
, Irp
);
4450 Status
= STATUS_SUCCESS
;
4452 Status
= read_file(fcb
, data
+ bufhead
, start_data
, end_data
- start_data
, NULL
, Irp
);
4454 if (!NT_SUCCESS(Status
)) {
4455 ERR("read_file returned %08x\n", Status
);
4461 RtlCopyMemory(data
+ bufhead
+ off64
- start_data
, buf
, *length
);
4465 Status
= excise_extents(fcb
->Vcb
, fcb
, start_data
, end_data
, Irp
, rollback
);
4466 if (!NT_SUCCESS(Status
)) {
4467 ERR("error - excise_extents returned %08x\n", Status
);
4472 ed2
= (EXTENT_DATA
*)data
;
4473 ed2
->generation
= fcb
->Vcb
->superblock
.generation
;
4474 ed2
->decoded_size
= newlength
;
4475 ed2
->compression
= BTRFS_COMPRESSION_NONE
;
4476 ed2
->encryption
= BTRFS_ENCRYPTION_NONE
;
4477 ed2
->encoding
= BTRFS_ENCODING_NONE
;
4478 ed2
->type
= EXTENT_TYPE_INLINE
;
4480 Status
= add_extent_to_fcb(fcb
, 0, ed2
, (uint16_t)(offsetof(EXTENT_DATA
, data
[0]) + newlength
), false, NULL
, rollback
);
4481 if (!NT_SUCCESS(Status
)) {
4482 ERR("add_extent_to_fcb returned %08x\n", Status
);
4487 fcb
->inode_item
.st_blocks
+= newlength
;
4488 } else if (compress
) {
4489 Status
= write_compressed(fcb
, start_data
, end_data
, data
, Irp
, rollback
);
4491 if (!NT_SUCCESS(Status
)) {
4492 ERR("write_compressed returned %08x\n", Status
);
4497 if (write_irp
&& Irp
->MdlAddress
&& no_buf
) {
4498 bool locked
= Irp
->MdlAddress
->MdlFlags
& (MDL_PAGES_LOCKED
| MDL_PARTIAL
);
4501 Status
= STATUS_SUCCESS
;
4504 MmProbeAndLockPages(Irp
->MdlAddress
, KernelMode
, IoReadAccess
);
4505 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
4506 Status
= _SEH2_GetExceptionCode();
4509 if (!NT_SUCCESS(Status
)) {
4510 ERR("MmProbeAndLockPages threw exception %08x\n", Status
);
4516 Status
= do_write_file(fcb
, start_data
, end_data
, data
, Irp
, true, 0, rollback
);
4517 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
4518 Status
= _SEH2_GetExceptionCode();
4522 MmUnlockPages(Irp
->MdlAddress
);
4525 Status
= do_write_file(fcb
, start_data
, end_data
, data
, Irp
, false, 0, rollback
);
4526 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
4527 Status
= _SEH2_GetExceptionCode();
4531 if (!NT_SUCCESS(Status
)) {
4532 ERR("do_write_file returned %08x\n", Status
);
4533 if (!no_buf
) ExFreePool(data
);
4542 KeQuerySystemTime(&time
);
4543 win_time_to_unix(time
, &now
);
4547 if (fileref
&& fileref
->parent
)
4548 origii
= &fileref
->parent
->fcb
->inode_item
;
4550 ERR("no parent fcb found for stream\n");
4551 Status
= STATUS_INTERNAL_ERROR
;
4555 origii
= &fcb
->inode_item
;
4557 origii
->transid
= Vcb
->superblock
.generation
;
4560 if (!ccb
->user_set_change_time
)
4561 origii
->st_ctime
= now
;
4564 if (changed_length
) {
4565 TRACE("setting st_size to %I64x\n", newlength
);
4566 origii
->st_size
= newlength
;
4567 filter
|= FILE_NOTIFY_CHANGE_SIZE
;
4570 fcb
->inode_item_changed
= true;
4572 fileref
->parent
->fcb
->inode_item_changed
= true;
4575 filter
|= FILE_NOTIFY_CHANGE_STREAM_SIZE
;
4577 filter
|= FILE_NOTIFY_CHANGE_STREAM_WRITE
;
4580 if (!ccb
->user_set_write_time
) {
4581 origii
->st_mtime
= now
;
4582 filter
|= FILE_NOTIFY_CHANGE_LAST_WRITE
;
4585 mark_fcb_dirty(fcb
->ads
? fileref
->parent
->fcb
: fcb
);
4588 if (changed_length
) {
4591 ccfs
.AllocationSize
= fcb
->Header
.AllocationSize
;
4592 ccfs
.FileSize
= fcb
->Header
.FileSize
;
4593 ccfs
.ValidDataLength
= fcb
->Header
.ValidDataLength
;
4596 CcSetFileSizes(FileObject
, &ccfs
);
4597 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
4598 Status
= _SEH2_GetExceptionCode();
4603 fcb
->subvol
->root_item
.ctransid
= Vcb
->superblock
.generation
;
4604 fcb
->subvol
->root_item
.ctime
= now
;
4606 Status
= STATUS_SUCCESS
;
4609 queue_notification_fcb(fcb
->ads
? fileref
->parent
: fileref
, filter
, fcb
->ads
? FILE_ACTION_MODIFIED_STREAM
: FILE_ACTION_MODIFIED
,
4610 fcb
->ads
&& fileref
->dc
? &fileref
->dc
->name
: NULL
);
4613 if (NT_SUCCESS(Status
) && FileObject
->Flags
& FO_SYNCHRONOUS_IO
&& !paging_io
) {
4614 TRACE("CurrentByteOffset was: %I64x\n", FileObject
->CurrentByteOffset
.QuadPart
);
4615 FileObject
->CurrentByteOffset
.QuadPart
= offset
.QuadPart
+ (NT_SUCCESS(Status
) ? *length
: 0);
4616 TRACE("CurrentByteOffset now: %I64x\n", FileObject
->CurrentByteOffset
.QuadPart
);
4619 if (acquired_fcb_lock
)
4620 ExReleaseResourceLite(fcb
->Header
.Resource
);
4622 if (acquired_tree_lock
)
4623 ExReleaseResourceLite(&Vcb
->tree_lock
);
4626 ExReleaseResourceLite(fcb
->Header
.PagingIoResource
);
/*
 * write_file - service a write request described by Irp.
 *
 * Takes the byte offset and file object from the current IRP stack
 * location, selects the source buffer, checks byte-range locks for
 * non-paging I/O, and forwards the request to write_file2 together with a
 * local rollback list.
 *
 * Vcb            - passed on to write_file2 and do_rollback.
 * Irp            - the write IRP; IoStatus.Information is reset to 0 on entry
 *                  and set to the write length once write_file2 succeeds.
 * wait           - forwarded unchanged to write_file2.
 * deferred_write - forwarded unchanged to write_file2.
 *
 * Status values assigned here: STATUS_INSUFFICIENT_RESOURCES when an MDL is
 * present but no buffer pointer was obtained, STATUS_FILE_LOCK_CONFLICT when
 * FsRtlCheckLockForWriteAccess refuses the write, otherwise whatever
 * write_file2 returned.
 */
4631 NTSTATUS
write_file(device_extension
* Vcb
, PIRP Irp
, bool wait
, bool deferred_write
) {
4632 PIO_STACK_LOCATION IrpSp
= IoGetCurrentIrpStackLocation(Irp
);
4635 LARGE_INTEGER offset
= IrpSp
->Parameters
.Write
.ByteOffset
;
4636 PFILE_OBJECT FileObject
= IrpSp
->FileObject
;
// fcb is NULL when the IRP carries no file object.
4637 fcb
* fcb
= FileObject
? FileObject
->FsContext
: NULL
;
4638 LIST_ENTRY rollback
;
4640 InitializeListHead(&rollback
);
// Nothing has been written yet.
4644 Irp
->IoStatus
.Information
= 0;
4646 TRACE("offset = %I64x\n", offset
.QuadPart
);
4647 TRACE("length = %x\n", IrpSp
->Parameters
.Write
.Length
);
// No system buffer: get the data pointer from map_user_buffer, asking for
// HighPagePriority when the fcb is flagged as a paging file.
4649 if (!Irp
->AssociatedIrp
.SystemBuffer
) {
4650 buf
= map_user_buffer(Irp
, fcb
&& fcb
->Header
.Flags2
& FSRTL_FLAG2_IS_PAGING_FILE
? HighPagePriority
: NormalPagePriority
);
// An MDL exists but no pointer came back - treat as a resource failure.
4652 if (Irp
->MdlAddress
&& !buf
) {
4653 ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
4654 Status
= STATUS_INSUFFICIENT_RESOURCES
;
// NOTE(review): presumably the else branch of the SystemBuffer test above -
// the system buffer is used directly. Confirm against the full file.
4658 buf
= Irp
->AssociatedIrp
.SystemBuffer
;
4660 TRACE("buf = %p\n", buf
);
// Byte-range locks are only checked for non-paging I/O on a known fcb.
4662 if (fcb
&& !(Irp
->Flags
& IRP_PAGING_IO
) && !FsRtlCheckLockForWriteAccess(&fcb
->lock
, Irp
)) {
4663 WARN("tried to write to locked region\n");
4664 Status
= STATUS_FILE_LOCK_CONFLICT
;
// The length is passed by pointer (write_file2 may adjust it - TODO confirm);
// the paging and no-cache arguments are taken straight from Irp->Flags.
4668 Status
= write_file2(Vcb
, Irp
, offset
, buf
, &IrpSp
->Parameters
.Write
.Length
, Irp
->Flags
& IRP_PAGING_IO
, Irp
->Flags
& IRP_NOCACHE
,
4669 wait
, deferred_write
, true, &rollback
);
// NOTE(review): both the pending and the failure case presumably leave the
// normal path here - confirm the jump targets against the full file.
4671 if (Status
== STATUS_PENDING
)
4673 else if (!NT_SUCCESS(Status
)) {
4674 ERR("write_file2 returned %08x\n", Status
);
4678 if (NT_SUCCESS(Status
)) {
// Report the write length as the number of bytes transferred.
4679 Irp
->IoStatus
.Information
= IrpSp
->Parameters
.Write
.Length
;
// Disk accounting: only when diskacc is set, the write was not pended and
// the IRP is marked IRP_NOCACHE.
4681 if (diskacc
&& Status
!= STATUS_PENDING
&& Irp
->Flags
& IRP_NOCACHE
) {
4682 PETHREAD thread
= NULL
;
// Pick the thread to charge: the IRP's originating thread if it is not a
// system thread; else the current thread if it is not a system thread; else
// the current (system) thread when this IRP is the top-level IRP. In any
// other case thread stays NULL.
4684 if (Irp
->Tail
.Overlay
.Thread
&& !IoIsSystemThread(Irp
->Tail
.Overlay
.Thread
))
4685 thread
= Irp
->Tail
.Overlay
.Thread
;
4686 else if (!IoIsSystemThread(PsGetCurrentThread()))
4687 thread
= PsGetCurrentThread();
4688 else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp
)
4689 thread
= PsGetCurrentThread();
// NOTE(review): presumably guarded by a NULL check on thread - confirm.
// Appears to record the write length and one write operation against the
// thread's process; verify against the tPsUpdateDiskCounters prototype.
4692 fPsUpdateDiskCounters(PsGetThreadProcess(thread
), 0, IrpSp
->Parameters
.Write
.Length
, 0, 1, 0);
// On success the rollback list is discarded via clear_rollback.
// NOTE(review): do_rollback below is presumably the failure branch - confirm.
4697 if (NT_SUCCESS(Status
))
4698 clear_rollback(&rollback
);
4700 do_rollback(Vcb
, &rollback
);
/*
 * drv_write - IRP_MJ_WRITE dispatch routine.
 *
 * Routes the request by the type stored in the device extension:
 * VCB_TYPE_VOLUME devices are handed to vol_write, and anything that is not
 * VCB_TYPE_FS is rejected with STATUS_INVALID_PARAMETER. For filesystem
 * devices the routine reports a NULL fcb or ccb as STATUS_INVALID_PARAMETER,
 * checks write access for user-mode requestors, forwards writes on the
 * volume fcb to Vcb->Vpb->RealDevice, refuses writes to read-only subvolumes
 * and read-only volumes, finishes MDL writes (IRP_MN_COMPLETE), and
 * otherwise calls write_file.
 *
 * The work is bracketed by FsRtlEnterFileSystem / FsRtlExitFileSystem.
 */
4705 _Dispatch_type_(IRP_MJ_WRITE
)
4706 _Function_class_(DRIVER_DISPATCH
)
4707 NTSTATUS __stdcall
drv_write(IN PDEVICE_OBJECT DeviceObject
, IN PIRP Irp
) {
4710 PIO_STACK_LOCATION IrpSp
= IoGetCurrentIrpStackLocation(Irp
);
4711 device_extension
* Vcb
= DeviceObject
->DeviceExtension
;
4712 PFILE_OBJECT FileObject
= IrpSp
->FileObject
;
// fcb and ccb are NULL when the IRP carries no file object.
4713 fcb
* fcb
= FileObject
? FileObject
->FsContext
: NULL
;
4714 ccb
* ccb
= FileObject
? FileObject
->FsContext2
: NULL
;
// Without a file object the request is treated as one that may wait.
4715 bool wait
= FileObject
? IoIsOperationSynchronous(Irp
) : true;
4717 FsRtlEnterFileSystem();
4719 top_level
= is_top_level(Irp
);
// Volume device objects have their own write handler.
4721 if (Vcb
&& Vcb
->type
== VCB_TYPE_VOLUME
) {
4722 Status
= vol_write(DeviceObject
, Irp
);
// Anything else that is not a filesystem device is rejected.
4724 } else if (!Vcb
|| Vcb
->type
!= VCB_TYPE_FS
) {
4725 Status
= STATUS_INVALID_PARAMETER
;
// NOTE(review): the two error paths below are presumably guarded by NULL
// checks on fcb and ccb respectively - confirm against the full file.
4730 ERR("fcb was NULL\n");
4731 Status
= STATUS_INVALID_PARAMETER
;
4736 ERR("ccb was NULL\n");
4737 Status
= STATUS_INVALID_PARAMETER
;
// User-mode callers need FILE_WRITE_DATA or FILE_APPEND_DATA on the handle.
4741 if (Irp
->RequestorMode
== UserMode
&& !(ccb
->access
& (FILE_WRITE_DATA
| FILE_APPEND_DATA
))) {
4742 WARN("insufficient permissions\n");
4743 Status
= STATUS_ACCESS_DENIED
;
// Writes on the volume fcb are only accepted while the volume is locked by
// this same file object; they are then passed down to the real device.
4747 if (fcb
== Vcb
->volume_fcb
) {
4748 if (!Vcb
->locked
|| Vcb
->locked_fileobj
!= FileObject
) {
4749 ERR("trying to write to volume when not locked, or locked with another FileObject\n");
4750 Status
= STATUS_ACCESS_DENIED
;
4754 TRACE("writing directly to volume\n");
4756 IoSkipCurrentIrpStackLocation(Irp
);
4758 Status
= IoCallDriver(Vcb
->Vpb
->RealDevice
, Irp
);
// Read-only subvolume: access denied.
4762 if (is_subvol_readonly(fcb
->subvol
, Irp
)) {
4763 Status
= STATUS_ACCESS_DENIED
;
// Read-only volume: report the media as write protected.
4767 if (Vcb
->readonly
) {
4768 Status
= STATUS_MEDIA_WRITE_PROTECTED
;
// IRP_MN_COMPLETE: finish an MDL write through the cache manager and clear
// Irp->MdlAddress afterwards.
4773 if (IrpSp
->MinorFunction
& IRP_MN_COMPLETE
) {
4774 CcMdlWriteComplete(IrpSp
->FileObject
, &IrpSp
->Parameters
.Write
.ByteOffset
, Irp
->MdlAddress
);
4776 Irp
->MdlAddress
= NULL
;
4777 Status
= STATUS_SUCCESS
;
// Oplock check is skipped for paging I/O.
4779 if (!(Irp
->Flags
& IRP_PAGING_IO
))
4780 FsRtlCheckOplock(fcb_oplock(fcb
), Irp
, NULL
, NULL
, NULL
);
4782 // Don't offload jobs when doing paging IO - otherwise this can lead to
4783 // deadlocks in CcCopyWrite.
4784 if (Irp
->Flags
& IRP_PAGING_IO
)
// deferred_write is passed as false from the dispatch routine.
4787 Status
= write_file(Vcb
, Irp
, wait
, false);
// Any structured exception is turned into the returned status code.
4789 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
4790 Status
= _SEH2_GetExceptionCode();
4794 Irp
->IoStatus
.Status
= Status
;
// NOTE(review): IoStatus.Information is pointer-sized (ULONG_PTR in the WDK);
// "%u" may not match it on 64-bit builds - confirm how TRACE formats this.
4796 TRACE("wrote %u bytes\n", Irp
->IoStatus
.Information
);
// Anything other than STATUS_PENDING is completed here.
4798 if (Status
!= STATUS_PENDING
)
4799 IoCompleteRequest(Irp
, IO_NO_INCREMENT
);
// NOTE(review): presumably the pending branch - the IRP is marked pending
// and queued with add_thread_job; if that returns false the write job is run
// directly via do_write_job. Confirm the branch structure.
4801 IoMarkIrpPending(Irp
);
4803 if (!add_thread_job(Vcb
, Irp
))
4804 Status
= do_write_job(Vcb
, Irp
);
// NOTE(review): presumably only done when top_level is set - confirm.
4809 IoSetTopLevelIrp(NULL
);
4811 TRACE("returning %08x\n", Status
);
4813 FsRtlExitFileSystem();