[BTRFS] Upgrade to 1.5
[reactos.git] / drivers / filesystems / btrfs / write.c
1 /* Copyright (c) Mark Harmstone 2016-17
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19
20 typedef struct {
21 uint64_t start;
22 uint64_t end;
23 uint8_t* data;
24 PMDL mdl;
25 uint64_t irp_offset;
26 } write_stripe;
27
28 _Function_class_(IO_COMPLETION_ROUTINE)
29 static NTSTATUS __stdcall write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr);
30
31 static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback);
32
33 extern tPsUpdateDiskCounters fPsUpdateDiskCounters;
34 extern tCcCopyWriteEx fCcCopyWriteEx;
35 extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters;
36 extern bool diskacc;
37
38 bool find_data_address_in_chunk(device_extension* Vcb, chunk* c, uint64_t length, uint64_t* address) {
39 LIST_ENTRY* le;
40 space* s;
41
42 TRACE("(%p, %I64x, %I64x, %p)\n", Vcb, c->offset, length, address);
43
44 if (length > c->chunk_item->size - c->used)
45 return false;
46
47 if (!c->cache_loaded) {
48 NTSTATUS Status = load_cache_chunk(Vcb, c, NULL);
49
50 if (!NT_SUCCESS(Status)) {
51 ERR("load_cache_chunk returned %08x\n", Status);
52 return false;
53 }
54 }
55
56 if (IsListEmpty(&c->space_size))
57 return false;
58
59 le = c->space_size.Flink;
60 while (le != &c->space_size) {
61 s = CONTAINING_RECORD(le, space, list_entry_size);
62
63 if (s->size == length) {
64 *address = s->address;
65 return true;
66 } else if (s->size < length) {
67 if (le == c->space_size.Flink)
68 return false;
69
70 s = CONTAINING_RECORD(le->Blink, space, list_entry_size);
71
72 *address = s->address;
73 return true;
74 }
75
76 le = le->Flink;
77 }
78
79 s = CONTAINING_RECORD(c->space_size.Blink, space, list_entry_size);
80
81 if (s->size > length) {
82 *address = s->address;
83 return true;
84 }
85
86 return false;
87 }
88
89 chunk* get_chunk_from_address(device_extension* Vcb, uint64_t address) {
90 LIST_ENTRY* le2;
91
92 ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
93
94 le2 = Vcb->chunks.Flink;
95 while (le2 != &Vcb->chunks) {
96 chunk* c = CONTAINING_RECORD(le2, chunk, list_entry);
97
98 if (address >= c->offset && address < c->offset + c->chunk_item->size) {
99 ExReleaseResourceLite(&Vcb->chunk_lock);
100 return c;
101 }
102
103 le2 = le2->Flink;
104 }
105
106 ExReleaseResourceLite(&Vcb->chunk_lock);
107
108 return NULL;
109 }
110
111 typedef struct {
112 space* dh;
113 device* device;
114 } stripe;
115
116 static uint64_t find_new_chunk_address(device_extension* Vcb, uint64_t size) {
117 uint64_t lastaddr;
118 LIST_ENTRY* le;
119
120 lastaddr = 0xc00000;
121
122 le = Vcb->chunks.Flink;
123 while (le != &Vcb->chunks) {
124 chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
125
126 if (c->offset >= lastaddr + size)
127 return lastaddr;
128
129 lastaddr = c->offset + c->chunk_item->size;
130
131 le = le->Flink;
132 }
133
134 return lastaddr;
135 }
136
137 static bool find_new_dup_stripes(device_extension* Vcb, stripe* stripes, uint64_t max_stripe_size, bool full_size) {
138 uint64_t devusage = 0xffffffffffffffff;
139 space *devdh1 = NULL, *devdh2 = NULL;
140 LIST_ENTRY* le;
141 device* dev2 = NULL;
142
143 le = Vcb->devices.Flink;
144
145 while (le != &Vcb->devices) {
146 device* dev = CONTAINING_RECORD(le, device, list_entry);
147
148 if (!dev->readonly && !dev->reloc && dev->devobj) {
149 uint64_t usage = (dev->devitem.bytes_used * 4096) / dev->devitem.num_bytes;
150
151 // favour devices which have been used the least
152 if (usage < devusage) {
153 if (!IsListEmpty(&dev->space)) {
154 LIST_ENTRY* le2;
155 space *dh1 = NULL, *dh2 = NULL;
156
157 le2 = dev->space.Flink;
158 while (le2 != &dev->space) {
159 space* dh = CONTAINING_RECORD(le2, space, list_entry);
160
161 if (dh->size >= max_stripe_size && (!dh1 || !dh2 || dh->size < dh1->size)) {
162 dh2 = dh1;
163 dh1 = dh;
164 }
165
166 le2 = le2->Flink;
167 }
168
169 if (dh1 && (dh2 || dh1->size >= 2 * max_stripe_size)) {
170 dev2 = dev;
171 devusage = usage;
172 devdh1 = dh1;
173 devdh2 = dh2 ? dh2 : dh1;
174 }
175 }
176 }
177 }
178
179 le = le->Flink;
180 }
181
182 if (!devdh1) {
183 uint64_t size = 0;
184
185 // Can't find hole of at least max_stripe_size; look for the largest one we can find
186
187 if (full_size)
188 return false;
189
190 le = Vcb->devices.Flink;
191 while (le != &Vcb->devices) {
192 device* dev = CONTAINING_RECORD(le, device, list_entry);
193
194 if (!dev->readonly && !dev->reloc) {
195 if (!IsListEmpty(&dev->space)) {
196 LIST_ENTRY* le2;
197 space *dh1 = NULL, *dh2 = NULL;
198
199 le2 = dev->space.Flink;
200 while (le2 != &dev->space) {
201 space* dh = CONTAINING_RECORD(le2, space, list_entry);
202
203 if (!dh1 || !dh2 || dh->size < dh1->size) {
204 dh2 = dh1;
205 dh1 = dh;
206 }
207
208 le2 = le2->Flink;
209 }
210
211 if (dh1) {
212 uint64_t devsize;
213
214 if (dh2)
215 devsize = max(dh1->size / 2, min(dh1->size, dh2->size));
216 else
217 devsize = dh1->size / 2;
218
219 if (devsize > size) {
220 dev2 = dev;
221 devdh1 = dh1;
222
223 if (dh2 && min(dh1->size, dh2->size) > dh1->size / 2)
224 devdh2 = dh2;
225 else
226 devdh2 = dh1;
227
228 size = devsize;
229 }
230 }
231 }
232 }
233
234 le = le->Flink;
235 }
236
237 if (!devdh1)
238 return false;
239 }
240
241 stripes[0].device = stripes[1].device = dev2;
242 stripes[0].dh = devdh1;
243 stripes[1].dh = devdh2;
244
245 return true;
246 }
247
248 static bool find_new_stripe(device_extension* Vcb, stripe* stripes, uint16_t i, uint64_t max_stripe_size, bool allow_missing, bool full_size) {
249 uint64_t k, devusage = 0xffffffffffffffff;
250 space* devdh = NULL;
251 LIST_ENTRY* le;
252 device* dev2 = NULL;
253
254 le = Vcb->devices.Flink;
255 while (le != &Vcb->devices) {
256 device* dev = CONTAINING_RECORD(le, device, list_entry);
257 uint64_t usage;
258 bool skip = false;
259
260 if (dev->readonly || dev->reloc || (!dev->devobj && !allow_missing)) {
261 le = le->Flink;
262 continue;
263 }
264
265 // skip this device if it already has a stripe
266 if (i > 0) {
267 for (k = 0; k < i; k++) {
268 if (stripes[k].device == dev) {
269 skip = true;
270 break;
271 }
272 }
273 }
274
275 if (!skip) {
276 usage = (dev->devitem.bytes_used * 4096) / dev->devitem.num_bytes;
277
278 // favour devices which have been used the least
279 if (usage < devusage) {
280 if (!IsListEmpty(&dev->space)) {
281 LIST_ENTRY* le2;
282
283 le2 = dev->space.Flink;
284 while (le2 != &dev->space) {
285 space* dh = CONTAINING_RECORD(le2, space, list_entry);
286
287 if ((dev2 != dev && dh->size >= max_stripe_size) ||
288 (dev2 == dev && dh->size >= max_stripe_size && dh->size < devdh->size)
289 ) {
290 devdh = dh;
291 dev2 = dev;
292 devusage = usage;
293 }
294
295 le2 = le2->Flink;
296 }
297 }
298 }
299 }
300
301 le = le->Flink;
302 }
303
304 if (!devdh) {
305 // Can't find hole of at least max_stripe_size; look for the largest one we can find
306
307 if (full_size)
308 return false;
309
310 le = Vcb->devices.Flink;
311 while (le != &Vcb->devices) {
312 device* dev = CONTAINING_RECORD(le, device, list_entry);
313 bool skip = false;
314
315 if (dev->readonly || dev->reloc || (!dev->devobj && !allow_missing)) {
316 le = le->Flink;
317 continue;
318 }
319
320 // skip this device if it already has a stripe
321 if (i > 0) {
322 for (k = 0; k < i; k++) {
323 if (stripes[k].device == dev) {
324 skip = true;
325 break;
326 }
327 }
328 }
329
330 if (!skip) {
331 if (!IsListEmpty(&dev->space)) {
332 LIST_ENTRY* le2;
333
334 le2 = dev->space.Flink;
335 while (le2 != &dev->space) {
336 space* dh = CONTAINING_RECORD(le2, space, list_entry);
337
338 if (!devdh || devdh->size < dh->size) {
339 devdh = dh;
340 dev2 = dev;
341 }
342
343 le2 = le2->Flink;
344 }
345 }
346 }
347
348 le = le->Flink;
349 }
350
351 if (!devdh)
352 return false;
353 }
354
355 stripes[i].dh = devdh;
356 stripes[i].device = dev2;
357
358 return true;
359 }
360
361 NTSTATUS alloc_chunk(device_extension* Vcb, uint64_t flags, chunk** pc, bool full_size) {
362 NTSTATUS Status;
363 uint64_t max_stripe_size, max_chunk_size, stripe_size, stripe_length, factor;
364 uint64_t total_size = 0, logaddr;
365 uint16_t i, type, num_stripes, sub_stripes, max_stripes, min_stripes, allowed_missing;
366 stripe* stripes = NULL;
367 uint16_t cisize;
368 CHUNK_ITEM_STRIPE* cis;
369 chunk* c = NULL;
370 space* s = NULL;
371 LIST_ENTRY* le;
372
373 le = Vcb->devices.Flink;
374 while (le != &Vcb->devices) {
375 device* dev = CONTAINING_RECORD(le, device, list_entry);
376 total_size += dev->devitem.num_bytes;
377
378 le = le->Flink;
379 }
380
381 TRACE("total_size = %I64x\n", total_size);
382
383 // We purposely check for DATA first - mixed blocks have the same size
384 // as DATA ones.
385 if (flags & BLOCK_FLAG_DATA) {
386 max_stripe_size = 0x40000000; // 1 GB
387 max_chunk_size = 10 * max_stripe_size;
388 } else if (flags & BLOCK_FLAG_METADATA) {
389 if (total_size > 0xC80000000) // 50 GB
390 max_stripe_size = 0x40000000; // 1 GB
391 else
392 max_stripe_size = 0x10000000; // 256 MB
393
394 max_chunk_size = max_stripe_size;
395 } else if (flags & BLOCK_FLAG_SYSTEM) {
396 max_stripe_size = 0x2000000; // 32 MB
397 max_chunk_size = 2 * max_stripe_size;
398 } else {
399 ERR("unknown chunk type\n");
400 return STATUS_INTERNAL_ERROR;
401 }
402
403 if (flags & BLOCK_FLAG_DUPLICATE) {
404 min_stripes = 2;
405 max_stripes = 2;
406 sub_stripes = 0;
407 type = BLOCK_FLAG_DUPLICATE;
408 allowed_missing = 0;
409 } else if (flags & BLOCK_FLAG_RAID0) {
410 min_stripes = 2;
411 max_stripes = (uint16_t)min(0xffff, Vcb->superblock.num_devices);
412 sub_stripes = 0;
413 type = BLOCK_FLAG_RAID0;
414 allowed_missing = 0;
415 } else if (flags & BLOCK_FLAG_RAID1) {
416 min_stripes = 2;
417 max_stripes = 2;
418 sub_stripes = 1;
419 type = BLOCK_FLAG_RAID1;
420 allowed_missing = 1;
421 } else if (flags & BLOCK_FLAG_RAID10) {
422 min_stripes = 4;
423 max_stripes = (uint16_t)min(0xffff, Vcb->superblock.num_devices);
424 sub_stripes = 2;
425 type = BLOCK_FLAG_RAID10;
426 allowed_missing = 1;
427 } else if (flags & BLOCK_FLAG_RAID5) {
428 min_stripes = 3;
429 max_stripes = (uint16_t)min(0xffff, Vcb->superblock.num_devices);
430 sub_stripes = 1;
431 type = BLOCK_FLAG_RAID5;
432 allowed_missing = 1;
433 } else if (flags & BLOCK_FLAG_RAID6) {
434 min_stripes = 4;
435 max_stripes = 257;
436 sub_stripes = 1;
437 type = BLOCK_FLAG_RAID6;
438 allowed_missing = 2;
439 } else { // SINGLE
440 min_stripes = 1;
441 max_stripes = 1;
442 sub_stripes = 1;
443 type = 0;
444 allowed_missing = 0;
445 }
446
447 if (max_chunk_size > total_size / 10) { // cap at 10%
448 max_chunk_size = total_size / 10;
449 max_stripe_size = max_chunk_size / min_stripes;
450 }
451
452 TRACE("would allocate a new chunk of %I64x bytes and stripe %I64x\n", max_chunk_size, max_stripe_size);
453
454 stripes = ExAllocatePoolWithTag(PagedPool, sizeof(stripe) * max_stripes, ALLOC_TAG);
455 if (!stripes) {
456 ERR("out of memory\n");
457 Status = STATUS_INSUFFICIENT_RESOURCES;
458 goto end;
459 }
460
461 num_stripes = 0;
462
463 if (type == BLOCK_FLAG_DUPLICATE) {
464 if (!find_new_dup_stripes(Vcb, stripes, max_stripe_size, full_size)) {
465 Status = STATUS_DISK_FULL;
466 goto end;
467 }
468 else
469 num_stripes = max_stripes;
470 } else {
471 for (i = 0; i < max_stripes; i++) {
472 if (!find_new_stripe(Vcb, stripes, i, max_stripe_size, false, full_size))
473 break;
474 else
475 num_stripes++;
476 }
477 }
478
479 if (num_stripes < min_stripes && Vcb->options.allow_degraded && allowed_missing > 0) {
480 uint16_t added_missing = 0;
481
482 for (i = num_stripes; i < max_stripes; i++) {
483 if (!find_new_stripe(Vcb, stripes, i, max_stripe_size, true, full_size))
484 break;
485 else {
486 added_missing++;
487 if (added_missing >= allowed_missing)
488 break;
489 }
490 }
491
492 num_stripes += added_missing;
493 }
494
495 // for RAID10, round down to an even number of stripes
496 if (type == BLOCK_FLAG_RAID10 && (num_stripes % sub_stripes) != 0) {
497 num_stripes -= num_stripes % sub_stripes;
498 }
499
500 if (num_stripes < min_stripes) {
501 WARN("found %u stripes, needed at least %u\n", num_stripes, min_stripes);
502 Status = STATUS_DISK_FULL;
503 goto end;
504 }
505
506 c = ExAllocatePoolWithTag(NonPagedPool, sizeof(chunk), ALLOC_TAG);
507 if (!c) {
508 ERR("out of memory\n");
509 Status = STATUS_INSUFFICIENT_RESOURCES;
510 goto end;
511 }
512
513 c->devices = NULL;
514
515 cisize = sizeof(CHUNK_ITEM) + (num_stripes * sizeof(CHUNK_ITEM_STRIPE));
516 c->chunk_item = ExAllocatePoolWithTag(NonPagedPool, cisize, ALLOC_TAG);
517 if (!c->chunk_item) {
518 ERR("out of memory\n");
519 Status = STATUS_INSUFFICIENT_RESOURCES;
520 goto end;
521 }
522
523 stripe_length = 0x10000; // FIXME? BTRFS_STRIPE_LEN in kernel
524
525 if (type == BLOCK_FLAG_DUPLICATE && stripes[1].dh == stripes[0].dh)
526 stripe_size = min(stripes[0].dh->size / 2, max_stripe_size);
527 else {
528 stripe_size = max_stripe_size;
529 for (i = 0; i < num_stripes; i++) {
530 if (stripes[i].dh->size < stripe_size)
531 stripe_size = stripes[i].dh->size;
532 }
533 }
534
535 if (type == 0 || type == BLOCK_FLAG_DUPLICATE || type == BLOCK_FLAG_RAID1)
536 factor = 1;
537 else if (type == BLOCK_FLAG_RAID0)
538 factor = num_stripes;
539 else if (type == BLOCK_FLAG_RAID10)
540 factor = num_stripes / sub_stripes;
541 else if (type == BLOCK_FLAG_RAID5)
542 factor = num_stripes - 1;
543 else if (type == BLOCK_FLAG_RAID6)
544 factor = num_stripes - 2;
545
546 if (stripe_size * factor > max_chunk_size)
547 stripe_size = max_chunk_size / factor;
548
549 if (stripe_size % stripe_length > 0)
550 stripe_size -= stripe_size % stripe_length;
551
552 if (stripe_size == 0) {
553 ERR("not enough free space found (stripe_size == 0)\n");
554 Status = STATUS_DISK_FULL;
555 goto end;
556 }
557
558 c->chunk_item->size = stripe_size * factor;
559 c->chunk_item->root_id = Vcb->extent_root->id;
560 c->chunk_item->stripe_length = stripe_length;
561 c->chunk_item->type = flags;
562 c->chunk_item->opt_io_alignment = (uint32_t)c->chunk_item->stripe_length;
563 c->chunk_item->opt_io_width = (uint32_t)c->chunk_item->stripe_length;
564 c->chunk_item->sector_size = stripes[0].device->devitem.minimal_io_size;
565 c->chunk_item->num_stripes = num_stripes;
566 c->chunk_item->sub_stripes = sub_stripes;
567
568 c->devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * num_stripes, ALLOC_TAG);
569 if (!c->devices) {
570 ERR("out of memory\n");
571 Status = STATUS_INSUFFICIENT_RESOURCES;
572 goto end;
573 }
574
575 cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
576 for (i = 0; i < num_stripes; i++) {
577 cis[i].dev_id = stripes[i].device->devitem.dev_id;
578
579 if (type == BLOCK_FLAG_DUPLICATE && i == 1 && stripes[i].dh == stripes[0].dh)
580 cis[i].offset = stripes[0].dh->address + stripe_size;
581 else
582 cis[i].offset = stripes[i].dh->address;
583
584 cis[i].dev_uuid = stripes[i].device->devitem.device_uuid;
585
586 c->devices[i] = stripes[i].device;
587 }
588
589 logaddr = find_new_chunk_address(Vcb, c->chunk_item->size);
590
591 Vcb->superblock.chunk_root_generation = Vcb->superblock.generation;
592
593 c->size = cisize;
594 c->offset = logaddr;
595 c->used = c->oldused = 0;
596 c->cache = c->old_cache = NULL;
597 c->readonly = false;
598 c->reloc = false;
599 c->last_alloc_set = false;
600 c->last_stripe = 0;
601 c->cache_loaded = true;
602 c->changed = false;
603 c->space_changed = false;
604 c->balance_num = 0;
605
606 InitializeListHead(&c->space);
607 InitializeListHead(&c->space_size);
608 InitializeListHead(&c->deleting);
609 InitializeListHead(&c->changed_extents);
610
611 InitializeListHead(&c->range_locks);
612 ExInitializeResourceLite(&c->range_locks_lock);
613 KeInitializeEvent(&c->range_locks_event, NotificationEvent, false);
614
615 InitializeListHead(&c->partial_stripes);
616 ExInitializeResourceLite(&c->partial_stripes_lock);
617
618 ExInitializeResourceLite(&c->lock);
619 ExInitializeResourceLite(&c->changed_extents_lock);
620
621 s = ExAllocatePoolWithTag(NonPagedPool, sizeof(space), ALLOC_TAG);
622 if (!s) {
623 ERR("out of memory\n");
624 Status = STATUS_INSUFFICIENT_RESOURCES;
625 goto end;
626 }
627
628 s->address = c->offset;
629 s->size = c->chunk_item->size;
630 InsertTailList(&c->space, &s->list_entry);
631 InsertTailList(&c->space_size, &s->list_entry_size);
632
633 protect_superblocks(c);
634
635 for (i = 0; i < num_stripes; i++) {
636 stripes[i].device->devitem.bytes_used += stripe_size;
637
638 space_list_subtract2(&stripes[i].device->space, NULL, cis[i].offset, stripe_size, NULL, NULL);
639 }
640
641 Status = STATUS_SUCCESS;
642
643 if (flags & BLOCK_FLAG_RAID5 || flags & BLOCK_FLAG_RAID6)
644 Vcb->superblock.incompat_flags |= BTRFS_INCOMPAT_FLAGS_RAID56;
645
646 end:
647 if (stripes)
648 ExFreePool(stripes);
649
650 if (!NT_SUCCESS(Status)) {
651 if (c) {
652 if (c->devices)
653 ExFreePool(c->devices);
654
655 if (c->chunk_item)
656 ExFreePool(c->chunk_item);
657
658 ExFreePool(c);
659 }
660
661 if (s) ExFreePool(s);
662 } else {
663 bool done = false;
664
665 le = Vcb->chunks.Flink;
666 while (le != &Vcb->chunks) {
667 chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry);
668
669 if (c2->offset > c->offset) {
670 InsertHeadList(le->Blink, &c->list_entry);
671 done = true;
672 break;
673 }
674
675 le = le->Flink;
676 }
677
678 if (!done)
679 InsertTailList(&Vcb->chunks, &c->list_entry);
680
681 c->created = true;
682 c->changed = true;
683 c->space_changed = true;
684 c->list_entry_balance.Flink = NULL;
685
686 *pc = c;
687 }
688
689 return Status;
690 }
691
692 static NTSTATUS prepare_raid0_write(_Pre_satisfies_(_Curr_->chunk_item->num_stripes>0) _In_ chunk* c, _In_ uint64_t address, _In_reads_bytes_(length) void* data,
693 _In_ uint32_t length, _In_ write_stripe* stripes, _In_ PIRP Irp, _In_ uint64_t irp_offset, _In_ write_data_context* wtc) {
694 uint64_t startoff, endoff;
695 uint16_t startoffstripe, endoffstripe, stripenum;
696 uint64_t pos, *stripeoff;
697 uint32_t i;
698 bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
699 PMDL master_mdl;
700 PFN_NUMBER* pfns;
701
702 stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes, ALLOC_TAG);
703 if (!stripeoff) {
704 ERR("out of memory\n");
705 return STATUS_INSUFFICIENT_RESOURCES;
706 }
707
708 get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
709 get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
710
711 if (file_write) {
712 master_mdl = Irp->MdlAddress;
713
714 pfns = (PFN_NUMBER*)(Irp->MdlAddress + 1);
715 pfns = &pfns[irp_offset >> PAGE_SHIFT];
716 } else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
717 wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
718 if (!wtc->scratch) {
719 ERR("out of memory\n");
720 return STATUS_INSUFFICIENT_RESOURCES;
721 }
722
723 RtlCopyMemory(wtc->scratch, data, length);
724
725 master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
726 if (!master_mdl) {
727 ERR("out of memory\n");
728 return STATUS_INSUFFICIENT_RESOURCES;
729 }
730
731 MmBuildMdlForNonPagedPool(master_mdl);
732
733 wtc->mdl = master_mdl;
734
735 pfns = (PFN_NUMBER*)(master_mdl + 1);
736 } else {
737 NTSTATUS Status = STATUS_SUCCESS;
738
739 master_mdl = IoAllocateMdl(data, length, false, false, NULL);
740 if (!master_mdl) {
741 ERR("out of memory\n");
742 return STATUS_INSUFFICIENT_RESOURCES;
743 }
744
745 _SEH2_TRY {
746 MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
747 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
748 Status = _SEH2_GetExceptionCode();
749 } _SEH2_END;
750
751 if (!NT_SUCCESS(Status)) {
752 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
753 IoFreeMdl(master_mdl);
754 return Status;
755 }
756
757 wtc->mdl = master_mdl;
758
759 pfns = (PFN_NUMBER*)(master_mdl + 1);
760 }
761
762 for (i = 0; i < c->chunk_item->num_stripes; i++) {
763 if (startoffstripe > i)
764 stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
765 else if (startoffstripe == i)
766 stripes[i].start = startoff;
767 else
768 stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
769
770 if (endoffstripe > i)
771 stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
772 else if (endoffstripe == i)
773 stripes[i].end = endoff + 1;
774 else
775 stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length);
776
777 if (stripes[i].start != stripes[i].end) {
778 stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
779 if (!stripes[i].mdl) {
780 ERR("IoAllocateMdl failed\n");
781 ExFreePool(stripeoff);
782 return STATUS_INSUFFICIENT_RESOURCES;
783 }
784 }
785 }
786
787 pos = 0;
788 RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes);
789
790 stripenum = startoffstripe;
791
792 while (pos < length) {
793 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripenum].mdl + 1);
794
795 if (pos == 0) {
796 uint32_t writelen = (uint32_t)min(stripes[stripenum].end - stripes[stripenum].start,
797 c->chunk_item->stripe_length - (stripes[stripenum].start % c->chunk_item->stripe_length));
798
799 RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
800
801 stripeoff[stripenum] += writelen;
802 pos += writelen;
803 } else if (length - pos < c->chunk_item->stripe_length) {
804 RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)((length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
805 break;
806 } else {
807 RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
808
809 stripeoff[stripenum] += c->chunk_item->stripe_length;
810 pos += c->chunk_item->stripe_length;
811 }
812
813 stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
814 }
815
816 ExFreePool(stripeoff);
817
818 return STATUS_SUCCESS;
819 }
820
821 static NTSTATUS prepare_raid10_write(_Pre_satisfies_(_Curr_->chunk_item->sub_stripes>0&&_Curr_->chunk_item->num_stripes>=_Curr_->chunk_item->sub_stripes) _In_ chunk* c,
822 _In_ uint64_t address, _In_reads_bytes_(length) void* data, _In_ uint32_t length, _In_ write_stripe* stripes,
823 _In_ PIRP Irp, _In_ uint64_t irp_offset, _In_ write_data_context* wtc) {
824 uint64_t startoff, endoff;
825 uint16_t startoffstripe, endoffstripe, stripenum;
826 uint64_t pos, *stripeoff;
827 uint32_t i;
828 bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
829 PMDL master_mdl;
830 PFN_NUMBER* pfns;
831
832 get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &startoff, &startoffstripe);
833 get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &endoff, &endoffstripe);
834
835 stripenum = startoffstripe;
836 startoffstripe *= c->chunk_item->sub_stripes;
837 endoffstripe *= c->chunk_item->sub_stripes;
838
839 if (file_write) {
840 master_mdl = Irp->MdlAddress;
841
842 pfns = (PFN_NUMBER*)(Irp->MdlAddress + 1);
843 pfns = &pfns[irp_offset >> PAGE_SHIFT];
844 } else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
845 wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
846 if (!wtc->scratch) {
847 ERR("out of memory\n");
848 return STATUS_INSUFFICIENT_RESOURCES;
849 }
850
851 RtlCopyMemory(wtc->scratch, data, length);
852
853 master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
854 if (!master_mdl) {
855 ERR("out of memory\n");
856 return STATUS_INSUFFICIENT_RESOURCES;
857 }
858
859 MmBuildMdlForNonPagedPool(master_mdl);
860
861 wtc->mdl = master_mdl;
862
863 pfns = (PFN_NUMBER*)(master_mdl + 1);
864 } else {
865 NTSTATUS Status = STATUS_SUCCESS;
866
867 master_mdl = IoAllocateMdl(data, length, false, false, NULL);
868 if (!master_mdl) {
869 ERR("out of memory\n");
870 return STATUS_INSUFFICIENT_RESOURCES;
871 }
872
873 _SEH2_TRY {
874 MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
875 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
876 Status = _SEH2_GetExceptionCode();
877 } _SEH2_END;
878
879 if (!NT_SUCCESS(Status)) {
880 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
881 IoFreeMdl(master_mdl);
882 return Status;
883 }
884
885 wtc->mdl = master_mdl;
886
887 pfns = (PFN_NUMBER*)(master_mdl + 1);
888 }
889
890 for (i = 0; i < c->chunk_item->num_stripes; i += c->chunk_item->sub_stripes) {
891 uint16_t j;
892
893 if (startoffstripe > i)
894 stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
895 else if (startoffstripe == i)
896 stripes[i].start = startoff;
897 else
898 stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
899
900 if (endoffstripe > i)
901 stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
902 else if (endoffstripe == i)
903 stripes[i].end = endoff + 1;
904 else
905 stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length);
906
907 stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
908 if (!stripes[i].mdl) {
909 ERR("IoAllocateMdl failed\n");
910 return STATUS_INSUFFICIENT_RESOURCES;
911 }
912
913 for (j = 1; j < c->chunk_item->sub_stripes; j++) {
914 stripes[i+j].start = stripes[i].start;
915 stripes[i+j].end = stripes[i].end;
916 stripes[i+j].data = stripes[i].data;
917 stripes[i+j].mdl = stripes[i].mdl;
918 }
919 }
920
921 pos = 0;
922
923 stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes, ALLOC_TAG);
924 if (!stripeoff) {
925 ERR("out of memory\n");
926 return STATUS_INSUFFICIENT_RESOURCES;
927 }
928
929 RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
930
931 while (pos < length) {
932 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripenum * c->chunk_item->sub_stripes].mdl + 1);
933
934 if (pos == 0) {
935 uint32_t writelen = (uint32_t)min(stripes[stripenum * c->chunk_item->sub_stripes].end - stripes[stripenum * c->chunk_item->sub_stripes].start,
936 c->chunk_item->stripe_length - (stripes[stripenum * c->chunk_item->sub_stripes].start % c->chunk_item->stripe_length));
937
938 RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
939
940 stripeoff[stripenum] += writelen;
941 pos += writelen;
942 } else if (length - pos < c->chunk_item->stripe_length) {
943 RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)((length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
944 break;
945 } else {
946 RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
947
948 stripeoff[stripenum] += c->chunk_item->stripe_length;
949 pos += c->chunk_item->stripe_length;
950 }
951
952 stripenum = (stripenum + 1) % (c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
953 }
954
955 ExFreePool(stripeoff);
956
957 return STATUS_SUCCESS;
958 }
959
960 static NTSTATUS add_partial_stripe(device_extension* Vcb, chunk *c, uint64_t address, uint32_t length, void* data) {
961 NTSTATUS Status;
962 LIST_ENTRY* le;
963 partial_stripe* ps;
964 uint64_t stripe_addr;
965 uint16_t num_data_stripes;
966
967 num_data_stripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2);
968 stripe_addr = address - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length));
969
970 ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, true);
971
972 le = c->partial_stripes.Flink;
973 while (le != &c->partial_stripes) {
974 ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
975
976 if (ps->address == stripe_addr) {
977 // update existing entry
978
979 RtlCopyMemory(ps->data + address - stripe_addr, data, length);
980 RtlClearBits(&ps->bmp, (ULONG)((address - stripe_addr) / Vcb->superblock.sector_size), length / Vcb->superblock.sector_size);
981
982 // if now filled, flush
983 if (RtlAreBitsClear(&ps->bmp, 0, (ULONG)((num_data_stripes * c->chunk_item->stripe_length) / Vcb->superblock.sector_size))) {
984 Status = flush_partial_stripe(Vcb, c, ps);
985 if (!NT_SUCCESS(Status)) {
986 ERR("flush_partial_stripe returned %08x\n", Status);
987 goto end;
988 }
989
990 RemoveEntryList(&ps->list_entry);
991
992 if (ps->bmparr)
993 ExFreePool(ps->bmparr);
994
995 ExFreePool(ps);
996 }
997
998 Status = STATUS_SUCCESS;
999 goto end;
1000 } else if (ps->address > stripe_addr)
1001 break;
1002
1003 le = le->Flink;
1004 }
1005
1006 // add new entry
1007
1008 ps = ExAllocatePoolWithTag(NonPagedPool, offsetof(partial_stripe, data[0]) + (ULONG)(num_data_stripes * c->chunk_item->stripe_length), ALLOC_TAG);
1009 if (!ps) {
1010 ERR("out of memory\n");
1011 Status = STATUS_INSUFFICIENT_RESOURCES;
1012 goto end;
1013 }
1014
1015 ps->bmplen = (ULONG)(num_data_stripes * c->chunk_item->stripe_length) / Vcb->superblock.sector_size;
1016
1017 ps->address = stripe_addr;
1018 ps->bmparr = ExAllocatePoolWithTag(NonPagedPool, (size_t)sector_align(((ps->bmplen / 8) + 1), sizeof(ULONG)), ALLOC_TAG);
1019 if (!ps->bmparr) {
1020 ERR("out of memory\n");
1021 ExFreePool(ps);
1022 Status = STATUS_INSUFFICIENT_RESOURCES;
1023 goto end;
1024 }
1025
1026 RtlInitializeBitMap(&ps->bmp, ps->bmparr, ps->bmplen);
1027 RtlSetAllBits(&ps->bmp);
1028
1029 RtlCopyMemory(ps->data + address - stripe_addr, data, length);
1030 RtlClearBits(&ps->bmp, (ULONG)((address - stripe_addr) / Vcb->superblock.sector_size), length / Vcb->superblock.sector_size);
1031
1032 InsertHeadList(le->Blink, &ps->list_entry);
1033
1034 Status = STATUS_SUCCESS;
1035
1036 end:
1037 ExReleaseResourceLite(&c->partial_stripes_lock);
1038
1039 return Status;
1040 }
1041
1042 typedef struct {
1043 PMDL mdl;
1044 PFN_NUMBER* pfns;
1045 } log_stripe;
1046
1047 static NTSTATUS prepare_raid5_write(device_extension* Vcb, chunk* c, uint64_t address, void* data, uint32_t length, write_stripe* stripes, PIRP Irp,
1048 uint64_t irp_offset, ULONG priority, write_data_context* wtc) {
1049 uint64_t startoff, endoff, parity_start, parity_end;
1050 uint16_t startoffstripe, endoffstripe, parity, num_data_stripes = c->chunk_item->num_stripes - 1;
1051 uint64_t pos, parity_pos, *stripeoff = NULL;
1052 uint32_t i;
1053 bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
1054 PMDL master_mdl;
1055 NTSTATUS Status;
1056 PFN_NUMBER *pfns, *parity_pfns;
1057 log_stripe* log_stripes = NULL;
1058
1059 if ((address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1060 uint64_t delta = (address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length);
1061
1062 delta = min(irp_offset + length, delta);
1063 Status = add_partial_stripe(Vcb, c, address + length - delta, (uint32_t)delta, (uint8_t*)data + irp_offset + length - delta);
1064 if (!NT_SUCCESS(Status)) {
1065 ERR("add_partial_stripe returned %08x\n", Status);
1066 goto exit;
1067 }
1068
1069 length -= (uint32_t)delta;
1070 }
1071
1072 if (length > 0 && (address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1073 uint64_t delta = (num_data_stripes * c->chunk_item->stripe_length) - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length));
1074
1075 Status = add_partial_stripe(Vcb, c, address, (uint32_t)delta, (uint8_t*)data + irp_offset);
1076 if (!NT_SUCCESS(Status)) {
1077 ERR("add_partial_stripe returned %08x\n", Status);
1078 goto exit;
1079 }
1080
1081 address += delta;
1082 length -= (uint32_t)delta;
1083 irp_offset += delta;
1084 }
1085
1086 if (length == 0) {
1087 Status = STATUS_SUCCESS;
1088 goto exit;
1089 }
1090
1091 get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, num_data_stripes, &startoff, &startoffstripe);
1092 get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, num_data_stripes, &endoff, &endoffstripe);
1093
1094 pos = 0;
1095 while (pos < length) {
1096 parity = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1097
1098 if (pos == 0) {
1099 uint16_t stripe = (parity + startoffstripe + 1) % c->chunk_item->num_stripes;
1100 ULONG skip, writelen;
1101
1102 i = startoffstripe;
1103 while (stripe != parity) {
1104 if (i == startoffstripe) {
1105 writelen = (ULONG)min(length, c->chunk_item->stripe_length - (startoff % c->chunk_item->stripe_length));
1106
1107 stripes[stripe].start = startoff;
1108 stripes[stripe].end = startoff + writelen;
1109
1110 pos += writelen;
1111
1112 if (pos == length)
1113 break;
1114 } else {
1115 writelen = (ULONG)min(length - pos, c->chunk_item->stripe_length);
1116
1117 stripes[stripe].start = startoff - (startoff % c->chunk_item->stripe_length);
1118 stripes[stripe].end = stripes[stripe].start + writelen;
1119
1120 pos += writelen;
1121
1122 if (pos == length)
1123 break;
1124 }
1125
1126 i++;
1127 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1128 }
1129
1130 if (pos == length)
1131 break;
1132
1133 for (i = 0; i < startoffstripe; i++) {
1134 stripe = (parity + i + 1) % c->chunk_item->num_stripes;
1135
1136 stripes[stripe].start = stripes[stripe].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1137 }
1138
1139 stripes[parity].start = stripes[parity].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1140
1141 if (length - pos > c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length) {
1142 skip = (ULONG)(((length - pos) / (c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length)) - 1);
1143
1144 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1145 stripes[i].end += skip * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1146 }
1147
1148 pos += skip * num_data_stripes * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1149 }
1150 } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1151 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1152 stripes[i].end += c->chunk_item->stripe_length;
1153 }
1154
1155 pos += c->chunk_item->stripe_length * num_data_stripes;
1156 } else {
1157 uint16_t stripe = (parity + 1) % c->chunk_item->num_stripes;
1158
1159 i = 0;
1160 while (stripe != parity) {
1161 if (endoffstripe == i) {
1162 stripes[stripe].end = endoff + 1;
1163 break;
1164 } else if (endoffstripe > i)
1165 stripes[stripe].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1166
1167 i++;
1168 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1169 }
1170
1171 break;
1172 }
1173 }
1174
1175 parity_start = 0xffffffffffffffff;
1176 parity_end = 0;
1177
1178 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1179 if (stripes[i].start != 0 || stripes[i].end != 0) {
1180 parity_start = min(stripes[i].start, parity_start);
1181 parity_end = max(stripes[i].end, parity_end);
1182 }
1183 }
1184
1185 if (parity_end == parity_start) {
1186 Status = STATUS_SUCCESS;
1187 goto exit;
1188 }
1189
1190 parity = (((address - c->offset) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1191 stripes[parity].start = parity_start;
1192
1193 parity = (((address - c->offset + length - 1) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1194 stripes[parity].end = parity_end;
1195
1196 log_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(log_stripe) * num_data_stripes, ALLOC_TAG);
1197 if (!log_stripes) {
1198 ERR("out of memory\n");
1199 Status = STATUS_INSUFFICIENT_RESOURCES;
1200 goto exit;
1201 }
1202
1203 RtlZeroMemory(log_stripes, sizeof(log_stripe) * num_data_stripes);
1204
1205 for (i = 0; i < num_data_stripes; i++) {
1206 log_stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(parity_end - parity_start), false, false, NULL);
1207 if (!log_stripes[i].mdl) {
1208 ERR("out of memory\n");
1209 Status = STATUS_INSUFFICIENT_RESOURCES;
1210 goto exit;
1211 }
1212
1213 log_stripes[i].mdl->MdlFlags |= MDL_PARTIAL;
1214 log_stripes[i].pfns = (PFN_NUMBER*)(log_stripes[i].mdl + 1);
1215 }
1216
1217 wtc->parity1 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG);
1218 if (!wtc->parity1) {
1219 ERR("out of memory\n");
1220 Status = STATUS_INSUFFICIENT_RESOURCES;
1221 goto exit;
1222 }
1223
1224 wtc->parity1_mdl = IoAllocateMdl(wtc->parity1, (ULONG)(parity_end - parity_start), false, false, NULL);
1225 if (!wtc->parity1_mdl) {
1226 ERR("out of memory\n");
1227 Status = STATUS_INSUFFICIENT_RESOURCES;
1228 goto exit;
1229 }
1230
1231 MmBuildMdlForNonPagedPool(wtc->parity1_mdl);
1232
1233 if (file_write)
1234 master_mdl = Irp->MdlAddress;
1235 else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
1236 wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1237 if (!wtc->scratch) {
1238 ERR("out of memory\n");
1239 Status = STATUS_INSUFFICIENT_RESOURCES;
1240 goto exit;
1241 }
1242
1243 RtlCopyMemory(wtc->scratch, (uint8_t*)data + irp_offset, length);
1244
1245 master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
1246 if (!master_mdl) {
1247 ERR("out of memory\n");
1248 Status = STATUS_INSUFFICIENT_RESOURCES;
1249 goto exit;
1250 }
1251
1252 MmBuildMdlForNonPagedPool(master_mdl);
1253
1254 wtc->mdl = master_mdl;
1255 } else {
1256 master_mdl = IoAllocateMdl((uint8_t*)data + irp_offset, length, false, false, NULL);
1257 if (!master_mdl) {
1258 ERR("out of memory\n");
1259 Status = STATUS_INSUFFICIENT_RESOURCES;
1260 goto exit;
1261 }
1262
1263 Status = STATUS_SUCCESS;
1264
1265 _SEH2_TRY {
1266 MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
1267 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1268 Status = _SEH2_GetExceptionCode();
1269 } _SEH2_END;
1270
1271 if (!NT_SUCCESS(Status)) {
1272 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1273 IoFreeMdl(master_mdl);
1274 return Status;
1275 }
1276
1277 wtc->mdl = master_mdl;
1278 }
1279
1280 pfns = (PFN_NUMBER*)(master_mdl + 1);
1281 parity_pfns = (PFN_NUMBER*)(wtc->parity1_mdl + 1);
1282
1283 if (file_write)
1284 pfns = &pfns[irp_offset >> PAGE_SHIFT];
1285
1286 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1287 if (stripes[i].start != stripes[i].end) {
1288 stripes[i].mdl = IoAllocateMdl((uint8_t*)MmGetMdlVirtualAddress(master_mdl) + irp_offset, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
1289 if (!stripes[i].mdl) {
1290 ERR("IoAllocateMdl failed\n");
1291 Status = STATUS_INSUFFICIENT_RESOURCES;
1292 goto exit;
1293 }
1294 }
1295 }
1296
1297 stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes, ALLOC_TAG);
1298 if (!stripeoff) {
1299 ERR("out of memory\n");
1300 Status = STATUS_INSUFFICIENT_RESOURCES;
1301 goto exit;
1302 }
1303
1304 RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes);
1305
1306 pos = 0;
1307 parity_pos = 0;
1308
1309 while (pos < length) {
1310 PFN_NUMBER* stripe_pfns;
1311
1312 parity = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1313
1314 if (pos == 0) {
1315 uint16_t stripe = (parity + startoffstripe + 1) % c->chunk_item->num_stripes;
1316 uint32_t writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start,
1317 c->chunk_item->stripe_length - (stripes[stripe].start % c->chunk_item->stripe_length)));
1318 uint32_t maxwritelen = writelen;
1319
1320 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1321
1322 RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1323
1324 RtlCopyMemory(log_stripes[startoffstripe].pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1325 log_stripes[startoffstripe].pfns += writelen >> PAGE_SHIFT;
1326
1327 stripeoff[stripe] = writelen;
1328 pos += writelen;
1329
1330 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1331 i = startoffstripe + 1;
1332
1333 while (stripe != parity) {
1334 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1335 writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1336
1337 if (writelen == 0)
1338 break;
1339
1340 if (writelen > maxwritelen)
1341 maxwritelen = writelen;
1342
1343 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1344
1345 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1346 log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1347
1348 stripeoff[stripe] = writelen;
1349 pos += writelen;
1350
1351 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1352 i++;
1353 }
1354
1355 stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1);
1356
1357 RtlCopyMemory(stripe_pfns, parity_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1358 stripeoff[parity] = maxwritelen;
1359 parity_pos = maxwritelen;
1360 } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1361 uint16_t stripe = (parity + 1) % c->chunk_item->num_stripes;
1362
1363 i = 0;
1364 while (stripe != parity) {
1365 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1366
1367 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1368
1369 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1370 log_stripes[i].pfns += c->chunk_item->stripe_length >> PAGE_SHIFT;
1371
1372 stripeoff[stripe] += c->chunk_item->stripe_length;
1373 pos += c->chunk_item->stripe_length;
1374
1375 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1376 i++;
1377 }
1378
1379 stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1);
1380
1381 RtlCopyMemory(&stripe_pfns[stripeoff[parity] >> PAGE_SHIFT], &parity_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1382 stripeoff[parity] += c->chunk_item->stripe_length;
1383 parity_pos += c->chunk_item->stripe_length;
1384 } else {
1385 uint16_t stripe = (parity + 1) % c->chunk_item->num_stripes;
1386 uint32_t writelen, maxwritelen = 0;
1387
1388 i = 0;
1389 while (pos < length) {
1390 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1391 writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1392
1393 if (writelen == 0)
1394 break;
1395
1396 if (writelen > maxwritelen)
1397 maxwritelen = writelen;
1398
1399 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1400
1401 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1402 log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1403
1404 stripeoff[stripe] += writelen;
1405 pos += writelen;
1406
1407 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1408 i++;
1409 }
1410
1411 stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1);
1412
1413 RtlCopyMemory(&stripe_pfns[stripeoff[parity] >> PAGE_SHIFT], &parity_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1414 }
1415 }
1416
1417 for (i = 0; i < num_data_stripes; i++) {
1418 uint8_t* ss = MmGetSystemAddressForMdlSafe(log_stripes[i].mdl, priority);
1419
1420 if (i == 0)
1421 RtlCopyMemory(wtc->parity1, ss, (uint32_t)(parity_end - parity_start));
1422 else
1423 do_xor(wtc->parity1, ss, (uint32_t)(parity_end - parity_start));
1424 }
1425
1426 Status = STATUS_SUCCESS;
1427
1428 exit:
1429 if (log_stripes) {
1430 for (i = 0; i < num_data_stripes; i++) {
1431 if (log_stripes[i].mdl)
1432 IoFreeMdl(log_stripes[i].mdl);
1433 }
1434
1435 ExFreePool(log_stripes);
1436 }
1437
1438 if (stripeoff)
1439 ExFreePool(stripeoff);
1440
1441 return Status;
1442 }
1443
1444 static NTSTATUS prepare_raid6_write(device_extension* Vcb, chunk* c, uint64_t address, void* data, uint32_t length, write_stripe* stripes, PIRP Irp,
1445 uint64_t irp_offset, ULONG priority, write_data_context* wtc) {
1446 uint64_t startoff, endoff, parity_start, parity_end;
1447 uint16_t startoffstripe, endoffstripe, parity1, num_data_stripes = c->chunk_item->num_stripes - 2;
1448 uint64_t pos, parity_pos, *stripeoff = NULL;
1449 uint32_t i;
1450 bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
1451 PMDL master_mdl;
1452 NTSTATUS Status;
1453 PFN_NUMBER *pfns, *parity1_pfns, *parity2_pfns;
1454 log_stripe* log_stripes = NULL;
1455
1456 if ((address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1457 uint64_t delta = (address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length);
1458
1459 delta = min(irp_offset + length, delta);
1460 Status = add_partial_stripe(Vcb, c, address + length - delta, (uint32_t)delta, (uint8_t*)data + irp_offset + length - delta);
1461 if (!NT_SUCCESS(Status)) {
1462 ERR("add_partial_stripe returned %08x\n", Status);
1463 goto exit;
1464 }
1465
1466 length -= (uint32_t)delta;
1467 }
1468
1469 if (length > 0 && (address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1470 uint64_t delta = (num_data_stripes * c->chunk_item->stripe_length) - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length));
1471
1472 Status = add_partial_stripe(Vcb, c, address, (uint32_t)delta, (uint8_t*)data + irp_offset);
1473 if (!NT_SUCCESS(Status)) {
1474 ERR("add_partial_stripe returned %08x\n", Status);
1475 goto exit;
1476 }
1477
1478 address += delta;
1479 length -= (uint32_t)delta;
1480 irp_offset += delta;
1481 }
1482
1483 if (length == 0) {
1484 Status = STATUS_SUCCESS;
1485 goto exit;
1486 }
1487
1488 get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, num_data_stripes, &startoff, &startoffstripe);
1489 get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, num_data_stripes, &endoff, &endoffstripe);
1490
1491 pos = 0;
1492 while (pos < length) {
1493 parity1 = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1494
1495 if (pos == 0) {
1496 uint16_t stripe = (parity1 + startoffstripe + 2) % c->chunk_item->num_stripes;
1497 uint16_t parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1498 ULONG skip, writelen;
1499
1500 i = startoffstripe;
1501 while (stripe != parity1) {
1502 if (i == startoffstripe) {
1503 writelen = (ULONG)min(length, c->chunk_item->stripe_length - (startoff % c->chunk_item->stripe_length));
1504
1505 stripes[stripe].start = startoff;
1506 stripes[stripe].end = startoff + writelen;
1507
1508 pos += writelen;
1509
1510 if (pos == length)
1511 break;
1512 } else {
1513 writelen = (ULONG)min(length - pos, c->chunk_item->stripe_length);
1514
1515 stripes[stripe].start = startoff - (startoff % c->chunk_item->stripe_length);
1516 stripes[stripe].end = stripes[stripe].start + writelen;
1517
1518 pos += writelen;
1519
1520 if (pos == length)
1521 break;
1522 }
1523
1524 i++;
1525 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1526 }
1527
1528 if (pos == length)
1529 break;
1530
1531 for (i = 0; i < startoffstripe; i++) {
1532 stripe = (parity1 + i + 2) % c->chunk_item->num_stripes;
1533
1534 stripes[stripe].start = stripes[stripe].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1535 }
1536
1537 stripes[parity1].start = stripes[parity1].end = stripes[parity2].start = stripes[parity2].end =
1538 startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1539
1540 if (length - pos > c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length) {
1541 skip = (ULONG)(((length - pos) / (c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length)) - 1);
1542
1543 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1544 stripes[i].end += skip * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1545 }
1546
1547 pos += skip * num_data_stripes * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1548 }
1549 } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1550 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1551 stripes[i].end += c->chunk_item->stripe_length;
1552 }
1553
1554 pos += c->chunk_item->stripe_length * num_data_stripes;
1555 } else {
1556 uint16_t stripe = (parity1 + 2) % c->chunk_item->num_stripes;
1557
1558 i = 0;
1559 while (stripe != parity1) {
1560 if (endoffstripe == i) {
1561 stripes[stripe].end = endoff + 1;
1562 break;
1563 } else if (endoffstripe > i)
1564 stripes[stripe].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1565
1566 i++;
1567 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1568 }
1569
1570 break;
1571 }
1572 }
1573
1574 parity_start = 0xffffffffffffffff;
1575 parity_end = 0;
1576
1577 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1578 if (stripes[i].start != 0 || stripes[i].end != 0) {
1579 parity_start = min(stripes[i].start, parity_start);
1580 parity_end = max(stripes[i].end, parity_end);
1581 }
1582 }
1583
1584 if (parity_end == parity_start) {
1585 Status = STATUS_SUCCESS;
1586 goto exit;
1587 }
1588
1589 parity1 = (((address - c->offset) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1590 stripes[parity1].start = stripes[(parity1 + 1) % c->chunk_item->num_stripes].start = parity_start;
1591
1592 parity1 = (((address - c->offset + length - 1) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1593 stripes[parity1].end = stripes[(parity1 + 1) % c->chunk_item->num_stripes].end = parity_end;
1594
1595 log_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(log_stripe) * num_data_stripes, ALLOC_TAG);
1596 if (!log_stripes) {
1597 ERR("out of memory\n");
1598 Status = STATUS_INSUFFICIENT_RESOURCES;
1599 goto exit;
1600 }
1601
1602 RtlZeroMemory(log_stripes, sizeof(log_stripe) * num_data_stripes);
1603
1604 for (i = 0; i < num_data_stripes; i++) {
1605 log_stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(parity_end - parity_start), false, false, NULL);
1606 if (!log_stripes[i].mdl) {
1607 ERR("out of memory\n");
1608 Status = STATUS_INSUFFICIENT_RESOURCES;
1609 goto exit;
1610 }
1611
1612 log_stripes[i].mdl->MdlFlags |= MDL_PARTIAL;
1613 log_stripes[i].pfns = (PFN_NUMBER*)(log_stripes[i].mdl + 1);
1614 }
1615
1616 wtc->parity1 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG);
1617 if (!wtc->parity1) {
1618 ERR("out of memory\n");
1619 Status = STATUS_INSUFFICIENT_RESOURCES;
1620 goto exit;
1621 }
1622
1623 wtc->parity2 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG);
1624 if (!wtc->parity2) {
1625 ERR("out of memory\n");
1626 Status = STATUS_INSUFFICIENT_RESOURCES;
1627 goto exit;
1628 }
1629
1630 wtc->parity1_mdl = IoAllocateMdl(wtc->parity1, (ULONG)(parity_end - parity_start), false, false, NULL);
1631 if (!wtc->parity1_mdl) {
1632 ERR("out of memory\n");
1633 Status = STATUS_INSUFFICIENT_RESOURCES;
1634 goto exit;
1635 }
1636
1637 MmBuildMdlForNonPagedPool(wtc->parity1_mdl);
1638
1639 wtc->parity2_mdl = IoAllocateMdl(wtc->parity2, (ULONG)(parity_end - parity_start), false, false, NULL);
1640 if (!wtc->parity2_mdl) {
1641 ERR("out of memory\n");
1642 Status = STATUS_INSUFFICIENT_RESOURCES;
1643 goto exit;
1644 }
1645
1646 MmBuildMdlForNonPagedPool(wtc->parity2_mdl);
1647
1648 if (file_write)
1649 master_mdl = Irp->MdlAddress;
1650 else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
1651 wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1652 if (!wtc->scratch) {
1653 ERR("out of memory\n");
1654 Status = STATUS_INSUFFICIENT_RESOURCES;
1655 goto exit;
1656 }
1657
1658 RtlCopyMemory(wtc->scratch, (uint8_t*)data + irp_offset, length);
1659
1660 master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
1661 if (!master_mdl) {
1662 ERR("out of memory\n");
1663 Status = STATUS_INSUFFICIENT_RESOURCES;
1664 goto exit;
1665 }
1666
1667 MmBuildMdlForNonPagedPool(master_mdl);
1668
1669 wtc->mdl = master_mdl;
1670 } else {
1671 master_mdl = IoAllocateMdl((uint8_t*)data + irp_offset, length, false, false, NULL);
1672 if (!master_mdl) {
1673 ERR("out of memory\n");
1674 Status = STATUS_INSUFFICIENT_RESOURCES;
1675 goto exit;
1676 }
1677
1678 Status = STATUS_SUCCESS;
1679
1680 _SEH2_TRY {
1681 MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
1682 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1683 Status = _SEH2_GetExceptionCode();
1684 } _SEH2_END;
1685
1686 if (!NT_SUCCESS(Status)) {
1687 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1688 IoFreeMdl(master_mdl);
1689 goto exit;
1690 }
1691
1692 wtc->mdl = master_mdl;
1693 }
1694
1695 pfns = (PFN_NUMBER*)(master_mdl + 1);
1696 parity1_pfns = (PFN_NUMBER*)(wtc->parity1_mdl + 1);
1697 parity2_pfns = (PFN_NUMBER*)(wtc->parity2_mdl + 1);
1698
1699 if (file_write)
1700 pfns = &pfns[irp_offset >> PAGE_SHIFT];
1701
1702 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1703 if (stripes[i].start != stripes[i].end) {
1704 stripes[i].mdl = IoAllocateMdl((uint8_t*)MmGetMdlVirtualAddress(master_mdl) + irp_offset, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
1705 if (!stripes[i].mdl) {
1706 ERR("IoAllocateMdl failed\n");
1707 Status = STATUS_INSUFFICIENT_RESOURCES;
1708 goto exit;
1709 }
1710 }
1711 }
1712
1713 stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes, ALLOC_TAG);
1714 if (!stripeoff) {
1715 ERR("out of memory\n");
1716 Status = STATUS_INSUFFICIENT_RESOURCES;
1717 goto exit;
1718 }
1719
1720 RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes);
1721
1722 pos = 0;
1723 parity_pos = 0;
1724
1725 while (pos < length) {
1726 PFN_NUMBER* stripe_pfns;
1727
1728 parity1 = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1729
1730 if (pos == 0) {
1731 uint16_t stripe = (parity1 + startoffstripe + 2) % c->chunk_item->num_stripes, parity2;
1732 uint32_t writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start,
1733 c->chunk_item->stripe_length - (stripes[stripe].start % c->chunk_item->stripe_length)));
1734 uint32_t maxwritelen = writelen;
1735
1736 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1737
1738 RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1739
1740 RtlCopyMemory(log_stripes[startoffstripe].pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1741 log_stripes[startoffstripe].pfns += writelen >> PAGE_SHIFT;
1742
1743 stripeoff[stripe] = writelen;
1744 pos += writelen;
1745
1746 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1747 i = startoffstripe + 1;
1748
1749 while (stripe != parity1) {
1750 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1751 writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1752
1753 if (writelen == 0)
1754 break;
1755
1756 if (writelen > maxwritelen)
1757 maxwritelen = writelen;
1758
1759 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1760
1761 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1762 log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1763
1764 stripeoff[stripe] = writelen;
1765 pos += writelen;
1766
1767 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1768 i++;
1769 }
1770
1771 stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1);
1772 RtlCopyMemory(stripe_pfns, parity1_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1773 stripeoff[parity1] = maxwritelen;
1774
1775 parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1776
1777 stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1);
1778 RtlCopyMemory(stripe_pfns, parity2_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1779 stripeoff[parity2] = maxwritelen;
1780
1781 parity_pos = maxwritelen;
1782 } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1783 uint16_t stripe = (parity1 + 2) % c->chunk_item->num_stripes, parity2;
1784
1785 i = 0;
1786 while (stripe != parity1) {
1787 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1788
1789 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1790
1791 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1792 log_stripes[i].pfns += c->chunk_item->stripe_length >> PAGE_SHIFT;
1793
1794 stripeoff[stripe] += c->chunk_item->stripe_length;
1795 pos += c->chunk_item->stripe_length;
1796
1797 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1798 i++;
1799 }
1800
1801 stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1);
1802 RtlCopyMemory(&stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT], &parity1_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1803 stripeoff[parity1] += c->chunk_item->stripe_length;
1804
1805 parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1806
1807 stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1);
1808 RtlCopyMemory(&stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT], &parity2_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1809 stripeoff[parity2] += c->chunk_item->stripe_length;
1810
1811 parity_pos += c->chunk_item->stripe_length;
1812 } else {
1813 uint16_t stripe = (parity1 + 2) % c->chunk_item->num_stripes, parity2;
1814 uint32_t writelen, maxwritelen = 0;
1815
1816 i = 0;
1817 while (pos < length) {
1818 stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1819 writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1820
1821 if (writelen == 0)
1822 break;
1823
1824 if (writelen > maxwritelen)
1825 maxwritelen = writelen;
1826
1827 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1828
1829 RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1830 log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1831
1832 stripeoff[stripe] += writelen;
1833 pos += writelen;
1834
1835 stripe = (stripe + 1) % c->chunk_item->num_stripes;
1836 i++;
1837 }
1838
1839 stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1);
1840 RtlCopyMemory(&stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT], &parity1_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1841
1842 parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1843
1844 stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1);
1845 RtlCopyMemory(&stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT], &parity2_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1846 }
1847 }
1848
1849 for (i = 0; i < num_data_stripes; i++) {
1850 uint8_t* ss = MmGetSystemAddressForMdlSafe(log_stripes[c->chunk_item->num_stripes - 3 - i].mdl, priority);
1851
1852 if (i == 0) {
1853 RtlCopyMemory(wtc->parity1, ss, (ULONG)(parity_end - parity_start));
1854 RtlCopyMemory(wtc->parity2, ss, (ULONG)(parity_end - parity_start));
1855 } else {
1856 do_xor(wtc->parity1, ss, (uint32_t)(parity_end - parity_start));
1857
1858 galois_double(wtc->parity2, (uint32_t)(parity_end - parity_start));
1859 do_xor(wtc->parity2, ss, (uint32_t)(parity_end - parity_start));
1860 }
1861 }
1862
1863 Status = STATUS_SUCCESS;
1864
1865 exit:
1866 if (log_stripes) {
1867 for (i = 0; i < num_data_stripes; i++) {
1868 if (log_stripes[i].mdl)
1869 IoFreeMdl(log_stripes[i].mdl);
1870 }
1871
1872 ExFreePool(log_stripes);
1873 }
1874
1875 if (stripeoff)
1876 ExFreePool(stripeoff);
1877
1878 return Status;
1879 }
1880
1881 NTSTATUS write_data(_In_ device_extension* Vcb, _In_ uint64_t address, _In_reads_bytes_(length) void* data, _In_ uint32_t length, _In_ write_data_context* wtc,
1882 _In_opt_ PIRP Irp, _In_opt_ chunk* c, _In_ bool file_write, _In_ uint64_t irp_offset, _In_ ULONG priority) {
1883 NTSTATUS Status;
1884 uint32_t i;
1885 CHUNK_ITEM_STRIPE* cis;
1886 write_stripe* stripes = NULL;
1887 uint64_t total_writing = 0;
1888 ULONG allowed_missing, missing;
1889
1890 TRACE("(%p, %I64x, %p, %x)\n", Vcb, address, data, length);
1891
1892 if (!c) {
1893 c = get_chunk_from_address(Vcb, address);
1894 if (!c) {
1895 ERR("could not get chunk for address %I64x\n", address);
1896 return STATUS_INTERNAL_ERROR;
1897 }
1898 }
1899
1900 stripes = ExAllocatePoolWithTag(PagedPool, sizeof(write_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
1901 if (!stripes) {
1902 ERR("out of memory\n");
1903 return STATUS_INSUFFICIENT_RESOURCES;
1904 }
1905
1906 RtlZeroMemory(stripes, sizeof(write_stripe) * c->chunk_item->num_stripes);
1907
1908 cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1909
1910 if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
1911 Status = prepare_raid0_write(c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, wtc);
1912 if (!NT_SUCCESS(Status)) {
1913 ERR("prepare_raid0_write returned %08x\n", Status);
1914 goto prepare_failed;
1915 }
1916
1917 allowed_missing = 0;
1918 } else if (c->chunk_item->type & BLOCK_FLAG_RAID10) {
1919 Status = prepare_raid10_write(c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, wtc);
1920 if (!NT_SUCCESS(Status)) {
1921 ERR("prepare_raid10_write returned %08x\n", Status);
1922 goto prepare_failed;
1923 }
1924
1925 allowed_missing = 1;
1926 } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
1927 Status = prepare_raid5_write(Vcb, c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, priority, wtc);
1928 if (!NT_SUCCESS(Status)) {
1929 ERR("prepare_raid5_write returned %08x\n", Status);
1930 goto prepare_failed;
1931 }
1932
1933 allowed_missing = 1;
1934 } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
1935 Status = prepare_raid6_write(Vcb, c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, priority, wtc);
1936 if (!NT_SUCCESS(Status)) {
1937 ERR("prepare_raid6_write returned %08x\n", Status);
1938 goto prepare_failed;
1939 }
1940
1941 allowed_missing = 2;
1942 } else { // write same data to every location - SINGLE, DUP, RAID1
1943 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1944 stripes[i].start = address - c->offset;
1945 stripes[i].end = stripes[i].start + length;
1946 stripes[i].data = data;
1947 stripes[i].irp_offset = irp_offset;
1948
1949 if (c->devices[i]->devobj) {
1950 if (file_write) {
1951 uint8_t* va;
1952 ULONG writelen = (ULONG)(stripes[i].end - stripes[i].start);
1953
1954 va = (uint8_t*)MmGetMdlVirtualAddress(Irp->MdlAddress) + stripes[i].irp_offset;
1955
1956 stripes[i].mdl = IoAllocateMdl(va, writelen, false, false, NULL);
1957 if (!stripes[i].mdl) {
1958 ERR("IoAllocateMdl failed\n");
1959 Status = STATUS_INSUFFICIENT_RESOURCES;
1960 goto prepare_failed;
1961 }
1962
1963 IoBuildPartialMdl(Irp->MdlAddress, stripes[i].mdl, va, writelen);
1964 } else {
1965 stripes[i].mdl = IoAllocateMdl(stripes[i].data, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
1966 if (!stripes[i].mdl) {
1967 ERR("IoAllocateMdl failed\n");
1968 Status = STATUS_INSUFFICIENT_RESOURCES;
1969 goto prepare_failed;
1970 }
1971
1972 Status = STATUS_SUCCESS;
1973
1974 _SEH2_TRY {
1975 MmProbeAndLockPages(stripes[i].mdl, KernelMode, IoReadAccess);
1976 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1977 Status = _SEH2_GetExceptionCode();
1978 } _SEH2_END;
1979
1980 if (!NT_SUCCESS(Status)) {
1981 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1982 IoFreeMdl(stripes[i].mdl);
1983 stripes[i].mdl = NULL;
1984 goto prepare_failed;
1985 }
1986 }
1987 }
1988 }
1989
1990 allowed_missing = c->chunk_item->num_stripes - 1;
1991 }
1992
1993 missing = 0;
1994 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1995 if (!c->devices[i]->devobj)
1996 missing++;
1997 }
1998
1999 if (missing > allowed_missing) {
2000 ERR("cannot write as %u missing devices (maximum %u)\n", missing, allowed_missing);
2001 Status = STATUS_DEVICE_NOT_READY;
2002 goto prepare_failed;
2003 }
2004
2005 for (i = 0; i < c->chunk_item->num_stripes; i++) {
2006 write_data_stripe* stripe;
2007 PIO_STACK_LOCATION IrpSp;
2008
2009 stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_stripe), ALLOC_TAG);
2010 if (!stripe) {
2011 ERR("out of memory\n");
2012 Status = STATUS_INSUFFICIENT_RESOURCES;
2013 goto end;
2014 }
2015
2016 if (stripes[i].start == stripes[i].end || !c->devices[i]->devobj) {
2017 stripe->status = WriteDataStatus_Ignore;
2018 stripe->Irp = NULL;
2019 stripe->buf = stripes[i].data;
2020 stripe->mdl = NULL;
2021 } else {
2022 stripe->context = (struct _write_data_context*)wtc;
2023 stripe->buf = stripes[i].data;
2024 stripe->device = c->devices[i];
2025 RtlZeroMemory(&stripe->iosb, sizeof(IO_STATUS_BLOCK));
2026 stripe->status = WriteDataStatus_Pending;
2027 stripe->mdl = stripes[i].mdl;
2028
2029 if (!Irp) {
2030 stripe->Irp = IoAllocateIrp(stripe->device->devobj->StackSize, false);
2031
2032 if (!stripe->Irp) {
2033 ERR("IoAllocateIrp failed\n");
2034 ExFreePool(stripe);
2035 Status = STATUS_INSUFFICIENT_RESOURCES;
2036 goto end;
2037 }
2038 } else {
2039 stripe->Irp = IoMakeAssociatedIrp(Irp, stripe->device->devobj->StackSize);
2040
2041 if (!stripe->Irp) {
2042 ERR("IoMakeAssociatedIrp failed\n");
2043 ExFreePool(stripe);
2044 Status = STATUS_INSUFFICIENT_RESOURCES;
2045 goto end;
2046 }
2047 }
2048
2049 IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
2050 IrpSp->MajorFunction = IRP_MJ_WRITE;
2051 IrpSp->FileObject = stripe->device->fileobj;
2052
2053 if (stripe->device->devobj->Flags & DO_BUFFERED_IO) {
2054 stripe->Irp->AssociatedIrp.SystemBuffer = MmGetSystemAddressForMdlSafe(stripes[i].mdl, priority);
2055
2056 stripe->Irp->Flags = IRP_BUFFERED_IO;
2057 } else if (stripe->device->devobj->Flags & DO_DIRECT_IO)
2058 stripe->Irp->MdlAddress = stripe->mdl;
2059 else
2060 stripe->Irp->UserBuffer = MmGetSystemAddressForMdlSafe(stripes[i].mdl, priority);
2061
2062 #ifdef DEBUG_PARANOID
2063 if (stripes[i].end < stripes[i].start) {
2064 ERR("trying to write stripe with negative length (%I64x < %I64x)\n", stripes[i].end, stripes[i].start);
2065 int3;
2066 }
2067 #endif
2068
2069 IrpSp->Parameters.Write.Length = (ULONG)(stripes[i].end - stripes[i].start);
2070 IrpSp->Parameters.Write.ByteOffset.QuadPart = stripes[i].start + cis[i].offset;
2071
2072 total_writing += IrpSp->Parameters.Write.Length;
2073
2074 stripe->Irp->UserIosb = &stripe->iosb;
2075 wtc->stripes_left++;
2076
2077 IoSetCompletionRoutine(stripe->Irp, write_data_completion, stripe, true, true, true);
2078 }
2079
2080 InsertTailList(&wtc->stripes, &stripe->list_entry);
2081 }
2082
2083 if (diskacc)
2084 fFsRtlUpdateDiskCounters(0, total_writing);
2085
2086 Status = STATUS_SUCCESS;
2087
2088 end:
2089
2090 if (stripes) ExFreePool(stripes);
2091
2092 if (!NT_SUCCESS(Status))
2093 free_write_data_stripes(wtc);
2094
2095 return Status;
2096
2097 prepare_failed:
2098 for (i = 0; i < c->chunk_item->num_stripes; i++) {
2099 if (stripes[i].mdl && (i == 0 || stripes[i].mdl != stripes[i-1].mdl)) {
2100 if (stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
2101 MmUnlockPages(stripes[i].mdl);
2102
2103 IoFreeMdl(stripes[i].mdl);
2104 }
2105 }
2106
2107 if (wtc->parity1_mdl) {
2108 if (wtc->parity1_mdl->MdlFlags & MDL_PAGES_LOCKED)
2109 MmUnlockPages(wtc->parity1_mdl);
2110
2111 IoFreeMdl(wtc->parity1_mdl);
2112 wtc->parity1_mdl = NULL;
2113 }
2114
2115 if (wtc->parity2_mdl) {
2116 if (wtc->parity2_mdl->MdlFlags & MDL_PAGES_LOCKED)
2117 MmUnlockPages(wtc->parity2_mdl);
2118
2119 IoFreeMdl(wtc->parity2_mdl);
2120 wtc->parity2_mdl = NULL;
2121 }
2122
2123 if (wtc->mdl) {
2124 if (wtc->mdl->MdlFlags & MDL_PAGES_LOCKED)
2125 MmUnlockPages(wtc->mdl);
2126
2127 IoFreeMdl(wtc->mdl);
2128 wtc->mdl = NULL;
2129 }
2130
2131 if (wtc->parity1) {
2132 ExFreePool(wtc->parity1);
2133 wtc->parity1 = NULL;
2134 }
2135
2136 if (wtc->parity2) {
2137 ExFreePool(wtc->parity2);
2138 wtc->parity2 = NULL;
2139 }
2140
2141 if (wtc->scratch) {
2142 ExFreePool(wtc->scratch);
2143 wtc->scratch = NULL;
2144 }
2145
2146 ExFreePool(stripes);
2147 return Status;
2148 }
2149
2150 void get_raid56_lock_range(chunk* c, uint64_t address, uint64_t length, uint64_t* lockaddr, uint64_t* locklen) {
2151 uint64_t startoff, endoff;
2152 uint16_t startoffstripe, endoffstripe, datastripes;
2153
2154 datastripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2);
2155
2156 get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, datastripes, &startoff, &startoffstripe);
2157 get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, datastripes, &endoff, &endoffstripe);
2158
2159 startoff -= startoff % c->chunk_item->stripe_length;
2160 endoff = sector_align(endoff, c->chunk_item->stripe_length);
2161
2162 *lockaddr = c->offset + (startoff * datastripes);
2163 *locklen = (endoff - startoff) * datastripes;
2164 }
2165
2166 NTSTATUS write_data_complete(device_extension* Vcb, uint64_t address, void* data, uint32_t length, PIRP Irp, chunk* c, bool file_write, uint64_t irp_offset, ULONG priority) {
2167 write_data_context wtc;
2168 NTSTATUS Status;
2169 uint64_t lockaddr, locklen;
2170
2171 KeInitializeEvent(&wtc.Event, NotificationEvent, false);
2172 InitializeListHead(&wtc.stripes);
2173 wtc.stripes_left = 0;
2174 wtc.parity1 = wtc.parity2 = wtc.scratch = NULL;
2175 wtc.mdl = wtc.parity1_mdl = wtc.parity2_mdl = NULL;
2176
2177 if (!c) {
2178 c = get_chunk_from_address(Vcb, address);
2179 if (!c) {
2180 ERR("could not get chunk for address %I64x\n", address);
2181 return STATUS_INTERNAL_ERROR;
2182 }
2183 }
2184
2185 if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) {
2186 get_raid56_lock_range(c, address, length, &lockaddr, &locklen);
2187 chunk_lock_range(Vcb, c, lockaddr, locklen);
2188 }
2189
2190 _SEH2_TRY {
2191 Status = write_data(Vcb, address, data, length, &wtc, Irp, c, file_write, irp_offset, priority);
2192 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2193 Status = _SEH2_GetExceptionCode();
2194 } _SEH2_END;
2195
2196 if (!NT_SUCCESS(Status)) {
2197 ERR("write_data returned %08x\n", Status);
2198
2199 if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
2200 chunk_unlock_range(Vcb, c, lockaddr, locklen);
2201
2202 free_write_data_stripes(&wtc);
2203 return Status;
2204 }
2205
2206 if (wtc.stripes.Flink != &wtc.stripes) {
2207 // launch writes and wait
2208 LIST_ENTRY* le = wtc.stripes.Flink;
2209 bool no_wait = true;
2210
2211 while (le != &wtc.stripes) {
2212 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
2213
2214 if (stripe->status != WriteDataStatus_Ignore) {
2215 IoCallDriver(stripe->device->devobj, stripe->Irp);
2216 no_wait = false;
2217 }
2218
2219 le = le->Flink;
2220 }
2221
2222 if (!no_wait)
2223 KeWaitForSingleObject(&wtc.Event, Executive, KernelMode, false, NULL);
2224
2225 le = wtc.stripes.Flink;
2226 while (le != &wtc.stripes) {
2227 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
2228
2229 if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
2230 Status = stripe->iosb.Status;
2231
2232 log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS);
2233 break;
2234 }
2235
2236 le = le->Flink;
2237 }
2238
2239 free_write_data_stripes(&wtc);
2240 }
2241
2242 if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
2243 chunk_unlock_range(Vcb, c, lockaddr, locklen);
2244
2245 return Status;
2246 }
2247
2248 _Function_class_(IO_COMPLETION_ROUTINE)
2249 static NTSTATUS __stdcall write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
2250 write_data_stripe* stripe = conptr;
2251 write_data_context* context = (write_data_context*)stripe->context;
2252 LIST_ENTRY* le;
2253
2254 UNUSED(DeviceObject);
2255
2256 // FIXME - we need a lock here
2257
2258 if (stripe->status == WriteDataStatus_Cancelling) {
2259 stripe->status = WriteDataStatus_Cancelled;
2260 goto end;
2261 }
2262
2263 stripe->iosb = Irp->IoStatus;
2264
2265 if (NT_SUCCESS(Irp->IoStatus.Status)) {
2266 stripe->status = WriteDataStatus_Success;
2267 } else {
2268 le = context->stripes.Flink;
2269
2270 stripe->status = WriteDataStatus_Error;
2271
2272 while (le != &context->stripes) {
2273 write_data_stripe* s2 = CONTAINING_RECORD(le, write_data_stripe, list_entry);
2274
2275 if (s2->status == WriteDataStatus_Pending) {
2276 s2->status = WriteDataStatus_Cancelling;
2277 IoCancelIrp(s2->Irp);
2278 }
2279
2280 le = le->Flink;
2281 }
2282 }
2283
2284 end:
2285 if (InterlockedDecrement(&context->stripes_left) == 0)
2286 KeSetEvent(&context->Event, 0, false);
2287
2288 return STATUS_MORE_PROCESSING_REQUIRED;
2289 }
2290
2291 void free_write_data_stripes(write_data_context* wtc) {
2292 LIST_ENTRY* le;
2293 PMDL last_mdl = NULL;
2294
2295 if (wtc->parity1_mdl) {
2296 if (wtc->parity1_mdl->MdlFlags & MDL_PAGES_LOCKED)
2297 MmUnlockPages(wtc->parity1_mdl);
2298
2299 IoFreeMdl(wtc->parity1_mdl);
2300 }
2301
2302 if (wtc->parity2_mdl) {
2303 if (wtc->parity2_mdl->MdlFlags & MDL_PAGES_LOCKED)
2304 MmUnlockPages(wtc->parity2_mdl);
2305
2306 IoFreeMdl(wtc->parity2_mdl);
2307 }
2308
2309 if (wtc->mdl) {
2310 if (wtc->mdl->MdlFlags & MDL_PAGES_LOCKED)
2311 MmUnlockPages(wtc->mdl);
2312
2313 IoFreeMdl(wtc->mdl);
2314 }
2315
2316 if (wtc->parity1)
2317 ExFreePool(wtc->parity1);
2318
2319 if (wtc->parity2)
2320 ExFreePool(wtc->parity2);
2321
2322 if (wtc->scratch)
2323 ExFreePool(wtc->scratch);
2324
2325 le = wtc->stripes.Flink;
2326 while (le != &wtc->stripes) {
2327 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
2328
2329 if (stripe->mdl && stripe->mdl != last_mdl) {
2330 if (stripe->mdl->MdlFlags & MDL_PAGES_LOCKED)
2331 MmUnlockPages(stripe->mdl);
2332
2333 IoFreeMdl(stripe->mdl);
2334 }
2335
2336 last_mdl = stripe->mdl;
2337
2338 if (stripe->Irp)
2339 IoFreeIrp(stripe->Irp);
2340
2341 le = le->Flink;
2342 }
2343
2344 while (!IsListEmpty(&wtc->stripes)) {
2345 write_data_stripe* stripe = CONTAINING_RECORD(RemoveHeadList(&wtc->stripes), write_data_stripe, list_entry);
2346
2347 ExFreePool(stripe);
2348 }
2349 }
2350
2351 void add_extent(_In_ fcb* fcb, _In_ LIST_ENTRY* prevextle, _In_ __drv_aliasesMem extent* newext) {
2352 LIST_ENTRY* le = prevextle->Flink;
2353
2354 while (le != &fcb->extents) {
2355 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
2356
2357 if (ext->offset >= newext->offset) {
2358 InsertHeadList(ext->list_entry.Blink, &newext->list_entry);
2359 return;
2360 }
2361
2362 le = le->Flink;
2363 }
2364
2365 InsertTailList(&fcb->extents, &newext->list_entry);
2366 }
2367
2368 NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, uint64_t start_data, uint64_t end_data, PIRP Irp, LIST_ENTRY* rollback) {
2369 NTSTATUS Status;
2370 LIST_ENTRY* le;
2371
2372 le = fcb->extents.Flink;
2373
2374 while (le != &fcb->extents) {
2375 LIST_ENTRY* le2 = le->Flink;
2376 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
2377 EXTENT_DATA* ed = &ext->extent_data;
2378 EXTENT_DATA2* ed2 = NULL;
2379 uint64_t len;
2380
2381 if (!ext->ignore) {
2382 if (ed->type != EXTENT_TYPE_INLINE)
2383 ed2 = (EXTENT_DATA2*)ed->data;
2384
2385 len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
2386
2387 if (ext->offset < end_data && ext->offset + len > start_data) {
2388 if (ed->type == EXTENT_TYPE_INLINE) {
2389 if (start_data <= ext->offset && end_data >= ext->offset + len) { // remove all
2390 remove_fcb_extent(fcb, ext, rollback);
2391
2392 fcb->inode_item.st_blocks -= len;
2393 fcb->inode_item_changed = true;
2394 } else {
2395 ERR("trying to split inline extent\n");
2396 #ifdef DEBUG_PARANOID
2397 int3;
2398 #endif
2399 return STATUS_INTERNAL_ERROR;
2400 }
2401 } else if (ed->type != EXTENT_TYPE_INLINE) {
2402 if (start_data <= ext->offset && end_data >= ext->offset + len) { // remove all
2403 if (ed2->size != 0) {
2404 chunk* c;
2405
2406 fcb->inode_item.st_blocks -= len;
2407 fcb->inode_item_changed = true;
2408
2409 c = get_chunk_from_address(Vcb, ed2->address);
2410
2411 if (!c) {
2412 ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
2413 } else {
2414 Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1,
2415 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
2416 if (!NT_SUCCESS(Status)) {
2417 ERR("update_changed_extent_ref returned %08x\n", Status);
2418 goto end;
2419 }
2420 }
2421 }
2422
2423 remove_fcb_extent(fcb, ext, rollback);
2424 } else if (start_data <= ext->offset && end_data < ext->offset + len) { // remove beginning
2425 EXTENT_DATA2* ned2;
2426 extent* newext;
2427
2428 if (ed2->size != 0) {
2429 fcb->inode_item.st_blocks -= end_data - ext->offset;
2430 fcb->inode_item_changed = true;
2431 }
2432
2433 newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2434 if (!newext) {
2435 ERR("out of memory\n");
2436 Status = STATUS_INSUFFICIENT_RESOURCES;
2437 goto end;
2438 }
2439
2440 ned2 = (EXTENT_DATA2*)newext->extent_data.data;
2441
2442 newext->extent_data.generation = Vcb->superblock.generation;
2443 newext->extent_data.decoded_size = ed->decoded_size;
2444 newext->extent_data.compression = ed->compression;
2445 newext->extent_data.encryption = ed->encryption;
2446 newext->extent_data.encoding = ed->encoding;
2447 newext->extent_data.type = ed->type;
2448 ned2->address = ed2->address;
2449 ned2->size = ed2->size;
2450 ned2->offset = ed2->offset + (end_data - ext->offset);
2451 ned2->num_bytes = ed2->num_bytes - (end_data - ext->offset);
2452
2453 newext->offset = end_data;
2454 newext->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2455 newext->unique = ext->unique;
2456 newext->ignore = false;
2457 newext->inserted = true;
2458
2459 if (ext->csum) {
2460 if (ed->compression == BTRFS_COMPRESSION_NONE) {
2461 newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ned2->num_bytes * sizeof(uint32_t) / Vcb->superblock.sector_size), ALLOC_TAG);
2462 if (!newext->csum) {
2463 ERR("out of memory\n");
2464 Status = STATUS_INSUFFICIENT_RESOURCES;
2465 ExFreePool(newext);
2466 goto end;
2467 }
2468
2469 RtlCopyMemory(newext->csum, &ext->csum[(end_data - ext->offset) / Vcb->superblock.sector_size],
2470 (ULONG)(ned2->num_bytes * sizeof(uint32_t) / Vcb->superblock.sector_size));
2471 } else {
2472 newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2->size * sizeof(uint32_t) / Vcb->superblock.sector_size), ALLOC_TAG);
2473 if (!newext->csum) {
2474 ERR("out of memory\n");
2475 Status = STATUS_INSUFFICIENT_RESOURCES;
2476 ExFreePool(newext);
2477 goto end;
2478 }
2479
2480 RtlCopyMemory(newext->csum, ext->csum, (ULONG)(ed2->size * sizeof(uint32_t) / Vcb->superblock.sector_size));
2481 }
2482 } else
2483 newext->csum = NULL;
2484
2485 add_extent(fcb, &ext->list_entry, newext);
2486
2487 remove_fcb_extent(fcb, ext, rollback);
2488 } else if (start_data > ext->offset && end_data >= ext->offset + len) { // remove end
2489 EXTENT_DATA2* ned2;
2490 extent* newext;
2491
2492 if (ed2->size != 0) {
2493 fcb->inode_item.st_blocks -= ext->offset + len - start_data;
2494 fcb->inode_item_changed = true;
2495 }
2496
2497 newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2498 if (!newext) {
2499 ERR("out of memory\n");
2500 Status = STATUS_INSUFFICIENT_RESOURCES;
2501 goto end;
2502 }
2503
2504 ned2 = (EXTENT_DATA2*)newext->extent_data.data;
2505
2506 newext->extent_data.generation = Vcb->superblock.generation;
2507 newext->extent_data.decoded_size = ed->decoded_size;
2508 newext->extent_data.compression = ed->compression;
2509 newext->extent_data.encryption = ed->encryption;
2510 newext->extent_data.encoding = ed->encoding;
2511 newext->extent_data.type = ed->type;
2512 ned2->address = ed2->address;
2513 ned2->size = ed2->size;
2514 ned2->offset = ed2->offset;
2515 ned2->num_bytes = start_data - ext->offset;
2516
2517 newext->offset = ext->offset;
2518 newext->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2519 newext->unique = ext->unique;
2520 newext->ignore = false;
2521 newext->inserted = true;
2522
2523 if (ext->csum) {
2524 if (ed->compression == BTRFS_COMPRESSION_NONE) {
2525 newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ned2->num_bytes * sizeof(uint32_t) / Vcb->superblock.sector_size), ALLOC_TAG);
2526 if (!newext->csum) {
2527 ERR("out of memory\n");
2528 Status = STATUS_INSUFFICIENT_RESOURCES;
2529 ExFreePool(newext);
2530 goto end;
2531 }
2532
2533 RtlCopyMemory(newext->csum, ext->csum, (ULONG)(ned2->num_bytes * sizeof(uint32_t) / Vcb->superblock.sector_size));
2534 } else {
2535 newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2->size * sizeof(uint32_t) / Vcb->superblock.sector_size), ALLOC_TAG);
2536 if (!newext->csum) {
2537 ERR("out of memory\n");
2538 Status = STATUS_INSUFFICIENT_RESOURCES;
2539 ExFreePool(newext);
2540 goto end;
2541 }
2542
2543 RtlCopyMemory(newext->csum, ext->csum, (ULONG)(ed2->size * sizeof(uint32_t) / Vcb->superblock.sector_size));
2544 }
2545 } else
2546 newext->csum = NULL;
2547
2548 InsertHeadList(&ext->list_entry, &newext->list_entry);
2549
2550 remove_fcb_extent(fcb, ext, rollback);
2551 } else if (start_data > ext->offset && end_data < ext->offset + len) { // remove middle
2552 EXTENT_DATA2 *neda2, *nedb2;
2553 extent *newext1, *newext2;
2554
2555 if (ed2->size != 0) {
2556 chunk* c;
2557
2558 fcb->inode_item.st_blocks -= end_data - start_data;
2559 fcb->inode_item_changed = true;
2560
2561 c = get_chunk_from_address(Vcb, ed2->address);
2562
2563 if (!c) {
2564 ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
2565 } else {
2566 Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
2567 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
2568 if (!NT_SUCCESS(Status)) {
2569 ERR("update_changed_extent_ref returned %08x\n", Status);
2570 goto end;
2571 }
2572 }
2573 }
2574
2575 newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2576 if (!newext1) {
2577 ERR("out of memory\n");
2578 Status = STATUS_INSUFFICIENT_RESOURCES;
2579 goto end;
2580 }
2581
2582 newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2583 if (!newext2) {
2584 ERR("out of memory\n");
2585 Status = STATUS_INSUFFICIENT_RESOURCES;
2586 ExFreePool(newext1);
2587 goto end;
2588 }
2589
2590 neda2 = (EXTENT_DATA2*)newext1->extent_data.data;
2591
2592 newext1->extent_data.generation = Vcb->superblock.generation;
2593 newext1->extent_data.decoded_size = ed->decoded_size;
2594 newext1->extent_data.compression = ed->compression;
2595 newext1->extent_data.encryption = ed->encryption;
2596 newext1->extent_data.encoding = ed->encoding;
2597 newext1->extent_data.type = ed->type;
2598 neda2->address = ed2->address;
2599 neda2->size = ed2->size;
2600 neda2->offset = ed2->offset;
2601 neda2->num_bytes = start_data - ext->offset;
2602
2603 nedb2 = (EXTENT_DATA2*)newext2->extent_data.data;
2604
2605 newext2->extent_data.generation = Vcb->superblock.generation;
2606 newext2->extent_data.decoded_size = ed->decoded_size;
2607 newext2->extent_data.compression = ed->compression;
2608 newext2->extent_data.encryption = ed->encryption;
2609 newext2->extent_data.encoding = ed->encoding;
2610 newext2->extent_data.type = ed->type;
2611 nedb2->address = ed2->address;
2612 nedb2->size = ed2->size;
2613 nedb2->offset = ed2->offset + (end_data - ext->offset);
2614 nedb2->num_bytes = ext->offset + len - end_data;
2615
2616 newext1->offset = ext->offset;
2617 newext1->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2618 newext1->unique = ext->unique;
2619 newext1->ignore = false;
2620 newext1->inserted = true;
2621
2622 newext2->offset = end_data;
2623 newext2->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2624 newext2->unique = ext->unique;
2625 newext2->ignore = false;
2626 newext2->inserted = true;
2627
2628 if (ext->csum) {
2629 if (ed->compression == BTRFS_COMPRESSION_NONE) {
2630 newext1->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(neda2->num_bytes * sizeof(uint32_t) / Vcb->superblock.sector_size), ALLOC_TAG);
2631 if (!newext1->csum) {
2632 ERR("out of memory\n");
2633 Status = STATUS_INSUFFICIENT_RESOURCES;
2634 ExFreePool(newext1);
2635 ExFreePool(newext2);
2636 goto end;
2637 }
2638
2639 newext2->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(nedb2->num_bytes * sizeof(uint32_t) / Vcb->superblock.sector_size), ALLOC_TAG);
2640 if (!newext2->csum) {
2641 ERR("out of memory\n");
2642 Status = STATUS_INSUFFICIENT_RESOURCES;
2643 ExFreePool(newext1->csum);
2644 ExFreePool(newext1);
2645 ExFreePool(newext2);
2646 goto end;
2647 }
2648
2649 RtlCopyMemory(newext1->csum, ext->csum, (ULONG)(neda2->num_bytes * sizeof(uint32_t) / Vcb->superblock.sector_size));
2650 RtlCopyMemory(newext2->csum, &ext->csum[(end_data - ext->offset) / Vcb->superblock.sector_size],
2651 (ULONG)(nedb2->num_bytes * sizeof(uint32_t) / Vcb->superblock.sector_size));
2652 } else {
2653 newext1->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2->size * sizeof(uint32_t) / Vcb->superblock.sector_size), ALLOC_TAG);
2654 if (!newext1->csum) {
2655 ERR("out of memory\n");
2656 Status = STATUS_INSUFFICIENT_RESOURCES;
2657 ExFreePool(newext1);
2658 ExFreePool(newext2);
2659 goto end;
2660 }
2661
2662 newext2->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2->size * sizeof(uint32_t) / Vcb->superblock.sector_size), ALLOC_TAG);
2663 if (!newext2->csum) {
2664 ERR("out of memory\n");
2665 Status = STATUS_INSUFFICIENT_RESOURCES;
2666 ExFreePool(newext1->csum);
2667 ExFreePool(newext1);
2668 ExFreePool(newext2);
2669 goto end;
2670 }
2671
2672 RtlCopyMemory(newext1->csum, ext->csum, (ULONG)(ed2->size * sizeof(uint32_t) / Vcb->superblock.sector_size));
2673 RtlCopyMemory(newext2->csum, ext->csum, (ULONG)(ed2->size * sizeof(uint32_t) / Vcb->superblock.sector_size));
2674 }
2675 } else {
2676 newext1->csum = NULL;
2677 newext2->csum = NULL;
2678 }
2679
2680 InsertHeadList(&ext->list_entry, &newext1->list_entry);
2681 add_extent(fcb, &newext1->list_entry, newext2);
2682
2683 remove_fcb_extent(fcb, ext, rollback);
2684 }
2685 }
2686 }
2687 }
2688
2689 le = le2;
2690 }
2691
2692 Status = STATUS_SUCCESS;
2693
2694 end:
2695 fcb->extents_changed = true;
2696 mark_fcb_dirty(fcb);
2697
2698 return Status;
2699 }
2700
2701 void add_insert_extent_rollback(LIST_ENTRY* rollback, fcb* fcb, extent* ext) {
2702 rollback_extent* re;
2703
2704 re = ExAllocatePoolWithTag(NonPagedPool, sizeof(rollback_extent), ALLOC_TAG);
2705 if (!re) {
2706 ERR("out of memory\n");
2707 return;
2708 }
2709
2710 re->fcb = fcb;
2711 re->ext = ext;
2712
2713 add_rollback(rollback, ROLLBACK_INSERT_EXTENT, re);
2714 }
2715
2716 #ifdef _MSC_VER
2717 #pragma warning(push)
2718 #pragma warning(suppress: 28194)
2719 #endif
2720 NTSTATUS add_extent_to_fcb(_In_ fcb* fcb, _In_ uint64_t offset, _In_reads_bytes_(edsize) EXTENT_DATA* ed, _In_ uint16_t edsize,
2721 _In_ bool unique, _In_opt_ _When_(return >= 0, __drv_aliasesMem) uint32_t* csum, _In_ LIST_ENTRY* rollback) {
2722 extent* ext;
2723 LIST_ENTRY* le;
2724
2725 ext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + edsize, ALLOC_TAG);
2726 if (!ext) {
2727 ERR("out of memory\n");
2728 return STATUS_INSUFFICIENT_RESOURCES;
2729 }
2730
2731 ext->offset = offset;
2732 ext->datalen = edsize;
2733 ext->unique = unique;
2734 ext->ignore = false;
2735 ext->inserted = true;
2736 ext->csum = csum;
2737
2738 RtlCopyMemory(&ext->extent_data, ed, edsize);
2739
2740 le = fcb->extents.Flink;
2741 while (le != &fcb->extents) {
2742 extent* oldext = CONTAINING_RECORD(le, extent, list_entry);
2743
2744 if (oldext->offset >= offset) {
2745 InsertHeadList(le->Blink, &ext->list_entry);
2746 goto end;
2747 }
2748
2749 le = le->Flink;
2750 }
2751
2752 InsertTailList(&fcb->extents, &ext->list_entry);
2753
2754 end:
2755 add_insert_extent_rollback(rollback, fcb, ext);
2756
2757 return STATUS_SUCCESS;
2758 }
2759 #ifdef _MSC_VER
2760 #pragma warning(pop)
2761 #endif
2762
2763 static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback) {
2764 if (!ext->ignore) {
2765 rollback_extent* re;
2766
2767 ext->ignore = true;
2768
2769 re = ExAllocatePoolWithTag(NonPagedPool, sizeof(rollback_extent), ALLOC_TAG);
2770 if (!re) {
2771 ERR("out of memory\n");
2772 return;
2773 }
2774
2775 re->fcb = fcb;
2776 re->ext = ext;
2777
2778 add_rollback(rollback, ROLLBACK_DELETE_EXTENT, re);
2779 }
2780 }
2781
2782 NTSTATUS calc_csum(_In_ device_extension* Vcb, _In_reads_bytes_(sectors*Vcb->superblock.sector_size) uint8_t* data,
2783 _In_ uint32_t sectors, _Out_writes_bytes_(sectors*sizeof(uint32_t)) uint32_t* csum) {
2784 NTSTATUS Status;
2785 calc_job* cj;
2786
2787 // From experimenting, it seems that 40 sectors is roughly the crossover
2788 // point where offloading the crc32 calculation becomes worth it.
2789
2790 if (sectors < 40 || get_num_of_processors() < 2) {
2791 ULONG j;
2792
2793 for (j = 0; j < sectors; j++) {
2794 csum[j] = ~calc_crc32c(0xffffffff, data + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
2795 }
2796
2797 return STATUS_SUCCESS;
2798 }
2799
2800 Status = add_calc_job(Vcb, data, sectors, csum, &cj);
2801 if (!NT_SUCCESS(Status)) {
2802 ERR("add_calc_job returned %08x\n", Status);
2803 return Status;
2804 }
2805
2806 KeWaitForSingleObject(&cj->event, Executive, KernelMode, false, NULL);
2807 free_calc_job(cj);
2808
2809 return STATUS_SUCCESS;
2810 }
2811
2812 _Requires_lock_held_(c->lock)
2813 _When_(return != 0, _Releases_lock_(c->lock))
2814 bool insert_extent_chunk(_In_ device_extension* Vcb, _In_ fcb* fcb, _In_ chunk* c, _In_ uint64_t start_data, _In_ uint64_t length, _In_ bool prealloc, _In_opt_ void* data,
2815 _In_opt_ PIRP Irp, _In_ LIST_ENTRY* rollback, _In_ uint8_t compression, _In_ uint64_t decoded_size, _In_ bool file_write, _In_ uint64_t irp_offset) {
2816 uint64_t address;
2817 NTSTATUS Status;
2818 EXTENT_DATA* ed;
2819 EXTENT_DATA2* ed2;
2820 uint16_t edsize = (uint16_t)(offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2));
2821 uint32_t* csum = NULL;
2822
2823 TRACE("(%p, (%I64x, %I64x), %I64x, %I64x, %I64x, %u, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, c->offset, start_data, length, prealloc, data, rollback);
2824
2825 if (!find_data_address_in_chunk(Vcb, c, length, &address))
2826 return false;
2827
2828 // add extent data to inode
2829 ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
2830 if (!ed) {
2831 ERR("out of memory\n");
2832 return false;
2833 }
2834
2835 ed->generation = Vcb->superblock.generation;
2836 ed->decoded_size = decoded_size;
2837 ed->compression = compression;
2838 ed->encryption = BTRFS_ENCRYPTION_NONE;
2839 ed->encoding = BTRFS_ENCODING_NONE;
2840 ed->type = prealloc ? EXTENT_TYPE_PREALLOC : EXTENT_TYPE_REGULAR;
2841
2842 ed2 = (EXTENT_DATA2*)ed->data;
2843 ed2->address = address;
2844 ed2->size = length;
2845 ed2->offset = 0;
2846 ed2->num_bytes = decoded_size;
2847
2848 if (!prealloc && data && !(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
2849 ULONG sl = (ULONG)(length / Vcb->superblock.sector_size);
2850
2851 csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(uint32_t), ALLOC_TAG);
2852 if (!csum) {
2853 ERR("out of memory\n");
2854 ExFreePool(ed);
2855 return false;
2856 }
2857
2858 Status = calc_csum(Vcb, data, sl, csum);
2859 if (!NT_SUCCESS(Status)) {
2860 ERR("calc_csum returned %08x\n", Status);
2861 ExFreePool(csum);
2862 ExFreePool(ed);
2863 return false;
2864 }
2865 }
2866
2867 Status = add_extent_to_fcb(fcb, start_data, ed, edsize, true, csum, rollback);
2868 if (!NT_SUCCESS(Status)) {
2869 ERR("add_extent_to_fcb returned %08x\n", Status);
2870 if (csum) ExFreePool(csum);
2871 ExFreePool(ed);
2872 return false;
2873 }
2874
2875 ExFreePool(ed);
2876
2877 c->used += length;
2878 space_list_subtract(c, false, address, length, rollback);
2879
2880 fcb->inode_item.st_blocks += decoded_size;
2881
2882 fcb->extents_changed = true;
2883 fcb->inode_item_changed = true;
2884 mark_fcb_dirty(fcb);
2885
2886 ExAcquireResourceExclusiveLite(&c->changed_extents_lock, true);
2887
2888 add_changed_extent_ref(c, address, length, fcb->subvol->id, fcb->inode, start_data, 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
2889
2890 ExReleaseResourceLite(&c->changed_extents_lock);
2891
2892 release_chunk_lock(c, Vcb);
2893
2894 if (data) {
2895 Status = write_data_complete(Vcb, address, data, (uint32_t)length, Irp, NULL, file_write, irp_offset,
2896 fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
2897 if (!NT_SUCCESS(Status))
2898 ERR("write_data_complete returned %08x\n", Status);
2899 }
2900
2901 return true;
2902 }
2903
2904 static bool try_extend_data(device_extension* Vcb, fcb* fcb, uint64_t start_data, uint64_t length, void* data,
2905 PIRP Irp, uint64_t* written, bool file_write, uint64_t irp_offset, LIST_ENTRY* rollback) {
2906 bool success = false;
2907 EXTENT_DATA* ed;
2908 EXTENT_DATA2* ed2;
2909 chunk* c;
2910 LIST_ENTRY* le;
2911 extent* ext = NULL;
2912
2913 le = fcb->extents.Flink;
2914
2915 while (le != &fcb->extents) {
2916 extent* nextext = CONTAINING_RECORD(le, extent, list_entry);
2917
2918 if (!nextext->ignore) {
2919 if (nextext->offset == start_data) {
2920 ext = nextext;
2921 break;
2922 } else if (nextext->offset > start_data)
2923 break;
2924
2925 ext = nextext;
2926 }
2927
2928 le = le->Flink;
2929 }
2930
2931 if (!ext)
2932 return false;
2933
2934 ed = &ext->extent_data;
2935
2936 if (ed->type != EXTENT_TYPE_REGULAR && ed->type != EXTENT_TYPE_PREALLOC) {
2937 TRACE("not extending extent which is not regular or prealloc\n");
2938 return false;
2939 }
2940
2941 ed2 = (EXTENT_DATA2*)ed->data;
2942
2943 if (ext->offset + ed2->num_bytes != start_data) {
2944 TRACE("last EXTENT_DATA does not run up to start_data (%I64x + %I64x != %I64x)\n", ext->offset, ed2->num_bytes, start_data);
2945 return false;
2946 }
2947
2948 c = get_chunk_from_address(Vcb, ed2->address);
2949
2950 if (c->reloc || c->readonly || c->chunk_item->type != Vcb->data_flags)
2951 return false;
2952
2953 acquire_chunk_lock(c, Vcb);
2954
2955 if (length > c->chunk_item->size - c->used) {
2956 release_chunk_lock(c, Vcb);
2957 return false;
2958 }
2959
2960 if (!c->cache_loaded) {
2961 NTSTATUS Status = load_cache_chunk(Vcb, c, NULL);
2962
2963 if (!NT_SUCCESS(Status)) {
2964 ERR("load_cache_chunk returned %08x\n", Status);
2965 release_chunk_lock(c, Vcb);
2966 return false;
2967 }
2968 }
2969
2970 le = c->space.Flink;
2971 while (le != &c->space) {
2972 space* s = CONTAINING_RECORD(le, space, list_entry);
2973
2974 if (s->address == ed2->address + ed2->size) {
2975 uint64_t newlen = min(min(s->size, length), MAX_EXTENT_SIZE);
2976
2977 success = insert_extent_chunk(Vcb, fcb, c, start_data, newlen, false, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset);
2978
2979 if (success)
2980 *written += newlen;
2981 else
2982 release_chunk_lock(c, Vcb);
2983
2984 return success;
2985 } else if (s->address > ed2->address + ed2->size)
2986 break;
2987
2988 le = le->Flink;
2989 }
2990
2991 release_chunk_lock(c, Vcb);
2992
2993 return false;
2994 }
2995
2996 static NTSTATUS insert_chunk_fragmented(fcb* fcb, uint64_t start, uint64_t length, uint8_t* data, bool prealloc, LIST_ENTRY* rollback) {
2997 LIST_ENTRY* le;
2998 uint64_t flags = fcb->Vcb->data_flags;
2999 bool page_file = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE;
3000 NTSTATUS Status;
3001 chunk* c;
3002
3003 ExAcquireResourceSharedLite(&fcb->Vcb->chunk_lock, true);
3004
3005 // first create as many chunks as we can
3006 do {
3007 Status = alloc_chunk(fcb->Vcb, flags, &c, false);
3008 } while (NT_SUCCESS(Status));
3009
3010 if (Status != STATUS_DISK_FULL) {
3011 ERR("alloc_chunk returned %08x\n", Status);
3012 ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3013 return Status;
3014 }
3015
3016 le = fcb->Vcb->chunks.Flink;
3017 while (le != &fcb->Vcb->chunks) {
3018 c = CONTAINING_RECORD(le, chunk, list_entry);
3019
3020 if (!c->readonly && !c->reloc) {
3021 acquire_chunk_lock(c, fcb->Vcb);
3022
3023 if (c->chunk_item->type == flags) {
3024 while (!IsListEmpty(&c->space_size) && length > 0) {
3025 space* s = CONTAINING_RECORD(c->space_size.Flink, space, list_entry_size);
3026 uint64_t extlen = min(length, s->size);
3027
3028 if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, prealloc && !page_file, data, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, false, 0)) {
3029 start += extlen;
3030 length -= extlen;
3031 if (data) data += extlen;
3032
3033 acquire_chunk_lock(c, fcb->Vcb);
3034 }
3035 }
3036 }
3037
3038 release_chunk_lock(c, fcb->Vcb);
3039
3040 if (length == 0)
3041 break;
3042 }
3043
3044 le = le->Flink;
3045 }
3046
3047 ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3048
3049 return length == 0 ? STATUS_SUCCESS : STATUS_DISK_FULL;
3050 }
3051
3052 static NTSTATUS insert_prealloc_extent(fcb* fcb, uint64_t start, uint64_t length, LIST_ENTRY* rollback) {
3053 LIST_ENTRY* le;
3054 chunk* c;
3055 uint64_t flags;
3056 NTSTATUS Status;
3057 bool page_file = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE;
3058
3059 flags = fcb->Vcb->data_flags;
3060
3061 do {
3062 uint64_t extlen = min(MAX_EXTENT_SIZE, length);
3063
3064 ExAcquireResourceSharedLite(&fcb->Vcb->chunk_lock, true);
3065
3066 le = fcb->Vcb->chunks.Flink;
3067 while (le != &fcb->Vcb->chunks) {
3068 c = CONTAINING_RECORD(le, chunk, list_entry);
3069
3070 if (!c->readonly && !c->reloc) {
3071 acquire_chunk_lock(c, fcb->Vcb);
3072
3073 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) {
3074 if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, false, 0)) {
3075 ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3076 goto cont;
3077 }
3078 }
3079
3080 release_chunk_lock(c, fcb->Vcb);
3081 }
3082
3083 le = le->Flink;
3084 }
3085
3086 ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3087
3088 ExAcquireResourceExclusiveLite(&fcb->Vcb->chunk_lock, true);
3089
3090 Status = alloc_chunk(fcb->Vcb, flags, &c, false);
3091
3092 ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3093
3094 if (!NT_SUCCESS(Status)) {
3095 ERR("alloc_chunk returned %08x\n", Status);
3096 goto end;
3097 }
3098
3099 acquire_chunk_lock(c, fcb->Vcb);
3100
3101 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) {
3102 if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, false, 0))
3103 goto cont;
3104 }
3105
3106 release_chunk_lock(c, fcb->Vcb);
3107
3108 Status = insert_chunk_fragmented(fcb, start, length, NULL, true, rollback);
3109 if (!NT_SUCCESS(Status))
3110 ERR("insert_chunk_fragmented returned %08x\n", Status);
3111
3112 goto end;
3113
3114 cont:
3115 length -= extlen;
3116 start += extlen;
3117 } while (length > 0);
3118
3119 Status = STATUS_SUCCESS;
3120
3121 end:
3122 return Status;
3123 }
3124
3125 static NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, uint64_t start_data, uint64_t length, void* data,
3126 PIRP Irp, bool file_write, uint64_t irp_offset, LIST_ENTRY* rollback) {
3127 NTSTATUS Status;
3128 LIST_ENTRY* le;
3129 chunk* c;
3130 uint64_t flags, orig_length = length, written = 0;
3131
3132 TRACE("(%p, (%I64x, %I64x), %I64x, %I64x, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data);
3133
3134 if (start_data > 0) {
3135 try_extend_data(Vcb, fcb, start_data, length, data, Irp, &written, file_write, irp_offset, rollback);
3136
3137 if (written == length)
3138 return STATUS_SUCCESS;
3139 else if (written > 0) {
3140 start_data += written;
3141 irp_offset += written;
3142 length -= written;
3143 data = &((uint8_t*)data)[written];
3144 }
3145 }
3146
3147 flags = Vcb->data_flags;
3148
3149 while (written < orig_length) {
3150 uint64_t newlen = min(length, MAX_EXTENT_SIZE);
3151 bool done = false;
3152
3153 // Rather than necessarily writing the whole extent at once, we deal with it in blocks of 128 MB.
3154 // First, see if we can write the extent part to an existing chunk.
3155
3156 ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
3157
3158 le = Vcb->chunks.Flink;
3159 while (le != &Vcb->chunks) {
3160 c = CONTAINING_RECORD(le, chunk, list_entry);
3161
3162 if (!c->readonly && !c->reloc) {
3163 acquire_chunk_lock(c, Vcb);
3164
3165 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen &&
3166 insert_extent_chunk(Vcb, fcb, c, start_data, newlen, false, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset)) {
3167 written += newlen;
3168
3169 if (written == orig_length) {
3170 ExReleaseResourceLite(&Vcb->chunk_lock);
3171 return STATUS_SUCCESS;
3172 } else {
3173 done = true;
3174 start_data += newlen;
3175 irp_offset += newlen;
3176 length -= newlen;
3177 data = &((uint8_t*)data)[newlen];
3178 break;
3179 }
3180 } else
3181 release_chunk_lock(c, Vcb);
3182 }
3183
3184 le = le->Flink;
3185 }
3186
3187 ExReleaseResourceLite(&Vcb->chunk_lock);
3188
3189 if (done) continue;
3190
3191 // Otherwise, see if we can put it in a new chunk.
3192
3193 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, true);
3194
3195 Status = alloc_chunk(Vcb, flags, &c, false);
3196
3197 ExReleaseResourceLite(&Vcb->chunk_lock);
3198
3199 if (!NT_SUCCESS(Status)) {
3200 ERR("alloc_chunk returned %08x\n", Status);
3201 return Status;
3202 }
3203
3204 if (c) {
3205 acquire_chunk_lock(c, Vcb);
3206
3207 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen &&
3208 insert_extent_chunk(Vcb, fcb, c, start_data, newlen, false, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset)) {
3209 written += newlen;
3210
3211 if (written == orig_length)
3212 return STATUS_SUCCESS;
3213 else {
3214 done = true;
3215 start_data += newlen;
3216 irp_offset += newlen;
3217 length -= newlen;
3218 data = &((uint8_t*)data)[newlen];
3219 }
3220 } else
3221 release_chunk_lock(c, Vcb);
3222 }
3223
3224 if (!done) {
3225 Status = insert_chunk_fragmented(fcb, start_data, length, data, false, rollback);
3226 if (!NT_SUCCESS(Status))
3227 ERR("insert_chunk_fragmented returned %08x\n", Status);
3228
3229 return Status;
3230 }
3231 }
3232
3233 return STATUS_DISK_FULL;
3234 }
3235
3236 NTSTATUS truncate_file(fcb* fcb, uint64_t end, PIRP Irp, LIST_ENTRY* rollback) {
3237 NTSTATUS Status;
3238
3239 // FIXME - convert into inline extent if short enough
3240
3241 if (end > 0 && fcb_is_inline(fcb)) {
3242 uint8_t* buf;
3243 bool make_inline = end <= fcb->Vcb->options.max_inline;
3244
3245 buf = ExAllocatePoolWithTag(PagedPool, (ULONG)(make_inline ? (offsetof(EXTENT_DATA, data[0]) + end) : sector_align(end, fcb->Vcb->superblock.sector_size)), ALLOC_TAG);
3246 if (!buf) {
3247 ERR("out of memory\n");
3248 return STATUS_INSUFFICIENT_RESOURCES;
3249 }
3250
3251 Status = read_file(fcb, make_inline ? (buf + offsetof(EXTENT_DATA, data[0])) : buf, 0, end, NULL, Irp);
3252 if (!NT_SUCCESS(Status)) {
3253 ERR("read_file returned %08x\n", Status);
3254 ExFreePool(buf);
3255 return Status;
3256 }
3257
3258 Status = excise_extents(fcb->Vcb, fcb, 0, fcb->inode_item.st_size, Irp, rollback);
3259 if (!NT_SUCCESS(Status)) {
3260 ERR("excise_extents returned %08x\n", Status);
3261 ExFreePool(buf);
3262 return Status;
3263 }
3264
3265 if (!make_inline) {
3266 RtlZeroMemory(buf + end, (ULONG)(sector_align(end, fcb->Vcb->superblock.sector_size) - end));
3267
3268 Status = do_write_file(fcb, 0, sector_align(end, fcb->Vcb->superblock.sector_size), buf, Irp, false, 0, rollback);
3269 if (!NT_SUCCESS(Status)) {
3270 ERR("do_write_file returned %08x\n", Status);
3271 ExFreePool(buf);
3272 return Status;
3273 }
3274 } else {
3275 EXTENT_DATA* ed = (EXTENT_DATA*)buf;
3276
3277 ed->generation = fcb->Vcb->superblock.generation;
3278 ed->decoded_size = end;
3279 ed->compression = BTRFS_COMPRESSION_NONE;
3280 ed->encryption = BTRFS_ENCRYPTION_NONE;
3281 ed->encoding = BTRFS_ENCODING_NONE;
3282 ed->type = EXTENT_TYPE_INLINE;
3283
3284 Status = add_extent_to_fcb(fcb, 0, ed, (uint16_t)(offsetof(EXTENT_DATA, data[0]) + end), false, NULL, rollback);
3285 if (!NT_SUCCESS(Status)) {
3286 ERR("add_extent_to_fcb returned %08x\n", Status);
3287 ExFreePool(buf);
3288 return Status;
3289 }
3290
3291 fcb->inode_item.st_blocks += end;
3292 }
3293
3294 ExFreePool(buf);
3295 return STATUS_SUCCESS;
3296 }
3297
3298 Status = excise_extents(fcb->Vcb, fcb, sector_align(end, fcb->Vcb->superblock.sector_size),
3299 sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size), Irp, rollback);
3300 if (!NT_SUCCESS(Status)) {
3301 ERR("excise_extents returned %08x\n", Status);
3302 return Status;
3303 }
3304
3305 fcb->inode_item.st_size = end;
3306 fcb->inode_item_changed = true;
3307 TRACE("setting st_size to %I64x\n", end);
3308
3309 fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size);
3310 fcb->Header.FileSize.QuadPart = fcb->inode_item.st_size;
3311 fcb->Header.ValidDataLength.QuadPart = fcb->inode_item.st_size;
3312 // FIXME - inform cache manager of this
3313
3314 TRACE("fcb %p FileSize = %I64x\n", fcb, fcb->Header.FileSize.QuadPart);
3315
3316 return STATUS_SUCCESS;
3317 }
3318
3319 NTSTATUS extend_file(fcb* fcb, file_ref* fileref, uint64_t end, bool prealloc, PIRP Irp, LIST_ENTRY* rollback) {
3320 uint64_t oldalloc, newalloc;
3321 bool cur_inline;
3322 NTSTATUS Status;
3323
3324 TRACE("(%p, %p, %x, %u)\n", fcb, fileref, end, prealloc);
3325
3326 if (fcb->ads) {
3327 if (end > 0xffff)
3328 return STATUS_DISK_FULL;
3329
3330 return stream_set_end_of_file_information(fcb->Vcb, (uint16_t)end, fcb, fileref, false);
3331 } else {
3332 extent* ext = NULL;
3333 LIST_ENTRY* le;
3334
3335 le = fcb->extents.Blink;
3336 while (le != &fcb->extents) {
3337 extent* ext2 = CONTAINING_RECORD(le, extent, list_entry);
3338
3339 if (!ext2->ignore) {
3340 ext = ext2;
3341 break;
3342 }
3343
3344 le = le->Blink;
3345 }
3346
3347 oldalloc = 0;
3348 if (ext) {
3349 EXTENT_DATA* ed = &ext->extent_data;
3350 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
3351
3352 oldalloc = ext->offset + (ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes);
3353 cur_inline = ed->type == EXTENT_TYPE_INLINE;
3354
3355 if (cur_inline && end > fcb->Vcb->options.max_inline) {
3356 uint64_t origlength, length;
3357 uint8_t* data;
3358
3359 TRACE("giving inline file proper extents\n");
3360
3361 origlength = ed->decoded_size;
3362
3363 cur_inline = false;
3364
3365 length = sector_align(origlength, fcb->Vcb->superblock.sector_size);
3366
3367 data = ExAllocatePoolWithTag(PagedPool, (ULONG)length, ALLOC_TAG);
3368 if (!data) {
3369 ERR("could not allocate %I64x bytes for data\n", length);
3370 return STATUS_INSUFFICIENT_RESOURCES;
3371 }
3372
3373 Status = read_file(fcb, data, 0, origlength, NULL, Irp);
3374 if (!NT_SUCCESS(Status)) {
3375 ERR("read_file returned %08x\n", Status);
3376 ExFreePool(data);
3377 return Status;
3378 }
3379
3380 RtlZeroMemory(data + origlength, (ULONG)(length - origlength));
3381
3382 Status = excise_extents(fcb->Vcb, fcb, 0, fcb->inode_item.st_size, Irp, rollback);
3383 if (!NT_SUCCESS(Status)) {
3384 ERR("excise_extents returned %08x\n", Status);
3385 ExFreePool(data);
3386 return Status;
3387 }
3388
3389 Status = do_write_file(fcb, 0, length, data, Irp, false, 0, rollback);
3390 if (!NT_SUCCESS(Status)) {
3391 ERR("do_write_file returned %08x\n", Status);
3392 ExFreePool(data);
3393 return Status;
3394 }
3395
3396 oldalloc = ext->offset + length;
3397
3398 ExFreePool(data);
3399 }
3400
3401 if (cur_inline) {
3402 uint16_t edsize;
3403
3404 if (end > oldalloc) {
3405 edsize = (uint16_t)(offsetof(EXTENT_DATA, data[0]) + end - ext->offset);
3406 ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
3407
3408 if (!ed) {
3409 ERR("out of memory\n");
3410 return STATUS_INSUFFICIENT_RESOURCES;
3411 }
3412
3413 ed->generation = fcb->Vcb->superblock.generation;
3414 ed->decoded_size = end - ext->offset;
3415 ed->compression = BTRFS_COMPRESSION_NONE;
3416 ed->encryption = BTRFS_ENCRYPTION_NONE;
3417 ed->encoding = BTRFS_ENCODING_NONE;
3418 ed->type = EXTENT_TYPE_INLINE;
3419
3420 Status = read_file(fcb, ed->data, ext->offset, oldalloc, NULL, Irp);
3421 if (!NT_SUCCESS(Status)) {
3422 ERR("read_file returned %08x\n", Status);
3423 ExFreePool(ed);
3424 return Status;
3425 }
3426
3427 RtlZeroMemory(ed->data + oldalloc - ext->offset, (ULONG)(end - oldalloc));
3428
3429 remove_fcb_extent(fcb, ext, rollback);
3430
3431 Status = add_extent_to_fcb(fcb, ext->offset, ed, edsize, ext->unique, NULL, rollback);
3432 if (!NT_SUCCESS(Status)) {
3433 ERR("add_extent_to_fcb returned %08x\n", Status);
3434 ExFreePool(ed);
3435 return Status;
3436 }
3437
3438 ExFreePool(ed);
3439
3440 fcb->extents_changed = true;
3441 mark_fcb_dirty(fcb);
3442 }
3443
3444 TRACE("extending inline file (oldalloc = %I64x, end = %I64x)\n", oldalloc, end);
3445
3446 fcb->inode_item.st_size = end;
3447 TRACE("setting st_size to %I64x\n", end);
3448
3449 fcb->inode_item.st_blocks = end;
3450
3451 fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3452 } else {
3453 newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
3454
3455 if (newalloc > oldalloc) {
3456 if (prealloc) {
3457 // FIXME - try and extend previous extent first
3458
3459 Status = insert_prealloc_extent(fcb, oldalloc, newalloc - oldalloc, rollback);
3460
3461 if (!NT_SUCCESS(Status)) {
3462 ERR("insert_prealloc_extent returned %08x\n", Status);
3463 return Status;
3464 }
3465 }
3466
3467 fcb->extents_changed = true;
3468 }
3469
3470 fcb->inode_item.st_size = end;
3471 fcb->inode_item_changed = true;
3472 mark_fcb_dirty(fcb);
3473
3474 TRACE("setting st_size to %I64x\n", end);
3475
3476 TRACE("newalloc = %I64x\n", newalloc);
3477
3478 fcb->Header.AllocationSize.QuadPart = newalloc;
3479 fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3480 }
3481 } else {
3482 if (end > fcb->Vcb->options.max_inline) {
3483 newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
3484
3485 if (prealloc) {
3486 Status = insert_prealloc_extent(fcb, 0, newalloc, rollback);
3487
3488 if (!NT_SUCCESS(Status)) {
3489 ERR("insert_prealloc_extent returned %08x\n", Status);
3490 return Status;
3491 }
3492 }
3493
3494 fcb->extents_changed = true;
3495 fcb->inode_item_changed = true;
3496 mark_fcb_dirty(fcb);
3497
3498 fcb->inode_item.st_size = end;
3499 TRACE("setting st_size to %I64x\n", end);
3500
3501 TRACE("newalloc = %I64x\n", newalloc);
3502
3503 fcb->Header.AllocationSize.QuadPart = newalloc;
3504 fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3505 } else {
3506 EXTENT_DATA* ed;
3507 uint16_t edsize;
3508
3509 edsize = (uint16_t)(offsetof(EXTENT_DATA, data[0]) + end);
3510 ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
3511
3512 if (!ed) {
3513 ERR("out of memory\n");
3514 return STATUS_INSUFFICIENT_RESOURCES;
3515 }
3516
3517 ed->generation = fcb->Vcb->superblock.generation;
3518 ed->decoded_size = end;
3519 ed->compression = BTRFS_COMPRESSION_NONE;
3520 ed->encryption = BTRFS_ENCRYPTION_NONE;
3521 ed->encoding = BTRFS_ENCODING_NONE;
3522 ed->type = EXTENT_TYPE_INLINE;
3523
3524 RtlZeroMemory(ed->data, (ULONG)end);
3525
3526 Status = add_extent_to_fcb(fcb, 0, ed, edsize, false, NULL, rollback);
3527 if (!NT_SUCCESS(Status)) {
3528 ERR("add_extent_to_fcb returned %08x\n", Status);
3529 ExFreePool(ed);
3530 return Status;
3531 }
3532
3533 ExFreePool(ed);
3534
3535 fcb->extents_changed = true;
3536 fcb->inode_item_changed = true;
3537 mark_fcb_dirty(fcb);
3538
3539 fcb->inode_item.st_size = end;
3540 TRACE("setting st_size to %I64x\n", end);
3541
3542 fcb->inode_item.st_blocks = end;
3543
3544 fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3545 }
3546 }
3547 }
3548
3549 return STATUS_SUCCESS;
3550 }
3551
3552 static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, uint64_t start_data, uint64_t end_data, void* data, uint64_t* written,
3553 PIRP Irp, bool file_write, uint64_t irp_offset, ULONG priority, LIST_ENTRY* rollback) {
3554 EXTENT_DATA* ed = &ext->extent_data;
3555 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
3556 NTSTATUS Status;
3557 chunk* c = NULL;
3558
3559 if (start_data <= ext->offset && end_data >= ext->offset + ed2->num_bytes) { // replace all
3560 extent* newext;
3561
3562 newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3563 if (!newext) {
3564 ERR("out of memory\n");
3565 return STATUS_INSUFFICIENT_RESOURCES;
3566 }
3567
3568 RtlCopyMemory(&newext->extent_data, &ext->extent_data, ext->datalen);
3569
3570 newext->extent_data.type = EXTENT_TYPE_REGULAR;
3571
3572 Status = write_data_complete(fcb->Vcb, ed2->address + ed2->offset, (uint8_t*)data + ext->offset - start_data, (uint32_t)ed2->num_bytes, Irp,
3573 NULL, file_write, irp_offset + ext->offset - start_data, priority);
3574 if (!NT_SUCCESS(Status)) {
3575 ERR("write_data_complete returned %08x\n", Status);
3576 return Status;
3577 }
3578
3579 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
3580 ULONG sl = (ULONG)(ed2->num_bytes / fcb->Vcb->superblock.sector_size);
3581 uint32_t* csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(uint32_t), ALLOC_TAG);
3582
3583 if (!csum) {
3584 ERR("out of memory\n");
3585 ExFreePool(newext);
3586 return STATUS_INSUFFICIENT_RESOURCES;
3587 }
3588
3589 Status = calc_csum(fcb->Vcb, (uint8_t*)data + ext->offset - start_data, sl, csum);
3590 if (!NT_SUCCESS(Status)) {
3591 ERR("calc_csum returned %08x\n", Status);
3592 ExFreePool(csum);
3593 ExFreePool(newext);
3594 return Status;
3595 }
3596
3597 newext->csum = csum;
3598 } else
3599 newext->csum = NULL;
3600
3601 *written = ed2->num_bytes;
3602
3603 newext->offset = ext->offset;
3604 newext->datalen = ext->datalen;
3605 newext->unique = ext->unique;
3606 newext->ignore = false;
3607 newext->inserted = true;
3608 InsertHeadList(&ext->list_entry, &newext->list_entry);
3609
3610 add_insert_extent_rollback(rollback, fcb, newext);
3611
3612 remove_fcb_extent(fcb, ext, rollback);
3613
3614 c = get_chunk_from_address(fcb->Vcb, ed2->address);
3615 } else if (start_data <= ext->offset && end_data < ext->offset + ed2->num_bytes) { // replace beginning
3616 EXTENT_DATA2* ned2;
3617 extent *newext1, *newext2;
3618
3619 newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3620 if (!newext1) {
3621 ERR("out of memory\n");
3622 return STATUS_INSUFFICIENT_RESOURCES;
3623 }
3624
3625 newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3626 if (!newext2) {
3627 ERR("out of memory\n");
3628 ExFreePool(newext1);
3629 return STATUS_INSUFFICIENT_RESOURCES;
3630 }
3631
3632 RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen);
3633 newext1->extent_data.type = EXTENT_TYPE_REGULAR;
3634 ned2 = (EXTENT_DATA2*)newext1->extent_data.data;
3635 ned2->num_bytes = end_data - ext->offset;
3636
3637 RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen);
3638 ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3639 ned2->offset += end_data - ext->offset;
3640 ned2->num_bytes -= end_data - ext->offset;
3641
3642 Status = write_data_complete(fcb->Vcb, ed2->address + ed2->offset, (uint8_t*)data + ext->offset - start_data, (uint32_t)(end_data - ext->offset),
3643 Irp, NULL, file_write, irp_offset + ext->offset - start_data, priority);
3644 if (!NT_SUCCESS(Status)) {
3645 ERR("write_data_complete returned %08x\n", Status);
3646 ExFreePool(newext1);
3647 ExFreePool(newext2);
3648 return Status;
3649 }
3650
3651 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
3652 ULONG sl = (ULONG)((end_data - ext->offset) / fcb->Vcb->superblock.sector_size);
3653 uint32_t* csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(uint32_t), ALLOC_TAG);
3654
3655 if (!csum) {
3656 ERR("out of memory\n");
3657 ExFreePool(newext1);
3658 ExFreePool(newext2);
3659 return STATUS_INSUFFICIENT_RESOURCES;
3660 }
3661
3662 Status = calc_csum(fcb->Vcb, (uint8_t*)data + ext->offset - start_data, sl, csum);
3663 if (!NT_SUCCESS(Status)) {
3664 ERR("calc_csum returned %08x\n", Status);
3665 ExFreePool(newext1);
3666 ExFreePool(newext2);
3667 ExFreePool(csum);
3668 return Status;
3669 }
3670
3671 newext1->csum = csum;
3672 } else
3673 newext1->csum = NULL;
3674
3675 *written = end_data - ext->offset;
3676
3677 newext1->offset = ext->offset;
3678 newext1->datalen = ext->datalen;
3679 newext1->unique = ext->unique;
3680 newext1->ignore = false;
3681 newext1->inserted = true;
3682 InsertHeadList(&ext->list_entry, &newext1->list_entry);
3683
3684 add_insert_extent_rollback(rollback, fcb, newext1);
3685
3686 newext2->offset = end_data;
3687 newext2->datalen = ext->datalen;
3688 newext2->unique = ext->unique;
3689 newext2->ignore = false;
3690 newext2->inserted = true;
3691 newext2->csum = NULL;
3692 add_extent(fcb, &newext1->list_entry, newext2);
3693
3694 add_insert_extent_rollback(rollback, fcb, newext2);
3695
3696 c = get_chunk_from_address(fcb->Vcb, ed2->address);
3697
3698 if (!c)
3699 ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
3700 else {
3701 Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
3702 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
3703
3704 if (!NT_SUCCESS(Status)) {
3705 ERR("update_changed_extent_ref returned %08x\n", Status);
3706 return Status;
3707 }
3708 }
3709
3710 remove_fcb_extent(fcb, ext, rollback);
3711 } else if (start_data > ext->offset && end_data >= ext->offset + ed2->num_bytes) { // replace end
3712 EXTENT_DATA2* ned2;
3713 extent *newext1, *newext2;
3714
3715 newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3716 if (!newext1) {
3717 ERR("out of memory\n");
3718 return STATUS_INSUFFICIENT_RESOURCES;
3719 }
3720
3721 newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3722 if (!newext2) {
3723 ERR("out of memory\n");
3724 ExFreePool(newext1);
3725 return STATUS_INSUFFICIENT_RESOURCES;
3726 }
3727
3728 RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen);
3729
3730 ned2 = (EXTENT_DATA2*)newext1->extent_data.data;
3731 ned2->num_bytes = start_data - ext->offset;
3732
3733 RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen);
3734
3735 newext2->extent_data.type = EXTENT_TYPE_REGULAR;
3736 ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3737 ned2->offset += start_data - ext->offset;
3738 ned2->num_bytes = ext->offset + ed2->num_bytes - start_data;
3739
3740 Status = write_data_complete(fcb->Vcb, ed2->address + ned2->offset, data, (uint32_t)ned2->num_bytes, Irp, NULL, file_write, irp_offset, priority);
3741 if (!NT_SUCCESS(Status)) {
3742 ERR("write_data_complete returned %08x\n", Status);
3743 ExFreePool(newext1);
3744 ExFreePool(newext2);
3745 return Status;
3746 }
3747
3748 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
3749 ULONG sl = (ULONG)(ned2->num_bytes / fcb->Vcb->superblock.sector_size);
3750 uint32_t* csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(uint32_t), ALLOC_TAG);
3751
3752 if (!csum) {
3753 ERR("out of memory\n");
3754 ExFreePool(newext1);
3755 ExFreePool(newext2);
3756 return STATUS_INSUFFICIENT_RESOURCES;
3757 }
3758
3759 Status = calc_csum(fcb->Vcb, data, sl, csum);
3760 if (!NT_SUCCESS(Status)) {
3761 ERR("calc_csum returned %08x\n", Status);
3762 ExFreePool(newext1);
3763 ExFreePool(newext2);
3764 ExFreePool(csum);
3765 return Status;
3766 }
3767
3768 newext2->csum = csum;
3769 } else
3770 newext2->csum = NULL;
3771
3772 *written = ned2->num_bytes;
3773
3774 newext1->offset = ext->offset;
3775 newext1->datalen = ext->datalen;
3776 newext1->unique = ext->unique;
3777 newext1->ignore = false;
3778 newext1->inserted = true;
3779 newext1->csum = NULL;
3780 InsertHeadList(&ext->list_entry, &newext1->list_entry);
3781
3782 add_insert_extent_rollback(rollback, fcb, newext1);
3783
3784 newext2->offset = start_data;
3785 newext2->datalen = ext->datalen;
3786 newext2->unique = ext->unique;
3787 newext2->ignore = false;
3788 newext2->inserted = true;
3789 add_extent(fcb, &newext1->list_entry, newext2);
3790
3791 add_insert_extent_rollback(rollback, fcb, newext2);
3792
3793 c = get_chunk_from_address(fcb->Vcb, ed2->address);
3794
3795 if (!c)
3796 ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
3797 else {
3798 Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
3799 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
3800
3801 if (!NT_SUCCESS(Status)) {
3802 ERR("update_changed_extent_ref returned %08x\n", Status);
3803 return Status;
3804 }
3805 }
3806
3807 remove_fcb_extent(fcb, ext, rollback);
3808 } else if (start_data > ext->offset && end_data < ext->offset + ed2->num_bytes) { // replace middle
3809 EXTENT_DATA2* ned2;
3810 extent *newext1, *newext2, *newext3;
3811
3812 newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3813 if (!newext1) {
3814 ERR("out of memory\n");
3815 return STATUS_INSUFFICIENT_RESOURCES;
3816 }
3817
3818 newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3819 if (!newext2) {
3820 ERR("out of memory\n");
3821 ExFreePool(newext1);
3822 return STATUS_INSUFFICIENT_RESOURCES;
3823 }
3824
3825 newext3 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3826 if (!newext3) {
3827 ERR("out of memory\n");
3828 ExFreePool(newext1);
3829 ExFreePool(newext2);
3830 return STATUS_INSUFFICIENT_RESOURCES;
3831 }
3832
3833 RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen);
3834 RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen);
3835 RtlCopyMemory(&newext3->extent_data, &ext->extent_data, ext->datalen);
3836
3837 ned2 = (EXTENT_DATA2*)newext1->extent_data.data;
3838 ned2->num_bytes = start_data - ext->offset;
3839
3840 newext2->extent_data.type = EXTENT_TYPE_REGULAR;
3841 ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3842 ned2->offset += start_data - ext->offset;
3843 ned2->num_bytes = end_data - start_data;
3844
3845 ned2 = (EXTENT_DATA2*)newext3->extent_data.data;
3846 ned2->offset += end_data - ext->offset;
3847 ned2->num_bytes -= end_data - ext->offset;
3848
3849 ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3850 Status = write_data_complete(fcb->Vcb, ed2->address + ned2->offset, data, (uint32_t)(end_data - start_data), Irp, NULL, file_write, irp_offset, priority);
3851 if (!NT_SUCCESS(Status)) {
3852 ERR("write_data_complete returned %08x\n", Status);
3853 ExFreePool(newext1);
3854 ExFreePool(newext2);
3855 ExFreePool(newext3);
3856 return Status;
3857 }
3858
3859 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
3860 ULONG sl = (ULONG)((end_data - start_data) / fcb->Vcb->superblock.sector_size);
3861 uint32_t* csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(uint32_t), ALLOC_TAG);
3862
3863 if (!csum) {
3864 ERR("out of memory\n");
3865 ExFreePool(newext1);
3866 ExFreePool(newext2);
3867 ExFreePool(newext3);
3868 return STATUS_INSUFFICIENT_RESOURCES;
3869 }
3870
3871 Status = calc_csum(fcb->Vcb, data, sl, csum);
3872 if (!NT_SUCCESS(Status)) {
3873 ERR("calc_csum returned %08x\n", Status);
3874 ExFreePool(newext1);
3875 ExFreePool(newext2);
3876 ExFreePool(newext3);
3877 ExFreePool(csum);
3878 return Status;
3879 }
3880
3881 newext2->csum = csum;
3882 } else
3883 newext2->csum = NULL;
3884
3885 *written = end_data - start_data;
3886
3887 newext1->offset = ext->offset;
3888 newext1->datalen = ext->datalen;
3889 newext1->unique = ext->unique;
3890 newext1->ignore = false;
3891 newext1->inserted = true;
3892 newext1->csum = NULL;
3893 InsertHeadList(&ext->list_entry, &newext1->list_entry);
3894
3895 add_insert_extent_rollback(rollback, fcb, newext1);
3896
3897 newext2->offset = start_data;
3898 newext2->datalen = ext->datalen;
3899 newext2->unique = ext->unique;
3900 newext2->ignore = false;
3901 newext2->inserted = true;
3902 add_extent(fcb, &newext1->list_entry, newext2);
3903
3904 add_insert_extent_rollback(rollback, fcb, newext2);
3905
3906 newext3->offset = end_data;
3907 newext3->datalen = ext->datalen;
3908 newext3->unique = ext->unique;
3909 newext3->ignore = false;
3910 newext3->inserted = true;
3911 newext3->csum = NULL;
3912 add_extent(fcb, &newext2->list_entry, newext3);
3913
3914 add_insert_extent_rollback(rollback, fcb, newext3);
3915
3916 c = get_chunk_from_address(fcb->Vcb, ed2->address);
3917
3918 if (!c)
3919 ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
3920 else {
3921 Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 2,
3922 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, false, Irp);
3923
3924 if (!NT_SUCCESS(Status)) {
3925 ERR("update_changed_extent_ref returned %08x\n", Status);
3926 return Status;
3927 }
3928 }
3929
3930 remove_fcb_extent(fcb, ext, rollback);
3931 }
3932
3933 if (c)
3934 c->changed = true;
3935
3936 return STATUS_SUCCESS;
3937 }
3938
3939 NTSTATUS do_write_file(fcb* fcb, uint64_t start, uint64_t end_data, void* data, PIRP Irp, bool file_write, uint32_t irp_offset, LIST_ENTRY* rollback) {
3940 NTSTATUS Status;
3941 LIST_ENTRY *le, *le2;
3942 uint64_t written = 0, length = end_data - start;
3943 uint64_t last_cow_start;
3944 ULONG priority = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority;
3945 #ifdef DEBUG_PARANOID
3946 uint64_t last_off;
3947 #endif
3948 bool extents_changed = false;
3949
3950 last_cow_start = 0;
3951
3952 le = fcb->extents.Flink;
3953 while (le != &fcb->extents) {
3954 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
3955
3956 le2 = le->Flink;
3957
3958 if (!ext->ignore) {
3959 EXTENT_DATA* ed = &ext->extent_data;
3960 EXTENT_DATA2* ed2 = ed->type == EXTENT_TYPE_INLINE ? NULL : (EXTENT_DATA2*)ed->data;
3961 uint64_t len;
3962
3963 len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
3964
3965 if (ext->offset + len <= start)
3966 goto nextitem;
3967
3968 if (ext->offset > start + written + length)
3969 break;
3970
3971 if ((fcb->inode_item.flags & BTRFS_INODE_NODATACOW || ed->type == EXTENT_TYPE_PREALLOC) && ext->unique && ed->compression == BTRFS_COMPRESSION_NONE) {
3972 if (max(last_cow_start, start + written) < ext->offset) {
3973 uint64_t start_write = max(last_cow_start, start + written);
3974
3975 extents_changed = true;
3976
3977 Status = excise_extents(fcb->Vcb, fcb, start_write, ext->offset, Irp, rollback);
3978 if (!NT_SUCCESS(Status)) {
3979 ERR("excise_extents returned %08x\n", Status);
3980 return Status;
3981 }
3982
3983 Status = insert_extent(fcb->Vcb, fcb, start_write, ext->offset - start_write, (uint8_t*)data + written, Irp, file_write, irp_offset + written, rollback);
3984 if (!NT_SUCCESS(Status)) {
3985 ERR("insert_extent returned %08x\n", Status);
3986 return Status;
3987 }
3988
3989 written += ext->offset - start_write;
3990 length -= ext->offset - start_write;
3991
3992 if (length == 0)
3993 break;
3994 }
3995
3996 if (ed->type == EXTENT_TYPE_REGULAR) {
3997 uint64_t writeaddr = ed2->address + ed2->offset + start + written - ext->offset;
3998 uint64_t write_len = min(len, length);
3999 chunk* c;
4000
4001 TRACE("doing non-COW write to %I64x\n", writeaddr);
4002
4003 Status = write_data_complete(fcb->Vcb, writeaddr, (uint8_t*)data + written, (uint32_t)write_len, Irp, NULL, file_write, irp_offset + written, priority);
4004 if (!NT_SUCCESS(Status)) {
4005 ERR("write_data_complete returned %08x\n", Status);
4006 return Status;
4007 }
4008
4009 c = get_chunk_from_address(fcb->Vcb, writeaddr);
4010 if (c)
4011 c->changed = true;
4012
4013 // This shouldn't ever get called - nocow files should always also be nosum.
4014 if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
4015 calc_csum(fcb->Vcb, (uint8_t*)data + written, (uint32_t)(write_len / fcb->Vcb->superblock.sector_size),
4016 &ext->csum[(start + written - ext->offset) / fcb->Vcb->superblock.sector_size]);
4017
4018 ext->inserted = true;
4019 extents_changed = true;
4020 }
4021
4022 written += write_len;
4023 length -= write_len;
4024
4025 if (length == 0)
4026 break;
4027 } else if (ed->type == EXTENT_TYPE_PREALLOC) {
4028 uint64_t write_len;
4029
4030 Status = do_write_file_prealloc(fcb, ext, start + written, end_data, (uint8_t*)data + written, &write_len,
4031 Irp, file_write, irp_offset + written, priority, rollback);
4032 if (!NT_SUCCESS(Status)) {
4033 ERR("do_write_file_prealloc returned %08x\n", Status);
4034 return Status;
4035 }
4036
4037 extents_changed = true;
4038
4039 written += write_len;
4040 length -= write_len;
4041
4042 if (length == 0)
4043 break;
4044 }
4045
4046 last_cow_start = ext->offset + len;
4047 }
4048 }
4049
4050 nextitem:
4051 le = le2;
4052 }
4053
4054 if (length > 0) {
4055 uint64_t start_write = max(last_cow_start, start + written);
4056
4057 extents_changed = true;
4058
4059 Status = excise_extents(fcb->Vcb, fcb, start_write, end_data, Irp, rollback);
4060 if (!NT_SUCCESS(Status)) {
4061 ERR("excise_extents returned %08x\n", Status);
4062 return Status;
4063 }
4064
4065 Status = insert_extent(fcb->Vcb, fcb, start_write, end_data - start_write, (uint8_t*)data + written, Irp, file_write, irp_offset + written, rollback);
4066 if (!NT_SUCCESS(Status)) {
4067 ERR("insert_extent returned %08x\n", Status);
4068 return Status;
4069 }
4070 }
4071
4072 #ifdef DEBUG_PARANOID
4073 last_off = 0xffffffffffffffff;
4074
4075 le = fcb->extents.Flink;
4076 while (le != &fcb->extents) {
4077 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
4078
4079 if (!ext->ignore) {
4080 if (ext->offset == last_off) {
4081 ERR("offset %I64x duplicated\n", ext->offset);
4082 int3;
4083 } else if (ext->offset < last_off && last_off != 0xffffffffffffffff) {
4084 ERR("offsets out of order\n");
4085 int3;
4086 }
4087
4088 last_off = ext->offset;
4089 }
4090
4091 le = le->Flink;
4092 }
4093 #endif
4094
4095 if (extents_changed) {
4096 fcb->extents_changed = true;
4097 mark_fcb_dirty(fcb);
4098 }
4099
4100 return STATUS_SUCCESS;
4101 }
4102
4103 NTSTATUS write_compressed(fcb* fcb, uint64_t start_data, uint64_t end_data, void* data, PIRP Irp, LIST_ENTRY* rollback) {
4104 NTSTATUS Status;
4105 uint64_t i;
4106
4107 for (i = 0; i < sector_align(end_data - start_data, COMPRESSED_EXTENT_SIZE) / COMPRESSED_EXTENT_SIZE; i++) {
4108 uint64_t s2, e2;
4109 bool compressed;
4110
4111 s2 = start_data + (i * COMPRESSED_EXTENT_SIZE);
4112 e2 = min(s2 + COMPRESSED_EXTENT_SIZE, end_data);
4113
4114 Status = write_compressed_bit(fcb, s2, e2, (uint8_t*)data + (i * COMPRESSED_EXTENT_SIZE), &compressed, Irp, rollback);
4115
4116 if (!NT_SUCCESS(Status)) {
4117 ERR("write_compressed_bit returned %08x\n", Status);
4118 return Status;
4119 }
4120
4121 // If the first 128 KB of a file is incompressible, we set the nocompress flag so we don't
4122 // bother with the rest of it.
4123 if (s2 == 0 && e2 == COMPRESSED_EXTENT_SIZE && !compressed && !fcb->Vcb->options.compress_force) {
4124 fcb->inode_item.flags |= BTRFS_INODE_NOCOMPRESS;
4125 fcb->inode_item_changed = true;
4126 mark_fcb_dirty(fcb);
4127
4128 // write subsequent data non-compressed
4129 if (e2 < end_data) {
4130 Status = do_write_file(fcb, e2, end_data, (uint8_t*)data + e2, Irp, false, 0, rollback);
4131
4132 if (!NT_SUCCESS(Status)) {
4133 ERR("do_write_file returned %08x\n", Status);
4134 return Status;
4135 }
4136 }
4137
4138 return STATUS_SUCCESS;
4139 }
4140 }
4141
4142 return STATUS_SUCCESS;
4143 }
4144
4145 NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void* buf, ULONG* length, bool paging_io, bool no_cache,
4146 bool wait, bool deferred_write, bool write_irp, LIST_ENTRY* rollback) {
4147 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
4148 PFILE_OBJECT FileObject = IrpSp->FileObject;
4149 EXTENT_DATA* ed2;
4150 uint64_t off64, newlength, start_data, end_data;
4151 uint32_t bufhead;
4152 bool make_inline;
4153 INODE_ITEM* origii;
4154 bool changed_length = false;
4155 NTSTATUS Status;
4156 LARGE_INTEGER time;
4157 BTRFS_TIME now;
4158 fcb* fcb;
4159 ccb* ccb;
4160 file_ref* fileref;
4161 bool paging_lock = false, acquired_fcb_lock = false, acquired_tree_lock = false, pagefile;
4162 ULONG filter = 0;
4163
4164 TRACE("(%p, %p, %I64x, %p, %x, %u, %u)\n", Vcb, FileObject, offset.QuadPart, buf, *length, paging_io, no_cache);
4165
4166 if (*length == 0) {
4167 TRACE("returning success for zero-length write\n");
4168 return STATUS_SUCCESS;
4169 }
4170
4171 if (!FileObject) {
4172 ERR("error - FileObject was NULL\n");
4173 return STATUS_ACCESS_DENIED;
4174 }
4175
4176 fcb = FileObject->FsContext;
4177 ccb = FileObject->FsContext2;
4178 fileref = ccb ? ccb->fileref : NULL;
4179
4180 if (!fcb->ads && fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK) {
4181 WARN("tried to write to something other than a file or symlink (inode %I64x, type %u, %p, %p)\n", fcb->inode, fcb->type, &fcb->type, fcb);
4182 return STATUS_INVALID_DEVICE_REQUEST;
4183 }
4184
4185 if (offset.LowPart == FILE_WRITE_TO_END_OF_FILE && offset.HighPart == -1)
4186 offset = fcb->Header.FileSize;
4187
4188 off64 = offset.QuadPart;
4189
4190 TRACE("fcb->Header.Flags = %x\n", fcb->Header.Flags);
4191
4192 if (!no_cache && !CcCanIWrite(FileObject, *length, wait, deferred_write))
4193 return STATUS_PENDING;
4194
4195 if (!wait && no_cache)
4196 return STATUS_PENDING;
4197
4198 if (no_cache && !paging_io && FileObject->SectionObjectPointer->DataSectionObject) {
4199 IO_STATUS_BLOCK iosb;
4200
4201 ExAcquireResourceExclusiveLite(fcb->Header.PagingIoResource, true);
4202
4203 CcFlushCache(FileObject->SectionObjectPointer, &offset, *length, &iosb);
4204
4205 if (!NT_SUCCESS(iosb.Status)) {
4206 ExReleaseResourceLite(fcb->Header.PagingIoResource);
4207 ERR("CcFlushCache returned %08x\n", iosb.Status);
4208 return iosb.Status;
4209 }
4210
4211 paging_lock = true;
4212
4213 CcPurgeCacheSection(FileObject->SectionObjectPointer, &offset, *length, false);
4214 }
4215
4216 if (paging_io) {
4217 if (!ExAcquireResourceSharedLite(fcb->Header.PagingIoResource, wait)) {
4218 Status = STATUS_PENDING;
4219 goto end;
4220 } else
4221 paging_lock = true;
4222 }
4223
4224 pagefile = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE && paging_io;
4225
4226 if (!pagefile && !ExIsResourceAcquiredExclusiveLite(&Vcb->tree_lock)) {
4227 if (!ExAcquireResourceSharedLite(&Vcb->tree_lock, wait)) {
4228 Status = STATUS_PENDING;
4229 goto end;
4230 } else
4231 acquired_tree_lock = true;
4232 }
4233
4234 if (pagefile) {
4235 if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) {
4236 Status = STATUS_PENDING;
4237 goto end;
4238 } else
4239 acquired_fcb_lock = true;
4240 } else if (!ExIsResourceAcquiredExclusiveLite(fcb->Header.Resource)) {
4241 if (!ExAcquireResourceExclusiveLite(fcb->Header.Resource, wait)) {
4242 Status = STATUS_PENDING;
4243 goto end;
4244 } else
4245 acquired_fcb_lock = true;
4246 }
4247
4248 newlength = fcb->ads ? fcb->adsdata.Length : fcb->inode_item.st_size;
4249
4250 if (fcb->deleted)
4251 newlength = 0;
4252
4253 TRACE("newlength = %I64x\n", newlength);
4254
4255 if (off64 + *length > newlength) {
4256 if (paging_io) {
4257 if (off64 >= newlength) {
4258 TRACE("paging IO tried to write beyond end of file (file size = %I64x, offset = %I64x, length = %x)\n", newlength, off64, *length);
4259 TRACE("FileObject: AllocationSize = %I64x, FileSize = %I64x, ValidDataLength = %I64x\n",
4260 fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
4261 Status = STATUS_SUCCESS;
4262 goto end;
4263 }
4264
4265 *length = (ULONG)(newlength - off64);
4266 } else {
4267 newlength = off64 + *length;
4268 changed_length = true;
4269
4270 TRACE("extending length to %I64x\n", newlength);
4271 }
4272 }
4273
4274 if (fcb->ads)
4275 make_inline = false;
4276 else if (fcb->type == BTRFS_TYPE_SYMLINK)
4277 make_inline = newlength <= (Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node) - offsetof(EXTENT_DATA, data[0]));
4278 else
4279 make_inline = newlength <= fcb->Vcb->options.max_inline;
4280
4281 if (changed_length) {
4282 if (newlength > (uint64_t)fcb->Header.AllocationSize.QuadPart) {
4283 if (!acquired_tree_lock) {
4284 // We need to acquire the tree lock if we don't have it already -
4285 // we can't give an inline file proper extents at the same time as we're
4286 // doing a flush.
4287 if (!ExAcquireResourceSharedLite(&Vcb->tree_lock, wait)) {
4288 Status = STATUS_PENDING;
4289 goto end;
4290 } else
4291 acquired_tree_lock = true;
4292 }
4293
4294 Status = extend_file(fcb, fileref, newlength, false, Irp, rollback);
4295 if (!NT_SUCCESS(Status)) {
4296 ERR("extend_file returned %08x\n", Status);
4297 goto end;
4298 }
4299 } else if (!fcb->ads)
4300 fcb->inode_item.st_size = newlength;
4301
4302 fcb->Header.FileSize.QuadPart = newlength;
4303 fcb->Header.ValidDataLength.QuadPart = newlength;
4304
4305 TRACE("AllocationSize = %I64x\n", fcb->Header.AllocationSize.QuadPart);
4306 TRACE("FileSize = %I64x\n", fcb->Header.FileSize.QuadPart);
4307 TRACE("ValidDataLength = %I64x\n", fcb->Header.ValidDataLength.QuadPart);
4308 }
4309
4310 if (!no_cache) {
4311 Status = STATUS_SUCCESS;
4312
4313 _SEH2_TRY {
4314 if (!FileObject->PrivateCacheMap || changed_length) {
4315 CC_FILE_SIZES ccfs;
4316
4317 ccfs.AllocationSize = fcb->Header.AllocationSize;
4318 ccfs.FileSize = fcb->Header.FileSize;
4319 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
4320
4321 if (!FileObject->PrivateCacheMap)
4322 init_file_cache(FileObject, &ccfs);
4323
4324 CcSetFileSizes(FileObject, &ccfs);
4325 }
4326
4327 if (IrpSp->MinorFunction & IRP_MN_MDL) {
4328 CcPrepareMdlWrite(FileObject, &offset, *length, &Irp->MdlAddress, &Irp->IoStatus);
4329
4330 Status = Irp->IoStatus.Status;
4331 goto end;
4332 } else {
4333 if (fCcCopyWriteEx) {
4334 TRACE("CcCopyWriteEx(%p, %I64x, %x, %u, %p, %p)\n", FileObject, off64, *length, wait, buf, Irp->Tail.Overlay.Thread);
4335 if (!fCcCopyWriteEx(FileObject, &offset, *length, wait, buf, Irp->Tail.Overlay.Thread)) {
4336 Status = STATUS_PENDING;
4337 goto end;
4338 }
4339 TRACE("CcCopyWriteEx finished\n");
4340 } else {
4341 TRACE("CcCopyWrite(%p, %I64x, %x, %u, %p)\n", FileObject, off64, *length, wait, buf);
4342 if (!CcCopyWrite(FileObject, &offset, *length, wait, buf)) {
4343 Status = STATUS_PENDING;
4344 goto end;
4345 }
4346 TRACE("CcCopyWrite finished\n");
4347 }
4348 }
4349 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4350 Status = _SEH2_GetExceptionCode();
4351 } _SEH2_END;
4352
4353 if (changed_length) {
4354 queue_notification_fcb(fcb->ads ? fileref->parent : fileref, fcb->ads ? FILE_NOTIFY_CHANGE_STREAM_SIZE : FILE_NOTIFY_CHANGE_SIZE,
4355 fcb->ads ? FILE_ACTION_MODIFIED_STREAM : FILE_ACTION_MODIFIED, fcb->ads && fileref->dc ? &fileref->dc->name : NULL);
4356 }
4357
4358 goto end;
4359 }
4360
4361 if (fcb->ads) {
4362 if (changed_length) {
4363 char* data2;
4364
4365 if (newlength > fcb->adsmaxlen) {
4366 ERR("error - xattr too long (%I64u > %u)\n", newlength, fcb->adsmaxlen);
4367 Status = STATUS_DISK_FULL;
4368 goto end;
4369 }
4370
4371 data2 = ExAllocatePoolWithTag(PagedPool, (ULONG)newlength, ALLOC_TAG);
4372 if (!data2) {
4373 ERR("out of memory\n");
4374 Status = STATUS_INSUFFICIENT_RESOURCES;
4375 goto end;
4376 }
4377
4378 if (fcb->adsdata.Buffer) {
4379 RtlCopyMemory(data2, fcb->adsdata.Buffer, fcb->adsdata.Length);
4380 ExFreePool(fcb->adsdata.Buffer);
4381 }
4382
4383 if (newlength > fcb->adsdata.Length)
4384 RtlZeroMemory(&data2[fcb->adsdata.Length], (ULONG)(newlength - fcb->adsdata.Length));
4385
4386
4387 fcb->adsdata.Buffer = data2;
4388 fcb->adsdata.Length = fcb->adsdata.MaximumLength = (USHORT)newlength;
4389
4390 fcb->Header.AllocationSize.QuadPart = newlength;
4391 fcb->Header.FileSize.QuadPart = newlength;
4392 fcb->Header.ValidDataLength.QuadPart = newlength;
4393 }
4394
4395 if (*length > 0)
4396 RtlCopyMemory(&fcb->adsdata.Buffer[off64], buf, *length);
4397
4398 fcb->Header.ValidDataLength.QuadPart = newlength;
4399
4400 mark_fcb_dirty(fcb);
4401
4402 if (fileref)
4403 mark_fileref_dirty(fileref);
4404 } else {
4405 bool compress = write_fcb_compressed(fcb), no_buf = false;
4406 uint8_t* data;
4407
4408 if (make_inline) {
4409 start_data = 0;
4410 end_data = sector_align(newlength, fcb->Vcb->superblock.sector_size);
4411 bufhead = sizeof(EXTENT_DATA) - 1;
4412 } else if (compress) {
4413 start_data = off64 & ~(uint64_t)(COMPRESSED_EXTENT_SIZE - 1);
4414 end_data = min(sector_align(off64 + *length, COMPRESSED_EXTENT_SIZE),
4415 sector_align(newlength, fcb->Vcb->superblock.sector_size));
4416 bufhead = 0;
4417 } else {
4418 start_data = off64 & ~(uint64_t)(fcb->Vcb->superblock.sector_size - 1);
4419 end_data = sector_align(off64 + *length, fcb->Vcb->superblock.sector_size);
4420 bufhead = 0;
4421 }
4422
4423 if (fcb_is_inline(fcb))
4424 end_data = max(end_data, sector_align(fcb->inode_item.st_size, Vcb->superblock.sector_size));
4425
4426 fcb->Header.ValidDataLength.QuadPart = newlength;
4427 TRACE("fcb %p FileSize = %I64x\n", fcb, fcb->Header.FileSize.QuadPart);
4428
4429 if (!make_inline && !compress && off64 == start_data && off64 + *length == end_data) {
4430 data = buf;
4431 no_buf = true;
4432 } else {
4433 data = ExAllocatePoolWithTag(PagedPool, (ULONG)(end_data - start_data + bufhead), ALLOC_TAG);
4434 if (!data) {
4435 ERR("out of memory\n");
4436 Status = STATUS_INSUFFICIENT_RESOURCES;
4437 goto end;
4438 }
4439
4440 RtlZeroMemory(data + bufhead, (ULONG)(end_data - start_data));
4441
4442 TRACE("start_data = %I64x\n", start_data);
4443 TRACE("end_data = %I64x\n", end_data);
4444
4445 if (off64 > start_data || off64 + *length < end_data) {
4446 if (changed_length) {
4447 if (fcb->inode_item.st_size > start_data)
4448 Status = read_file(fcb, data + bufhead, start_data, fcb->inode_item.st_size - start_data, NULL, Irp);
4449 else
4450 Status = STATUS_SUCCESS;
4451 } else
4452 Status = read_file(fcb, data + bufhead, start_data, end_data - start_data, NULL, Irp);
4453
4454 if (!NT_SUCCESS(Status)) {
4455 ERR("read_file returned %08x\n", Status);
4456 ExFreePool(data);
4457 goto end;
4458 }
4459 }
4460
4461 RtlCopyMemory(data + bufhead + off64 - start_data, buf, *length);
4462 }
4463
4464 if (make_inline) {
4465 Status = excise_extents(fcb->Vcb, fcb, start_data, end_data, Irp, rollback);
4466 if (!NT_SUCCESS(Status)) {
4467 ERR("error - excise_extents returned %08x\n", Status);
4468 ExFreePool(data);
4469 goto end;
4470 }
4471
4472 ed2 = (EXTENT_DATA*)data;
4473 ed2->generation = fcb->Vcb->superblock.generation;
4474 ed2->decoded_size = newlength;
4475 ed2->compression = BTRFS_COMPRESSION_NONE;
4476 ed2->encryption = BTRFS_ENCRYPTION_NONE;
4477 ed2->encoding = BTRFS_ENCODING_NONE;
4478 ed2->type = EXTENT_TYPE_INLINE;
4479
4480 Status = add_extent_to_fcb(fcb, 0, ed2, (uint16_t)(offsetof(EXTENT_DATA, data[0]) + newlength), false, NULL, rollback);
4481 if (!NT_SUCCESS(Status)) {
4482 ERR("add_extent_to_fcb returned %08x\n", Status);
4483 ExFreePool(data);
4484 goto end;
4485 }
4486
4487 fcb->inode_item.st_blocks += newlength;
4488 } else if (compress) {
4489 Status = write_compressed(fcb, start_data, end_data, data, Irp, rollback);
4490
4491 if (!NT_SUCCESS(Status)) {
4492 ERR("write_compressed returned %08x\n", Status);
4493 ExFreePool(data);
4494 goto end;
4495 }
4496 } else {
4497 if (write_irp && Irp->MdlAddress && no_buf) {
4498 bool locked = Irp->MdlAddress->MdlFlags & (MDL_PAGES_LOCKED | MDL_PARTIAL);
4499
4500 if (!locked) {
4501 Status = STATUS_SUCCESS;
4502
4503 _SEH2_TRY {
4504 MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoReadAccess);
4505 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4506 Status = _SEH2_GetExceptionCode();
4507 } _SEH2_END;
4508
4509 if (!NT_SUCCESS(Status)) {
4510 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
4511 goto end;
4512 }
4513 }
4514
4515 _SEH2_TRY {
4516 Status = do_write_file(fcb, start_data, end_data, data, Irp, true, 0, rollback);
4517 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4518 Status = _SEH2_GetExceptionCode();
4519 } _SEH2_END;
4520
4521 if (!locked)
4522 MmUnlockPages(Irp->MdlAddress);
4523 } else {
4524 _SEH2_TRY {
4525 Status = do_write_file(fcb, start_data, end_data, data, Irp, false, 0, rollback);
4526 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4527 Status = _SEH2_GetExceptionCode();
4528 } _SEH2_END;
4529 }
4530
4531 if (!NT_SUCCESS(Status)) {
4532 ERR("do_write_file returned %08x\n", Status);
4533 if (!no_buf) ExFreePool(data);
4534 goto end;
4535 }
4536 }
4537
4538 if (!no_buf)
4539 ExFreePool(data);
4540 }
4541
4542 KeQuerySystemTime(&time);
4543 win_time_to_unix(time, &now);
4544
4545 if (!pagefile) {
4546 if (fcb->ads) {
4547 if (fileref && fileref->parent)
4548 origii = &fileref->parent->fcb->inode_item;
4549 else {
4550 ERR("no parent fcb found for stream\n");
4551 Status = STATUS_INTERNAL_ERROR;
4552 goto end;
4553 }
4554 } else
4555 origii = &fcb->inode_item;
4556
4557 origii->transid = Vcb->superblock.generation;
4558 origii->sequence++;
4559
4560 if (!ccb->user_set_change_time)
4561 origii->st_ctime = now;
4562
4563 if (!fcb->ads) {
4564 if (changed_length) {
4565 TRACE("setting st_size to %I64x\n", newlength);
4566 origii->st_size = newlength;
4567 filter |= FILE_NOTIFY_CHANGE_SIZE;
4568 }
4569
4570 fcb->inode_item_changed = true;
4571 } else {
4572 fileref->parent->fcb->inode_item_changed = true;
4573
4574 if (changed_length)
4575 filter |= FILE_NOTIFY_CHANGE_STREAM_SIZE;
4576
4577 filter |= FILE_NOTIFY_CHANGE_STREAM_WRITE;
4578 }
4579
4580 if (!ccb->user_set_write_time) {
4581 origii->st_mtime = now;
4582 filter |= FILE_NOTIFY_CHANGE_LAST_WRITE;
4583 }
4584
4585 mark_fcb_dirty(fcb->ads ? fileref->parent->fcb : fcb);
4586 }
4587
4588 if (changed_length) {
4589 CC_FILE_SIZES ccfs;
4590
4591 ccfs.AllocationSize = fcb->Header.AllocationSize;
4592 ccfs.FileSize = fcb->Header.FileSize;
4593 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
4594
4595 _SEH2_TRY {
4596 CcSetFileSizes(FileObject, &ccfs);
4597 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4598 Status = _SEH2_GetExceptionCode();
4599 goto end;
4600 } _SEH2_END;
4601 }
4602
4603 fcb->subvol->root_item.ctransid = Vcb->superblock.generation;
4604 fcb->subvol->root_item.ctime = now;
4605
4606 Status = STATUS_SUCCESS;
4607
4608 if (filter != 0)
4609 queue_notification_fcb(fcb->ads ? fileref->parent : fileref, filter, fcb->ads ? FILE_ACTION_MODIFIED_STREAM : FILE_ACTION_MODIFIED,
4610 fcb->ads && fileref->dc ? &fileref->dc->name : NULL);
4611
4612 end:
4613 if (NT_SUCCESS(Status) && FileObject->Flags & FO_SYNCHRONOUS_IO && !paging_io) {
4614 TRACE("CurrentByteOffset was: %I64x\n", FileObject->CurrentByteOffset.QuadPart);
4615 FileObject->CurrentByteOffset.QuadPart = offset.QuadPart + (NT_SUCCESS(Status) ? *length : 0);
4616 TRACE("CurrentByteOffset now: %I64x\n", FileObject->CurrentByteOffset.QuadPart);
4617 }
4618
4619 if (acquired_fcb_lock)
4620 ExReleaseResourceLite(fcb->Header.Resource);
4621
4622 if (acquired_tree_lock)
4623 ExReleaseResourceLite(&Vcb->tree_lock);
4624
4625 if (paging_lock)
4626 ExReleaseResourceLite(fcb->Header.PagingIoResource);
4627
4628 return Status;
4629 }
4630
4631 NTSTATUS write_file(device_extension* Vcb, PIRP Irp, bool wait, bool deferred_write) {
4632 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
4633 void* buf;
4634 NTSTATUS Status;
4635 LARGE_INTEGER offset = IrpSp->Parameters.Write.ByteOffset;
4636 PFILE_OBJECT FileObject = IrpSp->FileObject;
4637 fcb* fcb = FileObject ? FileObject->FsContext : NULL;
4638 LIST_ENTRY rollback;
4639
4640 InitializeListHead(&rollback);
4641
4642 TRACE("write\n");
4643
4644 Irp->IoStatus.Information = 0;
4645
4646 TRACE("offset = %I64x\n", offset.QuadPart);
4647 TRACE("length = %x\n", IrpSp->Parameters.Write.Length);
4648
4649 if (!Irp->AssociatedIrp.SystemBuffer) {
4650 buf = map_user_buffer(Irp, fcb && fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
4651
4652 if (Irp->MdlAddress && !buf) {
4653 ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
4654 Status = STATUS_INSUFFICIENT_RESOURCES;
4655 goto exit;
4656 }
4657 } else
4658 buf = Irp->AssociatedIrp.SystemBuffer;
4659
4660 TRACE("buf = %p\n", buf);
4661
4662 if (fcb && !(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForWriteAccess(&fcb->lock, Irp)) {
4663 WARN("tried to write to locked region\n");
4664 Status = STATUS_FILE_LOCK_CONFLICT;
4665 goto exit;
4666 }
4667
4668 Status = write_file2(Vcb, Irp, offset, buf, &IrpSp->Parameters.Write.Length, Irp->Flags & IRP_PAGING_IO, Irp->Flags & IRP_NOCACHE,
4669 wait, deferred_write, true, &rollback);
4670
4671 if (Status == STATUS_PENDING)
4672 goto exit;
4673 else if (!NT_SUCCESS(Status)) {
4674 ERR("write_file2 returned %08x\n", Status);
4675 goto exit;
4676 }
4677
4678 if (NT_SUCCESS(Status)) {
4679 Irp->IoStatus.Information = IrpSp->Parameters.Write.Length;
4680
4681 if (diskacc && Status != STATUS_PENDING && Irp->Flags & IRP_NOCACHE) {
4682 PETHREAD thread = NULL;
4683
4684 if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread))
4685 thread = Irp->Tail.Overlay.Thread;
4686 else if (!IoIsSystemThread(PsGetCurrentThread()))
4687 thread = PsGetCurrentThread();
4688 else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp)
4689 thread = PsGetCurrentThread();
4690
4691 if (thread)
4692 fPsUpdateDiskCounters(PsGetThreadProcess(thread), 0, IrpSp->Parameters.Write.Length, 0, 1, 0);
4693 }
4694 }
4695
4696 exit:
4697 if (NT_SUCCESS(Status))
4698 clear_rollback(&rollback);
4699 else
4700 do_rollback(Vcb, &rollback);
4701
4702 return Status;
4703 }
4704
4705 _Dispatch_type_(IRP_MJ_WRITE)
4706 _Function_class_(DRIVER_DISPATCH)
4707 NTSTATUS __stdcall drv_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
4708 NTSTATUS Status;
4709 bool top_level;
4710 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
4711 device_extension* Vcb = DeviceObject->DeviceExtension;
4712 PFILE_OBJECT FileObject = IrpSp->FileObject;
4713 fcb* fcb = FileObject ? FileObject->FsContext : NULL;
4714 ccb* ccb = FileObject ? FileObject->FsContext2 : NULL;
4715 bool wait = FileObject ? IoIsOperationSynchronous(Irp) : true;
4716
4717 FsRtlEnterFileSystem();
4718
4719 top_level = is_top_level(Irp);
4720
4721 if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
4722 Status = vol_write(DeviceObject, Irp);
4723 goto exit;
4724 } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
4725 Status = STATUS_INVALID_PARAMETER;
4726 goto end;
4727 }
4728
4729 if (!fcb) {
4730 ERR("fcb was NULL\n");
4731 Status = STATUS_INVALID_PARAMETER;
4732 goto end;
4733 }
4734
4735 if (!ccb) {
4736 ERR("ccb was NULL\n");
4737 Status = STATUS_INVALID_PARAMETER;
4738 goto end;
4739 }
4740
4741 if (Irp->RequestorMode == UserMode && !(ccb->access & (FILE_WRITE_DATA | FILE_APPEND_DATA))) {
4742 WARN("insufficient permissions\n");
4743 Status = STATUS_ACCESS_DENIED;
4744 goto end;
4745 }
4746
4747 if (fcb == Vcb->volume_fcb) {
4748 if (!Vcb->locked || Vcb->locked_fileobj != FileObject) {
4749 ERR("trying to write to volume when not locked, or locked with another FileObject\n");
4750 Status = STATUS_ACCESS_DENIED;
4751 goto end;
4752 }
4753
4754 TRACE("writing directly to volume\n");
4755
4756 IoSkipCurrentIrpStackLocation(Irp);
4757
4758 Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp);
4759 goto exit;
4760 }
4761
4762 if (is_subvol_readonly(fcb->subvol, Irp)) {
4763 Status = STATUS_ACCESS_DENIED;
4764 goto end;
4765 }
4766
4767 if (Vcb->readonly) {
4768 Status = STATUS_MEDIA_WRITE_PROTECTED;
4769 goto end;
4770 }
4771
4772 _SEH2_TRY {
4773 if (IrpSp->MinorFunction & IRP_MN_COMPLETE) {
4774 CcMdlWriteComplete(IrpSp->FileObject, &IrpSp->Parameters.Write.ByteOffset, Irp->MdlAddress);
4775
4776 Irp->MdlAddress = NULL;
4777 Status = STATUS_SUCCESS;
4778 } else {
4779 if (!(Irp->Flags & IRP_PAGING_IO))
4780 FsRtlCheckOplock(fcb_oplock(fcb), Irp, NULL, NULL, NULL);
4781
4782 // Don't offload jobs when doing paging IO - otherwise this can lead to
4783 // deadlocks in CcCopyWrite.
4784 if (Irp->Flags & IRP_PAGING_IO)
4785 wait = true;
4786
4787 Status = write_file(Vcb, Irp, wait, false);
4788 }
4789 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4790 Status = _SEH2_GetExceptionCode();
4791 } _SEH2_END;
4792
4793 end:
4794 Irp->IoStatus.Status = Status;
4795
4796 TRACE("wrote %u bytes\n", Irp->IoStatus.Information);
4797
4798 if (Status != STATUS_PENDING)
4799 IoCompleteRequest(Irp, IO_NO_INCREMENT);
4800 else {
4801 IoMarkIrpPending(Irp);
4802
4803 if (!add_thread_job(Vcb, Irp))
4804 Status = do_write_job(Vcb, Irp);
4805 }
4806
4807 exit:
4808 if (top_level)
4809 IoSetTopLevelIrp(NULL);
4810
4811 TRACE("returning %08x\n", Status);
4812
4813 FsRtlExitFileSystem();
4814
4815 return Status;
4816 }