[BTRFS]
[reactos.git] / reactos / drivers / filesystems / btrfs / write.c
1 /* Copyright (c) Mark Harmstone 2016
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19
20 // BOOL did_split;
21 BOOL chunk_test = FALSE;
22
23 typedef struct {
24 UINT64 start;
25 UINT64 end;
26 UINT8* data;
27 UINT32 skip_start;
28 UINT32 skip_end;
29 } write_stripe;
30
31 typedef struct {
32 LONG stripes_left;
33 KEVENT event;
34 } read_stripe_master;
35
36 typedef struct {
37 PIRP Irp;
38 PDEVICE_OBJECT devobj;
39 IO_STATUS_BLOCK iosb;
40 read_stripe_master* master;
41 } read_stripe;
42
43 // static BOOL extent_item_is_shared(EXTENT_ITEM* ei, ULONG len);
44 static NTSTATUS STDCALL write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr);
45 static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback);
46
47 BOOL find_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address) {
48 LIST_ENTRY* le;
49 space* s;
50
51 TRACE("(%p, %llx, %llx, %p)\n", Vcb, c->offset, length, address);
52
53 if (IsListEmpty(&c->space_size))
54 return FALSE;
55
56 le = c->space_size.Flink;
57 while (le != &c->space_size) {
58 s = CONTAINING_RECORD(le, space, list_entry_size);
59
60 if (s->size == length) {
61 *address = s->address;
62 return TRUE;
63 } else if (s->size < length) {
64 if (le == c->space_size.Flink)
65 return FALSE;
66
67 s = CONTAINING_RECORD(le->Blink, space, list_entry_size);
68
69 *address = s->address;
70 return TRUE;
71 }
72
73 le = le->Flink;
74 }
75
76 s = CONTAINING_RECORD(c->space_size.Blink, space, list_entry_size);
77
78 if (s->size > length) {
79 *address = s->address;
80 return TRUE;
81 }
82
83 return FALSE;
84 }
85
86 chunk* get_chunk_from_address(device_extension* Vcb, UINT64 address) {
87 LIST_ENTRY* le2;
88 chunk* c;
89
90 ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
91
92 le2 = Vcb->chunks.Flink;
93 while (le2 != &Vcb->chunks) {
94 c = CONTAINING_RECORD(le2, chunk, list_entry);
95
96 // TRACE("chunk: %llx, %llx\n", c->offset, c->chunk_item->size);
97
98 if (address >= c->offset && address < c->offset + c->chunk_item->size) {
99 ExReleaseResourceLite(&Vcb->chunk_lock);
100 return c;
101 }
102
103 le2 = le2->Flink;
104 }
105
106 ExReleaseResourceLite(&Vcb->chunk_lock);
107
108 return NULL;
109 }
110
111 typedef struct {
112 space* dh;
113 device* device;
114 } stripe;
115
116 static UINT64 find_new_chunk_address(device_extension* Vcb, UINT64 size) {
117 UINT64 lastaddr;
118 LIST_ENTRY* le;
119
120 lastaddr = 0;
121
122 le = Vcb->chunks.Flink;
123 while (le != &Vcb->chunks) {
124 chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
125
126 if (c->offset >= lastaddr + size)
127 return lastaddr;
128
129 lastaddr = c->offset + c->chunk_item->size;
130
131 le = le->Flink;
132 }
133
134 return lastaddr;
135 }
136
137 static BOOL find_new_dup_stripes(device_extension* Vcb, stripe* stripes, UINT64 max_stripe_size) {
138 UINT64 j, devnum, devusage = 0xffffffffffffffff;
139 space *devdh1 = NULL, *devdh2 = NULL;
140
141 for (j = 0; j < Vcb->superblock.num_devices; j++) {
142 if (!Vcb->devices[j].readonly) {
143 UINT64 usage = (Vcb->devices[j].devitem.bytes_used * 4096) / Vcb->devices[j].devitem.num_bytes;
144
145 // favour devices which have been used the least
146 if (usage < devusage) {
147 if (!IsListEmpty(&Vcb->devices[j].space)) {
148 LIST_ENTRY* le;
149 space *dh1 = NULL, *dh2 = NULL;
150
151 le = Vcb->devices[j].space.Flink;
152 while (le != &Vcb->devices[j].space) {
153 space* dh = CONTAINING_RECORD(le, space, list_entry);
154
155 if (dh->size >= max_stripe_size && (!dh1 || dh->size < dh1->size)) {
156 dh2 = dh1;
157 dh1 = dh;
158 }
159
160 le = le->Flink;
161 }
162
163 if (dh1 && (dh2 || dh1->size >= 2 * max_stripe_size)) {
164 devnum = j;
165 devusage = usage;
166 devdh1 = dh1;
167 devdh2 = dh2 ? dh2 : dh1;
168 }
169 }
170 }
171 }
172 }
173
174 if (!devdh1)
175 return FALSE;
176
177 stripes[0].device = &Vcb->devices[devnum];
178 stripes[0].dh = devdh1;
179 stripes[1].device = stripes[0].device;
180 stripes[1].dh = devdh2;
181
182 return TRUE;
183 }
184
185 static BOOL find_new_stripe(device_extension* Vcb, stripe* stripes, UINT16 i, UINT64 max_stripe_size, UINT16 type) {
186 UINT64 j, k, devnum = 0xffffffffffffffff, devusage = 0xffffffffffffffff;
187 space* devdh = NULL;
188
189 for (j = 0; j < Vcb->superblock.num_devices; j++) {
190 UINT64 usage;
191 BOOL skip = FALSE;
192
193 if (Vcb->devices[j].readonly)
194 continue;
195
196 // skip this device if it already has a stripe
197 if (i > 0) {
198 for (k = 0; k < i; k++) {
199 if (stripes[k].device == &Vcb->devices[j]) {
200 skip = TRUE;
201 break;
202 }
203 }
204 }
205
206 if (!skip) {
207 usage = (Vcb->devices[j].devitem.bytes_used * 4096) / Vcb->devices[j].devitem.num_bytes;
208
209 // favour devices which have been used the least
210 if (usage < devusage) {
211 if (!IsListEmpty(&Vcb->devices[j].space)) {
212 LIST_ENTRY* le;
213
214 le = Vcb->devices[j].space.Flink;
215 while (le != &Vcb->devices[j].space) {
216 space* dh = CONTAINING_RECORD(le, space, list_entry);
217
218 if ((devnum != j && dh->size >= max_stripe_size) ||
219 (devnum == j && dh->size >= max_stripe_size && dh->size < devdh->size)
220 ) {
221 devdh = dh;
222 devnum = j;
223 devusage = usage;
224 }
225
226 le = le->Flink;
227 }
228 }
229 }
230 }
231 }
232
233 if (!devdh)
234 return FALSE;
235
236 stripes[i].dh = devdh;
237 stripes[i].device = &Vcb->devices[devnum];
238
239 return TRUE;
240 }
241
242 chunk* alloc_chunk(device_extension* Vcb, UINT64 flags) {
243 UINT64 max_stripe_size, max_chunk_size, stripe_size, stripe_length, factor;
244 UINT64 total_size = 0, i, logaddr;
245 UINT16 type, num_stripes, sub_stripes, max_stripes, min_stripes;
246 stripe* stripes = NULL;
247 ULONG cisize;
248 CHUNK_ITEM_STRIPE* cis;
249 chunk* c = NULL;
250 space* s = NULL;
251 BOOL success = FALSE;
252
253 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
254
255 for (i = 0; i < Vcb->superblock.num_devices; i++) {
256 total_size += Vcb->devices[i].devitem.num_bytes;
257 }
258 TRACE("total_size = %llx\n", total_size);
259
260 // We purposely check for DATA first - mixed blocks have the same size
261 // as DATA ones.
262 if (flags & BLOCK_FLAG_DATA) {
263 max_stripe_size = 0x40000000; // 1 GB
264 max_chunk_size = 10 * max_stripe_size;
265 } else if (flags & BLOCK_FLAG_METADATA) {
266 if (total_size > 0xC80000000) // 50 GB
267 max_stripe_size = 0x40000000; // 1 GB
268 else
269 max_stripe_size = 0x10000000; // 256 MB
270
271 max_chunk_size = max_stripe_size;
272 } else if (flags & BLOCK_FLAG_SYSTEM) {
273 max_stripe_size = 0x2000000; // 32 MB
274 max_chunk_size = 2 * max_stripe_size;
275 }
276
277 max_chunk_size = min(max_chunk_size, total_size / 10); // cap at 10%
278
279 TRACE("would allocate a new chunk of %llx bytes and stripe %llx\n", max_chunk_size, max_stripe_size);
280
281 if (flags & BLOCK_FLAG_DUPLICATE) {
282 min_stripes = 2;
283 max_stripes = 2;
284 sub_stripes = 0;
285 type = BLOCK_FLAG_DUPLICATE;
286 } else if (flags & BLOCK_FLAG_RAID0) {
287 min_stripes = 2;
288 max_stripes = Vcb->superblock.num_devices;
289 sub_stripes = 0;
290 type = BLOCK_FLAG_RAID0;
291 } else if (flags & BLOCK_FLAG_RAID1) {
292 min_stripes = 2;
293 max_stripes = 2;
294 sub_stripes = 1;
295 type = BLOCK_FLAG_RAID1;
296 } else if (flags & BLOCK_FLAG_RAID10) {
297 min_stripes = 4;
298 max_stripes = Vcb->superblock.num_devices;
299 sub_stripes = 2;
300 type = BLOCK_FLAG_RAID10;
301 } else if (flags & BLOCK_FLAG_RAID5) {
302 min_stripes = 3;
303 max_stripes = Vcb->superblock.num_devices;
304 sub_stripes = 1;
305 type = BLOCK_FLAG_RAID5;
306 } else if (flags & BLOCK_FLAG_RAID6) {
307 min_stripes = 4;
308 max_stripes = 257;
309 sub_stripes = 1;
310 type = BLOCK_FLAG_RAID6;
311 } else { // SINGLE
312 min_stripes = 1;
313 max_stripes = 1;
314 sub_stripes = 1;
315 type = 0;
316 }
317
318 stripes = ExAllocatePoolWithTag(PagedPool, sizeof(stripe) * max_stripes, ALLOC_TAG);
319 if (!stripes) {
320 ERR("out of memory\n");
321 goto end;
322 }
323
324 num_stripes = 0;
325
326 if (type == BLOCK_FLAG_DUPLICATE) {
327 if (!find_new_dup_stripes(Vcb, stripes, max_stripe_size))
328 goto end;
329 else
330 num_stripes = max_stripes;
331 } else {
332 for (i = 0; i < max_stripes; i++) {
333 if (!find_new_stripe(Vcb, stripes, i, max_stripe_size, type))
334 break;
335 else
336 num_stripes++;
337 }
338 }
339
340 // for RAID10, round down to an even number of stripes
341 if (type == BLOCK_FLAG_RAID10 && (num_stripes % sub_stripes) != 0) {
342 num_stripes -= num_stripes % sub_stripes;
343 }
344
345 if (num_stripes < min_stripes) {
346 WARN("found %u stripes, needed at least %u\n", num_stripes, min_stripes);
347 goto end;
348 }
349
350 c = ExAllocatePoolWithTag(NonPagedPool, sizeof(chunk), ALLOC_TAG);
351 if (!c) {
352 ERR("out of memory\n");
353 goto end;
354 }
355
356 cisize = sizeof(CHUNK_ITEM) + (num_stripes * sizeof(CHUNK_ITEM_STRIPE));
357 c->chunk_item = ExAllocatePoolWithTag(NonPagedPool, cisize, ALLOC_TAG);
358 if (!c->chunk_item) {
359 ERR("out of memory\n");
360 goto end;
361 }
362
363 stripe_length = 0x10000; // FIXME? BTRFS_STRIPE_LEN in kernel
364
365 stripe_size = max_stripe_size;
366 for (i = 0; i < num_stripes; i++) {
367 if (stripes[i].dh->size < stripe_size)
368 stripe_size = stripes[i].dh->size;
369 }
370
371 if (type == 0 || type == BLOCK_FLAG_DUPLICATE || type == BLOCK_FLAG_RAID1)
372 factor = 1;
373 else if (type == BLOCK_FLAG_RAID0)
374 factor = num_stripes;
375 else if (type == BLOCK_FLAG_RAID10)
376 factor = num_stripes / sub_stripes;
377 else if (type == BLOCK_FLAG_RAID5)
378 factor = num_stripes - 1;
379 else if (type == BLOCK_FLAG_RAID6)
380 factor = num_stripes - 2;
381
382 if (stripe_size * factor > max_chunk_size)
383 stripe_size = max_chunk_size / factor;
384
385 if (stripe_size % stripe_length > 0)
386 stripe_size -= stripe_size % stripe_length;
387
388 if (stripe_size == 0)
389 goto end;
390
391 c->chunk_item->size = stripe_size * factor;
392 c->chunk_item->root_id = Vcb->extent_root->id;
393 c->chunk_item->stripe_length = stripe_length;
394 c->chunk_item->type = flags;
395 c->chunk_item->opt_io_alignment = c->chunk_item->stripe_length;
396 c->chunk_item->opt_io_width = c->chunk_item->stripe_length;
397 c->chunk_item->sector_size = stripes[0].device->devitem.minimal_io_size;
398 c->chunk_item->num_stripes = num_stripes;
399 c->chunk_item->sub_stripes = sub_stripes;
400
401 c->devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * num_stripes, ALLOC_TAG);
402 if (!c->devices) {
403 ERR("out of memory\n");
404 goto end;
405 }
406
407 cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
408 for (i = 0; i < num_stripes; i++) {
409 cis[i].dev_id = stripes[i].device->devitem.dev_id;
410
411 if (type == BLOCK_FLAG_DUPLICATE && i == 1 && stripes[i].dh == stripes[0].dh)
412 cis[i].offset = stripes[0].dh->address + stripe_size;
413 else
414 cis[i].offset = stripes[i].dh->address;
415
416 cis[i].dev_uuid = stripes[i].device->devitem.device_uuid;
417
418 c->devices[i] = stripes[i].device;
419 }
420
421 logaddr = find_new_chunk_address(Vcb, c->chunk_item->size);
422
423 Vcb->superblock.chunk_root_generation = Vcb->superblock.generation;
424
425 c->size = cisize;
426 c->offset = logaddr;
427 c->used = c->oldused = 0;
428 c->cache = NULL;
429 c->readonly = FALSE;
430 InitializeListHead(&c->space);
431 InitializeListHead(&c->space_size);
432 InitializeListHead(&c->deleting);
433 InitializeListHead(&c->changed_extents);
434
435 InitializeListHead(&c->range_locks);
436 KeInitializeSpinLock(&c->range_locks_spinlock);
437 KeInitializeEvent(&c->range_locks_event, NotificationEvent, FALSE);
438
439 ExInitializeResourceLite(&c->lock);
440 ExInitializeResourceLite(&c->changed_extents_lock);
441
442 s = ExAllocatePoolWithTag(NonPagedPool, sizeof(space), ALLOC_TAG);
443 if (!s) {
444 ERR("out of memory\n");
445 goto end;
446 }
447
448 s->address = c->offset;
449 s->size = c->chunk_item->size;
450 InsertTailList(&c->space, &s->list_entry);
451 InsertTailList(&c->space_size, &s->list_entry_size);
452
453 protect_superblocks(Vcb, c);
454
455 for (i = 0; i < num_stripes; i++) {
456 stripes[i].device->devitem.bytes_used += stripe_size;
457
458 space_list_subtract2(Vcb, &stripes[i].device->space, NULL, cis[i].offset, stripe_size, NULL);
459 }
460
461 success = TRUE;
462
463 end:
464 if (stripes)
465 ExFreePool(stripes);
466
467 if (!success) {
468 if (c && c->chunk_item) ExFreePool(c->chunk_item);
469 if (c) ExFreePool(c);
470 if (s) ExFreePool(s);
471 } else {
472 LIST_ENTRY* le;
473 BOOL done = FALSE;
474
475 le = Vcb->chunks.Flink;
476 while (le != &Vcb->chunks) {
477 chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry);
478
479 if (c2->offset > c->offset) {
480 InsertHeadList(le->Blink, &c->list_entry);
481 done = TRUE;
482 break;
483 }
484
485 le = le->Flink;
486 }
487
488 if (!done)
489 InsertTailList(&Vcb->chunks, &c->list_entry);
490
491 c->created = TRUE;
492 InsertTailList(&Vcb->chunks_changed, &c->list_entry_changed);
493 }
494
495 ExReleaseResourceLite(&Vcb->chunk_lock);
496
497 return success ? c : NULL;
498 }
499
500 static NTSTATUS prepare_raid0_write(chunk* c, UINT64 address, void* data, UINT32 length, write_stripe* stripes) {
501 UINT64 startoff, endoff;
502 UINT16 startoffstripe, endoffstripe, stripenum;
503 UINT64 pos, *stripeoff;
504 UINT32 i;
505
506 stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes, ALLOC_TAG);
507 if (!stripeoff) {
508 ERR("out of memory\n");
509 return STATUS_INSUFFICIENT_RESOURCES;
510 }
511
512 get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
513 get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
514
515 for (i = 0; i < c->chunk_item->num_stripes; i++) {
516 if (startoffstripe > i) {
517 stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
518 } else if (startoffstripe == i) {
519 stripes[i].start = startoff;
520 } else {
521 stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
522 }
523
524 if (endoffstripe > i) {
525 stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
526 } else if (endoffstripe == i) {
527 stripes[i].end = endoff + 1;
528 } else {
529 stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length);
530 }
531
532 if (stripes[i].start != stripes[i].end) {
533 stripes[i].data = ExAllocatePoolWithTag(NonPagedPool, stripes[i].end - stripes[i].start, ALLOC_TAG);
534
535 if (!stripes[i].data) {
536 ERR("out of memory\n");
537 ExFreePool(stripeoff);
538 return STATUS_INSUFFICIENT_RESOURCES;
539 }
540 }
541 }
542
543 pos = 0;
544 RtlZeroMemory(stripeoff, sizeof(UINT64) * c->chunk_item->num_stripes);
545
546 stripenum = startoffstripe;
547 while (pos < length) {
548 if (pos == 0) {
549 UINT32 writelen = min(stripes[stripenum].end - stripes[stripenum].start,
550 c->chunk_item->stripe_length - (stripes[stripenum].start % c->chunk_item->stripe_length));
551
552 RtlCopyMemory(stripes[stripenum].data, data, writelen);
553 stripeoff[stripenum] += writelen;
554 pos += writelen;
555 } else if (length - pos < c->chunk_item->stripe_length) {
556 RtlCopyMemory(stripes[stripenum].data + stripeoff[stripenum], (UINT8*)data + pos, length - pos);
557 break;
558 } else {
559 RtlCopyMemory(stripes[stripenum].data + stripeoff[stripenum], (UINT8*)data + pos, c->chunk_item->stripe_length);
560 stripeoff[stripenum] += c->chunk_item->stripe_length;
561 pos += c->chunk_item->stripe_length;
562 }
563
564 stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
565 }
566
567 ExFreePool(stripeoff);
568
569 return STATUS_SUCCESS;
570 }
571
572 static NTSTATUS prepare_raid10_write(chunk* c, UINT64 address, void* data, UINT32 length, write_stripe* stripes) {
573 UINT64 startoff, endoff;
574 UINT16 startoffstripe, endoffstripe, stripenum;
575 UINT64 pos, *stripeoff;
576 UINT32 i;
577
578 stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes, ALLOC_TAG);
579 if (!stripeoff) {
580 ERR("out of memory\n");
581 return STATUS_INSUFFICIENT_RESOURCES;
582 }
583
584 get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &startoff, &startoffstripe);
585 get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &endoff, &endoffstripe);
586
587 startoffstripe *= c->chunk_item->sub_stripes;
588 endoffstripe *= c->chunk_item->sub_stripes;
589
590 for (i = 0; i < c->chunk_item->num_stripes; i += c->chunk_item->sub_stripes) {
591 UINT16 j;
592
593 if (startoffstripe > i) {
594 stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
595 } else if (startoffstripe == i) {
596 stripes[i].start = startoff;
597 } else {
598 stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
599 }
600
601 if (endoffstripe > i) {
602 stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
603 } else if (endoffstripe == i) {
604 stripes[i].end = endoff + 1;
605 } else {
606 stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length);
607 }
608
609 if (stripes[i].start != stripes[i].end) {
610 stripes[i].data = ExAllocatePoolWithTag(NonPagedPool, stripes[i].end - stripes[i].start, ALLOC_TAG);
611
612 if (!stripes[i].data) {
613 ERR("out of memory\n");
614 ExFreePool(stripeoff);
615 return STATUS_INSUFFICIENT_RESOURCES;
616 }
617 }
618
619 for (j = 1; j < c->chunk_item->sub_stripes; j++) {
620 stripes[i+j].start = stripes[i].start;
621 stripes[i+j].end = stripes[i].end;
622 stripes[i+j].data = stripes[i].data;
623 }
624 }
625
626 pos = 0;
627 RtlZeroMemory(stripeoff, sizeof(UINT64) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
628
629 stripenum = startoffstripe / c->chunk_item->sub_stripes;
630 while (pos < length) {
631 if (pos == 0) {
632 UINT32 writelen = min(stripes[stripenum * c->chunk_item->sub_stripes].end - stripes[stripenum * c->chunk_item->sub_stripes].start,
633 c->chunk_item->stripe_length - (stripes[stripenum * c->chunk_item->sub_stripes].start % c->chunk_item->stripe_length));
634
635 RtlCopyMemory(stripes[stripenum * c->chunk_item->sub_stripes].data, data, writelen);
636 stripeoff[stripenum] += writelen;
637 pos += writelen;
638 } else if (length - pos < c->chunk_item->stripe_length) {
639 RtlCopyMemory(stripes[stripenum * c->chunk_item->sub_stripes].data + stripeoff[stripenum], (UINT8*)data + pos, length - pos);
640 break;
641 } else {
642 RtlCopyMemory(stripes[stripenum * c->chunk_item->sub_stripes].data + stripeoff[stripenum], (UINT8*)data + pos, c->chunk_item->stripe_length);
643 stripeoff[stripenum] += c->chunk_item->stripe_length;
644 pos += c->chunk_item->stripe_length;
645 }
646
647 stripenum = (stripenum + 1) % (c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
648 }
649
650 ExFreePool(stripeoff);
651
652 return STATUS_SUCCESS;
653 }
654
655 static NTSTATUS STDCALL read_stripe_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID ptr) {
656 read_stripe* stripe = ptr;
657 read_stripe_master* master = stripe->master;
658 ULONG stripes_left = InterlockedDecrement(&master->stripes_left);
659
660 stripe->iosb = Irp->IoStatus;
661
662 if (stripes_left == 0)
663 KeSetEvent(&master->event, 0, FALSE);
664
665 return STATUS_MORE_PROCESSING_REQUIRED;
666 }
667
668 static NTSTATUS make_read_irp(PIRP old_irp, read_stripe* stripe, UINT64 offset, void* data, UINT32 length) {
669 PIO_STACK_LOCATION IrpSp;
670 PIRP Irp;
671
672 if (!old_irp) {
673 Irp = IoAllocateIrp(stripe->devobj->StackSize, FALSE);
674
675 if (!Irp) {
676 ERR("IoAllocateIrp failed\n");
677 return STATUS_INSUFFICIENT_RESOURCES;
678 }
679 } else {
680 Irp = IoMakeAssociatedIrp(old_irp, stripe->devobj->StackSize);
681
682 if (!Irp) {
683 ERR("IoMakeAssociatedIrp failed\n");
684 return STATUS_INSUFFICIENT_RESOURCES;
685 }
686 }
687
688 IrpSp = IoGetNextIrpStackLocation(Irp);
689 IrpSp->MajorFunction = IRP_MJ_READ;
690
691 if (stripe->devobj->Flags & DO_BUFFERED_IO) {
692 FIXME("FIXME - buffered IO\n");
693 IoFreeIrp(Irp);
694 return STATUS_INTERNAL_ERROR;
695 } else if (stripe->devobj->Flags & DO_DIRECT_IO) {
696 Irp->MdlAddress = IoAllocateMdl(data, length, FALSE, FALSE, NULL);
697 if (!Irp->MdlAddress) {
698 ERR("IoAllocateMdl failed\n");
699 IoFreeIrp(Irp);
700 return STATUS_INSUFFICIENT_RESOURCES;
701 }
702
703 MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoWriteAccess);
704 } else {
705 Irp->UserBuffer = data;
706 }
707
708 IrpSp->Parameters.Read.Length = length;
709 IrpSp->Parameters.Read.ByteOffset.QuadPart = offset;
710
711 Irp->UserIosb = &stripe->iosb;
712
713 IoSetCompletionRoutine(Irp, read_stripe_completion, stripe, TRUE, TRUE, TRUE);
714
715 stripe->Irp = Irp;
716
717 return STATUS_SUCCESS;
718 }
719
720 static NTSTATUS prepare_raid5_write(PIRP Irp, chunk* c, UINT64 address, void* data, UINT32 length, write_stripe* stripes) {
721 UINT64 startoff, endoff;
722 UINT16 startoffstripe, endoffstripe, stripenum, parity, logstripe;
723 UINT64 start = 0xffffffffffffffff, end = 0;
724 UINT64 pos, stripepos;
725 UINT32 firststripesize, laststripesize;
726 UINT32 i;
727 UINT8* data2 = (UINT8*)data;
728 UINT32 num_reads;
729 BOOL same_stripe = FALSE, multiple_stripes;
730
731 get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes - 1, &startoff, &startoffstripe);
732 get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes - 1, &endoff, &endoffstripe);
733
734 for (i = 0; i < c->chunk_item->num_stripes - 1; i++) {
735 UINT64 ststart, stend;
736
737 if (startoffstripe > i) {
738 ststart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
739 } else if (startoffstripe == i) {
740 ststart = startoff;
741 } else {
742 ststart = startoff - (startoff % c->chunk_item->stripe_length);
743 }
744
745 if (endoffstripe > i) {
746 stend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
747 } else if (endoffstripe == i) {
748 stend = endoff + 1;
749 } else {
750 stend = endoff - (endoff % c->chunk_item->stripe_length);
751 }
752
753 if (ststart != stend) {
754 stripes[i].start = ststart;
755 stripes[i].end = stend;
756
757 if (ststart < start) {
758 start = ststart;
759 firststripesize = c->chunk_item->stripe_length - (ststart % c->chunk_item->stripe_length);
760 }
761
762 if (stend > end) {
763 end = stend;
764 laststripesize = stend % c->chunk_item->stripe_length;
765 if (laststripesize == 0)
766 laststripesize = c->chunk_item->stripe_length;
767 }
768 }
769 }
770
771 if (start == end) {
772 ERR("error: start == end (%llx)\n", start);
773 return STATUS_INTERNAL_ERROR;
774 }
775
776 if (startoffstripe == endoffstripe && start / c->chunk_item->stripe_length == end / c->chunk_item->stripe_length) {
777 firststripesize = end - start;
778 laststripesize = firststripesize;
779 }
780
781 for (i = 0; i < c->chunk_item->num_stripes; i++) {
782 stripes[i].data = ExAllocatePoolWithTag(NonPagedPool, end - start, ALLOC_TAG);
783 if (!stripes[i].data) {
784 ERR("out of memory\n");
785 return STATUS_INSUFFICIENT_RESOURCES;
786 }
787
788 if (i < c->chunk_item->num_stripes - 1) {
789 if (stripes[i].start == 0 && stripes[i].end == 0)
790 stripes[i].start = stripes[i].end = start;
791 }
792 }
793
794 num_reads = 0;
795 multiple_stripes = (end - 1) / c->chunk_item->stripe_length != start / c->chunk_item->stripe_length;
796
797 for (i = 0; i < c->chunk_item->num_stripes - 1; i++) {
798 if (stripes[i].start == stripes[i].end) {
799 num_reads++;
800
801 if (multiple_stripes)
802 num_reads++;
803 } else {
804 if (stripes[i].start > start)
805 num_reads++;
806
807 if (stripes[i].end < end)
808 num_reads++;
809 }
810 }
811
812 if (num_reads > 0) {
813 UINT32 j;
814 read_stripe_master* master;
815 read_stripe* read_stripes;
816 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
817 NTSTATUS Status;
818
819 master = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_stripe_master), ALLOC_TAG);
820 if (!master) {
821 ERR("out of memory\n");
822 return STATUS_INSUFFICIENT_RESOURCES;
823 }
824
825 read_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_stripe) * num_reads, ALLOC_TAG);
826 if (!read_stripes) {
827 ERR("out of memory\n");
828 ExFreePool(master);
829 return STATUS_INSUFFICIENT_RESOURCES;
830 }
831
832 parity = (((address - c->offset) / ((c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
833 stripenum = (parity + 1) % c->chunk_item->num_stripes;
834
835 j = 0;
836 for (i = 0; i < c->chunk_item->num_stripes - 1; i++) {
837 if (stripes[i].start > start || stripes[i].start == stripes[i].end) {
838 ULONG readlen;
839
840 read_stripes[j].Irp = NULL;
841 read_stripes[j].devobj = c->devices[stripenum]->devobj;
842 read_stripes[j].master = master;
843
844 if (stripes[i].start != stripes[i].end)
845 readlen = stripes[i].start - start;
846 else
847 readlen = firststripesize;
848
849 Status = make_read_irp(Irp, &read_stripes[j], start + cis[stripenum].offset, stripes[stripenum].data, readlen);
850
851 if (!NT_SUCCESS(Status)) {
852 ERR("make_read_irp returned %08x\n", Status);
853 j++;
854 goto readend;
855 }
856
857 stripes[stripenum].skip_start = readlen;
858
859 j++;
860 if (j == num_reads) break;
861 }
862
863 stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
864 }
865
866 if (j < num_reads) {
867 parity = (((address + length - 1 - c->offset) / ((c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
868 stripenum = (parity + 1) % c->chunk_item->num_stripes;
869
870 for (i = 0; i < c->chunk_item->num_stripes - 1; i++) {
871 if ((stripes[i].start != stripes[i].end && stripes[i].end < end) || (stripes[i].start == stripes[i].end && multiple_stripes)) {
872 read_stripes[j].Irp = NULL;
873 read_stripes[j].devobj = c->devices[stripenum]->devobj;
874 read_stripes[j].master = master;
875
876 if (stripes[i].start == stripes[i].end) {
877 Status = make_read_irp(Irp, &read_stripes[j], start + firststripesize + cis[stripenum].offset, &stripes[stripenum].data[firststripesize], laststripesize);
878 stripes[stripenum].skip_end = laststripesize;
879 } else {
880 Status = make_read_irp(Irp, &read_stripes[j], stripes[i].end + cis[stripenum].offset, &stripes[stripenum].data[stripes[i].end - start], end - stripes[i].end);
881 stripes[stripenum].skip_end = end - stripes[i].end;
882 }
883
884 if (!NT_SUCCESS(Status)) {
885 ERR("make_read_irp returned %08x\n", Status);
886 j++;
887 goto readend;
888 }
889
890 j++;
891 if (j == num_reads) break;
892 }
893
894 stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
895 }
896 }
897
898 master->stripes_left = j;
899 KeInitializeEvent(&master->event, NotificationEvent, FALSE);
900
901 for (i = 0; i < j; i++) {
902 Status = IoCallDriver(read_stripes[i].devobj, read_stripes[i].Irp);
903 if (!NT_SUCCESS(Status)) {
904 ERR("IoCallDriver returned %08x\n", Status);
905 goto readend;
906 }
907 }
908
909 KeWaitForSingleObject(&master->event, Executive, KernelMode, FALSE, NULL);
910
911 for (i = 0; i < j; i++) {
912 if (!NT_SUCCESS(read_stripes[i].iosb.Status)) {
913 Status = read_stripes[i].iosb.Status;
914 goto readend;
915 }
916 }
917
918 Status = STATUS_SUCCESS;
919
920 readend:
921 for (i = 0; i < j; i++) {
922 if (read_stripes[i].Irp) {
923 if (read_stripes[i].devobj->Flags & DO_DIRECT_IO) {
924 MmUnlockPages(read_stripes[i].Irp->MdlAddress);
925 IoFreeMdl(read_stripes[i].Irp->MdlAddress);
926 }
927
928 IoFreeIrp(read_stripes[i].Irp); // FIXME - what if IoCallDriver fails and other Irps are still running?
929 }
930 }
931
932 ExFreePool(read_stripes);
933 ExFreePool(master);
934
935 if (!NT_SUCCESS(Status))
936 return Status;
937 }
938
939 pos = 0;
940
941 parity = (((address - c->offset) / ((c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
942 stripepos = 0;
943
944 if ((address - c->offset) % (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 1)) > 0) {
945 UINT16 firstdata;
946 BOOL first = TRUE;
947
948 stripenum = (parity + 1) % c->chunk_item->num_stripes;
949
950 for (logstripe = 0; logstripe < c->chunk_item->num_stripes - 1; logstripe++) {
951 ULONG copylen;
952
953 if (pos >= length)
954 break;
955
956 if (stripes[logstripe].start < start + firststripesize && stripes[logstripe].start != stripes[logstripe].end) {
957 copylen = min(start + firststripesize - stripes[logstripe].start, length - pos);
958
959 if (!first && copylen < c->chunk_item->stripe_length) {
960 same_stripe = TRUE;
961 break;
962 }
963
964 RtlCopyMemory(&stripes[stripenum].data[firststripesize - copylen], &data2[pos], copylen);
965
966 pos += copylen;
967 first = FALSE;
968 }
969
970 stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
971 }
972
973 firstdata = parity == 0 ? 1 : 0;
974
975 RtlCopyMemory(stripes[parity].data, stripes[firstdata].data, firststripesize);
976
977 for (i = firstdata + 1; i < c->chunk_item->num_stripes; i++) {
978 if (i != parity)
979 do_xor(&stripes[parity].data[0], &stripes[i].data[0], firststripesize);
980 }
981
982 if (!same_stripe) {
983 stripepos = firststripesize;
984 parity = (parity + 1) % c->chunk_item->num_stripes;
985 }
986 }
987
988 while (length >= pos + c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 1)) {
989 UINT16 firstdata;
990
991 stripenum = (parity + 1) % c->chunk_item->num_stripes;
992
993 for (i = 0; i < c->chunk_item->num_stripes - 1; i++) {
994 RtlCopyMemory(&stripes[stripenum].data[stripepos], &data2[pos], c->chunk_item->stripe_length);
995
996 pos += c->chunk_item->stripe_length;
997 stripenum = (stripenum +1) % c->chunk_item->num_stripes;
998 }
999
1000 firstdata = parity == 0 ? 1 : 0;
1001
1002 RtlCopyMemory(&stripes[parity].data[stripepos], &stripes[firstdata].data[stripepos], c->chunk_item->stripe_length);
1003
1004 for (i = firstdata + 1; i < c->chunk_item->num_stripes; i++) {
1005 if (i != parity)
1006 do_xor(&stripes[parity].data[stripepos], &stripes[i].data[stripepos], c->chunk_item->stripe_length);
1007 }
1008
1009 parity = (parity + 1) % c->chunk_item->num_stripes;
1010 stripepos += c->chunk_item->stripe_length;
1011 }
1012
1013 if (pos < length) {
1014 UINT16 firstdata;
1015
1016 if (!same_stripe) {
1017 stripenum = (parity + 1) % c->chunk_item->num_stripes;
1018 i = 0;
1019 } else
1020 i = logstripe;
1021
1022 while (pos < length) {
1023 ULONG copylen;
1024
1025 copylen = min(stripes[i].end - start - stripepos, length - pos);
1026
1027 RtlCopyMemory(&stripes[stripenum].data[stripepos], &data2[pos], copylen);
1028
1029 pos += copylen;
1030 stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
1031 i++;
1032 }
1033
1034 firstdata = parity == 0 ? 1 : 0;
1035
1036 RtlCopyMemory(&stripes[parity].data[stripepos], &stripes[firstdata].data[stripepos], laststripesize);
1037
1038 for (i = firstdata + 1; i < c->chunk_item->num_stripes; i++) {
1039 if (i != parity)
1040 do_xor(&stripes[parity].data[stripepos], &stripes[i].data[stripepos], laststripesize);
1041 }
1042 }
1043
1044 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1045 stripes[i].start = start;
1046 stripes[i].end = end;
1047 }
1048
1049 return STATUS_SUCCESS;
1050 }
1051
1052 static NTSTATUS prepare_raid6_write(PIRP Irp, chunk* c, UINT64 address, void* data, UINT32 length, write_stripe* stripes) {
1053 UINT64 startoff, endoff;
1054 UINT16 startoffstripe, endoffstripe, stripenum, parity1, parity2, logstripe;
1055 UINT64 start = 0xffffffffffffffff, end = 0;
1056 UINT64 pos, stripepos;
1057 UINT32 firststripesize, laststripesize;
1058 UINT32 i;
1059 UINT8* data2 = (UINT8*)data;
1060 UINT32 num_reads;
1061 BOOL same_stripe = FALSE, multiple_stripes;
1062
1063 get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes - 2, &startoff, &startoffstripe);
1064 get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes - 2, &endoff, &endoffstripe);
1065
1066 for (i = 0; i < c->chunk_item->num_stripes - 2; i++) {
1067 UINT64 ststart, stend;
1068
1069 if (startoffstripe > i) {
1070 ststart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1071 } else if (startoffstripe == i) {
1072 ststart = startoff;
1073 } else {
1074 ststart = startoff - (startoff % c->chunk_item->stripe_length);
1075 }
1076
1077 if (endoffstripe > i) {
1078 stend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1079 } else if (endoffstripe == i) {
1080 stend = endoff + 1;
1081 } else {
1082 stend = endoff - (endoff % c->chunk_item->stripe_length);
1083 }
1084
1085 if (ststart != stend) {
1086 stripes[i].start = ststart;
1087 stripes[i].end = stend;
1088
1089 if (ststart < start) {
1090 start = ststart;
1091 firststripesize = c->chunk_item->stripe_length - (ststart % c->chunk_item->stripe_length);
1092 }
1093
1094 if (stend > end) {
1095 end = stend;
1096 laststripesize = stend % c->chunk_item->stripe_length;
1097 if (laststripesize == 0)
1098 laststripesize = c->chunk_item->stripe_length;
1099 }
1100 }
1101 }
1102
1103 if (start == end) {
1104 ERR("error: start == end (%llx)\n", start);
1105 return STATUS_INTERNAL_ERROR;
1106 }
1107
1108 if (startoffstripe == endoffstripe && start / c->chunk_item->stripe_length == end / c->chunk_item->stripe_length) {
1109 firststripesize = end - start;
1110 laststripesize = firststripesize;
1111 }
1112
1113 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1114 stripes[i].data = ExAllocatePoolWithTag(NonPagedPool, end - start, ALLOC_TAG);
1115 if (!stripes[i].data) {
1116 ERR("out of memory\n");
1117 return STATUS_INSUFFICIENT_RESOURCES;
1118 }
1119
1120 if (i < c->chunk_item->num_stripes - 2) {
1121 if (stripes[i].start == 0 && stripes[i].end == 0)
1122 stripes[i].start = stripes[i].end = start;
1123 }
1124 }
1125
1126 num_reads = 0;
1127 multiple_stripes = (end - 1) / c->chunk_item->stripe_length != start / c->chunk_item->stripe_length;
1128
1129 for (i = 0; i < c->chunk_item->num_stripes - 2; i++) {
1130 if (stripes[i].start == stripes[i].end) {
1131 num_reads++;
1132
1133 if (multiple_stripes)
1134 num_reads++;
1135 } else {
1136 if (stripes[i].start > start)
1137 num_reads++;
1138
1139 if (stripes[i].end < end)
1140 num_reads++;
1141 }
1142 }
1143
1144 if (num_reads > 0) {
1145 UINT32 j;
1146 read_stripe_master* master;
1147 read_stripe* read_stripes;
1148 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1149 NTSTATUS Status;
1150
1151 master = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_stripe_master), ALLOC_TAG);
1152 if (!master) {
1153 ERR("out of memory\n");
1154 return STATUS_INSUFFICIENT_RESOURCES;
1155 }
1156
1157 read_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_stripe) * num_reads, ALLOC_TAG);
1158 if (!read_stripes) {
1159 ERR("out of memory\n");
1160 ExFreePool(master);
1161 return STATUS_INSUFFICIENT_RESOURCES;
1162 }
1163
1164 parity1 = (((address - c->offset) / ((c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes;
1165 stripenum = (parity1 + 2) % c->chunk_item->num_stripes;
1166
1167 j = 0;
1168 for (i = 0; i < c->chunk_item->num_stripes - 2; i++) {
1169 if (stripes[i].start > start || stripes[i].start == stripes[i].end) {
1170 ULONG readlen;
1171
1172 read_stripes[j].Irp = NULL;
1173 read_stripes[j].devobj = c->devices[stripenum]->devobj;
1174 read_stripes[j].master = master;
1175
1176 if (stripes[i].start != stripes[i].end)
1177 readlen = stripes[i].start - start;
1178 else
1179 readlen = firststripesize;
1180
1181 Status = make_read_irp(Irp, &read_stripes[j], start + cis[stripenum].offset, stripes[stripenum].data, readlen);
1182
1183 if (!NT_SUCCESS(Status)) {
1184 ERR("make_read_irp returned %08x\n", Status);
1185 j++;
1186 goto readend;
1187 }
1188
1189 stripes[stripenum].skip_start = readlen;
1190
1191 j++;
1192 if (j == num_reads) break;
1193 }
1194
1195 stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
1196 }
1197
1198 if (j < num_reads) {
1199 parity1 = (((address + length - 1 - c->offset) / ((c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes;
1200 stripenum = (parity1 + 2) % c->chunk_item->num_stripes;
1201
1202 for (i = 0; i < c->chunk_item->num_stripes - 2; i++) {
1203 if ((stripes[i].start != stripes[i].end && stripes[i].end < end) || (stripes[i].start == stripes[i].end && multiple_stripes)) {
1204 read_stripes[j].Irp = NULL;
1205 read_stripes[j].devobj = c->devices[stripenum]->devobj;
1206 read_stripes[j].master = master;
1207
1208 if (stripes[i].start == stripes[i].end) {
1209 Status = make_read_irp(Irp, &read_stripes[j], start + firststripesize + cis[stripenum].offset, &stripes[stripenum].data[firststripesize], laststripesize);
1210 stripes[stripenum].skip_end = laststripesize;
1211 } else {
1212 Status = make_read_irp(Irp, &read_stripes[j], stripes[i].end + cis[stripenum].offset, &stripes[stripenum].data[stripes[i].end - start], end - stripes[i].end);
1213 stripes[stripenum].skip_end = end - stripes[i].end;
1214 }
1215
1216 if (!NT_SUCCESS(Status)) {
1217 ERR("make_read_irp returned %08x\n", Status);
1218 j++;
1219 goto readend;
1220 }
1221
1222 j++;
1223 if (j == num_reads) break;
1224 }
1225
1226 stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
1227 }
1228 }
1229
1230 master->stripes_left = j;
1231 KeInitializeEvent(&master->event, NotificationEvent, FALSE);
1232
1233 for (i = 0; i < j; i++) {
1234 Status = IoCallDriver(read_stripes[i].devobj, read_stripes[i].Irp);
1235 if (!NT_SUCCESS(Status)) {
1236 ERR("IoCallDriver returned %08x\n", Status);
1237 goto readend;
1238 }
1239 }
1240
1241 KeWaitForSingleObject(&master->event, Executive, KernelMode, FALSE, NULL);
1242
1243 for (i = 0; i < j; i++) {
1244 if (!NT_SUCCESS(read_stripes[i].iosb.Status)) {
1245 Status = read_stripes[i].iosb.Status;
1246 goto readend;
1247 }
1248 }
1249
1250 Status = STATUS_SUCCESS;
1251
1252 readend:
1253 for (i = 0; i < j; i++) {
1254 if (read_stripes[i].Irp) {
1255 if (read_stripes[i].devobj->Flags & DO_DIRECT_IO) {
1256 MmUnlockPages(read_stripes[i].Irp->MdlAddress);
1257 IoFreeMdl(read_stripes[i].Irp->MdlAddress);
1258 }
1259
1260 IoFreeIrp(read_stripes[i].Irp); // FIXME - what if IoCallDriver fails and other Irps are still running?
1261 }
1262 }
1263
1264 ExFreePool(read_stripes);
1265 ExFreePool(master);
1266
1267 if (!NT_SUCCESS(Status))
1268 return Status;
1269 }
1270
1271 pos = 0;
1272
1273 parity1 = (((address - c->offset) / ((c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length)) + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes;
1274 parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1275 stripepos = 0;
1276
1277 if ((address - c->offset) % (c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 2)) > 0) {
1278 BOOL first = TRUE;
1279
1280 stripenum = (parity2 + 1) % c->chunk_item->num_stripes;
1281
1282 for (logstripe = 0; logstripe < c->chunk_item->num_stripes - 2; logstripe++) {
1283 ULONG copylen;
1284
1285 if (pos >= length)
1286 break;
1287
1288 if (stripes[logstripe].start < start + firststripesize && stripes[logstripe].start != stripes[logstripe].end) {
1289 copylen = min(start + firststripesize - stripes[logstripe].start, length - pos);
1290
1291 if (!first && copylen < c->chunk_item->stripe_length) {
1292 same_stripe = TRUE;
1293 break;
1294 }
1295
1296 RtlCopyMemory(&stripes[stripenum].data[firststripesize - copylen], &data2[pos], copylen);
1297
1298 pos += copylen;
1299 first = FALSE;
1300 }
1301
1302 stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
1303 }
1304
1305 i = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
1306 RtlCopyMemory(stripes[parity1].data, stripes[i].data, firststripesize);
1307 RtlCopyMemory(stripes[parity2].data, stripes[i].data, firststripesize);
1308 i = i == 0 ? (c->chunk_item->num_stripes - 1) : (i - 1);
1309
1310 do {
1311 do_xor(stripes[parity1].data, stripes[i].data, firststripesize);
1312
1313 galois_double(stripes[parity2].data, firststripesize);
1314 do_xor(stripes[parity2].data, stripes[i].data, firststripesize);
1315
1316 i = i == 0 ? (c->chunk_item->num_stripes - 1) : (i - 1);
1317 } while (i != parity2);
1318
1319 if (!same_stripe) {
1320 stripepos = firststripesize;
1321 parity1 = parity2;
1322 parity2 = (parity2 + 1) % c->chunk_item->num_stripes;
1323 }
1324 }
1325
1326 while (length >= pos + c->chunk_item->stripe_length * (c->chunk_item->num_stripes - 2)) {
1327 stripenum = (parity2 + 1) % c->chunk_item->num_stripes;
1328
1329 for (i = 0; i < c->chunk_item->num_stripes - 2; i++) {
1330 RtlCopyMemory(&stripes[stripenum].data[stripepos], &data2[pos], c->chunk_item->stripe_length);
1331
1332 pos += c->chunk_item->stripe_length;
1333 stripenum = (stripenum +1) % c->chunk_item->num_stripes;
1334 }
1335
1336 i = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
1337 RtlCopyMemory(&stripes[parity1].data[stripepos], &stripes[i].data[stripepos], c->chunk_item->stripe_length);
1338 RtlCopyMemory(&stripes[parity2].data[stripepos], &stripes[i].data[stripepos], c->chunk_item->stripe_length);
1339 i = i == 0 ? (c->chunk_item->num_stripes - 1) : (i - 1);
1340
1341 do {
1342 do_xor(&stripes[parity1].data[stripepos], &stripes[i].data[stripepos], c->chunk_item->stripe_length);
1343
1344 galois_double(&stripes[parity2].data[stripepos], c->chunk_item->stripe_length);
1345 do_xor(&stripes[parity2].data[stripepos], &stripes[i].data[stripepos], c->chunk_item->stripe_length);
1346
1347 i = i == 0 ? (c->chunk_item->num_stripes - 1) : (i - 1);
1348 } while (i != parity2);
1349
1350 parity1 = parity2;
1351 parity2 = (parity2 + 1) % c->chunk_item->num_stripes;
1352 stripepos += c->chunk_item->stripe_length;
1353 }
1354
1355 if (pos < length) {
1356 if (!same_stripe) {
1357 stripenum = (parity2 + 1) % c->chunk_item->num_stripes;
1358 i = 0;
1359 } else
1360 i = logstripe;
1361
1362 while (pos < length) {
1363 ULONG copylen;
1364
1365 copylen = min(stripes[i].end - start - stripepos, length - pos);
1366
1367 RtlCopyMemory(&stripes[stripenum].data[stripepos], &data2[pos], copylen);
1368
1369 pos += copylen;
1370 stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
1371 i++;
1372 }
1373
1374 i = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
1375 RtlCopyMemory(&stripes[parity1].data[stripepos], &stripes[i].data[stripepos], laststripesize);
1376 RtlCopyMemory(&stripes[parity2].data[stripepos], &stripes[i].data[stripepos], laststripesize);
1377 i = i == 0 ? (c->chunk_item->num_stripes - 1) : (i - 1);
1378
1379 do {
1380 do_xor(&stripes[parity1].data[stripepos], &stripes[i].data[stripepos], laststripesize);
1381
1382 galois_double(&stripes[parity2].data[stripepos], laststripesize);
1383 do_xor(&stripes[parity2].data[stripepos], &stripes[i].data[stripepos], laststripesize);
1384
1385 i = i == 0 ? (c->chunk_item->num_stripes - 1) : (i - 1);
1386 } while (i != parity2);
1387 }
1388
1389 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1390 stripes[i].start = start;
1391 stripes[i].end = end;
1392 }
1393
1394 return STATUS_SUCCESS;
1395 }
1396
1397 NTSTATUS STDCALL write_data(device_extension* Vcb, UINT64 address, void* data, BOOL need_free, UINT32 length, write_data_context* wtc, PIRP Irp, chunk* c) {
1398 NTSTATUS Status;
1399 UINT32 i;
1400 CHUNK_ITEM_STRIPE* cis;
1401 write_data_stripe* stripe;
1402 write_stripe* stripes = NULL;
1403 BOOL need_free2;
1404
1405 TRACE("(%p, %llx, %p, %x)\n", Vcb, address, data, length);
1406
1407 if (!c) {
1408 c = get_chunk_from_address(Vcb, address);
1409 if (!c) {
1410 ERR("could not get chunk for address %llx\n", address);
1411 return STATUS_INTERNAL_ERROR;
1412 }
1413 }
1414
1415 stripes = ExAllocatePoolWithTag(PagedPool, sizeof(write_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
1416 if (!stripes) {
1417 ERR("out of memory\n");
1418 return STATUS_INSUFFICIENT_RESOURCES;
1419 }
1420
1421 RtlZeroMemory(stripes, sizeof(write_stripe) * c->chunk_item->num_stripes);
1422
1423 cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1424
1425 if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
1426 Status = prepare_raid0_write(c, address, data, length, stripes);
1427 if (!NT_SUCCESS(Status)) {
1428 ERR("prepare_raid0_write returned %08x\n", Status);
1429 ExFreePool(stripes);
1430 return Status;
1431 }
1432
1433 if (need_free)
1434 ExFreePool(data);
1435
1436 need_free2 = TRUE;
1437 } else if (c->chunk_item->type & BLOCK_FLAG_RAID10) {
1438 Status = prepare_raid10_write(c, address, data, length, stripes);
1439 if (!NT_SUCCESS(Status)) {
1440 ERR("prepare_raid10_write returned %08x\n", Status);
1441 ExFreePool(stripes);
1442 return Status;
1443 }
1444
1445 if (need_free)
1446 ExFreePool(data);
1447
1448 need_free2 = TRUE;
1449 } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
1450 Status = prepare_raid5_write(Irp, c, address, data, length, stripes);
1451 if (!NT_SUCCESS(Status)) {
1452 ERR("prepare_raid5_write returned %08x\n", Status);
1453 ExFreePool(stripes);
1454 return Status;
1455 }
1456
1457 if (need_free)
1458 ExFreePool(data);
1459
1460 need_free2 = TRUE;
1461 } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
1462 Status = prepare_raid6_write(Irp, c, address, data, length, stripes);
1463 if (!NT_SUCCESS(Status)) {
1464 ERR("prepare_raid6_write returned %08x\n", Status);
1465 ExFreePool(stripes);
1466 return Status;
1467 }
1468
1469 if (need_free)
1470 ExFreePool(data);
1471
1472 need_free2 = TRUE;
1473 } else { // write same data to every location - SINGLE, DUP, RAID1
1474 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1475 stripes[i].start = address - c->offset;
1476 stripes[i].end = stripes[i].start + length;
1477 stripes[i].data = data;
1478 }
1479 need_free2 = need_free;
1480 }
1481
1482 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1483 PIO_STACK_LOCATION IrpSp;
1484
1485 // FIXME - handle missing devices
1486
1487 stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_stripe), ALLOC_TAG);
1488 if (!stripe) {
1489 ERR("out of memory\n");
1490 Status = STATUS_INSUFFICIENT_RESOURCES;
1491 goto end;
1492 }
1493
1494 if (stripes[i].start + stripes[i].skip_start == stripes[i].end - stripes[i].skip_end || stripes[i].start == stripes[i].end) {
1495 stripe->status = WriteDataStatus_Ignore;
1496 stripe->Irp = NULL;
1497 stripe->buf = stripes[i].data;
1498 stripe->need_free = need_free2;
1499 } else {
1500 stripe->context = (struct _write_data_context*)wtc;
1501 stripe->buf = stripes[i].data;
1502 stripe->need_free = need_free2;
1503 stripe->device = c->devices[i];
1504 RtlZeroMemory(&stripe->iosb, sizeof(IO_STATUS_BLOCK));
1505 stripe->status = WriteDataStatus_Pending;
1506
1507 if (!Irp) {
1508 stripe->Irp = IoAllocateIrp(stripe->device->devobj->StackSize, FALSE);
1509
1510 if (!stripe->Irp) {
1511 ERR("IoAllocateIrp failed\n");
1512 Status = STATUS_INTERNAL_ERROR;
1513 goto end;
1514 }
1515 } else {
1516 stripe->Irp = IoMakeAssociatedIrp(Irp, stripe->device->devobj->StackSize);
1517
1518 if (!stripe->Irp) {
1519 ERR("IoMakeAssociatedIrp failed\n");
1520 Status = STATUS_INTERNAL_ERROR;
1521 goto end;
1522 }
1523 }
1524
1525 IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
1526 IrpSp->MajorFunction = IRP_MJ_WRITE;
1527
1528 if (stripe->device->devobj->Flags & DO_BUFFERED_IO) {
1529 stripe->Irp->AssociatedIrp.SystemBuffer = stripes[i].data + stripes[i].skip_start;
1530
1531 stripe->Irp->Flags = IRP_BUFFERED_IO;
1532 } else if (stripe->device->devobj->Flags & DO_DIRECT_IO) {
1533 stripe->Irp->MdlAddress = IoAllocateMdl(stripes[i].data + stripes[i].skip_start,
1534 stripes[i].end - stripes[i].start - stripes[i].skip_start - stripes[i].skip_end, FALSE, FALSE, NULL);
1535 if (!stripe->Irp->MdlAddress) {
1536 ERR("IoAllocateMdl failed\n");
1537 Status = STATUS_INTERNAL_ERROR;
1538 goto end;
1539 }
1540
1541 MmProbeAndLockPages(stripe->Irp->MdlAddress, KernelMode, IoWriteAccess);
1542 } else {
1543 stripe->Irp->UserBuffer = stripes[i].data + stripes[i].skip_start;
1544 }
1545
1546 #ifdef DEBUG_PARANOID
1547 if (stripes[i].end < stripes[i].start + stripes[i].skip_start + stripes[i].skip_end) {
1548 ERR("trying to write stripe with negative length (%llx < %llx + %x + %x)\n",
1549 stripes[i].end, stripes[i].start, stripes[i].skip_start, stripes[i].skip_end);
1550 int3;
1551 }
1552 #endif
1553
1554 IrpSp->Parameters.Write.Length = stripes[i].end - stripes[i].start - stripes[i].skip_start - stripes[i].skip_end;
1555 IrpSp->Parameters.Write.ByteOffset.QuadPart = stripes[i].start + cis[i].offset + stripes[i].skip_start;
1556
1557 stripe->Irp->UserIosb = &stripe->iosb;
1558 wtc->stripes_left++;
1559
1560 IoSetCompletionRoutine(stripe->Irp, write_data_completion, stripe, TRUE, TRUE, TRUE);
1561 }
1562
1563 InsertTailList(&wtc->stripes, &stripe->list_entry);
1564 }
1565
1566 Status = STATUS_SUCCESS;
1567
1568 end:
1569
1570 if (stripes) ExFreePool(stripes);
1571
1572 if (!NT_SUCCESS(Status)) {
1573 free_write_data_stripes(wtc);
1574 ExFreePool(wtc);
1575 }
1576
1577 return Status;
1578 }
1579
1580 void get_raid56_lock_range(chunk* c, UINT64 address, UINT64 length, UINT64* lockaddr, UINT64* locklen) {
1581 UINT64 startoff, endoff;
1582 UINT16 startoffstripe, endoffstripe, datastripes;
1583 UINT64 start = 0xffffffffffffffff, end = 0, logend;
1584 UINT16 i;
1585
1586 datastripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2);
1587
1588 get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, datastripes, &startoff, &startoffstripe);
1589 get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, datastripes, &endoff, &endoffstripe);
1590
1591 for (i = 0; i < datastripes; i++) {
1592 UINT64 ststart, stend;
1593
1594 if (startoffstripe > i) {
1595 ststart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1596 } else if (startoffstripe == i) {
1597 ststart = startoff;
1598 } else {
1599 ststart = startoff - (startoff % c->chunk_item->stripe_length);
1600 }
1601
1602 if (endoffstripe > i) {
1603 stend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1604 } else if (endoffstripe == i) {
1605 stend = endoff + 1;
1606 } else {
1607 stend = endoff - (endoff % c->chunk_item->stripe_length);
1608 }
1609
1610 if (ststart != stend) {
1611 if (ststart < start)
1612 start = ststart;
1613
1614 if (stend > end)
1615 end = stend;
1616 }
1617 }
1618
1619 *lockaddr = c->offset + ((start / c->chunk_item->stripe_length) * c->chunk_item->stripe_length * datastripes) +
1620 start % c->chunk_item->stripe_length;
1621
1622 logend = c->offset + ((end / c->chunk_item->stripe_length) * c->chunk_item->stripe_length * datastripes);
1623 logend += c->chunk_item->stripe_length * (datastripes - 1);
1624 logend += end % c->chunk_item->stripe_length == 0 ? c->chunk_item->stripe_length : (end % c->chunk_item->stripe_length);
1625 *locklen = logend - *lockaddr;
1626 }
1627
1628 NTSTATUS STDCALL write_data_complete(device_extension* Vcb, UINT64 address, void* data, UINT32 length, PIRP Irp, chunk* c) {
1629 write_data_context* wtc;
1630 NTSTATUS Status;
1631 UINT64 lockaddr, locklen;
1632 // #ifdef DEBUG_PARANOID
1633 // UINT8* buf2;
1634 // #endif
1635
1636 wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_context), ALLOC_TAG);
1637 if (!wtc) {
1638 ERR("out of memory\n");
1639 return STATUS_INSUFFICIENT_RESOURCES;
1640 }
1641
1642 KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE);
1643 InitializeListHead(&wtc->stripes);
1644 wtc->tree = FALSE;
1645 wtc->stripes_left = 0;
1646
1647 if (!c) {
1648 c = get_chunk_from_address(Vcb, address);
1649 if (!c) {
1650 ERR("could not get chunk for address %llx\n", address);
1651 return STATUS_INTERNAL_ERROR;
1652 }
1653 }
1654
1655 if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) {
1656 get_raid56_lock_range(c, address, length, &lockaddr, &locklen);
1657 chunk_lock_range(Vcb, c, lockaddr, locklen);
1658 }
1659
1660 Status = write_data(Vcb, address, data, FALSE, length, wtc, Irp, c);
1661 if (!NT_SUCCESS(Status)) {
1662 ERR("write_data returned %08x\n", Status);
1663
1664 if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
1665 chunk_unlock_range(Vcb, c, lockaddr, locklen);
1666
1667 free_write_data_stripes(wtc);
1668 ExFreePool(wtc);
1669 return Status;
1670 }
1671
1672 if (wtc->stripes.Flink != &wtc->stripes) {
1673 // launch writes and wait
1674 LIST_ENTRY* le = wtc->stripes.Flink;
1675 while (le != &wtc->stripes) {
1676 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
1677
1678 if (stripe->status != WriteDataStatus_Ignore)
1679 IoCallDriver(stripe->device->devobj, stripe->Irp);
1680
1681 le = le->Flink;
1682 }
1683
1684 KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL);
1685
1686 le = wtc->stripes.Flink;
1687 while (le != &wtc->stripes) {
1688 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
1689
1690 if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
1691 Status = stripe->iosb.Status;
1692 break;
1693 }
1694
1695 le = le->Flink;
1696 }
1697
1698 free_write_data_stripes(wtc);
1699 }
1700
1701 if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
1702 chunk_unlock_range(Vcb, c, lockaddr, locklen);
1703
1704 ExFreePool(wtc);
1705
1706 // #ifdef DEBUG_PARANOID
1707 // buf2 = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1708 // Status = read_data(Vcb, address, length, NULL, FALSE, buf2, NULL, Irp);
1709 //
1710 // if (!NT_SUCCESS(Status) || RtlCompareMemory(buf2, data, length) != length)
1711 // int3;
1712 //
1713 // ExFreePool(buf2);
1714 // #endif
1715
1716 return STATUS_SUCCESS;
1717 }
1718
1719 static NTSTATUS STDCALL write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
1720 write_data_stripe* stripe = conptr;
1721 write_data_context* context = (write_data_context*)stripe->context;
1722 LIST_ENTRY* le;
1723
1724 // FIXME - we need a lock here
1725
1726 if (stripe->status == WriteDataStatus_Cancelling) {
1727 stripe->status = WriteDataStatus_Cancelled;
1728 goto end;
1729 }
1730
1731 stripe->iosb = Irp->IoStatus;
1732
1733 if (NT_SUCCESS(Irp->IoStatus.Status)) {
1734 stripe->status = WriteDataStatus_Success;
1735 } else {
1736 le = context->stripes.Flink;
1737
1738 stripe->status = WriteDataStatus_Error;
1739
1740 while (le != &context->stripes) {
1741 write_data_stripe* s2 = CONTAINING_RECORD(le, write_data_stripe, list_entry);
1742
1743 if (s2->status == WriteDataStatus_Pending) {
1744 s2->status = WriteDataStatus_Cancelling;
1745 IoCancelIrp(s2->Irp);
1746 }
1747
1748 le = le->Flink;
1749 }
1750 }
1751
1752 end:
1753 if (InterlockedDecrement(&context->stripes_left) == 0)
1754 KeSetEvent(&context->Event, 0, FALSE);
1755
1756 return STATUS_MORE_PROCESSING_REQUIRED;
1757 }
1758
1759 void free_write_data_stripes(write_data_context* wtc) {
1760 LIST_ENTRY *le, *le2, *nextle;
1761
1762 le = wtc->stripes.Flink;
1763 while (le != &wtc->stripes) {
1764 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
1765
1766 if (stripe->Irp) {
1767 if (stripe->device->devobj->Flags & DO_DIRECT_IO) {
1768 MmUnlockPages(stripe->Irp->MdlAddress);
1769 IoFreeMdl(stripe->Irp->MdlAddress);
1770 }
1771 }
1772
1773 le = le->Flink;
1774 }
1775
1776 le = wtc->stripes.Flink;
1777 while (le != &wtc->stripes) {
1778 write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);
1779
1780 nextle = le->Flink;
1781
1782 if (stripe->buf && stripe->need_free) {
1783 ExFreePool(stripe->buf);
1784
1785 le2 = le->Flink;
1786 while (le2 != &wtc->stripes) {
1787 write_data_stripe* s2 = CONTAINING_RECORD(le2, write_data_stripe, list_entry);
1788
1789 if (s2->buf == stripe->buf)
1790 s2->buf = NULL;
1791
1792 le2 = le2->Flink;
1793 }
1794
1795 }
1796
1797 ExFreePool(stripe);
1798
1799 le = nextle;
1800 }
1801 }
1802
1803 NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 end_data, PIRP Irp, LIST_ENTRY* rollback) {
1804 NTSTATUS Status;
1805 LIST_ENTRY* le;
1806
1807 le = fcb->extents.Flink;
1808
1809 while (le != &fcb->extents) {
1810 LIST_ENTRY* le2 = le->Flink;
1811 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
1812 EXTENT_DATA* ed = ext->data;
1813 EXTENT_DATA2* ed2;
1814 UINT64 len;
1815
1816 if (!ext->ignore) {
1817 if (ext->datalen < sizeof(EXTENT_DATA)) {
1818 ERR("extent at %llx was %u bytes, expected at least %u\n", ext->offset, ext->datalen, sizeof(EXTENT_DATA));
1819 Status = STATUS_INTERNAL_ERROR;
1820 goto end;
1821 }
1822
1823 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1824 if (ext->datalen < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1825 ERR("extent at %llx was %u bytes, expected at least %u\n", ext->offset, ext->datalen, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
1826 Status = STATUS_INTERNAL_ERROR;
1827 goto end;
1828 }
1829
1830 ed2 = (EXTENT_DATA2*)ed->data;
1831 }
1832
1833 len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
1834
1835 if (ext->offset < end_data && ext->offset + len > start_data) {
1836 if (ed->type == EXTENT_TYPE_INLINE) {
1837 if (start_data <= ext->offset && end_data >= ext->offset + len) { // remove all
1838 remove_fcb_extent(fcb, ext, rollback);
1839
1840 fcb->inode_item.st_blocks -= len;
1841 fcb->inode_item_changed = TRUE;
1842 } else if (start_data <= ext->offset && end_data < ext->offset + len) { // remove beginning
1843 EXTENT_DATA* ned;
1844 UINT64 size;
1845 extent* newext;
1846
1847 size = len - (end_data - ext->offset);
1848
1849 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
1850 if (!ned) {
1851 ERR("out of memory\n");
1852 Status = STATUS_INSUFFICIENT_RESOURCES;
1853 goto end;
1854 }
1855
1856 newext = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG);
1857 if (!newext) {
1858 ERR("out of memory\n");
1859 Status = STATUS_INSUFFICIENT_RESOURCES;
1860 ExFreePool(ned);
1861 goto end;
1862 }
1863
1864 ned->generation = Vcb->superblock.generation;
1865 ned->decoded_size = size;
1866 ned->compression = ed->compression;
1867 ned->encryption = ed->encryption;
1868 ned->encoding = ed->encoding;
1869 ned->type = ed->type;
1870
1871 RtlCopyMemory(&ned->data[0], &ed->data[end_data - ext->offset], size);
1872
1873 newext->offset = end_data;
1874 newext->data = ned;
1875 newext->datalen = sizeof(EXTENT_DATA) - 1 + size;
1876 newext->unique = ext->unique;
1877 newext->ignore = FALSE;
1878 InsertHeadList(&ext->list_entry, &newext->list_entry);
1879
1880 remove_fcb_extent(fcb, ext, rollback);
1881
1882 fcb->inode_item.st_blocks -= end_data - ext->offset;
1883 fcb->inode_item_changed = TRUE;
1884 } else if (start_data > ext->offset && end_data >= ext->offset + len) { // remove end
1885 EXTENT_DATA* ned;
1886 UINT64 size;
1887 extent* newext;
1888
1889 size = start_data - ext->offset;
1890
1891 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
1892 if (!ned) {
1893 ERR("out of memory\n");
1894 Status = STATUS_INSUFFICIENT_RESOURCES;
1895 goto end;
1896 }
1897
1898 newext = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG);
1899 if (!newext) {
1900 ERR("out of memory\n");
1901 Status = STATUS_INSUFFICIENT_RESOURCES;
1902 ExFreePool(ned);
1903 goto end;
1904 }
1905
1906 ned->generation = Vcb->superblock.generation;
1907 ned->decoded_size = size;
1908 ned->compression = ed->compression;
1909 ned->encryption = ed->encryption;
1910 ned->encoding = ed->encoding;
1911 ned->type = ed->type;
1912
1913 RtlCopyMemory(&ned->data[0], &ed->data[0], size);
1914
1915 newext->offset = ext->offset;
1916 newext->data = ned;
1917 newext->datalen = sizeof(EXTENT_DATA) - 1 + size;
1918 newext->unique = ext->unique;
1919 newext->ignore = FALSE;
1920 InsertHeadList(&ext->list_entry, &newext->list_entry);
1921
1922 remove_fcb_extent(fcb, ext, rollback);
1923
1924 fcb->inode_item.st_blocks -= ext->offset + len - start_data;
1925 fcb->inode_item_changed = TRUE;
1926 } else if (start_data > ext->offset && end_data < ext->offset + len) { // remove middle
1927 EXTENT_DATA *ned1, *ned2;
1928 UINT64 size;
1929 extent *newext1, *newext2;
1930
1931 size = start_data - ext->offset;
1932
1933 ned1 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
1934 if (!ned1) {
1935 ERR("out of memory\n");
1936 Status = STATUS_INSUFFICIENT_RESOURCES;
1937 goto end;
1938 }
1939
1940 newext1 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG);
1941 if (!newext1) {
1942 ERR("out of memory\n");
1943 Status = STATUS_INSUFFICIENT_RESOURCES;
1944 ExFreePool(ned1);
1945 goto end;
1946 }
1947
1948 ned1->generation = Vcb->superblock.generation;
1949 ned1->decoded_size = size;
1950 ned1->compression = ed->compression;
1951 ned1->encryption = ed->encryption;
1952 ned1->encoding = ed->encoding;
1953 ned1->type = ed->type;
1954
1955 RtlCopyMemory(&ned1->data[0], &ed->data[0], size);
1956
1957 newext1->offset = ext->offset;
1958 newext1->data = ned1;
1959 newext1->datalen = sizeof(EXTENT_DATA) - 1 + size;
1960 newext1->unique = ext->unique;
1961 newext1->ignore = FALSE;
1962
1963 size = ext->offset + len - end_data;
1964
1965 ned2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
1966 if (!ned2) {
1967 ERR("out of memory\n");
1968 Status = STATUS_INSUFFICIENT_RESOURCES;
1969 ExFreePool(ned1);
1970 ExFreePool(newext1);
1971 goto end;
1972 }
1973
1974 newext2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG);
1975 if (!newext2) {
1976 ERR("out of memory\n");
1977 Status = STATUS_INSUFFICIENT_RESOURCES;
1978 ExFreePool(ned1);
1979 ExFreePool(newext1);
1980 ExFreePool(ned2);
1981 goto end;
1982 }
1983
1984 ned2->generation = Vcb->superblock.generation;
1985 ned2->decoded_size = size;
1986 ned2->compression = ed->compression;
1987 ned2->encryption = ed->encryption;
1988 ned2->encoding = ed->encoding;
1989 ned2->type = ed->type;
1990
1991 RtlCopyMemory(&ned2->data[0], &ed->data[end_data - ext->offset], size);
1992
1993 newext2->offset = end_data;
1994 newext2->data = ned2;
1995 newext2->datalen = sizeof(EXTENT_DATA) - 1 + size;
1996 newext2->unique = ext->unique;
1997 newext2->ignore = FALSE;
1998
1999 InsertHeadList(&ext->list_entry, &newext1->list_entry);
2000 InsertHeadList(&newext1->list_entry, &newext2->list_entry);
2001
2002 remove_fcb_extent(fcb, ext, rollback);
2003
2004 fcb->inode_item.st_blocks -= end_data - start_data;
2005 fcb->inode_item_changed = TRUE;
2006 }
2007 } else if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
2008 if (start_data <= ext->offset && end_data >= ext->offset + len) { // remove all
2009 if (ed2->size != 0) {
2010 chunk* c;
2011
2012 fcb->inode_item.st_blocks -= len;
2013 fcb->inode_item_changed = TRUE;
2014
2015 c = get_chunk_from_address(Vcb, ed2->address);
2016
2017 if (!c) {
2018 ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
2019 } else {
2020 Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1,
2021 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
2022 if (!NT_SUCCESS(Status)) {
2023 ERR("update_changed_extent_ref returned %08x\n", Status);
2024 goto end;
2025 }
2026 }
2027 }
2028
2029 remove_fcb_extent(fcb, ext, rollback);
2030 } else if (start_data <= ext->offset && end_data < ext->offset + len) { // remove beginning
2031 EXTENT_DATA* ned;
2032 EXTENT_DATA2* ned2;
2033 extent* newext;
2034
2035 if (ed2->size != 0) {
2036 fcb->inode_item.st_blocks -= end_data - ext->offset;
2037 fcb->inode_item_changed = TRUE;
2038 }
2039
2040 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2041 if (!ned) {
2042 ERR("out of memory\n");
2043 Status = STATUS_INSUFFICIENT_RESOURCES;
2044 goto end;
2045 }
2046
2047 newext = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG);
2048 if (!newext) {
2049 ERR("out of memory\n");
2050 Status = STATUS_INSUFFICIENT_RESOURCES;
2051 ExFreePool(ned);
2052 goto end;
2053 }
2054
2055 ned2 = (EXTENT_DATA2*)&ned->data[0];
2056
2057 ned->generation = Vcb->superblock.generation;
2058 ned->decoded_size = ed->decoded_size;
2059 ned->compression = ed->compression;
2060 ned->encryption = ed->encryption;
2061 ned->encoding = ed->encoding;
2062 ned->type = ed->type;
2063 ned2->address = ed2->address;
2064 ned2->size = ed2->size;
2065 ned2->offset = ed2->offset + (end_data - ext->offset);
2066 ned2->num_bytes = ed2->num_bytes - (end_data - ext->offset);
2067
2068 newext->offset = end_data;
2069 newext->data = ned;
2070 newext->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2071 newext->unique = ext->unique;
2072 newext->ignore = FALSE;
2073 InsertHeadList(&ext->list_entry, &newext->list_entry);
2074
2075 remove_fcb_extent(fcb, ext, rollback);
2076 } else if (start_data > ext->offset && end_data >= ext->offset + len) { // remove end
2077 EXTENT_DATA* ned;
2078 EXTENT_DATA2* ned2;
2079 extent* newext;
2080
2081 if (ed2->size != 0) {
2082 fcb->inode_item.st_blocks -= ext->offset + len - start_data;
2083 fcb->inode_item_changed = TRUE;
2084 }
2085
2086 ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2087 if (!ned) {
2088 ERR("out of memory\n");
2089 Status = STATUS_INSUFFICIENT_RESOURCES;
2090 goto end;
2091 }
2092
2093 newext = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG);
2094 if (!newext) {
2095 ERR("out of memory\n");
2096 Status = STATUS_INSUFFICIENT_RESOURCES;
2097 ExFreePool(ned);
2098 goto end;
2099 }
2100
2101 ned2 = (EXTENT_DATA2*)&ned->data[0];
2102
2103 ned->generation = Vcb->superblock.generation;
2104 ned->decoded_size = ed->decoded_size;
2105 ned->compression = ed->compression;
2106 ned->encryption = ed->encryption;
2107 ned->encoding = ed->encoding;
2108 ned->type = ed->type;
2109 ned2->address = ed2->address;
2110 ned2->size = ed2->size;
2111 ned2->offset = ed2->offset;
2112 ned2->num_bytes = start_data - ext->offset;
2113
2114 newext->offset = ext->offset;
2115 newext->data = ned;
2116 newext->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2117 newext->unique = ext->unique;
2118 newext->ignore = FALSE;
2119 InsertHeadList(&ext->list_entry, &newext->list_entry);
2120
2121 remove_fcb_extent(fcb, ext, rollback);
2122 } else if (start_data > ext->offset && end_data < ext->offset + len) { // remove middle
2123 EXTENT_DATA *neda, *nedb;
2124 EXTENT_DATA2 *neda2, *nedb2;
2125 extent *newext1, *newext2;
2126
2127 if (ed2->size != 0) {
2128 chunk* c;
2129
2130 fcb->inode_item.st_blocks -= end_data - start_data;
2131 fcb->inode_item_changed = TRUE;
2132
2133 c = get_chunk_from_address(Vcb, ed2->address);
2134
2135 if (!c) {
2136 ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
2137 } else {
2138 Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
2139 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
2140 if (!NT_SUCCESS(Status)) {
2141 ERR("update_changed_extent_ref returned %08x\n", Status);
2142 goto end;
2143 }
2144 }
2145 }
2146
2147 neda = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2148 if (!neda) {
2149 ERR("out of memory\n");
2150 Status = STATUS_INSUFFICIENT_RESOURCES;
2151 goto end;
2152 }
2153
2154 newext1 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG);
2155 if (!newext1) {
2156 ERR("out of memory\n");
2157 Status = STATUS_INSUFFICIENT_RESOURCES;
2158 ExFreePool(neda);
2159 goto end;
2160 }
2161
2162 nedb = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2163 if (!nedb) {
2164 ERR("out of memory\n");
2165 Status = STATUS_INSUFFICIENT_RESOURCES;
2166 ExFreePool(neda);
2167 ExFreePool(newext1);
2168 goto end;
2169 }
2170
2171 newext2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG);
2172 if (!newext1) {
2173 ERR("out of memory\n");
2174 Status = STATUS_INSUFFICIENT_RESOURCES;
2175 ExFreePool(neda);
2176 ExFreePool(newext1);
2177 ExFreePool(nedb);
2178 goto end;
2179 }
2180
2181 neda2 = (EXTENT_DATA2*)&neda->data[0];
2182
2183 neda->generation = Vcb->superblock.generation;
2184 neda->decoded_size = ed->decoded_size;
2185 neda->compression = ed->compression;
2186 neda->encryption = ed->encryption;
2187 neda->encoding = ed->encoding;
2188 neda->type = ed->type;
2189 neda2->address = ed2->address;
2190 neda2->size = ed2->size;
2191 neda2->offset = ed2->offset;
2192 neda2->num_bytes = start_data - ext->offset;
2193
2194 nedb2 = (EXTENT_DATA2*)&nedb->data[0];
2195
2196 nedb->generation = Vcb->superblock.generation;
2197 nedb->decoded_size = ed->decoded_size;
2198 nedb->compression = ed->compression;
2199 nedb->encryption = ed->encryption;
2200 nedb->encoding = ed->encoding;
2201 nedb->type = ed->type;
2202 nedb2->address = ed2->address;
2203 nedb2->size = ed2->size;
2204 nedb2->offset = ed2->offset + (end_data - ext->offset);
2205 nedb2->num_bytes = ext->offset + len - end_data;
2206
2207 newext1->offset = ext->offset;
2208 newext1->data = neda;
2209 newext1->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2210 newext1->unique = ext->unique;
2211 newext1->ignore = FALSE;
2212
2213 newext2->offset = end_data;
2214 newext2->data = nedb;
2215 newext2->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2216 newext2->unique = ext->unique;
2217 newext2->ignore = FALSE;
2218
2219 InsertHeadList(&ext->list_entry, &newext1->list_entry);
2220 InsertHeadList(&newext1->list_entry, &newext2->list_entry);
2221
2222 remove_fcb_extent(fcb, ext, rollback);
2223 }
2224 }
2225 }
2226 }
2227
2228 le = le2;
2229 }
2230
2231 Status = STATUS_SUCCESS;
2232
2233 end:
2234 fcb->extents_changed = TRUE;
2235 mark_fcb_dirty(fcb);
2236
2237 return Status;
2238 }
2239
2240 static NTSTATUS do_write_data(device_extension* Vcb, UINT64 address, void* data, UINT64 length, LIST_ENTRY* changed_sector_list, PIRP Irp) {
2241 NTSTATUS Status;
2242 changed_sector* sc;
2243 int i;
2244
2245 Status = write_data_complete(Vcb, address, data, length, Irp, NULL);
2246 if (!NT_SUCCESS(Status)) {
2247 ERR("write_data returned %08x\n", Status);
2248 return Status;
2249 }
2250
2251 if (changed_sector_list) {
2252 sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
2253 if (!sc) {
2254 ERR("out of memory\n");
2255 return STATUS_INSUFFICIENT_RESOURCES;
2256 }
2257
2258 sc->ol.key = address;
2259 sc->length = length / Vcb->superblock.sector_size;
2260 sc->deleted = FALSE;
2261
2262 sc->checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sc->length, ALLOC_TAG);
2263 if (!sc->checksums) {
2264 ERR("out of memory\n");
2265 ExFreePool(sc);
2266 return STATUS_INSUFFICIENT_RESOURCES;
2267 }
2268
2269 for (i = 0; i < sc->length; i++) {
2270 sc->checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
2271 }
2272
2273 insert_into_ordered_list(changed_sector_list, &sc->ol);
2274 }
2275
2276 return STATUS_SUCCESS;
2277 }
2278
2279 static void add_insert_extent_rollback(LIST_ENTRY* rollback, fcb* fcb, extent* ext) {
2280 rollback_extent* re;
2281
2282 re = ExAllocatePoolWithTag(NonPagedPool, sizeof(rollback_extent), ALLOC_TAG);
2283 if (!re) {
2284 ERR("out of memory\n");
2285 return;
2286 }
2287
2288 re->fcb = fcb;
2289 re->ext = ext;
2290
2291 add_rollback(fcb->Vcb, rollback, ROLLBACK_INSERT_EXTENT, re);
2292 }
2293
2294 static BOOL add_extent_to_fcb(fcb* fcb, UINT64 offset, EXTENT_DATA* ed, ULONG edsize, BOOL unique, LIST_ENTRY* rollback) {
2295 extent* ext;
2296 LIST_ENTRY* le;
2297
2298 ext = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG);
2299 if (!ext) {
2300 ERR("out of memory\n");
2301 return FALSE;
2302 }
2303
2304 ext->offset = offset;
2305 ext->data = ed;
2306 ext->datalen = edsize;
2307 ext->unique = unique;
2308 ext->ignore = FALSE;
2309
2310 le = fcb->extents.Flink;
2311 while (le != &fcb->extents) {
2312 extent* oldext = CONTAINING_RECORD(le, extent, list_entry);
2313
2314 if (!oldext->ignore) {
2315 if (oldext->offset > offset) {
2316 InsertHeadList(le->Blink, &ext->list_entry);
2317 goto end;
2318 }
2319 }
2320
2321 le = le->Flink;
2322 }
2323
2324 InsertTailList(&fcb->extents, &ext->list_entry);
2325
2326 end:
2327 add_insert_extent_rollback(rollback, fcb, ext);
2328
2329 return TRUE;
2330 }
2331
2332 static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback) {
2333 if (!ext->ignore) {
2334 rollback_extent* re;
2335
2336 ext->ignore = TRUE;
2337
2338 re = ExAllocatePoolWithTag(NonPagedPool, sizeof(rollback_extent), ALLOC_TAG);
2339 if (!re) {
2340 ERR("out of memory\n");
2341 return;
2342 }
2343
2344 re->fcb = fcb;
2345 re->ext = ext;
2346
2347 add_rollback(fcb->Vcb, rollback, ROLLBACK_DELETE_EXTENT, re);
2348 }
2349 }
2350
2351 BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start_data, UINT64 length, BOOL prealloc, void* data,
2352 LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback, UINT8 compression, UINT64 decoded_size) {
2353 UINT64 address;
2354 NTSTATUS Status;
2355 EXTENT_DATA* ed;
2356 EXTENT_DATA2* ed2;
2357 ULONG edsize = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2358 // #ifdef DEBUG_PARANOID
2359 // traverse_ptr tp;
2360 // KEY searchkey;
2361 // #endif
2362
2363 TRACE("(%p, (%llx, %llx), %llx, %llx, %llx, %u, %p, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, c->offset, start_data, length, prealloc, data, changed_sector_list, rollback);
2364
2365 if (!find_address_in_chunk(Vcb, c, length, &address))
2366 return FALSE;
2367
2368 // #ifdef DEBUG_PARANOID
2369 // searchkey.obj_id = address;
2370 // searchkey.obj_type = TYPE_EXTENT_ITEM;
2371 // searchkey.offset = 0xffffffffffffffff;
2372 //
2373 // Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
2374 // if (!NT_SUCCESS(Status)) {
2375 // ERR("error - find_item returned %08x\n", Status);
2376 // } else if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
2377 // ERR("address %llx already allocated\n", address);
2378 // int3;
2379 // }
2380 // #endif
2381
2382 // add extent data to inode
2383 ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
2384 if (!ed) {
2385 ERR("out of memory\n");
2386 return FALSE;
2387 }
2388
2389 ed->generation = Vcb->superblock.generation;
2390 ed->decoded_size = decoded_size;
2391 ed->compression = compression;
2392 ed->encryption = BTRFS_ENCRYPTION_NONE;
2393 ed->encoding = BTRFS_ENCODING_NONE;
2394 ed->type = prealloc ? EXTENT_TYPE_PREALLOC : EXTENT_TYPE_REGULAR;
2395
2396 ed2 = (EXTENT_DATA2*)ed->data;
2397 ed2->address = address;
2398 ed2->size = length;
2399 ed2->offset = 0;
2400 ed2->num_bytes = decoded_size;
2401
2402 if (!add_extent_to_fcb(fcb, start_data, ed, edsize, TRUE, rollback)) {
2403 ERR("add_extent_to_fcb failed\n");
2404 ExFreePool(ed);
2405 return FALSE;
2406 }
2407
2408 increase_chunk_usage(c, length);
2409 space_list_subtract(Vcb, c, FALSE, address, length, rollback);
2410
2411 fcb->inode_item.st_blocks += decoded_size;
2412
2413 fcb->extents_changed = TRUE;
2414 fcb->inode_item_changed = TRUE;
2415 mark_fcb_dirty(fcb);
2416
2417 ExAcquireResourceExclusiveLite(&c->changed_extents_lock, TRUE);
2418
2419 add_changed_extent_ref(c, address, length, fcb->subvol->id, fcb->inode, start_data, 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);
2420
2421 ExReleaseResourceLite(&c->changed_extents_lock);
2422
2423 ExReleaseResourceLite(&c->lock);
2424
2425 if (data) {
2426 Status = do_write_data(Vcb, address, data, length, changed_sector_list, Irp);
2427 if (!NT_SUCCESS(Status))
2428 ERR("do_write_data returned %08x\n", Status);
2429 }
2430
2431 return TRUE;
2432 }
2433
2434 static BOOL try_extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data,
2435 LIST_ENTRY* changed_sector_list, PIRP Irp, UINT64* written, LIST_ENTRY* rollback) {
2436 BOOL success = FALSE;
2437 EXTENT_DATA* ed;
2438 EXTENT_DATA2* ed2;
2439 chunk* c;
2440 LIST_ENTRY* le;
2441 space* s;
2442 extent* ext = NULL;
2443
2444 le = fcb->extents.Flink;
2445
2446 while (le != &fcb->extents) {
2447 extent* nextext = CONTAINING_RECORD(le, extent, list_entry);
2448
2449 if (!nextext->ignore) {
2450 if (nextext->offset == start_data) {
2451 ext = nextext;
2452 break;
2453 } else if (nextext->offset > start_data)
2454 break;
2455
2456 ext = nextext;
2457 }
2458
2459 le = le->Flink;
2460 }
2461
2462 if (!ext)
2463 return FALSE;
2464
2465 ed = ext->data;
2466
2467 if (ext->datalen < sizeof(EXTENT_DATA)) {
2468 ERR("extent %llx was %u bytes, expected at least %u\n", ext->offset, ext->datalen, sizeof(EXTENT_DATA));
2469 return FALSE;
2470 }
2471
2472 if (ed->type != EXTENT_TYPE_REGULAR && ed->type != EXTENT_TYPE_PREALLOC) {
2473 TRACE("not extending extent which is not regular or prealloc\n");
2474 return FALSE;
2475 }
2476
2477 ed2 = (EXTENT_DATA2*)ed->data;
2478
2479 if (ext->datalen < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
2480 ERR("extent %llx was %u bytes, expected at least %u\n", ext->offset, ext->datalen, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
2481 return FALSE;
2482 }
2483
2484 if (ext->offset + ed2->num_bytes != start_data) {
2485 TRACE("last EXTENT_DATA does not run up to start_data (%llx + %llx != %llx)\n", ext->offset, ed2->num_bytes, start_data);
2486 return FALSE;
2487 }
2488
2489 c = get_chunk_from_address(Vcb, ed2->address);
2490
2491 ExAcquireResourceExclusiveLite(&c->lock, TRUE);
2492
2493 le = c->space.Flink;
2494 while (le != &c->space) {
2495 s = CONTAINING_RECORD(le, space, list_entry);
2496
2497 if (s->address == ed2->address + ed2->size) {
2498 UINT64 newlen = min(min(s->size, length), MAX_EXTENT_SIZE);
2499
2500 success = insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, changed_sector_list, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen);
2501
2502 if (success)
2503 *written += newlen;
2504
2505 return success;
2506 } else if (s->address > ed2->address + ed2->size)
2507 break;
2508
2509 le = le->Flink;
2510 }
2511
2512 ExReleaseResourceLite(&c->lock);
2513
2514 return FALSE;
2515 }
2516
2517 static NTSTATUS insert_prealloc_extent(fcb* fcb, UINT64 start, UINT64 length, LIST_ENTRY* rollback) {
2518 LIST_ENTRY* le;
2519 chunk* c;
2520 #ifdef __REACTOS__
2521 UINT64 flags;
2522 #else
2523 UINT64 flags, origlength = length;
2524 #endif
2525 NTSTATUS Status;
2526 BOOL page_file = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE;
2527
2528 flags = fcb->Vcb->data_flags;
2529
2530 // FIXME - try and maximize contiguous ranges first. If we can't do that,
2531 // allocate all the free space we find until it's enough.
2532
2533 do {
2534 UINT64 extlen = min(MAX_EXTENT_SIZE, length);
2535
2536 ExAcquireResourceSharedLite(&fcb->Vcb->chunk_lock, TRUE);
2537
2538 le = fcb->Vcb->chunks.Flink;
2539 while (le != &fcb->Vcb->chunks) {
2540 c = CONTAINING_RECORD(le, chunk, list_entry);
2541
2542 if (!c->readonly) {
2543 ExAcquireResourceExclusiveLite(&c->lock, TRUE);
2544
2545 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) {
2546 if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen)) {
2547 ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
2548 goto cont;
2549 }
2550 }
2551
2552 ExReleaseResourceLite(&c->lock);
2553 }
2554
2555 le = le->Flink;
2556 }
2557
2558 ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
2559
2560 ExAcquireResourceExclusiveLite(&fcb->Vcb->chunk_lock, TRUE);
2561
2562 if ((c = alloc_chunk(fcb->Vcb, flags))) {
2563 ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
2564
2565 ExAcquireResourceExclusiveLite(&c->lock, TRUE);
2566
2567 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) {
2568 if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen))
2569 goto cont;
2570 }
2571
2572 ExReleaseResourceLite(&c->lock);
2573 } else
2574 ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
2575
2576 WARN("couldn't find any data chunks with %llx bytes free\n", origlength);
2577 Status = STATUS_DISK_FULL;
2578 goto end;
2579
2580 cont:
2581 length -= extlen;
2582 start += extlen;
2583 } while (length > 0);
2584
2585 Status = STATUS_SUCCESS;
2586
2587 end:
2588 return Status;
2589 }
2590
2591 // static void print_tree(tree* t) {
2592 // LIST_ENTRY* le = t->itemlist.Flink;
2593 // while (le != &t->itemlist) {
2594 // tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
2595 // ERR("%llx,%x,%llx (ignore = %s)\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->ignore ? "TRUE" : "FALSE");
2596 // le = le->Flink;
2597 // }
2598 // }
2599
2600 NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback) {
2601 LIST_ENTRY* le;
2602 chunk* c;
2603 UINT64 flags, orig_length = length, written = 0;
2604
2605 TRACE("(%p, (%llx, %llx), %llx, %llx, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data, changed_sector_list);
2606
2607 if (start_data > 0) {
2608 try_extend_data(Vcb, fcb, start_data, length, data, changed_sector_list, Irp, &written, rollback);
2609
2610 if (written == length)
2611 return STATUS_SUCCESS;
2612 else if (written > 0) {
2613 start_data += written;
2614 length -= written;
2615 data = &((UINT8*)data)[written];
2616 }
2617 }
2618
2619 flags = Vcb->data_flags;
2620
2621 while (written < orig_length) {
2622 UINT64 newlen = min(length, MAX_EXTENT_SIZE);
2623 BOOL done = FALSE;
2624
2625 // Rather than necessarily writing the whole extent at once, we deal with it in blocks of 128 MB.
2626 // First, see if we can write the extent part to an existing chunk.
2627
2628 ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
2629
2630 le = Vcb->chunks.Flink;
2631 while (le != &Vcb->chunks) {
2632 c = CONTAINING_RECORD(le, chunk, list_entry);
2633
2634 if (!c->readonly) {
2635 ExAcquireResourceExclusiveLite(&c->lock, TRUE);
2636
2637 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen &&
2638 insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, changed_sector_list, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen)) {
2639 written += newlen;
2640
2641 if (written == orig_length) {
2642 ExReleaseResourceLite(&Vcb->chunk_lock);
2643 return STATUS_SUCCESS;
2644 } else {
2645 done = TRUE;
2646 start_data += newlen;
2647 length -= newlen;
2648 data = &((UINT8*)data)[newlen];
2649 break;
2650 }
2651 } else
2652 ExReleaseResourceLite(&c->lock);
2653 }
2654
2655 le = le->Flink;
2656 }
2657
2658 ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
2659
2660 if (done) continue;
2661
2662 // Otherwise, see if we can put it in a new chunk.
2663
2664 ExAcquireResourceExclusiveLite(&fcb->Vcb->chunk_lock, TRUE);
2665
2666 if ((c = alloc_chunk(Vcb, flags))) {
2667 ExReleaseResourceLite(&Vcb->chunk_lock);
2668
2669 ExAcquireResourceExclusiveLite(&c->lock, TRUE);
2670
2671 if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen &&
2672 insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, changed_sector_list, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen)) {
2673 written += newlen;
2674
2675 if (written == orig_length)
2676 return STATUS_SUCCESS;
2677 else {
2678 done = TRUE;
2679 start_data += newlen;
2680 length -= newlen;
2681 data = &((UINT8*)data)[newlen];
2682 }
2683 } else
2684 ExReleaseResourceLite(&c->lock);
2685 } else
2686 ExReleaseResourceLite(&Vcb->chunk_lock);
2687
2688 if (!done) {
2689 FIXME("FIXME - not enough room to write whole extent part, try to write bits and pieces\n"); // FIXME
2690 break;
2691 }
2692 }
2693
2694 WARN("couldn't find any data chunks with %llx bytes free\n", length);
2695
2696 return STATUS_DISK_FULL;
2697 }
2698
2699 void commit_checksum_changes(device_extension* Vcb, LIST_ENTRY* changed_sector_list) {
2700 while (!IsListEmpty(changed_sector_list)) {
2701 LIST_ENTRY* le = RemoveHeadList(changed_sector_list);
2702 InsertTailList(&Vcb->sector_checksums, le);
2703 }
2704 }
2705
2706 NTSTATUS truncate_file(fcb* fcb, UINT64 end, PIRP Irp, LIST_ENTRY* rollback) {
2707 NTSTATUS Status;
2708
2709 // FIXME - convert into inline extent if short enough
2710
2711 Status = excise_extents(fcb->Vcb, fcb, sector_align(end, fcb->Vcb->superblock.sector_size),
2712 sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size), Irp, rollback);
2713 if (!NT_SUCCESS(Status)) {
2714 ERR("error - excise_extents failed\n");
2715 return Status;
2716 }
2717
2718 fcb->inode_item.st_size = end;
2719 fcb->inode_item_changed = TRUE;
2720 TRACE("setting st_size to %llx\n", end);
2721
2722 fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size);
2723 fcb->Header.FileSize.QuadPart = fcb->inode_item.st_size;
2724 fcb->Header.ValidDataLength.QuadPart = fcb->inode_item.st_size;
2725 // FIXME - inform cache manager of this
2726
2727 TRACE("fcb %p FileSize = %llx\n", fcb, fcb->Header.FileSize.QuadPart);
2728
2729 return STATUS_SUCCESS;
2730 }
2731
2732 NTSTATUS extend_file(fcb* fcb, file_ref* fileref, UINT64 end, BOOL prealloc, PIRP Irp, LIST_ENTRY* rollback) {
2733 UINT64 oldalloc, newalloc;
2734 BOOL cur_inline;
2735 NTSTATUS Status;
2736
2737 TRACE("(%p, %p, %x, %u)\n", fcb, fileref, end, prealloc);
2738
2739 if (fcb->ads)
2740 return stream_set_end_of_file_information(fcb->Vcb, end, fcb, fileref, NULL, FALSE, rollback);
2741 else {
2742 extent* ext = NULL;
2743 LIST_ENTRY* le;
2744
2745 le = fcb->extents.Blink;
2746 while (le != &fcb->extents) {
2747 extent* ext2 = CONTAINING_RECORD(le, extent, list_entry);
2748
2749 if (!ext2->ignore) {
2750 ext = ext2;
2751 break;
2752 }
2753
2754 le = le->Blink;
2755 }
2756
2757 oldalloc = 0;
2758 if (ext) {
2759 EXTENT_DATA* ed = ext->data;
2760 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
2761
2762 if (ext->datalen < sizeof(EXTENT_DATA)) {
2763 ERR("extent %llx was %u bytes, expected at least %u\n", ext->offset, ext->datalen, sizeof(EXTENT_DATA));
2764 return STATUS_INTERNAL_ERROR;
2765 }
2766
2767 oldalloc = ext->offset + (ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes);
2768 cur_inline = ed->type == EXTENT_TYPE_INLINE;
2769
2770 if (cur_inline && end > fcb->Vcb->options.max_inline) {
2771 LIST_ENTRY changed_sector_list;
2772 BOOL nocsum = fcb->inode_item.flags & BTRFS_INODE_NODATASUM;
2773 UINT64 origlength, length;
2774 UINT8* data;
2775 UINT64 offset = ext->offset;
2776
2777 TRACE("giving inline file proper extents\n");
2778
2779 origlength = ed->decoded_size;
2780
2781 cur_inline = FALSE;
2782
2783 if (!nocsum)
2784 InitializeListHead(&changed_sector_list);
2785
2786 length = sector_align(origlength, fcb->Vcb->superblock.sector_size);
2787
2788 data = ExAllocatePoolWithTag(PagedPool, length, ALLOC_TAG);
2789 if (!data) {
2790 ERR("could not allocate %llx bytes for data\n", length);
2791 return STATUS_INSUFFICIENT_RESOURCES;
2792 }
2793
2794 if (length > origlength)
2795 RtlZeroMemory(data + origlength, length - origlength);
2796
2797 RtlCopyMemory(data, ed->data, origlength);
2798
2799 fcb->inode_item.st_blocks -= origlength;
2800 fcb->inode_item_changed = TRUE;
2801 mark_fcb_dirty(fcb);
2802
2803 remove_fcb_extent(fcb, ext, rollback);
2804
2805 if (write_fcb_compressed(fcb)) {
2806 Status = write_compressed(fcb, offset, offset + length, data, nocsum ? NULL : &changed_sector_list, Irp, rollback);
2807 if (!NT_SUCCESS(Status)) {
2808 ERR("write_compressed returned %08x\n", Status);
2809 ExFreePool(data);
2810 return Status;
2811 }
2812 } else {
2813 Status = insert_extent(fcb->Vcb, fcb, offset, length, data, nocsum ? NULL : &changed_sector_list, Irp, rollback);
2814 if (!NT_SUCCESS(Status)) {
2815 ERR("insert_extent returned %08x\n", Status);
2816 ExFreePool(data);
2817 return Status;
2818 }
2819 }
2820
2821 oldalloc = ext->offset + length;
2822
2823 ExFreePool(data);
2824
2825 if (!nocsum) {
2826 ExAcquireResourceExclusiveLite(&fcb->Vcb->checksum_lock, TRUE);
2827 commit_checksum_changes(fcb->Vcb, &changed_sector_list);
2828 ExReleaseResourceLite(&fcb->Vcb->checksum_lock);
2829 }
2830 }
2831
2832 if (cur_inline) {
2833 ULONG edsize;
2834
2835 if (end > oldalloc) {
2836 edsize = sizeof(EXTENT_DATA) - 1 + end - ext->offset;
2837 ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
2838
2839 if (!ed) {
2840 ERR("out of memory\n");
2841 return STATUS_INSUFFICIENT_RESOURCES;
2842 }
2843
2844 RtlZeroMemory(ed, edsize);
2845 RtlCopyMemory(ed, ext->data, ext->datalen);
2846
2847 ed->decoded_size = end - ext->offset;
2848
2849 remove_fcb_extent(fcb, ext, rollback);
2850
2851 if (!add_extent_to_fcb(fcb, ext->offset, ed, edsize, ext->unique, rollback)) {
2852 ERR("add_extent_to_fcb failed\n");
2853 ExFreePool(ed);
2854 return STATUS_INTERNAL_ERROR;
2855 }
2856
2857 fcb->extents_changed = TRUE;
2858 mark_fcb_dirty(fcb);
2859 }
2860
2861 TRACE("extending inline file (oldalloc = %llx, end = %llx)\n", oldalloc, end);
2862
2863 fcb->inode_item.st_size = end;
2864 TRACE("setting st_size to %llx\n", end);
2865
2866 fcb->inode_item.st_blocks = end;
2867
2868 fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
2869 } else {
2870 newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
2871
2872 if (newalloc > oldalloc) {
2873 if (prealloc) {
2874 // FIXME - try and extend previous extent first
2875
2876 Status = insert_prealloc_extent(fcb, oldalloc, newalloc - oldalloc, rollback);
2877
2878 if (!NT_SUCCESS(Status)) {
2879 ERR("insert_prealloc_extent returned %08x\n", Status);
2880 return Status;
2881 }
2882 }
2883
2884 fcb->extents_changed = TRUE;
2885 }
2886
2887 fcb->inode_item.st_size = end;
2888 fcb->inode_item_changed = TRUE;
2889 mark_fcb_dirty(fcb);
2890
2891 TRACE("setting st_size to %llx\n", end);
2892
2893 TRACE("newalloc = %llx\n", newalloc);
2894
2895 fcb->Header.AllocationSize.QuadPart = newalloc;
2896 fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
2897 }
2898 } else {
2899 if (end > fcb->Vcb->options.max_inline) {
2900 newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
2901
2902 if (prealloc) {
2903 Status = insert_prealloc_extent(fcb, 0, newalloc, rollback);
2904
2905 if (!NT_SUCCESS(Status)) {
2906 ERR("insert_prealloc_extent returned %08x\n", Status);
2907 return Status;
2908 }
2909 }
2910
2911 fcb->extents_changed = TRUE;
2912 fcb->inode_item_changed = TRUE;
2913 mark_fcb_dirty(fcb);
2914
2915 fcb->inode_item.st_size = end;
2916 TRACE("setting st_size to %llx\n", end);
2917
2918 TRACE("newalloc = %llx\n", newalloc);
2919
2920 fcb->Header.AllocationSize.QuadPart = newalloc;
2921 fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
2922 } else {
2923 EXTENT_DATA* ed;
2924 ULONG edsize;
2925
2926 edsize = sizeof(EXTENT_DATA) - 1 + end;
2927 ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
2928
2929 if (!ed) {
2930 ERR("out of memory\n");
2931 return STATUS_INSUFFICIENT_RESOURCES;
2932 }
2933
2934 ed->generation = fcb->Vcb->superblock.generation;
2935 ed->decoded_size = end;
2936 ed->compression = BTRFS_COMPRESSION_NONE;
2937 ed->encryption = BTRFS_ENCRYPTION_NONE;
2938 ed->encoding = BTRFS_ENCODING_NONE;
2939 ed->type = EXTENT_TYPE_INLINE;
2940
2941 RtlZeroMemory(ed->data, end);
2942
2943 if (!add_extent_to_fcb(fcb, 0, ed, edsize, FALSE, rollback)) {
2944 ERR("add_extent_to_fcb failed\n");
2945 ExFreePool(ed);
2946 return STATUS_INTERNAL_ERROR;
2947 }
2948
2949 fcb->extents_changed = TRUE;
2950 fcb->inode_item_changed = TRUE;
2951 mark_fcb_dirty(fcb);
2952
2953 fcb->inode_item.st_size = end;
2954 TRACE("setting st_size to %llx\n", end);
2955
2956 fcb->inode_item.st_blocks = end;
2957
2958 fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
2959 }
2960 }
2961 }
2962
2963 return STATUS_SUCCESS;
2964 }
2965
2966 static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, UINT64 end_data, void* data, UINT64* written,
2967 LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback) {
2968 EXTENT_DATA* ed = ext->data;
2969 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
2970 NTSTATUS Status;
2971 chunk* c;
2972
2973 if (start_data <= ext->offset && end_data >= ext->offset + ed2->num_bytes) { // replace all
2974 EXTENT_DATA* ned;
2975 extent* newext;
2976
2977 ned = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG);
2978 if (!ned) {
2979 ERR("out of memory\n");
2980 return STATUS_INSUFFICIENT_RESOURCES;
2981 }
2982
2983 newext = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG);
2984 if (!newext) {
2985 ERR("out of memory\n");
2986 ExFreePool(ned);
2987 return STATUS_INSUFFICIENT_RESOURCES;
2988 }
2989
2990 RtlCopyMemory(ned, ext->data, ext->datalen);
2991
2992 ned->type = EXTENT_TYPE_REGULAR;
2993
2994 Status = do_write_data(fcb->Vcb, ed2->address + ed2->offset, (UINT8*)data + ext->offset - start_data, ed2->num_bytes, changed_sector_list, Irp);
2995 if (!NT_SUCCESS(Status)) {
2996 ERR("do_write_data returned %08x\n", Status);
2997 return Status;
2998 }
2999
3000 *written = ed2->num_bytes;
3001
3002 newext->offset = ext->offset;
3003 newext->data = ned;
3004 newext->datalen = ext->datalen;
3005 newext->unique = ext->unique;
3006 newext->ignore = FALSE;
3007 InsertHeadList(&ext->list_entry, &newext->list_entry);
3008
3009 add_insert_extent_rollback(rollback, fcb, newext);
3010
3011 remove_fcb_extent(fcb, ext, rollback);
3012 } else if (start_data <= ext->offset && end_data < ext->offset + ed2->num_bytes) { // replace beginning
3013 EXTENT_DATA *ned, *nedb;
3014 EXTENT_DATA2* ned2;
3015 extent *newext1, *newext2;
3016
3017 ned = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG);
3018 if (!ned) {
3019 ERR("out of memory\n");
3020 return STATUS_INSUFFICIENT_RESOURCES;
3021 }
3022
3023 nedb = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG);
3024 if (!nedb) {
3025 ERR("out of memory\n");
3026 ExFreePool(ned);
3027 return STATUS_INSUFFICIENT_RESOURCES;
3028 }
3029
3030 newext1 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG);
3031 if (!newext1) {
3032 ERR("out of memory\n");
3033 ExFreePool(ned);
3034 ExFreePool(nedb);
3035 return STATUS_INSUFFICIENT_RESOURCES;
3036 }
3037
3038 newext2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG);
3039 if (!newext2) {
3040 ERR("out of memory\n");
3041 ExFreePool(ned);
3042 ExFreePool(nedb);
3043 ExFreePool(newext1);
3044 return STATUS_INSUFFICIENT_RESOURCES;
3045 }
3046
3047 RtlCopyMemory(ned, ext->data, ext->datalen);
3048 ned->type = EXTENT_TYPE_REGULAR;
3049 ned2 = (EXTENT_DATA2*)ned->data;
3050 ned2->num_bytes = end_data - ext->offset;
3051
3052 RtlCopyMemory(nedb, ext->data, ext->datalen);
3053 ned2 = (EXTENT_DATA2*)nedb->data;
3054 ned2->offset += end_data - ext->offset;
3055 ned2->num_bytes -= end_data - ext->offset;
3056
3057 Status = do_write_data(fcb->Vcb, ed2->address + ed2->offset, (UINT8*)data + ext->offset - start_data, end_data - ext->offset, changed_sector_list, Irp);
3058 if (!NT_SUCCESS(Status)) {
3059 ERR("do_write_data returned %08x\n", Status);
3060 return Status;
3061 }
3062
3063 *written = end_data - ext->offset;
3064
3065 newext1->offset = ext->offset;
3066 newext1->data = ned;
3067 newext1->datalen = ext->datalen;
3068 newext1->unique = ext->unique;
3069 newext1->ignore = FALSE;
3070 InsertHeadList(&ext->list_entry, &newext1->list_entry);
3071
3072 add_insert_extent_rollback(rollback, fcb, newext1);
3073
3074 newext2->offset = end_data;
3075 newext2->data = nedb;
3076 newext2->datalen = ext->datalen;
3077 newext2->unique = ext->unique;
3078 newext2->ignore = FALSE;
3079 InsertHeadList(&newext1->list_entry, &newext2->list_entry);
3080
3081 add_insert_extent_rollback(rollback, fcb, newext2);
3082
3083 c = get_chunk_from_address(fcb->Vcb, ed2->address);
3084
3085 if (!c)
3086 ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
3087 else {
3088 Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
3089 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
3090
3091 if (!NT_SUCCESS(Status)) {
3092 ERR("update_changed_extent_ref returned %08x\n", Status);
3093 return Status;
3094 }
3095 }
3096
3097 remove_fcb_extent(fcb, ext, rollback);
3098 } else if (start_data > ext->offset && end_data >= ext->offset + ed2->num_bytes) { // replace end
3099 EXTENT_DATA *ned, *nedb;
3100 EXTENT_DATA2* ned2;
3101 extent *newext1, *newext2;
3102
3103 ned = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG);
3104 if (!ned) {
3105 ERR("out of memory\n");
3106 return STATUS_INSUFFICIENT_RESOURCES;
3107 }
3108
3109 nedb = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG);
3110 if (!nedb) {
3111 ERR("out of memory\n");
3112 ExFreePool(ned);
3113 return STATUS_INSUFFICIENT_RESOURCES;
3114 }
3115
3116 newext1 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG);
3117 if (!newext1) {
3118 ERR("out of memory\n");
3119 ExFreePool(ned);
3120 ExFreePool(nedb);
3121 return STATUS_INSUFFICIENT_RESOURCES;
3122 }
3123
3124 newext2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG);
3125 if (!newext2) {
3126 ERR("out of memory\n");
3127 ExFreePool(ned);
3128 ExFreePool(nedb);
3129 ExFreePool(newext1);
3130 return STATUS_INSUFFICIENT_RESOURCES;
3131 }
3132
3133 RtlCopyMemory(ned, ext->data, ext->datalen);
3134
3135 ned2 = (EXTENT_DATA2*)ned->data;
3136 ned2->num_bytes = start_data - ext->offset;
3137
3138 RtlCopyMemory(nedb, ext->data, ext->datalen);
3139
3140 nedb->type = EXTENT_TYPE_REGULAR;
3141 ned2 = (EXTENT_DATA2*)nedb->data;
3142 ned2->offset += start_data - ext->offset;
3143 ned2->num_bytes = ext->offset + ed2->num_bytes - start_data;
3144
3145 Status = do_write_data(fcb->Vcb, ed2->address + ned2->offset, data, ned2->num_bytes, changed_sector_list, Irp);
3146 if (!NT_SUCCESS(Status)) {
3147 ERR("do_write_data returned %08x\n", Status);
3148 return Status;
3149 }
3150
3151 *written = ned2->num_bytes;
3152
3153 newext1->offset = ext->offset;
3154 newext1->data = ned;
3155 newext1->datalen = ext->datalen;
3156 newext1->unique = ext->unique;
3157 newext1->ignore = FALSE;
3158 InsertHeadList(&ext->list_entry, &newext1->list_entry);
3159
3160 add_insert_extent_rollback(rollback, fcb, newext1);
3161
3162 newext2->offset = start_data;
3163 newext2->data = nedb;
3164 newext2->datalen = ext->datalen;
3165 newext2->unique = ext->unique;
3166 newext2->ignore = FALSE;
3167 InsertHeadList(&newext1->list_entry, &newext2->list_entry);
3168
3169 add_insert_extent_rollback(rollback, fcb, newext2);
3170
3171 c = get_chunk_from_address(fcb->Vcb, ed2->address);
3172
3173 if (!c)
3174 ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
3175 else {
3176 Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
3177 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
3178
3179 if (!NT_SUCCESS(Status)) {
3180 ERR("update_changed_extent_ref returned %08x\n", Status);
3181 return Status;
3182 }
3183 }
3184
3185 remove_fcb_extent(fcb, ext, rollback);
3186 } else if (start_data > ext->offset && end_data < ext->offset + ed2->num_bytes) { // replace middle
3187 EXTENT_DATA *ned, *nedb, *nedc;
3188 EXTENT_DATA2* ned2;
3189 extent *newext1, *newext2, *newext3;
3190
3191 ned = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG);
3192 if (!ned) {
3193 ERR("out of memory\n");
3194 return STATUS_INSUFFICIENT_RESOURCES;
3195 }
3196
3197 nedb = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG);
3198 if (!nedb) {
3199 ERR("out of memory\n");
3200 ExFreePool(ned);
3201 return STATUS_INSUFFICIENT_RESOURCES;
3202 }
3203
3204 nedc = ExAllocatePoolWithTag(PagedPool, ext->datalen, ALLOC_TAG);
3205 if (!nedb) {
3206 ERR("out of memory\n");
3207 ExFreePool(ned);
3208 ExFreePool(nedb);
3209 return STATUS_INSUFFICIENT_RESOURCES;
3210 }
3211
3212 newext1 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG);
3213 if (!newext1) {
3214 ERR("out of memory\n");
3215 ExFreePool(ned);
3216 ExFreePool(nedb);
3217 ExFreePool(nedc);
3218 return STATUS_INSUFFICIENT_RESOURCES;
3219 }
3220
3221 newext2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG);
3222 if (!newext2) {
3223 ERR("out of memory\n");
3224 ExFreePool(ned);
3225 ExFreePool(nedb);
3226 ExFreePool(nedc);
3227 ExFreePool(newext1);
3228 return STATUS_INSUFFICIENT_RESOURCES;
3229 }
3230
3231 newext3 = ExAllocatePoolWithTag(PagedPool, sizeof(extent), ALLOC_TAG);
3232 if (!newext2) {
3233 ERR("out of memory\n");
3234 ExFreePool(ned);
3235 ExFreePool(nedb);
3236 ExFreePool(nedc);
3237 ExFreePool(newext1);
3238 ExFreePool(newext2);
3239 return STATUS_INSUFFICIENT_RESOURCES;
3240 }
3241
3242 RtlCopyMemory(ned, ext->data, ext->datalen);
3243 RtlCopyMemory(nedb, ext->data, ext->datalen);
3244 RtlCopyMemory(nedc, ext->data, ext->datalen);
3245
3246 ned2 = (EXTENT_DATA2*)ned->data;
3247 ned2->num_bytes = start_data - ext->offset;
3248
3249 nedb->type = EXTENT_TYPE_REGULAR;
3250 ned2 = (EXTENT_DATA2*)nedb->data;
3251 ned2->offset += start_data - ext->offset;
3252 ned2->num_bytes = end_data - start_data;
3253
3254 ned2 = (EXTENT_DATA2*)nedc->data;
3255 ned2->offset += end_data - ext->offset;
3256 ned2->num_bytes -= end_data - ext->offset;
3257
3258 ned2 = (EXTENT_DATA2*)nedb->data;
3259 Status = do_write_data(fcb->Vcb, ed2->address + ned2->offset, data, end_data - start_data, changed_sector_list, Irp);
3260 if (!NT_SUCCESS(Status)) {
3261 ERR("do_write_data returned %08x\n", Status);
3262 return Status;
3263 }
3264
3265 *written = end_data - start_data;
3266
3267 newext1->offset = ext->offset;
3268 newext1->data = ned;
3269 newext1->datalen = ext->datalen;
3270 newext1->unique = ext->unique;
3271 newext1->ignore = FALSE;
3272 InsertHeadList(&ext->list_entry, &newext1->list_entry);
3273
3274 add_insert_extent_rollback(rollback, fcb, newext1);
3275
3276 newext2->offset = start_data;
3277 newext2->data = nedb;
3278 newext2->datalen = ext->datalen;
3279 newext2->unique = ext->unique;
3280 newext2->ignore = FALSE;
3281 InsertHeadList(&newext1->list_entry, &newext2->list_entry);
3282
3283 add_insert_extent_rollback(rollback, fcb, newext2);
3284
3285 newext3->offset = end_data;
3286 newext3->data = nedc;
3287 newext3->datalen = ext->datalen;
3288 newext3->unique = ext->unique;
3289 newext3->ignore = FALSE;
3290 InsertHeadList(&newext2->list_entry, &newext3->list_entry);
3291
3292 add_insert_extent_rollback(rollback, fcb, newext3);
3293
3294 c = get_chunk_from_address(fcb->Vcb, ed2->address);
3295
3296 if (!c)
3297 ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
3298 else {
3299 Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 2,
3300 fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
3301
3302 if (!NT_SUCCESS(Status)) {
3303 ERR("update_changed_extent_ref returned %08x\n", Status);
3304 return Status;
3305 }
3306 }
3307
3308 remove_fcb_extent(fcb, ext, rollback);
3309 }
3310
3311 return STATUS_SUCCESS;
3312 }
3313
3314 NTSTATUS do_write_file(fcb* fcb, UINT64 start, UINT64 end_data, void* data, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback) {
3315 NTSTATUS Status;
3316 LIST_ENTRY *le, *le2;
3317 UINT64 written = 0, length = end_data - start;
3318 UINT64 last_cow_start;
3319 #ifdef DEBUG_PARANOID
3320 UINT64 last_off;
3321 #endif
3322
3323 last_cow_start = 0;
3324
3325 le = fcb->extents.Flink;
3326 while (le != &fcb->extents) {
3327 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
3328
3329 le2 = le->Flink;
3330
3331 if (!ext->ignore) {
3332 EXTENT_DATA* ed = ext->data;
3333 EXTENT_DATA2* ed2 = ed->type == EXTENT_TYPE_INLINE ? NULL : (EXTENT_DATA2*)ed->data;
3334 UINT64 len;
3335
3336 len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
3337
3338 if (ext->offset + len <= start)
3339 goto nextitem;
3340
3341 if (ext->offset > start + written + length)
3342 break;
3343
3344 if ((fcb->inode_item.flags & BTRFS_INODE_NODATACOW || ed->type == EXTENT_TYPE_PREALLOC) && ext->unique) {
3345 if (max(last_cow_start, start + written) < ext->offset) {
3346 UINT64 start_write = max(last_cow_start, start + written);
3347
3348 Status = excise_extents(fcb->Vcb, fcb, start_write, ext->offset, Irp, rollback);
3349 if (!NT_SUCCESS(Status)) {
3350 ERR("excise_extents returned %08x\n", Status);
3351 return Status;
3352 }
3353
3354 Status = insert_extent(fcb->Vcb, fcb, start_write, ext->offset - start_write, data, changed_sector_list, Irp, rollback);
3355 if (!NT_SUCCESS(Status)) {
3356 ERR("insert_extent returned %08x\n", Status);
3357 return Status;
3358 }
3359
3360 written += ext->offset - start_write;
3361 length -= ext->offset - start_write;
3362
3363 if (length == 0)
3364 break;
3365 }
3366
3367 if (ed->type == EXTENT_TYPE_REGULAR) {
3368 UINT64 writeaddr = ed2->address + ed2->offset + start + written - ext->offset;
3369 UINT64 write_len = min(len, length);
3370
3371 TRACE("doing non-COW write to %llx\n", writeaddr);
3372
3373 Status = write_data_complete(fcb->Vcb, writeaddr, (UINT8*)data + written, write_len, Irp, NULL);
3374 if (!NT_SUCCESS(Status)) {
3375 ERR("write_data_complete returned %08x\n", Status);
3376 return Status;
3377 }
3378
3379 if (changed_sector_list) {
3380 unsigned int i;
3381 changed_sector* sc;
3382
3383 sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
3384 if (!sc) {
3385 ERR("out of memory\n");
3386 return STATUS_INSUFFICIENT_RESOURCES;
3387 }
3388
3389 sc->ol.key = writeaddr;
3390 sc->length = write_len / fcb->Vcb->superblock.sector_size;
3391 sc->deleted = FALSE;
3392
3393 sc->checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sc->length, ALLOC_TAG);
3394 if (!sc->checksums) {
3395 ERR("out of memory\n");
3396 ExFreePool(sc);
3397 return STATUS_INSUFFICIENT_RESOURCES;
3398 }
3399
3400 for (i = 0; i < sc->length; i++) {
3401 sc->checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + written + (i * fcb->Vcb->superblock.sector_size), fcb->Vcb->superblock.sector_size);
3402 }
3403
3404 insert_into_ordered_list(changed_sector_list, &sc->ol);
3405 }
3406
3407 written += write_len;
3408 length -= write_len;
3409
3410 if (length == 0)
3411 break;
3412 } else if (ed->type == EXTENT_TYPE_PREALLOC) {
3413 UINT64 write_len;
3414
3415 Status = do_write_file_prealloc(fcb, ext, start + written, end_data, (UINT8*)data + written, &write_len,
3416 changed_sector_list, Irp, rollback);
3417 if (!NT_SUCCESS(Status)) {
3418 ERR("do_write_file_prealloc returned %08x\n", Status);
3419 return Status;
3420 }
3421
3422 written += write_len;
3423 length -= write_len;
3424
3425 if (length == 0)
3426 break;
3427 }
3428
3429 last_cow_start = ext->offset + len;
3430 }
3431 }
3432
3433 nextitem:
3434 le = le2;
3435 }
3436
3437 if (length > 0) {
3438 UINT64 start_write = max(last_cow_start, start + written);
3439
3440 Status = excise_extents(fcb->Vcb, fcb, start_write, end_data, Irp, rollback);
3441 if (!NT_SUCCESS(Status)) {
3442 ERR("excise_extents returned %08x\n", Status);
3443 return Status;
3444 }
3445
3446 Status = insert_extent(fcb->Vcb, fcb, start_write, end_data - start_write, data, changed_sector_list, Irp, rollback);
3447 if (!NT_SUCCESS(Status)) {
3448 ERR("insert_extent returned %08x\n", Status);
3449 return Status;
3450 }
3451 }
3452
3453 #ifdef DEBUG_PARANOID
3454 last_off = 0xffffffffffffffff;
3455
3456 le = fcb->extents.Flink;
3457 while (le != &fcb->extents) {
3458 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
3459
3460 if (!ext->ignore) {
3461 if (ext->offset == last_off) {
3462 ERR("offset %llx duplicated\n", ext->offset);
3463 int3;
3464 } else if (ext->offset < last_off && last_off != 0xffffffffffffffff) {
3465 ERR("offsets out of order\n");
3466 int3;
3467 }
3468
3469 last_off = ext->offset;
3470 }
3471
3472 le = le->Flink;
3473 }
3474 #endif
3475
3476 fcb->extents_changed = TRUE;
3477 mark_fcb_dirty(fcb);
3478
3479 return STATUS_SUCCESS;
3480 }
3481
3482 NTSTATUS write_compressed(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, LIST_ENTRY* changed_sector_list, PIRP Irp, LIST_ENTRY* rollback) {
3483 NTSTATUS Status;
3484 UINT64 i;
3485
3486 for (i = 0; i < sector_align(end_data - start_data, COMPRESSED_EXTENT_SIZE) / COMPRESSED_EXTENT_SIZE; i++) {
3487 UINT64 s2, e2;
3488 BOOL compressed;
3489
3490 s2 = start_data + (i * COMPRESSED_EXTENT_SIZE);
3491 e2 = min(s2 + COMPRESSED_EXTENT_SIZE, end_data);
3492
3493 Status = write_compressed_bit(fcb, s2, e2, (UINT8*)data + (i * COMPRESSED_EXTENT_SIZE), &compressed, changed_sector_list, Irp, rollback);
3494
3495 if (!NT_SUCCESS(Status)) {
3496 ERR("write_compressed_bit returned %08x\n", Status);
3497 return Status;
3498 }
3499
3500 // If the first 128 KB of a file is incompressible, we set the nocompress flag so we don't
3501 // bother with the rest of it.
3502 if (s2 == 0 && e2 == COMPRESSED_EXTENT_SIZE && !compressed && !fcb->Vcb->options.compress_force) {
3503 fcb->inode_item.flags |= BTRFS_INODE_NOCOMPRESS;
3504 fcb->inode_item_changed = TRUE;
3505 mark_fcb_dirty(fcb);
3506
3507 // write subsequent data non-compressed
3508 if (e2 < end_data) {
3509 Status = do_write_file(fcb, e2, end_data, (UINT8*)data + e2, changed_sector_list, Irp, rollback);
3510
3511 if (!NT_SUCCESS(Status)) {
3512 ERR("do_write_file returned %08x\n", Status);
3513 return Status;
3514 }
3515 }
3516
3517 return STATUS_SUCCESS;
3518 }
3519 }
3520
3521 return STATUS_SUCCESS;
3522 }
3523
3524 NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void* buf, ULONG* length, BOOL paging_io, BOOL no_cache,
3525 BOOL wait, BOOL deferred_write, LIST_ENTRY* rollback) {
3526 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3527 PFILE_OBJECT FileObject = IrpSp->FileObject;
3528 EXTENT_DATA* ed2;
3529 UINT64 newlength, start_data, end_data;
3530 UINT32 bufhead;
3531 BOOL make_inline;
3532 UINT8* data;
3533 LIST_ENTRY changed_sector_list;
3534 INODE_ITEM* origii;
3535 BOOL changed_length = FALSE, nocsum/*, lazy_writer = FALSE, write_eof = FALSE*/;
3536 NTSTATUS Status;
3537 LARGE_INTEGER time;
3538 BTRFS_TIME now;
3539 fcb* fcb;
3540 ccb* ccb;
3541 file_ref* fileref;
3542 BOOL paging_lock = FALSE, fcb_lock = FALSE, tree_lock = FALSE, pagefile;
3543 ULONG filter = 0;
3544
3545 TRACE("(%p, %p, %llx, %p, %x, %u, %u)\n", Vcb, FileObject, offset.QuadPart, buf, *length, paging_io, no_cache);
3546
3547 if (*length == 0) {
3548 WARN("returning success for zero-length write\n");
3549 return STATUS_SUCCESS;
3550 }
3551
3552 if (!FileObject) {
3553 ERR("error - FileObject was NULL\n");
3554 return STATUS_ACCESS_DENIED;
3555 }
3556
3557 fcb = FileObject->FsContext;
3558 ccb = FileObject->FsContext2;
3559 fileref = ccb ? ccb->fileref : NULL;
3560
3561 if (!fcb->ads && fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK) {
3562 WARN("tried to write to something other than a file or symlink (inode %llx, type %u, %p, %p)\n", fcb->inode, fcb->type, &fcb->type, fcb);
3563 return STATUS_INVALID_DEVICE_REQUEST;
3564 }
3565
3566 if (offset.LowPart == FILE_WRITE_TO_END_OF_FILE && offset.HighPart == -1) {
3567 offset = fcb->Header.FileSize;
3568 // write_eof = TRUE;
3569 }
3570
3571 TRACE("fcb->Header.Flags = %x\n", fcb->Header.Flags);
3572
3573 if (!no_cache && !CcCanIWrite(FileObject, *length, wait, deferred_write))
3574 return STATUS_PENDING;
3575
3576 if (!wait && no_cache)
3577 return STATUS_PENDING;
3578
3579 if (no_cache && !paging_io && FileObject->SectionObjectPointer->DataSectionObject) {
3580 IO_STATUS_BLOCK iosb;
3581
3582 ExAcquireResourceExclusiveLite(fcb->Header.PagingIoResource, TRUE);
3583
3584 CcFlushCache(FileObject->SectionObjectPointer, &offset, *length, &iosb);
3585
3586 if (!NT_SUCCESS(iosb.Status)) {
3587 ExReleaseResourceLite(fcb->Header.PagingIoResource);
3588 ERR("CcFlushCache returned %08x\n", iosb.Status);
3589 return iosb.Status;
3590 }
3591
3592 paging_lock = TRUE;
3593
3594 CcPurgeCacheSection(FileObject->SectionObjectPointer, &offset, *length, FALSE);
3595 }
3596
3597 if (paging_io) {
3598 if (!ExAcquireResourceSharedLite(fcb->Header.PagingIoResource, wait)) {
3599 Status = STATUS_PENDING;
3600 goto end;
3601 } else
3602 paging_lock = TRUE;
3603 }
3604
3605 pagefile = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE && paging_io;
3606
3607 if (!pagefile && !ExIsResourceAcquiredExclusiveLite(&Vcb->tree_lock)) {
3608 if (!ExAcquireResourceSharedLite(&Vcb->tree_lock, wait)) {
3609 Status = STATUS_PENDING;
3610 goto end;
3611 } else
3612 tree_lock = TRUE;
3613 }
3614
3615 if (no_cache) {
3616 if (pagefile) {
3617 if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) {
3618 Status = STATUS_PENDING;
3619 goto end;
3620 } else
3621 fcb_lock = TRUE;
3622 } else if (!ExIsResourceAcquiredExclusiveLite(fcb->Header.Resource)) {
3623 if (!ExAcquireResourceExclusiveLite(fcb->Header.Resource, wait)) {
3624 Status = STATUS_PENDING;
3625 goto end;
3626 } else
3627 fcb_lock = TRUE;
3628 }
3629 }
3630
3631 nocsum = fcb->ads ? TRUE : fcb->inode_item.flags & BTRFS_INODE_NODATASUM;
3632
3633 newlength = fcb->ads ? fcb->adsdata.Length : fcb->inode_item.st_size;
3634
3635 if (fcb->deleted)
3636 newlength = 0;
3637
3638 TRACE("newlength = %llx\n", newlength);
3639
3640 // if (KeGetCurrentThread() == fcb->lazy_writer_thread) {
3641 // ERR("lazy writer on the TV\n");
3642 // lazy_writer = TRUE;
3643 // }
3644
3645 if (offset.QuadPart + *length > newlength) {
3646 if (paging_io) {
3647 if (offset.QuadPart >= newlength) {
3648 TRACE("paging IO tried to write beyond end of file (file size = %llx, offset = %llx, length = %x)\n", newlength, offset.QuadPart, *length);
3649 TRACE("filename %S\n", file_desc(FileObject));
3650 TRACE("FileObject: AllocationSize = %llx, FileSize = %llx, ValidDataLength = %llx\n",
3651 fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
3652 Status = STATUS_SUCCESS;
3653 goto end;
3654 }
3655
3656 *length = newlength - offset.QuadPart;
3657 } else {
3658 newlength = offset.QuadPart + *length;
3659 changed_length = TRUE;
3660
3661 TRACE("extending length to %llx\n", newlength);
3662 }
3663 }
3664
3665 make_inline = fcb->ads ? FALSE : newlength <= fcb->Vcb->options.max_inline;
3666
3667 if (changed_length) {
3668 if (newlength > fcb->Header.AllocationSize.QuadPart) {
3669 if (!tree_lock) {
3670 // We need to acquire the tree lock if we don't have it already -
3671 // we can't give an inline file proper extents at the same as we're
3672 // doing a flush.
3673 if (!ExAcquireResourceSharedLite(&Vcb->tree_lock, wait)) {
3674 Status = STATUS_PENDING;
3675 goto end;
3676 } else
3677 tree_lock = TRUE;
3678 }
3679
3680 Status = extend_file(fcb, fileref, newlength, FALSE, Irp, rollback);
3681 if (!NT_SUCCESS(Status)) {
3682 ERR("extend_file returned %08x\n", Status);
3683 goto end;
3684 }
3685 } else if (!fcb->ads)
3686 fcb->inode_item.st_size = newlength;
3687
3688 fcb->Header.FileSize.QuadPart = newlength;
3689 fcb->Header.ValidDataLength.QuadPart = newlength;
3690
3691 TRACE("AllocationSize = %llx\n", fcb->Header.AllocationSize.QuadPart);
3692 TRACE("FileSize = %llx\n", fcb->Header.FileSize.QuadPart);
3693 TRACE("ValidDataLength = %llx\n", fcb->Header.ValidDataLength.QuadPart);
3694 }
3695
3696 if (!no_cache) {
3697 if (!FileObject->PrivateCacheMap || changed_length) {
3698 CC_FILE_SIZES ccfs;
3699
3700 ccfs.AllocationSize = fcb->Header.AllocationSize;
3701 ccfs.FileSize = fcb->Header.FileSize;
3702 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
3703
3704 if (!FileObject->PrivateCacheMap) {
3705 TRACE("calling CcInitializeCacheMap...\n");
3706 CcInitializeCacheMap(FileObject, &ccfs, FALSE, cache_callbacks, FileObject);
3707
3708 CcSetReadAheadGranularity(FileObject, READ_AHEAD_GRANULARITY);
3709 }
3710
3711 CcSetFileSizes(FileObject, &ccfs);
3712 }
3713
3714 if (IrpSp->MinorFunction & IRP_MN_MDL) {
3715 CcPrepareMdlWrite(FileObject, &offset, *length, &Irp->MdlAddress, &Irp->IoStatus);
3716
3717 Status = Irp->IoStatus.Status;
3718 goto end;
3719 } else {
3720 TRACE("CcCopyWrite(%p, %llx, %x, %u, %p)\n", FileObject, offset.QuadPart, *length, wait, buf);
3721 if (!CcCopyWrite(FileObject, &offset, *length, wait, buf)) {
3722 Status = STATUS_PENDING;
3723 goto end;
3724 }
3725 TRACE("CcCopyWrite finished\n");
3726 }
3727
3728 Status = STATUS_SUCCESS;
3729 goto end;
3730 }
3731
3732 if (fcb->ads) {
3733 if (changed_length) {
3734 char* data2;
3735
3736 if (newlength > fcb->adsmaxlen) {
3737 ERR("error - xattr too long (%llu > %u)\n", newlength, fcb->adsmaxlen);
3738 Status = STATUS_DISK_FULL;
3739 goto end;
3740 }
3741
3742 data2 = ExAllocatePoolWithTag(PagedPool, newlength, ALLOC_TAG);
3743 if (!data2) {
3744 ERR("out of memory\n");
3745 Status = STATUS_INSUFFICIENT_RESOURCES;
3746 goto end;
3747 }
3748
3749 if (fcb->adsdata.Buffer) {
3750 RtlCopyMemory(data2, fcb->adsdata.Buffer, fcb->adsdata.Length);
3751 ExFreePool(fcb->adsdata.Buffer);
3752 }
3753
3754 if (newlength > fcb->adsdata.Length)
3755 RtlZeroMemory(&data2[fcb->adsdata.Length], newlength - fcb->adsdata.Length);
3756
3757
3758 fcb->adsdata.Buffer = data2;
3759 fcb->adsdata.Length = fcb->adsdata.MaximumLength = newlength;
3760
3761 fcb->Header.AllocationSize.QuadPart = newlength;
3762 fcb->Header.FileSize.QuadPart = newlength;
3763 fcb->Header.ValidDataLength.QuadPart = newlength;
3764 }
3765
3766 if (*length > 0)
3767 RtlCopyMemory(&fcb->adsdata.Buffer[offset.QuadPart], buf, *length);
3768
3769 fcb->Header.ValidDataLength.QuadPart = newlength;
3770
3771 mark_fcb_dirty(fcb);
3772
3773 if (fileref)
3774 mark_fileref_dirty(fileref);
3775 } else {
3776 BOOL compress = write_fcb_compressed(fcb);
3777
3778 if (make_inline) {
3779 start_data = 0;
3780 end_data = sector_align(newlength, fcb->Vcb->superblock.sector_size);
3781 bufhead = sizeof(EXTENT_DATA) - 1;
3782 } else if (compress) {
3783 start_data = offset.QuadPart & ~(UINT64)(COMPRESSED_EXTENT_SIZE - 1);
3784 end_data = min(sector_align(offset.QuadPart + *length, COMPRESSED_EXTENT_SIZE),
3785 sector_align(newlength, fcb->Vcb->superblock.sector_size));
3786 bufhead = 0;
3787 } else {
3788 start_data = offset.QuadPart & ~(UINT64)(fcb->Vcb->superblock.sector_size - 1);
3789 end_data = sector_align(offset.QuadPart + *length, fcb->Vcb->superblock.sector_size);
3790 bufhead = 0;
3791 }
3792
3793 fcb->Header.ValidDataLength.QuadPart = newlength;
3794 TRACE("fcb %p FileSize = %llx\n", fcb, fcb->Header.FileSize.QuadPart);
3795
3796 data = ExAllocatePoolWithTag(PagedPool, end_data - start_data + bufhead, ALLOC_TAG);
3797 if (!data) {
3798 ERR("out of memory\n");
3799 Status = STATUS_INSUFFICIENT_RESOURCES;
3800 goto end;
3801 }
3802
3803 RtlZeroMemory(data + bufhead, end_data - start_data);
3804
3805 TRACE("start_data = %llx\n", start_data);
3806 TRACE("end_data = %llx\n", end_data);
3807
3808 if (offset.QuadPart > start_data || offset.QuadPart + *length < end_data) {
3809 if (changed_length) {
3810 if (fcb->inode_item.st_size > start_data)
3811 Status = read_file(fcb, data + bufhead, start_data, fcb->inode_item.st_size - start_data, NULL, Irp);
3812 else
3813 Status = STATUS_SUCCESS;
3814 } else
3815 Status = read_file(fcb, data + bufhead, start_data, end_data - start_data, NULL, Irp);
3816
3817 if (!NT_SUCCESS(Status)) {
3818 ERR("read_file returned %08x\n", Status);
3819 ExFreePool(data);
3820 goto end;
3821 }
3822 }
3823
3824 RtlCopyMemory(data + bufhead + offset.QuadPart - start_data, buf, *length);
3825
3826 if (!nocsum)
3827 InitializeListHead(&changed_sector_list);
3828
3829 if (make_inline) {
3830 Status = excise_extents(fcb->Vcb, fcb, start_data, end_data, Irp, rollback);
3831 if (!NT_SUCCESS(Status)) {
3832 ERR("error - excise_extents returned %08x\n", Status);
3833 ExFreePool(data);
3834 goto end;
3835 }
3836
3837 ed2 = (EXTENT_DATA*)data;
3838 ed2->generation = fcb->Vcb->superblock.generation;
3839 ed2->decoded_size = newlength;
3840 ed2->compression = BTRFS_COMPRESSION_NONE;
3841 ed2->encryption = BTRFS_ENCRYPTION_NONE;
3842 ed2->encoding = BTRFS_ENCODING_NONE;
3843 ed2->type = EXTENT_TYPE_INLINE;
3844
3845 if (!add_extent_to_fcb(fcb, 0, ed2, sizeof(EXTENT_DATA) - 1 + newlength, FALSE, rollback)) {
3846 ERR("add_extent_to_fcb failed\n");
3847 ExFreePool(data);
3848 Status = STATUS_INTERNAL_ERROR;
3849 goto end;
3850 }
3851
3852 fcb->inode_item.st_blocks += newlength;
3853 } else if (compress) {
3854 Status = write_compressed(fcb, start_data, end_data, data, nocsum ? NULL : &changed_sector_list, Irp, rollback);
3855
3856 if (!NT_SUCCESS(Status)) {
3857 ERR("write_compressed returned %08x\n", Status);
3858 ExFreePool(data);
3859 goto end;
3860 }
3861
3862 ExFreePool(data);
3863 } else {
3864 Status = do_write_file(fcb, start_data, end_data, data, nocsum ? NULL : &changed_sector_list, Irp, rollback);
3865
3866 if (!NT_SUCCESS(Status)) {
3867 ERR("do_write_file returned %08x\n", Status);
3868 ExFreePool(data);
3869 goto end;
3870 }
3871
3872 ExFreePool(data);
3873 }
3874 }
3875
3876 if (!pagefile) {
3877 KeQuerySystemTime(&time);
3878 win_time_to_unix(time, &now);
3879
3880 // ERR("no_cache = %s, FileObject->PrivateCacheMap = %p\n", no_cache ? "TRUE" : "FALSE", FileObject->PrivateCacheMap);
3881 //
3882 // if (!no_cache) {
3883 // if (!FileObject->PrivateCacheMap) {
3884 // CC_FILE_SIZES ccfs;
3885 //
3886 // ccfs.AllocationSize = fcb->Header.AllocationSize;
3887 // ccfs.FileSize = fcb->Header.FileSize;
3888 // ccfs.ValidDataLength = fcb->Header.ValidDataLength;
3889 //
3890 // TRACE("calling CcInitializeCacheMap...\n");
3891 // CcInitializeCacheMap(FileObject, &ccfs, FALSE, cache_callbacks, fcb);
3892 //
3893 // changed_length = FALSE;
3894 // }
3895 // }
3896
3897 if (fcb->ads) {
3898 if (fileref && fileref->parent)
3899 origii = &fileref->parent->fcb->inode_item;
3900 else {
3901 ERR("no parent fcb found for stream\n");
3902 Status = STATUS_INTERNAL_ERROR;
3903 goto end;
3904 }
3905 } else
3906 origii = &fcb->inode_item;
3907
3908 origii->transid = Vcb->superblock.generation;
3909 origii->sequence++;
3910
3911 if (!ccb->user_set_change_time)
3912 origii->st_ctime = now;
3913
3914 if (!fcb->ads) {
3915 if (changed_length) {
3916 TRACE("setting st_size to %llx\n", newlength);
3917 origii->st_size = newlength;
3918 filter |= FILE_NOTIFY_CHANGE_SIZE;
3919 }
3920
3921 if (!ccb->user_set_write_time) {
3922 origii->st_mtime = now;
3923 filter |= FILE_NOTIFY_CHANGE_LAST_WRITE;
3924 }
3925
3926 fcb->inode_item_changed = TRUE;
3927 } else
3928 fileref->parent->fcb->inode_item_changed = TRUE;
3929
3930 mark_fcb_dirty(fcb->ads ? fileref->parent->fcb : fcb);
3931 }
3932
3933 if (!nocsum) {
3934 ExAcquireResourceExclusiveLite(&Vcb->checksum_lock, TRUE);
3935 commit_checksum_changes(Vcb, &changed_sector_list);
3936 ExReleaseResourceLite(&Vcb->checksum_lock);
3937 }
3938
3939 if (changed_length) {
3940 CC_FILE_SIZES ccfs;
3941
3942 ccfs.AllocationSize = fcb->Header.AllocationSize;
3943 ccfs.FileSize = fcb->Header.FileSize;
3944 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
3945
3946 CcSetFileSizes(FileObject, &ccfs);
3947 }
3948
3949 // FIXME - make sure this still called if STATUS_PENDING and async
3950 // if (!no_cache) {
3951 // if (!CcCopyWrite(FileObject, &offset, *length, TRUE, buf)) {
3952 // ERR("CcCopyWrite failed.\n");
3953 // }
3954 // }
3955
3956 fcb->subvol->root_item.ctransid = Vcb->superblock.generation;
3957 fcb->subvol->root_item.ctime = now;
3958
3959 Status = STATUS_SUCCESS;
3960
3961 if (filter != 0)
3962 send_notification_fcb(fcb->ads ? fileref->parent : fileref, filter, FILE_ACTION_MODIFIED);
3963
3964 end:
3965 if (NT_SUCCESS(Status) && FileObject->Flags & FO_SYNCHRONOUS_IO && !paging_io) {
3966 TRACE("CurrentByteOffset was: %llx\n", FileObject->CurrentByteOffset.QuadPart);
3967 FileObject->CurrentByteOffset.QuadPart = offset.QuadPart + (NT_SUCCESS(Status) ? *length : 0);
3968 TRACE("CurrentByteOffset now: %llx\n", FileObject->CurrentByteOffset.QuadPart);
3969 }
3970
3971 if (fcb_lock)
3972 ExReleaseResourceLite(fcb->Header.Resource);
3973
3974 if (tree_lock)
3975 ExReleaseResourceLite(&Vcb->tree_lock);
3976
3977 if (paging_lock)
3978 ExReleaseResourceLite(fcb->Header.PagingIoResource);
3979
3980 return Status;
3981 }
3982
3983 NTSTATUS write_file(device_extension* Vcb, PIRP Irp, BOOL wait, BOOL deferred_write) {
3984 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3985 void* buf;
3986 NTSTATUS Status;
3987 LARGE_INTEGER offset = IrpSp->Parameters.Write.ByteOffset;
3988 PFILE_OBJECT FileObject = IrpSp->FileObject;
3989 fcb* fcb = FileObject ? FileObject->FsContext : NULL;
3990 // BOOL locked = FALSE;
3991 // LARGE_INTEGER freq, time1, time2;
3992 LIST_ENTRY rollback;
3993
3994 InitializeListHead(&rollback);
3995
3996 // time1 = KeQueryPerformanceCounter(&freq);
3997
3998 TRACE("write\n");
3999
4000 Irp->IoStatus.Information = 0;
4001
4002 TRACE("offset = %llx\n", offset.QuadPart);
4003 TRACE("length = %x\n", IrpSp->Parameters.Write.Length);
4004
4005 if (!Irp->AssociatedIrp.SystemBuffer) {
4006 buf = map_user_buffer(Irp);
4007
4008 if (Irp->MdlAddress && !buf) {
4009 ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
4010 Status = STATUS_INSUFFICIENT_RESOURCES;
4011 goto exit;
4012 }
4013 } else
4014 buf = Irp->AssociatedIrp.SystemBuffer;
4015
4016 TRACE("buf = %p\n", buf);
4017
4018 // if (Irp->Flags & IRP_NOCACHE) {
4019 // if (!ExAcquireResourceSharedLite(&Vcb->tree_lock, wait)) {
4020 // Status = STATUS_PENDING;
4021 // goto exit;
4022 // }
4023 // locked = TRUE;
4024 // }
4025
4026 if (fcb && !(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForWriteAccess(&fcb->lock, Irp)) {
4027 WARN("tried to write to locked region\n");
4028 Status = STATUS_FILE_LOCK_CONFLICT;
4029 goto exit;
4030 }
4031
4032 // ERR("Irp->Flags = %x\n", Irp->Flags);
4033 Status = write_file2(Vcb, Irp, offset, buf, &IrpSp->Parameters.Write.Length, Irp->Flags & IRP_PAGING_IO, Irp->Flags & IRP_NOCACHE,
4034 wait, deferred_write, &rollback);
4035
4036 if (Status == STATUS_PENDING)
4037 goto exit;
4038 else if (!NT_SUCCESS(Status)) {
4039 ERR("write_file2 returned %08x\n", Status);
4040 goto exit;
4041 }
4042
4043 // if (locked)
4044 // Status = consider_write(Vcb);
4045
4046 if (NT_SUCCESS(Status)) {
4047 Irp->IoStatus.Information = IrpSp->Parameters.Write.Length;
4048
4049 #ifdef DEBUG_PARANOID
4050 // if (locked)
4051 // check_extents_consistent(Vcb, FileObject->FsContext); // TESTING
4052
4053 // check_extent_tree_consistent(Vcb);
4054 #endif
4055 }
4056
4057 exit:
4058 // if (locked) {
4059 if (NT_SUCCESS(Status))
4060 clear_rollback(Vcb, &rollback);
4061 else
4062 do_rollback(Vcb, &rollback);
4063 //
4064 // ExReleaseResourceLite(&Vcb->tree_lock);
4065 // }
4066
4067 // time2 = KeQueryPerformanceCounter(NULL);
4068
4069 // ERR("time = %u (freq = %u)\n", (UINT32)(time2.QuadPart - time1.QuadPart), (UINT32)freq.QuadPart);
4070
4071 return Status;
4072 }
4073
4074 NTSTATUS STDCALL drv_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
4075 NTSTATUS Status;
4076 BOOL top_level;
4077 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
4078 device_extension* Vcb = DeviceObject->DeviceExtension;
4079 PFILE_OBJECT FileObject = IrpSp->FileObject;
4080 fcb* fcb = FileObject ? FileObject->FsContext : NULL;
4081 ccb* ccb = FileObject ? FileObject->FsContext2 : NULL;
4082
4083 FsRtlEnterFileSystem();
4084
4085 top_level = is_top_level(Irp);
4086
4087 if (Vcb && Vcb->type == VCB_TYPE_PARTITION0) {
4088 Status = part0_passthrough(DeviceObject, Irp);
4089 goto exit;
4090 }
4091
4092 if (!fcb) {
4093 ERR("fcb was NULL\n");
4094 Status = STATUS_INVALID_PARAMETER;
4095 goto end;
4096 }
4097
4098 if (!ccb) {
4099 ERR("ccb was NULL\n");
4100 Status = STATUS_INVALID_PARAMETER;
4101 goto end;
4102 }
4103
4104 if (fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY) {
4105 Status = STATUS_ACCESS_DENIED;
4106 goto end;
4107 }
4108
4109 if (Vcb->readonly) {
4110 Status = STATUS_MEDIA_WRITE_PROTECTED;
4111 goto end;
4112 }
4113
4114 if (Irp->RequestorMode == UserMode && !(ccb->access & (FILE_WRITE_DATA | FILE_APPEND_DATA))) {
4115 WARN("insufficient permissions\n");
4116 Status = STATUS_ACCESS_DENIED;
4117 goto end;
4118 }
4119
4120 // ERR("recursive = %s\n", Irp != IoGetTopLevelIrp() ? "TRUE" : "FALSE");
4121
4122 _SEH2_TRY {
4123 if (IrpSp->MinorFunction & IRP_MN_COMPLETE) {
4124 CcMdlWriteComplete(IrpSp->FileObject, &IrpSp->Parameters.Write.ByteOffset, Irp->MdlAddress);
4125
4126 Irp->MdlAddress = NULL;
4127 Status = STATUS_SUCCESS;
4128 } else {
4129 Status = write_file(Vcb, Irp, IoIsOperationSynchronous(Irp), FALSE);
4130 }
4131 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
4132 Status = _SEH2_GetExceptionCode();
4133 } _SEH2_END;
4134
4135 end:
4136 Irp->IoStatus.Status = Status;
4137
4138 TRACE("wrote %u bytes\n", Irp->IoStatus.Information);
4139
4140 if (Status != STATUS_PENDING)
4141 IoCompleteRequest(Irp, IO_NO_INCREMENT);
4142 else {
4143 IoMarkIrpPending(Irp);
4144
4145 if (!add_thread_job(Vcb, Irp))
4146 do_write_job(Vcb, Irp);
4147 }
4148
4149 exit:
4150 if (top_level)
4151 IoSetTopLevelIrp(NULL);
4152
4153 FsRtlExitFileSystem();
4154
4155 TRACE("returning %08x\n", Status);
4156
4157 return Status;
4158 }