[BTRFS] Allow driver to start at first stage when no hive is present.
[reactos.git] / drivers / filesystems / btrfs / scrub.c
1 /* Copyright (c) Mark Harmstone 2017
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19
20 #define SCRUB_UNIT 0x100000 // 1 MB
21
struct _scrub_context;

// Per-stripe state for one scrub read. The parent scrub_context owns an
// array of these, one per stripe of the chunk being scrubbed.
typedef struct {
    struct _scrub_context* context; // back-pointer to the owning scrub_context
    PIRP Irp;                       // read IRP issued for this stripe
    UINT64 start;                   // start offset of this stripe's read (taken modulo stripe_length by callers)
    UINT32 length;                  // number of bytes read into buf
    IO_STATUS_BLOCK iosb;           // copied from the IRP by scrub_read_completion
    UINT8* buf;                     // data read from this stripe
    BOOL csum_error;                // set when this stripe failed checksum verification
    UINT32* bad_csums;              // per-sector (or per-node) checksums actually found on disk
} scrub_context_stripe;
34
// Tracks a batch of parallel stripe reads during a scrub pass.
typedef struct _scrub_context {
    KEVENT Event;                  // signalled by scrub_read_completion when the last stripe finishes
    scrub_context_stripe* stripes; // one entry per chunk stripe
    LONG stripes_left;             // outstanding reads; decremented interlocked in the completion routine
} scrub_context;
40
// One component of a file path being reconstructed by
// log_file_checksum_error, collected leaf-first into a list.
typedef struct {
    ANSI_STRING name;      // points into the tree item's data, not separately allocated
    BOOL orig_subvol;      // TRUE if this component belongs to the subvolume the error was found in
    LIST_ENTRY list_entry;
} path_part;
46
// Logs a data checksum error against the file that owns the damaged extent.
// Walks backwards from (subvol, inode) through INODE_REF / INODE_EXTREF
// items (and ROOT_BACKREF items when crossing subvolume boundaries) to
// reconstruct the file's path, prints it, and appends a scrub_error entry
// to Vcb->scrub.errors.
//   addr   - logical address of the corrupted data
//   devid  - device the error was found on
//   subvol - id of the subvolume containing the inode
//   inode  - inode number of the owning file
//   offset - file offset corresponding to addr
static void log_file_checksum_error(device_extension* Vcb, UINT64 addr, UINT64 devid, UINT64 subvol, UINT64 inode, UINT64 offset) {
    LIST_ENTRY *le, parts;
    root* r = NULL;
    KEY searchkey;
    traverse_ptr tp;
    UINT64 dir;
    BOOL orig_subvol = TRUE, not_in_tree = FALSE;
    ANSI_STRING fn;
    scrub_error* err;
    NTSTATUS Status;
    ULONG utf16len;

    // look up the root structure for the subvolume
    le = Vcb->roots.Flink;
    while (le != &Vcb->roots) {
        root* r2 = CONTAINING_RECORD(le, root, list_entry);

        if (r2->id == subvol) {
            r = r2;
            break;
        }

        le = le->Flink;
    }

    if (!r) {
        ERR("could not find subvol %llx\n", subvol);
        return;
    }

    InitializeListHead(&parts);

    dir = inode;

    // walk up the directory hierarchy, collecting one path_part per component
    while (TRUE) {
        if (dir == r->root_item.objid) {
            // reached the root directory of this subvolume
            if (r == Vcb->root_fileref->fcb->subvol)
                break; // top-level subvolume - path is complete

            // cross into the parent subvolume via this root's ROOT_BACKREF
            searchkey.obj_id = r->id;
            searchkey.obj_type = TYPE_ROOT_BACKREF;
            searchkey.offset = 0xffffffffffffffff;

            Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
            if (!NT_SUCCESS(Status)) {
                ERR("find_item returned %08x\n", Status);
                goto end;
            }

            if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
                ROOT_REF* rr = (ROOT_REF*)tp.item->data;
                path_part* pp;

                if (tp.item->size < sizeof(ROOT_REF)) {
                    ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF));
                    goto end;
                }

                if (tp.item->size < offsetof(ROOT_REF, name[0]) + rr->n) {
                    ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
                        tp.item->size, offsetof(ROOT_REF, name[0]) + rr->n);
                    goto end;
                }

                pp = ExAllocatePoolWithTag(PagedPool, sizeof(path_part), ALLOC_TAG);
                if (!pp) {
                    ERR("out of memory\n");
                    goto end;
                }

                // name points into the tree item's data, which stays valid
                // for the lifetime of this function
                pp->name.Buffer = rr->name;
                pp->name.Length = pp->name.MaximumLength = rr->n;
                pp->orig_subvol = FALSE;

                InsertTailList(&parts, &pp->list_entry);

                // switch to the parent subvolume (key offset of the backref)
                r = NULL;

                le = Vcb->roots.Flink;
                while (le != &Vcb->roots) {
                    root* r2 = CONTAINING_RECORD(le, root, list_entry);

                    if (r2->id == tp.item->key.offset) {
                        r = r2;
                        break;
                    }

                    le = le->Flink;
                }

                if (!r) {
                    ERR("could not find subvol %llx\n", tp.item->key.offset);
                    goto end;
                }

                dir = rr->dir;
                orig_subvol = FALSE;
            } else {
                // subvolume has no backref - it has been detached from the tree
                not_in_tree = TRUE;
                break;
            }
        } else {
            // find the INODE_REF or INODE_EXTREF naming this inode
            searchkey.obj_id = dir;
            searchkey.obj_type = TYPE_INODE_EXTREF;
            searchkey.offset = 0xffffffffffffffff;

            Status = find_item(Vcb, r, &tp, &searchkey, FALSE, NULL);
            if (!NT_SUCCESS(Status)) {
                ERR("find_item returned %08x\n", Status);
                goto end;
            }

            if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_REF) {
                INODE_REF* ir = (INODE_REF*)tp.item->data;
                path_part* pp;

                if (tp.item->size < sizeof(INODE_REF)) {
                    ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_REF));
                    goto end;
                }

                if (tp.item->size < offsetof(INODE_REF, name[0]) + ir->n) {
                    ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
                        tp.item->size, offsetof(INODE_REF, name[0]) + ir->n);
                    goto end;
                }

                pp = ExAllocatePoolWithTag(PagedPool, sizeof(path_part), ALLOC_TAG);
                if (!pp) {
                    ERR("out of memory\n");
                    goto end;
                }

                pp->name.Buffer = ir->name;
                pp->name.Length = pp->name.MaximumLength = ir->n;
                pp->orig_subvol = orig_subvol;

                InsertTailList(&parts, &pp->list_entry);

                // INODE_REF key offset is the parent directory; if it is
                // its own parent we have hit the subvolume root
                if (dir == tp.item->key.offset)
                    break;

                dir = tp.item->key.offset;
            } else if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_EXTREF) {
                INODE_EXTREF* ier = (INODE_EXTREF*)tp.item->data;
                path_part* pp;

                if (tp.item->size < sizeof(INODE_EXTREF)) {
                    ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
                        tp.item->size, sizeof(INODE_EXTREF));
                    goto end;
                }

                if (tp.item->size < offsetof(INODE_EXTREF, name[0]) + ier->n) {
                    ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
                        tp.item->size, offsetof(INODE_EXTREF, name[0]) + ier->n);
                    goto end;
                }

                pp = ExAllocatePoolWithTag(PagedPool, sizeof(path_part), ALLOC_TAG);
                if (!pp) {
                    ERR("out of memory\n");
                    goto end;
                }

                pp->name.Buffer = ier->name;
                pp->name.Length = pp->name.MaximumLength = ier->n;
                pp->orig_subvol = orig_subvol;

                InsertTailList(&parts, &pp->list_entry);

                // INODE_EXTREF stores the parent directory explicitly
                if (dir == ier->dir)
                    break;

                dir = ier->dir;
            } else {
                ERR("could not find INODE_REF for inode %llx in subvol %llx\n", dir, r->id);
                goto end;
            }
        }
    }

    fn.MaximumLength = 0;

    if (not_in_tree) {
        // drop components that belong to detached ancestor subvolumes,
        // keeping only the part of the path inside the original subvolume
        le = parts.Blink;
        while (le != &parts) {
            path_part* pp = CONTAINING_RECORD(le, path_part, list_entry);
            LIST_ENTRY* le2 = le->Blink;

            if (pp->orig_subvol)
                break;

            RemoveTailList(&parts);
            ExFreePool(pp);

            le = le2;
        }
    }

    // first pass: compute the total length ('\' separator per component)
    le = parts.Flink;
    while (le != &parts) {
        path_part* pp = CONTAINING_RECORD(le, path_part, list_entry);

        fn.MaximumLength += pp->name.Length + 1;

        le = le->Flink;
    }

    fn.Buffer = ExAllocatePoolWithTag(PagedPool, fn.MaximumLength, ALLOC_TAG);
    if (!fn.Buffer) {
        ERR("out of memory\n");
        goto end;
    }

    fn.Length = 0;

    // second pass: components were collected leaf-first, so walk the list
    // backwards to emit the path root-first
    le = parts.Blink;
    while (le != &parts) {
        path_part* pp = CONTAINING_RECORD(le, path_part, list_entry);

        fn.Buffer[fn.Length] = '\\';
        fn.Length++;

        RtlCopyMemory(&fn.Buffer[fn.Length], pp->name.Buffer, pp->name.Length);
        fn.Length += pp->name.Length;

        le = le->Blink;
    }

    if (not_in_tree)
        ERR("subvol %llx, %.*s, offset %llx\n", subvol, fn.Length, fn.Buffer, offset);
    else
        ERR("%.*s, offset %llx\n", fn.Length, fn.Buffer, offset);

    // first call sizes the UTF-16 conversion, second fills it in
    Status = RtlUTF8ToUnicodeN(NULL, 0, &utf16len, fn.Buffer, fn.Length);
    if (!NT_SUCCESS(Status)) {
        ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status);
        ExFreePool(fn.Buffer);
        goto end;
    }

    err = ExAllocatePoolWithTag(PagedPool, offsetof(scrub_error, data.filename[0]) + utf16len, ALLOC_TAG);
    if (!err) {
        ERR("out of memory\n");
        ExFreePool(fn.Buffer);
        goto end;
    }

    err->address = addr;
    err->device = devid;
    err->recovered = FALSE;
    err->is_metadata = FALSE;
    err->parity = FALSE;

    // subvol is only recorded when the path could not be anchored to the
    // top-level subvolume
    err->data.subvol = not_in_tree ? subvol : 0;
    err->data.offset = offset;
    err->data.filename_length = (UINT16)utf16len;

    Status = RtlUTF8ToUnicodeN(err->data.filename, utf16len, &utf16len, fn.Buffer, fn.Length);
    if (!NT_SUCCESS(Status)) {
        ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status);
        ExFreePool(fn.Buffer);
        ExFreePool(err);
        goto end;
    }

    ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE);

    Vcb->scrub.num_errors++;
    InsertTailList(&Vcb->scrub.errors, &err->list_entry);

    ExReleaseResourceLite(&Vcb->scrub.stats_lock);

    ExFreePool(fn.Buffer);

end:
    while (!IsListEmpty(&parts)) {
        path_part* pp = CONTAINING_RECORD(RemoveHeadList(&parts), path_part, list_entry);

        ExFreePool(pp);
    }
}
329
330 static void log_file_checksum_error_shared(device_extension* Vcb, UINT64 treeaddr, UINT64 addr, UINT64 devid, UINT64 extent) {
331 tree_header* tree;
332 NTSTATUS Status;
333 leaf_node* ln;
334 ULONG i;
335
336 tree = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
337 if (!tree) {
338 ERR("out of memory\n");
339 return;
340 }
341
342 Status = read_data(Vcb, treeaddr, Vcb->superblock.node_size, NULL, TRUE, (UINT8*)tree, NULL, NULL, NULL, 0, FALSE, NormalPagePriority);
343 if (!NT_SUCCESS(Status)) {
344 ERR("read_data returned %08x\n", Status);
345 goto end;
346 }
347
348 if (tree->level != 0) {
349 ERR("tree level was %x, expected 0\n", tree->level);
350 goto end;
351 }
352
353 ln = (leaf_node*)&tree[1];
354
355 for (i = 0; i < tree->num_items; i++) {
356 if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
357 EXTENT_DATA* ed = (EXTENT_DATA*)((UINT8*)tree + sizeof(tree_header) + ln[i].offset);
358 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
359
360 if (ed->type == EXTENT_TYPE_REGULAR && ed2->size != 0 && ed2->address == addr)
361 log_file_checksum_error(Vcb, addr, devid, tree->tree_id, ln[i].key.obj_id, ln[i].key.offset + addr - extent);
362 }
363 }
364
365 end:
366 ExFreePool(tree);
367 }
368
369 static void log_tree_checksum_error(device_extension* Vcb, UINT64 addr, UINT64 devid, UINT64 root, UINT8 level, KEY* firstitem) {
370 scrub_error* err;
371
372 err = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_error), ALLOC_TAG);
373 if (!err) {
374 ERR("out of memory\n");
375 return;
376 }
377
378 err->address = addr;
379 err->device = devid;
380 err->recovered = FALSE;
381 err->is_metadata = TRUE;
382 err->parity = FALSE;
383
384 err->metadata.root = root;
385 err->metadata.level = level;
386
387 if (firstitem) {
388 ERR("root %llx, level %u, first item (%llx,%x,%llx)\n", root, level, firstitem->obj_id,
389 firstitem->obj_type, firstitem->offset);
390
391 err->metadata.firstitem = *firstitem;
392 } else {
393 ERR("root %llx, level %u\n", root, level);
394
395 RtlZeroMemory(&err->metadata.firstitem, sizeof(KEY));
396 }
397
398 ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE);
399
400 Vcb->scrub.num_errors++;
401 InsertTailList(&Vcb->scrub.errors, &err->list_entry);
402
403 ExReleaseResourceLite(&Vcb->scrub.stats_lock);
404 }
405
406 static void log_tree_checksum_error_shared(device_extension* Vcb, UINT64 offset, UINT64 address, UINT64 devid) {
407 tree_header* tree;
408 NTSTATUS Status;
409 internal_node* in;
410 ULONG i;
411
412 tree = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
413 if (!tree) {
414 ERR("out of memory\n");
415 return;
416 }
417
418 Status = read_data(Vcb, offset, Vcb->superblock.node_size, NULL, TRUE, (UINT8*)tree, NULL, NULL, NULL, 0, FALSE, NormalPagePriority);
419 if (!NT_SUCCESS(Status)) {
420 ERR("read_data returned %08x\n", Status);
421 goto end;
422 }
423
424 if (tree->level == 0) {
425 ERR("tree level was 0\n");
426 goto end;
427 }
428
429 in = (internal_node*)&tree[1];
430
431 for (i = 0; i < tree->num_items; i++) {
432 if (in[i].address == address) {
433 log_tree_checksum_error(Vcb, address, devid, tree->tree_id, tree->level - 1, &in[i].key);
434 break;
435 }
436 }
437
438 end:
439 ExFreePool(tree);
440 }
441
442 static void log_unrecoverable_error(device_extension* Vcb, UINT64 address, UINT64 devid) {
443 KEY searchkey;
444 traverse_ptr tp;
445 NTSTATUS Status;
446 EXTENT_ITEM* ei;
447 EXTENT_ITEM2* ei2 = NULL;
448 UINT8* ptr;
449 ULONG len;
450 UINT64 rc;
451
452 // FIXME - still log even if rest of this function fails
453
454 searchkey.obj_id = address;
455 searchkey.obj_type = TYPE_METADATA_ITEM;
456 searchkey.offset = 0xffffffffffffffff;
457
458 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
459 if (!NT_SUCCESS(Status)) {
460 ERR("find_item returned %08x\n", Status);
461 return;
462 }
463
464 if ((tp.item->key.obj_type != TYPE_EXTENT_ITEM && tp.item->key.obj_type != TYPE_METADATA_ITEM) ||
465 tp.item->key.obj_id >= address + Vcb->superblock.sector_size ||
466 (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.obj_id + tp.item->key.offset <= address) ||
467 (tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->key.obj_id + Vcb->superblock.node_size <= address)
468 )
469 return;
470
471 if (tp.item->size < sizeof(EXTENT_ITEM)) {
472 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
473 return;
474 }
475
476 ei = (EXTENT_ITEM*)tp.item->data;
477 ptr = (UINT8*)&ei[1];
478 len = tp.item->size - sizeof(EXTENT_ITEM);
479
480 if (tp.item->key.obj_id == TYPE_EXTENT_ITEM && ei->flags & EXTENT_ITEM_TREE_BLOCK) {
481 if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) {
482 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
483 tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2));
484 return;
485 }
486
487 ei2 = (EXTENT_ITEM2*)ptr;
488
489 ptr += sizeof(EXTENT_ITEM2);
490 len -= sizeof(EXTENT_ITEM2);
491 }
492
493 rc = 0;
494
495 while (len > 0) {
496 UINT8 type = *ptr;
497
498 ptr++;
499 len--;
500
501 if (type == TYPE_TREE_BLOCK_REF) {
502 TREE_BLOCK_REF* tbr;
503
504 if (len < sizeof(TREE_BLOCK_REF)) {
505 ERR("TREE_BLOCK_REF takes up %u bytes, but only %u remaining\n", sizeof(TREE_BLOCK_REF), len);
506 break;
507 }
508
509 tbr = (TREE_BLOCK_REF*)ptr;
510
511 log_tree_checksum_error(Vcb, address, devid, tbr->offset, ei2 ? ei2->level : (UINT8)tp.item->key.offset, ei2 ? &ei2->firstitem : NULL);
512
513 rc++;
514
515 ptr += sizeof(TREE_BLOCK_REF);
516 len -= sizeof(TREE_BLOCK_REF);
517 } else if (type == TYPE_EXTENT_DATA_REF) {
518 EXTENT_DATA_REF* edr;
519
520 if (len < sizeof(EXTENT_DATA_REF)) {
521 ERR("EXTENT_DATA_REF takes up %u bytes, but only %u remaining\n", sizeof(EXTENT_DATA_REF), len);
522 break;
523 }
524
525 edr = (EXTENT_DATA_REF*)ptr;
526
527 log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id);
528
529 rc += edr->count;
530
531 ptr += sizeof(EXTENT_DATA_REF);
532 len -= sizeof(EXTENT_DATA_REF);
533 } else if (type == TYPE_SHARED_BLOCK_REF) {
534 SHARED_BLOCK_REF* sbr;
535
536 if (len < sizeof(SHARED_BLOCK_REF)) {
537 ERR("SHARED_BLOCK_REF takes up %u bytes, but only %u remaining\n", sizeof(SHARED_BLOCK_REF), len);
538 break;
539 }
540
541 sbr = (SHARED_BLOCK_REF*)ptr;
542
543 log_tree_checksum_error_shared(Vcb, sbr->offset, address, devid);
544
545 rc++;
546
547 ptr += sizeof(SHARED_BLOCK_REF);
548 len -= sizeof(SHARED_BLOCK_REF);
549 } else if (type == TYPE_SHARED_DATA_REF) {
550 SHARED_DATA_REF* sdr;
551
552 if (len < sizeof(SHARED_DATA_REF)) {
553 ERR("SHARED_DATA_REF takes up %u bytes, but only %u remaining\n", sizeof(SHARED_DATA_REF), len);
554 break;
555 }
556
557 sdr = (SHARED_DATA_REF*)ptr;
558
559 log_file_checksum_error_shared(Vcb, sdr->offset, address, devid, tp.item->key.obj_id);
560
561 rc += sdr->count;
562
563 ptr += sizeof(SHARED_DATA_REF);
564 len -= sizeof(SHARED_DATA_REF);
565 } else {
566 ERR("unknown extent type %x\n", type);
567 break;
568 }
569 }
570
571 if (rc < ei->refcount) {
572 do {
573 traverse_ptr next_tp;
574
575 if (find_next_item(Vcb, &tp, &next_tp, FALSE, NULL))
576 tp = next_tp;
577 else
578 break;
579
580 if (tp.item->key.obj_id == address) {
581 if (tp.item->key.obj_type == TYPE_TREE_BLOCK_REF)
582 log_tree_checksum_error(Vcb, address, devid, tp.item->key.offset, ei2 ? ei2->level : (UINT8)tp.item->key.offset, ei2 ? &ei2->firstitem : NULL);
583 else if (tp.item->key.obj_type == TYPE_EXTENT_DATA_REF) {
584 EXTENT_DATA_REF* edr;
585
586 if (tp.item->size < sizeof(EXTENT_DATA_REF)) {
587 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
588 tp.item->size, sizeof(EXTENT_DATA_REF));
589 break;
590 }
591
592 edr = (EXTENT_DATA_REF*)tp.item->data;
593
594 log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id);
595 } else if (tp.item->key.obj_type == TYPE_SHARED_BLOCK_REF)
596 log_tree_checksum_error_shared(Vcb, tp.item->key.offset, address, devid);
597 else if (tp.item->key.obj_type == TYPE_SHARED_DATA_REF)
598 log_file_checksum_error_shared(Vcb, tp.item->key.offset, address, devid, tp.item->key.obj_id);
599 } else
600 break;
601 } while (TRUE);
602 }
603 }
604
605 static void log_error(device_extension* Vcb, UINT64 addr, UINT64 devid, BOOL metadata, BOOL recoverable, BOOL parity) {
606 if (recoverable) {
607 scrub_error* err;
608
609 if (parity) {
610 ERR("recovering from parity error at %llx on device %llx\n", addr, devid);
611 } else {
612 if (metadata)
613 ERR("recovering from metadata checksum error at %llx on device %llx\n", addr, devid);
614 else
615 ERR("recovering from data checksum error at %llx on device %llx\n", addr, devid);
616 }
617
618 err = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_error), ALLOC_TAG);
619 if (!err) {
620 ERR("out of memory\n");
621 return;
622 }
623
624 err->address = addr;
625 err->device = devid;
626 err->recovered = TRUE;
627 err->is_metadata = metadata;
628 err->parity = parity;
629
630 if (metadata)
631 RtlZeroMemory(&err->metadata, sizeof(err->metadata));
632 else
633 RtlZeroMemory(&err->data, sizeof(err->data));
634
635 ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE);
636
637 Vcb->scrub.num_errors++;
638 InsertTailList(&Vcb->scrub.errors, &err->list_entry);
639
640 ExReleaseResourceLite(&Vcb->scrub.stats_lock);
641 } else {
642 if (metadata)
643 ERR("unrecoverable metadata checksum error at %llx\n", addr);
644 else
645 ERR("unrecoverable data checksum error at %llx\n", addr);
646
647 log_unrecoverable_error(Vcb, addr, devid);
648 }
649 }
650
651 _Function_class_(IO_COMPLETION_ROUTINE)
652 #ifdef __REACTOS__
653 static NTSTATUS NTAPI scrub_read_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
654 #else
655 static NTSTATUS scrub_read_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
656 #endif
657 scrub_context_stripe* stripe = conptr;
658 scrub_context* context = (scrub_context*)stripe->context;
659 ULONG left = InterlockedDecrement(&context->stripes_left);
660
661 UNUSED(DeviceObject);
662
663 stripe->iosb = Irp->IoStatus;
664
665 if (left == 0)
666 KeSetEvent(&context->Event, 0, FALSE);
667
668 return STATUS_MORE_PROCESSING_REQUIRED;
669 }
670
671 static NTSTATUS scrub_extent_dup(device_extension* Vcb, chunk* c, UINT64 offset, UINT32* csum, scrub_context* context) {
672 NTSTATUS Status;
673 BOOL csum_error = FALSE;
674 ULONG i;
675 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
676 UINT16 present_devices = 0;
677
678 if (csum) {
679 ULONG good_stripe = 0xffffffff;
680
681 for (i = 0; i < c->chunk_item->num_stripes; i++) {
682 if (c->devices[i]->devobj) {
683 present_devices++;
684
685 // if first stripe is okay, we only need to check that the others are identical to it
686 if (good_stripe != 0xffffffff) {
687 if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf,
688 context->stripes[good_stripe].length) != context->stripes[i].length) {
689 context->stripes[i].csum_error = TRUE;
690 csum_error = TRUE;
691 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
692 }
693 } else {
694 Status = check_csum(Vcb, context->stripes[i].buf, context->stripes[i].length / Vcb->superblock.sector_size, csum);
695 if (Status == STATUS_CRC_ERROR) {
696 context->stripes[i].csum_error = TRUE;
697 csum_error = TRUE;
698 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
699 } else if (!NT_SUCCESS(Status)) {
700 ERR("check_csum returned %08x\n", Status);
701 return Status;
702 } else
703 good_stripe = i;
704 }
705 }
706 }
707 } else {
708 ULONG good_stripe = 0xffffffff;
709
710 for (i = 0; i < c->chunk_item->num_stripes; i++) {
711 ULONG j;
712
713 if (c->devices[i]->devobj) {
714 // if first stripe is okay, we only need to check that the others are identical to it
715 if (good_stripe != 0xffffffff) {
716 if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf,
717 context->stripes[good_stripe].length) != context->stripes[i].length) {
718 context->stripes[i].csum_error = TRUE;
719 csum_error = TRUE;
720 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
721 }
722 } else {
723 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
724 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
725 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
726
727 if (crc32 != *((UINT32*)th->csum) || th->address != offset + UInt32x32To64(j, Vcb->superblock.node_size)) {
728 context->stripes[i].csum_error = TRUE;
729 csum_error = TRUE;
730 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
731 }
732 }
733
734 if (!context->stripes[i].csum_error)
735 good_stripe = i;
736 }
737 }
738 }
739 }
740
741 if (!csum_error)
742 return STATUS_SUCCESS;
743
744 // handle checksum error
745
746 for (i = 0; i < c->chunk_item->num_stripes; i++) {
747 if (context->stripes[i].csum_error) {
748 if (csum) {
749 context->stripes[i].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[i].length * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG);
750 if (!context->stripes[i].bad_csums) {
751 ERR("out of memory\n");
752 return STATUS_INSUFFICIENT_RESOURCES;
753 }
754
755 Status = calc_csum(Vcb, context->stripes[i].buf, context->stripes[i].length / Vcb->superblock.sector_size, context->stripes[i].bad_csums);
756 if (!NT_SUCCESS(Status)) {
757 ERR("calc_csum returned %08x\n", Status);
758 return Status;
759 }
760 } else {
761 ULONG j;
762
763 context->stripes[i].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[i].length * sizeof(UINT32) / Vcb->superblock.node_size, ALLOC_TAG);
764 if (!context->stripes[i].bad_csums) {
765 ERR("out of memory\n");
766 return STATUS_INSUFFICIENT_RESOURCES;
767 }
768
769 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
770 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
771 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
772
773 context->stripes[i].bad_csums[j] = crc32;
774 }
775 }
776 }
777 }
778
779 if (present_devices > 1) {
780 ULONG good_stripe = 0xffffffff;
781
782 for (i = 0; i < c->chunk_item->num_stripes; i++) {
783 if (c->devices[i]->devobj && !context->stripes[i].csum_error) {
784 good_stripe = i;
785 break;
786 }
787 }
788
789 if (good_stripe != 0xffffffff) {
790 // log
791
792 for (i = 0; i < c->chunk_item->num_stripes; i++) {
793 if (context->stripes[i].csum_error) {
794 ULONG j;
795
796 if (csum) {
797 for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) {
798 if (context->stripes[i].bad_csums[j] != csum[j]) {
799 UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size);
800
801 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, FALSE, TRUE, FALSE);
802 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
803 }
804 }
805 } else {
806 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
807 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
808 UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.node_size);
809
810 if (context->stripes[i].bad_csums[j] != *((UINT32*)th->csum) || th->address != addr) {
811 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, TRUE, TRUE, FALSE);
812 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
813 }
814 }
815 }
816 }
817 }
818
819 // write good data over bad
820
821 for (i = 0; i < c->chunk_item->num_stripes; i++) {
822 if (context->stripes[i].csum_error && !c->devices[i]->readonly) {
823 Status = write_data_phys(c->devices[i]->devobj, cis[i].offset + offset - c->offset,
824 context->stripes[good_stripe].buf, context->stripes[i].length);
825
826 if (!NT_SUCCESS(Status)) {
827 ERR("write_data_phys returned %08x\n", Status);
828 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_WRITE_ERRORS);
829 return Status;
830 }
831 }
832 }
833
834 return STATUS_SUCCESS;
835 }
836
837 // if csum errors on all stripes, check sector by sector
838
839 for (i = 0; i < c->chunk_item->num_stripes; i++) {
840 ULONG j;
841
842 if (c->devices[i]->devobj) {
843 if (csum) {
844 for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) {
845 if (context->stripes[i].bad_csums[j] != csum[j]) {
846 ULONG k;
847 UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size);
848 BOOL recovered = FALSE;
849
850 for (k = 0; k < c->chunk_item->num_stripes; k++) {
851 if (i != k && c->devices[k]->devobj && context->stripes[k].bad_csums[j] == csum[j]) {
852 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, FALSE, TRUE, FALSE);
853 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
854
855 RtlCopyMemory(context->stripes[i].buf + (j * Vcb->superblock.sector_size),
856 context->stripes[k].buf + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
857
858 recovered = TRUE;
859 break;
860 }
861 }
862
863 if (!recovered) {
864 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, FALSE, FALSE, FALSE);
865 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
866 }
867 }
868 }
869 } else {
870 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
871 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
872 UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.node_size);
873
874 if (context->stripes[i].bad_csums[j] != *((UINT32*)th->csum) || th->address != addr) {
875 ULONG k;
876 BOOL recovered = FALSE;
877
878 for (k = 0; k < c->chunk_item->num_stripes; k++) {
879 if (i != k && c->devices[k]->devobj) {
880 tree_header* th2 = (tree_header*)&context->stripes[k].buf[j * Vcb->superblock.node_size];
881
882 if (context->stripes[k].bad_csums[j] == *((UINT32*)th2->csum) && th2->address == addr) {
883 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, TRUE, TRUE, FALSE);
884 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
885
886 RtlCopyMemory(th, th2, Vcb->superblock.node_size);
887
888 recovered = TRUE;
889 break;
890 }
891 }
892 }
893
894 if (!recovered) {
895 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, TRUE, FALSE, FALSE);
896 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
897 }
898 }
899 }
900 }
901 }
902 }
903
904 // write good data over bad
905
906 for (i = 0; i < c->chunk_item->num_stripes; i++) {
907 if (c->devices[i]->devobj && !c->devices[i]->readonly) {
908 Status = write_data_phys(c->devices[i]->devobj, cis[i].offset + offset - c->offset,
909 context->stripes[i].buf, context->stripes[i].length);
910 if (!NT_SUCCESS(Status)) {
911 ERR("write_data_phys returned %08x\n", Status);
912 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
913 return Status;
914 }
915 }
916 }
917
918 return STATUS_SUCCESS;
919 }
920
921 for (i = 0; i < c->chunk_item->num_stripes; i++) {
922 if (c->devices[i]->devobj) {
923 ULONG j;
924
925 if (csum) {
926 for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) {
927 if (context->stripes[i].bad_csums[j] != csum[j]) {
928 UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size);
929
930 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, FALSE, FALSE, FALSE);
931 }
932 }
933 } else {
934 for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
935 tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
936 UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.node_size);
937
938 if (context->stripes[i].bad_csums[j] != *((UINT32*)th->csum) || th->address != addr)
939 log_error(Vcb, addr, c->devices[i]->devitem.dev_id, TRUE, FALSE, FALSE);
940 }
941 }
942 }
943 }
944
945 return STATUS_SUCCESS;
946 }
947
// Scrubs one extent of a RAID0 chunk. There is no redundancy, so errors can
// only be logged, never recovered. Walks the extent stripe by stripe,
// verifying data sectors against csum, or tree nodes against their embedded
// checksum and address when csum is NULL (metadata).
//   offset         - logical address of the extent
//   length         - length of the extent in bytes
//   startoffstripe - stripe on which the extent begins
static NTSTATUS scrub_extent_raid0(device_extension* Vcb, chunk* c, UINT64 offset, UINT32 length, UINT16 startoffstripe, UINT32* csum, scrub_context* context) {
    ULONG j;
    UINT16 stripe;
    UINT32 pos, *stripeoff;

    // pos tracks progress through the extent; stripeoff[] tracks progress
    // through each stripe's buffer
    pos = 0;
    stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * c->chunk_item->num_stripes, ALLOC_TAG);
    if (!stripeoff) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    RtlZeroMemory(stripeoff, sizeof(UINT32) * c->chunk_item->num_stripes);

    stripe = startoffstripe;
    while (pos < length) {
        UINT32 readlen;

        // the first chunk may start mid-stripe; afterwards read whole
        // stripe_length runs (or whatever remains of the extent)
        if (pos == 0)
            readlen = (UINT32)min(context->stripes[stripe].length, c->chunk_item->stripe_length - (context->stripes[stripe].start % c->chunk_item->stripe_length));
        else
            readlen = min(length - pos, (UINT32)c->chunk_item->stripe_length);

        if (csum) {
            // data: verify each sector against the expected checksum
            for (j = 0; j < readlen; j += Vcb->superblock.sector_size) {
                UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[stripe].buf + stripeoff[stripe], Vcb->superblock.sector_size);

                if (crc32 != csum[pos / Vcb->superblock.sector_size]) {
                    UINT64 addr = offset + pos;

                    log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, FALSE, FALSE, FALSE);
                    log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                }

                pos += Vcb->superblock.sector_size;
                stripeoff[stripe] += Vcb->superblock.sector_size;
            }
        } else {
            // metadata: verify each node's embedded checksum and address
            for (j = 0; j < readlen; j += Vcb->superblock.node_size) {
                tree_header* th = (tree_header*)(context->stripes[stripe].buf + stripeoff[stripe]);
                UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
                UINT64 addr = offset + pos;

                if (crc32 != *((UINT32*)th->csum) || th->address != addr) {
                    log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, TRUE, FALSE, FALSE);
                    log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                }

                pos += Vcb->superblock.node_size;
                stripeoff[stripe] += Vcb->superblock.node_size;
            }
        }

        stripe = (stripe + 1) % c->chunk_item->num_stripes;
    }

    ExFreePool(stripeoff);

    return STATUS_SUCCESS;
}
1008
1009 static NTSTATUS scrub_extent_raid10(device_extension* Vcb, chunk* c, UINT64 offset, UINT32 length, UINT16 startoffstripe, UINT32* csum, scrub_context* context) {
1010 ULONG j;
1011 UINT16 stripe, sub_stripes = max(c->chunk_item->sub_stripes, 1);
1012 UINT32 pos, *stripeoff;
1013 BOOL csum_error = FALSE;
1014 NTSTATUS Status;
1015
1016 pos = 0;
1017 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * c->chunk_item->num_stripes / sub_stripes, ALLOC_TAG);
1018 if (!stripeoff) {
1019 ERR("out of memory\n");
1020 return STATUS_INSUFFICIENT_RESOURCES;
1021 }
1022
1023 RtlZeroMemory(stripeoff, sizeof(UINT32) * c->chunk_item->num_stripes / sub_stripes);
1024
1025 stripe = startoffstripe;
1026 while (pos < length) {
1027 UINT32 readlen;
1028
1029 if (pos == 0)
1030 readlen = (UINT32)min(context->stripes[stripe * sub_stripes].length,
1031 c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
1032 else
1033 readlen = min(length - pos, (UINT32)c->chunk_item->stripe_length);
1034
1035 if (csum) {
1036 ULONG good_stripe = 0xffffffff;
1037 UINT16 k;
1038
1039 for (k = 0; k < sub_stripes; k++) {
1040 if (c->devices[(stripe * sub_stripes) + k]->devobj) {
1041 // if first stripe is okay, we only need to check that the others are identical to it
1042 if (good_stripe != 0xffffffff) {
1043 if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe],
1044 context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe],
1045 readlen) != readlen) {
1046 context->stripes[(stripe * sub_stripes) + k].csum_error = TRUE;
1047 csum_error = TRUE;
1048 log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1049 }
1050 } else {
1051 for (j = 0; j < readlen; j += Vcb->superblock.sector_size) {
1052 UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j, Vcb->superblock.sector_size);
1053
1054 if (crc32 != csum[(pos + j) / Vcb->superblock.sector_size]) {
1055 csum_error = TRUE;
1056 context->stripes[(stripe * sub_stripes) + k].csum_error = TRUE;
1057 log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1058 break;
1059 }
1060 }
1061
1062 if (!context->stripes[(stripe * sub_stripes) + k].csum_error)
1063 good_stripe = k;
1064 }
1065 }
1066 }
1067
1068 pos += readlen;
1069 stripeoff[stripe] += readlen;
1070 } else {
1071 ULONG good_stripe = 0xffffffff;
1072 UINT16 k;
1073
1074 for (k = 0; k < sub_stripes; k++) {
1075 if (c->devices[(stripe * sub_stripes) + k]->devobj) {
1076 // if first stripe is okay, we only need to check that the others are identical to it
1077 if (good_stripe != 0xffffffff) {
1078 if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe],
1079 context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe],
1080 readlen) != readlen) {
1081 context->stripes[(stripe * sub_stripes) + k].csum_error = TRUE;
1082 csum_error = TRUE;
1083 log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1084 }
1085 } else {
1086 for (j = 0; j < readlen; j += Vcb->superblock.node_size) {
1087 tree_header* th = (tree_header*)(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j);
1088 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1089 UINT64 addr = offset + pos + j;
1090
1091 if (crc32 != *((UINT32*)th->csum) || th->address != addr) {
1092 csum_error = TRUE;
1093 context->stripes[(stripe * sub_stripes) + k].csum_error = TRUE;
1094 log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1095 break;
1096 }
1097 }
1098
1099 if (!context->stripes[(stripe * sub_stripes) + k].csum_error)
1100 good_stripe = k;
1101 }
1102 }
1103 }
1104
1105 pos += readlen;
1106 stripeoff[stripe] += readlen;
1107 }
1108
1109 stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
1110 }
1111
1112 if (!csum_error) {
1113 Status = STATUS_SUCCESS;
1114 goto end;
1115 }
1116
1117 for (j = 0; j < c->chunk_item->num_stripes; j += sub_stripes) {
1118 ULONG goodstripe = 0xffffffff;
1119 UINT16 k;
1120 BOOL hasbadstripe = FALSE;
1121
1122 if (context->stripes[j].length == 0)
1123 continue;
1124
1125 for (k = 0; k < sub_stripes; k++) {
1126 if (c->devices[j + k]->devobj) {
1127 if (!context->stripes[j + k].csum_error)
1128 goodstripe = k;
1129 else
1130 hasbadstripe = TRUE;
1131 }
1132 }
1133
1134 if (hasbadstripe) {
1135 if (goodstripe != 0xffffffff) {
1136 for (k = 0; k < sub_stripes; k++) {
1137 if (c->devices[j + k]->devobj && context->stripes[j + k].csum_error) {
1138 UINT32 so = 0;
1139 BOOL recovered = FALSE;
1140
1141 pos = 0;
1142
1143 stripe = startoffstripe;
1144 while (pos < length) {
1145 UINT32 readlen;
1146
1147 if (pos == 0)
1148 readlen = (UINT32)min(context->stripes[stripe * sub_stripes].length,
1149 c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
1150 else
1151 readlen = min(length - pos, (UINT32)c->chunk_item->stripe_length);
1152
1153 if (stripe == j / sub_stripes) {
1154 if (csum) {
1155 ULONG l;
1156
1157 for (l = 0; l < readlen; l += Vcb->superblock.sector_size) {
1158 if (RtlCompareMemory(context->stripes[j + k].buf + so,
1159 context->stripes[j + goodstripe].buf + so,
1160 Vcb->superblock.sector_size) != Vcb->superblock.sector_size) {
1161 UINT64 addr = offset + pos;
1162
1163 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, FALSE, TRUE, FALSE);
1164
1165 recovered = TRUE;
1166 }
1167
1168 pos += Vcb->superblock.sector_size;
1169 so += Vcb->superblock.sector_size;
1170 }
1171 } else {
1172 ULONG l;
1173
1174 for (l = 0; l < readlen; l += Vcb->superblock.node_size) {
1175 if (RtlCompareMemory(context->stripes[j + k].buf + so,
1176 context->stripes[j + goodstripe].buf + so,
1177 Vcb->superblock.node_size) != Vcb->superblock.node_size) {
1178 UINT64 addr = offset + pos;
1179
1180 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, TRUE, TRUE, FALSE);
1181
1182 recovered = TRUE;
1183 }
1184
1185 pos += Vcb->superblock.node_size;
1186 so += Vcb->superblock.node_size;
1187 }
1188 }
1189 } else
1190 pos += readlen;
1191
1192 stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
1193 }
1194
1195 if (recovered) {
1196 // write good data over bad
1197
1198 if (!c->devices[j + k]->readonly) {
1199 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1200
1201 Status = write_data_phys(c->devices[j + k]->devobj, cis[j + k].offset + offset - c->offset,
1202 context->stripes[j + goodstripe].buf, context->stripes[j + goodstripe].length);
1203
1204 if (!NT_SUCCESS(Status)) {
1205 ERR("write_data_phys returned %08x\n", Status);
1206 log_device_error(Vcb, c->devices[j + k], BTRFS_DEV_STAT_WRITE_ERRORS);
1207 goto end;
1208 }
1209 }
1210 }
1211 }
1212 }
1213 } else {
1214 UINT32 so = 0;
1215 BOOL recovered = FALSE;
1216
1217 if (csum) {
1218 for (k = 0; k < sub_stripes; k++) {
1219 if (c->devices[j + k]->devobj) {
1220 context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[j + k].length * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG);
1221 if (!context->stripes[j + k].bad_csums) {
1222 ERR("out of memory\n");
1223 Status = STATUS_INSUFFICIENT_RESOURCES;
1224 goto end;
1225 }
1226
1227 Status = calc_csum(Vcb, context->stripes[j + k].buf, context->stripes[j + k].length / Vcb->superblock.sector_size, context->stripes[j + k].bad_csums);
1228 if (!NT_SUCCESS(Status)) {
1229 ERR("calc_csum returned %08x\n", Status);
1230 goto end;
1231 }
1232 }
1233 }
1234 } else {
1235 for (k = 0; k < sub_stripes; k++) {
1236 if (c->devices[j + k]->devobj) {
1237 ULONG l;
1238
1239 context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[j + k].length * sizeof(UINT32) / Vcb->superblock.node_size, ALLOC_TAG);
1240 if (!context->stripes[j + k].bad_csums) {
1241 ERR("out of memory\n");
1242 Status = STATUS_INSUFFICIENT_RESOURCES;
1243 goto end;
1244 }
1245
1246 for (l = 0; l < context->stripes[j + k].length / Vcb->superblock.node_size; l++) {
1247 tree_header* th = (tree_header*)&context->stripes[j + k].buf[l * Vcb->superblock.node_size];
1248 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1249
1250 context->stripes[j + k].bad_csums[l] = crc32;
1251 }
1252 }
1253 }
1254 }
1255
1256 pos = 0;
1257
1258 stripe = startoffstripe;
1259 while (pos < length) {
1260 UINT32 readlen;
1261
1262 if (pos == 0)
1263 readlen = (UINT32)min(context->stripes[stripe * sub_stripes].length,
1264 c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
1265 else
1266 readlen = min(length - pos, (UINT32)c->chunk_item->stripe_length);
1267
1268 if (stripe == j / sub_stripes) {
1269 ULONG l;
1270
1271 if (csum) {
1272 for (l = 0; l < readlen; l += Vcb->superblock.sector_size) {
1273 UINT32 crc32 = csum[pos / Vcb->superblock.sector_size];
1274 BOOL has_error = FALSE;
1275
1276 goodstripe = 0xffffffff;
1277 for (k = 0; k < sub_stripes; k++) {
1278 if (c->devices[j + k]->devobj) {
1279 if (context->stripes[j + k].bad_csums[so / Vcb->superblock.sector_size] != crc32)
1280 has_error = TRUE;
1281 else
1282 goodstripe = k;
1283 }
1284 }
1285
1286 if (has_error) {
1287 if (goodstripe != 0xffffffff) {
1288 for (k = 0; k < sub_stripes; k++) {
1289 if (c->devices[j + k]->devobj && context->stripes[j + k].bad_csums[so / Vcb->superblock.sector_size] != crc32) {
1290 UINT64 addr = offset + pos;
1291
1292 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, FALSE, TRUE, FALSE);
1293
1294 recovered = TRUE;
1295
1296 RtlCopyMemory(context->stripes[j + k].buf + so, context->stripes[j + goodstripe].buf + so,
1297 Vcb->superblock.sector_size);
1298 }
1299 }
1300 } else {
1301 UINT64 addr = offset + pos;
1302
1303 for (k = 0; k < sub_stripes; k++) {
1304 if (c->devices[j + j]->devobj) {
1305 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, FALSE, FALSE, FALSE);
1306 log_device_error(Vcb, c->devices[j + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1307 }
1308 }
1309 }
1310 }
1311
1312 pos += Vcb->superblock.sector_size;
1313 so += Vcb->superblock.sector_size;
1314 }
1315 } else {
1316 for (l = 0; l < readlen; l += Vcb->superblock.node_size) {
1317 for (k = 0; k < sub_stripes; k++) {
1318 if (c->devices[j + k]->devobj) {
1319 tree_header* th = (tree_header*)&context->stripes[j + k].buf[so];
1320 UINT64 addr = offset + pos;
1321
1322 if (context->stripes[j + k].bad_csums[so / Vcb->superblock.node_size] != *((UINT32*)th->csum) || th->address != addr) {
1323 ULONG m;
1324
1325 recovered = FALSE;
1326
1327 for (m = 0; m < sub_stripes; m++) {
1328 if (m != k) {
1329 tree_header* th2 = (tree_header*)&context->stripes[j + m].buf[so];
1330
1331 if (context->stripes[j + m].bad_csums[so / Vcb->superblock.node_size] == *((UINT32*)th2->csum) && th2->address == addr) {
1332 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, TRUE, TRUE, FALSE);
1333
1334 RtlCopyMemory(th, th2, Vcb->superblock.node_size);
1335
1336 recovered = TRUE;
1337 break;
1338 } else
1339 log_device_error(Vcb, c->devices[j + m], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1340 }
1341 }
1342
1343 if (!recovered)
1344 log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, TRUE, FALSE, FALSE);
1345 }
1346 }
1347 }
1348
1349 pos += Vcb->superblock.node_size;
1350 so += Vcb->superblock.node_size;
1351 }
1352 }
1353 } else
1354 pos += readlen;
1355
1356 stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
1357 }
1358
1359 if (recovered) {
1360 // write good data over bad
1361
1362 for (k = 0; k < sub_stripes; k++) {
1363 if (c->devices[j + k]->devobj && !c->devices[j + k]->readonly) {
1364 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1365
1366 Status = write_data_phys(c->devices[j + k]->devobj, cis[j + k].offset + offset - c->offset,
1367 context->stripes[j + k].buf, context->stripes[j + k].length);
1368
1369 if (!NT_SUCCESS(Status)) {
1370 ERR("write_data_phys returned %08x\n", Status);
1371 log_device_error(Vcb, c->devices[j + k], BTRFS_DEV_STAT_WRITE_ERRORS);
1372 goto end;
1373 }
1374 }
1375 }
1376 }
1377 }
1378 }
1379 }
1380
1381 Status = STATUS_SUCCESS;
1382
1383 end:
1384 ExFreePool(stripeoff);
1385
1386 return Status;
1387 }
1388
// Reads one extent of a DUPLICATE, RAID0 or RAID10 chunk from every live
// device in parallel, waits for all the reads to complete, then hands the
// buffers to the matching per-profile verification routine.
// csum is the array of expected data checksums, or NULL for metadata.
static NTSTATUS scrub_extent(device_extension* Vcb, chunk* c, ULONG type, UINT64 offset, UINT32 size, UINT32* csum) {
    ULONG i;
    scrub_context context;
    CHUNK_ITEM_STRIPE* cis;
    NTSTATUS Status;
    UINT16 startoffstripe, num_missing, allowed_missing;

    TRACE("(%p, %p, %llx, %llx, %p)\n", Vcb, c, offset, size, csum);

    context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(scrub_context_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
    if (!context.stripes) {
        ERR("out of memory\n");
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto end;
    }

    RtlZeroMemory(context.stripes, sizeof(scrub_context_stripe) * c->chunk_item->num_stripes);

    context.stripes_left = 0;

    // per-device stripe records follow the CHUNK_ITEM in memory
    cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];

    if (type == BLOCK_FLAG_RAID0) {
        UINT64 startoff, endoff;
        UINT16 endoffstripe;

        // work out, for each device, the byte range covering this extent
        get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
        get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);

        for (i = 0; i < c->chunk_item->num_stripes; i++) {
            if (startoffstripe > i)
                context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
            else if (startoffstripe == i)
                context.stripes[i].start = startoff;
            else
                context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);

            if (endoffstripe > i)
                context.stripes[i].length = (UINT32)(endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length - context.stripes[i].start);
            else if (endoffstripe == i)
                context.stripes[i].length = (UINT32)(endoff + 1 - context.stripes[i].start);
            else
                context.stripes[i].length = (UINT32)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start);
        }

        // RAID0 has no redundancy: a single missing device is fatal
        allowed_missing = 0;
    } else if (type == BLOCK_FLAG_RAID10) {
        UINT64 startoff, endoff;
        UINT16 endoffstripe, j, sub_stripes = max(c->chunk_item->sub_stripes, 1);

        get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe);
        get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe);

        if ((c->chunk_item->num_stripes % sub_stripes) != 0) {
            ERR("chunk %llx: num_stripes %x was not a multiple of sub_stripes %x!\n", c->offset, c->chunk_item->num_stripes, sub_stripes);
            Status = STATUS_INTERNAL_ERROR;
            goto end;
        }

        startoffstripe *= sub_stripes;
        endoffstripe *= sub_stripes;

        for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) {
            if (startoffstripe > i)
                context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
            else if (startoffstripe == i)
                context.stripes[i].start = startoff;
            else
                context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);

            if (endoffstripe > i)
                context.stripes[i].length = (UINT32)(endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length - context.stripes[i].start);
            else if (endoffstripe == i)
                context.stripes[i].length = (UINT32)(endoff + 1 - context.stripes[i].start);
            else
                context.stripes[i].length = (UINT32)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start);

            // mirrored sub-stripes read the same device range
            for (j = 1; j < sub_stripes; j++) {
                context.stripes[i+j].start = context.stripes[i].start;
                context.stripes[i+j].length = context.stripes[i].length;
            }
        }

        startoffstripe /= sub_stripes;
        allowed_missing = 1;
    } else
        // DUPLICATE: any one surviving copy is enough
        allowed_missing = c->chunk_item->num_stripes - 1;

    num_missing = 0;

    // build and prepare a read IRP for every stripe on a present device
    for (i = 0; i < c->chunk_item->num_stripes; i++) {
        PIO_STACK_LOCATION IrpSp;

        context.stripes[i].context = (struct _scrub_context*)&context;

        if (type == BLOCK_FLAG_DUPLICATE) {
            context.stripes[i].start = offset - c->offset;
            context.stripes[i].length = size;
        } else if (type != BLOCK_FLAG_RAID0 && type != BLOCK_FLAG_RAID10) {
            ERR("unexpected chunk type %x\n", type);
            Status = STATUS_INTERNAL_ERROR;
            goto end;
        }

        if (!c->devices[i]->devobj) {
            num_missing++;

            if (num_missing > allowed_missing) {
                ERR("too many missing devices (at least %u, maximum allowed %u)\n", num_missing, allowed_missing);
                Status = STATUS_INTERNAL_ERROR;
                goto end;
            }
        } else if (context.stripes[i].length > 0) {
            context.stripes[i].buf = ExAllocatePoolWithTag(NonPagedPool, context.stripes[i].length, ALLOC_TAG);

            if (!context.stripes[i].buf) {
                ERR("out of memory\n");
                Status = STATUS_INSUFFICIENT_RESOURCES;
                goto end;
            }

            context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, FALSE);

            if (!context.stripes[i].Irp) {
                ERR("IoAllocateIrp failed\n");
                Status = STATUS_INSUFFICIENT_RESOURCES;
                goto end;
            }

            IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
            IrpSp->MajorFunction = IRP_MJ_READ;

            // set up the transfer buffer according to the target device's I/O model
            if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) {
                context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, context.stripes[i].length, ALLOC_TAG);
                if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
                    ERR("out of memory\n");
                    Status = STATUS_INSUFFICIENT_RESOURCES;
                    goto end;
                }

                context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION;

                context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
            } else if (c->devices[i]->devobj->Flags & DO_DIRECT_IO) {
                context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, context.stripes[i].length, FALSE, FALSE, NULL);
                if (!context.stripes[i].Irp->MdlAddress) {
                    ERR("IoAllocateMdl failed\n");
                    Status = STATUS_INSUFFICIENT_RESOURCES;
                    goto end;
                }

                Status = STATUS_SUCCESS;

                // MmProbeAndLockPages raises on failure, hence the SEH wrapper
                _SEH2_TRY {
                    MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
                } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
                    Status = _SEH2_GetExceptionCode();
                } _SEH2_END;

                if (!NT_SUCCESS(Status)) {
                    ERR("MmProbeAndLockPages threw exception %08x\n", Status);
                    IoFreeMdl(context.stripes[i].Irp->MdlAddress);
                    // NULL so the cleanup path below doesn't unlock/free it again
                    context.stripes[i].Irp->MdlAddress = NULL;
                    goto end;
                }
            } else
                context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;

            IrpSp->Parameters.Read.Length = context.stripes[i].length;
            IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].start + cis[i].offset;

            context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;

            IoSetCompletionRoutine(context.stripes[i].Irp, scrub_read_completion, &context.stripes[i], TRUE, TRUE, TRUE);

            context.stripes_left++;

            Vcb->scrub.data_scrubbed += context.stripes[i].length;
        }
    }

    if (context.stripes_left == 0) {
        ERR("error - not reading any stripes\n");
        Status = STATUS_INTERNAL_ERROR;
        goto end;
    }

    KeInitializeEvent(&context.Event, NotificationEvent, FALSE);

    // fire off all the reads; the completion routine signals Event when the
    // last one finishes
    for (i = 0; i < c->chunk_item->num_stripes; i++) {
        if (c->devices[i]->devobj && context.stripes[i].length > 0)
            IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp);
    }

    KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);

    // return an error if any of the stripes returned an error
    for (i = 0; i < c->chunk_item->num_stripes; i++) {
        if (!NT_SUCCESS(context.stripes[i].iosb.Status)) {
            Status = context.stripes[i].iosb.Status;
            log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_READ_ERRORS);
            goto end;
        }
    }

    if (type == BLOCK_FLAG_DUPLICATE) {
        Status = scrub_extent_dup(Vcb, c, offset, csum, &context);
        if (!NT_SUCCESS(Status)) {
            ERR("scrub_extent_dup returned %08x\n", Status);
            goto end;
        }
    } else if (type == BLOCK_FLAG_RAID0) {
        Status = scrub_extent_raid0(Vcb, c, offset, size, startoffstripe, csum, &context);
        if (!NT_SUCCESS(Status)) {
            ERR("scrub_extent_raid0 returned %08x\n", Status);
            goto end;
        }
    } else if (type == BLOCK_FLAG_RAID10) {
        Status = scrub_extent_raid10(Vcb, c, offset, size, startoffstripe, csum, &context);
        if (!NT_SUCCESS(Status)) {
            ERR("scrub_extent_raid10 returned %08x\n", Status);
            goto end;
        }
    }

end:
    // common cleanup: unlock/free MDLs, IRPs, data buffers and checksum arrays
    if (context.stripes) {
        for (i = 0; i < c->chunk_item->num_stripes; i++) {
            if (context.stripes[i].Irp) {
                if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) {
                    MmUnlockPages(context.stripes[i].Irp->MdlAddress);
                    IoFreeMdl(context.stripes[i].Irp->MdlAddress);
                }
                IoFreeIrp(context.stripes[i].Irp);
            }

            if (context.stripes[i].buf)
                ExFreePool(context.stripes[i].buf);

            if (context.stripes[i].bad_csums)
                ExFreePool(context.stripes[i].bad_csums);
        }

        ExFreePool(context.stripes);
    }

    return Status;
}
1637
1638 static NTSTATUS scrub_data_extent(device_extension* Vcb, chunk* c, UINT64 offset, ULONG type, UINT32* csum, RTL_BITMAP* bmp) {
1639 NTSTATUS Status;
1640 ULONG runlength, index;
1641
1642 runlength = RtlFindFirstRunClear(bmp, &index);
1643
1644 while (runlength != 0) {
1645 do {
1646 ULONG rl;
1647
1648 if (runlength * Vcb->superblock.sector_size > SCRUB_UNIT)
1649 rl = SCRUB_UNIT / Vcb->superblock.sector_size;
1650 else
1651 rl = runlength;
1652
1653 Status = scrub_extent(Vcb, c, type, offset + UInt32x32To64(index, Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, &csum[index]);
1654 if (!NT_SUCCESS(Status)) {
1655 ERR("scrub_data_extent_dup returned %08x\n", Status);
1656 return Status;
1657 }
1658
1659 runlength -= rl;
1660 index += rl;
1661 } while (runlength > 0);
1662
1663 runlength = RtlFindNextForwardRunClear(bmp, index, &index);
1664 }
1665
1666 return STATUS_SUCCESS;
1667 }
1668
// Per-device state for a RAID5/6 scrub read.
typedef struct {
    UINT8* buf;            // buffer the stripe's data is read into
    PIRP Irp;              // read IRP for this device
    void* context;         // back-pointer to the owning scrub_context_raid56
    IO_STATUS_BLOCK iosb;  // completion status, filled in by the completion routine
    UINT64 offset;         // presumably the device byte offset of the read — confirm against caller
    BOOL rewrite, missing; // rewrite: buffer was repaired and must be written back; missing: device absent
    RTL_BITMAP error;      // per-sector error map for this stripe (see scrub_raid5_stripe)
    ULONG* errorarr;       // backing storage for the error bitmap
} scrub_context_raid56_stripe;
1679
// Shared state for scrubbing a RAID5/6 chunk region.
typedef struct {
    scrub_context_raid56_stripe* stripes; // one entry per device in the chunk
    LONG stripes_left;                    // outstanding reads; last completion signals Event
    KEVENT Event;                         // signalled when all stripe reads have completed
    RTL_BITMAP alloc;                     // which sectors are allocated
    RTL_BITMAP has_csum;                  // which sectors have data checksums
    RTL_BITMAP is_tree;                   // which sectors hold tree (metadata) blocks
    UINT32* csum;                         // expected data checksums, indexed by sector
    UINT8* parity_scratch;                // scratch for parity verification/reconstruction
    UINT8* parity_scratch2;               // second scratch buffer — used for the RAID6 second parity stripe (see scrub_raid6_stripe)
} scrub_context_raid56;
1691
1692 _Function_class_(IO_COMPLETION_ROUTINE)
1693 #ifdef __REACTOS__
1694 static NTSTATUS NTAPI scrub_read_completion_raid56(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
1695 #else
1696 static NTSTATUS scrub_read_completion_raid56(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
1697 #endif
1698 scrub_context_raid56_stripe* stripe = conptr;
1699 scrub_context_raid56* context = (scrub_context_raid56*)stripe->context;
1700 LONG left = InterlockedDecrement(&context->stripes_left);
1701
1702 UNUSED(DeviceObject);
1703
1704 stripe->iosb = Irp->IoStatus;
1705
1706 if (left == 0)
1707 KeSetEvent(&context->Event, 0, FALSE);
1708
1709 return STATUS_MORE_PROCESSING_REQUIRED;
1710 }
1711
// Verifies one full RAID5 stripe-row that has already been read into
// context->stripes: checks each allocated sector (or tree block) against its
// checksum, verifies the parity stripe by XORing all data stripes, and, where
// exactly one copy of a sector is bad, reconstructs it from parity and marks
// the stripe for rewrite. With missing devices, errors can only be logged.
// num is the row's index within the buffered run starting at bit_start;
// stripe_start is the first row of the overall region being scrubbed.
static void scrub_raid5_stripe(device_extension* Vcb, chunk* c, scrub_context_raid56* context, UINT64 stripe_start, UINT64 bit_start,
                               UINT64 num, UINT16 missing_devices) {
    ULONG sectors_per_stripe = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size), i, off;
    // parity device rotates by one per row
    UINT16 stripe, parity = (bit_start + num + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
    UINT64 stripeoff;

    stripe = (parity + 1) % c->chunk_item->num_stripes;
    // off indexes the chunk-wide bitmaps (alloc/has_csum/is_tree/csum);
    // stripeoff indexes sectors within each device's read buffer
    off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1);
    stripeoff = num * sectors_per_stripe;

    // seed the scratch buffer with the parity stripe; XORing in each data
    // stripe below should leave all zeroes if the parity is consistent
    if (missing_devices == 0)
        RtlCopyMemory(context->parity_scratch, &context->stripes[parity].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);

    // pass 1: per-sector checksum verification of every data stripe
    while (stripe != parity) {
        RtlClearAllBits(&context->stripes[stripe].error);

        for (i = 0; i < sectors_per_stripe; i++) {
            if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
                if (RtlCheckBit(&context->is_tree, off)) {
                    // metadata: whole tree block checked at once via its header
                    tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff * Vcb->superblock.sector_size];
                    UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
                    UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

                    if (crc32 != *((UINT32*)th->csum) || th->address != addr) {
                        RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size / Vcb->superblock.sector_size);
                        log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                        // no reconstruction possible with a device missing, so log now
                        if (missing_devices > 0)
                            log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, TRUE, FALSE, FALSE);
                    }

                    // skip the rest of the sectors covered by this tree block
                    off += Vcb->superblock.node_size / Vcb->superblock.sector_size;
                    stripeoff += Vcb->superblock.node_size / Vcb->superblock.sector_size;
                    i += (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1;

                    continue;
                } else if (RtlCheckBit(&context->has_csum, off)) {
                    UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[stripe].buf + (stripeoff * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

                    if (crc32 != context->csum[off]) {
                        RtlSetBit(&context->stripes[stripe].error, i);
                        log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                        if (missing_devices > 0) {
                            UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);

                            log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, FALSE, FALSE, FALSE);
                        }
                    }
                }
            }

            off++;
            stripeoff++;
        }

        // fold this data stripe into the parity check
        if (missing_devices == 0)
            do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);

        stripe = (stripe + 1) % c->chunk_item->num_stripes;
        stripeoff = num * sectors_per_stripe;
    }

    // check parity

    if (missing_devices == 0) {
        RtlClearAllBits(&context->stripes[parity].error);

        // any non-zero byte in the scratch buffer means that sector's parity
        // doesn't match the data stripes
        for (i = 0; i < sectors_per_stripe; i++) {
            ULONG o, j;

            o = i * Vcb->superblock.sector_size;
            for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE
                if (context->parity_scratch[o] != 0) {
                    RtlSetBit(&context->stripes[parity].error, i);
                    break;
                }
                o++;
            }
        }
    }

    // log and fix errors

    if (missing_devices > 0)
        return;

    // pass 2: per sector-column, count errors across the data stripes and
    // recover where possible
    for (i = 0; i < sectors_per_stripe; i++) {
        ULONG num_errors = 0, bad_off;
        UINT64 bad_stripe;
        BOOL alloc = FALSE;

        stripe = (parity + 1) % c->chunk_item->num_stripes;
        off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;

        while (stripe != parity) {
            if (RtlCheckBit(&context->alloc, off)) {
                alloc = TRUE;

                if (RtlCheckBit(&context->stripes[stripe].error, i)) {
                    bad_stripe = stripe;
                    bad_off = off;
                    num_errors++;
                }
            }

            off += sectors_per_stripe;
            stripe = (stripe + 1) % c->chunk_item->num_stripes;
        }

        if (!alloc)
            continue;

        if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity].error, i)) // everything fine
            continue;

        if (num_errors == 0 && RtlCheckBit(&context->stripes[parity].error, i)) { // parity error
            UINT64 addr;

            // data is good, so rebuild the parity sector in place:
            // XORing the residue back into the parity buffer yields the
            // value consistent with the data stripes
            do_xor(&context->stripes[parity].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                   &context->parity_scratch[i * Vcb->superblock.sector_size],
                   Vcb->superblock.sector_size);

            bad_off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;
            addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off * Vcb->superblock.sector_size);

            context->stripes[parity].rewrite = TRUE;

            log_error(Vcb, addr, c->devices[parity]->devitem.dev_id, FALSE, TRUE, TRUE);
            log_device_error(Vcb, c->devices[parity], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (num_errors == 1) {
            // exactly one bad data sector: reconstruct it from the other
            // stripes plus parity, then verify the rebuilt copy
            UINT32 crc32;
            UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off * Vcb->superblock.sector_size);

            if (RtlCheckBit(&context->is_tree, bad_off)) {
                tree_header* th;

                // parity_scratch currently holds parity XOR all-stripes; XORing
                // the bad stripe's data back out leaves the reconstructed block
                do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
                       &context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                       Vcb->superblock.node_size);

                th = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size];
                crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

                if (crc32 == *((UINT32*)th->csum) && th->address == addr) {
                    RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);

                    context->stripes[bad_stripe].rewrite = TRUE;

                    // clear the remaining error bits this tree block had set
                    RtlClearBits(&context->stripes[bad_stripe].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);

                    log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, TRUE, TRUE, FALSE);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, TRUE, FALSE, FALSE);
            } else {
                do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
                       &context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                       Vcb->superblock.sector_size);

                crc32 = ~calc_crc32c(0xffffffff, &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);

                if (crc32 == context->csum[bad_off]) {
                    RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);

                    context->stripes[bad_stripe].rewrite = TRUE;

                    log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, FALSE, TRUE, FALSE);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, FALSE, FALSE, FALSE);
            }
        } else {
            // two or more bad sectors in the column: RAID5 cannot recover,
            // just log each one
            stripe = (parity + 1) % c->chunk_item->num_stripes;
            off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;

            while (stripe != parity) {
                if (RtlCheckBit(&context->alloc, off)) {
                    if (RtlCheckBit(&context->stripes[stripe].error, i)) {
                        UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);

                        log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, RtlCheckBit(&context->is_tree, off), FALSE, FALSE);
                    }
                }

                off += sectors_per_stripe;
                stripe = (stripe + 1) % c->chunk_item->num_stripes;
            }
        }
    }
}
1903
/* Scrub one stripe_length-sized row (`num`) of a RAID6 full stripe.
 *
 * Vcb             - volume device extension (gives sector_size / node_size)
 * c               - chunk being scrubbed; c->chunk_item describes geometry
 * context         - per-run scratch state: per-stripe read buffers and error
 *                   bitmaps, alloc/is_tree/has_csum bitmaps, csum array, and
 *                   two stripe_length-sized scratch buffers
 * stripe_start    - first full-stripe number of the current run
 * bit_start       - full-stripe number corresponding to bit 0 of this batch
 * num             - index of the row within the current read batch
 * missing_devices - how many of the chunk's devices are absent (0, 1 or 2)
 *
 * Pass 1 walks the data stripes, flagging per-sector errors (tree-header
 * checksum/address mismatch, or data checksum mismatch against context->csum)
 * while accumulating the XOR of all data stripes into parity_scratch so it
 * can later be compared against / combined with the stored P parity.
 * parity_scratch2 is then filled with the recomputed Q parity
 * (galois_double + xor over the data stripes in descending order).
 *
 * After both parities have been checked, errors are repaired where possible:
 *  - 0 bad data sectors but bad parity: rewrite P and/or Q from scratch data;
 *  - 1 bad data sector: rebuild it from P (parity_scratch) and independently
 *    from Q (local `scratch`, via galois_divpower), accept whichever copy
 *    passes its checksum, and re-derive the stale parity if needed;
 *  - 2 bad data sectors (no missing devices): solve the two unknowns with
 *    the standard RAID6 two-erasure equations using gpow2/gmul/gdiv
 *    (GF(2^8) helpers — see galois.c);
 *  - otherwise only log the errors. */
static void scrub_raid6_stripe(device_extension* Vcb, chunk* c, scrub_context_raid56* context, UINT64 stripe_start, UINT64 bit_start,
                               UINT64 num, UINT16 missing_devices) {
    ULONG sectors_per_stripe = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size), i, off;
    // Parity rotates per full stripe: parity1 (P) and parity2 (Q) are adjacent.
    UINT16 stripe, parity1 = (bit_start + num + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes;
    UINT16 parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
    UINT64 stripeoff;

    stripe = (parity1 + 2) % c->chunk_item->num_stripes; // first data stripe of this row
    // off indexes the run-wide alloc/is_tree/has_csum/csum arrays; stripeoff
    // indexes sectors within this stripe's read buffer. They advance together.
    off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2);
    stripeoff = num * sectors_per_stripe;

    // Seed parity_scratch with the stored P parity; XORing every data stripe
    // into it below leaves zero wherever P is consistent.
    if (c->devices[parity1]->devobj)
        RtlCopyMemory(context->parity_scratch, &context->stripes[parity1].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);

    // parity_scratch2 accumulates a freshly computed Q from zero.
    if (c->devices[parity2]->devobj)
        RtlZeroMemory(context->parity_scratch2, (ULONG)c->chunk_item->stripe_length);

    // Pass 1: checksum-verify every allocated sector on each data stripe and
    // fold the stripe into the P accumulator.
    while (stripe != parity1) {
        RtlClearAllBits(&context->stripes[stripe].error);

        for (i = 0; i < sectors_per_stripe; i++) {
            if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
                if (RtlCheckBit(&context->is_tree, off)) {
                    // Metadata: verify the tree header's checksum and that its
                    // recorded address matches where we read it from.
                    tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff * Vcb->superblock.sector_size];
                    UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
                    UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

                    if (crc32 != *((UINT32*)th->csum) || th->address != addr) {
                        // Mark every sector of the tree block as bad.
                        RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size / Vcb->superblock.sector_size);
                        log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                        // With two devices gone there is nothing to rebuild
                        // from, so log the unrecoverable error immediately.
                        if (missing_devices == 2)
                            log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, TRUE, FALSE, FALSE);
                    }

                    // Skip over the rest of the tree block (node_size may span
                    // several sectors).
                    off += Vcb->superblock.node_size / Vcb->superblock.sector_size;
                    stripeoff += Vcb->superblock.node_size / Vcb->superblock.sector_size;
                    i += (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1;

                    continue;
                } else if (RtlCheckBit(&context->has_csum, off)) {
                    // Data sector with a stored checksum: compare.
                    UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[stripe].buf + (stripeoff * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

                    if (crc32 != context->csum[off]) {
                        UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);

                        RtlSetBit(&context->stripes[stripe].error, i);
                        log_device_error(Vcb, c->devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                        if (missing_devices == 2)
                            log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, FALSE, FALSE, FALSE);
                    }
                }
            }

            off++;
            stripeoff++;
        }

        // Fold this data stripe into the P accumulator.
        if (c->devices[parity1]->devobj)
            do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (UINT32)c->chunk_item->stripe_length);

        stripe = (stripe + 1) % c->chunk_item->num_stripes;
        stripeoff = num * sectors_per_stripe; // reset buffer offset for the next stripe
    }

    RtlClearAllBits(&context->stripes[parity1].error);

    // Check P parity: after folding in all data stripes, any non-zero byte in
    // parity_scratch means the stored P disagrees for that sector.
    // Only meaningful if every data stripe was readable.
    if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity2]->devobj)) {
        // check parity 1

        for (i = 0; i < sectors_per_stripe; i++) {
            ULONG o, j;

            o = i * Vcb->superblock.sector_size;
            for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE
                if (context->parity_scratch[o] != 0) {
                    RtlSetBit(&context->stripes[parity1].error, i);
                    break;
                }
                o++;
            }
        }
    }

    RtlClearAllBits(&context->stripes[parity2].error);

    // Check Q parity: recompute Q from the data stripes (highest coefficient
    // first, doubling in GF(2^8) each step) and compare with what was read.
    if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity1]->devobj)) {
        // check parity 2

        stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);

        while (stripe != parity2) {
            galois_double(context->parity_scratch2, (UINT32)c->chunk_item->stripe_length);
            do_xor(context->parity_scratch2, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (UINT32)c->chunk_item->stripe_length);

            stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
        }

        for (i = 0; i < sectors_per_stripe; i++) {
            if (RtlCompareMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                 &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size) != Vcb->superblock.sector_size)
                RtlSetBit(&context->stripes[parity2].error, i);
        }
    }

    // With two missing devices nothing can be reconstructed; errors were
    // already logged above.
    if (missing_devices == 2)
        return;

    // log and fix errors

    for (i = 0; i < sectors_per_stripe; i++) {
        ULONG num_errors = 0;
        UINT64 bad_stripe1, bad_stripe2;
        ULONG bad_off1, bad_off2;
        BOOL alloc = FALSE;

        // Count bad (errored or unreadable) data stripes for sector i, noting
        // the first two so they can be rebuilt below.
        stripe = (parity1 + 2) % c->chunk_item->num_stripes;
        off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;

        while (stripe != parity1) {
            if (RtlCheckBit(&context->alloc, off)) {
                alloc = TRUE;

                if (!c->devices[stripe]->devobj || RtlCheckBit(&context->stripes[stripe].error, i)) {
                    if (num_errors == 0) {
                        bad_stripe1 = stripe;
                        bad_off1 = off;
                    } else if (num_errors == 1) {
                        bad_stripe2 = stripe;
                        bad_off2 = off;
                    }
                    num_errors++;
                }
            }

            off += sectors_per_stripe;
            stripe = (stripe + 1) % c->chunk_item->num_stripes;
        }

        // Unallocated sectors don't matter.
        if (!alloc)
            continue;

        if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity1].error, i) && !RtlCheckBit(&context->stripes[parity2].error, i)) // everything fine
            continue;

        if (num_errors == 0) { // parity error
            UINT64 addr;

            // Data is good but stored parity isn't: rewrite P and/or Q.
            if (RtlCheckBit(&context->stripes[parity1].error, i)) {
                // parity_scratch holds storedP ^ recomputedP, so XORing it
                // into the stored P yields the correct value in place.
                do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                       &context->parity_scratch[i * Vcb->superblock.sector_size],
                       Vcb->superblock.sector_size);

                bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
                addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);

                context->stripes[parity1].rewrite = TRUE;

                log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, FALSE, TRUE, TRUE);
                log_device_error(Vcb, c->devices[parity1], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
            }

            if (RtlCheckBit(&context->stripes[parity2].error, i)) {
                // parity_scratch2 holds the freshly recomputed Q.
                RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                              &context->parity_scratch2[i * Vcb->superblock.sector_size],
                              Vcb->superblock.sector_size);

                bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
                addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);

                context->stripes[parity2].rewrite = TRUE;

                log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, FALSE, TRUE, TRUE);
                log_device_error(Vcb, c->devices[parity2], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
            }
        } else if (num_errors == 1) {
            // One bad data sector: try to rebuild from P (into parity_scratch)
            // and from Q (into `scratch`), then keep whichever passes its csum.
            UINT32 crc32a, crc32b, len;
            UINT16 stripe_num, bad_stripe_num;
            UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);
            UINT8* scratch;

            len = RtlCheckBit(&context->is_tree, bad_off1)? Vcb->superblock.node_size : Vcb->superblock.sector_size;

            scratch = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
            if (!scratch) {
                ERR("out of memory\n");
                return;
            }

            RtlZeroMemory(scratch, len);

            // parity_scratch currently holds the XOR of stored P with all data
            // stripes; XORing the bad stripe's (wrong) contents back out
            // leaves the P-reconstructed data for this sector.
            do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
                   &context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);

            stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);

            if (c->devices[parity2]->devobj) {
                // Recompute Q over the good data stripes (skipping the bad
                // one), fold in stored Q, then divide out the bad stripe's
                // coefficient to recover its data.
                stripe_num = c->chunk_item->num_stripes - 3;
                while (stripe != parity2) {
                    galois_double(scratch, len);

                    if (stripe != bad_stripe1)
                        do_xor(scratch, &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
                    else
                        bad_stripe_num = stripe_num; // remember the bad stripe's Q exponent

                    stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
                    stripe_num--;
                }

                do_xor(scratch, &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);

                if (bad_stripe_num != 0)
                    galois_divpower(scratch, (UINT8)bad_stripe_num, len);
            }

            if (RtlCheckBit(&context->is_tree, bad_off1)) {
                // Metadata sector: validate both candidates via tree header.
                tree_header *th1 = NULL, *th2 = NULL;

                if (c->devices[parity1]->devobj) {
                    th1 = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size];
                    crc32a = ~calc_crc32c(0xffffffff, (UINT8*)&th1->fs_uuid, Vcb->superblock.node_size - sizeof(th1->csum));
                }

                if (c->devices[parity2]->devobj) {
                    th2 = (tree_header*)scratch;
                    crc32b = ~calc_crc32c(0xffffffff, (UINT8*)&th2->fs_uuid, Vcb->superblock.node_size - sizeof(th2->csum));
                }

                // At least one reconstruction checks out?
                if ((c->devices[parity1]->devobj && crc32a == *((UINT32*)th1->csum) && th1->address == addr) ||
                    (c->devices[parity2]->devobj && crc32b == *((UINT32*)th2->csum) && th2->address == addr)) {
                    if (!c->devices[parity1]->devobj || crc32a != *((UINT32*)th1->csum) || th1->address != addr) {
                        // Only the Q-derived copy is good: use it, and
                        // regenerate P (which must have been wrong).
                        RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                      scratch, Vcb->superblock.node_size);

                        if (c->devices[parity1]->devobj) {
                            // fix parity 1

                            stripe = (parity1 + 2) % c->chunk_item->num_stripes;

                            RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                          &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                          Vcb->superblock.node_size);

                            stripe = (stripe + 1) % c->chunk_item->num_stripes;

                            while (stripe != parity1) {
                                do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                       &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                       Vcb->superblock.node_size);

                                stripe = (stripe + 1) % c->chunk_item->num_stripes;
                            }

                            context->stripes[parity1].rewrite = TRUE;

                            log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, FALSE, TRUE, TRUE);
                            log_device_error(Vcb, c->devices[parity1], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                        }
                    } else {
                        // P-derived copy is good: use it.
                        RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                      &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);

                        if (!c->devices[parity2]->devobj || crc32b != *((UINT32*)th2->csum) || th2->address != addr) {
                            // fix parity 2
                            stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);

                            if (c->devices[parity2]->devobj) {
                                // Rebuild Q from the (now corrected) data.
                                RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                              &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                              Vcb->superblock.node_size);

                                stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);

                                while (stripe != parity2) {
                                    galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], Vcb->superblock.node_size);

                                    do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                           &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                           Vcb->superblock.node_size);

                                    stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
                                }

                                context->stripes[parity2].rewrite = TRUE;

                                log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, FALSE, TRUE, TRUE);
                                log_device_error(Vcb, c->devices[parity2], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                            }
                        }
                    }

                    context->stripes[bad_stripe1].rewrite = TRUE;

                    // Recovered the whole tree block; clear the remaining
                    // per-sector error bits so they aren't re-reported.
                    RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);

                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, TRUE, TRUE, FALSE);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, TRUE, FALSE, FALSE);
            } else {
                // Data sector: validate both candidates against the stored csum.
                if (c->devices[parity1]->devobj)
                    crc32a = ~calc_crc32c(0xffffffff, &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);

                if (c->devices[parity2]->devobj)
                    crc32b = ~calc_crc32c(0xffffffff, scratch, Vcb->superblock.sector_size);

                if ((c->devices[parity1]->devobj && crc32a == context->csum[bad_off1]) || (c->devices[parity2]->devobj && crc32b == context->csum[bad_off1])) {
                    if (c->devices[parity2]->devobj && crc32b == context->csum[bad_off1]) {
                        // Q-derived copy is good.
                        RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                      scratch, Vcb->superblock.sector_size);

                        if (c->devices[parity1]->devobj && crc32a != context->csum[bad_off1]) {
                            // fix parity 1

                            stripe = (parity1 + 2) % c->chunk_item->num_stripes;

                            RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                          &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                          Vcb->superblock.sector_size);

                            stripe = (stripe + 1) % c->chunk_item->num_stripes;

                            while (stripe != parity1) {
                                do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                       &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                       Vcb->superblock.sector_size);

                                stripe = (stripe + 1) % c->chunk_item->num_stripes;
                            }

                            context->stripes[parity1].rewrite = TRUE;

                            log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, FALSE, TRUE, TRUE);
                            log_device_error(Vcb, c->devices[parity1], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                        }
                    } else {
                        // P-derived copy is good.
                        RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                      &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);

                        if (c->devices[parity2]->devobj && crc32b != context->csum[bad_off1]) {
                            // fix parity 2
                            stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);

                            RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                          &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                          Vcb->superblock.sector_size);

                            stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);

                            while (stripe != parity2) {
                                galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], Vcb->superblock.sector_size);

                                do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                       &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                       Vcb->superblock.sector_size);

                                stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
                            }

                            context->stripes[parity2].rewrite = TRUE;

                            log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, FALSE, TRUE, TRUE);
                            log_device_error(Vcb, c->devices[parity2], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                        }
                    }

                    context->stripes[bad_stripe1].rewrite = TRUE;

                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, FALSE, TRUE, FALSE);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, FALSE, FALSE, FALSE);
            }

            ExFreePool(scratch);
        } else if (num_errors == 2 && missing_devices == 0) {
            // Two bad data sectors: solve the two-erasure case with both
            // parities (cf. Anvin, "The mathematics of RAID-6"). x and y are
            // the Q exponents of the two bad stripes (y > x by iteration order).
            UINT16 x, y, k;
            UINT64 addr;
            // Recover a whole tree block if either bad sector is metadata.
            UINT32 len = (RtlCheckBit(&context->is_tree, bad_off1) || RtlCheckBit(&context->is_tree, bad_off2)) ? Vcb->superblock.node_size : Vcb->superblock.sector_size;
            UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
            UINT32 j;

            stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);

            // put qxy in parity_scratch
            // put pxy in parity_scratch2

            // Accumulate Q and P over the surviving data stripes only (Qxy and
            // Pxy), recording the exponents k of the two missing ones.
            k = c->chunk_item->num_stripes - 3;
            if (stripe == bad_stripe1 || stripe == bad_stripe2) {
                RtlZeroMemory(&context->parity_scratch[i * Vcb->superblock.sector_size], len);
                RtlZeroMemory(&context->parity_scratch2[i * Vcb->superblock.sector_size], len);

                if (stripe == bad_stripe1)
                    x = k;
                else
                    y = k;
            } else {
                RtlCopyMemory(&context->parity_scratch[i * Vcb->superblock.sector_size],
                              &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
                RtlCopyMemory(&context->parity_scratch2[i * Vcb->superblock.sector_size],
                              &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
            }

            stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);

            k--;
            do {
                galois_double(&context->parity_scratch[i * Vcb->superblock.sector_size], len);

                if (stripe != bad_stripe1 && stripe != bad_stripe2) {
                    do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
                           &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
                    do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size],
                           &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
                } else if (stripe == bad_stripe1)
                    x = k;
                else if (stripe == bad_stripe2)
                    y = k;

                stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
                k--;
            } while (stripe != parity2);

            // GF(2^8) coefficients for the two-erasure solution.
            gyx = gpow2(y > x ? (y-x) : (255-x+y));
            gx = gpow2(255-x);

            denom = gdiv(1, gyx ^ 1);
            a = gmul(gyx, denom);
            b = gmul(gx, denom);

            p = &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)];
            q = &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)];
            pxy = &context->parity_scratch2[i * Vcb->superblock.sector_size];
            qxy = &context->parity_scratch[i * Vcb->superblock.sector_size];

            // D_x = A*(P^Pxy) ^ B*(Q^Qxy), computed byte by byte into qxy
            // (parity_scratch now holds the first recovered sector).
            for (j = 0; j < len; j++) {
                *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);

                p++;
                q++;
                pxy++;
                qxy++;
            }

            // D_y = (P ^ Pxy) ^ D_x, left in parity_scratch2.
            do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size], &context->parity_scratch[i * Vcb->superblock.sector_size], len);
            do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size], &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);

            // Verify and install the first recovered sector (bad_stripe1).
            addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);

            if (RtlCheckBit(&context->is_tree, bad_off1)) {
                tree_header* th = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size];
                UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

                if (crc32 == *((UINT32*)th->csum) && th->address == addr) {
                    RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);

                    context->stripes[bad_stripe1].rewrite = TRUE;

                    RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);

                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, TRUE, TRUE, FALSE);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, TRUE, FALSE, FALSE);
            } else {
                UINT32 crc32 = ~calc_crc32c(0xffffffff, &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);

                if (crc32 == context->csum[bad_off1]) {
                    RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);

                    context->stripes[bad_stripe1].rewrite = TRUE;

                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, FALSE, TRUE, FALSE);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, FALSE, FALSE, FALSE);
            }

            // Verify and install the second recovered sector (bad_stripe2).
            addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off2 * Vcb->superblock.sector_size);

            if (RtlCheckBit(&context->is_tree, bad_off2)) {
                tree_header* th = (tree_header*)&context->parity_scratch2[i * Vcb->superblock.sector_size];
                UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

                if (crc32 == *((UINT32*)th->csum) && th->address == addr) {
                    RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                  &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);

                    context->stripes[bad_stripe2].rewrite = TRUE;

                    RtlClearBits(&context->stripes[bad_stripe2].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);

                    log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, TRUE, TRUE, FALSE);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, TRUE, FALSE, FALSE);
            } else {
                UINT32 crc32 = ~calc_crc32c(0xffffffff, &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);

                if (crc32 == context->csum[bad_off2]) {
                    RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
                                  &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);

                    context->stripes[bad_stripe2].rewrite = TRUE;

                    log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, FALSE, TRUE, FALSE);
                } else
                    log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, FALSE, FALSE, FALSE);
            }
        } else {
            // More errors than RAID6 can correct (or errors with a device
            // missing): just log each bad allocated sector as unrecoverable.
            stripe = (parity2 + 1) % c->chunk_item->num_stripes;
            off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;

            while (stripe != parity1) {
                if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
                    if (RtlCheckBit(&context->stripes[stripe].error, i)) {
                        UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);

                        log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, RtlCheckBit(&context->is_tree, off), FALSE, FALSE);
                    }
                }

                off += sectors_per_stripe;
                stripe = (stripe + 1) % c->chunk_item->num_stripes;
            }
        }
    }
}
2431
2432 static NTSTATUS scrub_chunk_raid56_stripe_run(device_extension* Vcb, chunk* c, UINT64 stripe_start, UINT64 stripe_end) {
2433 NTSTATUS Status;
2434 KEY searchkey;
2435 traverse_ptr tp;
2436 BOOL b;
2437 UINT64 run_start, run_end, full_stripe_len, stripe;
2438 UINT32 max_read, num_sectors;
2439 ULONG arrlen, *allocarr, *csumarr = NULL, *treearr, num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ? 2 : 1;
2440 scrub_context_raid56 context;
2441 UINT16 i;
2442 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
2443
2444 TRACE("(%p, %p, %llx, %llx)\n", Vcb, c, stripe_start, stripe_end);
2445
2446 full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length;
2447 run_start = c->offset + (stripe_start * full_stripe_len);
2448 run_end = c->offset + ((stripe_end + 1) * full_stripe_len);
2449
2450 searchkey.obj_id = run_start;
2451 searchkey.obj_type = TYPE_METADATA_ITEM;
2452 searchkey.offset = 0xffffffffffffffff;
2453
2454 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
2455 if (!NT_SUCCESS(Status)) {
2456 ERR("find_item returned %08x\n", Status);
2457 return Status;
2458 }
2459
2460 num_sectors = (UINT32)((stripe_end - stripe_start + 1) * full_stripe_len / Vcb->superblock.sector_size);
2461 arrlen = (ULONG)sector_align((num_sectors / 8) + 1, sizeof(ULONG));
2462
2463 allocarr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG);
2464 if (!allocarr) {
2465 ERR("out of memory\n");
2466 return STATUS_INSUFFICIENT_RESOURCES;
2467 }
2468
2469 treearr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG);
2470 if (!treearr) {
2471 ERR("out of memory\n");
2472 ExFreePool(allocarr);
2473 return STATUS_INSUFFICIENT_RESOURCES;
2474 }
2475
2476 RtlInitializeBitMap(&context.alloc, allocarr, num_sectors);
2477 RtlClearAllBits(&context.alloc);
2478
2479 RtlInitializeBitMap(&context.is_tree, treearr, num_sectors);
2480 RtlClearAllBits(&context.is_tree);
2481
2482 context.parity_scratch = ExAllocatePoolWithTag(PagedPool, (ULONG)c->chunk_item->stripe_length, ALLOC_TAG);
2483 if (!context.parity_scratch) {
2484 ERR("out of memory\n");
2485 ExFreePool(allocarr);
2486 ExFreePool(treearr);
2487 return STATUS_INSUFFICIENT_RESOURCES;
2488 }
2489
2490 if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2491 csumarr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG);
2492 if (!csumarr) {
2493 ERR("out of memory\n");
2494 ExFreePool(allocarr);
2495 ExFreePool(treearr);
2496 ExFreePool(context.parity_scratch);
2497 return STATUS_INSUFFICIENT_RESOURCES;
2498 }
2499
2500 RtlInitializeBitMap(&context.has_csum, csumarr, num_sectors);
2501 RtlClearAllBits(&context.has_csum);
2502
2503 context.csum = ExAllocatePoolWithTag(PagedPool, num_sectors * sizeof(UINT32), ALLOC_TAG);
2504 if (!context.csum) {
2505 ERR("out of memory\n");
2506 ExFreePool(allocarr);
2507 ExFreePool(treearr);
2508 ExFreePool(context.parity_scratch);
2509 ExFreePool(csumarr);
2510 return STATUS_INSUFFICIENT_RESOURCES;
2511 }
2512 }
2513
2514 if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2515 context.parity_scratch2 = ExAllocatePoolWithTag(PagedPool, (ULONG)c->chunk_item->stripe_length, ALLOC_TAG);
2516 if (!context.parity_scratch2) {
2517 ERR("out of memory\n");
2518 ExFreePool(allocarr);
2519 ExFreePool(treearr);
2520 ExFreePool(context.parity_scratch);
2521
2522 if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2523 ExFreePool(csumarr);
2524 ExFreePool(context.csum);
2525 }
2526
2527 return STATUS_INSUFFICIENT_RESOURCES;
2528 }
2529 }
2530
2531 do {
2532 traverse_ptr next_tp;
2533
2534 if (tp.item->key.obj_id >= run_end)
2535 break;
2536
2537 if (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM) {
2538 UINT64 size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset;
2539
2540 if (tp.item->key.obj_id + size > run_start) {
2541 UINT64 extent_start = max(run_start, tp.item->key.obj_id);
2542 UINT64 extent_end = min(tp.item->key.obj_id + size, run_end);
2543 BOOL extent_is_tree = FALSE;
2544
2545 RtlSetBits(&context.alloc, (ULONG)((extent_start - run_start) / Vcb->superblock.sector_size), (ULONG)((extent_end - extent_start) / Vcb->superblock.sector_size));
2546
2547 if (tp.item->key.obj_type == TYPE_METADATA_ITEM)
2548 extent_is_tree = TRUE;
2549 else {
2550 EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
2551
2552 if (tp.item->size < sizeof(EXTENT_ITEM)) {
2553 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
2554 Status = STATUS_INTERNAL_ERROR;
2555 goto end;
2556 }
2557
2558 if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
2559 extent_is_tree = TRUE;
2560 }
2561
2562 if (extent_is_tree)
2563 RtlSetBits(&context.is_tree, (ULONG)((extent_start - run_start) / Vcb->superblock.sector_size), (ULONG)((extent_end - extent_start) / Vcb->superblock.sector_size));
2564 else if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2565 traverse_ptr tp2;
2566 BOOL b2;
2567
2568 searchkey.obj_id = EXTENT_CSUM_ID;
2569 searchkey.obj_type = TYPE_EXTENT_CSUM;
2570 searchkey.offset = extent_start;
2571
2572 Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, FALSE, NULL);
2573 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
2574 ERR("find_item returned %08x\n", Status);
2575 goto end;
2576 }
2577
2578 do {
2579 traverse_ptr next_tp2;
2580
2581 if (tp2.item->key.offset >= extent_end)
2582 break;
2583
2584 if (tp2.item->key.offset >= extent_start) {
2585 UINT64 csum_start = max(extent_start, tp2.item->key.offset);
2586 UINT64 csum_end = min(extent_end, tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / sizeof(UINT32)));
2587
2588 RtlSetBits(&context.has_csum, (ULONG)((csum_start - run_start) / Vcb->superblock.sector_size), (ULONG)((csum_end - csum_start) / Vcb->superblock.sector_size));
2589
2590 RtlCopyMemory(&context.csum[(csum_start - run_start) / Vcb->superblock.sector_size],
2591 tp2.item->data + ((csum_start - tp2.item->key.offset) * sizeof(UINT32) / Vcb->superblock.sector_size),
2592 (ULONG)((csum_end - csum_start) * sizeof(UINT32) / Vcb->superblock.sector_size));
2593 }
2594
2595 b2 = find_next_item(Vcb, &tp2, &next_tp2, FALSE, NULL);
2596
2597 if (b2)
2598 tp2 = next_tp2;
2599 } while (b2);
2600 }
2601 }
2602 }
2603
2604 b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
2605
2606 if (b)
2607 tp = next_tp;
2608 } while (b);
2609
2610 context.stripes = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_context_raid56_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
2611 if (!context.stripes) {
2612 ERR("out of memory\n");
2613 Status = STATUS_INSUFFICIENT_RESOURCES;
2614 goto end;
2615 }
2616
2617 max_read = (UINT32)min(1048576 / c->chunk_item->stripe_length, stripe_end - stripe_start + 1); // only process 1 MB of data at a time
2618
2619 for (i = 0; i < c->chunk_item->num_stripes; i++) {
2620 context.stripes[i].buf = ExAllocatePoolWithTag(PagedPool, (ULONG)(max_read * c->chunk_item->stripe_length), ALLOC_TAG);
2621 if (!context.stripes[i].buf) {
2622 UINT64 j;
2623
2624 ERR("out of memory\n");
2625
2626 for (j = 0; j < i; j++) {
2627 ExFreePool(context.stripes[j].buf);
2628 }
2629 ExFreePool(context.stripes);
2630
2631 Status = STATUS_INSUFFICIENT_RESOURCES;
2632 goto end;
2633 }
2634
2635 context.stripes[i].errorarr = ExAllocatePoolWithTag(PagedPool, (ULONG)sector_align(((c->chunk_item->stripe_length / Vcb->superblock.sector_size) / 8) + 1, sizeof(ULONG)), ALLOC_TAG);
2636 if (!context.stripes[i].errorarr) {
2637 UINT64 j;
2638
2639 ERR("out of memory\n");
2640
2641 ExFreePool(context.stripes[i].buf);
2642
2643 for (j = 0; j < i; j++) {
2644 ExFreePool(context.stripes[j].buf);
2645 }
2646 ExFreePool(context.stripes);
2647
2648 Status = STATUS_INSUFFICIENT_RESOURCES;
2649 goto end;
2650 }
2651
2652 RtlInitializeBitMap(&context.stripes[i].error, context.stripes[i].errorarr, (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size));
2653
2654 context.stripes[i].context = &context;
2655 context.stripes[i].rewrite = FALSE;
2656 }
2657
2658 stripe = stripe_start;
2659
2660 Status = STATUS_SUCCESS;
2661
2662 chunk_lock_range(Vcb, c, run_start, run_end - run_start);
2663
2664 do {
2665 ULONG read_stripes;
2666 UINT16 missing_devices = 0;
2667 BOOL need_wait = FALSE;
2668
2669 if (max_read < stripe_end + 1 - stripe)
2670 read_stripes = max_read;
2671 else
2672 read_stripes = (ULONG)(stripe_end + 1 - stripe);
2673
2674 context.stripes_left = c->chunk_item->num_stripes;
2675
2676 // read megabyte by megabyte
2677 for (i = 0; i < c->chunk_item->num_stripes; i++) {
2678 if (c->devices[i]->devobj) {
2679 PIO_STACK_LOCATION IrpSp;
2680
2681 context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, FALSE);
2682
2683 if (!context.stripes[i].Irp) {
2684 ERR("IoAllocateIrp failed\n");
2685 Status = STATUS_INSUFFICIENT_RESOURCES;
2686 goto end3;
2687 }
2688
2689 context.stripes[i].Irp->MdlAddress = NULL;
2690
2691 IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
2692 IrpSp->MajorFunction = IRP_MJ_READ;
2693
2694 if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) {
2695 context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(read_stripes * c->chunk_item->stripe_length), ALLOC_TAG);
2696 if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
2697 ERR("out of memory\n");
2698 Status = STATUS_INSUFFICIENT_RESOURCES;
2699 goto end3;
2700 }
2701
2702 context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION;
2703
2704 context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
2705 } else if (c->devices[i]->devobj->Flags & DO_DIRECT_IO) {
2706 context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, (ULONG)(read_stripes * c->chunk_item->stripe_length), FALSE, FALSE, NULL);
2707 if (!context.stripes[i].Irp->MdlAddress) {
2708 ERR("IoAllocateMdl failed\n");
2709 Status = STATUS_INSUFFICIENT_RESOURCES;
2710 goto end3;
2711 }
2712
2713 Status = STATUS_SUCCESS;
2714
2715 _SEH2_TRY {
2716 MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
2717 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2718 Status = _SEH2_GetExceptionCode();
2719 } _SEH2_END;
2720
2721 if (!NT_SUCCESS(Status)) {
2722 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
2723 IoFreeMdl(context.stripes[i].Irp->MdlAddress);
2724 goto end3;
2725 }
2726 } else
2727 context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
2728
2729 context.stripes[i].offset = stripe * c->chunk_item->stripe_length;
2730
2731 IrpSp->Parameters.Read.Length = (ULONG)(read_stripes * c->chunk_item->stripe_length);
2732 IrpSp->Parameters.Read.ByteOffset.QuadPart = cis[i].offset + context.stripes[i].offset;
2733
2734 context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
2735 context.stripes[i].missing = FALSE;
2736
2737 IoSetCompletionRoutine(context.stripes[i].Irp, scrub_read_completion_raid56, &context.stripes[i], TRUE, TRUE, TRUE);
2738
2739 Vcb->scrub.data_scrubbed += read_stripes * c->chunk_item->stripe_length;
2740 need_wait = TRUE;
2741 } else {
2742 context.stripes[i].Irp = NULL;
2743 context.stripes[i].missing = TRUE;
2744 missing_devices++;
2745 InterlockedDecrement(&context.stripes_left);
2746 }
2747 }
2748
2749 if (c->chunk_item->type & BLOCK_FLAG_RAID5 && missing_devices > 1) {
2750 ERR("too many missing devices (%u, maximum 1)\n", missing_devices);
2751 Status = STATUS_UNEXPECTED_IO_ERROR;
2752 goto end3;
2753 } else if (c->chunk_item->type & BLOCK_FLAG_RAID6 && missing_devices > 2) {
2754 ERR("too many missing devices (%u, maximum 2)\n", missing_devices);
2755 Status = STATUS_UNEXPECTED_IO_ERROR;
2756 goto end3;
2757 }
2758
2759 if (need_wait) {
2760 KeInitializeEvent(&context.Event, NotificationEvent, FALSE);
2761
2762 for (i = 0; i < c->chunk_item->num_stripes; i++) {
2763 if (c->devices[i]->devobj)
2764 IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp);
2765 }
2766
2767 KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);
2768 }
2769
2770 // return an error if any of the stripes returned an error
2771 for (i = 0; i < c->chunk_item->num_stripes; i++) {
2772 if (!context.stripes[i].missing && !NT_SUCCESS(context.stripes[i].iosb.Status)) {
2773 Status = context.stripes[i].iosb.Status;
2774 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_READ_ERRORS);
2775 goto end3;
2776 }
2777 }
2778
2779 if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2780 for (i = 0; i < read_stripes; i++) {
2781 scrub_raid6_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices);
2782 }
2783 } else {
2784 for (i = 0; i < read_stripes; i++) {
2785 scrub_raid5_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices);
2786 }
2787 }
2788 stripe += read_stripes;
2789
2790 end3:
2791 for (i = 0; i < c->chunk_item->num_stripes; i++) {
2792 if (context.stripes[i].Irp) {
2793 if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) {
2794 MmUnlockPages(context.stripes[i].Irp->MdlAddress);
2795 IoFreeMdl(context.stripes[i].Irp->MdlAddress);
2796 }
2797 IoFreeIrp(context.stripes[i].Irp);
2798 context.stripes[i].Irp = NULL;
2799
2800 if (context.stripes[i].rewrite) {
2801 Status = write_data_phys(c->devices[i]->devobj, cis[i].offset + context.stripes[i].offset,
2802 context.stripes[i].buf, (UINT32)(read_stripes * c->chunk_item->stripe_length));
2803
2804 if (!NT_SUCCESS(Status)) {
2805 ERR("write_data_phys returned %08x\n", Status);
2806 log_device_error(Vcb, c->devices[i], BTRFS_DEV_STAT_WRITE_ERRORS);
2807 goto end2;
2808 }
2809 }
2810 }
2811 }
2812
2813 if (!NT_SUCCESS(Status))
2814 break;
2815 } while (stripe < stripe_end);
2816
2817 end2:
2818 chunk_unlock_range(Vcb, c, run_start, run_end - run_start);
2819
2820 for (i = 0; i < c->chunk_item->num_stripes; i++) {
2821 ExFreePool(context.stripes[i].buf);
2822 ExFreePool(context.stripes[i].errorarr);
2823 }
2824 ExFreePool(context.stripes);
2825
2826 end:
2827 ExFreePool(treearr);
2828 ExFreePool(allocarr);
2829 ExFreePool(context.parity_scratch);
2830
2831 if (c->chunk_item->type & BLOCK_FLAG_RAID6)
2832 ExFreePool(context.parity_scratch2);
2833
2834 if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2835 ExFreePool(csumarr);
2836 ExFreePool(context.csum);
2837 }
2838
2839 return Status;
2840 }
2841
2842 static NTSTATUS scrub_chunk_raid56(device_extension* Vcb, chunk* c, UINT64* offset, BOOL* changed) {
2843 NTSTATUS Status;
2844 KEY searchkey;
2845 traverse_ptr tp;
2846 BOOL b;
2847 UINT64 full_stripe_len, stripe, stripe_start, stripe_end, total_data = 0;
2848 ULONG num_extents = 0, num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ? 2 : 1;
2849
2850 full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length;
2851 stripe = (*offset - c->offset) / full_stripe_len;
2852
2853 *offset = c->offset + (stripe * full_stripe_len);
2854
2855 searchkey.obj_id = *offset;
2856 searchkey.obj_type = TYPE_METADATA_ITEM;
2857 searchkey.offset = 0xffffffffffffffff;
2858
2859 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
2860 if (!NT_SUCCESS(Status)) {
2861 ERR("find_item returned %08x\n", Status);
2862 return Status;
2863 }
2864
2865 *changed = FALSE;
2866
2867 do {
2868 traverse_ptr next_tp;
2869
2870 if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
2871 break;
2872
2873 if (tp.item->key.obj_id >= *offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) {
2874 UINT64 size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset;
2875
2876 TRACE("%llx\n", tp.item->key.obj_id);
2877
2878 if (size < Vcb->superblock.sector_size) {
2879 ERR("extent %llx has size less than sector_size (%llx < %x)\n", tp.item->key.obj_id, Vcb->superblock.sector_size);
2880 return STATUS_INTERNAL_ERROR;
2881 }
2882
2883 stripe = (tp.item->key.obj_id - c->offset) / full_stripe_len;
2884
2885 if (*changed) {
2886 if (stripe > stripe_end + 1) {
2887 Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end);
2888 if (!NT_SUCCESS(Status)) {
2889 ERR("scrub_chunk_raid56_stripe_run returned %08x\n", Status);
2890 return Status;
2891 }
2892
2893 stripe_start = stripe;
2894 }
2895 } else
2896 stripe_start = stripe;
2897
2898 stripe_end = (tp.item->key.obj_id + size - 1 - c->offset) / full_stripe_len;
2899
2900 *changed = TRUE;
2901
2902 total_data += size;
2903 num_extents++;
2904
2905 // only do so much at a time
2906 if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB
2907 break;
2908 }
2909
2910 b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
2911
2912 if (b)
2913 tp = next_tp;
2914 } while (b);
2915
2916 if (*changed) {
2917 Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end);
2918 if (!NT_SUCCESS(Status)) {
2919 ERR("scrub_chunk_raid56_stripe_run returned %08x\n", Status);
2920 return Status;
2921 }
2922
2923 *offset = c->offset + ((stripe_end + 1) * full_stripe_len);
2924 }
2925
2926 return STATUS_SUCCESS;
2927 }
2928
2929 static NTSTATUS scrub_chunk(device_extension* Vcb, chunk* c, UINT64* offset, BOOL* changed) {
2930 NTSTATUS Status;
2931 KEY searchkey;
2932 traverse_ptr tp;
2933 BOOL b = FALSE, tree_run = FALSE;
2934 ULONG type, num_extents = 0;
2935 UINT64 total_data = 0, tree_run_start, tree_run_end;
2936
2937 TRACE("chunk %llx\n", c->offset);
2938
2939 ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE);
2940
2941 if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE)
2942 type = BLOCK_FLAG_DUPLICATE;
2943 else if (c->chunk_item->type & BLOCK_FLAG_RAID0)
2944 type = BLOCK_FLAG_RAID0;
2945 else if (c->chunk_item->type & BLOCK_FLAG_RAID1)
2946 type = BLOCK_FLAG_DUPLICATE;
2947 else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
2948 type = BLOCK_FLAG_RAID10;
2949 else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
2950 Status = scrub_chunk_raid56(Vcb, c, offset, changed);
2951 goto end;
2952 } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2953 Status = scrub_chunk_raid56(Vcb, c, offset, changed);
2954 goto end;
2955 } else // SINGLE
2956 type = BLOCK_FLAG_DUPLICATE;
2957
2958 searchkey.obj_id = *offset;
2959 searchkey.obj_type = TYPE_METADATA_ITEM;
2960 searchkey.offset = 0xffffffffffffffff;
2961
2962 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
2963 if (!NT_SUCCESS(Status)) {
2964 ERR("error - find_item returned %08x\n", Status);
2965 goto end;
2966 }
2967
2968 do {
2969 traverse_ptr next_tp;
2970
2971 if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
2972 break;
2973
2974 if (tp.item->key.obj_id >= *offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) {
2975 UINT64 size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset;
2976 BOOL is_tree;
2977 UINT32* csum = NULL;
2978 RTL_BITMAP bmp;
2979 ULONG* bmparr = NULL;
2980
2981 TRACE("%llx\n", tp.item->key.obj_id);
2982
2983 is_tree = FALSE;
2984
2985 if (tp.item->key.obj_type == TYPE_METADATA_ITEM)
2986 is_tree = TRUE;
2987 else {
2988 EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
2989
2990 if (tp.item->size < sizeof(EXTENT_ITEM)) {
2991 ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
2992 Status = STATUS_INTERNAL_ERROR;
2993 goto end;
2994 }
2995
2996 if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
2997 is_tree = TRUE;
2998 }
2999
3000 if (size < Vcb->superblock.sector_size) {
3001 ERR("extent %llx has size less than sector_size (%llx < %x)\n", tp.item->key.obj_id, Vcb->superblock.sector_size);
3002 Status = STATUS_INTERNAL_ERROR;
3003 goto end;
3004 }
3005
3006 // load csum
3007 if (!is_tree) {
3008 traverse_ptr tp2;
3009
3010 csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(sizeof(UINT32) * size / Vcb->superblock.sector_size), ALLOC_TAG);
3011 if (!csum) {
3012 ERR("out of memory\n");
3013 Status = STATUS_INSUFFICIENT_RESOURCES;
3014 goto end;
3015 }
3016
3017 bmparr = ExAllocatePoolWithTag(PagedPool, (ULONG)(sector_align(((size / Vcb->superblock.sector_size) >> 3) + 1, sizeof(ULONG))), ALLOC_TAG);
3018 if (!bmparr) {
3019 ERR("out of memory\n");
3020 ExFreePool(csum);
3021 Status = STATUS_INSUFFICIENT_RESOURCES;
3022 goto end;
3023 }
3024
3025 RtlInitializeBitMap(&bmp, bmparr, (ULONG)(size / Vcb->superblock.sector_size));
3026 RtlSetAllBits(&bmp); // 1 = no csum, 0 = csum
3027
3028 searchkey.obj_id = EXTENT_CSUM_ID;
3029 searchkey.obj_type = TYPE_EXTENT_CSUM;
3030 searchkey.offset = tp.item->key.obj_id;
3031
3032 Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, FALSE, NULL);
3033 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
3034 ERR("find_item returned %08x\n", Status);
3035 ExFreePool(csum);
3036 ExFreePool(bmparr);
3037 goto end;
3038 }
3039
3040 if (Status != STATUS_NOT_FOUND) {
3041 do {
3042 traverse_ptr next_tp2;
3043
3044 if (tp2.item->key.obj_type == TYPE_EXTENT_CSUM) {
3045 if (tp2.item->key.offset >= tp.item->key.obj_id + size)
3046 break;
3047 else if (tp2.item->size >= sizeof(UINT32) && tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= tp.item->key.obj_id) {
3048 UINT64 cs = max(tp.item->key.obj_id, tp2.item->key.offset);
3049 UINT64 ce = min(tp.item->key.obj_id + size, tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / sizeof(UINT32)));
3050
3051 RtlCopyMemory(csum + ((cs - tp.item->key.obj_id) / Vcb->superblock.sector_size),
3052 tp2.item->data + ((cs - tp2.item->key.offset) * sizeof(UINT32) / Vcb->superblock.sector_size),
3053 (ULONG)((ce - cs) * sizeof(UINT32) / Vcb->superblock.sector_size));
3054
3055 RtlClearBits(&bmp, (ULONG)((cs - tp.item->key.obj_id) / Vcb->superblock.sector_size), (ULONG)((ce - cs) / Vcb->superblock.sector_size));
3056
3057 if (ce == tp.item->key.obj_id + size)
3058 break;
3059 }
3060 }
3061
3062 if (find_next_item(Vcb, &tp2, &next_tp2, FALSE, NULL))
3063 tp2 = next_tp2;
3064 else
3065 break;
3066 } while (TRUE);
3067 }
3068 }
3069
3070 if (tree_run) {
3071 if (!is_tree || tp.item->key.obj_id > tree_run_end) {
3072 Status = scrub_extent(Vcb, c, type, tree_run_start, (UINT32)(tree_run_end - tree_run_start), NULL);
3073 if (!NT_SUCCESS(Status)) {
3074 ERR("scrub_extent returned %08x\n", Status);
3075 goto end;
3076 }
3077
3078 if (!is_tree)
3079 tree_run = FALSE;
3080 else {
3081 tree_run_start = tp.item->key.obj_id;
3082 tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size;
3083 }
3084 } else
3085 tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size;
3086 } else if (is_tree) {
3087 tree_run = TRUE;
3088 tree_run_start = tp.item->key.obj_id;
3089 tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size;
3090 }
3091
3092 if (!is_tree) {
3093 Status = scrub_data_extent(Vcb, c, tp.item->key.obj_id, type, csum, &bmp);
3094 if (!NT_SUCCESS(Status)) {
3095 ERR("scrub_data_extent returned %08x\n", Status);
3096 ExFreePool(csum);
3097 ExFreePool(bmparr);
3098 goto end;
3099 }
3100
3101 ExFreePool(csum);
3102 ExFreePool(bmparr);
3103 }
3104
3105 *offset = tp.item->key.obj_id + size;
3106 *changed = TRUE;
3107
3108 total_data += size;
3109 num_extents++;
3110
3111 // only do so much at a time
3112 if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB
3113 break;
3114 }
3115
3116 b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
3117
3118 if (b)
3119 tp = next_tp;
3120 } while (b);
3121
3122 if (tree_run) {
3123 Status = scrub_extent(Vcb, c, type, tree_run_start, (UINT32)(tree_run_end - tree_run_start), NULL);
3124 if (!NT_SUCCESS(Status)) {
3125 ERR("scrub_extent returned %08x\n", Status);
3126 goto end;
3127 }
3128 }
3129
3130 Status = STATUS_SUCCESS;
3131
3132 end:
3133 ExReleaseResourceLite(&Vcb->tree_lock);
3134
3135 return Status;
3136 }
3137
// Scrub worker thread. Flushes any pending writes, resets the scrub statistics,
// snapshots the list of writable chunks, then scrubs each chunk batch by batch,
// honouring pause (scrub.event) and stop (scrub.stopping) requests. On exit it
// closes its own thread handle and signals scrub.finished.
_Function_class_(KSTART_ROUTINE)
#ifdef __REACTOS__
static void NTAPI scrub_thread(void* context) {
#else
static void scrub_thread(void* context) {
#endif
    device_extension* Vcb = context;
    LIST_ENTRY chunks, *le;
    NTSTATUS Status;
    LARGE_INTEGER time;

    KeInitializeEvent(&Vcb->scrub.finished, NotificationEvent, FALSE);

    InitializeListHead(&chunks);

    ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);

    // Flush dirty metadata first so we scrub what is actually on disk.
    if (Vcb->need_write && !Vcb->readonly)
        Status = do_write(Vcb, NULL);
    else
        Status = STATUS_SUCCESS;

    free_trees(Vcb);

    if (!NT_SUCCESS(Status)) {
        ExReleaseResourceLite(&Vcb->tree_lock);
        ERR("do_write returned %08x\n", Status);
        Vcb->scrub.error = Status;
        goto end;
    }

    // Downgrade: scrubbing only needs shared access to the trees.
    ExConvertExclusiveToSharedLite(&Vcb->tree_lock);

    ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE);

    // Reset all statistics for a fresh scrub run.
    KeQuerySystemTime(&Vcb->scrub.start_time);
    Vcb->scrub.finish_time.QuadPart = 0;
    Vcb->scrub.resume_time.QuadPart = Vcb->scrub.start_time.QuadPart;
    Vcb->scrub.duration.QuadPart = 0;
    Vcb->scrub.total_chunks = 0;
    Vcb->scrub.chunks_left = 0;
    Vcb->scrub.data_scrubbed = 0;
    Vcb->scrub.num_errors = 0;

    // Discard errors recorded by any previous scrub.
    while (!IsListEmpty(&Vcb->scrub.errors)) {
        scrub_error* err = CONTAINING_RECORD(RemoveHeadList(&Vcb->scrub.errors), scrub_error, list_entry);
        ExFreePool(err);
    }

    ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);

    // Snapshot the writable chunks onto a private list, reusing each chunk's
    // list_entry_balance link (scrub and balance cannot run concurrently).
    le = Vcb->chunks.Flink;
    while (le != &Vcb->chunks) {
        chunk* c = CONTAINING_RECORD(le, chunk, list_entry);

        ExAcquireResourceExclusiveLite(&c->lock, TRUE);

        if (!c->readonly) {
            InsertTailList(&chunks, &c->list_entry_balance);
            Vcb->scrub.total_chunks++;
            Vcb->scrub.chunks_left++;
        }

        ExReleaseResourceLite(&c->lock);

        le = le->Flink;
    }

    ExReleaseResourceLite(&Vcb->chunk_lock);

    ExReleaseResource(&Vcb->scrub.stats_lock);

    ExReleaseResourceLite(&Vcb->tree_lock);

    while (!IsListEmpty(&chunks)) {
        chunk* c = CONTAINING_RECORD(RemoveHeadList(&chunks), chunk, list_entry_balance);
        UINT64 offset = c->offset;
        BOOL changed;

        c->reloc = TRUE;

        // Blocks here while paused; set by pause_scrub/resume_scrub/stop_scrub.
        KeWaitForSingleObject(&Vcb->scrub.event, Executive, KernelMode, FALSE, NULL);

        if (!Vcb->scrub.stopping) {
            // scrub_chunk works in batches; loop until it reports no progress
            // or the whole chunk has been covered.
            do {
                changed = FALSE;

                Status = scrub_chunk(Vcb, c, &offset, &changed);
                if (!NT_SUCCESS(Status)) {
                    ERR("scrub_chunk returned %08x\n", Status);
                    Vcb->scrub.stopping = TRUE;
                    Vcb->scrub.error = Status;
                    break;
                }

                if (offset == c->offset + c->chunk_item->size || Vcb->scrub.stopping)
                    break;

                KeWaitForSingleObject(&Vcb->scrub.event, Executive, KernelMode, FALSE, NULL);
            } while (changed);
        }

        ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, TRUE);

        if (!Vcb->scrub.stopping)
            Vcb->scrub.chunks_left--;

        if (IsListEmpty(&chunks))
            KeQuerySystemTime(&Vcb->scrub.finish_time);

        ExReleaseResource(&Vcb->scrub.stats_lock);

        // Detach the chunk from our private list again.
        c->reloc = FALSE;
        c->list_entry_balance.Flink = NULL;
    }

    // Add the final active interval to the cumulative duration.
    KeQuerySystemTime(&time);
    Vcb->scrub.duration.QuadPart += time.QuadPart - Vcb->scrub.resume_time.QuadPart;

end:
    ZwClose(Vcb->scrub.thread);
    Vcb->scrub.thread = NULL;

    KeSetEvent(&Vcb->scrub.finished, 0, FALSE);
}
3263
3264 NTSTATUS start_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
3265 NTSTATUS Status;
3266
3267 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3268 return STATUS_PRIVILEGE_NOT_HELD;
3269
3270 if (Vcb->locked) {
3271 WARN("cannot start scrub while locked\n");
3272 return STATUS_DEVICE_NOT_READY;
3273 }
3274
3275 if (Vcb->balance.thread) {
3276 WARN("cannot start scrub while balance running\n");
3277 return STATUS_DEVICE_NOT_READY;
3278 }
3279
3280 if (Vcb->scrub.thread) {
3281 WARN("scrub already running\n");
3282 return STATUS_DEVICE_NOT_READY;
3283 }
3284
3285 if (Vcb->readonly)
3286 return STATUS_MEDIA_WRITE_PROTECTED;
3287
3288 Vcb->scrub.stopping = FALSE;
3289 Vcb->scrub.paused = FALSE;
3290 Vcb->scrub.error = STATUS_SUCCESS;
3291 KeInitializeEvent(&Vcb->scrub.event, NotificationEvent, !Vcb->scrub.paused);
3292
3293 Status = PsCreateSystemThread(&Vcb->scrub.thread, 0, NULL, NULL, NULL, scrub_thread, Vcb);
3294 if (!NT_SUCCESS(Status)) {
3295 ERR("PsCreateSystemThread returned %08x\n", Status);
3296 return Status;
3297 }
3298
3299 return STATUS_SUCCESS;
3300 }
3301
// Fill a caller-supplied btrfs_query_scrub buffer with the current scrub
// status, statistics, and as many recorded errors as fit. Errors are packed as
// variable-length btrfs_scrub_error records chained via next_entry; returns
// STATUS_BUFFER_OVERFLOW if the buffer runs out mid-list, or
// STATUS_BUFFER_TOO_SMALL if even the fixed header does not fit.
NTSTATUS query_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode, void* data, ULONG length) {
    btrfs_query_scrub* bqs = (btrfs_query_scrub*)data;
    ULONG len;
    NTSTATUS Status;
    LIST_ENTRY* le;
    btrfs_scrub_error* bse = NULL;

    if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
        return STATUS_PRIVILEGE_NOT_HELD;

    // The buffer must at least hold the fixed-size part before the error list.
    if (length < offsetof(btrfs_query_scrub, errors))
        return STATUS_BUFFER_TOO_SMALL;

    ExAcquireResourceSharedLite(&Vcb->scrub.stats_lock, TRUE);

    if (Vcb->scrub.thread && Vcb->scrub.chunks_left > 0)
        bqs->status = Vcb->scrub.paused ? BTRFS_SCRUB_PAUSED : BTRFS_SCRUB_RUNNING;
    else
        bqs->status = BTRFS_SCRUB_STOPPED;

    bqs->start_time.QuadPart = Vcb->scrub.start_time.QuadPart;
    bqs->finish_time.QuadPart = Vcb->scrub.finish_time.QuadPart;
    bqs->chunks_left = Vcb->scrub.chunks_left;
    bqs->total_chunks = Vcb->scrub.total_chunks;
    bqs->data_scrubbed = Vcb->scrub.data_scrubbed;

    bqs->duration = Vcb->scrub.duration.QuadPart;

    // A running scrub's current interval is not yet folded into duration.
    if (bqs->status == BTRFS_SCRUB_RUNNING) {
        LARGE_INTEGER time;

        KeQuerySystemTime(&time);
        bqs->duration += time.QuadPart - Vcb->scrub.resume_time.QuadPart;
    }

    bqs->error = Vcb->scrub.error;

    bqs->num_errors = Vcb->scrub.num_errors;

    // Remaining space available for the variable-length error records.
    len = length - offsetof(btrfs_query_scrub, errors);

    le = Vcb->scrub.errors.Flink;
    while (le != &Vcb->scrub.errors) {
        scrub_error* err = CONTAINING_RECORD(le, scrub_error, list_entry);
        ULONG errlen;

        // Metadata errors have a fixed tail (a KEY); data errors carry a
        // variable-length filename.
        if (err->is_metadata)
            errlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY);
        else
            errlen = offsetof(btrfs_scrub_error, data.filename) + err->data.filename_length;

        if (len < errlen) {
            Status = STATUS_BUFFER_OVERFLOW;
            goto end;
        }

        if (!bse)
            bse = &bqs->errors;
        else {
            // Link the previous record to this one: next_entry is the byte
            // offset from the previous record to the next.
            ULONG lastlen;

            if (bse->is_metadata)
                lastlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY);
            else
                lastlen = offsetof(btrfs_scrub_error, data.filename) + bse->data.filename_length;

            bse->next_entry = lastlen;
            bse = (btrfs_scrub_error*)(((UINT8*)bse) + lastlen);
        }

        bse->next_entry = 0;
        bse->address = err->address;
        bse->device = err->device;
        bse->recovered = err->recovered;
        bse->is_metadata = err->is_metadata;
        bse->parity = err->parity;

        if (err->is_metadata) {
            bse->metadata.root = err->metadata.root;
            bse->metadata.level = err->metadata.level;
            bse->metadata.firstitem = err->metadata.firstitem;
        } else {
            bse->data.subvol = err->data.subvol;
            bse->data.offset = err->data.offset;
            bse->data.filename_length = err->data.filename_length;
            RtlCopyMemory(bse->data.filename, err->data.filename, err->data.filename_length);
        }

        len -= errlen;
        le = le->Flink;
    }

    Status = STATUS_SUCCESS;

end:
    ExReleaseResourceLite(&Vcb->scrub.stats_lock);

    return Status;
}
3401
3402 NTSTATUS pause_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
3403 LARGE_INTEGER time;
3404
3405 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3406 return STATUS_PRIVILEGE_NOT_HELD;
3407
3408 if (!Vcb->scrub.thread)
3409 return STATUS_DEVICE_NOT_READY;
3410
3411 if (Vcb->scrub.paused)
3412 return STATUS_DEVICE_NOT_READY;
3413
3414 Vcb->scrub.paused = TRUE;
3415 KeClearEvent(&Vcb->scrub.event);
3416
3417 KeQuerySystemTime(&time);
3418 Vcb->scrub.duration.QuadPart += time.QuadPart - Vcb->scrub.resume_time.QuadPart;
3419
3420 return STATUS_SUCCESS;
3421 }
3422
3423 NTSTATUS resume_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
3424 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3425 return STATUS_PRIVILEGE_NOT_HELD;
3426
3427 if (!Vcb->scrub.thread)
3428 return STATUS_DEVICE_NOT_READY;
3429
3430 if (!Vcb->scrub.paused)
3431 return STATUS_DEVICE_NOT_READY;
3432
3433 Vcb->scrub.paused = FALSE;
3434 KeSetEvent(&Vcb->scrub.event, 0, FALSE);
3435
3436 KeQuerySystemTime(&Vcb->scrub.resume_time);
3437
3438 return STATUS_SUCCESS;
3439 }
3440
3441 NTSTATUS stop_scrub(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
3442 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3443 return STATUS_PRIVILEGE_NOT_HELD;
3444
3445 if (!Vcb->scrub.thread)
3446 return STATUS_DEVICE_NOT_READY;
3447
3448 Vcb->scrub.paused = FALSE;
3449 Vcb->scrub.stopping = TRUE;
3450 KeSetEvent(&Vcb->scrub.event, 0, FALSE);
3451
3452 return STATUS_SUCCESS;
3453 }