1 /* Copyright (c) Mark Harmstone 2017
3 * This file is part of WinBtrfs.
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
18 #include "btrfs_drv.h"
20 #define SCRUB_UNIT 0x100000 // 1 MB
22 struct _scrub_context
;
25 struct _scrub_context
* context
;
33 } scrub_context_stripe
;
35 typedef struct _scrub_context
{
37 scrub_context_stripe
* stripes
;
44 LIST_ENTRY list_entry
;
47 static void log_file_checksum_error(device_extension
* Vcb
, uint64_t addr
, uint64_t devid
, uint64_t subvol
, uint64_t inode
, uint64_t offset
) {
48 LIST_ENTRY
*le
, parts
;
53 bool orig_subvol
= true, not_in_tree
= false;
59 le
= Vcb
->roots
.Flink
;
60 while (le
!= &Vcb
->roots
) {
61 root
* r2
= CONTAINING_RECORD(le
, root
, list_entry
);
63 if (r2
->id
== subvol
) {
72 ERR("could not find subvol %I64x\n", subvol
);
76 InitializeListHead(&parts
);
81 if (dir
== r
->root_item
.objid
) {
82 if (r
== Vcb
->root_fileref
->fcb
->subvol
)
85 searchkey
.obj_id
= r
->id
;
86 searchkey
.obj_type
= TYPE_ROOT_BACKREF
;
87 searchkey
.offset
= 0xffffffffffffffff;
89 Status
= find_item(Vcb
, Vcb
->root_root
, &tp
, &searchkey
, false, NULL
);
90 if (!NT_SUCCESS(Status
)) {
91 ERR("find_item returned %08x\n", Status
);
95 if (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
== searchkey
.obj_type
) {
96 ROOT_REF
* rr
= (ROOT_REF
*)tp
.item
->data
;
99 if (tp
.item
->size
< sizeof(ROOT_REF
)) {
100 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, tp
.item
->size
, sizeof(ROOT_REF
));
104 if (tp
.item
->size
< offsetof(ROOT_REF
, name
[0]) + rr
->n
) {
105 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
,
106 tp
.item
->size
, offsetof(ROOT_REF
, name
[0]) + rr
->n
);
110 pp
= ExAllocatePoolWithTag(PagedPool
, sizeof(path_part
), ALLOC_TAG
);
112 ERR("out of memory\n");
116 pp
->name
.Buffer
= rr
->name
;
117 pp
->name
.Length
= pp
->name
.MaximumLength
= rr
->n
;
118 pp
->orig_subvol
= false;
120 InsertTailList(&parts
, &pp
->list_entry
);
124 le
= Vcb
->roots
.Flink
;
125 while (le
!= &Vcb
->roots
) {
126 root
* r2
= CONTAINING_RECORD(le
, root
, list_entry
);
128 if (r2
->id
== tp
.item
->key
.offset
) {
137 ERR("could not find subvol %I64x\n", tp
.item
->key
.offset
);
148 searchkey
.obj_id
= dir
;
149 searchkey
.obj_type
= TYPE_INODE_EXTREF
;
150 searchkey
.offset
= 0xffffffffffffffff;
152 Status
= find_item(Vcb
, r
, &tp
, &searchkey
, false, NULL
);
153 if (!NT_SUCCESS(Status
)) {
154 ERR("find_item returned %08x\n", Status
);
158 if (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
== TYPE_INODE_REF
) {
159 INODE_REF
* ir
= (INODE_REF
*)tp
.item
->data
;
162 if (tp
.item
->size
< sizeof(INODE_REF
)) {
163 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, tp
.item
->size
, sizeof(INODE_REF
));
167 if (tp
.item
->size
< offsetof(INODE_REF
, name
[0]) + ir
->n
) {
168 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
,
169 tp
.item
->size
, offsetof(INODE_REF
, name
[0]) + ir
->n
);
173 pp
= ExAllocatePoolWithTag(PagedPool
, sizeof(path_part
), ALLOC_TAG
);
175 ERR("out of memory\n");
179 pp
->name
.Buffer
= ir
->name
;
180 pp
->name
.Length
= pp
->name
.MaximumLength
= ir
->n
;
181 pp
->orig_subvol
= orig_subvol
;
183 InsertTailList(&parts
, &pp
->list_entry
);
185 if (dir
== tp
.item
->key
.offset
)
188 dir
= tp
.item
->key
.offset
;
189 } else if (tp
.item
->key
.obj_id
== searchkey
.obj_id
&& tp
.item
->key
.obj_type
== TYPE_INODE_EXTREF
) {
190 INODE_EXTREF
* ier
= (INODE_EXTREF
*)tp
.item
->data
;
193 if (tp
.item
->size
< sizeof(INODE_EXTREF
)) {
194 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
,
195 tp
.item
->size
, sizeof(INODE_EXTREF
));
199 if (tp
.item
->size
< offsetof(INODE_EXTREF
, name
[0]) + ier
->n
) {
200 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
,
201 tp
.item
->size
, offsetof(INODE_EXTREF
, name
[0]) + ier
->n
);
205 pp
= ExAllocatePoolWithTag(PagedPool
, sizeof(path_part
), ALLOC_TAG
);
207 ERR("out of memory\n");
211 pp
->name
.Buffer
= ier
->name
;
212 pp
->name
.Length
= pp
->name
.MaximumLength
= ier
->n
;
213 pp
->orig_subvol
= orig_subvol
;
215 InsertTailList(&parts
, &pp
->list_entry
);
222 ERR("could not find INODE_REF for inode %I64x in subvol %I64x\n", dir
, r
->id
);
228 fn
.MaximumLength
= 0;
232 while (le
!= &parts
) {
233 path_part
* pp
= CONTAINING_RECORD(le
, path_part
, list_entry
);
234 LIST_ENTRY
* le2
= le
->Blink
;
239 RemoveTailList(&parts
);
247 while (le
!= &parts
) {
248 path_part
* pp
= CONTAINING_RECORD(le
, path_part
, list_entry
);
250 fn
.MaximumLength
+= pp
->name
.Length
+ 1;
255 fn
.Buffer
= ExAllocatePoolWithTag(PagedPool
, fn
.MaximumLength
, ALLOC_TAG
);
257 ERR("out of memory\n");
264 while (le
!= &parts
) {
265 path_part
* pp
= CONTAINING_RECORD(le
, path_part
, list_entry
);
267 fn
.Buffer
[fn
.Length
] = '\\';
270 RtlCopyMemory(&fn
.Buffer
[fn
.Length
], pp
->name
.Buffer
, pp
->name
.Length
);
271 fn
.Length
+= pp
->name
.Length
;
277 ERR("subvol %I64x, %.*s, offset %I64x\n", subvol
, fn
.Length
, fn
.Buffer
, offset
);
279 ERR("%.*s, offset %I64x\n", fn
.Length
, fn
.Buffer
, offset
);
281 Status
= utf8_to_utf16(NULL
, 0, &utf16len
, fn
.Buffer
, fn
.Length
);
282 if (!NT_SUCCESS(Status
)) {
283 ERR("utf8_to_utf16 1 returned %08x\n", Status
);
284 ExFreePool(fn
.Buffer
);
288 err
= ExAllocatePoolWithTag(PagedPool
, offsetof(scrub_error
, data
.filename
[0]) + utf16len
, ALLOC_TAG
);
290 ERR("out of memory\n");
291 ExFreePool(fn
.Buffer
);
297 err
->recovered
= false;
298 err
->is_metadata
= false;
301 err
->data
.subvol
= not_in_tree
? subvol
: 0;
302 err
->data
.offset
= offset
;
303 err
->data
.filename_length
= (uint16_t)utf16len
;
305 Status
= utf8_to_utf16(err
->data
.filename
, utf16len
, &utf16len
, fn
.Buffer
, fn
.Length
);
306 if (!NT_SUCCESS(Status
)) {
307 ERR("utf8_to_utf16 2 returned %08x\n", Status
);
308 ExFreePool(fn
.Buffer
);
313 ExAcquireResourceExclusiveLite(&Vcb
->scrub
.stats_lock
, true);
315 Vcb
->scrub
.num_errors
++;
316 InsertTailList(&Vcb
->scrub
.errors
, &err
->list_entry
);
318 ExReleaseResourceLite(&Vcb
->scrub
.stats_lock
);
320 ExFreePool(fn
.Buffer
);
323 while (!IsListEmpty(&parts
)) {
324 path_part
* pp
= CONTAINING_RECORD(RemoveHeadList(&parts
), path_part
, list_entry
);
330 static void log_file_checksum_error_shared(device_extension
* Vcb
, uint64_t treeaddr
, uint64_t addr
, uint64_t devid
, uint64_t extent
) {
336 tree
= ExAllocatePoolWithTag(PagedPool
, Vcb
->superblock
.node_size
, ALLOC_TAG
);
338 ERR("out of memory\n");
342 Status
= read_data(Vcb
, treeaddr
, Vcb
->superblock
.node_size
, NULL
, true, (uint8_t*)tree
, NULL
, NULL
, NULL
, 0, false, NormalPagePriority
);
343 if (!NT_SUCCESS(Status
)) {
344 ERR("read_data returned %08x\n", Status
);
348 if (tree
->level
!= 0) {
349 ERR("tree level was %x, expected 0\n", tree
->level
);
353 ln
= (leaf_node
*)&tree
[1];
355 for (i
= 0; i
< tree
->num_items
; i
++) {
356 if (ln
[i
].key
.obj_type
== TYPE_EXTENT_DATA
&& ln
[i
].size
>= sizeof(EXTENT_DATA
) - 1 + sizeof(EXTENT_DATA2
)) {
357 EXTENT_DATA
* ed
= (EXTENT_DATA
*)((uint8_t*)tree
+ sizeof(tree_header
) + ln
[i
].offset
);
358 EXTENT_DATA2
* ed2
= (EXTENT_DATA2
*)ed
->data
;
360 if (ed
->type
== EXTENT_TYPE_REGULAR
&& ed2
->size
!= 0 && ed2
->address
== addr
)
361 log_file_checksum_error(Vcb
, addr
, devid
, tree
->tree_id
, ln
[i
].key
.obj_id
, ln
[i
].key
.offset
+ addr
- extent
);
369 static void log_tree_checksum_error(device_extension
* Vcb
, uint64_t addr
, uint64_t devid
, uint64_t root
, uint8_t level
, KEY
* firstitem
) {
372 err
= ExAllocatePoolWithTag(PagedPool
, sizeof(scrub_error
), ALLOC_TAG
);
374 ERR("out of memory\n");
380 err
->recovered
= false;
381 err
->is_metadata
= true;
384 err
->metadata
.root
= root
;
385 err
->metadata
.level
= level
;
388 ERR("root %I64x, level %u, first item (%I64x,%x,%I64x)\n", root
, level
, firstitem
->obj_id
,
389 firstitem
->obj_type
, firstitem
->offset
);
391 err
->metadata
.firstitem
= *firstitem
;
393 ERR("root %I64x, level %u\n", root
, level
);
395 RtlZeroMemory(&err
->metadata
.firstitem
, sizeof(KEY
));
398 ExAcquireResourceExclusiveLite(&Vcb
->scrub
.stats_lock
, true);
400 Vcb
->scrub
.num_errors
++;
401 InsertTailList(&Vcb
->scrub
.errors
, &err
->list_entry
);
403 ExReleaseResourceLite(&Vcb
->scrub
.stats_lock
);
406 static void log_tree_checksum_error_shared(device_extension
* Vcb
, uint64_t offset
, uint64_t address
, uint64_t devid
) {
412 tree
= ExAllocatePoolWithTag(PagedPool
, Vcb
->superblock
.node_size
, ALLOC_TAG
);
414 ERR("out of memory\n");
418 Status
= read_data(Vcb
, offset
, Vcb
->superblock
.node_size
, NULL
, true, (uint8_t*)tree
, NULL
, NULL
, NULL
, 0, false, NormalPagePriority
);
419 if (!NT_SUCCESS(Status
)) {
420 ERR("read_data returned %08x\n", Status
);
424 if (tree
->level
== 0) {
425 ERR("tree level was 0\n");
429 in
= (internal_node
*)&tree
[1];
431 for (i
= 0; i
< tree
->num_items
; i
++) {
432 if (in
[i
].address
== address
) {
433 log_tree_checksum_error(Vcb
, address
, devid
, tree
->tree_id
, tree
->level
- 1, &in
[i
].key
);
442 static void log_unrecoverable_error(device_extension
* Vcb
, uint64_t address
, uint64_t devid
) {
447 EXTENT_ITEM2
* ei2
= NULL
;
452 // FIXME - still log even if rest of this function fails
454 searchkey
.obj_id
= address
;
455 searchkey
.obj_type
= TYPE_METADATA_ITEM
;
456 searchkey
.offset
= 0xffffffffffffffff;
458 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, NULL
);
459 if (!NT_SUCCESS(Status
)) {
460 ERR("find_item returned %08x\n", Status
);
464 if ((tp
.item
->key
.obj_type
!= TYPE_EXTENT_ITEM
&& tp
.item
->key
.obj_type
!= TYPE_METADATA_ITEM
) ||
465 tp
.item
->key
.obj_id
>= address
+ Vcb
->superblock
.sector_size
||
466 (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
&& tp
.item
->key
.obj_id
+ tp
.item
->key
.offset
<= address
) ||
467 (tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
&& tp
.item
->key
.obj_id
+ Vcb
->superblock
.node_size
<= address
)
471 if (tp
.item
->size
< sizeof(EXTENT_ITEM
)) {
472 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, tp
.item
->size
, sizeof(EXTENT_ITEM
));
476 ei
= (EXTENT_ITEM
*)tp
.item
->data
;
477 ptr
= (uint8_t*)&ei
[1];
478 len
= tp
.item
->size
- sizeof(EXTENT_ITEM
);
480 if (tp
.item
->key
.obj_id
== TYPE_EXTENT_ITEM
&& ei
->flags
& EXTENT_ITEM_TREE_BLOCK
) {
481 if (tp
.item
->size
< sizeof(EXTENT_ITEM
) + sizeof(EXTENT_ITEM2
)) {
482 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
,
483 tp
.item
->size
, sizeof(EXTENT_ITEM
) + sizeof(EXTENT_ITEM2
));
487 ei2
= (EXTENT_ITEM2
*)ptr
;
489 ptr
+= sizeof(EXTENT_ITEM2
);
490 len
-= sizeof(EXTENT_ITEM2
);
501 if (type
== TYPE_TREE_BLOCK_REF
) {
504 if (len
< sizeof(TREE_BLOCK_REF
)) {
505 ERR("TREE_BLOCK_REF takes up %u bytes, but only %u remaining\n", sizeof(TREE_BLOCK_REF
), len
);
509 tbr
= (TREE_BLOCK_REF
*)ptr
;
511 log_tree_checksum_error(Vcb
, address
, devid
, tbr
->offset
, ei2
? ei2
->level
: (uint8_t)tp
.item
->key
.offset
, ei2
? &ei2
->firstitem
: NULL
);
515 ptr
+= sizeof(TREE_BLOCK_REF
);
516 len
-= sizeof(TREE_BLOCK_REF
);
517 } else if (type
== TYPE_EXTENT_DATA_REF
) {
518 EXTENT_DATA_REF
* edr
;
520 if (len
< sizeof(EXTENT_DATA_REF
)) {
521 ERR("EXTENT_DATA_REF takes up %u bytes, but only %u remaining\n", sizeof(EXTENT_DATA_REF
), len
);
525 edr
= (EXTENT_DATA_REF
*)ptr
;
527 log_file_checksum_error(Vcb
, address
, devid
, edr
->root
, edr
->objid
, edr
->offset
+ address
- tp
.item
->key
.obj_id
);
531 ptr
+= sizeof(EXTENT_DATA_REF
);
532 len
-= sizeof(EXTENT_DATA_REF
);
533 } else if (type
== TYPE_SHARED_BLOCK_REF
) {
534 SHARED_BLOCK_REF
* sbr
;
536 if (len
< sizeof(SHARED_BLOCK_REF
)) {
537 ERR("SHARED_BLOCK_REF takes up %u bytes, but only %u remaining\n", sizeof(SHARED_BLOCK_REF
), len
);
541 sbr
= (SHARED_BLOCK_REF
*)ptr
;
543 log_tree_checksum_error_shared(Vcb
, sbr
->offset
, address
, devid
);
547 ptr
+= sizeof(SHARED_BLOCK_REF
);
548 len
-= sizeof(SHARED_BLOCK_REF
);
549 } else if (type
== TYPE_SHARED_DATA_REF
) {
550 SHARED_DATA_REF
* sdr
;
552 if (len
< sizeof(SHARED_DATA_REF
)) {
553 ERR("SHARED_DATA_REF takes up %u bytes, but only %u remaining\n", sizeof(SHARED_DATA_REF
), len
);
557 sdr
= (SHARED_DATA_REF
*)ptr
;
559 log_file_checksum_error_shared(Vcb
, sdr
->offset
, address
, devid
, tp
.item
->key
.obj_id
);
563 ptr
+= sizeof(SHARED_DATA_REF
);
564 len
-= sizeof(SHARED_DATA_REF
);
566 ERR("unknown extent type %x\n", type
);
571 if (rc
< ei
->refcount
) {
573 traverse_ptr next_tp
;
575 if (find_next_item(Vcb
, &tp
, &next_tp
, false, NULL
))
580 if (tp
.item
->key
.obj_id
== address
) {
581 if (tp
.item
->key
.obj_type
== TYPE_TREE_BLOCK_REF
)
582 log_tree_checksum_error(Vcb
, address
, devid
, tp
.item
->key
.offset
, ei2
? ei2
->level
: (uint8_t)tp
.item
->key
.offset
, ei2
? &ei2
->firstitem
: NULL
);
583 else if (tp
.item
->key
.obj_type
== TYPE_EXTENT_DATA_REF
) {
584 EXTENT_DATA_REF
* edr
;
586 if (tp
.item
->size
< sizeof(EXTENT_DATA_REF
)) {
587 ERR("(%I64x,%x,%I64x) was %u bytes, expected %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
,
588 tp
.item
->size
, sizeof(EXTENT_DATA_REF
));
592 edr
= (EXTENT_DATA_REF
*)tp
.item
->data
;
594 log_file_checksum_error(Vcb
, address
, devid
, edr
->root
, edr
->objid
, edr
->offset
+ address
- tp
.item
->key
.obj_id
);
595 } else if (tp
.item
->key
.obj_type
== TYPE_SHARED_BLOCK_REF
)
596 log_tree_checksum_error_shared(Vcb
, tp
.item
->key
.offset
, address
, devid
);
597 else if (tp
.item
->key
.obj_type
== TYPE_SHARED_DATA_REF
)
598 log_file_checksum_error_shared(Vcb
, tp
.item
->key
.offset
, address
, devid
, tp
.item
->key
.obj_id
);
605 static void log_error(device_extension
* Vcb
, uint64_t addr
, uint64_t devid
, bool metadata
, bool recoverable
, bool parity
) {
610 ERR("recovering from parity error at %I64x on device %I64x\n", addr
, devid
);
613 ERR("recovering from metadata checksum error at %I64x on device %I64x\n", addr
, devid
);
615 ERR("recovering from data checksum error at %I64x on device %I64x\n", addr
, devid
);
618 err
= ExAllocatePoolWithTag(PagedPool
, sizeof(scrub_error
), ALLOC_TAG
);
620 ERR("out of memory\n");
626 err
->recovered
= true;
627 err
->is_metadata
= metadata
;
628 err
->parity
= parity
;
631 RtlZeroMemory(&err
->metadata
, sizeof(err
->metadata
));
633 RtlZeroMemory(&err
->data
, sizeof(err
->data
));
635 ExAcquireResourceExclusiveLite(&Vcb
->scrub
.stats_lock
, true);
637 Vcb
->scrub
.num_errors
++;
638 InsertTailList(&Vcb
->scrub
.errors
, &err
->list_entry
);
640 ExReleaseResourceLite(&Vcb
->scrub
.stats_lock
);
643 ERR("unrecoverable metadata checksum error at %I64x\n", addr
);
645 ERR("unrecoverable data checksum error at %I64x\n", addr
);
647 log_unrecoverable_error(Vcb
, addr
, devid
);
651 _Function_class_(IO_COMPLETION_ROUTINE
)
652 static NTSTATUS __stdcall
scrub_read_completion(PDEVICE_OBJECT DeviceObject
, PIRP Irp
, PVOID conptr
) {
653 scrub_context_stripe
* stripe
= conptr
;
654 scrub_context
* context
= (scrub_context
*)stripe
->context
;
655 ULONG left
= InterlockedDecrement(&context
->stripes_left
);
657 UNUSED(DeviceObject
);
659 stripe
->iosb
= Irp
->IoStatus
;
662 KeSetEvent(&context
->Event
, 0, false);
664 return STATUS_MORE_PROCESSING_REQUIRED
;
667 static NTSTATUS
scrub_extent_dup(device_extension
* Vcb
, chunk
* c
, uint64_t offset
, uint32_t* csum
, scrub_context
* context
) {
669 bool csum_error
= false;
671 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
672 uint16_t present_devices
= 0;
675 ULONG good_stripe
= 0xffffffff;
677 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
678 if (c
->devices
[i
]->devobj
) {
681 // if first stripe is okay, we only need to check that the others are identical to it
682 if (good_stripe
!= 0xffffffff) {
683 if (RtlCompareMemory(context
->stripes
[i
].buf
, context
->stripes
[good_stripe
].buf
,
684 context
->stripes
[good_stripe
].length
) != context
->stripes
[i
].length
) {
685 context
->stripes
[i
].csum_error
= true;
687 log_device_error(Vcb
, c
->devices
[i
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
690 Status
= check_csum(Vcb
, context
->stripes
[i
].buf
, context
->stripes
[i
].length
/ Vcb
->superblock
.sector_size
, csum
);
691 if (Status
== STATUS_CRC_ERROR
) {
692 context
->stripes
[i
].csum_error
= true;
694 log_device_error(Vcb
, c
->devices
[i
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
695 } else if (!NT_SUCCESS(Status
)) {
696 ERR("check_csum returned %08x\n", Status
);
704 ULONG good_stripe
= 0xffffffff;
706 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
709 if (c
->devices
[i
]->devobj
) {
710 // if first stripe is okay, we only need to check that the others are identical to it
711 if (good_stripe
!= 0xffffffff) {
712 if (RtlCompareMemory(context
->stripes
[i
].buf
, context
->stripes
[good_stripe
].buf
,
713 context
->stripes
[good_stripe
].length
) != context
->stripes
[i
].length
) {
714 context
->stripes
[i
].csum_error
= true;
716 log_device_error(Vcb
, c
->devices
[i
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
719 for (j
= 0; j
< context
->stripes
[i
].length
/ Vcb
->superblock
.node_size
; j
++) {
720 tree_header
* th
= (tree_header
*)&context
->stripes
[i
].buf
[j
* Vcb
->superblock
.node_size
];
721 uint32_t crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
723 if (crc32
!= *((uint32_t*)th
->csum
) || th
->address
!= offset
+ UInt32x32To64(j
, Vcb
->superblock
.node_size
)) {
724 context
->stripes
[i
].csum_error
= true;
726 log_device_error(Vcb
, c
->devices
[i
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
730 if (!context
->stripes
[i
].csum_error
)
738 return STATUS_SUCCESS
;
740 // handle checksum error
742 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
743 if (context
->stripes
[i
].csum_error
) {
745 context
->stripes
[i
].bad_csums
= ExAllocatePoolWithTag(PagedPool
, context
->stripes
[i
].length
* sizeof(uint32_t) / Vcb
->superblock
.sector_size
, ALLOC_TAG
);
746 if (!context
->stripes
[i
].bad_csums
) {
747 ERR("out of memory\n");
748 return STATUS_INSUFFICIENT_RESOURCES
;
751 Status
= calc_csum(Vcb
, context
->stripes
[i
].buf
, context
->stripes
[i
].length
/ Vcb
->superblock
.sector_size
, context
->stripes
[i
].bad_csums
);
752 if (!NT_SUCCESS(Status
)) {
753 ERR("calc_csum returned %08x\n", Status
);
759 context
->stripes
[i
].bad_csums
= ExAllocatePoolWithTag(PagedPool
, context
->stripes
[i
].length
* sizeof(uint32_t) / Vcb
->superblock
.node_size
, ALLOC_TAG
);
760 if (!context
->stripes
[i
].bad_csums
) {
761 ERR("out of memory\n");
762 return STATUS_INSUFFICIENT_RESOURCES
;
765 for (j
= 0; j
< context
->stripes
[i
].length
/ Vcb
->superblock
.node_size
; j
++) {
766 tree_header
* th
= (tree_header
*)&context
->stripes
[i
].buf
[j
* Vcb
->superblock
.node_size
];
767 uint32_t crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
769 context
->stripes
[i
].bad_csums
[j
] = crc32
;
775 if (present_devices
> 1) {
776 ULONG good_stripe
= 0xffffffff;
778 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
779 if (c
->devices
[i
]->devobj
&& !context
->stripes
[i
].csum_error
) {
785 if (good_stripe
!= 0xffffffff) {
788 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
789 if (context
->stripes
[i
].csum_error
) {
793 for (j
= 0; j
< context
->stripes
[i
].length
/ Vcb
->superblock
.sector_size
; j
++) {
794 if (context
->stripes
[i
].bad_csums
[j
] != csum
[j
]) {
795 uint64_t addr
= offset
+ UInt32x32To64(j
, Vcb
->superblock
.sector_size
);
797 log_error(Vcb
, addr
, c
->devices
[i
]->devitem
.dev_id
, false, true, false);
798 log_device_error(Vcb
, c
->devices
[i
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
802 for (j
= 0; j
< context
->stripes
[i
].length
/ Vcb
->superblock
.node_size
; j
++) {
803 tree_header
* th
= (tree_header
*)&context
->stripes
[i
].buf
[j
* Vcb
->superblock
.node_size
];
804 uint64_t addr
= offset
+ UInt32x32To64(j
, Vcb
->superblock
.node_size
);
806 if (context
->stripes
[i
].bad_csums
[j
] != *((uint32_t*)th
->csum
) || th
->address
!= addr
) {
807 log_error(Vcb
, addr
, c
->devices
[i
]->devitem
.dev_id
, true, true, false);
808 log_device_error(Vcb
, c
->devices
[i
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
815 // write good data over bad
817 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
818 if (context
->stripes
[i
].csum_error
&& !c
->devices
[i
]->readonly
) {
819 Status
= write_data_phys(c
->devices
[i
]->devobj
, c
->devices
[i
]->fileobj
, cis
[i
].offset
+ offset
- c
->offset
,
820 context
->stripes
[good_stripe
].buf
, context
->stripes
[i
].length
);
822 if (!NT_SUCCESS(Status
)) {
823 ERR("write_data_phys returned %08x\n", Status
);
824 log_device_error(Vcb
, c
->devices
[i
], BTRFS_DEV_STAT_WRITE_ERRORS
);
830 return STATUS_SUCCESS
;
833 // if csum errors on all stripes, check sector by sector
835 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
838 if (c
->devices
[i
]->devobj
) {
840 for (j
= 0; j
< context
->stripes
[i
].length
/ Vcb
->superblock
.sector_size
; j
++) {
841 if (context
->stripes
[i
].bad_csums
[j
] != csum
[j
]) {
843 uint64_t addr
= offset
+ UInt32x32To64(j
, Vcb
->superblock
.sector_size
);
844 bool recovered
= false;
846 for (k
= 0; k
< c
->chunk_item
->num_stripes
; k
++) {
847 if (i
!= k
&& c
->devices
[k
]->devobj
&& context
->stripes
[k
].bad_csums
[j
] == csum
[j
]) {
848 log_error(Vcb
, addr
, c
->devices
[i
]->devitem
.dev_id
, false, true, false);
849 log_device_error(Vcb
, c
->devices
[i
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
851 RtlCopyMemory(context
->stripes
[i
].buf
+ (j
* Vcb
->superblock
.sector_size
),
852 context
->stripes
[k
].buf
+ (j
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
860 log_error(Vcb
, addr
, c
->devices
[i
]->devitem
.dev_id
, false, false, false);
861 log_device_error(Vcb
, c
->devices
[i
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
866 for (j
= 0; j
< context
->stripes
[i
].length
/ Vcb
->superblock
.node_size
; j
++) {
867 tree_header
* th
= (tree_header
*)&context
->stripes
[i
].buf
[j
* Vcb
->superblock
.node_size
];
868 uint64_t addr
= offset
+ UInt32x32To64(j
, Vcb
->superblock
.node_size
);
870 if (context
->stripes
[i
].bad_csums
[j
] != *((uint32_t*)th
->csum
) || th
->address
!= addr
) {
872 bool recovered
= false;
874 for (k
= 0; k
< c
->chunk_item
->num_stripes
; k
++) {
875 if (i
!= k
&& c
->devices
[k
]->devobj
) {
876 tree_header
* th2
= (tree_header
*)&context
->stripes
[k
].buf
[j
* Vcb
->superblock
.node_size
];
878 if (context
->stripes
[k
].bad_csums
[j
] == *((uint32_t*)th2
->csum
) && th2
->address
== addr
) {
879 log_error(Vcb
, addr
, c
->devices
[i
]->devitem
.dev_id
, true, true, false);
880 log_device_error(Vcb
, c
->devices
[i
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
882 RtlCopyMemory(th
, th2
, Vcb
->superblock
.node_size
);
891 log_error(Vcb
, addr
, c
->devices
[i
]->devitem
.dev_id
, true, false, false);
892 log_device_error(Vcb
, c
->devices
[i
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
900 // write good data over bad
902 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
903 if (c
->devices
[i
]->devobj
&& !c
->devices
[i
]->readonly
) {
904 Status
= write_data_phys(c
->devices
[i
]->devobj
, c
->devices
[i
]->fileobj
, cis
[i
].offset
+ offset
- c
->offset
,
905 context
->stripes
[i
].buf
, context
->stripes
[i
].length
);
906 if (!NT_SUCCESS(Status
)) {
907 ERR("write_data_phys returned %08x\n", Status
);
908 log_device_error(Vcb
, c
->devices
[i
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
914 return STATUS_SUCCESS
;
917 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
918 if (c
->devices
[i
]->devobj
) {
922 for (j
= 0; j
< context
->stripes
[i
].length
/ Vcb
->superblock
.sector_size
; j
++) {
923 if (context
->stripes
[i
].bad_csums
[j
] != csum
[j
]) {
924 uint64_t addr
= offset
+ UInt32x32To64(j
, Vcb
->superblock
.sector_size
);
926 log_error(Vcb
, addr
, c
->devices
[i
]->devitem
.dev_id
, false, false, false);
930 for (j
= 0; j
< context
->stripes
[i
].length
/ Vcb
->superblock
.node_size
; j
++) {
931 tree_header
* th
= (tree_header
*)&context
->stripes
[i
].buf
[j
* Vcb
->superblock
.node_size
];
932 uint64_t addr
= offset
+ UInt32x32To64(j
, Vcb
->superblock
.node_size
);
934 if (context
->stripes
[i
].bad_csums
[j
] != *((uint32_t*)th
->csum
) || th
->address
!= addr
)
935 log_error(Vcb
, addr
, c
->devices
[i
]->devitem
.dev_id
, true, false, false);
941 return STATUS_SUCCESS
;
944 static NTSTATUS
scrub_extent_raid0(device_extension
* Vcb
, chunk
* c
, uint64_t offset
, uint32_t length
, uint16_t startoffstripe
, uint32_t* csum
, scrub_context
* context
) {
947 uint32_t pos
, *stripeoff
;
950 stripeoff
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(uint32_t) * c
->chunk_item
->num_stripes
, ALLOC_TAG
);
952 ERR("out of memory\n");
953 return STATUS_INSUFFICIENT_RESOURCES
;
956 RtlZeroMemory(stripeoff
, sizeof(uint32_t) * c
->chunk_item
->num_stripes
);
958 stripe
= startoffstripe
;
959 while (pos
< length
) {
963 readlen
= (uint32_t)min(context
->stripes
[stripe
].length
, c
->chunk_item
->stripe_length
- (context
->stripes
[stripe
].start
% c
->chunk_item
->stripe_length
));
965 readlen
= min(length
- pos
, (uint32_t)c
->chunk_item
->stripe_length
);
968 for (j
= 0; j
< readlen
; j
+= Vcb
->superblock
.sector_size
) {
969 uint32_t crc32
= ~calc_crc32c(0xffffffff, context
->stripes
[stripe
].buf
+ stripeoff
[stripe
], Vcb
->superblock
.sector_size
);
971 if (crc32
!= csum
[pos
/ Vcb
->superblock
.sector_size
]) {
972 uint64_t addr
= offset
+ pos
;
974 log_error(Vcb
, addr
, c
->devices
[stripe
]->devitem
.dev_id
, false, false, false);
975 log_device_error(Vcb
, c
->devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
978 pos
+= Vcb
->superblock
.sector_size
;
979 stripeoff
[stripe
] += Vcb
->superblock
.sector_size
;
982 for (j
= 0; j
< readlen
; j
+= Vcb
->superblock
.node_size
) {
983 tree_header
* th
= (tree_header
*)(context
->stripes
[stripe
].buf
+ stripeoff
[stripe
]);
984 uint32_t crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
985 uint64_t addr
= offset
+ pos
;
987 if (crc32
!= *((uint32_t*)th
->csum
) || th
->address
!= addr
) {
988 log_error(Vcb
, addr
, c
->devices
[stripe
]->devitem
.dev_id
, true, false, false);
989 log_device_error(Vcb
, c
->devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
992 pos
+= Vcb
->superblock
.node_size
;
993 stripeoff
[stripe
] += Vcb
->superblock
.node_size
;
997 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
1000 ExFreePool(stripeoff
);
1002 return STATUS_SUCCESS
;
1005 static NTSTATUS
scrub_extent_raid10(device_extension
* Vcb
, chunk
* c
, uint64_t offset
, uint32_t length
, uint16_t startoffstripe
, uint32_t* csum
, scrub_context
* context
) {
1007 uint16_t stripe
, sub_stripes
= max(c
->chunk_item
->sub_stripes
, 1);
1008 uint32_t pos
, *stripeoff
;
1009 bool csum_error
= false;
1013 stripeoff
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(uint32_t) * c
->chunk_item
->num_stripes
/ sub_stripes
, ALLOC_TAG
);
1015 ERR("out of memory\n");
1016 return STATUS_INSUFFICIENT_RESOURCES
;
1019 RtlZeroMemory(stripeoff
, sizeof(uint32_t) * c
->chunk_item
->num_stripes
/ sub_stripes
);
1021 stripe
= startoffstripe
;
1022 while (pos
< length
) {
1026 readlen
= (uint32_t)min(context
->stripes
[stripe
* sub_stripes
].length
,
1027 c
->chunk_item
->stripe_length
- (context
->stripes
[stripe
* sub_stripes
].start
% c
->chunk_item
->stripe_length
));
1029 readlen
= min(length
- pos
, (uint32_t)c
->chunk_item
->stripe_length
);
1032 ULONG good_stripe
= 0xffffffff;
1035 for (k
= 0; k
< sub_stripes
; k
++) {
1036 if (c
->devices
[(stripe
* sub_stripes
) + k
]->devobj
) {
1037 // if first stripe is okay, we only need to check that the others are identical to it
1038 if (good_stripe
!= 0xffffffff) {
1039 if (RtlCompareMemory(context
->stripes
[(stripe
* sub_stripes
) + k
].buf
+ stripeoff
[stripe
],
1040 context
->stripes
[(stripe
* sub_stripes
) + good_stripe
].buf
+ stripeoff
[stripe
],
1041 readlen
) != readlen
) {
1042 context
->stripes
[(stripe
* sub_stripes
) + k
].csum_error
= true;
1044 log_device_error(Vcb
, c
->devices
[(stripe
* sub_stripes
) + k
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1047 for (j
= 0; j
< readlen
; j
+= Vcb
->superblock
.sector_size
) {
1048 uint32_t crc32
= ~calc_crc32c(0xffffffff, context
->stripes
[(stripe
* sub_stripes
) + k
].buf
+ stripeoff
[stripe
] + j
, Vcb
->superblock
.sector_size
);
1050 if (crc32
!= csum
[(pos
+ j
) / Vcb
->superblock
.sector_size
]) {
1052 context
->stripes
[(stripe
* sub_stripes
) + k
].csum_error
= true;
1053 log_device_error(Vcb
, c
->devices
[(stripe
* sub_stripes
) + k
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1058 if (!context
->stripes
[(stripe
* sub_stripes
) + k
].csum_error
)
1065 stripeoff
[stripe
] += readlen
;
1067 ULONG good_stripe
= 0xffffffff;
1070 for (k
= 0; k
< sub_stripes
; k
++) {
1071 if (c
->devices
[(stripe
* sub_stripes
) + k
]->devobj
) {
1072 // if first stripe is okay, we only need to check that the others are identical to it
1073 if (good_stripe
!= 0xffffffff) {
1074 if (RtlCompareMemory(context
->stripes
[(stripe
* sub_stripes
) + k
].buf
+ stripeoff
[stripe
],
1075 context
->stripes
[(stripe
* sub_stripes
) + good_stripe
].buf
+ stripeoff
[stripe
],
1076 readlen
) != readlen
) {
1077 context
->stripes
[(stripe
* sub_stripes
) + k
].csum_error
= true;
1079 log_device_error(Vcb
, c
->devices
[(stripe
* sub_stripes
) + k
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1082 for (j
= 0; j
< readlen
; j
+= Vcb
->superblock
.node_size
) {
1083 tree_header
* th
= (tree_header
*)(context
->stripes
[(stripe
* sub_stripes
) + k
].buf
+ stripeoff
[stripe
] + j
);
1084 uint32_t crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
1085 uint64_t addr
= offset
+ pos
+ j
;
1087 if (crc32
!= *((uint32_t*)th
->csum
) || th
->address
!= addr
) {
1089 context
->stripes
[(stripe
* sub_stripes
) + k
].csum_error
= true;
1090 log_device_error(Vcb
, c
->devices
[(stripe
* sub_stripes
) + k
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1095 if (!context
->stripes
[(stripe
* sub_stripes
) + k
].csum_error
)
1102 stripeoff
[stripe
] += readlen
;
1105 stripe
= (stripe
+ 1) % (c
->chunk_item
->num_stripes
/ sub_stripes
);
1109 Status
= STATUS_SUCCESS
;
1113 for (j
= 0; j
< c
->chunk_item
->num_stripes
; j
+= sub_stripes
) {
1114 ULONG goodstripe
= 0xffffffff;
1116 bool hasbadstripe
= false;
1118 if (context
->stripes
[j
].length
== 0)
1121 for (k
= 0; k
< sub_stripes
; k
++) {
1122 if (c
->devices
[j
+ k
]->devobj
) {
1123 if (!context
->stripes
[j
+ k
].csum_error
)
1126 hasbadstripe
= true;
1131 if (goodstripe
!= 0xffffffff) {
1132 for (k
= 0; k
< sub_stripes
; k
++) {
1133 if (c
->devices
[j
+ k
]->devobj
&& context
->stripes
[j
+ k
].csum_error
) {
1135 bool recovered
= false;
1139 stripe
= startoffstripe
;
1140 while (pos
< length
) {
1144 readlen
= (uint32_t)min(context
->stripes
[stripe
* sub_stripes
].length
,
1145 c
->chunk_item
->stripe_length
- (context
->stripes
[stripe
* sub_stripes
].start
% c
->chunk_item
->stripe_length
));
1147 readlen
= min(length
- pos
, (uint32_t)c
->chunk_item
->stripe_length
);
1149 if (stripe
== j
/ sub_stripes
) {
1153 for (l
= 0; l
< readlen
; l
+= Vcb
->superblock
.sector_size
) {
1154 if (RtlCompareMemory(context
->stripes
[j
+ k
].buf
+ so
,
1155 context
->stripes
[j
+ goodstripe
].buf
+ so
,
1156 Vcb
->superblock
.sector_size
) != Vcb
->superblock
.sector_size
) {
1157 uint64_t addr
= offset
+ pos
;
1159 log_error(Vcb
, addr
, c
->devices
[j
+ k
]->devitem
.dev_id
, false, true, false);
1164 pos
+= Vcb
->superblock
.sector_size
;
1165 so
+= Vcb
->superblock
.sector_size
;
1170 for (l
= 0; l
< readlen
; l
+= Vcb
->superblock
.node_size
) {
1171 if (RtlCompareMemory(context
->stripes
[j
+ k
].buf
+ so
,
1172 context
->stripes
[j
+ goodstripe
].buf
+ so
,
1173 Vcb
->superblock
.node_size
) != Vcb
->superblock
.node_size
) {
1174 uint64_t addr
= offset
+ pos
;
1176 log_error(Vcb
, addr
, c
->devices
[j
+ k
]->devitem
.dev_id
, true, true, false);
1181 pos
+= Vcb
->superblock
.node_size
;
1182 so
+= Vcb
->superblock
.node_size
;
1188 stripe
= (stripe
+ 1) % (c
->chunk_item
->num_stripes
/ sub_stripes
);
1192 // write good data over bad
1194 if (!c
->devices
[j
+ k
]->readonly
) {
1195 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
1197 Status
= write_data_phys(c
->devices
[j
+ k
]->devobj
, c
->devices
[j
+ k
]->fileobj
, cis
[j
+ k
].offset
+ offset
- c
->offset
,
1198 context
->stripes
[j
+ goodstripe
].buf
, context
->stripes
[j
+ goodstripe
].length
);
1200 if (!NT_SUCCESS(Status
)) {
1201 ERR("write_data_phys returned %08x\n", Status
);
1202 log_device_error(Vcb
, c
->devices
[j
+ k
], BTRFS_DEV_STAT_WRITE_ERRORS
);
1211 bool recovered
= false;
1214 for (k
= 0; k
< sub_stripes
; k
++) {
1215 if (c
->devices
[j
+ k
]->devobj
) {
1216 context
->stripes
[j
+ k
].bad_csums
= ExAllocatePoolWithTag(PagedPool
, context
->stripes
[j
+ k
].length
* sizeof(uint32_t) / Vcb
->superblock
.sector_size
, ALLOC_TAG
);
1217 if (!context
->stripes
[j
+ k
].bad_csums
) {
1218 ERR("out of memory\n");
1219 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1223 Status
= calc_csum(Vcb
, context
->stripes
[j
+ k
].buf
, context
->stripes
[j
+ k
].length
/ Vcb
->superblock
.sector_size
, context
->stripes
[j
+ k
].bad_csums
);
1224 if (!NT_SUCCESS(Status
)) {
1225 ERR("calc_csum returned %08x\n", Status
);
1231 for (k
= 0; k
< sub_stripes
; k
++) {
1232 if (c
->devices
[j
+ k
]->devobj
) {
1235 context
->stripes
[j
+ k
].bad_csums
= ExAllocatePoolWithTag(PagedPool
, context
->stripes
[j
+ k
].length
* sizeof(uint32_t) / Vcb
->superblock
.node_size
, ALLOC_TAG
);
1236 if (!context
->stripes
[j
+ k
].bad_csums
) {
1237 ERR("out of memory\n");
1238 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1242 for (l
= 0; l
< context
->stripes
[j
+ k
].length
/ Vcb
->superblock
.node_size
; l
++) {
1243 tree_header
* th
= (tree_header
*)&context
->stripes
[j
+ k
].buf
[l
* Vcb
->superblock
.node_size
];
1244 uint32_t crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
1246 context
->stripes
[j
+ k
].bad_csums
[l
] = crc32
;
1254 stripe
= startoffstripe
;
1255 while (pos
< length
) {
1259 readlen
= (uint32_t)min(context
->stripes
[stripe
* sub_stripes
].length
,
1260 c
->chunk_item
->stripe_length
- (context
->stripes
[stripe
* sub_stripes
].start
% c
->chunk_item
->stripe_length
));
1262 readlen
= min(length
- pos
, (uint32_t)c
->chunk_item
->stripe_length
);
1264 if (stripe
== j
/ sub_stripes
) {
1268 for (l
= 0; l
< readlen
; l
+= Vcb
->superblock
.sector_size
) {
1269 uint32_t crc32
= csum
[pos
/ Vcb
->superblock
.sector_size
];
1270 bool has_error
= false;
1272 goodstripe
= 0xffffffff;
1273 for (k
= 0; k
< sub_stripes
; k
++) {
1274 if (c
->devices
[j
+ k
]->devobj
) {
1275 if (context
->stripes
[j
+ k
].bad_csums
[so
/ Vcb
->superblock
.sector_size
] != crc32
)
1283 if (goodstripe
!= 0xffffffff) {
1284 for (k
= 0; k
< sub_stripes
; k
++) {
1285 if (c
->devices
[j
+ k
]->devobj
&& context
->stripes
[j
+ k
].bad_csums
[so
/ Vcb
->superblock
.sector_size
] != crc32
) {
1286 uint64_t addr
= offset
+ pos
;
1288 log_error(Vcb
, addr
, c
->devices
[j
+ k
]->devitem
.dev_id
, false, true, false);
1292 RtlCopyMemory(context
->stripes
[j
+ k
].buf
+ so
, context
->stripes
[j
+ goodstripe
].buf
+ so
,
1293 Vcb
->superblock
.sector_size
);
1297 uint64_t addr
= offset
+ pos
;
1299 for (k
= 0; k
< sub_stripes
; k
++) {
1300 if (c
->devices
[j
+ j
]->devobj
) {
1301 log_error(Vcb
, addr
, c
->devices
[j
+ k
]->devitem
.dev_id
, false, false, false);
1302 log_device_error(Vcb
, c
->devices
[j
+ k
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1308 pos
+= Vcb
->superblock
.sector_size
;
1309 so
+= Vcb
->superblock
.sector_size
;
1312 for (l
= 0; l
< readlen
; l
+= Vcb
->superblock
.node_size
) {
1313 for (k
= 0; k
< sub_stripes
; k
++) {
1314 if (c
->devices
[j
+ k
]->devobj
) {
1315 tree_header
* th
= (tree_header
*)&context
->stripes
[j
+ k
].buf
[so
];
1316 uint64_t addr
= offset
+ pos
;
1318 if (context
->stripes
[j
+ k
].bad_csums
[so
/ Vcb
->superblock
.node_size
] != *((uint32_t*)th
->csum
) || th
->address
!= addr
) {
1323 for (m
= 0; m
< sub_stripes
; m
++) {
1325 tree_header
* th2
= (tree_header
*)&context
->stripes
[j
+ m
].buf
[so
];
1327 if (context
->stripes
[j
+ m
].bad_csums
[so
/ Vcb
->superblock
.node_size
] == *((uint32_t*)th2
->csum
) && th2
->address
== addr
) {
1328 log_error(Vcb
, addr
, c
->devices
[j
+ k
]->devitem
.dev_id
, true, true, false);
1330 RtlCopyMemory(th
, th2
, Vcb
->superblock
.node_size
);
1335 log_device_error(Vcb
, c
->devices
[j
+ m
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1340 log_error(Vcb
, addr
, c
->devices
[j
+ k
]->devitem
.dev_id
, true, false, false);
1345 pos
+= Vcb
->superblock
.node_size
;
1346 so
+= Vcb
->superblock
.node_size
;
1352 stripe
= (stripe
+ 1) % (c
->chunk_item
->num_stripes
/ sub_stripes
);
1356 // write good data over bad
1358 for (k
= 0; k
< sub_stripes
; k
++) {
1359 if (c
->devices
[j
+ k
]->devobj
&& !c
->devices
[j
+ k
]->readonly
) {
1360 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
1362 Status
= write_data_phys(c
->devices
[j
+ k
]->devobj
, c
->devices
[j
+ k
]->fileobj
, cis
[j
+ k
].offset
+ offset
- c
->offset
,
1363 context
->stripes
[j
+ k
].buf
, context
->stripes
[j
+ k
].length
);
1365 if (!NT_SUCCESS(Status
)) {
1366 ERR("write_data_phys returned %08x\n", Status
);
1367 log_device_error(Vcb
, c
->devices
[j
+ k
], BTRFS_DEV_STAT_WRITE_ERRORS
);
1377 Status
= STATUS_SUCCESS
;
1380 ExFreePool(stripeoff
);
1385 static NTSTATUS
scrub_extent(device_extension
* Vcb
, chunk
* c
, ULONG type
, uint64_t offset
, uint32_t size
, uint32_t* csum
) {
1387 scrub_context context
;
1388 CHUNK_ITEM_STRIPE
* cis
;
1390 uint16_t startoffstripe
, num_missing
, allowed_missing
;
1392 TRACE("(%p, %p, %I64x, %I64x, %p)\n", Vcb
, c
, offset
, size
, csum
);
1394 context
.stripes
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(scrub_context_stripe
) * c
->chunk_item
->num_stripes
, ALLOC_TAG
);
1395 if (!context
.stripes
) {
1396 ERR("out of memory\n");
1397 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1401 RtlZeroMemory(context
.stripes
, sizeof(scrub_context_stripe
) * c
->chunk_item
->num_stripes
);
1403 context
.stripes_left
= 0;
1405 cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
1407 if (type
== BLOCK_FLAG_RAID0
) {
1408 uint64_t startoff
, endoff
;
1409 uint16_t endoffstripe
;
1411 get_raid0_offset(offset
- c
->offset
, c
->chunk_item
->stripe_length
, c
->chunk_item
->num_stripes
, &startoff
, &startoffstripe
);
1412 get_raid0_offset(offset
+ size
- c
->offset
- 1, c
->chunk_item
->stripe_length
, c
->chunk_item
->num_stripes
, &endoff
, &endoffstripe
);
1414 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
1415 if (startoffstripe
> i
)
1416 context
.stripes
[i
].start
= startoff
- (startoff
% c
->chunk_item
->stripe_length
) + c
->chunk_item
->stripe_length
;
1417 else if (startoffstripe
== i
)
1418 context
.stripes
[i
].start
= startoff
;
1420 context
.stripes
[i
].start
= startoff
- (startoff
% c
->chunk_item
->stripe_length
);
1422 if (endoffstripe
> i
)
1423 context
.stripes
[i
].length
= (uint32_t)(endoff
- (endoff
% c
->chunk_item
->stripe_length
) + c
->chunk_item
->stripe_length
- context
.stripes
[i
].start
);
1424 else if (endoffstripe
== i
)
1425 context
.stripes
[i
].length
= (uint32_t)(endoff
+ 1 - context
.stripes
[i
].start
);
1427 context
.stripes
[i
].length
= (uint32_t)(endoff
- (endoff
% c
->chunk_item
->stripe_length
) - context
.stripes
[i
].start
);
1430 allowed_missing
= 0;
1431 } else if (type
== BLOCK_FLAG_RAID10
) {
1432 uint64_t startoff
, endoff
;
1433 uint16_t endoffstripe
, j
, sub_stripes
= max(c
->chunk_item
->sub_stripes
, 1);
1435 get_raid0_offset(offset
- c
->offset
, c
->chunk_item
->stripe_length
, c
->chunk_item
->num_stripes
/ sub_stripes
, &startoff
, &startoffstripe
);
1436 get_raid0_offset(offset
+ size
- c
->offset
- 1, c
->chunk_item
->stripe_length
, c
->chunk_item
->num_stripes
/ sub_stripes
, &endoff
, &endoffstripe
);
1438 if ((c
->chunk_item
->num_stripes
% sub_stripes
) != 0) {
1439 ERR("chunk %I64x: num_stripes %x was not a multiple of sub_stripes %x!\n", c
->offset
, c
->chunk_item
->num_stripes
, sub_stripes
);
1440 Status
= STATUS_INTERNAL_ERROR
;
1444 startoffstripe
*= sub_stripes
;
1445 endoffstripe
*= sub_stripes
;
1447 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
+= sub_stripes
) {
1448 if (startoffstripe
> i
)
1449 context
.stripes
[i
].start
= startoff
- (startoff
% c
->chunk_item
->stripe_length
) + c
->chunk_item
->stripe_length
;
1450 else if (startoffstripe
== i
)
1451 context
.stripes
[i
].start
= startoff
;
1453 context
.stripes
[i
].start
= startoff
- (startoff
% c
->chunk_item
->stripe_length
);
1455 if (endoffstripe
> i
)
1456 context
.stripes
[i
].length
= (uint32_t)(endoff
- (endoff
% c
->chunk_item
->stripe_length
) + c
->chunk_item
->stripe_length
- context
.stripes
[i
].start
);
1457 else if (endoffstripe
== i
)
1458 context
.stripes
[i
].length
= (uint32_t)(endoff
+ 1 - context
.stripes
[i
].start
);
1460 context
.stripes
[i
].length
= (uint32_t)(endoff
- (endoff
% c
->chunk_item
->stripe_length
) - context
.stripes
[i
].start
);
1462 for (j
= 1; j
< sub_stripes
; j
++) {
1463 context
.stripes
[i
+j
].start
= context
.stripes
[i
].start
;
1464 context
.stripes
[i
+j
].length
= context
.stripes
[i
].length
;
1468 startoffstripe
/= sub_stripes
;
1469 allowed_missing
= 1;
1471 allowed_missing
= c
->chunk_item
->num_stripes
- 1;
1475 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
1476 PIO_STACK_LOCATION IrpSp
;
1478 context
.stripes
[i
].context
= (struct _scrub_context
*)&context
;
1480 if (type
== BLOCK_FLAG_DUPLICATE
) {
1481 context
.stripes
[i
].start
= offset
- c
->offset
;
1482 context
.stripes
[i
].length
= size
;
1483 } else if (type
!= BLOCK_FLAG_RAID0
&& type
!= BLOCK_FLAG_RAID10
) {
1484 ERR("unexpected chunk type %x\n", type
);
1485 Status
= STATUS_INTERNAL_ERROR
;
1489 if (!c
->devices
[i
]->devobj
) {
1492 if (num_missing
> allowed_missing
) {
1493 ERR("too many missing devices (at least %u, maximum allowed %u)\n", num_missing
, allowed_missing
);
1494 Status
= STATUS_INTERNAL_ERROR
;
1497 } else if (context
.stripes
[i
].length
> 0) {
1498 context
.stripes
[i
].buf
= ExAllocatePoolWithTag(NonPagedPool
, context
.stripes
[i
].length
, ALLOC_TAG
);
1500 if (!context
.stripes
[i
].buf
) {
1501 ERR("out of memory\n");
1502 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1506 context
.stripes
[i
].Irp
= IoAllocateIrp(c
->devices
[i
]->devobj
->StackSize
, false);
1508 if (!context
.stripes
[i
].Irp
) {
1509 ERR("IoAllocateIrp failed\n");
1510 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1514 IrpSp
= IoGetNextIrpStackLocation(context
.stripes
[i
].Irp
);
1515 IrpSp
->MajorFunction
= IRP_MJ_READ
;
1516 IrpSp
->FileObject
= c
->devices
[i
]->fileobj
;
1518 if (c
->devices
[i
]->devobj
->Flags
& DO_BUFFERED_IO
) {
1519 context
.stripes
[i
].Irp
->AssociatedIrp
.SystemBuffer
= ExAllocatePoolWithTag(NonPagedPool
, context
.stripes
[i
].length
, ALLOC_TAG
);
1520 if (!context
.stripes
[i
].Irp
->AssociatedIrp
.SystemBuffer
) {
1521 ERR("out of memory\n");
1522 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1526 context
.stripes
[i
].Irp
->Flags
|= IRP_BUFFERED_IO
| IRP_DEALLOCATE_BUFFER
| IRP_INPUT_OPERATION
;
1528 context
.stripes
[i
].Irp
->UserBuffer
= context
.stripes
[i
].buf
;
1529 } else if (c
->devices
[i
]->devobj
->Flags
& DO_DIRECT_IO
) {
1530 context
.stripes
[i
].Irp
->MdlAddress
= IoAllocateMdl(context
.stripes
[i
].buf
, context
.stripes
[i
].length
, false, false, NULL
);
1531 if (!context
.stripes
[i
].Irp
->MdlAddress
) {
1532 ERR("IoAllocateMdl failed\n");
1533 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1537 Status
= STATUS_SUCCESS
;
1540 MmProbeAndLockPages(context
.stripes
[i
].Irp
->MdlAddress
, KernelMode
, IoWriteAccess
);
1541 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
1542 Status
= _SEH2_GetExceptionCode();
1545 if (!NT_SUCCESS(Status
)) {
1546 ERR("MmProbeAndLockPages threw exception %08x\n", Status
);
1547 IoFreeMdl(context
.stripes
[i
].Irp
->MdlAddress
);
1548 context
.stripes
[i
].Irp
->MdlAddress
= NULL
;
1552 context
.stripes
[i
].Irp
->UserBuffer
= context
.stripes
[i
].buf
;
1554 IrpSp
->Parameters
.Read
.Length
= context
.stripes
[i
].length
;
1555 IrpSp
->Parameters
.Read
.ByteOffset
.QuadPart
= context
.stripes
[i
].start
+ cis
[i
].offset
;
1557 context
.stripes
[i
].Irp
->UserIosb
= &context
.stripes
[i
].iosb
;
1559 IoSetCompletionRoutine(context
.stripes
[i
].Irp
, scrub_read_completion
, &context
.stripes
[i
], true, true, true);
1561 context
.stripes_left
++;
1563 Vcb
->scrub
.data_scrubbed
+= context
.stripes
[i
].length
;
1567 if (context
.stripes_left
== 0) {
1568 ERR("error - not reading any stripes\n");
1569 Status
= STATUS_INTERNAL_ERROR
;
1573 KeInitializeEvent(&context
.Event
, NotificationEvent
, false);
1575 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
1576 if (c
->devices
[i
]->devobj
&& context
.stripes
[i
].length
> 0)
1577 IoCallDriver(c
->devices
[i
]->devobj
, context
.stripes
[i
].Irp
);
1580 KeWaitForSingleObject(&context
.Event
, Executive
, KernelMode
, false, NULL
);
1582 // return an error if any of the stripes returned an error
1583 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
1584 if (!NT_SUCCESS(context
.stripes
[i
].iosb
.Status
)) {
1585 Status
= context
.stripes
[i
].iosb
.Status
;
1586 log_device_error(Vcb
, c
->devices
[i
], BTRFS_DEV_STAT_READ_ERRORS
);
1591 if (type
== BLOCK_FLAG_DUPLICATE
) {
1592 Status
= scrub_extent_dup(Vcb
, c
, offset
, csum
, &context
);
1593 if (!NT_SUCCESS(Status
)) {
1594 ERR("scrub_extent_dup returned %08x\n", Status
);
1597 } else if (type
== BLOCK_FLAG_RAID0
) {
1598 Status
= scrub_extent_raid0(Vcb
, c
, offset
, size
, startoffstripe
, csum
, &context
);
1599 if (!NT_SUCCESS(Status
)) {
1600 ERR("scrub_extent_raid0 returned %08x\n", Status
);
1603 } else if (type
== BLOCK_FLAG_RAID10
) {
1604 Status
= scrub_extent_raid10(Vcb
, c
, offset
, size
, startoffstripe
, csum
, &context
);
1605 if (!NT_SUCCESS(Status
)) {
1606 ERR("scrub_extent_raid10 returned %08x\n", Status
);
1612 if (context
.stripes
) {
1613 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
1614 if (context
.stripes
[i
].Irp
) {
1615 if (c
->devices
[i
]->devobj
->Flags
& DO_DIRECT_IO
&& context
.stripes
[i
].Irp
->MdlAddress
) {
1616 MmUnlockPages(context
.stripes
[i
].Irp
->MdlAddress
);
1617 IoFreeMdl(context
.stripes
[i
].Irp
->MdlAddress
);
1619 IoFreeIrp(context
.stripes
[i
].Irp
);
1622 if (context
.stripes
[i
].buf
)
1623 ExFreePool(context
.stripes
[i
].buf
);
1625 if (context
.stripes
[i
].bad_csums
)
1626 ExFreePool(context
.stripes
[i
].bad_csums
);
1629 ExFreePool(context
.stripes
);
1635 static NTSTATUS
scrub_data_extent(device_extension
* Vcb
, chunk
* c
, uint64_t offset
, ULONG type
, uint32_t* csum
, RTL_BITMAP
* bmp
, ULONG bmplen
) {
1637 ULONG runlength
, index
;
1639 runlength
= RtlFindFirstRunClear(bmp
, &index
);
1641 while (runlength
!= 0) {
1642 if (index
>= bmplen
)
1645 if (index
+ runlength
>= bmplen
) {
1646 runlength
= bmplen
- index
;
1655 if (runlength
* Vcb
->superblock
.sector_size
> SCRUB_UNIT
)
1656 rl
= SCRUB_UNIT
/ Vcb
->superblock
.sector_size
;
1660 Status
= scrub_extent(Vcb
, c
, type
, offset
+ UInt32x32To64(index
, Vcb
->superblock
.sector_size
), rl
* Vcb
->superblock
.sector_size
, &csum
[index
]);
1661 if (!NT_SUCCESS(Status
)) {
1662 ERR("scrub_data_extent_dup returned %08x\n", Status
);
1668 } while (runlength
> 0);
1670 runlength
= RtlFindNextForwardRunClear(bmp
, index
, &index
);
1673 return STATUS_SUCCESS
;
1680 IO_STATUS_BLOCK iosb
;
1682 bool rewrite
, missing
;
1685 } scrub_context_raid56_stripe
;
1688 scrub_context_raid56_stripe
* stripes
;
1692 RTL_BITMAP has_csum
;
1695 uint8_t* parity_scratch
;
1696 uint8_t* parity_scratch2
;
1697 } scrub_context_raid56
;
1699 _Function_class_(IO_COMPLETION_ROUTINE
)
1700 static NTSTATUS __stdcall
scrub_read_completion_raid56(PDEVICE_OBJECT DeviceObject
, PIRP Irp
, PVOID conptr
) {
1701 scrub_context_raid56_stripe
* stripe
= conptr
;
1702 scrub_context_raid56
* context
= (scrub_context_raid56
*)stripe
->context
;
1703 LONG left
= InterlockedDecrement(&context
->stripes_left
);
1705 UNUSED(DeviceObject
);
1707 stripe
->iosb
= Irp
->IoStatus
;
1710 KeSetEvent(&context
->Event
, 0, false);
1712 return STATUS_MORE_PROCESSING_REQUIRED
;
1715 static void scrub_raid5_stripe(device_extension
* Vcb
, chunk
* c
, scrub_context_raid56
* context
, uint64_t stripe_start
, uint64_t bit_start
,
1716 uint64_t num
, uint16_t missing_devices
) {
1717 ULONG sectors_per_stripe
= (ULONG
)(c
->chunk_item
->stripe_length
/ Vcb
->superblock
.sector_size
), i
, off
;
1718 uint16_t stripe
, parity
= (bit_start
+ num
+ c
->chunk_item
->num_stripes
- 1) % c
->chunk_item
->num_stripes
;
1721 stripe
= (parity
+ 1) % c
->chunk_item
->num_stripes
;
1722 off
= (ULONG
)(bit_start
+ num
- stripe_start
) * sectors_per_stripe
* (c
->chunk_item
->num_stripes
- 1);
1723 stripeoff
= num
* sectors_per_stripe
;
1725 if (missing_devices
== 0)
1726 RtlCopyMemory(context
->parity_scratch
, &context
->stripes
[parity
].buf
[num
* c
->chunk_item
->stripe_length
], (ULONG
)c
->chunk_item
->stripe_length
);
1728 while (stripe
!= parity
) {
1729 RtlClearAllBits(&context
->stripes
[stripe
].error
);
1731 for (i
= 0; i
< sectors_per_stripe
; i
++) {
1732 if (c
->devices
[stripe
]->devobj
&& RtlCheckBit(&context
->alloc
, off
)) {
1733 if (RtlCheckBit(&context
->is_tree
, off
)) {
1734 tree_header
* th
= (tree_header
*)&context
->stripes
[stripe
].buf
[stripeoff
* Vcb
->superblock
.sector_size
];
1735 uint64_t addr
= c
->offset
+ (stripe_start
* (c
->chunk_item
->num_stripes
- 1) * c
->chunk_item
->stripe_length
) + (off
* Vcb
->superblock
.sector_size
);
1736 uint32_t crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
1738 if (crc32
!= *((uint32_t*)th
->csum
) || th
->address
!= addr
) {
1739 RtlSetBits(&context
->stripes
[stripe
].error
, i
, Vcb
->superblock
.node_size
/ Vcb
->superblock
.sector_size
);
1740 log_device_error(Vcb
, c
->devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1742 if (missing_devices
> 0)
1743 log_error(Vcb
, addr
, c
->devices
[stripe
]->devitem
.dev_id
, true, false, false);
1746 off
+= Vcb
->superblock
.node_size
/ Vcb
->superblock
.sector_size
;
1747 stripeoff
+= Vcb
->superblock
.node_size
/ Vcb
->superblock
.sector_size
;
1748 i
+= (Vcb
->superblock
.node_size
/ Vcb
->superblock
.sector_size
) - 1;
1751 } else if (RtlCheckBit(&context
->has_csum
, off
)) {
1752 uint32_t crc32
= ~calc_crc32c(0xffffffff, context
->stripes
[stripe
].buf
+ (stripeoff
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1754 if (crc32
!= context
->csum
[off
]) {
1755 RtlSetBit(&context
->stripes
[stripe
].error
, i
);
1756 log_device_error(Vcb
, c
->devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1758 if (missing_devices
> 0) {
1759 uint64_t addr
= c
->offset
+ (stripe_start
* (c
->chunk_item
->num_stripes
- 1) * c
->chunk_item
->stripe_length
) + (off
* Vcb
->superblock
.sector_size
);
1761 log_error(Vcb
, addr
, c
->devices
[stripe
]->devitem
.dev_id
, false, false, false);
1771 if (missing_devices
== 0)
1772 do_xor(context
->parity_scratch
, &context
->stripes
[stripe
].buf
[num
* c
->chunk_item
->stripe_length
], (ULONG
)c
->chunk_item
->stripe_length
);
1774 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
1775 stripeoff
= num
* sectors_per_stripe
;
1780 if (missing_devices
== 0) {
1781 RtlClearAllBits(&context
->stripes
[parity
].error
);
1783 for (i
= 0; i
< sectors_per_stripe
; i
++) {
1786 o
= i
* Vcb
->superblock
.sector_size
;
1787 for (j
= 0; j
< Vcb
->superblock
.sector_size
; j
++) { // FIXME - use SSE
1788 if (context
->parity_scratch
[o
] != 0) {
1789 RtlSetBit(&context
->stripes
[parity
].error
, i
);
1797 // log and fix errors
1799 if (missing_devices
> 0)
1802 for (i
= 0; i
< sectors_per_stripe
; i
++) {
1803 ULONG num_errors
= 0, bad_off
;
1804 uint64_t bad_stripe
;
1807 stripe
= (parity
+ 1) % c
->chunk_item
->num_stripes
;
1808 off
= (ULONG
)((bit_start
+ num
- stripe_start
) * sectors_per_stripe
* (c
->chunk_item
->num_stripes
- 1)) + i
;
1810 while (stripe
!= parity
) {
1811 if (RtlCheckBit(&context
->alloc
, off
)) {
1814 if (RtlCheckBit(&context
->stripes
[stripe
].error
, i
)) {
1815 bad_stripe
= stripe
;
1821 off
+= sectors_per_stripe
;
1822 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
1828 if (num_errors
== 0 && !RtlCheckBit(&context
->stripes
[parity
].error
, i
)) // everything fine
1831 if (num_errors
== 0 && RtlCheckBit(&context
->stripes
[parity
].error
, i
)) { // parity error
1834 do_xor(&context
->stripes
[parity
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
1835 &context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
],
1836 Vcb
->superblock
.sector_size
);
1838 bad_off
= (ULONG
)((bit_start
+ num
- stripe_start
) * sectors_per_stripe
* (c
->chunk_item
->num_stripes
- 1)) + i
;
1839 addr
= c
->offset
+ (stripe_start
* (c
->chunk_item
->num_stripes
- 1) * c
->chunk_item
->stripe_length
) + (bad_off
* Vcb
->superblock
.sector_size
);
1841 context
->stripes
[parity
].rewrite
= true;
1843 log_error(Vcb
, addr
, c
->devices
[parity
]->devitem
.dev_id
, false, true, true);
1844 log_device_error(Vcb
, c
->devices
[parity
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1845 } else if (num_errors
== 1) {
1847 uint64_t addr
= c
->offset
+ (stripe_start
* (c
->chunk_item
->num_stripes
- 1) * c
->chunk_item
->stripe_length
) + (bad_off
* Vcb
->superblock
.sector_size
);
1849 if (RtlCheckBit(&context
->is_tree
, bad_off
)) {
1852 do_xor(&context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
],
1853 &context
->stripes
[bad_stripe
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
1854 Vcb
->superblock
.node_size
);
1856 th
= (tree_header
*)&context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
];
1857 crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
1859 if (crc32
== *((uint32_t*)th
->csum
) && th
->address
== addr
) {
1860 RtlCopyMemory(&context
->stripes
[bad_stripe
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
1861 &context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
], Vcb
->superblock
.node_size
);
1863 context
->stripes
[bad_stripe
].rewrite
= true;
1865 RtlClearBits(&context
->stripes
[bad_stripe
].error
, i
+ 1, (Vcb
->superblock
.node_size
/ Vcb
->superblock
.sector_size
) - 1);
1867 log_error(Vcb
, addr
, c
->devices
[bad_stripe
]->devitem
.dev_id
, true, true, false);
1869 log_error(Vcb
, addr
, c
->devices
[bad_stripe
]->devitem
.dev_id
, true, false, false);
1871 do_xor(&context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
],
1872 &context
->stripes
[bad_stripe
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
1873 Vcb
->superblock
.sector_size
);
1875 crc32
= ~calc_crc32c(0xffffffff, &context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
], Vcb
->superblock
.sector_size
);
1877 if (crc32
== context
->csum
[bad_off
]) {
1878 RtlCopyMemory(&context
->stripes
[bad_stripe
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
1879 &context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
], Vcb
->superblock
.sector_size
);
1881 context
->stripes
[bad_stripe
].rewrite
= true;
1883 log_error(Vcb
, addr
, c
->devices
[bad_stripe
]->devitem
.dev_id
, false, true, false);
1885 log_error(Vcb
, addr
, c
->devices
[bad_stripe
]->devitem
.dev_id
, false, false, false);
1888 stripe
= (parity
+ 1) % c
->chunk_item
->num_stripes
;
1889 off
= (ULONG
)((bit_start
+ num
- stripe_start
) * sectors_per_stripe
* (c
->chunk_item
->num_stripes
- 1)) + i
;
1891 while (stripe
!= parity
) {
1892 if (RtlCheckBit(&context
->alloc
, off
)) {
1893 if (RtlCheckBit(&context
->stripes
[stripe
].error
, i
)) {
1894 uint64_t addr
= c
->offset
+ (stripe_start
* (c
->chunk_item
->num_stripes
- 1) * c
->chunk_item
->stripe_length
) + (off
* Vcb
->superblock
.sector_size
);
1896 log_error(Vcb
, addr
, c
->devices
[stripe
]->devitem
.dev_id
, RtlCheckBit(&context
->is_tree
, off
), false, false);
1900 off
+= sectors_per_stripe
;
1901 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
1907 static void scrub_raid6_stripe(device_extension
* Vcb
, chunk
* c
, scrub_context_raid56
* context
, uint64_t stripe_start
, uint64_t bit_start
,
1908 uint64_t num
, uint16_t missing_devices
) {
1909 ULONG sectors_per_stripe
= (ULONG
)(c
->chunk_item
->stripe_length
/ Vcb
->superblock
.sector_size
), i
, off
;
1910 uint16_t stripe
, parity1
= (bit_start
+ num
+ c
->chunk_item
->num_stripes
- 2) % c
->chunk_item
->num_stripes
;
1911 uint16_t parity2
= (parity1
+ 1) % c
->chunk_item
->num_stripes
;
1914 stripe
= (parity1
+ 2) % c
->chunk_item
->num_stripes
;
1915 off
= (ULONG
)(bit_start
+ num
- stripe_start
) * sectors_per_stripe
* (c
->chunk_item
->num_stripes
- 2);
1916 stripeoff
= num
* sectors_per_stripe
;
1918 if (c
->devices
[parity1
]->devobj
)
1919 RtlCopyMemory(context
->parity_scratch
, &context
->stripes
[parity1
].buf
[num
* c
->chunk_item
->stripe_length
], (ULONG
)c
->chunk_item
->stripe_length
);
1921 if (c
->devices
[parity2
]->devobj
)
1922 RtlZeroMemory(context
->parity_scratch2
, (ULONG
)c
->chunk_item
->stripe_length
);
1924 while (stripe
!= parity1
) {
1925 RtlClearAllBits(&context
->stripes
[stripe
].error
);
1927 for (i
= 0; i
< sectors_per_stripe
; i
++) {
1928 if (c
->devices
[stripe
]->devobj
&& RtlCheckBit(&context
->alloc
, off
)) {
1929 if (RtlCheckBit(&context
->is_tree
, off
)) {
1930 tree_header
* th
= (tree_header
*)&context
->stripes
[stripe
].buf
[stripeoff
* Vcb
->superblock
.sector_size
];
1931 uint64_t addr
= c
->offset
+ (stripe_start
* (c
->chunk_item
->num_stripes
- 2) * c
->chunk_item
->stripe_length
) + (off
* Vcb
->superblock
.sector_size
);
1932 uint32_t crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
1934 if (crc32
!= *((uint32_t*)th
->csum
) || th
->address
!= addr
) {
1935 RtlSetBits(&context
->stripes
[stripe
].error
, i
, Vcb
->superblock
.node_size
/ Vcb
->superblock
.sector_size
);
1936 log_device_error(Vcb
, c
->devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1938 if (missing_devices
== 2)
1939 log_error(Vcb
, addr
, c
->devices
[stripe
]->devitem
.dev_id
, true, false, false);
1942 off
+= Vcb
->superblock
.node_size
/ Vcb
->superblock
.sector_size
;
1943 stripeoff
+= Vcb
->superblock
.node_size
/ Vcb
->superblock
.sector_size
;
1944 i
+= (Vcb
->superblock
.node_size
/ Vcb
->superblock
.sector_size
) - 1;
1947 } else if (RtlCheckBit(&context
->has_csum
, off
)) {
1948 uint32_t crc32
= ~calc_crc32c(0xffffffff, context
->stripes
[stripe
].buf
+ (stripeoff
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1950 if (crc32
!= context
->csum
[off
]) {
1951 uint64_t addr
= c
->offset
+ (stripe_start
* (c
->chunk_item
->num_stripes
- 2) * c
->chunk_item
->stripe_length
) + (off
* Vcb
->superblock
.sector_size
);
1953 RtlSetBit(&context
->stripes
[stripe
].error
, i
);
1954 log_device_error(Vcb
, c
->devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1956 if (missing_devices
== 2)
1957 log_error(Vcb
, addr
, c
->devices
[stripe
]->devitem
.dev_id
, false, false, false);
1966 if (c
->devices
[parity1
]->devobj
)
1967 do_xor(context
->parity_scratch
, &context
->stripes
[stripe
].buf
[num
* c
->chunk_item
->stripe_length
], (uint32_t)c
->chunk_item
->stripe_length
);
1969 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
1970 stripeoff
= num
* sectors_per_stripe
;
1973 RtlClearAllBits(&context
->stripes
[parity1
].error
);
1975 if (missing_devices
== 0 || (missing_devices
== 1 && !c
->devices
[parity2
]->devobj
)) {
1978 for (i
= 0; i
< sectors_per_stripe
; i
++) {
1981 o
= i
* Vcb
->superblock
.sector_size
;
1982 for (j
= 0; j
< Vcb
->superblock
.sector_size
; j
++) { // FIXME - use SSE
1983 if (context
->parity_scratch
[o
] != 0) {
1984 RtlSetBit(&context
->stripes
[parity1
].error
, i
);
1992 RtlClearAllBits(&context
->stripes
[parity2
].error
);
1994 if (missing_devices
== 0 || (missing_devices
== 1 && !c
->devices
[parity1
]->devobj
)) {
1997 stripe
= parity1
== 0 ? (c
->chunk_item
->num_stripes
- 1) : (parity1
- 1);
1999 while (stripe
!= parity2
) {
2000 galois_double(context
->parity_scratch2
, (uint32_t)c
->chunk_item
->stripe_length
);
2001 do_xor(context
->parity_scratch2
, &context
->stripes
[stripe
].buf
[num
* c
->chunk_item
->stripe_length
], (uint32_t)c
->chunk_item
->stripe_length
);
2003 stripe
= stripe
== 0 ? (c
->chunk_item
->num_stripes
- 1) : (stripe
- 1);
2006 for (i
= 0; i
< sectors_per_stripe
; i
++) {
2007 if (RtlCompareMemory(&context
->stripes
[parity2
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2008 &context
->parity_scratch2
[i
* Vcb
->superblock
.sector_size
], Vcb
->superblock
.sector_size
) != Vcb
->superblock
.sector_size
)
2009 RtlSetBit(&context
->stripes
[parity2
].error
, i
);
2013 if (missing_devices
== 2)
2016 // log and fix errors
2018 for (i
= 0; i
< sectors_per_stripe
; i
++) {
2019 ULONG num_errors
= 0;
2020 uint64_t bad_stripe1
, bad_stripe2
;
2021 ULONG bad_off1
, bad_off2
;
2024 stripe
= (parity1
+ 2) % c
->chunk_item
->num_stripes
;
2025 off
= (ULONG
)((bit_start
+ num
- stripe_start
) * sectors_per_stripe
* (c
->chunk_item
->num_stripes
- 2)) + i
;
2027 while (stripe
!= parity1
) {
2028 if (RtlCheckBit(&context
->alloc
, off
)) {
2031 if (!c
->devices
[stripe
]->devobj
|| RtlCheckBit(&context
->stripes
[stripe
].error
, i
)) {
2032 if (num_errors
== 0) {
2033 bad_stripe1
= stripe
;
2035 } else if (num_errors
== 1) {
2036 bad_stripe2
= stripe
;
2043 off
+= sectors_per_stripe
;
2044 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
2050 if (num_errors
== 0 && !RtlCheckBit(&context
->stripes
[parity1
].error
, i
) && !RtlCheckBit(&context
->stripes
[parity2
].error
, i
)) // everything fine
2053 if (num_errors
== 0) { // parity error
2056 if (RtlCheckBit(&context
->stripes
[parity1
].error
, i
)) {
2057 do_xor(&context
->stripes
[parity1
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2058 &context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
],
2059 Vcb
->superblock
.sector_size
);
2061 bad_off1
= (ULONG
)((bit_start
+ num
- stripe_start
) * sectors_per_stripe
* (c
->chunk_item
->num_stripes
- 2)) + i
;
2062 addr
= c
->offset
+ (stripe_start
* (c
->chunk_item
->num_stripes
- 2) * c
->chunk_item
->stripe_length
) + (bad_off1
* Vcb
->superblock
.sector_size
);
2064 context
->stripes
[parity1
].rewrite
= true;
2066 log_error(Vcb
, addr
, c
->devices
[parity1
]->devitem
.dev_id
, false, true, true);
2067 log_device_error(Vcb
, c
->devices
[parity1
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
2070 if (RtlCheckBit(&context
->stripes
[parity2
].error
, i
)) {
2071 RtlCopyMemory(&context
->stripes
[parity2
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2072 &context
->parity_scratch2
[i
* Vcb
->superblock
.sector_size
],
2073 Vcb
->superblock
.sector_size
);
2075 bad_off1
= (ULONG
)((bit_start
+ num
- stripe_start
) * sectors_per_stripe
* (c
->chunk_item
->num_stripes
- 2)) + i
;
2076 addr
= c
->offset
+ (stripe_start
* (c
->chunk_item
->num_stripes
- 2) * c
->chunk_item
->stripe_length
) + (bad_off1
* Vcb
->superblock
.sector_size
);
2078 context
->stripes
[parity2
].rewrite
= true;
2080 log_error(Vcb
, addr
, c
->devices
[parity2
]->devitem
.dev_id
, false, true, true);
2081 log_device_error(Vcb
, c
->devices
[parity2
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
2083 } else if (num_errors
== 1) {
2084 uint32_t crc32a
, crc32b
, len
;
2085 uint16_t stripe_num
, bad_stripe_num
;
2086 uint64_t addr
= c
->offset
+ (stripe_start
* (c
->chunk_item
->num_stripes
- 2) * c
->chunk_item
->stripe_length
) + (bad_off1
* Vcb
->superblock
.sector_size
);
2089 len
= RtlCheckBit(&context
->is_tree
, bad_off1
)? Vcb
->superblock
.node_size
: Vcb
->superblock
.sector_size
;
2091 scratch
= ExAllocatePoolWithTag(PagedPool
, len
, ALLOC_TAG
);
2093 ERR("out of memory\n");
2097 RtlZeroMemory(scratch
, len
);
2099 do_xor(&context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
],
2100 &context
->stripes
[bad_stripe1
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)], len
);
2102 stripe
= parity1
== 0 ? (c
->chunk_item
->num_stripes
- 1) : (parity1
- 1);
2104 if (c
->devices
[parity2
]->devobj
) {
2105 stripe_num
= c
->chunk_item
->num_stripes
- 3;
2106 while (stripe
!= parity2
) {
2107 galois_double(scratch
, len
);
2109 if (stripe
!= bad_stripe1
)
2110 do_xor(scratch
, &context
->stripes
[stripe
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)], len
);
2112 bad_stripe_num
= stripe_num
;
2114 stripe
= stripe
== 0 ? (c
->chunk_item
->num_stripes
- 1) : (stripe
- 1);
2118 do_xor(scratch
, &context
->stripes
[parity2
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)], len
);
2120 if (bad_stripe_num
!= 0)
2121 galois_divpower(scratch
, (uint8_t)bad_stripe_num
, len
);
2124 if (RtlCheckBit(&context
->is_tree
, bad_off1
)) {
2125 tree_header
*th1
= NULL
, *th2
= NULL
;
2127 if (c
->devices
[parity1
]->devobj
) {
2128 th1
= (tree_header
*)&context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
];
2129 crc32a
= ~calc_crc32c(0xffffffff, (uint8_t*)&th1
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th1
->csum
));
2132 if (c
->devices
[parity2
]->devobj
) {
2133 th2
= (tree_header
*)scratch
;
2134 crc32b
= ~calc_crc32c(0xffffffff, (uint8_t*)&th2
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th2
->csum
));
2137 if ((c
->devices
[parity1
]->devobj
&& crc32a
== *((uint32_t*)th1
->csum
) && th1
->address
== addr
) ||
2138 (c
->devices
[parity2
]->devobj
&& crc32b
== *((uint32_t*)th2
->csum
) && th2
->address
== addr
)) {
2139 if (!c
->devices
[parity1
]->devobj
|| crc32a
!= *((uint32_t*)th1
->csum
) || th1
->address
!= addr
) {
2140 RtlCopyMemory(&context
->stripes
[bad_stripe1
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2141 scratch
, Vcb
->superblock
.node_size
);
2143 if (c
->devices
[parity1
]->devobj
) {
2146 stripe
= (parity1
+ 2) % c
->chunk_item
->num_stripes
;
2148 RtlCopyMemory(&context
->stripes
[parity1
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2149 &context
->stripes
[stripe
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2150 Vcb
->superblock
.node_size
);
2152 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
2154 while (stripe
!= parity1
) {
2155 do_xor(&context
->stripes
[parity1
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2156 &context
->stripes
[stripe
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2157 Vcb
->superblock
.node_size
);
2159 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
2162 context
->stripes
[parity1
].rewrite
= true;
2164 log_error(Vcb
, addr
, c
->devices
[parity1
]->devitem
.dev_id
, false, true, true);
2165 log_device_error(Vcb
, c
->devices
[parity1
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
2168 RtlCopyMemory(&context
->stripes
[bad_stripe1
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2169 &context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
], Vcb
->superblock
.node_size
);
2171 if (!c
->devices
[parity2
]->devobj
|| crc32b
!= *((uint32_t*)th2
->csum
) || th2
->address
!= addr
) {
2173 stripe
= parity1
== 0 ? (c
->chunk_item
->num_stripes
- 1) : (parity1
- 1);
2175 if (c
->devices
[parity2
]->devobj
) {
2176 RtlCopyMemory(&context
->stripes
[parity2
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2177 &context
->stripes
[stripe
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2178 Vcb
->superblock
.node_size
);
2180 stripe
= stripe
== 0 ? (c
->chunk_item
->num_stripes
- 1) : (stripe
- 1);
2182 while (stripe
!= parity2
) {
2183 galois_double(&context
->stripes
[parity2
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)], Vcb
->superblock
.node_size
);
2185 do_xor(&context
->stripes
[parity2
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2186 &context
->stripes
[stripe
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2187 Vcb
->superblock
.node_size
);
2189 stripe
= stripe
== 0 ? (c
->chunk_item
->num_stripes
- 1) : (stripe
- 1);
2192 context
->stripes
[parity2
].rewrite
= true;
2194 log_error(Vcb
, addr
, c
->devices
[parity2
]->devitem
.dev_id
, false, true, true);
2195 log_device_error(Vcb
, c
->devices
[parity2
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
2200 context
->stripes
[bad_stripe1
].rewrite
= true;
2202 RtlClearBits(&context
->stripes
[bad_stripe1
].error
, i
+ 1, (Vcb
->superblock
.node_size
/ Vcb
->superblock
.sector_size
) - 1);
2204 log_error(Vcb
, addr
, c
->devices
[bad_stripe1
]->devitem
.dev_id
, true, true, false);
2206 log_error(Vcb
, addr
, c
->devices
[bad_stripe1
]->devitem
.dev_id
, true, false, false);
2208 if (c
->devices
[parity1
]->devobj
)
2209 crc32a
= ~calc_crc32c(0xffffffff, &context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
], Vcb
->superblock
.sector_size
);
2211 if (c
->devices
[parity2
]->devobj
)
2212 crc32b
= ~calc_crc32c(0xffffffff, scratch
, Vcb
->superblock
.sector_size
);
2214 if ((c
->devices
[parity1
]->devobj
&& crc32a
== context
->csum
[bad_off1
]) || (c
->devices
[parity2
]->devobj
&& crc32b
== context
->csum
[bad_off1
])) {
2215 if (c
->devices
[parity2
]->devobj
&& crc32b
== context
->csum
[bad_off1
]) {
2216 RtlCopyMemory(&context
->stripes
[bad_stripe1
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2217 scratch
, Vcb
->superblock
.sector_size
);
2219 if (c
->devices
[parity1
]->devobj
&& crc32a
!= context
->csum
[bad_off1
]) {
2222 stripe
= (parity1
+ 2) % c
->chunk_item
->num_stripes
;
2224 RtlCopyMemory(&context
->stripes
[parity1
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2225 &context
->stripes
[stripe
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2226 Vcb
->superblock
.sector_size
);
2228 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
2230 while (stripe
!= parity1
) {
2231 do_xor(&context
->stripes
[parity1
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2232 &context
->stripes
[stripe
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2233 Vcb
->superblock
.sector_size
);
2235 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
2238 context
->stripes
[parity1
].rewrite
= true;
2240 log_error(Vcb
, addr
, c
->devices
[parity1
]->devitem
.dev_id
, false, true, true);
2241 log_device_error(Vcb
, c
->devices
[parity1
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
2244 RtlCopyMemory(&context
->stripes
[bad_stripe1
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2245 &context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
], Vcb
->superblock
.sector_size
);
2247 if (c
->devices
[parity2
]->devobj
&& crc32b
!= context
->csum
[bad_off1
]) {
2249 stripe
= parity1
== 0 ? (c
->chunk_item
->num_stripes
- 1) : (parity1
- 1);
2251 RtlCopyMemory(&context
->stripes
[parity2
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2252 &context
->stripes
[stripe
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2253 Vcb
->superblock
.sector_size
);
2255 stripe
= stripe
== 0 ? (c
->chunk_item
->num_stripes
- 1) : (stripe
- 1);
2257 while (stripe
!= parity2
) {
2258 galois_double(&context
->stripes
[parity2
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)], Vcb
->superblock
.sector_size
);
2260 do_xor(&context
->stripes
[parity2
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2261 &context
->stripes
[stripe
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2262 Vcb
->superblock
.sector_size
);
2264 stripe
= stripe
== 0 ? (c
->chunk_item
->num_stripes
- 1) : (stripe
- 1);
2267 context
->stripes
[parity2
].rewrite
= true;
2269 log_error(Vcb
, addr
, c
->devices
[parity2
]->devitem
.dev_id
, false, true, true);
2270 log_device_error(Vcb
, c
->devices
[parity2
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
2274 context
->stripes
[bad_stripe1
].rewrite
= true;
2276 log_error(Vcb
, addr
, c
->devices
[bad_stripe1
]->devitem
.dev_id
, false, true, false);
2278 log_error(Vcb
, addr
, c
->devices
[bad_stripe1
]->devitem
.dev_id
, false, false, false);
2281 ExFreePool(scratch
);
2282 } else if (num_errors
== 2 && missing_devices
== 0) {
2285 uint32_t len
= (RtlCheckBit(&context
->is_tree
, bad_off1
) || RtlCheckBit(&context
->is_tree
, bad_off2
)) ? Vcb
->superblock
.node_size
: Vcb
->superblock
.sector_size
;
2286 uint8_t gyx
, gx
, denom
, a
, b
, *p
, *q
, *pxy
, *qxy
;
2289 stripe
= parity1
== 0 ? (c
->chunk_item
->num_stripes
- 1) : (parity1
- 1);
2291 // put qxy in parity_scratch
2292 // put pxy in parity_scratch2
2294 k
= c
->chunk_item
->num_stripes
- 3;
2295 if (stripe
== bad_stripe1
|| stripe
== bad_stripe2
) {
2296 RtlZeroMemory(&context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
], len
);
2297 RtlZeroMemory(&context
->parity_scratch2
[i
* Vcb
->superblock
.sector_size
], len
);
2299 if (stripe
== bad_stripe1
)
2304 RtlCopyMemory(&context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
],
2305 &context
->stripes
[stripe
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)], len
);
2306 RtlCopyMemory(&context
->parity_scratch2
[i
* Vcb
->superblock
.sector_size
],
2307 &context
->stripes
[stripe
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)], len
);
2310 stripe
= stripe
== 0 ? (c
->chunk_item
->num_stripes
- 1) : (stripe
- 1);
2314 galois_double(&context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
], len
);
2316 if (stripe
!= bad_stripe1
&& stripe
!= bad_stripe2
) {
2317 do_xor(&context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
],
2318 &context
->stripes
[stripe
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)], len
);
2319 do_xor(&context
->parity_scratch2
[i
* Vcb
->superblock
.sector_size
],
2320 &context
->stripes
[stripe
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)], len
);
2321 } else if (stripe
== bad_stripe1
)
2323 else if (stripe
== bad_stripe2
)
2326 stripe
= stripe
== 0 ? (c
->chunk_item
->num_stripes
- 1) : (stripe
- 1);
2328 } while (stripe
!= parity2
);
2330 gyx
= gpow2(y
> x
? (y
-x
) : (255-x
+y
));
2333 denom
= gdiv(1, gyx
^ 1);
2334 a
= gmul(gyx
, denom
);
2335 b
= gmul(gx
, denom
);
2337 p
= &context
->stripes
[parity1
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)];
2338 q
= &context
->stripes
[parity2
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)];
2339 pxy
= &context
->parity_scratch2
[i
* Vcb
->superblock
.sector_size
];
2340 qxy
= &context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
];
2342 for (j
= 0; j
< len
; j
++) {
2343 *qxy
= gmul(a
, *p
^ *pxy
) ^ gmul(b
, *q
^ *qxy
);
2351 do_xor(&context
->parity_scratch2
[i
* Vcb
->superblock
.sector_size
], &context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
], len
);
2352 do_xor(&context
->parity_scratch2
[i
* Vcb
->superblock
.sector_size
], &context
->stripes
[parity1
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)], len
);
2354 addr
= c
->offset
+ (stripe_start
* (c
->chunk_item
->num_stripes
- 2) * c
->chunk_item
->stripe_length
) + (bad_off1
* Vcb
->superblock
.sector_size
);
2356 if (RtlCheckBit(&context
->is_tree
, bad_off1
)) {
2357 tree_header
* th
= (tree_header
*)&context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
];
2358 uint32_t crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
2360 if (crc32
== *((uint32_t*)th
->csum
) && th
->address
== addr
) {
2361 RtlCopyMemory(&context
->stripes
[bad_stripe1
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2362 &context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
], Vcb
->superblock
.node_size
);
2364 context
->stripes
[bad_stripe1
].rewrite
= true;
2366 RtlClearBits(&context
->stripes
[bad_stripe1
].error
, i
+ 1, (Vcb
->superblock
.node_size
/ Vcb
->superblock
.sector_size
) - 1);
2368 log_error(Vcb
, addr
, c
->devices
[bad_stripe1
]->devitem
.dev_id
, true, true, false);
2370 log_error(Vcb
, addr
, c
->devices
[bad_stripe1
]->devitem
.dev_id
, true, false, false);
2372 uint32_t crc32
= ~calc_crc32c(0xffffffff, &context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
], Vcb
->superblock
.sector_size
);
2374 if (crc32
== context
->csum
[bad_off1
]) {
2375 RtlCopyMemory(&context
->stripes
[bad_stripe1
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2376 &context
->parity_scratch
[i
* Vcb
->superblock
.sector_size
], Vcb
->superblock
.sector_size
);
2378 context
->stripes
[bad_stripe1
].rewrite
= true;
2380 log_error(Vcb
, addr
, c
->devices
[bad_stripe1
]->devitem
.dev_id
, false, true, false);
2382 log_error(Vcb
, addr
, c
->devices
[bad_stripe1
]->devitem
.dev_id
, false, false, false);
2385 addr
= c
->offset
+ (stripe_start
* (c
->chunk_item
->num_stripes
- 2) * c
->chunk_item
->stripe_length
) + (bad_off2
* Vcb
->superblock
.sector_size
);
2387 if (RtlCheckBit(&context
->is_tree
, bad_off2
)) {
2388 tree_header
* th
= (tree_header
*)&context
->parity_scratch2
[i
* Vcb
->superblock
.sector_size
];
2389 uint32_t crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
2391 if (crc32
== *((uint32_t*)th
->csum
) && th
->address
== addr
) {
2392 RtlCopyMemory(&context
->stripes
[bad_stripe2
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2393 &context
->parity_scratch2
[i
* Vcb
->superblock
.sector_size
], Vcb
->superblock
.node_size
);
2395 context
->stripes
[bad_stripe2
].rewrite
= true;
2397 RtlClearBits(&context
->stripes
[bad_stripe2
].error
, i
+ 1, (Vcb
->superblock
.node_size
/ Vcb
->superblock
.sector_size
) - 1);
2399 log_error(Vcb
, addr
, c
->devices
[bad_stripe2
]->devitem
.dev_id
, true, true, false);
2401 log_error(Vcb
, addr
, c
->devices
[bad_stripe2
]->devitem
.dev_id
, true, false, false);
2403 uint32_t crc32
= ~calc_crc32c(0xffffffff, &context
->parity_scratch2
[i
* Vcb
->superblock
.sector_size
], Vcb
->superblock
.sector_size
);
2405 if (crc32
== context
->csum
[bad_off2
]) {
2406 RtlCopyMemory(&context
->stripes
[bad_stripe2
].buf
[(num
* c
->chunk_item
->stripe_length
) + (i
* Vcb
->superblock
.sector_size
)],
2407 &context
->parity_scratch2
[i
* Vcb
->superblock
.sector_size
], Vcb
->superblock
.sector_size
);
2409 context
->stripes
[bad_stripe2
].rewrite
= true;
2411 log_error(Vcb
, addr
, c
->devices
[bad_stripe2
]->devitem
.dev_id
, false, true, false);
2413 log_error(Vcb
, addr
, c
->devices
[bad_stripe2
]->devitem
.dev_id
, false, false, false);
2416 stripe
= (parity2
+ 1) % c
->chunk_item
->num_stripes
;
2417 off
= (ULONG
)((bit_start
+ num
- stripe_start
) * sectors_per_stripe
* (c
->chunk_item
->num_stripes
- 2)) + i
;
2419 while (stripe
!= parity1
) {
2420 if (c
->devices
[stripe
]->devobj
&& RtlCheckBit(&context
->alloc
, off
)) {
2421 if (RtlCheckBit(&context
->stripes
[stripe
].error
, i
)) {
2422 uint64_t addr
= c
->offset
+ (stripe_start
* (c
->chunk_item
->num_stripes
- 2) * c
->chunk_item
->stripe_length
) + (off
* Vcb
->superblock
.sector_size
);
2424 log_error(Vcb
, addr
, c
->devices
[stripe
]->devitem
.dev_id
, RtlCheckBit(&context
->is_tree
, off
), false, false);
2428 off
+= sectors_per_stripe
;
2429 stripe
= (stripe
+ 1) % c
->chunk_item
->num_stripes
;
2435 static NTSTATUS
scrub_chunk_raid56_stripe_run(device_extension
* Vcb
, chunk
* c
, uint64_t stripe_start
, uint64_t stripe_end
) {
2440 uint64_t run_start
, run_end
, full_stripe_len
, stripe
;
2441 uint32_t max_read
, num_sectors
;
2442 ULONG arrlen
, *allocarr
, *csumarr
= NULL
, *treearr
, num_parity_stripes
= c
->chunk_item
->type
& BLOCK_FLAG_RAID6
? 2 : 1;
2443 scrub_context_raid56 context
;
2445 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&c
->chunk_item
[1];
2447 TRACE("(%p, %p, %I64x, %I64x)\n", Vcb
, c
, stripe_start
, stripe_end
);
2449 full_stripe_len
= (c
->chunk_item
->num_stripes
- num_parity_stripes
) * c
->chunk_item
->stripe_length
;
2450 run_start
= c
->offset
+ (stripe_start
* full_stripe_len
);
2451 run_end
= c
->offset
+ ((stripe_end
+ 1) * full_stripe_len
);
2453 searchkey
.obj_id
= run_start
;
2454 searchkey
.obj_type
= TYPE_METADATA_ITEM
;
2455 searchkey
.offset
= 0xffffffffffffffff;
2457 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, NULL
);
2458 if (!NT_SUCCESS(Status
)) {
2459 ERR("find_item returned %08x\n", Status
);
2463 num_sectors
= (uint32_t)((stripe_end
- stripe_start
+ 1) * full_stripe_len
/ Vcb
->superblock
.sector_size
);
2464 arrlen
= (ULONG
)sector_align((num_sectors
/ 8) + 1, sizeof(ULONG
));
2466 allocarr
= ExAllocatePoolWithTag(PagedPool
, arrlen
, ALLOC_TAG
);
2468 ERR("out of memory\n");
2469 return STATUS_INSUFFICIENT_RESOURCES
;
2472 treearr
= ExAllocatePoolWithTag(PagedPool
, arrlen
, ALLOC_TAG
);
2474 ERR("out of memory\n");
2475 ExFreePool(allocarr
);
2476 return STATUS_INSUFFICIENT_RESOURCES
;
2479 RtlInitializeBitMap(&context
.alloc
, allocarr
, num_sectors
);
2480 RtlClearAllBits(&context
.alloc
);
2482 RtlInitializeBitMap(&context
.is_tree
, treearr
, num_sectors
);
2483 RtlClearAllBits(&context
.is_tree
);
2485 context
.parity_scratch
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)c
->chunk_item
->stripe_length
, ALLOC_TAG
);
2486 if (!context
.parity_scratch
) {
2487 ERR("out of memory\n");
2488 ExFreePool(allocarr
);
2489 ExFreePool(treearr
);
2490 return STATUS_INSUFFICIENT_RESOURCES
;
2493 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
) {
2494 csumarr
= ExAllocatePoolWithTag(PagedPool
, arrlen
, ALLOC_TAG
);
2496 ERR("out of memory\n");
2497 ExFreePool(allocarr
);
2498 ExFreePool(treearr
);
2499 ExFreePool(context
.parity_scratch
);
2500 return STATUS_INSUFFICIENT_RESOURCES
;
2503 RtlInitializeBitMap(&context
.has_csum
, csumarr
, num_sectors
);
2504 RtlClearAllBits(&context
.has_csum
);
2506 context
.csum
= ExAllocatePoolWithTag(PagedPool
, num_sectors
* sizeof(uint32_t), ALLOC_TAG
);
2507 if (!context
.csum
) {
2508 ERR("out of memory\n");
2509 ExFreePool(allocarr
);
2510 ExFreePool(treearr
);
2511 ExFreePool(context
.parity_scratch
);
2512 ExFreePool(csumarr
);
2513 return STATUS_INSUFFICIENT_RESOURCES
;
2517 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
) {
2518 context
.parity_scratch2
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)c
->chunk_item
->stripe_length
, ALLOC_TAG
);
2519 if (!context
.parity_scratch2
) {
2520 ERR("out of memory\n");
2521 ExFreePool(allocarr
);
2522 ExFreePool(treearr
);
2523 ExFreePool(context
.parity_scratch
);
2525 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
) {
2526 ExFreePool(csumarr
);
2527 ExFreePool(context
.csum
);
2530 return STATUS_INSUFFICIENT_RESOURCES
;
2535 traverse_ptr next_tp
;
2537 if (tp
.item
->key
.obj_id
>= run_end
)
2540 if (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
|| tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
) {
2541 uint64_t size
= tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
? Vcb
->superblock
.node_size
: tp
.item
->key
.offset
;
2543 if (tp
.item
->key
.obj_id
+ size
> run_start
) {
2544 uint64_t extent_start
= max(run_start
, tp
.item
->key
.obj_id
);
2545 uint64_t extent_end
= min(tp
.item
->key
.obj_id
+ size
, run_end
);
2546 bool extent_is_tree
= false;
2548 RtlSetBits(&context
.alloc
, (ULONG
)((extent_start
- run_start
) / Vcb
->superblock
.sector_size
), (ULONG
)((extent_end
- extent_start
) / Vcb
->superblock
.sector_size
));
2550 if (tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
)
2551 extent_is_tree
= true;
2553 EXTENT_ITEM
* ei
= (EXTENT_ITEM
*)tp
.item
->data
;
2555 if (tp
.item
->size
< sizeof(EXTENT_ITEM
)) {
2556 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, tp
.item
->size
, sizeof(EXTENT_ITEM
));
2557 Status
= STATUS_INTERNAL_ERROR
;
2561 if (ei
->flags
& EXTENT_ITEM_TREE_BLOCK
)
2562 extent_is_tree
= true;
2566 RtlSetBits(&context
.is_tree
, (ULONG
)((extent_start
- run_start
) / Vcb
->superblock
.sector_size
), (ULONG
)((extent_end
- extent_start
) / Vcb
->superblock
.sector_size
));
2567 else if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
) {
2571 searchkey
.obj_id
= EXTENT_CSUM_ID
;
2572 searchkey
.obj_type
= TYPE_EXTENT_CSUM
;
2573 searchkey
.offset
= extent_start
;
2575 Status
= find_item(Vcb
, Vcb
->checksum_root
, &tp2
, &searchkey
, false, NULL
);
2576 if (!NT_SUCCESS(Status
) && Status
!= STATUS_NOT_FOUND
) {
2577 ERR("find_item returned %08x\n", Status
);
2582 traverse_ptr next_tp2
;
2584 if (tp2
.item
->key
.offset
>= extent_end
)
2587 if (tp2
.item
->key
.offset
>= extent_start
) {
2588 uint64_t csum_start
= max(extent_start
, tp2
.item
->key
.offset
);
2589 uint64_t csum_end
= min(extent_end
, tp2
.item
->key
.offset
+ (tp2
.item
->size
* Vcb
->superblock
.sector_size
/ sizeof(uint32_t)));
2591 RtlSetBits(&context
.has_csum
, (ULONG
)((csum_start
- run_start
) / Vcb
->superblock
.sector_size
), (ULONG
)((csum_end
- csum_start
) / Vcb
->superblock
.sector_size
));
2593 RtlCopyMemory(&context
.csum
[(csum_start
- run_start
) / Vcb
->superblock
.sector_size
],
2594 tp2
.item
->data
+ ((csum_start
- tp2
.item
->key
.offset
) * sizeof(uint32_t) / Vcb
->superblock
.sector_size
),
2595 (ULONG
)((csum_end
- csum_start
) * sizeof(uint32_t) / Vcb
->superblock
.sector_size
));
2598 b2
= find_next_item(Vcb
, &tp2
, &next_tp2
, false, NULL
);
2607 b
= find_next_item(Vcb
, &tp
, &next_tp
, false, NULL
);
2613 context
.stripes
= ExAllocatePoolWithTag(PagedPool
, sizeof(scrub_context_raid56_stripe
) * c
->chunk_item
->num_stripes
, ALLOC_TAG
);
2614 if (!context
.stripes
) {
2615 ERR("out of memory\n");
2616 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2620 max_read
= (uint32_t)min(1048576 / c
->chunk_item
->stripe_length
, stripe_end
- stripe_start
+ 1); // only process 1 MB of data at a time
2622 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
2623 context
.stripes
[i
].buf
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)(max_read
* c
->chunk_item
->stripe_length
), ALLOC_TAG
);
2624 if (!context
.stripes
[i
].buf
) {
2627 ERR("out of memory\n");
2629 for (j
= 0; j
< i
; j
++) {
2630 ExFreePool(context
.stripes
[j
].buf
);
2632 ExFreePool(context
.stripes
);
2634 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2638 context
.stripes
[i
].errorarr
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)sector_align(((c
->chunk_item
->stripe_length
/ Vcb
->superblock
.sector_size
) / 8) + 1, sizeof(ULONG
)), ALLOC_TAG
);
2639 if (!context
.stripes
[i
].errorarr
) {
2642 ERR("out of memory\n");
2644 ExFreePool(context
.stripes
[i
].buf
);
2646 for (j
= 0; j
< i
; j
++) {
2647 ExFreePool(context
.stripes
[j
].buf
);
2649 ExFreePool(context
.stripes
);
2651 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2655 RtlInitializeBitMap(&context
.stripes
[i
].error
, context
.stripes
[i
].errorarr
, (ULONG
)(c
->chunk_item
->stripe_length
/ Vcb
->superblock
.sector_size
));
2657 context
.stripes
[i
].context
= &context
;
2658 context
.stripes
[i
].rewrite
= false;
2661 stripe
= stripe_start
;
2663 Status
= STATUS_SUCCESS
;
2665 chunk_lock_range(Vcb
, c
, run_start
, run_end
- run_start
);
2669 uint16_t missing_devices
= 0;
2670 bool need_wait
= false;
2672 if (max_read
< stripe_end
+ 1 - stripe
)
2673 read_stripes
= max_read
;
2675 read_stripes
= (ULONG
)(stripe_end
+ 1 - stripe
);
2677 context
.stripes_left
= c
->chunk_item
->num_stripes
;
2679 // read megabyte by megabyte
2680 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
2681 if (c
->devices
[i
]->devobj
) {
2682 PIO_STACK_LOCATION IrpSp
;
2684 context
.stripes
[i
].Irp
= IoAllocateIrp(c
->devices
[i
]->devobj
->StackSize
, false);
2686 if (!context
.stripes
[i
].Irp
) {
2687 ERR("IoAllocateIrp failed\n");
2688 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2692 context
.stripes
[i
].Irp
->MdlAddress
= NULL
;
2694 IrpSp
= IoGetNextIrpStackLocation(context
.stripes
[i
].Irp
);
2695 IrpSp
->MajorFunction
= IRP_MJ_READ
;
2696 IrpSp
->FileObject
= c
->devices
[i
]->fileobj
;
2698 if (c
->devices
[i
]->devobj
->Flags
& DO_BUFFERED_IO
) {
2699 context
.stripes
[i
].Irp
->AssociatedIrp
.SystemBuffer
= ExAllocatePoolWithTag(NonPagedPool
, (ULONG
)(read_stripes
* c
->chunk_item
->stripe_length
), ALLOC_TAG
);
2700 if (!context
.stripes
[i
].Irp
->AssociatedIrp
.SystemBuffer
) {
2701 ERR("out of memory\n");
2702 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2706 context
.stripes
[i
].Irp
->Flags
|= IRP_BUFFERED_IO
| IRP_DEALLOCATE_BUFFER
| IRP_INPUT_OPERATION
;
2708 context
.stripes
[i
].Irp
->UserBuffer
= context
.stripes
[i
].buf
;
2709 } else if (c
->devices
[i
]->devobj
->Flags
& DO_DIRECT_IO
) {
2710 context
.stripes
[i
].Irp
->MdlAddress
= IoAllocateMdl(context
.stripes
[i
].buf
, (ULONG
)(read_stripes
* c
->chunk_item
->stripe_length
), false, false, NULL
);
2711 if (!context
.stripes
[i
].Irp
->MdlAddress
) {
2712 ERR("IoAllocateMdl failed\n");
2713 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2717 Status
= STATUS_SUCCESS
;
2720 MmProbeAndLockPages(context
.stripes
[i
].Irp
->MdlAddress
, KernelMode
, IoWriteAccess
);
2721 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
2722 Status
= _SEH2_GetExceptionCode();
2725 if (!NT_SUCCESS(Status
)) {
2726 ERR("MmProbeAndLockPages threw exception %08x\n", Status
);
2727 IoFreeMdl(context
.stripes
[i
].Irp
->MdlAddress
);
2731 context
.stripes
[i
].Irp
->UserBuffer
= context
.stripes
[i
].buf
;
2733 context
.stripes
[i
].offset
= stripe
* c
->chunk_item
->stripe_length
;
2735 IrpSp
->Parameters
.Read
.Length
= (ULONG
)(read_stripes
* c
->chunk_item
->stripe_length
);
2736 IrpSp
->Parameters
.Read
.ByteOffset
.QuadPart
= cis
[i
].offset
+ context
.stripes
[i
].offset
;
2738 context
.stripes
[i
].Irp
->UserIosb
= &context
.stripes
[i
].iosb
;
2739 context
.stripes
[i
].missing
= false;
2741 IoSetCompletionRoutine(context
.stripes
[i
].Irp
, scrub_read_completion_raid56
, &context
.stripes
[i
], true, true, true);
2743 Vcb
->scrub
.data_scrubbed
+= read_stripes
* c
->chunk_item
->stripe_length
;
2746 context
.stripes
[i
].Irp
= NULL
;
2747 context
.stripes
[i
].missing
= true;
2749 InterlockedDecrement(&context
.stripes_left
);
2753 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
&& missing_devices
> 1) {
2754 ERR("too many missing devices (%u, maximum 1)\n", missing_devices
);
2755 Status
= STATUS_UNEXPECTED_IO_ERROR
;
2757 } else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
&& missing_devices
> 2) {
2758 ERR("too many missing devices (%u, maximum 2)\n", missing_devices
);
2759 Status
= STATUS_UNEXPECTED_IO_ERROR
;
2764 KeInitializeEvent(&context
.Event
, NotificationEvent
, false);
2766 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
2767 if (c
->devices
[i
]->devobj
)
2768 IoCallDriver(c
->devices
[i
]->devobj
, context
.stripes
[i
].Irp
);
2771 KeWaitForSingleObject(&context
.Event
, Executive
, KernelMode
, false, NULL
);
2774 // return an error if any of the stripes returned an error
2775 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
2776 if (!context
.stripes
[i
].missing
&& !NT_SUCCESS(context
.stripes
[i
].iosb
.Status
)) {
2777 Status
= context
.stripes
[i
].iosb
.Status
;
2778 log_device_error(Vcb
, c
->devices
[i
], BTRFS_DEV_STAT_READ_ERRORS
);
2783 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
) {
2784 for (i
= 0; i
< read_stripes
; i
++) {
2785 scrub_raid6_stripe(Vcb
, c
, &context
, stripe_start
, stripe
, i
, missing_devices
);
2788 for (i
= 0; i
< read_stripes
; i
++) {
2789 scrub_raid5_stripe(Vcb
, c
, &context
, stripe_start
, stripe
, i
, missing_devices
);
2792 stripe
+= read_stripes
;
2795 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
2796 if (context
.stripes
[i
].Irp
) {
2797 if (c
->devices
[i
]->devobj
->Flags
& DO_DIRECT_IO
&& context
.stripes
[i
].Irp
->MdlAddress
) {
2798 MmUnlockPages(context
.stripes
[i
].Irp
->MdlAddress
);
2799 IoFreeMdl(context
.stripes
[i
].Irp
->MdlAddress
);
2801 IoFreeIrp(context
.stripes
[i
].Irp
);
2802 context
.stripes
[i
].Irp
= NULL
;
2804 if (context
.stripes
[i
].rewrite
) {
2805 Status
= write_data_phys(c
->devices
[i
]->devobj
, c
->devices
[i
]->fileobj
, cis
[i
].offset
+ context
.stripes
[i
].offset
,
2806 context
.stripes
[i
].buf
, (uint32_t)(read_stripes
* c
->chunk_item
->stripe_length
));
2808 if (!NT_SUCCESS(Status
)) {
2809 ERR("write_data_phys returned %08x\n", Status
);
2810 log_device_error(Vcb
, c
->devices
[i
], BTRFS_DEV_STAT_WRITE_ERRORS
);
2817 if (!NT_SUCCESS(Status
))
2819 } while (stripe
< stripe_end
);
2822 chunk_unlock_range(Vcb
, c
, run_start
, run_end
- run_start
);
2824 for (i
= 0; i
< c
->chunk_item
->num_stripes
; i
++) {
2825 ExFreePool(context
.stripes
[i
].buf
);
2826 ExFreePool(context
.stripes
[i
].errorarr
);
2828 ExFreePool(context
.stripes
);
2831 ExFreePool(treearr
);
2832 ExFreePool(allocarr
);
2833 ExFreePool(context
.parity_scratch
);
2835 if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
)
2836 ExFreePool(context
.parity_scratch2
);
2838 if (c
->chunk_item
->type
& BLOCK_FLAG_DATA
) {
2839 ExFreePool(csumarr
);
2840 ExFreePool(context
.csum
);
// Scrub a RAID5/6 chunk: walk the extent tree over this chunk's address range,
// coalesce adjacent extents into runs of full stripes, and pass each run to
// scrub_chunk_raid56_stripe_run. *offset is the resume position within the
// chunk and is advanced past the stripes processed by this call.
// Returns STATUS_SUCCESS, STATUS_INTERNAL_ERROR on a malformed extent, or an
// error from tree traversal / the stripe runs.
// NOTE(review): this listing is elided — the declarations of searchkey, tp,
// Status and b, the main do/while loop, and several brace closures are not
// visible here; comments below describe only what the visible code shows.
// The role of the `changed` out-parameter cannot be confirmed from this view.
2846 static NTSTATUS
scrub_chunk_raid56(device_extension
* Vcb
, chunk
* c
, uint64_t* offset
, bool* changed
) {
2851 uint64_t full_stripe_len
, stripe
, stripe_start
, stripe_end
, total_data
= 0;
// RAID6 carries two parity stripes per full stripe, RAID5 one.
2852 ULONG num_extents
= 0, num_parity_stripes
= c
->chunk_item
->type
& BLOCK_FLAG_RAID6
? 2 : 1;
// Data bytes covered by one full stripe (parity stripes excluded).
2854 full_stripe_len
= (c
->chunk_item
->num_stripes
- num_parity_stripes
) * c
->chunk_item
->stripe_length
;
// Round the resume offset down to a full-stripe boundary.
2855 stripe
= (*offset
- c
->offset
) / full_stripe_len
;
2857 *offset
= c
->offset
+ (stripe
* full_stripe_len
);
// Look up the first extent item at or after *offset in the extent tree.
2859 searchkey
.obj_id
= *offset
;
2860 searchkey
.obj_type
= TYPE_METADATA_ITEM
;
2861 searchkey
.offset
= 0xffffffffffffffff;
2863 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, NULL
);
2864 if (!NT_SUCCESS(Status
)) {
2865 ERR("find_item returned %08x\n", Status
);
2872 traverse_ptr next_tp
;
// Stop once the item lies beyond the end of this chunk.
2874 if (tp
.item
->key
.obj_id
>= c
->offset
+ c
->chunk_item
->size
)
// Only EXTENT_ITEMs and METADATA_ITEMs at or after the resume point matter.
2877 if (tp
.item
->key
.obj_id
>= *offset
&& (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
|| tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
)) {
// METADATA_ITEM keys encode the tree level in key.offset, so the extent
// size is the node size; for EXTENT_ITEM the size is key.offset itself.
2878 uint64_t size
= tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
? Vcb
->superblock
.node_size
: tp
.item
->key
.offset
;
2880 TRACE("%I64x\n", tp
.item
->key
.obj_id
);
2882 if (size
< Vcb
->superblock
.sector_size
) {
// NOTE(review): the format string has three conversions but only two
// arguments are visible here — the extent size argument appears to be
// missing; confirm against upstream before relying on this message.
2883 ERR("extent %I64x has size less than sector_size (%I64x < %x)\n", tp
.item
->key
.obj_id
, Vcb
->superblock
.sector_size
);
2884 return STATUS_INTERNAL_ERROR
;
// First full stripe touched by this extent.
2887 stripe
= (tp
.item
->key
.obj_id
- c
->offset
) / full_stripe_len
;
// A gap since the current run: flush the accumulated run first, then
// start a new one at this stripe.
2890 if (stripe
> stripe_end
+ 1) {
2891 Status
= scrub_chunk_raid56_stripe_run(Vcb
, c
, stripe_start
, stripe_end
);
2892 if (!NT_SUCCESS(Status
)) {
2893 ERR("scrub_chunk_raid56_stripe_run returned %08x\n", Status
);
2897 stripe_start
= stripe
;
2900 stripe_start
= stripe
;
// Extend the run to cover the last full stripe touched by this extent.
2902 stripe_end
= (tp
.item
->key
.obj_id
+ size
- 1 - c
->offset
) / full_stripe_len
;
2909 // only do so much at a time
2910 if (num_extents
>= 64 || total_data
>= 0x8000000) // 128 MB
2914 b
= find_next_item(Vcb
, &tp
, &next_tp
, false, NULL
);
// Flush the final accumulated stripe run.
2921 Status
= scrub_chunk_raid56_stripe_run(Vcb
, c
, stripe_start
, stripe_end
);
2922 if (!NT_SUCCESS(Status
)) {
2923 ERR("scrub_chunk_raid56_stripe_run returned %08x\n", Status
);
// Resume next call after the last stripe scrubbed this time.
2927 *offset
= c
->offset
+ ((stripe_end
+ 1) * full_stripe_len
);
2930 return STATUS_SUCCESS
;
// Scrub one chunk. Maps the chunk's RAID level to a scrub strategy: RAID5/6
// chunks are delegated whole to scrub_chunk_raid56; for the others the extent
// tree is walked under the shared tree lock, metadata extents are batched into
// contiguous "tree runs" for scrub_extent, and data extents have their
// checksums loaded from the checksum tree and are verified via
// scrub_data_extent. *offset is the in/out resume position within the chunk.
// NOTE(review): this listing is elided — the declarations of Status, searchkey,
// tp, tp2, bmp, is_tree and the do/while loop plus cleanup/exit paths are not
// visible; comments describe only the visible code.
2933 static NTSTATUS
scrub_chunk(device_extension
* Vcb
, chunk
* c
, uint64_t* offset
, bool* changed
) {
2937 bool b
= false, tree_run
= false;
2938 ULONG type
, num_extents
= 0;
2939 uint64_t total_data
= 0, tree_run_start
, tree_run_end
;
2941 TRACE("chunk %I64x\n", c
->offset
);
// Extent-tree walk happens under the shared tree lock; released at the end.
2943 ExAcquireResourceSharedLite(&Vcb
->tree_lock
, true);
// Normalise the chunk's RAID flags to the scrub "type" used below.
// RAID1 is scrubbed the same way as DUP (each copy verified in turn).
2945 if (c
->chunk_item
->type
& BLOCK_FLAG_DUPLICATE
)
2946 type
= BLOCK_FLAG_DUPLICATE
;
2947 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID0
)
2948 type
= BLOCK_FLAG_RAID0
;
2949 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID1
)
2950 type
= BLOCK_FLAG_DUPLICATE
;
2951 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID10
)
2952 type
= BLOCK_FLAG_RAID10
;
// RAID5/6 chunks take the parity-aware path and do not fall through to the
// extent walk below.
2953 else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID5
) {
2954 Status
= scrub_chunk_raid56(Vcb
, c
, offset
, changed
);
2956 } else if (c
->chunk_item
->type
& BLOCK_FLAG_RAID6
) {
2957 Status
= scrub_chunk_raid56(Vcb
, c
, offset
, changed
);
// Fallback for single/unmirrored profiles: treat as a single copy.
2960 type
= BLOCK_FLAG_DUPLICATE
;
// Find the first extent item at or after the resume offset.
2962 searchkey
.obj_id
= *offset
;
2963 searchkey
.obj_type
= TYPE_METADATA_ITEM
;
2964 searchkey
.offset
= 0xffffffffffffffff;
2966 Status
= find_item(Vcb
, Vcb
->extent_root
, &tp
, &searchkey
, false, NULL
);
2967 if (!NT_SUCCESS(Status
)) {
2968 ERR("error - find_item returned %08x\n", Status
);
2973 traverse_ptr next_tp
;
// Past the end of this chunk: stop walking.
2975 if (tp
.item
->key
.obj_id
>= c
->offset
+ c
->chunk_item
->size
)
2978 if (tp
.item
->key
.obj_id
>= *offset
&& (tp
.item
->key
.obj_type
== TYPE_EXTENT_ITEM
|| tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
)) {
// METADATA_ITEM: key.offset is the tree level, so size is the node size.
2979 uint64_t size
= tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
? Vcb
->superblock
.node_size
: tp
.item
->key
.offset
;
2981 uint32_t* csum
= NULL
;
2983 ULONG
* bmparr
= NULL
, bmplen
;
2985 TRACE("%I64x\n", tp
.item
->key
.obj_id
);
// Decide whether this extent is a tree block (metadata) or data.
2989 if (tp
.item
->key
.obj_type
== TYPE_METADATA_ITEM
)
2992 EXTENT_ITEM
* ei
= (EXTENT_ITEM
*)tp
.item
->data
;
// Sanity-check the item payload before dereferencing it.
2994 if (tp
.item
->size
< sizeof(EXTENT_ITEM
)) {
2995 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp
.item
->key
.obj_id
, tp
.item
->key
.obj_type
, tp
.item
->key
.offset
, tp
.item
->size
, sizeof(EXTENT_ITEM
));
2996 Status
= STATUS_INTERNAL_ERROR
;
// Old-style EXTENT_ITEM flagged as a tree block also counts as metadata.
3000 if (ei
->flags
& EXTENT_ITEM_TREE_BLOCK
)
3004 if (size
< Vcb
->superblock
.sector_size
) {
// NOTE(review): three conversions but only two visible arguments — the
// extent size argument appears to be missing; confirm against upstream.
3005 ERR("extent %I64x has size less than sector_size (%I64x < %x)\n", tp
.item
->key
.obj_id
, Vcb
->superblock
.sector_size
);
3006 Status
= STATUS_INTERNAL_ERROR
;
// Data extent: allocate one uint32_t checksum slot per sector...
3014 csum
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)(sizeof(uint32_t) * size
/ Vcb
->superblock
.sector_size
), ALLOC_TAG
);
3016 ERR("out of memory\n");
3017 Status
= STATUS_INSUFFICIENT_RESOURCES
;
// ...and a bitmap with one bit per sector recording which sectors have
// a checksum in the checksum tree.
3021 bmplen
= (ULONG
)(size
/ Vcb
->superblock
.sector_size
);
3023 bmparr
= ExAllocatePoolWithTag(PagedPool
, (ULONG
)(sector_align((bmplen
>> 3) + 1, sizeof(ULONG
))), ALLOC_TAG
);
3025 ERR("out of memory\n");
3027 Status
= STATUS_INSUFFICIENT_RESOURCES
;
3031 RtlInitializeBitMap(&bmp
, bmparr
, bmplen
);
3032 RtlSetAllBits(&bmp
); // 1 = no csum, 0 = csum
// Walk the checksum tree for runs overlapping this extent.
3034 searchkey
.obj_id
= EXTENT_CSUM_ID
;
3035 searchkey
.obj_type
= TYPE_EXTENT_CSUM
;
3036 searchkey
.offset
= tp
.item
->key
.obj_id
;
3038 Status
= find_item(Vcb
, Vcb
->checksum_root
, &tp2
, &searchkey
, false, NULL
);
// STATUS_NOT_FOUND is tolerated: the extent may simply have no checksums
// (e.g. nodatasum files).
3039 if (!NT_SUCCESS(Status
) && Status
!= STATUS_NOT_FOUND
) {
3040 ERR("find_item returned %08x\n", Status
);
3046 if (Status
!= STATUS_NOT_FOUND
) {
3048 traverse_ptr next_tp2
;
3050 if (tp2
.item
->key
.obj_type
== TYPE_EXTENT_CSUM
) {
// Checksum run starts beyond this extent: done.
3051 if (tp2
.item
->key
.offset
>= tp
.item
->key
.obj_id
+ size
)
// Checksum run overlaps the extent: each uint32_t in the item covers
// one sector starting at key.offset.
3053 else if (tp2
.item
->size
>= sizeof(uint32_t) && tp2
.item
->key
.offset
+ (tp2
.item
->size
* Vcb
->superblock
.sector_size
/ sizeof(uint32_t)) >= tp
.item
->key
.obj_id
) {
// [cs, ce) = intersection of the extent and the checksum run.
3054 uint64_t cs
= max(tp
.item
->key
.obj_id
, tp2
.item
->key
.offset
);
3055 uint64_t ce
= min(tp
.item
->key
.obj_id
+ size
, tp2
.item
->key
.offset
+ (tp2
.item
->size
* Vcb
->superblock
.sector_size
/ sizeof(uint32_t)));
// Copy the overlapping checksums into our per-extent array...
3057 RtlCopyMemory(csum
+ ((cs
- tp
.item
->key
.obj_id
) / Vcb
->superblock
.sector_size
),
3058 tp2
.item
->data
+ ((cs
- tp2
.item
->key
.offset
) * sizeof(uint32_t) / Vcb
->superblock
.sector_size
),
3059 (ULONG
)((ce
- cs
) * sizeof(uint32_t) / Vcb
->superblock
.sector_size
));
// ...and mark those sectors as having checksums.
3061 RtlClearBits(&bmp
, (ULONG
)((cs
- tp
.item
->key
.obj_id
) / Vcb
->superblock
.sector_size
), (ULONG
)((ce
- cs
) / Vcb
->superblock
.sector_size
));
// Covered to the end of the extent: stop scanning checksum items.
3063 if (ce
== tp
.item
->key
.obj_id
+ size
)
3068 if (find_next_item(Vcb
, &tp2
, &next_tp2
, false, NULL
))
// Metadata batching: a non-adjacent tree block flushes the current run.
3077 if (!is_tree
|| tp
.item
->key
.obj_id
> tree_run_end
) {
3078 Status
= scrub_extent(Vcb
, c
, type
, tree_run_start
, (uint32_t)(tree_run_end
- tree_run_start
), NULL
);
3079 if (!NT_SUCCESS(Status
)) {
3080 ERR("scrub_extent returned %08x\n", Status
);
// Start a new tree run at this block.
3087 tree_run_start
= tp
.item
->key
.obj_id
;
3088 tree_run_end
= tp
.item
->key
.obj_id
+ Vcb
->superblock
.node_size
;
// Adjacent tree block: just extend the current run.
3091 tree_run_end
= tp
.item
->key
.obj_id
+ Vcb
->superblock
.node_size
;
3092 } else if (is_tree
) {
// First tree block seen: open a run.
3094 tree_run_start
= tp
.item
->key
.obj_id
;
3095 tree_run_end
= tp
.item
->key
.obj_id
+ Vcb
->superblock
.node_size
;
// Data extent: verify it against the checksums/bitmap gathered above.
3099 Status
= scrub_data_extent(Vcb
, c
, tp
.item
->key
.obj_id
, type
, csum
, &bmp
, bmplen
);
3100 if (!NT_SUCCESS(Status
)) {
3101 ERR("scrub_data_extent returned %08x\n", Status
);
// Record progress so an interrupted scrub resumes after this extent.
3111 *offset
= tp
.item
->key
.obj_id
+ size
;
3117 // only do so much at a time
3118 if (num_extents
>= 64 || total_data
>= 0x8000000) // 128 MB
3122 b
= find_next_item(Vcb
, &tp
, &next_tp
, false, NULL
);
// Flush any tree run still open when the walk ends.
3129 Status
= scrub_extent(Vcb
, c
, type
, tree_run_start
, (uint32_t)(tree_run_end
- tree_run_start
), NULL
);
3130 if (!NT_SUCCESS(Status
)) {
3131 ERR("scrub_extent returned %08x\n", Status
);
3136 Status
= STATUS_SUCCESS
;
3139 ExReleaseResourceLite(&Vcb
->tree_lock
);
// System-thread entry point for a scrub, created by start_scrub. Flushes any
// pending writes, resets the scrub statistics, snapshots the chunk list, then
// scrubs each chunk in turn (pausing on the scrub event as requested), and
// finally signals scrub.finished.
// context: the volume's device_extension, passed by PsCreateSystemThread.
// NOTE(review): this listing is elided — declarations of Status, changed and
// time, several if/else bodies and brace closures are missing; comments
// describe only what the visible code shows.
3144 _Function_class_(KSTART_ROUTINE
)
3145 static void __stdcall
scrub_thread(void* context
) {
3146 device_extension
* Vcb
= context
;
3147 LIST_ENTRY chunks
, *le
;
// scrub.finished stays unsignalled until this thread has fully wound down.
3151 KeInitializeEvent(&Vcb
->scrub
.finished
, NotificationEvent
, false);
3153 InitializeListHead(&chunks
);
3155 ExAcquireResourceExclusiveLite(&Vcb
->tree_lock
, true);
// Flush dirty metadata first so the scrub sees a consistent on-disk state.
3157 if (Vcb
->need_write
&& !Vcb
->readonly
)
3158 Status
= do_write(Vcb
, NULL
);
3160 Status
= STATUS_SUCCESS
;
// If the flush failed, record the error and bail out (tree lock released).
3164 if (!NT_SUCCESS(Status
)) {
3165 ExReleaseResourceLite(&Vcb
->tree_lock
);
3166 ERR("do_write returned %08x\n", Status
);
3167 Vcb
->scrub
.error
= Status
;
// Downgrade to shared: the rest only reads the trees.
3171 ExConvertExclusiveToSharedLite(&Vcb
->tree_lock
);
// Reset all scrub statistics under the stats lock.
3173 ExAcquireResourceExclusiveLite(&Vcb
->scrub
.stats_lock
, true);
3175 KeQuerySystemTime(&Vcb
->scrub
.start_time
);
3176 Vcb
->scrub
.finish_time
.QuadPart
= 0;
3177 Vcb
->scrub
.resume_time
.QuadPart
= Vcb
->scrub
.start_time
.QuadPart
;
3178 Vcb
->scrub
.duration
.QuadPart
= 0;
3179 Vcb
->scrub
.total_chunks
= 0;
3180 Vcb
->scrub
.chunks_left
= 0;
3181 Vcb
->scrub
.data_scrubbed
= 0;
3182 Vcb
->scrub
.num_errors
= 0;
// Drop the error records left over from any previous scrub.
3184 while (!IsListEmpty(&Vcb
->scrub
.errors
)) {
3185 scrub_error
* err
= CONTAINING_RECORD(RemoveHeadList(&Vcb
->scrub
.errors
), scrub_error
, list_entry
);
// Snapshot the chunk list into our private work list, reusing each chunk's
// list_entry_balance link (scrub and balance cannot run concurrently).
3189 ExAcquireResourceSharedLite(&Vcb
->chunk_lock
, true);
3191 le
= Vcb
->chunks
.Flink
;
3192 while (le
!= &Vcb
->chunks
) {
3193 chunk
* c
= CONTAINING_RECORD(le
, chunk
, list_entry
);
3195 acquire_chunk_lock(c
, Vcb
);
3198 InsertTailList(&chunks
, &c
->list_entry_balance
);
3199 Vcb
->scrub
.total_chunks
++;
3200 Vcb
->scrub
.chunks_left
++;
3203 release_chunk_lock(c
, Vcb
);
3208 ExReleaseResourceLite(&Vcb
->chunk_lock
);
3210 ExReleaseResource(&Vcb
->scrub
.stats_lock
);
3212 ExReleaseResourceLite(&Vcb
->tree_lock
);
// Main loop: scrub each queued chunk until done or told to stop.
3214 while (!IsListEmpty(&chunks
)) {
3215 chunk
* c
= CONTAINING_RECORD(RemoveHeadList(&chunks
), chunk
, list_entry_balance
);
3216 uint64_t offset
= c
->offset
;
// Blocks here while paused (pause_scrub clears the event).
3221 KeWaitForSingleObject(&Vcb
->scrub
.event
, Executive
, KernelMode
, false, NULL
);
3223 if (!Vcb
->scrub
.stopping
) {
3227 Status
= scrub_chunk(Vcb
, c
, &offset
, &changed
);
// A chunk error aborts the whole scrub and is reported via scrub.error.
3228 if (!NT_SUCCESS(Status
)) {
3229 ERR("scrub_chunk returned %08x\n", Status
);
3230 Vcb
->scrub
.stopping
= true;
3231 Vcb
->scrub
.error
= Status
;
// Chunk finished (offset reached its end) or scrub is stopping.
3235 if (offset
== c
->offset
+ c
->chunk_item
->size
|| Vcb
->scrub
.stopping
)
3238 KeWaitForSingleObject(&Vcb
->scrub
.event
, Executive
, KernelMode
, false, NULL
);
// Update progress counters; record the finish time after the last chunk.
3242 ExAcquireResourceExclusiveLite(&Vcb
->scrub
.stats_lock
, true);
3244 if (!Vcb
->scrub
.stopping
)
3245 Vcb
->scrub
.chunks_left
--;
3247 if (IsListEmpty(&chunks
))
3248 KeQuerySystemTime(&Vcb
->scrub
.finish_time
);
3250 ExReleaseResource(&Vcb
->scrub
.stats_lock
);
// Mark the chunk as no longer queued on the balance/scrub list.
3253 c
->list_entry_balance
.Flink
= NULL
;
// Bank the final stretch of running time into the total duration.
3256 KeQuerySystemTime(&time
);
3257 Vcb
->scrub
.duration
.QuadPart
+= time
.QuadPart
- Vcb
->scrub
.resume_time
.QuadPart
;
// Tear down the thread handle and announce completion.
3260 ZwClose(Vcb
->scrub
.thread
);
3261 Vcb
->scrub
.thread
= NULL
;
3263 KeSetEvent(&Vcb
->scrub
.finished
, 0, false);
// Start a scrub on the volume: privilege check, precondition checks (not
// locked, no balance running, no scrub already running, not read-only), then
// reset the scrub state and launch scrub_thread as a system thread.
// Returns STATUS_SUCCESS, STATUS_PRIVILEGE_NOT_HELD, STATUS_DEVICE_NOT_READY,
// STATUS_MEDIA_WRITE_PROTECTED, or the PsCreateSystemThread error.
// NOTE(review): this listing is elided — the conditions guarding lines 3274
// ("while locked") and 3289 (STATUS_MEDIA_WRITE_PROTECTED, presumably a
// read-only check) are not visible; confirm against upstream.
3266 NTSTATUS
start_scrub(device_extension
* Vcb
, KPROCESSOR_MODE processor_mode
) {
3268 OBJECT_ATTRIBUTES oa
;
// Caller must hold SeManageVolumePrivilege.
3270 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3271 return STATUS_PRIVILEGE_NOT_HELD
;
// Guard condition elided: volume is locked.
3274 WARN("cannot start scrub while locked\n");
3275 return STATUS_DEVICE_NOT_READY
;
// Scrub and balance are mutually exclusive (they share list_entry_balance).
3278 if (Vcb
->balance
.thread
) {
3279 WARN("cannot start scrub while balance running\n");
3280 return STATUS_DEVICE_NOT_READY
;
3283 if (Vcb
->scrub
.thread
) {
3284 WARN("scrub already running\n");
3285 return STATUS_DEVICE_NOT_READY
;
// Guard condition elided: presumably the volume is read-only.
3289 return STATUS_MEDIA_WRITE_PROTECTED
;
// Reset run state before spawning the worker.
3291 Vcb
->scrub
.stopping
= false;
3292 Vcb
->scrub
.paused
= false;
3293 Vcb
->scrub
.error
= STATUS_SUCCESS
;
// Event starts signalled (paused was just set false), so the thread runs
// immediately; pause_scrub clears it to suspend the thread.
3294 KeInitializeEvent(&Vcb
->scrub
.event
, NotificationEvent
, !Vcb
->scrub
.paused
);
3296 InitializeObjectAttributes(&oa
, NULL
, OBJ_KERNEL_HANDLE
, NULL
, NULL
);
3298 Status
= PsCreateSystemThread(&Vcb
->scrub
.thread
, 0, &oa
, NULL
, NULL
, scrub_thread
, Vcb
);
3299 if (!NT_SUCCESS(Status
)) {
3300 ERR("PsCreateSystemThread returned %08x\n", Status
);
3304 return STATUS_SUCCESS
;
// IOCTL handler: fill a caller-supplied btrfs_query_scrub buffer with the
// current scrub status, statistics, and a packed variable-length list of
// scrub errors. Returns STATUS_BUFFER_TOO_SMALL if even the fixed header
// does not fit, STATUS_BUFFER_OVERFLOW if not all error records fit, and
// STATUS_SUCCESS otherwise.
// NOTE(review): this listing is elided — declarations of time, len, errlen,
// lastlen, le and Status, and the buffer-overflow branch structure, are not
// visible; comments describe only the visible code.
3307 NTSTATUS
query_scrub(device_extension
* Vcb
, KPROCESSOR_MODE processor_mode
, void* data
, ULONG length
) {
3308 btrfs_query_scrub
* bqs
= (btrfs_query_scrub
*)data
;
// bse walks the variable-length error records appended after the header.
3312 btrfs_scrub_error
* bse
= NULL
;
3314 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3315 return STATUS_PRIVILEGE_NOT_HELD
;
// The fixed part of the structure must fit before we write anything.
3317 if (length
< offsetof(btrfs_query_scrub
, errors
))
3318 return STATUS_BUFFER_TOO_SMALL
;
// Statistics are read under the shared stats lock, released at the end.
3320 ExAcquireResourceSharedLite(&Vcb
->scrub
.stats_lock
, true);
// Running/paused only while the thread exists and chunks remain.
3322 if (Vcb
->scrub
.thread
&& Vcb
->scrub
.chunks_left
> 0)
3323 bqs
->status
= Vcb
->scrub
.paused
? BTRFS_SCRUB_PAUSED
: BTRFS_SCRUB_RUNNING
;
3325 bqs
->status
= BTRFS_SCRUB_STOPPED
;
3327 bqs
->start_time
.QuadPart
= Vcb
->scrub
.start_time
.QuadPart
;
3328 bqs
->finish_time
.QuadPart
= Vcb
->scrub
.finish_time
.QuadPart
;
3329 bqs
->chunks_left
= Vcb
->scrub
.chunks_left
;
3330 bqs
->total_chunks
= Vcb
->scrub
.total_chunks
;
3331 bqs
->data_scrubbed
= Vcb
->scrub
.data_scrubbed
;
3333 bqs
->duration
= Vcb
->scrub
.duration
.QuadPart
;
// While running, add the in-progress stretch since the last resume, since
// scrub.duration is only banked on pause/finish.
3335 if (bqs
->status
== BTRFS_SCRUB_RUNNING
) {
3338 KeQuerySystemTime(&time
);
3339 bqs
->duration
+= time
.QuadPart
- Vcb
->scrub
.resume_time
.QuadPart
;
3342 bqs
->error
= Vcb
->scrub
.error
;
3344 bqs
->num_errors
= Vcb
->scrub
.num_errors
;
// Space remaining for the variable-length error records.
3346 len
= length
- offsetof(btrfs_query_scrub
, errors
);
3348 le
= Vcb
->scrub
.errors
.Flink
;
3349 while (le
!= &Vcb
->scrub
.errors
) {
3350 scrub_error
* err
= CONTAINING_RECORD(le
, scrub_error
, list_entry
);
// Record size depends on the variant: metadata records end after the
// firstitem KEY; data records carry a variable-length filename.
3353 if (err
->is_metadata
)
3354 errlen
= offsetof(btrfs_scrub_error
, metadata
.firstitem
) + sizeof(KEY
);
3356 errlen
= offsetof(btrfs_scrub_error
, data
.filename
) + err
->data
.filename_length
;
// Not enough room for this record: report a truncated listing.
3359 Status
= STATUS_BUFFER_OVERFLOW
;
// Link the previous record to this one via its next_entry byte offset.
3368 if (bse
->is_metadata
)
3369 lastlen
= offsetof(btrfs_scrub_error
, metadata
.firstitem
) + sizeof(KEY
);
3371 lastlen
= offsetof(btrfs_scrub_error
, data
.filename
) + bse
->data
.filename_length
;
3373 bse
->next_entry
= lastlen
;
3374 bse
= (btrfs_scrub_error
*)(((uint8_t*)bse
) + lastlen
);
// Fill in the common fields; next_entry == 0 marks the last record.
3377 bse
->next_entry
= 0;
3378 bse
->address
= err
->address
;
3379 bse
->device
= err
->device
;
3380 bse
->recovered
= err
->recovered
;
3381 bse
->is_metadata
= err
->is_metadata
;
3382 bse
->parity
= err
->parity
;
3384 if (err
->is_metadata
) {
3385 bse
->metadata
.root
= err
->metadata
.root
;
3386 bse
->metadata
.level
= err
->metadata
.level
;
3387 bse
->metadata
.firstitem
= err
->metadata
.firstitem
;
3389 bse
->data
.subvol
= err
->data
.subvol
;
3390 bse
->data
.offset
= err
->data
.offset
;
3391 bse
->data
.filename_length
= err
->data
.filename_length
;
3392 RtlCopyMemory(bse
->data
.filename
, err
->data
.filename
, err
->data
.filename_length
);
3399 Status
= STATUS_SUCCESS
;
3402 ExReleaseResourceLite(&Vcb
->scrub
.stats_lock
);
3407 NTSTATUS
pause_scrub(device_extension
* Vcb
, KPROCESSOR_MODE processor_mode
) {
3410 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3411 return STATUS_PRIVILEGE_NOT_HELD
;
3413 if (!Vcb
->scrub
.thread
)
3414 return STATUS_DEVICE_NOT_READY
;
3416 if (Vcb
->scrub
.paused
)
3417 return STATUS_DEVICE_NOT_READY
;
3419 Vcb
->scrub
.paused
= true;
3420 KeClearEvent(&Vcb
->scrub
.event
);
3422 KeQuerySystemTime(&time
);
3423 Vcb
->scrub
.duration
.QuadPart
+= time
.QuadPart
- Vcb
->scrub
.resume_time
.QuadPart
;
3425 return STATUS_SUCCESS
;
3428 NTSTATUS
resume_scrub(device_extension
* Vcb
, KPROCESSOR_MODE processor_mode
) {
3429 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3430 return STATUS_PRIVILEGE_NOT_HELD
;
3432 if (!Vcb
->scrub
.thread
)
3433 return STATUS_DEVICE_NOT_READY
;
3435 if (!Vcb
->scrub
.paused
)
3436 return STATUS_DEVICE_NOT_READY
;
3438 Vcb
->scrub
.paused
= false;
3439 KeSetEvent(&Vcb
->scrub
.event
, 0, false);
3441 KeQuerySystemTime(&Vcb
->scrub
.resume_time
);
3443 return STATUS_SUCCESS
;
3446 NTSTATUS
stop_scrub(device_extension
* Vcb
, KPROCESSOR_MODE processor_mode
) {
3447 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE
), processor_mode
))
3448 return STATUS_PRIVILEGE_NOT_HELD
;
3450 if (!Vcb
->scrub
.thread
)
3451 return STATUS_DEVICE_NOT_READY
;
3453 Vcb
->scrub
.paused
= false;
3454 Vcb
->scrub
.stopping
= true;
3455 KeSetEvent(&Vcb
->scrub
.event
, 0, false);
3457 return STATUS_SUCCESS
;