1 /* Copyright (c) Mark Harmstone 2016-17
3 * This file is part of WinBtrfs.
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
18 #include "btrfs_drv.h"
// Per-stripe completion state for a read_data request, set by
// read_data_completion and inspected by the RAID-specific readers below.
// NOTE(review): the extraction dropped the ReadDataStatus_Error member and
// the closing brace; Error is referenced throughout this file, so it is
// restored here.
enum read_data_status {
    ReadDataStatus_Pending,       // IRP issued, not yet completed
    ReadDataStatus_Success,       // device completed the read successfully
    ReadDataStatus_Error,         // device returned a failure status
    ReadDataStatus_MissingDevice  // device absent; stripe was never issued
};
28 struct read_data_context
;
31 struct read_data_context
* context
;
36 enum read_data_status status
;
48 LONG num_stripes
, stripes_left
;
51 uint16_t firstoff
, startoffstripe
, sectors_per_stripe
;
54 read_data_stripe
* stripes
;
59 extern tPsUpdateDiskCounters fPsUpdateDiskCounters
;
60 extern tCcCopyReadEx fCcCopyReadEx
;
61 extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters
;
63 #define LZO_PAGE_SIZE 4096
65 _Function_class_(IO_COMPLETION_ROUTINE
)
66 static NTSTATUS __stdcall
read_data_completion(PDEVICE_OBJECT DeviceObject
, PIRP Irp
, PVOID conptr
) {
67 read_data_stripe
* stripe
= conptr
;
68 read_data_context
* context
= (read_data_context
*)stripe
->context
;
72 stripe
->iosb
= Irp
->IoStatus
;
74 if (NT_SUCCESS(Irp
->IoStatus
.Status
))
75 stripe
->status
= ReadDataStatus_Success
;
77 stripe
->status
= ReadDataStatus_Error
;
79 if (InterlockedDecrement(&context
->stripes_left
) == 0)
80 KeSetEvent(&context
->Event
, 0, false);
82 return STATUS_MORE_PROCESSING_REQUIRED
;
85 NTSTATUS
check_csum(device_extension
* Vcb
, uint8_t* data
, uint32_t sectors
, uint32_t* csum
) {
90 // From experimenting, it seems that 40 sectors is roughly the crossover
91 // point where offloading the crc32 calculation becomes worth it.
93 if (sectors
< 40 || get_num_of_processors() < 2) {
96 for (j
= 0; j
< sectors
; j
++) {
97 uint32_t crc32
= ~calc_crc32c(0xffffffff, data
+ (j
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
99 if (crc32
!= csum
[j
]) {
100 return STATUS_CRC_ERROR
;
104 return STATUS_SUCCESS
;
107 csum2
= ExAllocatePoolWithTag(PagedPool
, sizeof(uint32_t) * sectors
, ALLOC_TAG
);
109 ERR("out of memory\n");
110 return STATUS_INSUFFICIENT_RESOURCES
;
113 Status
= add_calc_job(Vcb
, data
, sectors
, csum2
, &cj
);
114 if (!NT_SUCCESS(Status
)) {
115 ERR("add_calc_job returned %08x\n", Status
);
120 KeWaitForSingleObject(&cj
->event
, Executive
, KernelMode
, false, NULL
);
122 if (RtlCompareMemory(csum2
, csum
, sectors
* sizeof(uint32_t)) != sectors
* sizeof(uint32_t)) {
125 return STATUS_CRC_ERROR
;
131 return STATUS_SUCCESS
;
134 static NTSTATUS
read_data_dup(device_extension
* Vcb
, uint8_t* buf
, uint64_t addr
, read_data_context
* context
, CHUNK_ITEM
* ci
,
135 device
** devices
, uint64_t generation
) {
137 bool checksum_error
= false;
138 uint16_t j
, stripe
= 0;
140 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&ci
[1];
142 for (j
= 0; j
< ci
->num_stripes
; j
++) {
143 if (context
->stripes
[j
].status
== ReadDataStatus_Error
) {
144 WARN("stripe %u returned error %08x\n", j
, context
->stripes
[j
].iosb
.Status
);
145 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
146 return context
->stripes
[j
].iosb
.Status
;
147 } else if (context
->stripes
[j
].status
== ReadDataStatus_Success
) {
153 if (context
->stripes
[stripe
].status
!= ReadDataStatus_Success
)
154 return STATUS_INTERNAL_ERROR
;
157 tree_header
* th
= (tree_header
*)buf
;
160 crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&th
->fs_uuid
, context
->buflen
- sizeof(th
->csum
));
162 if (th
->address
!= context
->address
|| crc32
!= *((uint32_t*)th
->csum
)) {
163 checksum_error
= true;
164 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
165 } else if (generation
!= 0 && th
->generation
!= generation
) {
166 checksum_error
= true;
167 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_GENERATION_ERRORS
);
169 } else if (context
->csum
) {
170 Status
= check_csum(Vcb
, buf
, (ULONG
)context
->stripes
[stripe
].Irp
->IoStatus
.Information
/ context
->sector_size
, context
->csum
);
172 if (Status
== STATUS_CRC_ERROR
) {
173 checksum_error
= true;
174 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
175 } else if (!NT_SUCCESS(Status
)) {
176 ERR("check_csum returned %08x\n", Status
);
182 return STATUS_SUCCESS
;
184 if (ci
->num_stripes
== 1)
185 return STATUS_CRC_ERROR
;
189 bool recovered
= false;
191 t2
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.node_size
, ALLOC_TAG
);
193 ERR("out of memory\n");
194 return STATUS_INSUFFICIENT_RESOURCES
;
197 for (j
= 0; j
< ci
->num_stripes
; j
++) {
198 if (j
!= stripe
&& devices
[j
] && devices
[j
]->devobj
) {
199 Status
= sync_read_phys(devices
[j
]->devobj
, devices
[j
]->fileobj
, cis
[j
].offset
+ context
->stripes
[stripe
].stripestart
,
200 Vcb
->superblock
.node_size
, (uint8_t*)t2
, false);
201 if (!NT_SUCCESS(Status
)) {
202 WARN("sync_read_phys returned %08x\n", Status
);
203 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
205 uint32_t crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&t2
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(t2
->csum
));
207 if (t2
->address
== addr
&& crc32
== *((uint32_t*)t2
->csum
) && (generation
== 0 || t2
->generation
== generation
)) {
208 RtlCopyMemory(buf
, t2
, Vcb
->superblock
.node_size
);
209 ERR("recovering from checksum error at %I64x, device %I64x\n", addr
, devices
[stripe
]->devitem
.dev_id
);
212 if (!Vcb
->readonly
&& !devices
[stripe
]->readonly
) { // write good data over bad
213 Status
= write_data_phys(devices
[stripe
]->devobj
, devices
[stripe
]->fileobj
, cis
[stripe
].offset
+ context
->stripes
[stripe
].stripestart
,
214 t2
, Vcb
->superblock
.node_size
);
215 if (!NT_SUCCESS(Status
)) {
216 WARN("write_data_phys returned %08x\n", Status
);
217 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
222 } else if (t2
->address
!= addr
|| crc32
!= *((uint32_t*)t2
->csum
))
223 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
225 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_GENERATION_ERRORS
);
231 ERR("unrecoverable checksum error at %I64x\n", addr
);
233 return STATUS_CRC_ERROR
;
238 ULONG sectors
= (ULONG
)context
->stripes
[stripe
].Irp
->IoStatus
.Information
/ Vcb
->superblock
.sector_size
;
241 sector
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.sector_size
, ALLOC_TAG
);
243 ERR("out of memory\n");
244 return STATUS_INSUFFICIENT_RESOURCES
;
247 for (i
= 0; i
< sectors
; i
++) {
248 uint32_t crc32
= ~calc_crc32c(0xffffffff, buf
+ (i
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
250 if (context
->csum
[i
] != crc32
) {
251 bool recovered
= false;
253 for (j
= 0; j
< ci
->num_stripes
; j
++) {
254 if (j
!= stripe
&& devices
[j
] && devices
[j
]->devobj
) {
255 Status
= sync_read_phys(devices
[j
]->devobj
, devices
[j
]->fileobj
,
256 cis
[j
].offset
+ context
->stripes
[stripe
].stripestart
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
),
257 Vcb
->superblock
.sector_size
, sector
, false);
258 if (!NT_SUCCESS(Status
)) {
259 WARN("sync_read_phys returned %08x\n", Status
);
260 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
262 uint32_t crc32b
= ~calc_crc32c(0xffffffff, sector
, Vcb
->superblock
.sector_size
);
264 if (crc32b
== context
->csum
[i
]) {
265 RtlCopyMemory(buf
+ (i
* Vcb
->superblock
.sector_size
), sector
, Vcb
->superblock
.sector_size
);
266 ERR("recovering from checksum error at %I64x, device %I64x\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), devices
[stripe
]->devitem
.dev_id
);
269 if (!Vcb
->readonly
&& !devices
[stripe
]->readonly
) { // write good data over bad
270 Status
= write_data_phys(devices
[stripe
]->devobj
, devices
[stripe
]->fileobj
,
271 cis
[stripe
].offset
+ context
->stripes
[stripe
].stripestart
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
),
272 sector
, Vcb
->superblock
.sector_size
);
273 if (!NT_SUCCESS(Status
)) {
274 WARN("write_data_phys returned %08x\n", Status
);
275 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
281 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
287 ERR("unrecoverable checksum error at %I64x\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
));
289 return STATUS_CRC_ERROR
;
297 return STATUS_SUCCESS
;
300 static NTSTATUS
read_data_raid0(device_extension
* Vcb
, uint8_t* buf
, uint64_t addr
, uint32_t length
, read_data_context
* context
,
301 CHUNK_ITEM
* ci
, device
** devices
, uint64_t generation
, uint64_t offset
) {
304 for (i
= 0; i
< ci
->num_stripes
; i
++) {
305 if (context
->stripes
[i
].status
== ReadDataStatus_Error
) {
306 WARN("stripe %I64u returned error %08x\n", i
, context
->stripes
[i
].iosb
.Status
);
307 log_device_error(Vcb
, devices
[i
], BTRFS_DEV_STAT_READ_ERRORS
);
308 return context
->stripes
[i
].iosb
.Status
;
312 if (context
->tree
) { // shouldn't happen, as trees shouldn't cross stripe boundaries
313 tree_header
* th
= (tree_header
*)buf
;
314 uint32_t crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
316 if (crc32
!= *((uint32_t*)th
->csum
) || addr
!= th
->address
|| (generation
!= 0 && generation
!= th
->generation
)) {
320 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
, &off
, &stripe
);
322 ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr
, devices
[stripe
]->devitem
.dev_id
);
324 if (crc32
!= *((uint32_t*)th
->csum
)) {
325 WARN("crc32 was %08x, expected %08x\n", crc32
, *((uint32_t*)th
->csum
));
326 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
327 return STATUS_CRC_ERROR
;
328 } else if (addr
!= th
->address
) {
329 WARN("address of tree was %I64x, not %I64x as expected\n", th
->address
, addr
);
330 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
331 return STATUS_CRC_ERROR
;
332 } else if (generation
!= 0 && generation
!= th
->generation
) {
333 WARN("generation of tree was %I64x, not %I64x as expected\n", th
->generation
, generation
);
334 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_GENERATION_ERRORS
);
335 return STATUS_CRC_ERROR
;
338 } else if (context
->csum
) {
341 Status
= check_csum(Vcb
, buf
, length
/ Vcb
->superblock
.sector_size
, context
->csum
);
343 if (Status
== STATUS_CRC_ERROR
) {
344 for (i
= 0; i
< length
/ Vcb
->superblock
.sector_size
; i
++) {
345 uint32_t crc32
= ~calc_crc32c(0xffffffff, buf
+ (i
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
347 if (context
->csum
[i
] != crc32
) {
351 get_raid0_offset(addr
- offset
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), ci
->stripe_length
, ci
->num_stripes
, &off
, &stripe
);
353 ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr
, devices
[stripe
]->devitem
.dev_id
);
355 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
362 } else if (!NT_SUCCESS(Status
)) {
363 ERR("check_csum returned %08x\n", Status
);
368 return STATUS_SUCCESS
;
371 static NTSTATUS
read_data_raid10(device_extension
* Vcb
, uint8_t* buf
, uint64_t addr
, uint32_t length
, read_data_context
* context
,
372 CHUNK_ITEM
* ci
, device
** devices
, uint64_t generation
, uint64_t offset
) {
376 bool checksum_error
= false;
377 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&ci
[1];
379 for (j
= 0; j
< ci
->num_stripes
; j
++) {
380 if (context
->stripes
[j
].status
== ReadDataStatus_Error
) {
381 WARN("stripe %I64u returned error %08x\n", j
, context
->stripes
[j
].iosb
.Status
);
382 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
383 return context
->stripes
[j
].iosb
.Status
;
384 } else if (context
->stripes
[j
].status
== ReadDataStatus_Success
)
389 tree_header
* th
= (tree_header
*)buf
;
390 uint32_t crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
392 if (crc32
!= *((uint32_t*)th
->csum
)) {
393 WARN("crc32 was %08x, expected %08x\n", crc32
, *((uint32_t*)th
->csum
));
394 checksum_error
= true;
395 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
396 } else if (addr
!= th
->address
) {
397 WARN("address of tree was %I64x, not %I64x as expected\n", th
->address
, addr
);
398 checksum_error
= true;
399 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
400 } else if (generation
!= 0 && generation
!= th
->generation
) {
401 WARN("generation of tree was %I64x, not %I64x as expected\n", th
->generation
, generation
);
402 checksum_error
= true;
403 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_GENERATION_ERRORS
);
405 } else if (context
->csum
) {
406 Status
= check_csum(Vcb
, buf
, length
/ Vcb
->superblock
.sector_size
, context
->csum
);
408 if (Status
== STATUS_CRC_ERROR
)
409 checksum_error
= true;
410 else if (!NT_SUCCESS(Status
)) {
411 ERR("check_csum returned %08x\n", Status
);
417 return STATUS_SUCCESS
;
422 uint16_t badsubstripe
= 0;
423 bool recovered
= false;
425 t2
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.node_size
, ALLOC_TAG
);
427 ERR("out of memory\n");
428 return STATUS_INSUFFICIENT_RESOURCES
;
431 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
/ ci
->sub_stripes
, &off
, &stripe
);
433 stripe
*= ci
->sub_stripes
;
435 for (j
= 0; j
< ci
->sub_stripes
; j
++) {
436 if (context
->stripes
[stripe
+ j
].status
== ReadDataStatus_Success
) {
442 for (j
= 0; j
< ci
->sub_stripes
; j
++) {
443 if (context
->stripes
[stripe
+ j
].status
!= ReadDataStatus_Success
&& devices
[stripe
+ j
] && devices
[stripe
+ j
]->devobj
) {
444 Status
= sync_read_phys(devices
[stripe
+ j
]->devobj
, devices
[stripe
+ j
]->fileobj
, cis
[stripe
+ j
].offset
+ off
,
445 Vcb
->superblock
.node_size
, (uint8_t*)t2
, false);
446 if (!NT_SUCCESS(Status
)) {
447 WARN("sync_read_phys returned %08x\n", Status
);
448 log_device_error(Vcb
, devices
[stripe
+ j
], BTRFS_DEV_STAT_READ_ERRORS
);
450 uint32_t crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&t2
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(t2
->csum
));
452 if (t2
->address
== addr
&& crc32
== *((uint32_t*)t2
->csum
) && (generation
== 0 || t2
->generation
== generation
)) {
453 RtlCopyMemory(buf
, t2
, Vcb
->superblock
.node_size
);
454 ERR("recovering from checksum error at %I64x, device %I64x\n", addr
, devices
[stripe
+ j
]->devitem
.dev_id
);
457 if (!Vcb
->readonly
&& !devices
[stripe
+ badsubstripe
]->readonly
&& devices
[stripe
+ badsubstripe
]->devobj
) { // write good data over bad
458 Status
= write_data_phys(devices
[stripe
+ badsubstripe
]->devobj
, devices
[stripe
+ badsubstripe
]->fileobj
,
459 cis
[stripe
+ badsubstripe
].offset
+ off
, t2
, Vcb
->superblock
.node_size
);
460 if (!NT_SUCCESS(Status
)) {
461 WARN("write_data_phys returned %08x\n", Status
);
462 log_device_error(Vcb
, devices
[stripe
+ badsubstripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
467 } else if (t2
->address
!= addr
|| crc32
!= *((uint32_t*)t2
->csum
))
468 log_device_error(Vcb
, devices
[stripe
+ j
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
470 log_device_error(Vcb
, devices
[stripe
+ j
], BTRFS_DEV_STAT_GENERATION_ERRORS
);
476 ERR("unrecoverable checksum error at %I64x\n", addr
);
478 return STATUS_CRC_ERROR
;
483 ULONG sectors
= length
/ Vcb
->superblock
.sector_size
;
486 sector
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.sector_size
, ALLOC_TAG
);
488 ERR("out of memory\n");
489 return STATUS_INSUFFICIENT_RESOURCES
;
492 for (i
= 0; i
< sectors
; i
++) {
493 uint32_t crc32
= ~calc_crc32c(0xffffffff, buf
+ (i
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
495 if (context
->csum
[i
] != crc32
) {
497 uint16_t stripe2
, badsubstripe
= 0;
498 bool recovered
= false;
500 get_raid0_offset(addr
- offset
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), ci
->stripe_length
,
501 ci
->num_stripes
/ ci
->sub_stripes
, &off
, &stripe2
);
503 stripe2
*= ci
->sub_stripes
;
505 for (j
= 0; j
< ci
->sub_stripes
; j
++) {
506 if (context
->stripes
[stripe2
+ j
].status
== ReadDataStatus_Success
) {
512 log_device_error(Vcb
, devices
[stripe2
+ badsubstripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
514 for (j
= 0; j
< ci
->sub_stripes
; j
++) {
515 if (context
->stripes
[stripe2
+ j
].status
!= ReadDataStatus_Success
&& devices
[stripe2
+ j
] && devices
[stripe2
+ j
]->devobj
) {
516 Status
= sync_read_phys(devices
[stripe2
+ j
]->devobj
, devices
[stripe2
+ j
]->fileobj
, cis
[stripe2
+ j
].offset
+ off
,
517 Vcb
->superblock
.sector_size
, sector
, false);
518 if (!NT_SUCCESS(Status
)) {
519 WARN("sync_read_phys returned %08x\n", Status
);
520 log_device_error(Vcb
, devices
[stripe2
+ j
], BTRFS_DEV_STAT_READ_ERRORS
);
522 uint32_t crc32b
= ~calc_crc32c(0xffffffff, sector
, Vcb
->superblock
.sector_size
);
524 if (crc32b
== context
->csum
[i
]) {
525 RtlCopyMemory(buf
+ (i
* Vcb
->superblock
.sector_size
), sector
, Vcb
->superblock
.sector_size
);
526 ERR("recovering from checksum error at %I64x, device %I64x\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), devices
[stripe2
+ j
]->devitem
.dev_id
);
529 if (!Vcb
->readonly
&& !devices
[stripe2
+ badsubstripe
]->readonly
&& devices
[stripe2
+ badsubstripe
]->devobj
) { // write good data over bad
530 Status
= write_data_phys(devices
[stripe2
+ badsubstripe
]->devobj
, devices
[stripe2
+ badsubstripe
]->fileobj
,
531 cis
[stripe2
+ badsubstripe
].offset
+ off
, sector
, Vcb
->superblock
.sector_size
);
532 if (!NT_SUCCESS(Status
)) {
533 WARN("write_data_phys returned %08x\n", Status
);
534 log_device_error(Vcb
, devices
[stripe2
+ badsubstripe
], BTRFS_DEV_STAT_READ_ERRORS
);
540 log_device_error(Vcb
, devices
[stripe2
+ j
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
546 ERR("unrecoverable checksum error at %I64x\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
));
548 return STATUS_CRC_ERROR
;
556 return STATUS_SUCCESS
;
559 static NTSTATUS
read_data_raid5(device_extension
* Vcb
, uint8_t* buf
, uint64_t addr
, uint32_t length
, read_data_context
* context
, CHUNK_ITEM
* ci
,
560 device
** devices
, uint64_t offset
, uint64_t generation
, chunk
* c
, bool degraded
) {
563 bool checksum_error
= false;
564 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&ci
[1];
566 bool no_success
= true;
568 for (j
= 0; j
< ci
->num_stripes
; j
++) {
569 if (context
->stripes
[j
].status
== ReadDataStatus_Error
) {
570 WARN("stripe %u returned error %08x\n", j
, context
->stripes
[j
].iosb
.Status
);
571 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
572 return context
->stripes
[j
].iosb
.Status
;
573 } else if (context
->stripes
[j
].status
== ReadDataStatus_Success
) {
579 if (c
) { // check partial stripes
581 uint64_t ps_length
= (ci
->num_stripes
- 1) * ci
->stripe_length
;
583 ExAcquireResourceSharedLite(&c
->partial_stripes_lock
, true);
585 le
= c
->partial_stripes
.Flink
;
586 while (le
!= &c
->partial_stripes
) {
587 partial_stripe
* ps
= CONTAINING_RECORD(le
, partial_stripe
, list_entry
);
589 if (ps
->address
+ ps_length
> addr
&& ps
->address
< addr
+ length
) {
590 ULONG runlength
, index
;
592 runlength
= RtlFindFirstRunClear(&ps
->bmp
, &index
);
594 while (runlength
!= 0) {
596 uint64_t runstart
, runend
, start
, end
;
598 if (index
>= ps
->bmplen
)
601 if (index
+ runlength
>= ps
->bmplen
) {
602 runlength
= ps
->bmplen
- index
;
609 uint64_t runstart
= ps
->address
+ (index
* Vcb
->superblock
.sector_size
);
610 uint64_t runend
= runstart
+ (runlength
* Vcb
->superblock
.sector_size
);
611 uint64_t start
= max(runstart
, addr
);
612 uint64_t end
= min(runend
, addr
+ length
);
614 runstart
= ps
->address
+ (index
* Vcb
->superblock
.sector_size
);
615 runend
= runstart
+ (runlength
* Vcb
->superblock
.sector_size
);
616 start
= max(runstart
, addr
);
617 end
= min(runend
, addr
+ length
);
621 RtlCopyMemory(buf
+ start
- addr
, &ps
->data
[start
- ps
->address
], (ULONG
)(end
- start
));
623 runlength
= RtlFindNextForwardRunClear(&ps
->bmp
, index
+ runlength
, &index
);
625 } else if (ps
->address
>= addr
+ length
)
631 ExReleaseResourceLite(&c
->partial_stripes_lock
);
635 tree_header
* th
= (tree_header
*)buf
;
636 uint32_t crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
638 if (addr
!= th
->address
|| crc32
!= *((uint32_t*)th
->csum
)) {
639 checksum_error
= true;
640 if (!no_success
&& !degraded
)
641 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
642 } else if (generation
!= 0 && generation
!= th
->generation
) {
643 checksum_error
= true;
644 if (!no_success
&& !degraded
)
645 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_GENERATION_ERRORS
);
647 } else if (context
->csum
) {
648 Status
= check_csum(Vcb
, buf
, length
/ Vcb
->superblock
.sector_size
, context
->csum
);
650 if (Status
== STATUS_CRC_ERROR
) {
652 WARN("checksum error\n");
653 checksum_error
= true;
654 } else if (!NT_SUCCESS(Status
)) {
655 ERR("check_csum returned %08x\n", Status
);
659 checksum_error
= true;
662 return STATUS_SUCCESS
;
667 bool recovered
= false, first
= true, failed
= false;
670 t2
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.node_size
* 2, ALLOC_TAG
);
672 ERR("out of memory\n");
673 return STATUS_INSUFFICIENT_RESOURCES
;
676 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
- 1, &off
, &stripe
);
678 parity
= (((addr
- offset
) / ((ci
->num_stripes
- 1) * ci
->stripe_length
)) + ci
->num_stripes
- 1) % ci
->num_stripes
;
680 stripe
= (parity
+ stripe
+ 1) % ci
->num_stripes
;
682 for (j
= 0; j
< ci
->num_stripes
; j
++) {
684 if (devices
[j
] && devices
[j
]->devobj
) {
686 Status
= sync_read_phys(devices
[j
]->devobj
, devices
[j
]->fileobj
, cis
[j
].offset
+ off
, Vcb
->superblock
.node_size
, t2
, false);
687 if (!NT_SUCCESS(Status
)) {
688 ERR("sync_read_phys returned %08x\n", Status
);
689 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
696 Status
= sync_read_phys(devices
[j
]->devobj
, devices
[j
]->fileobj
, cis
[j
].offset
+ off
, Vcb
->superblock
.node_size
, t2
+ Vcb
->superblock
.node_size
, false);
697 if (!NT_SUCCESS(Status
)) {
698 ERR("sync_read_phys returned %08x\n", Status
);
699 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
704 do_xor(t2
, t2
+ Vcb
->superblock
.node_size
, Vcb
->superblock
.node_size
);
714 tree_header
* t3
= (tree_header
*)t2
;
715 uint32_t crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&t3
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(t3
->csum
));
717 if (t3
->address
== addr
&& crc32
== *((uint32_t*)t3
->csum
) && (generation
== 0 || t3
->generation
== generation
)) {
718 RtlCopyMemory(buf
, t2
, Vcb
->superblock
.node_size
);
721 ERR("recovering from checksum error at %I64x, device %I64x\n", addr
, devices
[stripe
]->devitem
.dev_id
);
725 if (!Vcb
->readonly
&& devices
[stripe
] && !devices
[stripe
]->readonly
&& devices
[stripe
]->devobj
) { // write good data over bad
726 Status
= write_data_phys(devices
[stripe
]->devobj
, devices
[stripe
]->fileobj
, cis
[stripe
].offset
+ off
, t2
, Vcb
->superblock
.node_size
);
727 if (!NT_SUCCESS(Status
)) {
728 WARN("write_data_phys returned %08x\n", Status
);
729 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
736 ERR("unrecoverable checksum error at %I64x\n", addr
);
738 return STATUS_CRC_ERROR
;
743 ULONG sectors
= length
/ Vcb
->superblock
.sector_size
;
746 sector
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.sector_size
* 2, ALLOC_TAG
);
748 ERR("out of memory\n");
749 return STATUS_INSUFFICIENT_RESOURCES
;
752 for (i
= 0; i
< sectors
; i
++) {
758 crc32
= ~calc_crc32c(0xffffffff, buf
+ (i
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
760 get_raid0_offset(addr
- offset
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), ci
->stripe_length
,
761 ci
->num_stripes
- 1, &off
, &stripe
);
763 parity
= (((addr
- offset
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
)) / ((ci
->num_stripes
- 1) * ci
->stripe_length
)) + ci
->num_stripes
- 1) % ci
->num_stripes
;
765 stripe
= (parity
+ stripe
+ 1) % ci
->num_stripes
;
767 if (!devices
[stripe
] || !devices
[stripe
]->devobj
|| (context
->csum
&& context
->csum
[i
] != crc32
)) {
768 bool recovered
= false, first
= true, failed
= false;
770 if (devices
[stripe
] && devices
[stripe
]->devobj
)
771 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_READ_ERRORS
);
773 for (j
= 0; j
< ci
->num_stripes
; j
++) {
775 if (devices
[j
] && devices
[j
]->devobj
) {
777 Status
= sync_read_phys(devices
[j
]->devobj
, devices
[j
]->fileobj
, cis
[j
].offset
+ off
, Vcb
->superblock
.sector_size
, sector
, false);
778 if (!NT_SUCCESS(Status
)) {
779 ERR("sync_read_phys returned %08x\n", Status
);
781 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
787 Status
= sync_read_phys(devices
[j
]->devobj
, devices
[j
]->fileobj
, cis
[j
].offset
+ off
, Vcb
->superblock
.sector_size
,
788 sector
+ Vcb
->superblock
.sector_size
, false);
789 if (!NT_SUCCESS(Status
)) {
790 ERR("sync_read_phys returned %08x\n", Status
);
792 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
796 do_xor(sector
, sector
+ Vcb
->superblock
.sector_size
, Vcb
->superblock
.sector_size
);
807 crc32
= ~calc_crc32c(0xffffffff, sector
, Vcb
->superblock
.sector_size
);
809 if (!context
->csum
|| crc32
== context
->csum
[i
]) {
810 RtlCopyMemory(buf
+ (i
* Vcb
->superblock
.sector_size
), sector
, Vcb
->superblock
.sector_size
);
813 ERR("recovering from checksum error at %I64x, device %I64x\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), devices
[stripe
]->devitem
.dev_id
);
817 if (!Vcb
->readonly
&& devices
[stripe
] && !devices
[stripe
]->readonly
&& devices
[stripe
]->devobj
) { // write good data over bad
818 Status
= write_data_phys(devices
[stripe
]->devobj
, devices
[stripe
]->fileobj
, cis
[stripe
].offset
+ off
,
819 sector
, Vcb
->superblock
.sector_size
);
820 if (!NT_SUCCESS(Status
)) {
821 WARN("write_data_phys returned %08x\n", Status
);
822 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
829 ERR("unrecoverable checksum error at %I64x\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
));
831 return STATUS_CRC_ERROR
;
839 return STATUS_SUCCESS
;
842 void raid6_recover2(uint8_t* sectors
, uint16_t num_stripes
, ULONG sector_size
, uint16_t missing1
, uint16_t missing2
, uint8_t* out
) {
843 if (missing1
== num_stripes
- 2 || missing2
== num_stripes
- 2) { // reconstruct from q and data
844 uint16_t missing
= missing1
== (num_stripes
- 2) ? missing2
: missing1
;
847 stripe
= num_stripes
- 3;
849 if (stripe
== missing
)
850 RtlZeroMemory(out
, sector_size
);
852 RtlCopyMemory(out
, sectors
+ (stripe
* sector_size
), sector_size
);
857 galois_double(out
, sector_size
);
859 if (stripe
!= missing
)
860 do_xor(out
, sectors
+ (stripe
* sector_size
), sector_size
);
861 } while (stripe
> 0);
863 do_xor(out
, sectors
+ ((num_stripes
- 1) * sector_size
), sector_size
);
866 galois_divpower(out
, (uint8_t)missing
, sector_size
);
867 } else { // reconstruct from p and q
868 uint16_t x
, y
, stripe
;
869 uint8_t gyx
, gx
, denom
, a
, b
, *p
, *q
, *pxy
, *qxy
;
872 stripe
= num_stripes
- 3;
874 pxy
= out
+ sector_size
;
877 if (stripe
== missing1
|| stripe
== missing2
) {
878 RtlZeroMemory(qxy
, sector_size
);
879 RtlZeroMemory(pxy
, sector_size
);
881 if (stripe
== missing1
)
886 RtlCopyMemory(qxy
, sectors
+ (stripe
* sector_size
), sector_size
);
887 RtlCopyMemory(pxy
, sectors
+ (stripe
* sector_size
), sector_size
);
893 galois_double(qxy
, sector_size
);
895 if (stripe
!= missing1
&& stripe
!= missing2
) {
896 do_xor(qxy
, sectors
+ (stripe
* sector_size
), sector_size
);
897 do_xor(pxy
, sectors
+ (stripe
* sector_size
), sector_size
);
898 } else if (stripe
== missing1
)
900 else if (stripe
== missing2
)
902 } while (stripe
> 0);
904 gyx
= gpow2(y
> x
? (y
-x
) : (255-x
+y
));
907 denom
= gdiv(1, gyx
^ 1);
908 a
= gmul(gyx
, denom
);
911 p
= sectors
+ ((num_stripes
- 2) * sector_size
);
912 q
= sectors
+ ((num_stripes
- 1) * sector_size
);
914 for (j
= 0; j
< sector_size
; j
++) {
915 *qxy
= gmul(a
, *p
^ *pxy
) ^ gmul(b
, *q
^ *qxy
);
923 do_xor(out
+ sector_size
, out
, sector_size
);
924 do_xor(out
+ sector_size
, sectors
+ ((num_stripes
- 2) * sector_size
), sector_size
);
928 static NTSTATUS
read_data_raid6(device_extension
* Vcb
, uint8_t* buf
, uint64_t addr
, uint32_t length
, read_data_context
* context
, CHUNK_ITEM
* ci
,
929 device
** devices
, uint64_t offset
, uint64_t generation
, chunk
* c
, bool degraded
) {
932 bool checksum_error
= false;
933 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&ci
[1];
935 bool no_success
= true;
937 for (j
= 0; j
< ci
->num_stripes
; j
++) {
938 if (context
->stripes
[j
].status
== ReadDataStatus_Error
) {
939 WARN("stripe %u returned error %08x\n", j
, context
->stripes
[j
].iosb
.Status
);
942 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
943 return context
->stripes
[j
].iosb
.Status
;
944 } else if (context
->stripes
[j
].status
== ReadDataStatus_Success
) {
950 if (c
) { // check partial stripes
952 uint64_t ps_length
= (ci
->num_stripes
- 2) * ci
->stripe_length
;
954 ExAcquireResourceSharedLite(&c
->partial_stripes_lock
, true);
956 le
= c
->partial_stripes
.Flink
;
957 while (le
!= &c
->partial_stripes
) {
958 partial_stripe
* ps
= CONTAINING_RECORD(le
, partial_stripe
, list_entry
);
960 if (ps
->address
+ ps_length
> addr
&& ps
->address
< addr
+ length
) {
961 ULONG runlength
, index
;
963 runlength
= RtlFindFirstRunClear(&ps
->bmp
, &index
);
965 while (runlength
!= 0) {
967 uint64_t runstart
, runend
, start
, end
;
969 if (index
>= ps
->bmplen
)
972 if (index
+ runlength
>= ps
->bmplen
) {
973 runlength
= ps
->bmplen
- index
;
980 uint64_t runstart
= ps
->address
+ (index
* Vcb
->superblock
.sector_size
);
981 uint64_t runend
= runstart
+ (runlength
* Vcb
->superblock
.sector_size
);
982 uint64_t start
= max(runstart
, addr
);
983 uint64_t end
= min(runend
, addr
+ length
);
985 runstart
= ps
->address
+ (index
* Vcb
->superblock
.sector_size
);
986 runend
= runstart
+ (runlength
* Vcb
->superblock
.sector_size
);
987 start
= max(runstart
, addr
);
988 end
= min(runend
, addr
+ length
);
992 RtlCopyMemory(buf
+ start
- addr
, &ps
->data
[start
- ps
->address
], (ULONG
)(end
- start
));
994 runlength
= RtlFindNextForwardRunClear(&ps
->bmp
, index
+ runlength
, &index
);
996 } else if (ps
->address
>= addr
+ length
)
1002 ExReleaseResourceLite(&c
->partial_stripes_lock
);
1005 if (context
->tree
) {
1006 tree_header
* th
= (tree_header
*)buf
;
1007 uint32_t crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
1009 if (addr
!= th
->address
|| crc32
!= *((uint32_t*)th
->csum
)) {
1010 checksum_error
= true;
1011 if (!no_success
&& !degraded
&& devices
[stripe
])
1012 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1013 } else if (generation
!= 0 && generation
!= th
->generation
) {
1014 checksum_error
= true;
1015 if (!no_success
&& !degraded
&& devices
[stripe
])
1016 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_GENERATION_ERRORS
);
1018 } else if (context
->csum
) {
1019 Status
= check_csum(Vcb
, buf
, length
/ Vcb
->superblock
.sector_size
, context
->csum
);
1021 if (Status
== STATUS_CRC_ERROR
) {
1023 WARN("checksum error\n");
1024 checksum_error
= true;
1025 } else if (!NT_SUCCESS(Status
)) {
1026 ERR("check_csum returned %08x\n", Status
);
1029 } else if (degraded
)
1030 checksum_error
= true;
1032 if (!checksum_error
)
1033 return STATUS_SUCCESS
;
1035 if (context
->tree
) {
1037 uint16_t k
, physstripe
, parity1
, parity2
, error_stripe
;
1039 bool recovered
= false, failed
= false;
1040 ULONG num_errors
= 0;
1042 sector
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.node_size
* (ci
->num_stripes
+ 2), ALLOC_TAG
);
1044 ERR("out of memory\n");
1045 return STATUS_INSUFFICIENT_RESOURCES
;
1048 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
- 2, &off
, &stripe
);
1050 parity1
= (((addr
- offset
) / ((ci
->num_stripes
- 2) * ci
->stripe_length
)) + ci
->num_stripes
- 2) % ci
->num_stripes
;
1051 parity2
= (parity1
+ 1) % ci
->num_stripes
;
1053 physstripe
= (parity2
+ stripe
+ 1) % ci
->num_stripes
;
1055 j
= (parity2
+ 1) % ci
->num_stripes
;
1057 for (k
= 0; k
< ci
->num_stripes
- 1; k
++) {
1058 if (j
!= physstripe
) {
1059 if (devices
[j
] && devices
[j
]->devobj
) {
1060 Status
= sync_read_phys(devices
[j
]->devobj
, devices
[j
]->fileobj
, cis
[j
].offset
+ off
, Vcb
->superblock
.node_size
,
1061 sector
+ (k
* Vcb
->superblock
.node_size
), false);
1062 if (!NT_SUCCESS(Status
)) {
1063 ERR("sync_read_phys returned %08x\n", Status
);
1064 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
1068 if (num_errors
> 1) {
1077 if (num_errors
> 1) {
1084 j
= (j
+ 1) % ci
->num_stripes
;
1088 if (num_errors
== 0) {
1089 tree_header
* th
= (tree_header
*)(sector
+ (stripe
* Vcb
->superblock
.node_size
));
1092 RtlCopyMemory(sector
+ (stripe
* Vcb
->superblock
.node_size
), sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.node_size
),
1093 Vcb
->superblock
.node_size
);
1095 for (j
= 0; j
< ci
->num_stripes
- 2; j
++) {
1097 do_xor(sector
+ (stripe
* Vcb
->superblock
.node_size
), sector
+ (j
* Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1100 crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
1102 if (th
->address
== addr
&& crc32
== *((uint32_t*)th
->csum
) && (generation
== 0 || th
->generation
== generation
)) {
1103 RtlCopyMemory(buf
, sector
+ (stripe
* Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1105 if (devices
[physstripe
] && devices
[physstripe
]->devobj
)
1106 ERR("recovering from checksum error at %I64x, device %I64x\n", addr
, devices
[physstripe
]->devitem
.dev_id
);
1110 if (!Vcb
->readonly
&& devices
[physstripe
] && devices
[physstripe
]->devobj
&& !devices
[physstripe
]->readonly
) { // write good data over bad
1111 Status
= write_data_phys(devices
[physstripe
]->devobj
, devices
[physstripe
]->fileobj
, cis
[physstripe
].offset
+ off
,
1112 sector
+ (stripe
* Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1113 if (!NT_SUCCESS(Status
)) {
1114 WARN("write_data_phys returned %08x\n", Status
);
1115 log_device_error(Vcb
, devices
[physstripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
1123 tree_header
* th
= (tree_header
*)(sector
+ (ci
->num_stripes
* Vcb
->superblock
.node_size
));
1124 bool read_q
= false;
1126 if (devices
[parity2
] && devices
[parity2
]->devobj
) {
1127 Status
= sync_read_phys(devices
[parity2
]->devobj
, devices
[parity2
]->fileobj
, cis
[parity2
].offset
+ off
,
1128 Vcb
->superblock
.node_size
, sector
+ ((ci
->num_stripes
- 1) * Vcb
->superblock
.node_size
), false);
1129 if (!NT_SUCCESS(Status
)) {
1130 ERR("sync_read_phys returned %08x\n", Status
);
1131 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
1137 if (num_errors
== 1) {
1138 raid6_recover2(sector
, ci
->num_stripes
, Vcb
->superblock
.node_size
, stripe
, error_stripe
, sector
+ (ci
->num_stripes
* Vcb
->superblock
.node_size
));
1140 crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
1142 if (th
->address
== addr
&& crc32
== *((uint32_t*)th
->csum
) && (generation
== 0 || th
->generation
== generation
))
1145 for (j
= 0; j
< ci
->num_stripes
- 1; j
++) {
1147 raid6_recover2(sector
, ci
->num_stripes
, Vcb
->superblock
.node_size
, stripe
, j
, sector
+ (ci
->num_stripes
* Vcb
->superblock
.node_size
));
1149 crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
1151 if (th
->address
== addr
&& crc32
== *((uint32_t*)th
->csum
) && (generation
== 0 || th
->generation
== generation
)) {
1162 uint16_t error_stripe_phys
= (parity2
+ error_stripe
+ 1) % ci
->num_stripes
;
1164 if (devices
[physstripe
] && devices
[physstripe
]->devobj
)
1165 ERR("recovering from checksum error at %I64x, device %I64x\n", addr
, devices
[physstripe
]->devitem
.dev_id
);
1167 RtlCopyMemory(buf
, sector
+ (ci
->num_stripes
* Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1169 if (!Vcb
->readonly
&& devices
[physstripe
] && devices
[physstripe
]->devobj
&& !devices
[physstripe
]->readonly
) { // write good data over bad
1170 Status
= write_data_phys(devices
[physstripe
]->devobj
, devices
[physstripe
]->fileobj
, cis
[physstripe
].offset
+ off
,
1171 sector
+ (ci
->num_stripes
* Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1172 if (!NT_SUCCESS(Status
)) {
1173 WARN("write_data_phys returned %08x\n", Status
);
1174 log_device_error(Vcb
, devices
[physstripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
1178 if (devices
[error_stripe_phys
] && devices
[error_stripe_phys
]->devobj
) {
1179 if (error_stripe
== ci
->num_stripes
- 2) {
1180 ERR("recovering from parity error at %I64x, device %I64x\n", addr
, devices
[error_stripe_phys
]->devitem
.dev_id
);
1182 log_device_error(Vcb
, devices
[error_stripe_phys
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1184 RtlZeroMemory(sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1186 for (j
= 0; j
< ci
->num_stripes
- 2; j
++) {
1188 do_xor(sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.node_size
), sector
+ (ci
->num_stripes
* Vcb
->superblock
.node_size
),
1189 Vcb
->superblock
.node_size
);
1191 do_xor(sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.node_size
), sector
+ (j
* Vcb
->superblock
.node_size
),
1192 Vcb
->superblock
.node_size
);
1196 ERR("recovering from checksum error at %I64x, device %I64x\n", addr
+ ((error_stripe
- stripe
) * ci
->stripe_length
),
1197 devices
[error_stripe_phys
]->devitem
.dev_id
);
1199 log_device_error(Vcb
, devices
[error_stripe_phys
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1201 RtlCopyMemory(sector
+ (error_stripe
* Vcb
->superblock
.node_size
),
1202 sector
+ ((ci
->num_stripes
+ 1) * Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1206 if (!Vcb
->readonly
&& devices
[error_stripe_phys
] && devices
[error_stripe_phys
]->devobj
&& !devices
[error_stripe_phys
]->readonly
) { // write good data over bad
1207 Status
= write_data_phys(devices
[error_stripe_phys
]->devobj
, devices
[error_stripe_phys
]->fileobj
, cis
[error_stripe_phys
].offset
+ off
,
1208 sector
+ (error_stripe
* Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1209 if (!NT_SUCCESS(Status
)) {
1210 WARN("write_data_phys returned %08x\n", Status
);
1211 log_device_error(Vcb
, devices
[error_stripe_phys
], BTRFS_DEV_STAT_WRITE_ERRORS
);
1219 ERR("unrecoverable checksum error at %I64x\n", addr
);
1221 return STATUS_CRC_ERROR
;
1226 ULONG sectors
= length
/ Vcb
->superblock
.sector_size
;
1229 sector
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.sector_size
* (ci
->num_stripes
+ 2), ALLOC_TAG
);
1231 ERR("out of memory\n");
1232 return STATUS_INSUFFICIENT_RESOURCES
;
1235 for (i
= 0; i
< sectors
; i
++) {
1237 uint16_t physstripe
, parity1
, parity2
;
1241 crc32
= ~calc_crc32c(0xffffffff, buf
+ (i
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1243 get_raid0_offset(addr
- offset
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), ci
->stripe_length
,
1244 ci
->num_stripes
- 2, &off
, &stripe
);
1246 parity1
= (((addr
- offset
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
)) / ((ci
->num_stripes
- 2) * ci
->stripe_length
)) + ci
->num_stripes
- 2) % ci
->num_stripes
;
1247 parity2
= (parity1
+ 1) % ci
->num_stripes
;
1249 physstripe
= (parity2
+ stripe
+ 1) % ci
->num_stripes
;
1251 if (!devices
[physstripe
] || !devices
[physstripe
]->devobj
|| (context
->csum
&& context
->csum
[i
] != crc32
)) {
1252 uint16_t k
, error_stripe
;
1253 bool recovered
= false, failed
= false;
1254 ULONG num_errors
= 0;
1256 if (devices
[physstripe
] && devices
[physstripe
]->devobj
)
1257 log_device_error(Vcb
, devices
[physstripe
], BTRFS_DEV_STAT_READ_ERRORS
);
1259 j
= (parity2
+ 1) % ci
->num_stripes
;
1261 for (k
= 0; k
< ci
->num_stripes
- 1; k
++) {
1262 if (j
!= physstripe
) {
1263 if (devices
[j
] && devices
[j
]->devobj
) {
1264 Status
= sync_read_phys(devices
[j
]->devobj
, devices
[j
]->fileobj
, cis
[j
].offset
+ off
, Vcb
->superblock
.sector_size
,
1265 sector
+ (k
* Vcb
->superblock
.sector_size
), false);
1266 if (!NT_SUCCESS(Status
)) {
1267 ERR("sync_read_phys returned %08x\n", Status
);
1268 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
1272 if (num_errors
> 1) {
1281 if (num_errors
> 1) {
1288 j
= (j
+ 1) % ci
->num_stripes
;
1292 if (num_errors
== 0) {
1293 RtlCopyMemory(sector
+ (stripe
* Vcb
->superblock
.sector_size
), sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1295 for (j
= 0; j
< ci
->num_stripes
- 2; j
++) {
1297 do_xor(sector
+ (stripe
* Vcb
->superblock
.sector_size
), sector
+ (j
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1301 crc32
= ~calc_crc32c(0xffffffff, sector
+ (stripe
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1303 if (!context
->csum
|| crc32
== context
->csum
[i
]) {
1304 RtlCopyMemory(buf
+ (i
* Vcb
->superblock
.sector_size
), sector
+ (stripe
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1306 if (devices
[physstripe
] && devices
[physstripe
]->devobj
)
1307 ERR("recovering from checksum error at %I64x, device %I64x\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
),
1308 devices
[physstripe
]->devitem
.dev_id
);
1312 if (!Vcb
->readonly
&& devices
[physstripe
] && devices
[physstripe
]->devobj
&& !devices
[physstripe
]->readonly
) { // write good data over bad
1313 Status
= write_data_phys(devices
[physstripe
]->devobj
, devices
[physstripe
]->fileobj
, cis
[physstripe
].offset
+ off
,
1314 sector
+ (stripe
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1315 if (!NT_SUCCESS(Status
)) {
1316 WARN("write_data_phys returned %08x\n", Status
);
1317 log_device_error(Vcb
, devices
[physstripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
1324 bool read_q
= false;
1326 if (devices
[parity2
] && devices
[parity2
]->devobj
) {
1327 Status
= sync_read_phys(devices
[parity2
]->devobj
, devices
[parity2
]->fileobj
, cis
[parity2
].offset
+ off
,
1328 Vcb
->superblock
.sector_size
, sector
+ ((ci
->num_stripes
- 1) * Vcb
->superblock
.sector_size
), false);
1329 if (!NT_SUCCESS(Status
)) {
1330 ERR("sync_read_phys returned %08x\n", Status
);
1331 log_device_error(Vcb
, devices
[parity2
], BTRFS_DEV_STAT_READ_ERRORS
);
1337 if (num_errors
== 1) {
1338 raid6_recover2(sector
, ci
->num_stripes
, Vcb
->superblock
.sector_size
, stripe
, error_stripe
, sector
+ (ci
->num_stripes
* Vcb
->superblock
.sector_size
));
1340 if (!devices
[physstripe
] || !devices
[physstripe
]->devobj
)
1343 crc32
= ~calc_crc32c(0xffffffff, sector
+ (ci
->num_stripes
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1345 if (crc32
== context
->csum
[i
])
1349 for (j
= 0; j
< ci
->num_stripes
- 1; j
++) {
1351 raid6_recover2(sector
, ci
->num_stripes
, Vcb
->superblock
.sector_size
, stripe
, j
, sector
+ (ci
->num_stripes
* Vcb
->superblock
.sector_size
));
1353 crc32
= ~calc_crc32c(0xffffffff, sector
+ (ci
->num_stripes
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1355 if (crc32
== context
->csum
[i
]) {
1366 uint16_t error_stripe_phys
= (parity2
+ error_stripe
+ 1) % ci
->num_stripes
;
1368 if (devices
[physstripe
] && devices
[physstripe
]->devobj
)
1369 ERR("recovering from checksum error at %I64x, device %I64x\n",
1370 addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), devices
[physstripe
]->devitem
.dev_id
);
1372 RtlCopyMemory(buf
+ (i
* Vcb
->superblock
.sector_size
), sector
+ (ci
->num_stripes
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1374 if (!Vcb
->readonly
&& devices
[physstripe
] && devices
[physstripe
]->devobj
&& !devices
[physstripe
]->readonly
) { // write good data over bad
1375 Status
= write_data_phys(devices
[physstripe
]->devobj
, devices
[physstripe
]->fileobj
, cis
[physstripe
].offset
+ off
,
1376 sector
+ (ci
->num_stripes
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1377 if (!NT_SUCCESS(Status
)) {
1378 WARN("write_data_phys returned %08x\n", Status
);
1379 log_device_error(Vcb
, devices
[physstripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
1383 if (devices
[error_stripe_phys
] && devices
[error_stripe_phys
]->devobj
) {
1384 if (error_stripe
== ci
->num_stripes
- 2) {
1385 ERR("recovering from parity error at %I64x, device %I64x\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
),
1386 devices
[error_stripe_phys
]->devitem
.dev_id
);
1388 log_device_error(Vcb
, devices
[error_stripe_phys
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1390 RtlZeroMemory(sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1392 for (j
= 0; j
< ci
->num_stripes
- 2; j
++) {
1394 do_xor(sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.sector_size
), sector
+ (ci
->num_stripes
* Vcb
->superblock
.sector_size
),
1395 Vcb
->superblock
.sector_size
);
1397 do_xor(sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.sector_size
), sector
+ (j
* Vcb
->superblock
.sector_size
),
1398 Vcb
->superblock
.sector_size
);
1402 ERR("recovering from checksum error at %I64x, device %I64x\n",
1403 addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
) + ((error_stripe
- stripe
) * ci
->stripe_length
),
1404 devices
[error_stripe_phys
]->devitem
.dev_id
);
1406 log_device_error(Vcb
, devices
[error_stripe_phys
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1408 RtlCopyMemory(sector
+ (error_stripe
* Vcb
->superblock
.sector_size
),
1409 sector
+ ((ci
->num_stripes
+ 1) * Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1413 if (!Vcb
->readonly
&& devices
[error_stripe_phys
] && devices
[error_stripe_phys
]->devobj
&& !devices
[error_stripe_phys
]->readonly
) { // write good data over bad
1414 Status
= write_data_phys(devices
[error_stripe_phys
]->devobj
, devices
[error_stripe_phys
]->fileobj
, cis
[error_stripe_phys
].offset
+ off
,
1415 sector
+ (error_stripe
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1416 if (!NT_SUCCESS(Status
)) {
1417 WARN("write_data_phys returned %08x\n", Status
);
1418 log_device_error(Vcb
, devices
[error_stripe_phys
], BTRFS_DEV_STAT_WRITE_ERRORS
);
1426 ERR("unrecoverable checksum error at %I64x\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
));
1428 return STATUS_CRC_ERROR
;
1436 return STATUS_SUCCESS
;
1439 NTSTATUS
read_data(_In_ device_extension
* Vcb
, _In_
uint64_t addr
, _In_
uint32_t length
, _In_reads_bytes_opt_(length
*sizeof(uint32_t)/Vcb
->superblock
.sector_size
) uint32_t* csum
,
1440 _In_
bool is_tree
, _Out_writes_bytes_(length
) uint8_t* buf
, _In_opt_ chunk
* c
, _Out_opt_ chunk
** pc
, _In_opt_ PIRP Irp
, _In_
uint64_t generation
, _In_
bool file_read
,
1441 _In_ ULONG priority
) {
1443 CHUNK_ITEM_STRIPE
* cis
;
1444 read_data_context context
;
1445 uint64_t type
, offset
, total_reading
= 0;
1447 device
** devices
= NULL
;
1448 uint16_t i
, startoffstripe
, allowed_missing
, missing_devices
= 0;
1449 uint8_t* dummypage
= NULL
;
1450 PMDL dummy_mdl
= NULL
;
1452 uint64_t lockaddr
, locklen
;
1454 if (Vcb
->log_to_phys_loaded
) {
1456 c
= get_chunk_from_address(Vcb
, addr
);
1459 ERR("get_chunk_from_address failed\n");
1460 return STATUS_INTERNAL_ERROR
;
1466 devices
= c
->devices
;
1471 LIST_ENTRY
* le
= Vcb
->sys_chunks
.Flink
;
1476 while (le
!= &Vcb
->sys_chunks
) {
1477 sys_chunk
* sc
= CONTAINING_RECORD(le
, sys_chunk
, list_entry
);
1479 if (sc
->key
.obj_id
== 0x100 && sc
->key
.obj_type
== TYPE_CHUNK_ITEM
&& sc
->key
.offset
<= addr
) {
1480 CHUNK_ITEM
* chunk_item
= sc
->data
;
1482 if ((addr
- sc
->key
.offset
) < chunk_item
->size
&& chunk_item
->num_stripes
> 0) {
1484 offset
= sc
->key
.offset
;
1485 cis
= (CHUNK_ITEM_STRIPE
*)&chunk_item
[1];
1487 devices
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(device
*) * ci
->num_stripes
, ALLOC_TAG
);
1489 ERR("out of memory\n");
1490 return STATUS_INSUFFICIENT_RESOURCES
;
1493 for (i
= 0; i
< ci
->num_stripes
; i
++) {
1494 devices
[i
] = find_device_from_uuid(Vcb
, &cis
[i
].dev_uuid
);
1505 ERR("could not find chunk for %I64x in bootstrap\n", addr
);
1506 return STATUS_INTERNAL_ERROR
;
1513 if (ci
->type
& BLOCK_FLAG_DUPLICATE
) {
1514 type
= BLOCK_FLAG_DUPLICATE
;
1515 allowed_missing
= ci
->num_stripes
- 1;
1516 } else if (ci
->type
& BLOCK_FLAG_RAID0
) {
1517 type
= BLOCK_FLAG_RAID0
;
1518 allowed_missing
= 0;
1519 } else if (ci
->type
& BLOCK_FLAG_RAID1
) {
1520 type
= BLOCK_FLAG_DUPLICATE
;
1521 allowed_missing
= 1;
1522 } else if (ci
->type
& BLOCK_FLAG_RAID10
) {
1523 type
= BLOCK_FLAG_RAID10
;
1524 allowed_missing
= 1;
1525 } else if (ci
->type
& BLOCK_FLAG_RAID5
) {
1526 type
= BLOCK_FLAG_RAID5
;
1527 allowed_missing
= 1;
1528 } else if (ci
->type
& BLOCK_FLAG_RAID6
) {
1529 type
= BLOCK_FLAG_RAID6
;
1530 allowed_missing
= 2;
1532 type
= BLOCK_FLAG_DUPLICATE
;
1533 allowed_missing
= 0;
1536 cis
= (CHUNK_ITEM_STRIPE
*)&ci
[1];
1538 RtlZeroMemory(&context
, sizeof(read_data_context
));
1539 KeInitializeEvent(&context
.Event
, NotificationEvent
, false);
1541 context
.stripes
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(read_data_stripe
) * ci
->num_stripes
, ALLOC_TAG
);
1542 if (!context
.stripes
) {
1543 ERR("out of memory\n");
1544 return STATUS_INSUFFICIENT_RESOURCES
;
1547 if (c
&& (type
== BLOCK_FLAG_RAID5
|| type
== BLOCK_FLAG_RAID6
)) {
1548 get_raid56_lock_range(c
, addr
, length
, &lockaddr
, &locklen
);
1549 chunk_lock_range(Vcb
, c
, lockaddr
, locklen
);
1552 RtlZeroMemory(context
.stripes
, sizeof(read_data_stripe
) * ci
->num_stripes
);
1554 context
.buflen
= length
;
1555 context
.num_stripes
= ci
->num_stripes
;
1556 context
.stripes_left
= context
.num_stripes
;
1557 context
.sector_size
= Vcb
->superblock
.sector_size
;
1558 context
.csum
= csum
;
1559 context
.tree
= is_tree
;
1560 context
.type
= type
;
1562 if (type
== BLOCK_FLAG_RAID0
) {
1563 uint64_t startoff
, endoff
;
1564 uint16_t endoffstripe
, stripe
;
1565 uint32_t *stripeoff
, pos
;
1569 // FIXME - test this still works if page size isn't the same as sector size
1571 // This relies on the fact that MDLs are followed in memory by the page file numbers,
1572 // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0
1573 // data for you without doing a memcpy yourself.
1574 // MDLs are officially opaque, so this might very well break in future versions of Windows.
1576 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
, &startoff
, &startoffstripe
);
1577 get_raid0_offset(addr
+ length
- offset
- 1, ci
->stripe_length
, ci
->num_stripes
, &endoff
, &endoffstripe
);
1580 // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL
1581 // with duplicated dummy PFNs, which confuse check_csum. Ah well.
1582 // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested.
1584 context
.va
= ExAllocatePoolWithTag(NonPagedPool
, length
, ALLOC_TAG
);
1587 ERR("out of memory\n");
1588 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1594 master_mdl
= IoAllocateMdl(context
.va
, length
, false, false, NULL
);
1596 ERR("out of memory\n");
1597 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1601 Status
= STATUS_SUCCESS
;
1604 MmProbeAndLockPages(master_mdl
, KernelMode
, IoWriteAccess
);
1605 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
1606 Status
= _SEH2_GetExceptionCode();
1609 if (!NT_SUCCESS(Status
)) {
1610 ERR("MmProbeAndLockPages threw exception %08x\n", Status
);
1611 IoFreeMdl(master_mdl
);
1615 pfns
= (PFN_NUMBER
*)(master_mdl
+ 1);
1617 for (i
= 0; i
< ci
->num_stripes
; i
++) {
1618 if (startoffstripe
> i
)
1619 context
.stripes
[i
].stripestart
= startoff
- (startoff
% ci
->stripe_length
) + ci
->stripe_length
;
1620 else if (startoffstripe
== i
)
1621 context
.stripes
[i
].stripestart
= startoff
;
1623 context
.stripes
[i
].stripestart
= startoff
- (startoff
% ci
->stripe_length
);
1625 if (endoffstripe
> i
)
1626 context
.stripes
[i
].stripeend
= endoff
- (endoff
% ci
->stripe_length
) + ci
->stripe_length
;
1627 else if (endoffstripe
== i
)
1628 context
.stripes
[i
].stripeend
= endoff
+ 1;
1630 context
.stripes
[i
].stripeend
= endoff
- (endoff
% ci
->stripe_length
);
1632 if (context
.stripes
[i
].stripestart
!= context
.stripes
[i
].stripeend
) {
1633 context
.stripes
[i
].mdl
= IoAllocateMdl(context
.va
, (ULONG
)(context
.stripes
[i
].stripeend
- context
.stripes
[i
].stripestart
), false, false, NULL
);
1635 if (!context
.stripes
[i
].mdl
) {
1636 ERR("IoAllocateMdl failed\n");
1637 MmUnlockPages(master_mdl
);
1638 IoFreeMdl(master_mdl
);
1639 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1645 stripeoff
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(uint32_t) * ci
->num_stripes
, ALLOC_TAG
);
1647 ERR("out of memory\n");
1648 MmUnlockPages(master_mdl
);
1649 IoFreeMdl(master_mdl
);
1650 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1654 RtlZeroMemory(stripeoff
, sizeof(uint32_t) * ci
->num_stripes
);
1657 stripe
= startoffstripe
;
1658 while (pos
< length
) {
1659 PFN_NUMBER
* stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
1662 uint32_t readlen
= (uint32_t)min(context
.stripes
[stripe
].stripeend
- context
.stripes
[stripe
].stripestart
, ci
->stripe_length
- (context
.stripes
[stripe
].stripestart
% ci
->stripe_length
));
1664 RtlCopyMemory(stripe_pfns
, pfns
, readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1666 stripeoff
[stripe
] += readlen
;
1668 } else if (length
- pos
< ci
->stripe_length
) {
1669 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (length
- pos
) * sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1673 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (ULONG
)(ci
->stripe_length
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
1675 stripeoff
[stripe
] += (uint32_t)ci
->stripe_length
;
1676 pos
+= (uint32_t)ci
->stripe_length
;
1679 stripe
= (stripe
+ 1) % ci
->num_stripes
;
1682 MmUnlockPages(master_mdl
);
1683 IoFreeMdl(master_mdl
);
1685 ExFreePool(stripeoff
);
1686 } else if (type
== BLOCK_FLAG_RAID10
) {
1687 uint64_t startoff
, endoff
;
1688 uint16_t endoffstripe
, j
, stripe
;
1692 uint32_t* stripeoff
, pos
;
1693 read_data_stripe
** stripes
;
1696 orig_ls
= c
->last_stripe
;
1700 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
/ ci
->sub_stripes
, &startoff
, &startoffstripe
);
1701 get_raid0_offset(addr
+ length
- offset
- 1, ci
->stripe_length
, ci
->num_stripes
/ ci
->sub_stripes
, &endoff
, &endoffstripe
);
1703 if ((ci
->num_stripes
% ci
->sub_stripes
) != 0) {
1704 ERR("chunk %I64x: num_stripes %x was not a multiple of sub_stripes %x!\n", offset
, ci
->num_stripes
, ci
->sub_stripes
);
1705 Status
= STATUS_INTERNAL_ERROR
;
1710 context
.va
= ExAllocatePoolWithTag(NonPagedPool
, length
, ALLOC_TAG
);
1713 ERR("out of memory\n");
1714 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1720 context
.firstoff
= (uint16_t)((startoff
% ci
->stripe_length
) / Vcb
->superblock
.sector_size
);
1721 context
.startoffstripe
= startoffstripe
;
1722 context
.sectors_per_stripe
= (uint16_t)(ci
->stripe_length
/ Vcb
->superblock
.sector_size
);
1724 startoffstripe
*= ci
->sub_stripes
;
1725 endoffstripe
*= ci
->sub_stripes
;
1728 c
->last_stripe
= (orig_ls
+ 1) % ci
->sub_stripes
;
1730 master_mdl
= IoAllocateMdl(context
.va
, length
, false, false, NULL
);
1732 ERR("out of memory\n");
1733 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1737 Status
= STATUS_SUCCESS
;
1740 MmProbeAndLockPages(master_mdl
, KernelMode
, IoWriteAccess
);
1741 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
1742 Status
= _SEH2_GetExceptionCode();
1745 if (!NT_SUCCESS(Status
)) {
1746 ERR("MmProbeAndLockPages threw exception %08x\n", Status
);
1747 IoFreeMdl(master_mdl
);
1751 pfns
= (PFN_NUMBER
*)(master_mdl
+ 1);
1753 stripes
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(read_data_stripe
*) * ci
->num_stripes
/ ci
->sub_stripes
, ALLOC_TAG
);
1755 ERR("out of memory\n");
1756 MmUnlockPages(master_mdl
);
1757 IoFreeMdl(master_mdl
);
1758 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1762 RtlZeroMemory(stripes
, sizeof(read_data_stripe
*) * ci
->num_stripes
/ ci
->sub_stripes
);
1764 for (i
= 0; i
< ci
->num_stripes
; i
+= ci
->sub_stripes
) {
1765 uint64_t sstart
, send
;
1766 bool stripeset
= false;
1768 if (startoffstripe
> i
)
1769 sstart
= startoff
- (startoff
% ci
->stripe_length
) + ci
->stripe_length
;
1770 else if (startoffstripe
== i
)
1773 sstart
= startoff
- (startoff
% ci
->stripe_length
);
1775 if (endoffstripe
> i
)
1776 send
= endoff
- (endoff
% ci
->stripe_length
) + ci
->stripe_length
;
1777 else if (endoffstripe
== i
)
1780 send
= endoff
- (endoff
% ci
->stripe_length
);
1782 for (j
= 0; j
< ci
->sub_stripes
; j
++) {
1783 if (j
== orig_ls
&& devices
[i
+j
] && devices
[i
+j
]->devobj
) {
1784 context
.stripes
[i
+j
].stripestart
= sstart
;
1785 context
.stripes
[i
+j
].stripeend
= send
;
1786 stripes
[i
/ ci
->sub_stripes
] = &context
.stripes
[i
+j
];
1788 if (sstart
!= send
) {
1789 context
.stripes
[i
+j
].mdl
= IoAllocateMdl(context
.va
, (ULONG
)(send
- sstart
), false, false, NULL
);
1791 if (!context
.stripes
[i
+j
].mdl
) {
1792 ERR("IoAllocateMdl failed\n");
1793 MmUnlockPages(master_mdl
);
1794 IoFreeMdl(master_mdl
);
1795 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1802 context
.stripes
[i
+j
].status
= ReadDataStatus_Skip
;
1806 for (j
= 0; j
< ci
->sub_stripes
; j
++) {
1807 if (devices
[i
+j
] && devices
[i
+j
]->devobj
) {
1808 context
.stripes
[i
+j
].stripestart
= sstart
;
1809 context
.stripes
[i
+j
].stripeend
= send
;
1810 context
.stripes
[i
+j
].status
= ReadDataStatus_Pending
;
1811 stripes
[i
/ ci
->sub_stripes
] = &context
.stripes
[i
+j
];
1813 if (sstart
!= send
) {
1814 context
.stripes
[i
+j
].mdl
= IoAllocateMdl(context
.va
, (ULONG
)(send
- sstart
), false, false, NULL
);
1816 if (!context
.stripes
[i
+j
].mdl
) {
1817 ERR("IoAllocateMdl failed\n");
1818 MmUnlockPages(master_mdl
);
1819 IoFreeMdl(master_mdl
);
1820 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1831 ERR("could not find stripe to read\n");
1832 Status
= STATUS_DEVICE_NOT_READY
;
1838 stripeoff
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(uint32_t) * ci
->num_stripes
/ ci
->sub_stripes
, ALLOC_TAG
);
1840 ERR("out of memory\n");
1841 MmUnlockPages(master_mdl
);
1842 IoFreeMdl(master_mdl
);
1843 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1847 RtlZeroMemory(stripeoff
, sizeof(uint32_t) * ci
->num_stripes
/ ci
->sub_stripes
);
1850 stripe
= startoffstripe
/ ci
->sub_stripes
;
1851 while (pos
< length
) {
1852 PFN_NUMBER
* stripe_pfns
= (PFN_NUMBER
*)(stripes
[stripe
]->mdl
+ 1);
1855 uint32_t readlen
= (uint32_t)min(stripes
[stripe
]->stripeend
- stripes
[stripe
]->stripestart
,
1856 ci
->stripe_length
- (stripes
[stripe
]->stripestart
% ci
->stripe_length
));
1858 RtlCopyMemory(stripe_pfns
, pfns
, readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1860 stripeoff
[stripe
] += readlen
;
1862 } else if (length
- pos
< ci
->stripe_length
) {
1863 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (length
- pos
) * sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1867 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (ULONG
)(ci
->stripe_length
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
1869 stripeoff
[stripe
] += (ULONG
)ci
->stripe_length
;
1870 pos
+= (ULONG
)ci
->stripe_length
;
1873 stripe
= (stripe
+ 1) % (ci
->num_stripes
/ ci
->sub_stripes
);
1876 MmUnlockPages(master_mdl
);
1877 IoFreeMdl(master_mdl
);
1879 ExFreePool(stripeoff
);
1880 ExFreePool(stripes
);
1881 } else if (type
== BLOCK_FLAG_DUPLICATE
) {
1885 orig_ls
= i
= c
->last_stripe
;
1889 while (!devices
[i
] || !devices
[i
]->devobj
) {
1890 i
= (i
+ 1) % ci
->num_stripes
;
1893 ERR("no devices available to service request\n");
1894 Status
= STATUS_DEVICE_NOT_READY
;
1900 c
->last_stripe
= (i
+ 1) % ci
->num_stripes
;
1902 context
.stripes
[i
].stripestart
= addr
- offset
;
1903 context
.stripes
[i
].stripeend
= context
.stripes
[i
].stripestart
+ length
;
1906 context
.va
= ExAllocatePoolWithTag(NonPagedPool
, length
, ALLOC_TAG
);
1909 ERR("out of memory\n");
1910 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1914 context
.stripes
[i
].mdl
= IoAllocateMdl(context
.va
, length
, false, false, NULL
);
1915 if (!context
.stripes
[i
].mdl
) {
1916 ERR("IoAllocateMdl failed\n");
1917 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1921 MmBuildMdlForNonPagedPool(context
.stripes
[i
].mdl
);
1923 context
.stripes
[i
].mdl
= IoAllocateMdl(buf
, length
, false, false, NULL
);
1925 if (!context
.stripes
[i
].mdl
) {
1926 ERR("IoAllocateMdl failed\n");
1927 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1931 Status
= STATUS_SUCCESS
;
1934 MmProbeAndLockPages(context
.stripes
[i
].mdl
, KernelMode
, IoWriteAccess
);
1935 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
1936 Status
= _SEH2_GetExceptionCode();
1939 if (!NT_SUCCESS(Status
)) {
1940 ERR("MmProbeAndLockPages threw exception %08x\n", Status
);
1944 } else if (type
== BLOCK_FLAG_RAID5
) {
1945 uint64_t startoff
, endoff
;
1946 uint16_t endoffstripe
, parity
;
1947 uint32_t *stripeoff
, pos
;
1949 PFN_NUMBER
*pfns
, dummy
;
1950 bool need_dummy
= false;
1952 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
- 1, &startoff
, &startoffstripe
);
1953 get_raid0_offset(addr
+ length
- offset
- 1, ci
->stripe_length
, ci
->num_stripes
- 1, &endoff
, &endoffstripe
);
1956 context
.va
= ExAllocatePoolWithTag(NonPagedPool
, length
, ALLOC_TAG
);
1959 ERR("out of memory\n");
1960 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1966 master_mdl
= IoAllocateMdl(context
.va
, length
, false, false, NULL
);
1968 ERR("out of memory\n");
1969 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1973 Status
= STATUS_SUCCESS
;
1976 MmProbeAndLockPages(master_mdl
, KernelMode
, IoWriteAccess
);
1977 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
1978 Status
= _SEH2_GetExceptionCode();
1981 if (!NT_SUCCESS(Status
)) {
1982 ERR("MmProbeAndLockPages threw exception %08x\n", Status
);
1983 IoFreeMdl(master_mdl
);
1987 pfns
= (PFN_NUMBER
*)(master_mdl
+ 1);
1990 while (pos
< length
) {
1991 parity
= (((addr
- offset
+ pos
) / ((ci
->num_stripes
- 1) * ci
->stripe_length
)) + ci
->num_stripes
- 1) % ci
->num_stripes
;
1994 uint16_t stripe
= (parity
+ startoffstripe
+ 1) % ci
->num_stripes
;
1995 ULONG skip
, readlen
;
1998 while (stripe
!= parity
) {
1999 if (i
== startoffstripe
) {
2000 readlen
= min(length
, (ULONG
)(ci
->stripe_length
- (startoff
% ci
->stripe_length
)));
2002 context
.stripes
[stripe
].stripestart
= startoff
;
2003 context
.stripes
[stripe
].stripeend
= startoff
+ readlen
;
2010 readlen
= min(length
- pos
, (ULONG
)ci
->stripe_length
);
2012 context
.stripes
[stripe
].stripestart
= startoff
- (startoff
% ci
->stripe_length
);
2013 context
.stripes
[stripe
].stripeend
= context
.stripes
[stripe
].stripestart
+ readlen
;
2022 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2028 for (i
= 0; i
< startoffstripe
; i
++) {
2029 uint16_t stripe2
= (parity
+ i
+ 1) % ci
->num_stripes
;
2031 context
.stripes
[stripe2
].stripestart
= context
.stripes
[stripe2
].stripeend
= startoff
- (startoff
% ci
->stripe_length
) + ci
->stripe_length
;
2034 context
.stripes
[parity
].stripestart
= context
.stripes
[parity
].stripeend
= startoff
- (startoff
% ci
->stripe_length
) + ci
->stripe_length
;
2036 if (length
- pos
> ci
->num_stripes
* (ci
->num_stripes
- 1) * ci
->stripe_length
) {
2037 skip
= (ULONG
)(((length
- pos
) / (ci
->num_stripes
* (ci
->num_stripes
- 1) * ci
->stripe_length
)) - 1);
2039 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2040 context
.stripes
[i
].stripeend
+= skip
* ci
->num_stripes
* ci
->stripe_length
;
2043 pos
+= (uint32_t)(skip
* (ci
->num_stripes
- 1) * ci
->num_stripes
* ci
->stripe_length
);
2046 } else if (length
- pos
>= ci
->stripe_length
* (ci
->num_stripes
- 1)) {
2047 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2048 context
.stripes
[i
].stripeend
+= ci
->stripe_length
;
2051 pos
+= (uint32_t)(ci
->stripe_length
* (ci
->num_stripes
- 1));
2054 uint16_t stripe
= (parity
+ 1) % ci
->num_stripes
;
2057 while (stripe
!= parity
) {
2058 if (endoffstripe
== i
) {
2059 context
.stripes
[stripe
].stripeend
= endoff
+ 1;
2061 } else if (endoffstripe
> i
)
2062 context
.stripes
[stripe
].stripeend
= endoff
- (endoff
% ci
->stripe_length
) + ci
->stripe_length
;
2065 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2072 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2073 if (context
.stripes
[i
].stripestart
!= context
.stripes
[i
].stripeend
) {
2074 context
.stripes
[i
].mdl
= IoAllocateMdl(context
.va
, (ULONG
)(context
.stripes
[i
].stripeend
- context
.stripes
[i
].stripestart
),
2075 false, false, NULL
);
2077 if (!context
.stripes
[i
].mdl
) {
2078 ERR("IoAllocateMdl failed\n");
2079 MmUnlockPages(master_mdl
);
2080 IoFreeMdl(master_mdl
);
2081 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2088 dummypage
= ExAllocatePoolWithTag(NonPagedPool
, PAGE_SIZE
, ALLOC_TAG
);
2090 ERR("out of memory\n");
2091 MmUnlockPages(master_mdl
);
2092 IoFreeMdl(master_mdl
);
2093 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2097 dummy_mdl
= IoAllocateMdl(dummypage
, PAGE_SIZE
, false, false, NULL
);
2099 ERR("IoAllocateMdl failed\n");
2100 MmUnlockPages(master_mdl
);
2101 IoFreeMdl(master_mdl
);
2102 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2106 MmBuildMdlForNonPagedPool(dummy_mdl
);
2108 dummy
= *(PFN_NUMBER
*)(dummy_mdl
+ 1);
2111 stripeoff
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(uint32_t) * ci
->num_stripes
, ALLOC_TAG
);
2113 ERR("out of memory\n");
2114 MmUnlockPages(master_mdl
);
2115 IoFreeMdl(master_mdl
);
2116 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2120 RtlZeroMemory(stripeoff
, sizeof(uint32_t) * ci
->num_stripes
);
2124 while (pos
< length
) {
2125 PFN_NUMBER
* stripe_pfns
;
2127 parity
= (((addr
- offset
+ pos
) / ((ci
->num_stripes
- 1) * ci
->stripe_length
)) + ci
->num_stripes
- 1) % ci
->num_stripes
;
2130 uint16_t stripe
= (parity
+ startoffstripe
+ 1) % ci
->num_stripes
;
2131 uint32_t readlen
= min(length
- pos
, (uint32_t)min(context
.stripes
[stripe
].stripeend
- context
.stripes
[stripe
].stripestart
,
2132 ci
->stripe_length
- (context
.stripes
[stripe
].stripestart
% ci
->stripe_length
)));
2134 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2136 RtlCopyMemory(stripe_pfns
, pfns
, readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
2138 stripeoff
[stripe
] = readlen
;
2141 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2143 while (stripe
!= parity
) {
2144 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2145 readlen
= min(length
- pos
, (uint32_t)min(context
.stripes
[stripe
].stripeend
- context
.stripes
[stripe
].stripestart
, ci
->stripe_length
));
2150 RtlCopyMemory(stripe_pfns
, &pfns
[pos
>> PAGE_SHIFT
], readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
2152 stripeoff
[stripe
] = readlen
;
2155 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2157 } else if (length
- pos
>= ci
->stripe_length
* (ci
->num_stripes
- 1)) {
2158 uint16_t stripe
= (parity
+ 1) % ci
->num_stripes
;
2161 while (stripe
!= parity
) {
2162 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2164 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (ULONG
)(ci
->stripe_length
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
2166 stripeoff
[stripe
] += (uint32_t)ci
->stripe_length
;
2167 pos
+= (uint32_t)ci
->stripe_length
;
2169 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2172 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[parity
].mdl
+ 1);
2174 for (k
= 0; k
< ci
->stripe_length
>> PAGE_SHIFT
; k
++) {
2175 stripe_pfns
[stripeoff
[parity
] >> PAGE_SHIFT
] = dummy
;
2176 stripeoff
[parity
] += PAGE_SIZE
;
2179 uint16_t stripe
= (parity
+ 1) % ci
->num_stripes
;
2182 while (pos
< length
) {
2183 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2184 readlen
= min(length
- pos
, (ULONG
)min(context
.stripes
[stripe
].stripeend
- context
.stripes
[stripe
].stripestart
, ci
->stripe_length
));
2189 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
2191 stripeoff
[stripe
] += readlen
;
2194 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2199 MmUnlockPages(master_mdl
);
2200 IoFreeMdl(master_mdl
);
2202 ExFreePool(stripeoff
);
2203 } else if (type
== BLOCK_FLAG_RAID6
) {
2204 uint64_t startoff
, endoff
;
2205 uint16_t endoffstripe
, parity1
;
2206 uint32_t *stripeoff
, pos
;
2208 PFN_NUMBER
*pfns
, dummy
;
2209 bool need_dummy
= false;
2211 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
- 2, &startoff
, &startoffstripe
);
2212 get_raid0_offset(addr
+ length
- offset
- 1, ci
->stripe_length
, ci
->num_stripes
- 2, &endoff
, &endoffstripe
);
2215 context
.va
= ExAllocatePoolWithTag(NonPagedPool
, length
, ALLOC_TAG
);
2218 ERR("out of memory\n");
2219 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2225 master_mdl
= IoAllocateMdl(context
.va
, length
, false, false, NULL
);
2227 ERR("out of memory\n");
2228 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2232 Status
= STATUS_SUCCESS
;
2235 MmProbeAndLockPages(master_mdl
, KernelMode
, IoWriteAccess
);
2236 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
2237 Status
= _SEH2_GetExceptionCode();
2240 if (!NT_SUCCESS(Status
)) {
2241 ERR("MmProbeAndLockPages threw exception %08x\n", Status
);
2242 IoFreeMdl(master_mdl
);
2246 pfns
= (PFN_NUMBER
*)(master_mdl
+ 1);
2249 while (pos
< length
) {
2250 parity1
= (((addr
- offset
+ pos
) / ((ci
->num_stripes
- 2) * ci
->stripe_length
)) + ci
->num_stripes
- 2) % ci
->num_stripes
;
2253 uint16_t stripe
= (parity1
+ startoffstripe
+ 2) % ci
->num_stripes
, parity2
;
2254 ULONG skip
, readlen
;
2257 while (stripe
!= parity1
) {
2258 if (i
== startoffstripe
) {
2259 readlen
= (ULONG
)min(length
, ci
->stripe_length
- (startoff
% ci
->stripe_length
));
2261 context
.stripes
[stripe
].stripestart
= startoff
;
2262 context
.stripes
[stripe
].stripeend
= startoff
+ readlen
;
2269 readlen
= min(length
- pos
, (ULONG
)ci
->stripe_length
);
2271 context
.stripes
[stripe
].stripestart
= startoff
- (startoff
% ci
->stripe_length
);
2272 context
.stripes
[stripe
].stripeend
= context
.stripes
[stripe
].stripestart
+ readlen
;
2281 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2287 for (i
= 0; i
< startoffstripe
; i
++) {
2288 uint16_t stripe2
= (parity1
+ i
+ 2) % ci
->num_stripes
;
2290 context
.stripes
[stripe2
].stripestart
= context
.stripes
[stripe2
].stripeend
= startoff
- (startoff
% ci
->stripe_length
) + ci
->stripe_length
;
2293 context
.stripes
[parity1
].stripestart
= context
.stripes
[parity1
].stripeend
= startoff
- (startoff
% ci
->stripe_length
) + ci
->stripe_length
;
2295 parity2
= (parity1
+ 1) % ci
->num_stripes
;
2296 context
.stripes
[parity2
].stripestart
= context
.stripes
[parity2
].stripeend
= startoff
- (startoff
% ci
->stripe_length
) + ci
->stripe_length
;
2298 if (length
- pos
> ci
->num_stripes
* (ci
->num_stripes
- 2) * ci
->stripe_length
) {
2299 skip
= (ULONG
)(((length
- pos
) / (ci
->num_stripes
* (ci
->num_stripes
- 2) * ci
->stripe_length
)) - 1);
2301 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2302 context
.stripes
[i
].stripeend
+= skip
* ci
->num_stripes
* ci
->stripe_length
;
2305 pos
+= (uint32_t)(skip
* (ci
->num_stripes
- 2) * ci
->num_stripes
* ci
->stripe_length
);
2308 } else if (length
- pos
>= ci
->stripe_length
* (ci
->num_stripes
- 2)) {
2309 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2310 context
.stripes
[i
].stripeend
+= ci
->stripe_length
;
2313 pos
+= (uint32_t)(ci
->stripe_length
* (ci
->num_stripes
- 2));
2316 uint16_t stripe
= (parity1
+ 2) % ci
->num_stripes
;
2319 while (stripe
!= parity1
) {
2320 if (endoffstripe
== i
) {
2321 context
.stripes
[stripe
].stripeend
= endoff
+ 1;
2323 } else if (endoffstripe
> i
)
2324 context
.stripes
[stripe
].stripeend
= endoff
- (endoff
% ci
->stripe_length
) + ci
->stripe_length
;
2327 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2334 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2335 if (context
.stripes
[i
].stripestart
!= context
.stripes
[i
].stripeend
) {
2336 context
.stripes
[i
].mdl
= IoAllocateMdl(context
.va
, (ULONG
)(context
.stripes
[i
].stripeend
- context
.stripes
[i
].stripestart
), false, false, NULL
);
2338 if (!context
.stripes
[i
].mdl
) {
2339 ERR("IoAllocateMdl failed\n");
2340 MmUnlockPages(master_mdl
);
2341 IoFreeMdl(master_mdl
);
2342 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2349 dummypage
= ExAllocatePoolWithTag(NonPagedPool
, PAGE_SIZE
, ALLOC_TAG
);
2351 ERR("out of memory\n");
2352 MmUnlockPages(master_mdl
);
2353 IoFreeMdl(master_mdl
);
2354 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2358 dummy_mdl
= IoAllocateMdl(dummypage
, PAGE_SIZE
, false, false, NULL
);
2360 ERR("IoAllocateMdl failed\n");
2361 MmUnlockPages(master_mdl
);
2362 IoFreeMdl(master_mdl
);
2363 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2367 MmBuildMdlForNonPagedPool(dummy_mdl
);
2369 dummy
= *(PFN_NUMBER
*)(dummy_mdl
+ 1);
2372 stripeoff
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(uint32_t) * ci
->num_stripes
, ALLOC_TAG
);
2374 ERR("out of memory\n");
2375 MmUnlockPages(master_mdl
);
2376 IoFreeMdl(master_mdl
);
2377 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2381 RtlZeroMemory(stripeoff
, sizeof(uint32_t) * ci
->num_stripes
);
2385 while (pos
< length
) {
2386 PFN_NUMBER
* stripe_pfns
;
2388 parity1
= (((addr
- offset
+ pos
) / ((ci
->num_stripes
- 2) * ci
->stripe_length
)) + ci
->num_stripes
- 2) % ci
->num_stripes
;
2391 uint16_t stripe
= (parity1
+ startoffstripe
+ 2) % ci
->num_stripes
;
2392 uint32_t readlen
= min(length
- pos
, (uint32_t)min(context
.stripes
[stripe
].stripeend
- context
.stripes
[stripe
].stripestart
,
2393 ci
->stripe_length
- (context
.stripes
[stripe
].stripestart
% ci
->stripe_length
)));
2395 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2397 RtlCopyMemory(stripe_pfns
, pfns
, readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
2399 stripeoff
[stripe
] = readlen
;
2402 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2404 while (stripe
!= parity1
) {
2405 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2406 readlen
= (uint32_t)min(length
- pos
, min(context
.stripes
[stripe
].stripeend
- context
.stripes
[stripe
].stripestart
, ci
->stripe_length
));
2411 RtlCopyMemory(stripe_pfns
, &pfns
[pos
>> PAGE_SHIFT
], readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
2413 stripeoff
[stripe
] = readlen
;
2416 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2418 } else if (length
- pos
>= ci
->stripe_length
* (ci
->num_stripes
- 2)) {
2419 uint16_t stripe
= (parity1
+ 2) % ci
->num_stripes
;
2420 uint16_t parity2
= (parity1
+ 1) % ci
->num_stripes
;
2423 while (stripe
!= parity1
) {
2424 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2426 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (ULONG
)(ci
->stripe_length
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
2428 stripeoff
[stripe
] += (uint32_t)ci
->stripe_length
;
2429 pos
+= (uint32_t)ci
->stripe_length
;
2431 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2434 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[parity1
].mdl
+ 1);
2436 for (k
= 0; k
< ci
->stripe_length
>> PAGE_SHIFT
; k
++) {
2437 stripe_pfns
[stripeoff
[parity1
] >> PAGE_SHIFT
] = dummy
;
2438 stripeoff
[parity1
] += PAGE_SIZE
;
2441 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[parity2
].mdl
+ 1);
2443 for (k
= 0; k
< ci
->stripe_length
>> PAGE_SHIFT
; k
++) {
2444 stripe_pfns
[stripeoff
[parity2
] >> PAGE_SHIFT
] = dummy
;
2445 stripeoff
[parity2
] += PAGE_SIZE
;
2448 uint16_t stripe
= (parity1
+ 2) % ci
->num_stripes
;
2451 while (pos
< length
) {
2452 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2453 readlen
= (uint32_t)min(length
- pos
, min(context
.stripes
[stripe
].stripeend
- context
.stripes
[stripe
].stripestart
, ci
->stripe_length
));
2458 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
2460 stripeoff
[stripe
] += readlen
;
2463 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2468 MmUnlockPages(master_mdl
);
2469 IoFreeMdl(master_mdl
);
2471 ExFreePool(stripeoff
);
2474 context
.address
= addr
;
2476 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2477 if (!devices
[i
] || !devices
[i
]->devobj
|| context
.stripes
[i
].stripestart
== context
.stripes
[i
].stripeend
) {
2478 context
.stripes
[i
].status
= ReadDataStatus_MissingDevice
;
2479 context
.stripes_left
--;
2481 if (!devices
[i
] || !devices
[i
]->devobj
)
2486 if (missing_devices
> allowed_missing
) {
2487 ERR("not enough devices to service request (%u missing)\n", missing_devices
);
2488 Status
= STATUS_UNEXPECTED_IO_ERROR
;
2492 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2493 PIO_STACK_LOCATION IrpSp
;
2495 if (devices
[i
] && devices
[i
]->devobj
&& context
.stripes
[i
].stripestart
!= context
.stripes
[i
].stripeend
&& context
.stripes
[i
].status
!= ReadDataStatus_Skip
) {
2496 context
.stripes
[i
].context
= (struct read_data_context
*)&context
;
2498 if (type
== BLOCK_FLAG_RAID10
) {
2499 context
.stripes
[i
].stripenum
= i
/ ci
->sub_stripes
;
2503 context
.stripes
[i
].Irp
= IoAllocateIrp(devices
[i
]->devobj
->StackSize
, false);
2505 if (!context
.stripes
[i
].Irp
) {
2506 ERR("IoAllocateIrp failed\n");
2507 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2511 context
.stripes
[i
].Irp
= IoMakeAssociatedIrp(Irp
, devices
[i
]->devobj
->StackSize
);
2513 if (!context
.stripes
[i
].Irp
) {
2514 ERR("IoMakeAssociatedIrp failed\n");
2515 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2520 IrpSp
= IoGetNextIrpStackLocation(context
.stripes
[i
].Irp
);
2521 IrpSp
->MajorFunction
= IRP_MJ_READ
;
2522 IrpSp
->MinorFunction
= IRP_MN_NORMAL
;
2523 IrpSp
->FileObject
= devices
[i
]->fileobj
;
2525 if (devices
[i
]->devobj
->Flags
& DO_BUFFERED_IO
) {
2526 context
.stripes
[i
].Irp
->AssociatedIrp
.SystemBuffer
= ExAllocatePoolWithTag(NonPagedPool
, (ULONG
)(context
.stripes
[i
].stripeend
- context
.stripes
[i
].stripestart
), ALLOC_TAG
);
2527 if (!context
.stripes
[i
].Irp
->AssociatedIrp
.SystemBuffer
) {
2528 ERR("out of memory\n");
2529 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2533 context
.stripes
[i
].Irp
->Flags
|= IRP_BUFFERED_IO
| IRP_DEALLOCATE_BUFFER
| IRP_INPUT_OPERATION
;
2535 context
.stripes
[i
].Irp
->UserBuffer
= MmGetSystemAddressForMdlSafe(context
.stripes
[i
].mdl
, priority
);
2536 } else if (devices
[i
]->devobj
->Flags
& DO_DIRECT_IO
)
2537 context
.stripes
[i
].Irp
->MdlAddress
= context
.stripes
[i
].mdl
;
2539 context
.stripes
[i
].Irp
->UserBuffer
= MmGetSystemAddressForMdlSafe(context
.stripes
[i
].mdl
, priority
);
2541 IrpSp
->Parameters
.Read
.Length
= (ULONG
)(context
.stripes
[i
].stripeend
- context
.stripes
[i
].stripestart
);
2542 IrpSp
->Parameters
.Read
.ByteOffset
.QuadPart
= context
.stripes
[i
].stripestart
+ cis
[i
].offset
;
2544 total_reading
+= IrpSp
->Parameters
.Read
.Length
;
2546 context
.stripes
[i
].Irp
->UserIosb
= &context
.stripes
[i
].iosb
;
2548 IoSetCompletionRoutine(context
.stripes
[i
].Irp
, read_data_completion
, &context
.stripes
[i
], true, true, true);
2550 context
.stripes
[i
].status
= ReadDataStatus_Pending
;
2554 need_to_wait
= false;
2555 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2556 if (context
.stripes
[i
].status
!= ReadDataStatus_MissingDevice
&& context
.stripes
[i
].status
!= ReadDataStatus_Skip
) {
2557 IoCallDriver(devices
[i
]->devobj
, context
.stripes
[i
].Irp
);
2558 need_to_wait
= true;
2563 KeWaitForSingleObject(&context
.Event
, Executive
, KernelMode
, false, NULL
);
2566 fFsRtlUpdateDiskCounters(total_reading
, 0);
2568 // check if any of the devices return a "user-induced" error
2570 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2571 if (context
.stripes
[i
].status
== ReadDataStatus_Error
&& IoIsErrorUserInduced(context
.stripes
[i
].iosb
.Status
)) {
2572 Status
= context
.stripes
[i
].iosb
.Status
;
2577 if (type
== BLOCK_FLAG_RAID0
) {
2578 Status
= read_data_raid0(Vcb
, file_read
? context
.va
: buf
, addr
, length
, &context
, ci
, devices
, generation
, offset
);
2579 if (!NT_SUCCESS(Status
)) {
2580 ERR("read_data_raid0 returned %08x\n", Status
);
2583 ExFreePool(context
.va
);
2589 RtlCopyMemory(buf
, context
.va
, length
);
2590 ExFreePool(context
.va
);
2592 } else if (type
== BLOCK_FLAG_RAID10
) {
2593 Status
= read_data_raid10(Vcb
, file_read
? context
.va
: buf
, addr
, length
, &context
, ci
, devices
, generation
, offset
);
2595 if (!NT_SUCCESS(Status
)) {
2596 ERR("read_data_raid10 returned %08x\n", Status
);
2599 ExFreePool(context
.va
);
2605 RtlCopyMemory(buf
, context
.va
, length
);
2606 ExFreePool(context
.va
);
2608 } else if (type
== BLOCK_FLAG_DUPLICATE
) {
2609 Status
= read_data_dup(Vcb
, file_read
? context
.va
: buf
, addr
, &context
, ci
, devices
, generation
);
2610 if (!NT_SUCCESS(Status
)) {
2611 ERR("read_data_dup returned %08x\n", Status
);
2614 ExFreePool(context
.va
);
2620 RtlCopyMemory(buf
, context
.va
, length
);
2621 ExFreePool(context
.va
);
2623 } else if (type
== BLOCK_FLAG_RAID5
) {
2624 Status
= read_data_raid5(Vcb
, file_read
? context
.va
: buf
, addr
, length
, &context
, ci
, devices
, offset
, generation
, c
, missing_devices
> 0 ? true : false);
2625 if (!NT_SUCCESS(Status
)) {
2626 ERR("read_data_raid5 returned %08x\n", Status
);
2629 ExFreePool(context
.va
);
2635 RtlCopyMemory(buf
, context
.va
, length
);
2636 ExFreePool(context
.va
);
2638 } else if (type
== BLOCK_FLAG_RAID6
) {
2639 Status
= read_data_raid6(Vcb
, file_read
? context
.va
: buf
, addr
, length
, &context
, ci
, devices
, offset
, generation
, c
, missing_devices
> 0 ? true : false);
2640 if (!NT_SUCCESS(Status
)) {
2641 ERR("read_data_raid6 returned %08x\n", Status
);
2644 ExFreePool(context
.va
);
2650 RtlCopyMemory(buf
, context
.va
, length
);
2651 ExFreePool(context
.va
);
2656 if (c
&& (type
== BLOCK_FLAG_RAID5
|| type
== BLOCK_FLAG_RAID6
))
2657 chunk_unlock_range(Vcb
, c
, lockaddr
, locklen
);
2660 IoFreeMdl(dummy_mdl
);
2663 ExFreePool(dummypage
);
2665 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2666 if (context
.stripes
[i
].mdl
) {
2667 if (context
.stripes
[i
].mdl
->MdlFlags
& MDL_PAGES_LOCKED
)
2668 MmUnlockPages(context
.stripes
[i
].mdl
);
2670 IoFreeMdl(context
.stripes
[i
].mdl
);
2673 if (context
.stripes
[i
].Irp
)
2674 IoFreeIrp(context
.stripes
[i
].Irp
);
2677 ExFreePool(context
.stripes
);
2679 if (!Vcb
->log_to_phys_loaded
)
2680 ExFreePool(devices
);
2685 NTSTATUS
read_stream(fcb
* fcb
, uint8_t* data
, uint64_t start
, ULONG length
, ULONG
* pbr
) {
2688 TRACE("(%p, %p, %I64x, %I64x, %p)\n", fcb
, data
, start
, length
, pbr
);
2692 if (start
>= fcb
->adsdata
.Length
) {
2693 TRACE("tried to read beyond end of stream\n");
2694 return STATUS_END_OF_FILE
;
2698 WARN("tried to read zero bytes\n");
2699 return STATUS_SUCCESS
;
2702 if (start
+ length
< fcb
->adsdata
.Length
)
2705 readlen
= fcb
->adsdata
.Length
- (ULONG
)start
;
2708 RtlCopyMemory(data
+ start
, fcb
->adsdata
.Buffer
, readlen
);
2710 if (pbr
) *pbr
= readlen
;
2712 return STATUS_SUCCESS
;
2715 NTSTATUS
read_file(fcb
* fcb
, uint8_t* data
, uint64_t start
, uint64_t length
, ULONG
* pbr
, PIRP Irp
) {
2718 uint32_t bytes_read
= 0;
2721 POOL_TYPE pool_type
;
2723 TRACE("(%p, %p, %I64x, %I64x, %p)\n", fcb
, data
, start
, length
, pbr
);
2728 if (start
>= fcb
->inode_item
.st_size
) {
2729 WARN("Tried to read beyond end of file\n");
2730 Status
= STATUS_END_OF_FILE
;
2734 pool_type
= fcb
->Header
.Flags2
& FSRTL_FLAG2_IS_PAGING_FILE
? NonPagedPool
: PagedPool
;
2736 le
= fcb
->extents
.Flink
;
2740 while (le
!= &fcb
->extents
) {
2742 extent
* ext
= CONTAINING_RECORD(le
, extent
, list_entry
);
2746 ed
= &ext
->extent_data
;
2748 ed2
= (ed
->type
== EXTENT_TYPE_REGULAR
|| ed
->type
== EXTENT_TYPE_PREALLOC
) ? (EXTENT_DATA2
*)ed
->data
: NULL
;
2750 len
= ed2
? ed2
->num_bytes
: ed
->decoded_size
;
2752 if (ext
->offset
+ len
<= start
) {
2753 last_end
= ext
->offset
+ len
;
2757 if (ext
->offset
> last_end
&& ext
->offset
> start
+ bytes_read
) {
2758 uint32_t read
= (uint32_t)min(length
, ext
->offset
- max(start
, last_end
));
2760 RtlZeroMemory(data
+ bytes_read
, read
);
2765 if (length
== 0 || ext
->offset
> start
+ bytes_read
+ length
)
2768 if (ed
->encryption
!= BTRFS_ENCRYPTION_NONE
) {
2769 WARN("Encryption not supported\n");
2770 Status
= STATUS_NOT_IMPLEMENTED
;
2774 if (ed
->encoding
!= BTRFS_ENCODING_NONE
) {
2775 WARN("Other encodings not supported\n");
2776 Status
= STATUS_NOT_IMPLEMENTED
;
2781 case EXTENT_TYPE_INLINE
:
2783 uint64_t off
= start
+ bytes_read
- ext
->offset
;
2786 if (ed
->compression
== BTRFS_COMPRESSION_NONE
) {
2787 read
= (uint32_t)min(min(len
, ext
->datalen
) - off
, length
);
2789 RtlCopyMemory(data
+ bytes_read
, &ed
->data
[off
], read
);
2790 } else if (ed
->compression
== BTRFS_COMPRESSION_ZLIB
|| ed
->compression
== BTRFS_COMPRESSION_LZO
|| ed
->compression
== BTRFS_COMPRESSION_ZSTD
) {
2793 uint16_t inlen
= ext
->datalen
- (uint16_t)offsetof(EXTENT_DATA
, data
[0]);
2795 if (ed
->decoded_size
== 0 || ed
->decoded_size
> 0xffffffff) {
2796 ERR("ed->decoded_size was invalid (%I64x)\n", ed
->decoded_size
);
2797 Status
= STATUS_INTERNAL_ERROR
;
2801 read
= (uint32_t)min(ed
->decoded_size
- off
, length
);
2804 decomp
= ExAllocatePoolWithTag(NonPagedPool
, (uint32_t)ed
->decoded_size
, ALLOC_TAG
);
2806 ERR("out of memory\n");
2807 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2811 decomp_alloc
= true;
2813 decomp
= data
+ bytes_read
;
2814 decomp_alloc
= false;
2817 if (ed
->compression
== BTRFS_COMPRESSION_ZLIB
) {
2818 Status
= zlib_decompress(ed
->data
, inlen
, decomp
, (uint32_t)(read
+ off
));
2819 if (!NT_SUCCESS(Status
)) {
2820 ERR("zlib_decompress returned %08x\n", Status
);
2821 if (decomp_alloc
) ExFreePool(decomp
);
2824 } else if (ed
->compression
== BTRFS_COMPRESSION_LZO
) {
2825 if (inlen
< sizeof(uint32_t)) {
2826 ERR("extent data was truncated\n");
2827 Status
= STATUS_INTERNAL_ERROR
;
2828 if (decomp_alloc
) ExFreePool(decomp
);
2831 inlen
-= sizeof(uint32_t);
2833 Status
= lzo_decompress(ed
->data
+ sizeof(uint32_t), inlen
, decomp
, (uint32_t)(read
+ off
), sizeof(uint32_t));
2834 if (!NT_SUCCESS(Status
)) {
2835 ERR("lzo_decompress returned %08x\n", Status
);
2836 if (decomp_alloc
) ExFreePool(decomp
);
2839 } else if (ed
->compression
== BTRFS_COMPRESSION_ZSTD
) {
2840 Status
= zstd_decompress(ed
->data
, inlen
, decomp
, (uint32_t)(read
+ off
));
2841 if (!NT_SUCCESS(Status
)) {
2842 ERR("zstd_decompress returned %08x\n", Status
);
2843 if (decomp_alloc
) ExFreePool(decomp
);
2849 RtlCopyMemory(data
+ bytes_read
, decomp
+ off
, read
);
2853 ERR("unhandled compression type %x\n", ed
->compression
);
2854 Status
= STATUS_NOT_IMPLEMENTED
;
2864 case EXTENT_TYPE_REGULAR
:
2866 uint64_t off
= start
+ bytes_read
- ext
->offset
;
2867 uint32_t to_read
, read
;
2869 bool mdl
= (Irp
&& Irp
->MdlAddress
) ? true : false;
2871 uint32_t bumpoff
= 0, *csum
;
2875 read
= (uint32_t)(len
- off
);
2876 if (read
> length
) read
= (uint32_t)length
;
2878 if (ed
->compression
== BTRFS_COMPRESSION_NONE
) {
2879 addr
= ed2
->address
+ ed2
->offset
+ off
;
2880 to_read
= (uint32_t)sector_align(read
, fcb
->Vcb
->superblock
.sector_size
);
2882 if (addr
% fcb
->Vcb
->superblock
.sector_size
> 0) {
2883 bumpoff
= addr
% fcb
->Vcb
->superblock
.sector_size
;
2885 to_read
= (uint32_t)sector_align(read
+ bumpoff
, fcb
->Vcb
->superblock
.sector_size
);
2888 addr
= ed2
->address
;
2889 to_read
= (uint32_t)sector_align(ed2
->size
, fcb
->Vcb
->superblock
.sector_size
);
2892 if (ed
->compression
== BTRFS_COMPRESSION_NONE
&& start
% fcb
->Vcb
->superblock
.sector_size
== 0 &&
2893 length
% fcb
->Vcb
->superblock
.sector_size
== 0) {
2894 buf
= data
+ bytes_read
;
2897 buf
= ExAllocatePoolWithTag(pool_type
, to_read
, ALLOC_TAG
);
2901 ERR("out of memory\n");
2902 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2909 c
= get_chunk_from_address(fcb
->Vcb
, addr
);
2912 ERR("get_chunk_from_address(%I64x) failed\n", addr
);
2921 if (ed
->compression
== BTRFS_COMPRESSION_NONE
)
2922 csum
= &ext
->csum
[off
/ fcb
->Vcb
->superblock
.sector_size
];
2928 Status
= read_data(fcb
->Vcb
, addr
, to_read
, csum
, false, buf
, c
, NULL
, Irp
, 0, mdl
,
2929 fcb
&& fcb
->Header
.Flags2
& FSRTL_FLAG2_IS_PAGING_FILE
? HighPagePriority
: NormalPagePriority
);
2930 if (!NT_SUCCESS(Status
)) {
2931 ERR("read_data returned %08x\n", Status
);
2939 if (ed
->compression
== BTRFS_COMPRESSION_NONE
) {
2941 RtlCopyMemory(data
+ bytes_read
, buf
+ bumpoff
, read
);
2943 uint8_t *decomp
= NULL
, *buf2
;
2944 ULONG outlen
, inlen
, off2
;
2945 uint32_t inpageoff
= 0;
2947 off2
= (ULONG
)(ed2
->offset
+ off
);
2949 inlen
= (ULONG
)ed2
->size
;
2951 if (ed
->compression
== BTRFS_COMPRESSION_LZO
) {
2952 ULONG inoff
= sizeof(uint32_t);
2954 inlen
-= sizeof(uint32_t);
2956 // If reading a few sectors in, skip to the interesting bit
2957 while (off2
> LZO_PAGE_SIZE
) {
2960 if (inlen
< sizeof(uint32_t))
2963 partlen
= *(uint32_t*)(buf2
+ inoff
);
2965 if (partlen
< inlen
) {
2966 off2
-= LZO_PAGE_SIZE
;
2967 inoff
+= partlen
+ sizeof(uint32_t);
2968 inlen
-= partlen
+ sizeof(uint32_t);
2970 if (LZO_PAGE_SIZE
- (inoff
% LZO_PAGE_SIZE
) < sizeof(uint32_t))
2971 inoff
= ((inoff
/ LZO_PAGE_SIZE
) + 1) * LZO_PAGE_SIZE
;
2976 buf2
= &buf2
[inoff
];
2977 inpageoff
= inoff
% LZO_PAGE_SIZE
;
2981 outlen
= off2
+ min(read
, (uint32_t)(ed2
->num_bytes
- off
));
2983 decomp
= ExAllocatePoolWithTag(pool_type
, outlen
, ALLOC_TAG
);
2985 ERR("out of memory\n");
2987 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2991 outlen
= min(read
, (uint32_t)(ed2
->num_bytes
- off
));
2993 if (ed
->compression
== BTRFS_COMPRESSION_ZLIB
) {
2994 Status
= zlib_decompress(buf2
, inlen
, decomp
? decomp
: (data
+ bytes_read
), outlen
);
2996 if (!NT_SUCCESS(Status
)) {
2997 ERR("zlib_decompress returned %08x\n", Status
);
3005 } else if (ed
->compression
== BTRFS_COMPRESSION_LZO
) {
3006 Status
= lzo_decompress(buf2
, inlen
, decomp
? decomp
: (data
+ bytes_read
), outlen
, inpageoff
);
3008 if (!NT_SUCCESS(Status
)) {
3009 ERR("lzo_decompress returned %08x\n", Status
);
3017 } else if (ed
->compression
== BTRFS_COMPRESSION_ZSTD
) {
3018 Status
= zstd_decompress(buf2
, inlen
, decomp
? decomp
: (data
+ bytes_read
), outlen
);
3020 if (!NT_SUCCESS(Status
)) {
3021 ERR("zstd_decompress returned %08x\n", Status
);
3030 ERR("unsupported compression type %x\n", ed
->compression
);
3031 Status
= STATUS_NOT_SUPPORTED
;
3042 RtlCopyMemory(data
+ bytes_read
, decomp
+ off2
, (size_t)min(read
, ed2
->num_bytes
- off
));
3056 case EXTENT_TYPE_PREALLOC
:
3058 uint64_t off
= start
+ bytes_read
- ext
->offset
;
3059 uint32_t read
= (uint32_t)(len
- off
);
3061 if (read
> length
) read
= (uint32_t)length
;
3063 RtlZeroMemory(data
+ bytes_read
, read
);
3072 WARN("Unsupported extent data type %u\n", ed
->type
);
3073 Status
= STATUS_NOT_IMPLEMENTED
;
3077 last_end
= ext
->offset
+ len
;
3087 if (length
> 0 && start
+ bytes_read
< fcb
->inode_item
.st_size
) {
3088 uint32_t read
= (uint32_t)min(fcb
->inode_item
.st_size
- start
- bytes_read
, length
);
3090 RtlZeroMemory(data
+ bytes_read
, read
);
3096 Status
= STATUS_SUCCESS
;
3104 NTSTATUS
do_read(PIRP Irp
, bool wait
, ULONG
* bytes_read
) {
3105 PIO_STACK_LOCATION IrpSp
= IoGetCurrentIrpStackLocation(Irp
);
3106 PFILE_OBJECT FileObject
= IrpSp
->FileObject
;
3107 fcb
* fcb
= FileObject
->FsContext
;
3108 uint8_t* data
= NULL
;
3109 ULONG length
= IrpSp
->Parameters
.Read
.Length
, addon
= 0;
3110 uint64_t start
= IrpSp
->Parameters
.Read
.ByteOffset
.QuadPart
;
3114 if (!fcb
|| !fcb
->Vcb
|| !fcb
->subvol
)
3115 return STATUS_INTERNAL_ERROR
;
3117 TRACE("file = %S (fcb = %p)\n", file_desc(FileObject
), fcb
);
3118 TRACE("offset = %I64x, length = %x\n", start
, length
);
3119 TRACE("paging_io = %s, no cache = %s\n", Irp
->Flags
& IRP_PAGING_IO
? "true" : "false", Irp
->Flags
& IRP_NOCACHE
? "true" : "false");
3121 if (!fcb
->ads
&& fcb
->type
== BTRFS_TYPE_DIRECTORY
)
3122 return STATUS_INVALID_DEVICE_REQUEST
;
3124 if (!(Irp
->Flags
& IRP_PAGING_IO
) && !FsRtlCheckLockForReadAccess(&fcb
->lock
, Irp
)) {
3125 WARN("tried to read locked region\n");
3126 return STATUS_FILE_LOCK_CONFLICT
;
3130 TRACE("tried to read zero bytes\n");
3131 return STATUS_SUCCESS
;
3134 if (start
>= (uint64_t)fcb
->Header
.FileSize
.QuadPart
) {
3135 TRACE("tried to read with offset after file end (%I64x >= %I64x)\n", start
, fcb
->Header
.FileSize
.QuadPart
);
3136 return STATUS_END_OF_FILE
;
3139 TRACE("FileObject %p fcb %p FileSize = %I64x st_size = %I64x (%p)\n", FileObject
, fcb
, fcb
->Header
.FileSize
.QuadPart
, fcb
->inode_item
.st_size
, &fcb
->inode_item
.st_size
);
3141 if (Irp
->Flags
& IRP_NOCACHE
|| !(IrpSp
->MinorFunction
& IRP_MN_MDL
)) {
3142 data
= map_user_buffer(Irp
, fcb
->Header
.Flags2
& FSRTL_FLAG2_IS_PAGING_FILE
? HighPagePriority
: NormalPagePriority
);
3144 if (Irp
->MdlAddress
&& !data
) {
3145 ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
3146 return STATUS_INSUFFICIENT_RESOURCES
;
3149 if (start
>= (uint64_t)fcb
->Header
.ValidDataLength
.QuadPart
) {
3150 length
= (ULONG
)min(length
, min(start
+ length
, (uint64_t)fcb
->Header
.FileSize
.QuadPart
) - fcb
->Header
.ValidDataLength
.QuadPart
);
3151 RtlZeroMemory(data
, length
);
3152 Irp
->IoStatus
.Information
= *bytes_read
= length
;
3153 return STATUS_SUCCESS
;
3156 if (length
+ start
> (uint64_t)fcb
->Header
.ValidDataLength
.QuadPart
) {
3157 addon
= (ULONG
)(min(start
+ length
, (uint64_t)fcb
->Header
.FileSize
.QuadPart
) - fcb
->Header
.ValidDataLength
.QuadPart
);
3158 RtlZeroMemory(data
+ (fcb
->Header
.ValidDataLength
.QuadPart
- start
), addon
);
3159 length
= (ULONG
)(fcb
->Header
.ValidDataLength
.QuadPart
- start
);
3163 if (!(Irp
->Flags
& IRP_NOCACHE
)) {
3164 NTSTATUS Status
= STATUS_SUCCESS
;
3167 if (!FileObject
->PrivateCacheMap
) {
3170 ccfs
.AllocationSize
= fcb
->Header
.AllocationSize
;
3171 ccfs
.FileSize
= fcb
->Header
.FileSize
;
3172 ccfs
.ValidDataLength
= fcb
->Header
.ValidDataLength
;
3174 init_file_cache(FileObject
, &ccfs
);
3177 if (IrpSp
->MinorFunction
& IRP_MN_MDL
) {
3178 CcMdlRead(FileObject
,&IrpSp
->Parameters
.Read
.ByteOffset
, length
, &Irp
->MdlAddress
, &Irp
->IoStatus
);
3180 if (fCcCopyReadEx
) {
3181 TRACE("CcCopyReadEx(%p, %I64x, %x, %u, %p, %p, %p, %p)\n", FileObject
, IrpSp
->Parameters
.Read
.ByteOffset
.QuadPart
,
3182 length
, wait
, data
, &Irp
->IoStatus
, Irp
->Tail
.Overlay
.Thread
);
3183 TRACE("sizes = %I64x, %I64x, %I64x\n", fcb
->Header
.AllocationSize
, fcb
->Header
.FileSize
, fcb
->Header
.ValidDataLength
);
3184 if (!fCcCopyReadEx(FileObject
, &IrpSp
->Parameters
.Read
.ByteOffset
, length
, wait
, data
, &Irp
->IoStatus
, Irp
->Tail
.Overlay
.Thread
)) {
3185 TRACE("CcCopyReadEx could not wait\n");
3187 IoMarkIrpPending(Irp
);
3188 return STATUS_PENDING
;
3190 TRACE("CcCopyReadEx finished\n");
3192 TRACE("CcCopyRead(%p, %I64x, %x, %u, %p, %p)\n", FileObject
, IrpSp
->Parameters
.Read
.ByteOffset
.QuadPart
, length
, wait
, data
, &Irp
->IoStatus
);
3193 TRACE("sizes = %I64x, %I64x, %I64x\n", fcb
->Header
.AllocationSize
, fcb
->Header
.FileSize
, fcb
->Header
.ValidDataLength
);
3194 if (!CcCopyRead(FileObject
, &IrpSp
->Parameters
.Read
.ByteOffset
, length
, wait
, data
, &Irp
->IoStatus
)) {
3195 TRACE("CcCopyRead could not wait\n");
3197 IoMarkIrpPending(Irp
);
3198 return STATUS_PENDING
;
3200 TRACE("CcCopyRead finished\n");
3203 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
3204 Status
= _SEH2_GetExceptionCode();
3207 if (NT_SUCCESS(Status
)) {
3208 Status
= Irp
->IoStatus
.Status
;
3209 Irp
->IoStatus
.Information
+= addon
;
3210 *bytes_read
= (ULONG
)Irp
->IoStatus
.Information
;
3212 ERR("EXCEPTION - %08x\n", Status
);
3219 IoMarkIrpPending(Irp
);
3220 return STATUS_PENDING
;
3224 Status
= read_stream(fcb
, data
, start
, length
, bytes_read
);
3226 Status
= read_file(fcb
, data
, start
, length
, bytes_read
, Irp
);
3228 *bytes_read
+= addon
;
3229 TRACE("read %u bytes\n", *bytes_read
);
3231 Irp
->IoStatus
.Information
= *bytes_read
;
3233 if (diskacc
&& Status
!= STATUS_PENDING
) {
3234 PETHREAD thread
= NULL
;
3236 if (Irp
->Tail
.Overlay
.Thread
&& !IoIsSystemThread(Irp
->Tail
.Overlay
.Thread
))
3237 thread
= Irp
->Tail
.Overlay
.Thread
;
3238 else if (!IoIsSystemThread(PsGetCurrentThread()))
3239 thread
= PsGetCurrentThread();
3240 else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp
)
3241 thread
= PsGetCurrentThread();
3244 fPsUpdateDiskCounters(PsGetThreadProcess(thread
), *bytes_read
, 0, 1, 0, 0);
3251 _Dispatch_type_(IRP_MJ_READ
)
3252 _Function_class_(DRIVER_DISPATCH
)
3253 NTSTATUS __stdcall
drv_read(PDEVICE_OBJECT DeviceObject
, PIRP Irp
) {
3254 device_extension
* Vcb
= DeviceObject
->DeviceExtension
;
3255 PIO_STACK_LOCATION IrpSp
= IoGetCurrentIrpStackLocation(Irp
);
3256 PFILE_OBJECT FileObject
= IrpSp
->FileObject
;
3257 ULONG bytes_read
= 0;
3262 bool acquired_fcb_lock
= false, wait
;
3264 FsRtlEnterFileSystem();
3266 top_level
= is_top_level(Irp
);
3270 if (Vcb
&& Vcb
->type
== VCB_TYPE_VOLUME
) {
3271 Status
= vol_read(DeviceObject
, Irp
);
3273 } else if (!Vcb
|| Vcb
->type
!= VCB_TYPE_FS
) {
3274 Status
= STATUS_INVALID_PARAMETER
;
3278 Irp
->IoStatus
.Information
= 0;
3280 if (IrpSp
->MinorFunction
& IRP_MN_COMPLETE
) {
3281 CcMdlReadComplete(IrpSp
->FileObject
, Irp
->MdlAddress
);
3283 Irp
->MdlAddress
= NULL
;
3284 Status
= STATUS_SUCCESS
;
3289 fcb
= FileObject
->FsContext
;
3292 ERR("fcb was NULL\n");
3293 Status
= STATUS_INVALID_PARAMETER
;
3297 ccb
= FileObject
->FsContext2
;
3300 ERR("ccb was NULL\n");
3301 Status
= STATUS_INVALID_PARAMETER
;
3305 if (Irp
->RequestorMode
== UserMode
&& !(ccb
->access
& FILE_READ_DATA
)) {
3306 WARN("insufficient privileges\n");
3307 Status
= STATUS_ACCESS_DENIED
;
3311 if (fcb
== Vcb
->volume_fcb
) {
3312 TRACE("reading volume FCB\n");
3314 IoSkipCurrentIrpStackLocation(Irp
);
3316 Status
= IoCallDriver(Vcb
->Vpb
->RealDevice
, Irp
);
3321 wait
= IoIsOperationSynchronous(Irp
);
3323 // Don't offload jobs when doing paging IO - otherwise this can lead to
3324 // deadlocks in CcCopyRead.
3325 if (Irp
->Flags
& IRP_PAGING_IO
)
3328 if (!(Irp
->Flags
& IRP_PAGING_IO
) && FileObject
->SectionObjectPointer
&& FileObject
->SectionObjectPointer
->DataSectionObject
) {
3329 IO_STATUS_BLOCK iosb
;
3331 CcFlushCache(FileObject
->SectionObjectPointer
, &IrpSp
->Parameters
.Read
.ByteOffset
, IrpSp
->Parameters
.Read
.Length
, &iosb
);
3332 if (!NT_SUCCESS(iosb
.Status
)) {
3333 ERR("CcFlushCache returned %08x\n", iosb
.Status
);
3338 if (!ExIsResourceAcquiredSharedLite(fcb
->Header
.Resource
)) {
3339 if (!ExAcquireResourceSharedLite(fcb
->Header
.Resource
, wait
)) {
3340 Status
= STATUS_PENDING
;
3341 IoMarkIrpPending(Irp
);
3345 acquired_fcb_lock
= true;
3348 Status
= do_read(Irp
, wait
, &bytes_read
);
3350 if (acquired_fcb_lock
)
3351 ExReleaseResourceLite(fcb
->Header
.Resource
);
3354 if (FileObject
->Flags
& FO_SYNCHRONOUS_IO
&& !(Irp
->Flags
& IRP_PAGING_IO
))
3355 FileObject
->CurrentByteOffset
.QuadPart
= IrpSp
->Parameters
.Read
.ByteOffset
.QuadPart
+ (NT_SUCCESS(Status
) ? bytes_read
: 0);
3358 Irp
->IoStatus
.Status
= Status
;
3360 TRACE("Irp->IoStatus.Status = %08x\n", Irp
->IoStatus
.Status
);
3361 TRACE("Irp->IoStatus.Information = %lu\n", Irp
->IoStatus
.Information
);
3362 TRACE("returning %08x\n", Status
);
3364 if (Status
!= STATUS_PENDING
)
3365 IoCompleteRequest(Irp
, IO_NO_INCREMENT
);
3367 if (!add_thread_job(Vcb
, Irp
))
3368 Status
= do_read_job(Irp
);
3373 IoSetTopLevelIrp(NULL
);
3375 FsRtlExitFileSystem();