1 /* Copyright (c) Mark Harmstone 2016-17
3 * This file is part of WinBtrfs.
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
18 #include "btrfs_drv.h"
20 enum read_data_status
{
21 ReadDataStatus_Pending
,
22 ReadDataStatus_Success
,
24 ReadDataStatus_MissingDevice
,
28 struct read_data_context
;
31 struct read_data_context
* context
;
36 enum read_data_status status
;
48 LONG num_stripes
, stripes_left
;
51 UINT16 firstoff
, startoffstripe
, sectors_per_stripe
;
54 read_data_stripe
* stripes
;
59 extern tPsUpdateDiskCounters fPsUpdateDiskCounters
;
60 extern tCcCopyReadEx fCcCopyReadEx
;
61 extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters
;
63 #define LINUX_PAGE_SIZE 4096
/* I/O completion routine for the per-stripe read IRPs issued by the read
 * path. Saves the stripe's IO status block, marks the stripe Success or
 * Error, and signals the shared context event once the last outstanding
 * stripe completes.
 *
 * NOTE(review): this copy is an extraction with interior source lines
 * elided — the duplicated prototype below was almost certainly the two
 * arms of an #ifdef/#else pair in the original, and the Error assignment
 * has lost its `else` keyword and braces. Recover the exact text from
 * the original file before building; do not treat this copy as
 * compilable. */
65 _Function_class_(IO_COMPLETION_ROUTINE
)
67 static NTSTATUS NTAPI
read_data_completion(PDEVICE_OBJECT DeviceObject
, PIRP Irp
, PVOID conptr
) {
69 static NTSTATUS
read_data_completion(PDEVICE_OBJECT DeviceObject
, PIRP Irp
, PVOID conptr
) {
/* conptr carries the read_data_stripe this IRP was issued for. */
71 read_data_stripe
* stripe
= conptr
;
72 read_data_context
* context
= (read_data_context
*)stripe
->context
;
/* Preserve the IRP's IO status block so the initiator can inspect it. */
76 stripe
->iosb
= Irp
->IoStatus
;
78 if (NT_SUCCESS(Irp
->IoStatus
.Status
))
79 stripe
->status
= ReadDataStatus_Success
;
/* NOTE(review): an `else` was elided here — this assignment is the
 * failure branch of the NT_SUCCESS test above. */
81 stripe
->status
= ReadDataStatus_Error
;
/* Last stripe to finish wakes whoever is waiting on context->Event. */
83 if (InterlockedDecrement(&context
->stripes_left
) == 0)
84 KeSetEvent(&context
->Event
, 0, FALSE
);
/* Keep the IRP alive; the initiator of the read owns its cleanup. */
86 return STATUS_MORE_PROCESSING_REQUIRED
;
/* Verifies crc32c checksums for `sectors` consecutive sectors of `data`
 * against the expected values in `csum`. Small requests (or single-CPU
 * systems) are checked inline; larger requests are offloaded to a calc
 * job and compared in bulk. Returns STATUS_SUCCESS, STATUS_CRC_ERROR on
 * mismatch, or STATUS_INSUFFICIENT_RESOURCES if the scratch buffer
 * cannot be allocated.
 *
 * NOTE(review): interior source lines are elided in this extraction —
 * the declarations of j / csum2 / cj / Status, the NULL check after the
 * allocation, the csum2 free calls, and several closing braces are
 * missing. Recover the exact text from the original file; this copy is
 * not compilable as-is. */
89 NTSTATUS
check_csum(device_extension
* Vcb
, UINT8
* data
, UINT32 sectors
, UINT32
* csum
) {
94 // From experimenting, it seems that 40 sectors is roughly the crossover
95 // point where offloading the crc32 calculation becomes worth it.
/* Inline path: per-sector crc32c, bail out on the first mismatch. */
97 if (sectors
< 40 || KeQueryActiveProcessorCount(NULL
) < 2) {
100 for (j
= 0; j
< sectors
; j
++) {
101 UINT32 crc32
= ~calc_crc32c(0xffffffff, data
+ (j
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
103 if (crc32
!= csum
[j
]) {
104 return STATUS_CRC_ERROR
;
108 return STATUS_SUCCESS
;
/* Offload path: allocate a scratch array for the computed checksums.
 * NOTE(review): the `if (!csum2)` guard line is elided before the
 * out-of-memory handling below. */
111 csum2
= ExAllocatePoolWithTag(PagedPool
, sizeof(UINT32
) * sectors
, ALLOC_TAG
);
113 ERR("out of memory\n");
114 return STATUS_INSUFFICIENT_RESOURCES
;
/* Queue the checksum calculation job and wait for it to finish. */
117 Status
= add_calc_job(Vcb
, data
, sectors
, csum2
, &cj
);
118 if (!NT_SUCCESS(Status
)) {
119 ERR("add_calc_job returned %08x\n", Status
);
124 KeWaitForSingleObject(&cj
->event
, Executive
, KernelMode
, FALSE
, NULL
);
/* Bulk-compare computed vs expected checksums; any difference makes
 * RtlCompareMemory return fewer than sectors * sizeof(UINT32) bytes. */
126 if (RtlCompareMemory(csum2
, csum
, sectors
* sizeof(UINT32
)) != sectors
* sizeof(UINT32
)) {
129 return STATUS_CRC_ERROR
;
135 return STATUS_SUCCESS
;
138 static NTSTATUS
read_data_dup(device_extension
* Vcb
, UINT8
* buf
, UINT64 addr
, read_data_context
* context
, CHUNK_ITEM
* ci
,
139 device
** devices
, UINT64 generation
) {
141 BOOL checksum_error
= FALSE
;
142 UINT16 j
, stripe
= 0;
144 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&ci
[1];
146 for (j
= 0; j
< ci
->num_stripes
; j
++) {
147 if (context
->stripes
[j
].status
== ReadDataStatus_Error
) {
148 WARN("stripe %u returned error %08x\n", j
, context
->stripes
[j
].iosb
.Status
);
149 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
150 return context
->stripes
[j
].iosb
.Status
;
151 } else if (context
->stripes
[j
].status
== ReadDataStatus_Success
) {
157 if (context
->stripes
[stripe
].status
!= ReadDataStatus_Success
)
158 return STATUS_INTERNAL_ERROR
;
161 tree_header
* th
= (tree_header
*)buf
;
164 crc32
= ~calc_crc32c(0xffffffff, (UINT8
*)&th
->fs_uuid
, context
->buflen
- sizeof(th
->csum
));
166 if (th
->address
!= context
->address
|| crc32
!= *((UINT32
*)th
->csum
)) {
167 checksum_error
= TRUE
;
168 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
169 } else if (generation
!= 0 && th
->generation
!= generation
) {
170 checksum_error
= TRUE
;
171 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_GENERATION_ERRORS
);
173 } else if (context
->csum
) {
175 LARGE_INTEGER time1
, time2
;
177 time1
= KeQueryPerformanceCounter(NULL
);
179 Status
= check_csum(Vcb
, buf
, (ULONG
)context
->stripes
[stripe
].Irp
->IoStatus
.Information
/ context
->sector_size
, context
->csum
);
181 if (Status
== STATUS_CRC_ERROR
) {
182 checksum_error
= TRUE
;
183 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
184 } else if (!NT_SUCCESS(Status
)) {
185 ERR("check_csum returned %08x\n", Status
);
189 time2
= KeQueryPerformanceCounter(NULL
);
191 Vcb
->stats
.read_csum_time
+= time2
.QuadPart
- time1
.QuadPart
;
196 return STATUS_SUCCESS
;
198 if (ci
->num_stripes
== 1)
199 return STATUS_CRC_ERROR
;
203 BOOL recovered
= FALSE
;
205 t2
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.node_size
, ALLOC_TAG
);
207 ERR("out of memory\n");
208 return STATUS_INSUFFICIENT_RESOURCES
;
211 for (j
= 0; j
< ci
->num_stripes
; j
++) {
212 if (j
!= stripe
&& devices
[j
] && devices
[j
]->devobj
) {
213 Status
= sync_read_phys(devices
[j
]->devobj
, cis
[j
].offset
+ context
->stripes
[stripe
].stripestart
, Vcb
->superblock
.node_size
, (UINT8
*)t2
, FALSE
);
214 if (!NT_SUCCESS(Status
)) {
215 WARN("sync_read_phys returned %08x\n", Status
);
216 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
218 UINT32 crc32
= ~calc_crc32c(0xffffffff, (UINT8
*)&t2
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(t2
->csum
));
220 if (t2
->address
== addr
&& crc32
== *((UINT32
*)t2
->csum
) && (generation
== 0 || t2
->generation
== generation
)) {
221 RtlCopyMemory(buf
, t2
, Vcb
->superblock
.node_size
);
222 ERR("recovering from checksum error at %llx, device %llx\n", addr
, devices
[stripe
]->devitem
.dev_id
);
225 if (!Vcb
->readonly
&& !devices
[stripe
]->readonly
) { // write good data over bad
226 Status
= write_data_phys(devices
[stripe
]->devobj
, cis
[stripe
].offset
+ context
->stripes
[stripe
].stripestart
,
227 t2
, Vcb
->superblock
.node_size
);
228 if (!NT_SUCCESS(Status
)) {
229 WARN("write_data_phys returned %08x\n", Status
);
230 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
235 } else if (t2
->address
!= addr
|| crc32
!= *((UINT32
*)t2
->csum
))
236 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
238 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_GENERATION_ERRORS
);
244 ERR("unrecoverable checksum error at %llx\n", addr
);
246 return STATUS_CRC_ERROR
;
251 ULONG sectors
= (ULONG
)context
->stripes
[stripe
].Irp
->IoStatus
.Information
/ Vcb
->superblock
.sector_size
;
254 sector
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.sector_size
, ALLOC_TAG
);
256 ERR("out of memory\n");
257 return STATUS_INSUFFICIENT_RESOURCES
;
260 for (i
= 0; i
< sectors
; i
++) {
261 UINT32 crc32
= ~calc_crc32c(0xffffffff, buf
+ (i
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
263 if (context
->csum
[i
] != crc32
) {
264 BOOL recovered
= FALSE
;
266 for (j
= 0; j
< ci
->num_stripes
; j
++) {
267 if (j
!= stripe
&& devices
[j
] && devices
[j
]->devobj
) {
268 Status
= sync_read_phys(devices
[j
]->devobj
, cis
[j
].offset
+ context
->stripes
[stripe
].stripestart
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
),
269 Vcb
->superblock
.sector_size
, sector
, FALSE
);
270 if (!NT_SUCCESS(Status
)) {
271 WARN("sync_read_phys returned %08x\n", Status
);
272 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
274 UINT32 crc32b
= ~calc_crc32c(0xffffffff, sector
, Vcb
->superblock
.sector_size
);
276 if (crc32b
== context
->csum
[i
]) {
277 RtlCopyMemory(buf
+ (i
* Vcb
->superblock
.sector_size
), sector
, Vcb
->superblock
.sector_size
);
278 ERR("recovering from checksum error at %llx, device %llx\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), devices
[stripe
]->devitem
.dev_id
);
281 if (!Vcb
->readonly
&& !devices
[stripe
]->readonly
) { // write good data over bad
282 Status
= write_data_phys(devices
[stripe
]->devobj
, cis
[stripe
].offset
+ context
->stripes
[stripe
].stripestart
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
),
283 sector
, Vcb
->superblock
.sector_size
);
284 if (!NT_SUCCESS(Status
)) {
285 WARN("write_data_phys returned %08x\n", Status
);
286 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
292 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
298 ERR("unrecoverable checksum error at %llx\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
));
300 return STATUS_CRC_ERROR
;
308 return STATUS_SUCCESS
;
311 static NTSTATUS
read_data_raid0(device_extension
* Vcb
, UINT8
* buf
, UINT64 addr
, UINT32 length
, read_data_context
* context
,
312 CHUNK_ITEM
* ci
, device
** devices
, UINT64 generation
, UINT64 offset
) {
315 for (i
= 0; i
< ci
->num_stripes
; i
++) {
316 if (context
->stripes
[i
].status
== ReadDataStatus_Error
) {
317 WARN("stripe %llu returned error %08x\n", i
, context
->stripes
[i
].iosb
.Status
);
318 log_device_error(Vcb
, devices
[i
], BTRFS_DEV_STAT_READ_ERRORS
);
319 return context
->stripes
[i
].iosb
.Status
;
323 if (context
->tree
) { // shouldn't happen, as trees shouldn't cross stripe boundaries
324 tree_header
* th
= (tree_header
*)buf
;
325 UINT32 crc32
= ~calc_crc32c(0xffffffff, (UINT8
*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
327 if (crc32
!= *((UINT32
*)th
->csum
) || addr
!= th
->address
|| (generation
!= 0 && generation
!= th
->generation
)) {
331 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
, &off
, &stripe
);
333 ERR("unrecoverable checksum error at %llx, device %llx\n", addr
, devices
[stripe
]->devitem
.dev_id
);
335 if (crc32
!= *((UINT32
*)th
->csum
)) {
336 WARN("crc32 was %08x, expected %08x\n", crc32
, *((UINT32
*)th
->csum
));
337 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
338 return STATUS_CRC_ERROR
;
339 } else if (addr
!= th
->address
) {
340 WARN("address of tree was %llx, not %llx as expected\n", th
->address
, addr
);
341 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
342 return STATUS_CRC_ERROR
;
343 } else if (generation
!= 0 && generation
!= th
->generation
) {
344 WARN("generation of tree was %llx, not %llx as expected\n", th
->generation
, generation
);
345 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_GENERATION_ERRORS
);
346 return STATUS_CRC_ERROR
;
349 } else if (context
->csum
) {
352 LARGE_INTEGER time1
, time2
;
354 time1
= KeQueryPerformanceCounter(NULL
);
356 Status
= check_csum(Vcb
, buf
, length
/ Vcb
->superblock
.sector_size
, context
->csum
);
358 if (Status
== STATUS_CRC_ERROR
) {
359 for (i
= 0; i
< length
/ Vcb
->superblock
.sector_size
; i
++) {
360 UINT32 crc32
= ~calc_crc32c(0xffffffff, buf
+ (i
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
362 if (context
->csum
[i
] != crc32
) {
366 get_raid0_offset(addr
- offset
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), ci
->stripe_length
, ci
->num_stripes
, &off
, &stripe
);
368 ERR("unrecoverable checksum error at %llx, device %llx\n", addr
, devices
[stripe
]->devitem
.dev_id
);
370 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
377 } else if (!NT_SUCCESS(Status
)) {
378 ERR("check_csum returned %08x\n", Status
);
382 time2
= KeQueryPerformanceCounter(NULL
);
384 Vcb
->stats
.read_csum_time
+= time2
.QuadPart
- time1
.QuadPart
;
388 return STATUS_SUCCESS
;
391 static NTSTATUS
read_data_raid10(device_extension
* Vcb
, UINT8
* buf
, UINT64 addr
, UINT32 length
, read_data_context
* context
,
392 CHUNK_ITEM
* ci
, device
** devices
, UINT64 generation
, UINT64 offset
) {
396 BOOL checksum_error
= FALSE
;
397 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&ci
[1];
399 for (j
= 0; j
< ci
->num_stripes
; j
++) {
400 if (context
->stripes
[j
].status
== ReadDataStatus_Error
) {
401 WARN("stripe %llu returned error %08x\n", j
, context
->stripes
[j
].iosb
.Status
);
402 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
403 return context
->stripes
[j
].iosb
.Status
;
404 } else if (context
->stripes
[j
].status
== ReadDataStatus_Success
)
409 tree_header
* th
= (tree_header
*)buf
;
410 UINT32 crc32
= ~calc_crc32c(0xffffffff, (UINT8
*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
412 if (crc32
!= *((UINT32
*)th
->csum
)) {
413 WARN("crc32 was %08x, expected %08x\n", crc32
, *((UINT32
*)th
->csum
));
414 checksum_error
= TRUE
;
415 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
416 } else if (addr
!= th
->address
) {
417 WARN("address of tree was %llx, not %llx as expected\n", th
->address
, addr
);
418 checksum_error
= TRUE
;
419 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
420 } else if (generation
!= 0 && generation
!= th
->generation
) {
421 WARN("generation of tree was %llx, not %llx as expected\n", th
->generation
, generation
);
422 checksum_error
= TRUE
;
423 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_GENERATION_ERRORS
);
425 } else if (context
->csum
) {
427 LARGE_INTEGER time1
, time2
;
429 time1
= KeQueryPerformanceCounter(NULL
);
431 Status
= check_csum(Vcb
, buf
, length
/ Vcb
->superblock
.sector_size
, context
->csum
);
433 if (Status
== STATUS_CRC_ERROR
)
434 checksum_error
= TRUE
;
435 else if (!NT_SUCCESS(Status
)) {
436 ERR("check_csum returned %08x\n", Status
);
440 time2
= KeQueryPerformanceCounter(NULL
);
442 Vcb
->stats
.read_csum_time
+= time2
.QuadPart
- time1
.QuadPart
;
447 return STATUS_SUCCESS
;
452 UINT16 badsubstripe
= 0;
453 BOOL recovered
= FALSE
;
455 t2
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.node_size
, ALLOC_TAG
);
457 ERR("out of memory\n");
458 return STATUS_INSUFFICIENT_RESOURCES
;
461 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
/ ci
->sub_stripes
, &off
, &stripe
);
463 stripe
*= ci
->sub_stripes
;
465 for (j
= 0; j
< ci
->sub_stripes
; j
++) {
466 if (context
->stripes
[stripe
+ j
].status
== ReadDataStatus_Success
) {
472 for (j
= 0; j
< ci
->sub_stripes
; j
++) {
473 if (context
->stripes
[stripe
+ j
].status
!= ReadDataStatus_Success
&& devices
[stripe
+ j
] && devices
[stripe
+ j
]->devobj
) {
474 Status
= sync_read_phys(devices
[stripe
+ j
]->devobj
, cis
[stripe
+ j
].offset
+ off
,
475 Vcb
->superblock
.node_size
, (UINT8
*)t2
, FALSE
);
476 if (!NT_SUCCESS(Status
)) {
477 WARN("sync_read_phys returned %08x\n", Status
);
478 log_device_error(Vcb
, devices
[stripe
+ j
], BTRFS_DEV_STAT_READ_ERRORS
);
480 UINT32 crc32
= ~calc_crc32c(0xffffffff, (UINT8
*)&t2
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(t2
->csum
));
482 if (t2
->address
== addr
&& crc32
== *((UINT32
*)t2
->csum
) && (generation
== 0 || t2
->generation
== generation
)) {
483 RtlCopyMemory(buf
, t2
, Vcb
->superblock
.node_size
);
484 ERR("recovering from checksum error at %llx, device %llx\n", addr
, devices
[stripe
+ j
]->devitem
.dev_id
);
487 if (!Vcb
->readonly
&& !devices
[stripe
+ badsubstripe
]->readonly
&& devices
[stripe
+ badsubstripe
]->devobj
) { // write good data over bad
488 Status
= write_data_phys(devices
[stripe
+ badsubstripe
]->devobj
, cis
[stripe
+ badsubstripe
].offset
+ off
,
489 t2
, Vcb
->superblock
.node_size
);
490 if (!NT_SUCCESS(Status
)) {
491 WARN("write_data_phys returned %08x\n", Status
);
492 log_device_error(Vcb
, devices
[stripe
+ badsubstripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
497 } else if (t2
->address
!= addr
|| crc32
!= *((UINT32
*)t2
->csum
))
498 log_device_error(Vcb
, devices
[stripe
+ j
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
500 log_device_error(Vcb
, devices
[stripe
+ j
], BTRFS_DEV_STAT_GENERATION_ERRORS
);
506 ERR("unrecoverable checksum error at %llx\n", addr
);
508 return STATUS_CRC_ERROR
;
513 ULONG sectors
= length
/ Vcb
->superblock
.sector_size
;
516 sector
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.sector_size
, ALLOC_TAG
);
518 ERR("out of memory\n");
519 return STATUS_INSUFFICIENT_RESOURCES
;
522 for (i
= 0; i
< sectors
; i
++) {
523 UINT32 crc32
= ~calc_crc32c(0xffffffff, buf
+ (i
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
525 if (context
->csum
[i
] != crc32
) {
527 UINT16 stripe2
, badsubstripe
= 0;
528 BOOL recovered
= FALSE
;
530 get_raid0_offset(addr
- offset
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), ci
->stripe_length
,
531 ci
->num_stripes
/ ci
->sub_stripes
, &off
, &stripe2
);
533 stripe2
*= ci
->sub_stripes
;
535 for (j
= 0; j
< ci
->sub_stripes
; j
++) {
536 if (context
->stripes
[stripe2
+ j
].status
== ReadDataStatus_Success
) {
542 log_device_error(Vcb
, devices
[stripe2
+ badsubstripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
544 for (j
= 0; j
< ci
->sub_stripes
; j
++) {
545 if (context
->stripes
[stripe2
+ j
].status
!= ReadDataStatus_Success
&& devices
[stripe2
+ j
] && devices
[stripe2
+ j
]->devobj
) {
546 Status
= sync_read_phys(devices
[stripe2
+ j
]->devobj
, cis
[stripe2
+ j
].offset
+ off
,
547 Vcb
->superblock
.sector_size
, sector
, FALSE
);
548 if (!NT_SUCCESS(Status
)) {
549 WARN("sync_read_phys returned %08x\n", Status
);
550 log_device_error(Vcb
, devices
[stripe2
+ j
], BTRFS_DEV_STAT_READ_ERRORS
);
552 UINT32 crc32b
= ~calc_crc32c(0xffffffff, sector
, Vcb
->superblock
.sector_size
);
554 if (crc32b
== context
->csum
[i
]) {
555 RtlCopyMemory(buf
+ (i
* Vcb
->superblock
.sector_size
), sector
, Vcb
->superblock
.sector_size
);
556 ERR("recovering from checksum error at %llx, device %llx\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), devices
[stripe2
+ j
]->devitem
.dev_id
);
559 if (!Vcb
->readonly
&& !devices
[stripe2
+ badsubstripe
]->readonly
&& devices
[stripe2
+ badsubstripe
]->devobj
) { // write good data over bad
560 Status
= write_data_phys(devices
[stripe2
+ badsubstripe
]->devobj
, cis
[stripe2
+ badsubstripe
].offset
+ off
,
561 sector
, Vcb
->superblock
.sector_size
);
562 if (!NT_SUCCESS(Status
)) {
563 WARN("write_data_phys returned %08x\n", Status
);
564 log_device_error(Vcb
, devices
[stripe2
+ badsubstripe
], BTRFS_DEV_STAT_READ_ERRORS
);
570 log_device_error(Vcb
, devices
[stripe2
+ j
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
576 ERR("unrecoverable checksum error at %llx\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
));
578 return STATUS_CRC_ERROR
;
586 return STATUS_SUCCESS
;
589 static NTSTATUS
read_data_raid5(device_extension
* Vcb
, UINT8
* buf
, UINT64 addr
, UINT32 length
, read_data_context
* context
, CHUNK_ITEM
* ci
,
590 device
** devices
, UINT64 offset
, UINT64 generation
, chunk
* c
, BOOL degraded
) {
593 BOOL checksum_error
= FALSE
;
594 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&ci
[1];
596 BOOL no_success
= TRUE
;
598 for (j
= 0; j
< ci
->num_stripes
; j
++) {
599 if (context
->stripes
[j
].status
== ReadDataStatus_Error
) {
600 WARN("stripe %u returned error %08x\n", j
, context
->stripes
[j
].iosb
.Status
);
601 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
602 return context
->stripes
[j
].iosb
.Status
;
603 } else if (context
->stripes
[j
].status
== ReadDataStatus_Success
) {
609 if (c
) { // check partial stripes
611 UINT64 ps_length
= (ci
->num_stripes
- 1) * ci
->stripe_length
;
613 ExAcquireResourceSharedLite(&c
->partial_stripes_lock
, TRUE
);
615 le
= c
->partial_stripes
.Flink
;
616 while (le
!= &c
->partial_stripes
) {
617 partial_stripe
* ps
= CONTAINING_RECORD(le
, partial_stripe
, list_entry
);
619 if (ps
->address
+ ps_length
> addr
&& ps
->address
< addr
+ length
) {
620 ULONG runlength
, index
;
622 runlength
= RtlFindFirstRunClear(&ps
->bmp
, &index
);
624 while (runlength
!= 0) {
625 UINT64 runstart
= ps
->address
+ (index
* Vcb
->superblock
.sector_size
);
626 UINT64 runend
= runstart
+ (runlength
* Vcb
->superblock
.sector_size
);
627 UINT64 start
= max(runstart
, addr
);
628 UINT64 end
= min(runend
, addr
+ length
);
631 RtlCopyMemory(buf
+ start
- addr
, &ps
->data
[start
- ps
->address
], (ULONG
)(end
- start
));
633 runlength
= RtlFindNextForwardRunClear(&ps
->bmp
, index
+ runlength
, &index
);
635 } else if (ps
->address
>= addr
+ length
)
641 ExReleaseResourceLite(&c
->partial_stripes_lock
);
645 tree_header
* th
= (tree_header
*)buf
;
646 UINT32 crc32
= ~calc_crc32c(0xffffffff, (UINT8
*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
648 if (addr
!= th
->address
|| crc32
!= *((UINT32
*)th
->csum
)) {
649 checksum_error
= TRUE
;
650 if (!no_success
&& !degraded
)
651 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
652 } else if (generation
!= 0 && generation
!= th
->generation
) {
653 checksum_error
= TRUE
;
654 if (!no_success
&& !degraded
)
655 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_GENERATION_ERRORS
);
657 } else if (context
->csum
) {
659 LARGE_INTEGER time1
, time2
;
661 time1
= KeQueryPerformanceCounter(NULL
);
663 Status
= check_csum(Vcb
, buf
, length
/ Vcb
->superblock
.sector_size
, context
->csum
);
665 if (Status
== STATUS_CRC_ERROR
) {
667 WARN("checksum error\n");
668 checksum_error
= TRUE
;
669 } else if (!NT_SUCCESS(Status
)) {
670 ERR("check_csum returned %08x\n", Status
);
675 time2
= KeQueryPerformanceCounter(NULL
);
677 Vcb
->stats
.read_csum_time
+= time2
.QuadPart
- time1
.QuadPart
;
680 checksum_error
= TRUE
;
683 return STATUS_SUCCESS
;
688 BOOL recovered
= FALSE
, first
= TRUE
, failed
= FALSE
;
691 t2
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.node_size
* 2, ALLOC_TAG
);
693 ERR("out of memory\n");
694 return STATUS_INSUFFICIENT_RESOURCES
;
697 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
- 1, &off
, &stripe
);
699 parity
= (((addr
- offset
) / ((ci
->num_stripes
- 1) * ci
->stripe_length
)) + ci
->num_stripes
- 1) % ci
->num_stripes
;
701 stripe
= (parity
+ stripe
+ 1) % ci
->num_stripes
;
703 for (j
= 0; j
< ci
->num_stripes
; j
++) {
705 if (devices
[j
] && devices
[j
]->devobj
) {
707 Status
= sync_read_phys(devices
[j
]->devobj
, cis
[j
].offset
+ off
, Vcb
->superblock
.node_size
, t2
, FALSE
);
708 if (!NT_SUCCESS(Status
)) {
709 ERR("sync_read_phys returned %08x\n", Status
);
710 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
717 Status
= sync_read_phys(devices
[j
]->devobj
, cis
[j
].offset
+ off
, Vcb
->superblock
.node_size
, t2
+ Vcb
->superblock
.node_size
, FALSE
);
718 if (!NT_SUCCESS(Status
)) {
719 ERR("sync_read_phys returned %08x\n", Status
);
720 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
725 do_xor(t2
, t2
+ Vcb
->superblock
.node_size
, Vcb
->superblock
.node_size
);
735 tree_header
* t3
= (tree_header
*)t2
;
736 UINT32 crc32
= ~calc_crc32c(0xffffffff, (UINT8
*)&t3
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(t3
->csum
));
738 if (t3
->address
== addr
&& crc32
== *((UINT32
*)t3
->csum
) && (generation
== 0 || t3
->generation
== generation
)) {
739 RtlCopyMemory(buf
, t2
, Vcb
->superblock
.node_size
);
742 ERR("recovering from checksum error at %llx, device %llx\n", addr
, devices
[stripe
]->devitem
.dev_id
);
746 if (!Vcb
->readonly
&& devices
[stripe
] && !devices
[stripe
]->readonly
&& devices
[stripe
]->devobj
) { // write good data over bad
747 Status
= write_data_phys(devices
[stripe
]->devobj
, cis
[stripe
].offset
+ off
, t2
, Vcb
->superblock
.node_size
);
748 if (!NT_SUCCESS(Status
)) {
749 WARN("write_data_phys returned %08x\n", Status
);
750 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
757 ERR("unrecoverable checksum error at %llx\n", addr
);
759 return STATUS_CRC_ERROR
;
764 ULONG sectors
= length
/ Vcb
->superblock
.sector_size
;
767 sector
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.sector_size
* 2, ALLOC_TAG
);
769 ERR("out of memory\n");
770 return STATUS_INSUFFICIENT_RESOURCES
;
773 for (i
= 0; i
< sectors
; i
++) {
779 crc32
= ~calc_crc32c(0xffffffff, buf
+ (i
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
781 get_raid0_offset(addr
- offset
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), ci
->stripe_length
,
782 ci
->num_stripes
- 1, &off
, &stripe
);
784 parity
= (((addr
- offset
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
)) / ((ci
->num_stripes
- 1) * ci
->stripe_length
)) + ci
->num_stripes
- 1) % ci
->num_stripes
;
786 stripe
= (parity
+ stripe
+ 1) % ci
->num_stripes
;
788 if (!devices
[stripe
] || !devices
[stripe
]->devobj
|| (context
->csum
&& context
->csum
[i
] != crc32
)) {
789 BOOL recovered
= FALSE
, first
= TRUE
, failed
= FALSE
;
791 if (devices
[stripe
] && devices
[stripe
]->devobj
)
792 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_READ_ERRORS
);
794 for (j
= 0; j
< ci
->num_stripes
; j
++) {
796 if (devices
[j
] && devices
[j
]->devobj
) {
798 Status
= sync_read_phys(devices
[j
]->devobj
, cis
[j
].offset
+ off
, Vcb
->superblock
.sector_size
, sector
, FALSE
);
799 if (!NT_SUCCESS(Status
)) {
800 ERR("sync_read_phys returned %08x\n", Status
);
802 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
808 Status
= sync_read_phys(devices
[j
]->devobj
, cis
[j
].offset
+ off
, Vcb
->superblock
.sector_size
, sector
+ Vcb
->superblock
.sector_size
, FALSE
);
809 if (!NT_SUCCESS(Status
)) {
810 ERR("sync_read_phys returned %08x\n", Status
);
812 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
816 do_xor(sector
, sector
+ Vcb
->superblock
.sector_size
, Vcb
->superblock
.sector_size
);
827 crc32
= ~calc_crc32c(0xffffffff, sector
, Vcb
->superblock
.sector_size
);
829 if (!context
->csum
|| crc32
== context
->csum
[i
]) {
830 RtlCopyMemory(buf
+ (i
* Vcb
->superblock
.sector_size
), sector
, Vcb
->superblock
.sector_size
);
833 ERR("recovering from checksum error at %llx, device %llx\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), devices
[stripe
]->devitem
.dev_id
);
837 if (!Vcb
->readonly
&& devices
[stripe
] && !devices
[stripe
]->readonly
&& devices
[stripe
]->devobj
) { // write good data over bad
838 Status
= write_data_phys(devices
[stripe
]->devobj
, cis
[stripe
].offset
+ off
,
839 sector
, Vcb
->superblock
.sector_size
);
840 if (!NT_SUCCESS(Status
)) {
841 WARN("write_data_phys returned %08x\n", Status
);
842 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
849 ERR("unrecoverable checksum error at %llx\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
));
851 return STATUS_CRC_ERROR
;
859 return STATUS_SUCCESS
;
862 void raid6_recover2(UINT8
* sectors
, UINT16 num_stripes
, ULONG sector_size
, UINT16 missing1
, UINT16 missing2
, UINT8
* out
) {
863 if (missing1
== num_stripes
- 2 || missing2
== num_stripes
- 2) { // reconstruct from q and data
864 UINT16 missing
= missing1
== (num_stripes
- 2) ? missing2
: missing1
;
867 stripe
= num_stripes
- 3;
869 if (stripe
== missing
)
870 RtlZeroMemory(out
, sector_size
);
872 RtlCopyMemory(out
, sectors
+ (stripe
* sector_size
), sector_size
);
877 galois_double(out
, sector_size
);
879 if (stripe
!= missing
)
880 do_xor(out
, sectors
+ (stripe
* sector_size
), sector_size
);
881 } while (stripe
> 0);
883 do_xor(out
, sectors
+ ((num_stripes
- 1) * sector_size
), sector_size
);
886 galois_divpower(out
, (UINT8
)missing
, sector_size
);
887 } else { // reconstruct from p and q
889 UINT8 gyx
, gx
, denom
, a
, b
, *p
, *q
, *pxy
, *qxy
;
892 stripe
= num_stripes
- 3;
894 pxy
= out
+ sector_size
;
897 if (stripe
== missing1
|| stripe
== missing2
) {
898 RtlZeroMemory(qxy
, sector_size
);
899 RtlZeroMemory(pxy
, sector_size
);
901 if (stripe
== missing1
)
906 RtlCopyMemory(qxy
, sectors
+ (stripe
* sector_size
), sector_size
);
907 RtlCopyMemory(pxy
, sectors
+ (stripe
* sector_size
), sector_size
);
913 galois_double(qxy
, sector_size
);
915 if (stripe
!= missing1
&& stripe
!= missing2
) {
916 do_xor(qxy
, sectors
+ (stripe
* sector_size
), sector_size
);
917 do_xor(pxy
, sectors
+ (stripe
* sector_size
), sector_size
);
918 } else if (stripe
== missing1
)
920 else if (stripe
== missing2
)
922 } while (stripe
> 0);
924 gyx
= gpow2(y
> x
? (y
-x
) : (255-x
+y
));
927 denom
= gdiv(1, gyx
^ 1);
928 a
= gmul(gyx
, denom
);
931 p
= sectors
+ ((num_stripes
- 2) * sector_size
);
932 q
= sectors
+ ((num_stripes
- 1) * sector_size
);
934 for (j
= 0; j
< sector_size
; j
++) {
935 *qxy
= gmul(a
, *p
^ *pxy
) ^ gmul(b
, *q
^ *qxy
);
943 do_xor(out
+ sector_size
, out
, sector_size
);
944 do_xor(out
+ sector_size
, sectors
+ ((num_stripes
- 2) * sector_size
), sector_size
);
948 static NTSTATUS
read_data_raid6(device_extension
* Vcb
, UINT8
* buf
, UINT64 addr
, UINT32 length
, read_data_context
* context
, CHUNK_ITEM
* ci
,
949 device
** devices
, UINT64 offset
, UINT64 generation
, chunk
* c
, BOOL degraded
) {
952 BOOL checksum_error
= FALSE
;
953 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&ci
[1];
955 BOOL no_success
= TRUE
;
957 for (j
= 0; j
< ci
->num_stripes
; j
++) {
958 if (context
->stripes
[j
].status
== ReadDataStatus_Error
) {
959 WARN("stripe %u returned error %08x\n", j
, context
->stripes
[j
].iosb
.Status
);
962 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
963 return context
->stripes
[j
].iosb
.Status
;
964 } else if (context
->stripes
[j
].status
== ReadDataStatus_Success
) {
970 if (c
) { // check partial stripes
972 UINT64 ps_length
= (ci
->num_stripes
- 2) * ci
->stripe_length
;
974 ExAcquireResourceSharedLite(&c
->partial_stripes_lock
, TRUE
);
976 le
= c
->partial_stripes
.Flink
;
977 while (le
!= &c
->partial_stripes
) {
978 partial_stripe
* ps
= CONTAINING_RECORD(le
, partial_stripe
, list_entry
);
980 if (ps
->address
+ ps_length
> addr
&& ps
->address
< addr
+ length
) {
981 ULONG runlength
, index
;
983 runlength
= RtlFindFirstRunClear(&ps
->bmp
, &index
);
985 while (runlength
!= 0) {
986 UINT64 runstart
= ps
->address
+ (index
* Vcb
->superblock
.sector_size
);
987 UINT64 runend
= runstart
+ (runlength
* Vcb
->superblock
.sector_size
);
988 UINT64 start
= max(runstart
, addr
);
989 UINT64 end
= min(runend
, addr
+ length
);
992 RtlCopyMemory(buf
+ start
- addr
, &ps
->data
[start
- ps
->address
], (ULONG
)(end
- start
));
994 runlength
= RtlFindNextForwardRunClear(&ps
->bmp
, index
+ runlength
, &index
);
996 } else if (ps
->address
>= addr
+ length
)
1002 ExReleaseResourceLite(&c
->partial_stripes_lock
);
1005 if (context
->tree
) {
1006 tree_header
* th
= (tree_header
*)buf
;
1007 UINT32 crc32
= ~calc_crc32c(0xffffffff, (UINT8
*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
1009 if (addr
!= th
->address
|| crc32
!= *((UINT32
*)th
->csum
)) {
1010 checksum_error
= TRUE
;
1011 if (!no_success
&& !degraded
&& devices
[stripe
])
1012 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1013 } else if (generation
!= 0 && generation
!= th
->generation
) {
1014 checksum_error
= TRUE
;
1015 if (!no_success
&& !degraded
&& devices
[stripe
])
1016 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_GENERATION_ERRORS
);
1018 } else if (context
->csum
) {
1020 LARGE_INTEGER time1
, time2
;
1022 time1
= KeQueryPerformanceCounter(NULL
);
1024 Status
= check_csum(Vcb
, buf
, length
/ Vcb
->superblock
.sector_size
, context
->csum
);
1026 if (Status
== STATUS_CRC_ERROR
) {
1028 WARN("checksum error\n");
1029 checksum_error
= TRUE
;
1030 } else if (!NT_SUCCESS(Status
)) {
1031 ERR("check_csum returned %08x\n", Status
);
1035 time2
= KeQueryPerformanceCounter(NULL
);
1037 Vcb
->stats
.read_csum_time
+= time2
.QuadPart
- time1
.QuadPart
;
1039 } else if (degraded
)
1040 checksum_error
= TRUE
;
1042 if (!checksum_error
)
1043 return STATUS_SUCCESS
;
1045 if (context
->tree
) {
1047 UINT16 k
, physstripe
, parity1
, parity2
, error_stripe
;
1049 BOOL recovered
= FALSE
, failed
= FALSE
;
1050 ULONG num_errors
= 0;
1052 sector
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.node_size
* (ci
->num_stripes
+ 2), ALLOC_TAG
);
1054 ERR("out of memory\n");
1055 return STATUS_INSUFFICIENT_RESOURCES
;
1058 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
- 2, &off
, &stripe
);
1060 parity1
= (((addr
- offset
) / ((ci
->num_stripes
- 2) * ci
->stripe_length
)) + ci
->num_stripes
- 2) % ci
->num_stripes
;
1061 parity2
= (parity1
+ 1) % ci
->num_stripes
;
1063 physstripe
= (parity2
+ stripe
+ 1) % ci
->num_stripes
;
1065 j
= (parity2
+ 1) % ci
->num_stripes
;
1067 for (k
= 0; k
< ci
->num_stripes
- 1; k
++) {
1068 if (j
!= physstripe
) {
1069 if (devices
[j
] && devices
[j
]->devobj
) {
1070 Status
= sync_read_phys(devices
[j
]->devobj
, cis
[j
].offset
+ off
, Vcb
->superblock
.node_size
, sector
+ (k
* Vcb
->superblock
.node_size
), FALSE
);
1071 if (!NT_SUCCESS(Status
)) {
1072 ERR("sync_read_phys returned %08x\n", Status
);
1073 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
1077 if (num_errors
> 1) {
1086 if (num_errors
> 1) {
1093 j
= (j
+ 1) % ci
->num_stripes
;
1097 if (num_errors
== 0) {
1098 tree_header
* th
= (tree_header
*)(sector
+ (stripe
* Vcb
->superblock
.node_size
));
1101 RtlCopyMemory(sector
+ (stripe
* Vcb
->superblock
.node_size
), sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.node_size
),
1102 Vcb
->superblock
.node_size
);
1104 for (j
= 0; j
< ci
->num_stripes
- 2; j
++) {
1106 do_xor(sector
+ (stripe
* Vcb
->superblock
.node_size
), sector
+ (j
* Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1109 crc32
= ~calc_crc32c(0xffffffff, (UINT8
*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
1111 if (th
->address
== addr
&& crc32
== *((UINT32
*)th
->csum
) && (generation
== 0 || th
->generation
== generation
)) {
1112 RtlCopyMemory(buf
, sector
+ (stripe
* Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1114 if (devices
[physstripe
] && devices
[physstripe
]->devobj
)
1115 ERR("recovering from checksum error at %llx, device %llx\n", addr
, devices
[physstripe
]->devitem
.dev_id
);
1119 if (!Vcb
->readonly
&& devices
[physstripe
] && devices
[physstripe
]->devobj
&& !devices
[physstripe
]->readonly
) { // write good data over bad
1120 Status
= write_data_phys(devices
[physstripe
]->devobj
, cis
[physstripe
].offset
+ off
,
1121 sector
+ (stripe
* Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1122 if (!NT_SUCCESS(Status
)) {
1123 WARN("write_data_phys returned %08x\n", Status
);
1124 log_device_error(Vcb
, devices
[physstripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
1132 tree_header
* th
= (tree_header
*)(sector
+ (ci
->num_stripes
* Vcb
->superblock
.node_size
));
1133 BOOL read_q
= FALSE
;
1135 if (devices
[parity2
] && devices
[parity2
]->devobj
) {
1136 Status
= sync_read_phys(devices
[parity2
]->devobj
, cis
[parity2
].offset
+ off
,
1137 Vcb
->superblock
.node_size
, sector
+ ((ci
->num_stripes
- 1) * Vcb
->superblock
.node_size
), FALSE
);
1138 if (!NT_SUCCESS(Status
)) {
1139 ERR("sync_read_phys returned %08x\n", Status
);
1140 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
1146 if (num_errors
== 1) {
1147 raid6_recover2(sector
, ci
->num_stripes
, Vcb
->superblock
.node_size
, stripe
, error_stripe
, sector
+ (ci
->num_stripes
* Vcb
->superblock
.node_size
));
1149 crc32
= ~calc_crc32c(0xffffffff, (UINT8
*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
1151 if (th
->address
== addr
&& crc32
== *((UINT32
*)th
->csum
) && (generation
== 0 || th
->generation
== generation
))
1154 for (j
= 0; j
< ci
->num_stripes
- 1; j
++) {
1156 raid6_recover2(sector
, ci
->num_stripes
, Vcb
->superblock
.node_size
, stripe
, j
, sector
+ (ci
->num_stripes
* Vcb
->superblock
.node_size
));
1158 crc32
= ~calc_crc32c(0xffffffff, (UINT8
*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
1160 if (th
->address
== addr
&& crc32
== *((UINT32
*)th
->csum
) && (generation
== 0 || th
->generation
== generation
)) {
1171 UINT16 error_stripe_phys
= (parity2
+ error_stripe
+ 1) % ci
->num_stripes
;
1173 if (devices
[physstripe
] && devices
[physstripe
]->devobj
)
1174 ERR("recovering from checksum error at %llx, device %llx\n", addr
, devices
[physstripe
]->devitem
.dev_id
);
1176 RtlCopyMemory(buf
, sector
+ (ci
->num_stripes
* Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1178 if (!Vcb
->readonly
&& devices
[physstripe
] && devices
[physstripe
]->devobj
&& !devices
[physstripe
]->readonly
) { // write good data over bad
1179 Status
= write_data_phys(devices
[physstripe
]->devobj
, cis
[physstripe
].offset
+ off
,
1180 sector
+ (ci
->num_stripes
* Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1181 if (!NT_SUCCESS(Status
)) {
1182 WARN("write_data_phys returned %08x\n", Status
);
1183 log_device_error(Vcb
, devices
[physstripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
1187 if (devices
[error_stripe_phys
] && devices
[error_stripe_phys
]->devobj
) {
1188 if (error_stripe
== ci
->num_stripes
- 2) {
1189 ERR("recovering from parity error at %llx, device %llx\n", addr
, devices
[error_stripe_phys
]->devitem
.dev_id
);
1191 log_device_error(Vcb
, devices
[error_stripe_phys
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1193 RtlZeroMemory(sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1195 for (j
= 0; j
< ci
->num_stripes
- 2; j
++) {
1197 do_xor(sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.node_size
), sector
+ (ci
->num_stripes
* Vcb
->superblock
.node_size
),
1198 Vcb
->superblock
.node_size
);
1200 do_xor(sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.node_size
), sector
+ (j
* Vcb
->superblock
.node_size
),
1201 Vcb
->superblock
.node_size
);
1205 ERR("recovering from checksum error at %llx, device %llx\n", addr
+ ((error_stripe
- stripe
) * ci
->stripe_length
),
1206 devices
[error_stripe_phys
]->devitem
.dev_id
);
1208 log_device_error(Vcb
, devices
[error_stripe_phys
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1210 RtlCopyMemory(sector
+ (error_stripe
* Vcb
->superblock
.node_size
),
1211 sector
+ ((ci
->num_stripes
+ 1) * Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1215 if (!Vcb
->readonly
&& devices
[error_stripe_phys
] && devices
[error_stripe_phys
]->devobj
&& !devices
[error_stripe_phys
]->readonly
) { // write good data over bad
1216 Status
= write_data_phys(devices
[error_stripe_phys
]->devobj
, cis
[error_stripe_phys
].offset
+ off
,
1217 sector
+ (error_stripe
* Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1218 if (!NT_SUCCESS(Status
)) {
1219 WARN("write_data_phys returned %08x\n", Status
);
1220 log_device_error(Vcb
, devices
[error_stripe_phys
], BTRFS_DEV_STAT_WRITE_ERRORS
);
1228 ERR("unrecoverable checksum error at %llx\n", addr
);
1230 return STATUS_CRC_ERROR
;
1235 ULONG sectors
= length
/ Vcb
->superblock
.sector_size
;
1238 sector
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.sector_size
* (ci
->num_stripes
+ 2), ALLOC_TAG
);
1240 ERR("out of memory\n");
1241 return STATUS_INSUFFICIENT_RESOURCES
;
1244 for (i
= 0; i
< sectors
; i
++) {
1246 UINT16 physstripe
, parity1
, parity2
;
1250 crc32
= ~calc_crc32c(0xffffffff, buf
+ (i
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1252 get_raid0_offset(addr
- offset
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), ci
->stripe_length
,
1253 ci
->num_stripes
- 2, &off
, &stripe
);
1255 parity1
= (((addr
- offset
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
)) / ((ci
->num_stripes
- 2) * ci
->stripe_length
)) + ci
->num_stripes
- 2) % ci
->num_stripes
;
1256 parity2
= (parity1
+ 1) % ci
->num_stripes
;
1258 physstripe
= (parity2
+ stripe
+ 1) % ci
->num_stripes
;
1260 if (!devices
[physstripe
] || !devices
[physstripe
]->devobj
|| (context
->csum
&& context
->csum
[i
] != crc32
)) {
1261 UINT16 k
, error_stripe
;
1262 BOOL recovered
= FALSE
, failed
= FALSE
;
1263 ULONG num_errors
= 0;
1265 if (devices
[physstripe
] && devices
[physstripe
]->devobj
)
1266 log_device_error(Vcb
, devices
[physstripe
], BTRFS_DEV_STAT_READ_ERRORS
);
1268 j
= (parity2
+ 1) % ci
->num_stripes
;
1270 for (k
= 0; k
< ci
->num_stripes
- 1; k
++) {
1271 if (j
!= physstripe
) {
1272 if (devices
[j
] && devices
[j
]->devobj
) {
1273 Status
= sync_read_phys(devices
[j
]->devobj
, cis
[j
].offset
+ off
, Vcb
->superblock
.sector_size
, sector
+ (k
* Vcb
->superblock
.sector_size
), FALSE
);
1274 if (!NT_SUCCESS(Status
)) {
1275 ERR("sync_read_phys returned %08x\n", Status
);
1276 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
1280 if (num_errors
> 1) {
1289 if (num_errors
> 1) {
1296 j
= (j
+ 1) % ci
->num_stripes
;
1300 if (num_errors
== 0) {
1301 RtlCopyMemory(sector
+ (stripe
* Vcb
->superblock
.sector_size
), sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1303 for (j
= 0; j
< ci
->num_stripes
- 2; j
++) {
1305 do_xor(sector
+ (stripe
* Vcb
->superblock
.sector_size
), sector
+ (j
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1309 crc32
= ~calc_crc32c(0xffffffff, sector
+ (stripe
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1311 if (!context
->csum
|| crc32
== context
->csum
[i
]) {
1312 RtlCopyMemory(buf
+ (i
* Vcb
->superblock
.sector_size
), sector
+ (stripe
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1314 if (devices
[physstripe
] && devices
[physstripe
]->devobj
)
1315 ERR("recovering from checksum error at %llx, device %llx\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
),
1316 devices
[physstripe
]->devitem
.dev_id
);
1320 if (!Vcb
->readonly
&& devices
[physstripe
] && devices
[physstripe
]->devobj
&& !devices
[physstripe
]->readonly
) { // write good data over bad
1321 Status
= write_data_phys(devices
[physstripe
]->devobj
, cis
[physstripe
].offset
+ off
,
1322 sector
+ (stripe
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1323 if (!NT_SUCCESS(Status
)) {
1324 WARN("write_data_phys returned %08x\n", Status
);
1325 log_device_error(Vcb
, devices
[physstripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
1332 BOOL read_q
= FALSE
;
1334 if (devices
[parity2
] && devices
[parity2
]->devobj
) {
1335 Status
= sync_read_phys(devices
[parity2
]->devobj
, cis
[parity2
].offset
+ off
,
1336 Vcb
->superblock
.sector_size
, sector
+ ((ci
->num_stripes
- 1) * Vcb
->superblock
.sector_size
), FALSE
);
1337 if (!NT_SUCCESS(Status
)) {
1338 ERR("sync_read_phys returned %08x\n", Status
);
1339 log_device_error(Vcb
, devices
[parity2
], BTRFS_DEV_STAT_READ_ERRORS
);
1345 if (num_errors
== 1) {
1346 raid6_recover2(sector
, ci
->num_stripes
, Vcb
->superblock
.sector_size
, stripe
, error_stripe
, sector
+ (ci
->num_stripes
* Vcb
->superblock
.sector_size
));
1348 if (!devices
[physstripe
] || !devices
[physstripe
]->devobj
)
1351 crc32
= ~calc_crc32c(0xffffffff, sector
+ (ci
->num_stripes
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1353 if (crc32
== context
->csum
[i
])
1357 for (j
= 0; j
< ci
->num_stripes
- 1; j
++) {
1359 raid6_recover2(sector
, ci
->num_stripes
, Vcb
->superblock
.sector_size
, stripe
, j
, sector
+ (ci
->num_stripes
* Vcb
->superblock
.sector_size
));
1361 crc32
= ~calc_crc32c(0xffffffff, sector
+ (ci
->num_stripes
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1363 if (crc32
== context
->csum
[i
]) {
1374 UINT16 error_stripe_phys
= (parity2
+ error_stripe
+ 1) % ci
->num_stripes
;
1376 if (devices
[physstripe
] && devices
[physstripe
]->devobj
)
1377 ERR("recovering from checksum error at %llx, device %llx\n",
1378 addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), devices
[physstripe
]->devitem
.dev_id
);
1380 RtlCopyMemory(buf
+ (i
* Vcb
->superblock
.sector_size
), sector
+ (ci
->num_stripes
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1382 if (!Vcb
->readonly
&& devices
[physstripe
] && devices
[physstripe
]->devobj
&& !devices
[physstripe
]->readonly
) { // write good data over bad
1383 Status
= write_data_phys(devices
[physstripe
]->devobj
, cis
[physstripe
].offset
+ off
,
1384 sector
+ (ci
->num_stripes
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1385 if (!NT_SUCCESS(Status
)) {
1386 WARN("write_data_phys returned %08x\n", Status
);
1387 log_device_error(Vcb
, devices
[physstripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
1391 if (devices
[error_stripe_phys
] && devices
[error_stripe_phys
]->devobj
) {
1392 if (error_stripe
== ci
->num_stripes
- 2) {
1393 ERR("recovering from parity error at %llx, device %llx\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
),
1394 devices
[error_stripe_phys
]->devitem
.dev_id
);
1396 log_device_error(Vcb
, devices
[error_stripe_phys
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1398 RtlZeroMemory(sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1400 for (j
= 0; j
< ci
->num_stripes
- 2; j
++) {
1402 do_xor(sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.sector_size
), sector
+ (ci
->num_stripes
* Vcb
->superblock
.sector_size
),
1403 Vcb
->superblock
.sector_size
);
1405 do_xor(sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.sector_size
), sector
+ (j
* Vcb
->superblock
.sector_size
),
1406 Vcb
->superblock
.sector_size
);
1410 ERR("recovering from checksum error at %llx, device %llx\n",
1411 addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
) + ((error_stripe
- stripe
) * ci
->stripe_length
),
1412 devices
[error_stripe_phys
]->devitem
.dev_id
);
1414 log_device_error(Vcb
, devices
[error_stripe_phys
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1416 RtlCopyMemory(sector
+ (error_stripe
* Vcb
->superblock
.sector_size
),
1417 sector
+ ((ci
->num_stripes
+ 1) * Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1421 if (!Vcb
->readonly
&& devices
[error_stripe_phys
] && devices
[error_stripe_phys
]->devobj
&& !devices
[error_stripe_phys
]->readonly
) { // write good data over bad
1422 Status
= write_data_phys(devices
[error_stripe_phys
]->devobj
, cis
[error_stripe_phys
].offset
+ off
,
1423 sector
+ (error_stripe
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1424 if (!NT_SUCCESS(Status
)) {
1425 WARN("write_data_phys returned %08x\n", Status
);
1426 log_device_error(Vcb
, devices
[error_stripe_phys
], BTRFS_DEV_STAT_WRITE_ERRORS
);
1434 ERR("unrecoverable checksum error at %llx\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
));
1436 return STATUS_CRC_ERROR
;
1444 return STATUS_SUCCESS
;
1447 NTSTATUS
read_data(_In_ device_extension
* Vcb
, _In_ UINT64 addr
, _In_ UINT32 length
, _In_reads_bytes_opt_(length
*sizeof(UINT32
)/Vcb
->superblock
.sector_size
) UINT32
* csum
,
1448 _In_ BOOL is_tree
, _Out_writes_bytes_(length
) UINT8
* buf
, _In_opt_ chunk
* c
, _Out_opt_ chunk
** pc
, _In_opt_ PIRP Irp
, _In_ UINT64 generation
, _In_ BOOL file_read
,
1449 _In_ ULONG priority
) {
1451 CHUNK_ITEM_STRIPE
* cis
;
1452 read_data_context context
;
1453 UINT64 type
, offset
, total_reading
= 0;
1455 device
** devices
= NULL
;
1456 UINT16 i
, startoffstripe
, allowed_missing
, missing_devices
= 0;
1457 UINT8
* dummypage
= NULL
;
1458 PMDL dummy_mdl
= NULL
;
1460 UINT64 lockaddr
, locklen
;
1462 LARGE_INTEGER time1
, time2
;
1465 if (Vcb
->log_to_phys_loaded
) {
1467 c
= get_chunk_from_address(Vcb
, addr
);
1470 ERR("get_chunk_from_address failed\n");
1471 return STATUS_INTERNAL_ERROR
;
1477 devices
= c
->devices
;
1482 LIST_ENTRY
* le
= Vcb
->sys_chunks
.Flink
;
1487 while (le
!= &Vcb
->sys_chunks
) {
1488 sys_chunk
* sc
= CONTAINING_RECORD(le
, sys_chunk
, list_entry
);
1490 if (sc
->key
.obj_id
== 0x100 && sc
->key
.obj_type
== TYPE_CHUNK_ITEM
&& sc
->key
.offset
<= addr
) {
1491 CHUNK_ITEM
* chunk_item
= sc
->data
;
1493 if ((addr
- sc
->key
.offset
) < chunk_item
->size
&& chunk_item
->num_stripes
> 0) {
1495 offset
= sc
->key
.offset
;
1496 cis
= (CHUNK_ITEM_STRIPE
*)&chunk_item
[1];
1498 devices
= ExAllocatePoolWithTag(PagedPool
, sizeof(device
*) * ci
->num_stripes
, ALLOC_TAG
);
1500 ERR("out of memory\n");
1501 return STATUS_INSUFFICIENT_RESOURCES
;
1504 for (i
= 0; i
< ci
->num_stripes
; i
++) {
1505 devices
[i
] = find_device_from_uuid(Vcb
, &cis
[i
].dev_uuid
);
1516 ERR("could not find chunk for %llx in bootstrap\n", addr
);
1517 return STATUS_INTERNAL_ERROR
;
1524 if (ci
->type
& BLOCK_FLAG_DUPLICATE
) {
1525 type
= BLOCK_FLAG_DUPLICATE
;
1526 allowed_missing
= ci
->num_stripes
- 1;
1527 } else if (ci
->type
& BLOCK_FLAG_RAID0
) {
1528 type
= BLOCK_FLAG_RAID0
;
1529 allowed_missing
= 0;
1530 } else if (ci
->type
& BLOCK_FLAG_RAID1
) {
1531 type
= BLOCK_FLAG_DUPLICATE
;
1532 allowed_missing
= 1;
1533 } else if (ci
->type
& BLOCK_FLAG_RAID10
) {
1534 type
= BLOCK_FLAG_RAID10
;
1535 allowed_missing
= 1;
1536 } else if (ci
->type
& BLOCK_FLAG_RAID5
) {
1537 type
= BLOCK_FLAG_RAID5
;
1538 allowed_missing
= 1;
1539 } else if (ci
->type
& BLOCK_FLAG_RAID6
) {
1540 type
= BLOCK_FLAG_RAID6
;
1541 allowed_missing
= 2;
1543 type
= BLOCK_FLAG_DUPLICATE
;
1544 allowed_missing
= 0;
1547 cis
= (CHUNK_ITEM_STRIPE
*)&ci
[1];
1549 RtlZeroMemory(&context
, sizeof(read_data_context
));
1550 KeInitializeEvent(&context
.Event
, NotificationEvent
, FALSE
);
1552 context
.stripes
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(read_data_stripe
) * ci
->num_stripes
, ALLOC_TAG
);
1553 if (!context
.stripes
) {
1554 ERR("out of memory\n");
1555 return STATUS_INSUFFICIENT_RESOURCES
;
1558 if (c
&& (type
== BLOCK_FLAG_RAID5
|| type
== BLOCK_FLAG_RAID6
)) {
1559 get_raid56_lock_range(c
, addr
, length
, &lockaddr
, &locklen
);
1560 chunk_lock_range(Vcb
, c
, lockaddr
, locklen
);
1563 RtlZeroMemory(context
.stripes
, sizeof(read_data_stripe
) * ci
->num_stripes
);
1565 context
.buflen
= length
;
1566 context
.num_stripes
= ci
->num_stripes
;
1567 context
.stripes_left
= context
.num_stripes
;
1568 context
.sector_size
= Vcb
->superblock
.sector_size
;
1569 context
.csum
= csum
;
1570 context
.tree
= is_tree
;
1571 context
.type
= type
;
1573 if (type
== BLOCK_FLAG_RAID0
) {
1574 UINT64 startoff
, endoff
;
1575 UINT16 endoffstripe
, stripe
;
1576 UINT32
*stripeoff
, pos
;
1580 // FIXME - test this still works if page size isn't the same as sector size
1582 // This relies on the fact that MDLs are followed in memory by the page file numbers,
1583 // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0
1584 // data for you without doing a memcpy yourself.
1585 // MDLs are officially opaque, so this might very well break in future versions of Windows.
1587 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
, &startoff
, &startoffstripe
);
1588 get_raid0_offset(addr
+ length
- offset
- 1, ci
->stripe_length
, ci
->num_stripes
, &endoff
, &endoffstripe
);
1591 // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL
1592 // with duplicated dummy PFNs, which confuse check_csum. Ah well.
1593 // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested.
1595 context
.va
= ExAllocatePoolWithTag(NonPagedPool
, length
, ALLOC_TAG
);
1598 ERR("out of memory\n");
1599 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1605 master_mdl
= IoAllocateMdl(context
.va
, length
, FALSE
, FALSE
, NULL
);
1607 ERR("out of memory\n");
1608 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1612 Status
= STATUS_SUCCESS
;
1615 MmProbeAndLockPages(master_mdl
, KernelMode
, IoWriteAccess
);
1616 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
1617 Status
= _SEH2_GetExceptionCode();
1620 if (!NT_SUCCESS(Status
)) {
1621 ERR("MmProbeAndLockPages threw exception %08x\n", Status
);
1622 IoFreeMdl(master_mdl
);
1626 pfns
= (PFN_NUMBER
*)(master_mdl
+ 1);
1628 for (i
= 0; i
< ci
->num_stripes
; i
++) {
1629 if (startoffstripe
> i
)
1630 context
.stripes
[i
].stripestart
= startoff
- (startoff
% ci
->stripe_length
) + ci
->stripe_length
;
1631 else if (startoffstripe
== i
)
1632 context
.stripes
[i
].stripestart
= startoff
;
1634 context
.stripes
[i
].stripestart
= startoff
- (startoff
% ci
->stripe_length
);
1636 if (endoffstripe
> i
)
1637 context
.stripes
[i
].stripeend
= endoff
- (endoff
% ci
->stripe_length
) + ci
->stripe_length
;
1638 else if (endoffstripe
== i
)
1639 context
.stripes
[i
].stripeend
= endoff
+ 1;
1641 context
.stripes
[i
].stripeend
= endoff
- (endoff
% ci
->stripe_length
);
1643 if (context
.stripes
[i
].stripestart
!= context
.stripes
[i
].stripeend
) {
1644 context
.stripes
[i
].mdl
= IoAllocateMdl(context
.va
, (ULONG
)(context
.stripes
[i
].stripeend
- context
.stripes
[i
].stripestart
), FALSE
, FALSE
, NULL
);
1646 if (!context
.stripes
[i
].mdl
) {
1647 ERR("IoAllocateMdl failed\n");
1648 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1654 stripeoff
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(UINT32
) * ci
->num_stripes
, ALLOC_TAG
);
1656 ERR("out of memory\n");
1657 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1661 RtlZeroMemory(stripeoff
, sizeof(UINT32
) * ci
->num_stripes
);
1664 stripe
= startoffstripe
;
1665 while (pos
< length
) {
1666 PFN_NUMBER
* stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
1669 UINT32 readlen
= (UINT32
)min(context
.stripes
[stripe
].stripeend
- context
.stripes
[stripe
].stripestart
, ci
->stripe_length
- (context
.stripes
[stripe
].stripestart
% ci
->stripe_length
));
1671 RtlCopyMemory(stripe_pfns
, pfns
, readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1673 stripeoff
[stripe
] += readlen
;
1675 } else if (length
- pos
< ci
->stripe_length
) {
1676 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (length
- pos
) * sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1680 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (ULONG
)(ci
->stripe_length
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
1682 stripeoff
[stripe
] += (UINT32
)ci
->stripe_length
;
1683 pos
+= (UINT32
)ci
->stripe_length
;
1686 stripe
= (stripe
+ 1) % ci
->num_stripes
;
1689 MmUnlockPages(master_mdl
);
1690 IoFreeMdl(master_mdl
);
1692 ExFreePool(stripeoff
);
1693 } else if (type
== BLOCK_FLAG_RAID10
) {
1694 UINT64 startoff
, endoff
;
1695 UINT16 endoffstripe
, j
, stripe
;
1699 UINT32
* stripeoff
, pos
;
1700 read_data_stripe
** stripes
;
1703 orig_ls
= c
->last_stripe
;
1707 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
/ ci
->sub_stripes
, &startoff
, &startoffstripe
);
1708 get_raid0_offset(addr
+ length
- offset
- 1, ci
->stripe_length
, ci
->num_stripes
/ ci
->sub_stripes
, &endoff
, &endoffstripe
);
1710 if ((ci
->num_stripes
% ci
->sub_stripes
) != 0) {
1711 ERR("chunk %llx: num_stripes %x was not a multiple of sub_stripes %x!\n", offset
, ci
->num_stripes
, ci
->sub_stripes
);
1712 Status
= STATUS_INTERNAL_ERROR
;
1717 context
.va
= ExAllocatePoolWithTag(NonPagedPool
, length
, ALLOC_TAG
);
1720 ERR("out of memory\n");
1721 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1727 context
.firstoff
= (UINT16
)((startoff
% ci
->stripe_length
) / Vcb
->superblock
.sector_size
);
1728 context
.startoffstripe
= startoffstripe
;
1729 context
.sectors_per_stripe
= (UINT16
)(ci
->stripe_length
/ Vcb
->superblock
.sector_size
);
1731 startoffstripe
*= ci
->sub_stripes
;
1732 endoffstripe
*= ci
->sub_stripes
;
1735 c
->last_stripe
= (orig_ls
+ 1) % ci
->sub_stripes
;
1737 master_mdl
= IoAllocateMdl(context
.va
, length
, FALSE
, FALSE
, NULL
);
1739 ERR("out of memory\n");
1740 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1744 Status
= STATUS_SUCCESS
;
1747 MmProbeAndLockPages(master_mdl
, KernelMode
, IoWriteAccess
);
1748 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
1749 Status
= _SEH2_GetExceptionCode();
1752 if (!NT_SUCCESS(Status
)) {
1753 ERR("MmProbeAndLockPages threw exception %08x\n", Status
);
1754 IoFreeMdl(master_mdl
);
1758 pfns
= (PFN_NUMBER
*)(master_mdl
+ 1);
1760 stripes
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(read_data_stripe
*) * ci
->num_stripes
/ ci
->sub_stripes
, ALLOC_TAG
);
1762 ERR("out of memory\n");
1763 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1767 RtlZeroMemory(stripes
, sizeof(read_data_stripe
*) * ci
->num_stripes
/ ci
->sub_stripes
);
1769 for (i
= 0; i
< ci
->num_stripes
; i
+= ci
->sub_stripes
) {
1770 UINT64 sstart
, send
;
1771 BOOL stripeset
= FALSE
;
1773 if (startoffstripe
> i
)
1774 sstart
= startoff
- (startoff
% ci
->stripe_length
) + ci
->stripe_length
;
1775 else if (startoffstripe
== i
)
1778 sstart
= startoff
- (startoff
% ci
->stripe_length
);
1780 if (endoffstripe
> i
)
1781 send
= endoff
- (endoff
% ci
->stripe_length
) + ci
->stripe_length
;
1782 else if (endoffstripe
== i
)
1785 send
= endoff
- (endoff
% ci
->stripe_length
);
1787 for (j
= 0; j
< ci
->sub_stripes
; j
++) {
1788 if (j
== orig_ls
&& devices
[i
+j
] && devices
[i
+j
]->devobj
) {
1789 context
.stripes
[i
+j
].stripestart
= sstart
;
1790 context
.stripes
[i
+j
].stripeend
= send
;
1791 stripes
[i
/ ci
->sub_stripes
] = &context
.stripes
[i
+j
];
1793 if (sstart
!= send
) {
1794 context
.stripes
[i
+j
].mdl
= IoAllocateMdl(context
.va
, (ULONG
)(send
- sstart
), FALSE
, FALSE
, NULL
);
1796 if (!context
.stripes
[i
+j
].mdl
) {
1797 ERR("IoAllocateMdl failed\n");
1798 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1805 context
.stripes
[i
+j
].status
= ReadDataStatus_Skip
;
1809 for (j
= 0; j
< ci
->sub_stripes
; j
++) {
1810 if (devices
[i
+j
] && devices
[i
+j
]->devobj
) {
1811 context
.stripes
[i
+j
].stripestart
= sstart
;
1812 context
.stripes
[i
+j
].stripeend
= send
;
1813 context
.stripes
[i
+j
].status
= ReadDataStatus_Pending
;
1814 stripes
[i
/ ci
->sub_stripes
] = &context
.stripes
[i
+j
];
1816 if (sstart
!= send
) {
1817 context
.stripes
[i
+j
].mdl
= IoAllocateMdl(context
.va
, (ULONG
)(send
- sstart
), FALSE
, FALSE
, NULL
);
1819 if (!context
.stripes
[i
+j
].mdl
) {
1820 ERR("IoAllocateMdl failed\n");
1821 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1832 ERR("could not find stripe to read\n");
1833 Status
= STATUS_DEVICE_NOT_READY
;
1839 stripeoff
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(UINT32
) * ci
->num_stripes
/ ci
->sub_stripes
, ALLOC_TAG
);
1841 ERR("out of memory\n");
1842 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1846 RtlZeroMemory(stripeoff
, sizeof(UINT32
) * ci
->num_stripes
/ ci
->sub_stripes
);
1849 stripe
= startoffstripe
/ ci
->sub_stripes
;
1850 while (pos
< length
) {
1851 PFN_NUMBER
* stripe_pfns
= (PFN_NUMBER
*)(stripes
[stripe
]->mdl
+ 1);
1854 UINT32 readlen
= (UINT32
)min(stripes
[stripe
]->stripeend
- stripes
[stripe
]->stripestart
,
1855 ci
->stripe_length
- (stripes
[stripe
]->stripestart
% ci
->stripe_length
));
1857 RtlCopyMemory(stripe_pfns
, pfns
, readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1859 stripeoff
[stripe
] += readlen
;
1861 } else if (length
- pos
< ci
->stripe_length
) {
1862 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (length
- pos
) * sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1866 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (ULONG
)(ci
->stripe_length
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
1868 stripeoff
[stripe
] += (ULONG
)ci
->stripe_length
;
1869 pos
+= (ULONG
)ci
->stripe_length
;
1872 stripe
= (stripe
+ 1) % (ci
->num_stripes
/ ci
->sub_stripes
);
1875 MmUnlockPages(master_mdl
);
1876 IoFreeMdl(master_mdl
);
1878 ExFreePool(stripeoff
);
1879 ExFreePool(stripes
);
1880 } else if (type
== BLOCK_FLAG_DUPLICATE
) {
1884 orig_ls
= i
= c
->last_stripe
;
1888 while (!devices
[i
] || !devices
[i
]->devobj
) {
1889 i
= (i
+ 1) % ci
->num_stripes
;
1892 ERR("no devices available to service request\n");
1893 Status
= STATUS_DEVICE_NOT_READY
;
1899 c
->last_stripe
= (i
+ 1) % ci
->num_stripes
;
1901 context
.stripes
[i
].stripestart
= addr
- offset
;
1902 context
.stripes
[i
].stripeend
= context
.stripes
[i
].stripestart
+ length
;
1905 context
.va
= ExAllocatePoolWithTag(NonPagedPool
, length
, ALLOC_TAG
);
1908 ERR("out of memory\n");
1909 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1913 context
.stripes
[i
].mdl
= IoAllocateMdl(context
.va
, length
, FALSE
, FALSE
, NULL
);
1914 if (!context
.stripes
[i
].mdl
) {
1915 ERR("IoAllocateMdl failed\n");
1916 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1920 MmBuildMdlForNonPagedPool(context
.stripes
[i
].mdl
);
1922 context
.stripes
[i
].mdl
= IoAllocateMdl(buf
, length
, FALSE
, FALSE
, NULL
);
1924 if (!context
.stripes
[i
].mdl
) {
1925 ERR("IoAllocateMdl failed\n");
1926 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1930 Status
= STATUS_SUCCESS
;
1933 MmProbeAndLockPages(context
.stripes
[i
].mdl
, KernelMode
, IoWriteAccess
);
1934 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
1935 Status
= _SEH2_GetExceptionCode();
1938 if (!NT_SUCCESS(Status
)) {
1939 ERR("MmProbeAndLockPages threw exception %08x\n", Status
);
1943 } else if (type
== BLOCK_FLAG_RAID5
) {
1944 UINT64 startoff
, endoff
;
1945 UINT16 endoffstripe
, parity
;
1946 UINT32
*stripeoff
, pos
;
1948 PFN_NUMBER
*pfns
, dummy
;
1949 BOOL need_dummy
= FALSE
;
1951 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
- 1, &startoff
, &startoffstripe
);
1952 get_raid0_offset(addr
+ length
- offset
- 1, ci
->stripe_length
, ci
->num_stripes
- 1, &endoff
, &endoffstripe
);
1955 context
.va
= ExAllocatePoolWithTag(NonPagedPool
, length
, ALLOC_TAG
);
1958 ERR("out of memory\n");
1959 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1965 master_mdl
= IoAllocateMdl(context
.va
, length
, FALSE
, FALSE
, NULL
);
1967 ERR("out of memory\n");
1968 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1972 Status
= STATUS_SUCCESS
;
1975 MmProbeAndLockPages(master_mdl
, KernelMode
, IoWriteAccess
);
1976 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
1977 Status
= _SEH2_GetExceptionCode();
1980 if (!NT_SUCCESS(Status
)) {
1981 ERR("MmProbeAndLockPages threw exception %08x\n", Status
);
1982 IoFreeMdl(master_mdl
);
1986 pfns
= (PFN_NUMBER
*)(master_mdl
+ 1);
1989 while (pos
< length
) {
1990 parity
= (((addr
- offset
+ pos
) / ((ci
->num_stripes
- 1) * ci
->stripe_length
)) + ci
->num_stripes
- 1) % ci
->num_stripes
;
1993 UINT16 stripe
= (parity
+ startoffstripe
+ 1) % ci
->num_stripes
;
1994 ULONG skip
, readlen
;
1997 while (stripe
!= parity
) {
1998 if (i
== startoffstripe
) {
1999 readlen
= min(length
, (ULONG
)(ci
->stripe_length
- (startoff
% ci
->stripe_length
)));
2001 context
.stripes
[stripe
].stripestart
= startoff
;
2002 context
.stripes
[stripe
].stripeend
= startoff
+ readlen
;
2009 readlen
= min(length
- pos
, (ULONG
)ci
->stripe_length
);
2011 context
.stripes
[stripe
].stripestart
= startoff
- (startoff
% ci
->stripe_length
);
2012 context
.stripes
[stripe
].stripeend
= context
.stripes
[stripe
].stripestart
+ readlen
;
2021 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2027 for (i
= 0; i
< startoffstripe
; i
++) {
2028 UINT16 stripe2
= (parity
+ i
+ 1) % ci
->num_stripes
;
2030 context
.stripes
[stripe2
].stripestart
= context
.stripes
[stripe2
].stripeend
= startoff
- (startoff
% ci
->stripe_length
) + ci
->stripe_length
;
2033 context
.stripes
[parity
].stripestart
= context
.stripes
[parity
].stripeend
= startoff
- (startoff
% ci
->stripe_length
) + ci
->stripe_length
;
2035 if (length
- pos
> ci
->num_stripes
* (ci
->num_stripes
- 1) * ci
->stripe_length
) {
2036 skip
= (ULONG
)(((length
- pos
) / (ci
->num_stripes
* (ci
->num_stripes
- 1) * ci
->stripe_length
)) - 1);
2038 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2039 context
.stripes
[i
].stripeend
+= skip
* ci
->num_stripes
* ci
->stripe_length
;
2042 pos
+= (UINT32
)(skip
* (ci
->num_stripes
- 1) * ci
->num_stripes
* ci
->stripe_length
);
2045 } else if (length
- pos
>= ci
->stripe_length
* (ci
->num_stripes
- 1)) {
2046 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2047 context
.stripes
[i
].stripeend
+= ci
->stripe_length
;
2050 pos
+= (UINT32
)(ci
->stripe_length
* (ci
->num_stripes
- 1));
2053 UINT16 stripe
= (parity
+ 1) % ci
->num_stripes
;
2056 while (stripe
!= parity
) {
2057 if (endoffstripe
== i
) {
2058 context
.stripes
[stripe
].stripeend
= endoff
+ 1;
2060 } else if (endoffstripe
> i
)
2061 context
.stripes
[stripe
].stripeend
= endoff
- (endoff
% ci
->stripe_length
) + ci
->stripe_length
;
2064 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2071 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2072 if (context
.stripes
[i
].stripestart
!= context
.stripes
[i
].stripeend
) {
2073 context
.stripes
[i
].mdl
= IoAllocateMdl(context
.va
, (ULONG
)(context
.stripes
[i
].stripeend
- context
.stripes
[i
].stripestart
),
2074 FALSE
, FALSE
, NULL
);
2076 if (!context
.stripes
[i
].mdl
) {
2077 ERR("IoAllocateMdl failed\n");
2078 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2085 dummypage
= ExAllocatePoolWithTag(NonPagedPool
, PAGE_SIZE
, ALLOC_TAG
);
2087 ERR("out of memory\n");
2088 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2092 dummy_mdl
= IoAllocateMdl(dummypage
, PAGE_SIZE
, FALSE
, FALSE
, NULL
);
2094 ERR("IoAllocateMdl failed\n");
2095 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2099 MmBuildMdlForNonPagedPool(dummy_mdl
);
2101 dummy
= *(PFN_NUMBER
*)(dummy_mdl
+ 1);
2104 stripeoff
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(UINT32
) * ci
->num_stripes
, ALLOC_TAG
);
2106 ERR("out of memory\n");
2107 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2111 RtlZeroMemory(stripeoff
, sizeof(UINT32
) * ci
->num_stripes
);
2115 while (pos
< length
) {
2116 PFN_NUMBER
* stripe_pfns
;
2118 parity
= (((addr
- offset
+ pos
) / ((ci
->num_stripes
- 1) * ci
->stripe_length
)) + ci
->num_stripes
- 1) % ci
->num_stripes
;
2121 UINT16 stripe
= (parity
+ startoffstripe
+ 1) % ci
->num_stripes
;
2122 UINT32 readlen
= min(length
- pos
, (UINT32
)min(context
.stripes
[stripe
].stripeend
- context
.stripes
[stripe
].stripestart
,
2123 ci
->stripe_length
- (context
.stripes
[stripe
].stripestart
% ci
->stripe_length
)));
2125 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2127 RtlCopyMemory(stripe_pfns
, pfns
, readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
2129 stripeoff
[stripe
] = readlen
;
2132 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2134 while (stripe
!= parity
) {
2135 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2136 readlen
= min(length
- pos
, (UINT32
)min(context
.stripes
[stripe
].stripeend
- context
.stripes
[stripe
].stripestart
, ci
->stripe_length
));
2141 RtlCopyMemory(stripe_pfns
, &pfns
[pos
>> PAGE_SHIFT
], readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
2143 stripeoff
[stripe
] = readlen
;
2146 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2148 } else if (length
- pos
>= ci
->stripe_length
* (ci
->num_stripes
- 1)) {
2149 UINT16 stripe
= (parity
+ 1) % ci
->num_stripes
;
2152 while (stripe
!= parity
) {
2153 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2155 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (ULONG
)(ci
->stripe_length
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
2157 stripeoff
[stripe
] += (UINT32
)ci
->stripe_length
;
2158 pos
+= (UINT32
)ci
->stripe_length
;
2160 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2163 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[parity
].mdl
+ 1);
2165 for (k
= 0; k
< ci
->stripe_length
>> PAGE_SHIFT
; k
++) {
2166 stripe_pfns
[stripeoff
[parity
] >> PAGE_SHIFT
] = dummy
;
2167 stripeoff
[parity
] += PAGE_SIZE
;
2170 UINT16 stripe
= (parity
+ 1) % ci
->num_stripes
;
2173 while (pos
< length
) {
2174 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2175 readlen
= min(length
- pos
, (ULONG
)min(context
.stripes
[stripe
].stripeend
- context
.stripes
[stripe
].stripestart
, ci
->stripe_length
));
2180 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
2182 stripeoff
[stripe
] += readlen
;
2185 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2190 MmUnlockPages(master_mdl
);
2191 IoFreeMdl(master_mdl
);
2193 ExFreePool(stripeoff
);
2194 } else if (type
== BLOCK_FLAG_RAID6
) {
2195 UINT64 startoff
, endoff
;
2196 UINT16 endoffstripe
, parity1
;
2197 UINT32
*stripeoff
, pos
;
2199 PFN_NUMBER
*pfns
, dummy
;
2200 BOOL need_dummy
= FALSE
;
2202 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
- 2, &startoff
, &startoffstripe
);
2203 get_raid0_offset(addr
+ length
- offset
- 1, ci
->stripe_length
, ci
->num_stripes
- 2, &endoff
, &endoffstripe
);
2206 context
.va
= ExAllocatePoolWithTag(NonPagedPool
, length
, ALLOC_TAG
);
2209 ERR("out of memory\n");
2210 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2216 master_mdl
= IoAllocateMdl(context
.va
, length
, FALSE
, FALSE
, NULL
);
2218 ERR("out of memory\n");
2219 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2223 Status
= STATUS_SUCCESS
;
2226 MmProbeAndLockPages(master_mdl
, KernelMode
, IoWriteAccess
);
2227 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
2228 Status
= _SEH2_GetExceptionCode();
2231 if (!NT_SUCCESS(Status
)) {
2232 ERR("MmProbeAndLockPages threw exception %08x\n", Status
);
2233 IoFreeMdl(master_mdl
);
2237 pfns
= (PFN_NUMBER
*)(master_mdl
+ 1);
2240 while (pos
< length
) {
2241 parity1
= (((addr
- offset
+ pos
) / ((ci
->num_stripes
- 2) * ci
->stripe_length
)) + ci
->num_stripes
- 2) % ci
->num_stripes
;
2244 UINT16 stripe
= (parity1
+ startoffstripe
+ 2) % ci
->num_stripes
, parity2
;
2245 ULONG skip
, readlen
;
2248 while (stripe
!= parity1
) {
2249 if (i
== startoffstripe
) {
2250 readlen
= (ULONG
)min(length
, ci
->stripe_length
- (startoff
% ci
->stripe_length
));
2252 context
.stripes
[stripe
].stripestart
= startoff
;
2253 context
.stripes
[stripe
].stripeend
= startoff
+ readlen
;
2260 readlen
= min(length
- pos
, (ULONG
)ci
->stripe_length
);
2262 context
.stripes
[stripe
].stripestart
= startoff
- (startoff
% ci
->stripe_length
);
2263 context
.stripes
[stripe
].stripeend
= context
.stripes
[stripe
].stripestart
+ readlen
;
2272 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2278 for (i
= 0; i
< startoffstripe
; i
++) {
2279 UINT16 stripe2
= (parity1
+ i
+ 2) % ci
->num_stripes
;
2281 context
.stripes
[stripe2
].stripestart
= context
.stripes
[stripe2
].stripeend
= startoff
- (startoff
% ci
->stripe_length
) + ci
->stripe_length
;
2284 context
.stripes
[parity1
].stripestart
= context
.stripes
[parity1
].stripeend
= startoff
- (startoff
% ci
->stripe_length
) + ci
->stripe_length
;
2286 parity2
= (parity1
+ 1) % ci
->num_stripes
;
2287 context
.stripes
[parity2
].stripestart
= context
.stripes
[parity2
].stripeend
= startoff
- (startoff
% ci
->stripe_length
) + ci
->stripe_length
;
2289 if (length
- pos
> ci
->num_stripes
* (ci
->num_stripes
- 2) * ci
->stripe_length
) {
2290 skip
= (ULONG
)(((length
- pos
) / (ci
->num_stripes
* (ci
->num_stripes
- 2) * ci
->stripe_length
)) - 1);
2292 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2293 context
.stripes
[i
].stripeend
+= skip
* ci
->num_stripes
* ci
->stripe_length
;
2296 pos
+= (UINT32
)(skip
* (ci
->num_stripes
- 2) * ci
->num_stripes
* ci
->stripe_length
);
2299 } else if (length
- pos
>= ci
->stripe_length
* (ci
->num_stripes
- 2)) {
2300 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2301 context
.stripes
[i
].stripeend
+= ci
->stripe_length
;
2304 pos
+= (UINT32
)(ci
->stripe_length
* (ci
->num_stripes
- 2));
2307 UINT16 stripe
= (parity1
+ 2) % ci
->num_stripes
;
2310 while (stripe
!= parity1
) {
2311 if (endoffstripe
== i
) {
2312 context
.stripes
[stripe
].stripeend
= endoff
+ 1;
2314 } else if (endoffstripe
> i
)
2315 context
.stripes
[stripe
].stripeend
= endoff
- (endoff
% ci
->stripe_length
) + ci
->stripe_length
;
2318 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2325 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2326 if (context
.stripes
[i
].stripestart
!= context
.stripes
[i
].stripeend
) {
2327 context
.stripes
[i
].mdl
= IoAllocateMdl(context
.va
, (ULONG
)(context
.stripes
[i
].stripeend
- context
.stripes
[i
].stripestart
), FALSE
, FALSE
, NULL
);
2329 if (!context
.stripes
[i
].mdl
) {
2330 ERR("IoAllocateMdl failed\n");
2331 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2338 dummypage
= ExAllocatePoolWithTag(NonPagedPool
, PAGE_SIZE
, ALLOC_TAG
);
2340 ERR("out of memory\n");
2341 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2345 dummy_mdl
= IoAllocateMdl(dummypage
, PAGE_SIZE
, FALSE
, FALSE
, NULL
);
2347 ERR("IoAllocateMdl failed\n");
2348 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2352 MmBuildMdlForNonPagedPool(dummy_mdl
);
2354 dummy
= *(PFN_NUMBER
*)(dummy_mdl
+ 1);
2357 stripeoff
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(UINT32
) * ci
->num_stripes
, ALLOC_TAG
);
2359 ERR("out of memory\n");
2360 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2364 RtlZeroMemory(stripeoff
, sizeof(UINT32
) * ci
->num_stripes
);
2368 while (pos
< length
) {
2369 PFN_NUMBER
* stripe_pfns
;
2371 parity1
= (((addr
- offset
+ pos
) / ((ci
->num_stripes
- 2) * ci
->stripe_length
)) + ci
->num_stripes
- 2) % ci
->num_stripes
;
2374 UINT16 stripe
= (parity1
+ startoffstripe
+ 2) % ci
->num_stripes
;
2375 UINT32 readlen
= min(length
- pos
, (UINT32
)min(context
.stripes
[stripe
].stripeend
- context
.stripes
[stripe
].stripestart
,
2376 ci
->stripe_length
- (context
.stripes
[stripe
].stripestart
% ci
->stripe_length
)));
2378 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2380 RtlCopyMemory(stripe_pfns
, pfns
, readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
2382 stripeoff
[stripe
] = readlen
;
2385 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2387 while (stripe
!= parity1
) {
2388 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2389 readlen
= (UINT32
)min(length
- pos
, min(context
.stripes
[stripe
].stripeend
- context
.stripes
[stripe
].stripestart
, ci
->stripe_length
));
2394 RtlCopyMemory(stripe_pfns
, &pfns
[pos
>> PAGE_SHIFT
], readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
2396 stripeoff
[stripe
] = readlen
;
2399 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2401 } else if (length
- pos
>= ci
->stripe_length
* (ci
->num_stripes
- 2)) {
2402 UINT16 stripe
= (parity1
+ 2) % ci
->num_stripes
;
2403 UINT16 parity2
= (parity1
+ 1) % ci
->num_stripes
;
2406 while (stripe
!= parity1
) {
2407 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2409 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (ULONG
)(ci
->stripe_length
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
2411 stripeoff
[stripe
] += (UINT32
)ci
->stripe_length
;
2412 pos
+= (UINT32
)ci
->stripe_length
;
2414 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2417 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[parity1
].mdl
+ 1);
2419 for (k
= 0; k
< ci
->stripe_length
>> PAGE_SHIFT
; k
++) {
2420 stripe_pfns
[stripeoff
[parity1
] >> PAGE_SHIFT
] = dummy
;
2421 stripeoff
[parity1
] += PAGE_SIZE
;
2424 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[parity2
].mdl
+ 1);
2426 for (k
= 0; k
< ci
->stripe_length
>> PAGE_SHIFT
; k
++) {
2427 stripe_pfns
[stripeoff
[parity2
] >> PAGE_SHIFT
] = dummy
;
2428 stripeoff
[parity2
] += PAGE_SIZE
;
2431 UINT16 stripe
= (parity1
+ 2) % ci
->num_stripes
;
2434 while (pos
< length
) {
2435 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2436 readlen
= (UINT32
)min(length
- pos
, min(context
.stripes
[stripe
].stripeend
- context
.stripes
[stripe
].stripestart
, ci
->stripe_length
));
2441 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
2443 stripeoff
[stripe
] += readlen
;
2446 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2451 MmUnlockPages(master_mdl
);
2452 IoFreeMdl(master_mdl
);
2454 ExFreePool(stripeoff
);
2457 context
.address
= addr
;
2459 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2460 if (!devices
[i
] || !devices
[i
]->devobj
|| context
.stripes
[i
].stripestart
== context
.stripes
[i
].stripeend
) {
2461 context
.stripes
[i
].status
= ReadDataStatus_MissingDevice
;
2462 context
.stripes_left
--;
2464 if (!devices
[i
] || !devices
[i
]->devobj
)
2469 if (missing_devices
> allowed_missing
) {
2470 ERR("not enough devices to service request (%u missing)\n", missing_devices
);
2471 Status
= STATUS_UNEXPECTED_IO_ERROR
;
2475 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2476 PIO_STACK_LOCATION IrpSp
;
2478 if (devices
[i
] && devices
[i
]->devobj
&& context
.stripes
[i
].stripestart
!= context
.stripes
[i
].stripeend
&& context
.stripes
[i
].status
!= ReadDataStatus_Skip
) {
2479 context
.stripes
[i
].context
= (struct read_data_context
*)&context
;
2481 if (type
== BLOCK_FLAG_RAID10
) {
2482 context
.stripes
[i
].stripenum
= i
/ ci
->sub_stripes
;
2486 context
.stripes
[i
].Irp
= IoAllocateIrp(devices
[i
]->devobj
->StackSize
, FALSE
);
2488 if (!context
.stripes
[i
].Irp
) {
2489 ERR("IoAllocateIrp failed\n");
2490 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2494 context
.stripes
[i
].Irp
= IoMakeAssociatedIrp(Irp
, devices
[i
]->devobj
->StackSize
);
2496 if (!context
.stripes
[i
].Irp
) {
2497 ERR("IoMakeAssociatedIrp failed\n");
2498 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2503 IrpSp
= IoGetNextIrpStackLocation(context
.stripes
[i
].Irp
);
2504 IrpSp
->MajorFunction
= IRP_MJ_READ
;
2506 if (devices
[i
]->devobj
->Flags
& DO_BUFFERED_IO
) {
2507 context
.stripes
[i
].Irp
->AssociatedIrp
.SystemBuffer
= ExAllocatePoolWithTag(NonPagedPool
, (ULONG
)(context
.stripes
[i
].stripeend
- context
.stripes
[i
].stripestart
), ALLOC_TAG
);
2508 if (!context
.stripes
[i
].Irp
->AssociatedIrp
.SystemBuffer
) {
2509 ERR("out of memory\n");
2510 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2514 context
.stripes
[i
].Irp
->Flags
|= IRP_BUFFERED_IO
| IRP_DEALLOCATE_BUFFER
| IRP_INPUT_OPERATION
;
2516 context
.stripes
[i
].Irp
->UserBuffer
= MmGetSystemAddressForMdlSafe(context
.stripes
[i
].mdl
, priority
);
2517 } else if (devices
[i
]->devobj
->Flags
& DO_DIRECT_IO
)
2518 context
.stripes
[i
].Irp
->MdlAddress
= context
.stripes
[i
].mdl
;
2520 context
.stripes
[i
].Irp
->UserBuffer
= MmGetSystemAddressForMdlSafe(context
.stripes
[i
].mdl
, priority
);
2522 IrpSp
->Parameters
.Read
.Length
= (ULONG
)(context
.stripes
[i
].stripeend
- context
.stripes
[i
].stripestart
);
2523 IrpSp
->Parameters
.Read
.ByteOffset
.QuadPart
= context
.stripes
[i
].stripestart
+ cis
[i
].offset
;
2525 total_reading
+= IrpSp
->Parameters
.Read
.Length
;
2527 context
.stripes
[i
].Irp
->UserIosb
= &context
.stripes
[i
].iosb
;
2529 IoSetCompletionRoutine(context
.stripes
[i
].Irp
, read_data_completion
, &context
.stripes
[i
], TRUE
, TRUE
, TRUE
);
2531 context
.stripes
[i
].status
= ReadDataStatus_Pending
;
2537 time1
= KeQueryPerformanceCounter(NULL
);
2540 need_to_wait
= FALSE
;
2541 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2542 if (context
.stripes
[i
].status
!= ReadDataStatus_MissingDevice
&& context
.stripes
[i
].status
!= ReadDataStatus_Skip
) {
2543 IoCallDriver(devices
[i
]->devobj
, context
.stripes
[i
].Irp
);
2544 need_to_wait
= TRUE
;
2549 KeWaitForSingleObject(&context
.Event
, Executive
, KernelMode
, FALSE
, NULL
);
2553 time2
= KeQueryPerformanceCounter(NULL
);
2555 Vcb
->stats
.read_disk_time
+= time2
.QuadPart
- time1
.QuadPart
;
2560 fFsRtlUpdateDiskCounters(total_reading
, 0);
2562 // check if any of the devices return a "user-induced" error
2564 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2565 if (context
.stripes
[i
].status
== ReadDataStatus_Error
&& IoIsErrorUserInduced(context
.stripes
[i
].iosb
.Status
)) {
2566 Status
= context
.stripes
[i
].iosb
.Status
;
2571 if (type
== BLOCK_FLAG_RAID0
) {
2572 Status
= read_data_raid0(Vcb
, file_read
? context
.va
: buf
, addr
, length
, &context
, ci
, devices
, generation
, offset
);
2573 if (!NT_SUCCESS(Status
)) {
2574 ERR("read_data_raid0 returned %08x\n", Status
);
2577 ExFreePool(context
.va
);
2583 RtlCopyMemory(buf
, context
.va
, length
);
2584 ExFreePool(context
.va
);
2586 } else if (type
== BLOCK_FLAG_RAID10
) {
2587 Status
= read_data_raid10(Vcb
, file_read
? context
.va
: buf
, addr
, length
, &context
, ci
, devices
, generation
, offset
);
2589 if (!NT_SUCCESS(Status
)) {
2590 ERR("read_data_raid10 returned %08x\n", Status
);
2593 ExFreePool(context
.va
);
2599 RtlCopyMemory(buf
, context
.va
, length
);
2600 ExFreePool(context
.va
);
2602 } else if (type
== BLOCK_FLAG_DUPLICATE
) {
2603 Status
= read_data_dup(Vcb
, file_read
? context
.va
: buf
, addr
, &context
, ci
, devices
, generation
);
2604 if (!NT_SUCCESS(Status
)) {
2605 ERR("read_data_dup returned %08x\n", Status
);
2608 ExFreePool(context
.va
);
2614 RtlCopyMemory(buf
, context
.va
, length
);
2615 ExFreePool(context
.va
);
2617 } else if (type
== BLOCK_FLAG_RAID5
) {
2618 Status
= read_data_raid5(Vcb
, file_read
? context
.va
: buf
, addr
, length
, &context
, ci
, devices
, offset
, generation
, c
, missing_devices
> 0 ? TRUE
: FALSE
);
2619 if (!NT_SUCCESS(Status
)) {
2620 ERR("read_data_raid5 returned %08x\n", Status
);
2623 ExFreePool(context
.va
);
2629 RtlCopyMemory(buf
, context
.va
, length
);
2630 ExFreePool(context
.va
);
2632 } else if (type
== BLOCK_FLAG_RAID6
) {
2633 Status
= read_data_raid6(Vcb
, file_read
? context
.va
: buf
, addr
, length
, &context
, ci
, devices
, offset
, generation
, c
, missing_devices
> 0 ? TRUE
: FALSE
);
2634 if (!NT_SUCCESS(Status
)) {
2635 ERR("read_data_raid6 returned %08x\n", Status
);
2638 ExFreePool(context
.va
);
2644 RtlCopyMemory(buf
, context
.va
, length
);
2645 ExFreePool(context
.va
);
2650 if (c
&& (type
== BLOCK_FLAG_RAID5
|| type
== BLOCK_FLAG_RAID6
))
2651 chunk_unlock_range(Vcb
, c
, lockaddr
, locklen
);
2654 IoFreeMdl(dummy_mdl
);
2657 ExFreePool(dummypage
);
2659 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2660 if (context
.stripes
[i
].mdl
) {
2661 if (context
.stripes
[i
].mdl
->MdlFlags
& MDL_PAGES_LOCKED
)
2662 MmUnlockPages(context
.stripes
[i
].mdl
);
2664 IoFreeMdl(context
.stripes
[i
].mdl
);
2667 if (context
.stripes
[i
].Irp
)
2668 IoFreeIrp(context
.stripes
[i
].Irp
);
2671 ExFreePool(context
.stripes
);
2673 if (!Vcb
->log_to_phys_loaded
)
2674 ExFreePool(devices
);
2679 NTSTATUS
read_stream(fcb
* fcb
, UINT8
* data
, UINT64 start
, ULONG length
, ULONG
* pbr
) {
2682 TRACE("(%p, %p, %llx, %llx, %p)\n", fcb
, data
, start
, length
, pbr
);
2686 if (start
>= fcb
->adsdata
.Length
) {
2687 TRACE("tried to read beyond end of stream\n");
2688 return STATUS_END_OF_FILE
;
2692 WARN("tried to read zero bytes\n");
2693 return STATUS_SUCCESS
;
2696 if (start
+ length
< fcb
->adsdata
.Length
)
2699 readlen
= fcb
->adsdata
.Length
- (ULONG
)start
;
2702 RtlCopyMemory(data
+ start
, fcb
->adsdata
.Buffer
, readlen
);
2704 if (pbr
) *pbr
= readlen
;
2706 return STATUS_SUCCESS
;
2709 NTSTATUS
read_file(fcb
* fcb
, UINT8
* data
, UINT64 start
, UINT64 length
, ULONG
* pbr
, PIRP Irp
) {
2712 UINT32 bytes_read
= 0;
2716 LARGE_INTEGER time1
, time2
;
2719 TRACE("(%p, %p, %llx, %llx, %p)\n", fcb
, data
, start
, length
, pbr
);
2724 if (start
>= fcb
->inode_item
.st_size
) {
2725 WARN("Tried to read beyond end of file\n");
2726 Status
= STATUS_END_OF_FILE
;
2731 time1
= KeQueryPerformanceCounter(NULL
);
2734 le
= fcb
->extents
.Flink
;
2738 while (le
!= &fcb
->extents
) {
2740 extent
* ext
= CONTAINING_RECORD(le
, extent
, list_entry
);
2744 ed
= &ext
->extent_data
;
2746 ed2
= (ed
->type
== EXTENT_TYPE_REGULAR
|| ed
->type
== EXTENT_TYPE_PREALLOC
) ? (EXTENT_DATA2
*)ed
->data
: NULL
;
2748 len
= ed2
? ed2
->num_bytes
: ed
->decoded_size
;
2750 if (ext
->offset
+ len
<= start
) {
2751 last_end
= ext
->offset
+ len
;
2755 if (ext
->offset
> last_end
&& ext
->offset
> start
+ bytes_read
) {
2756 UINT32 read
= (UINT32
)min(length
, ext
->offset
- max(start
, last_end
));
2758 RtlZeroMemory(data
+ bytes_read
, read
);
2763 if (length
== 0 || ext
->offset
> start
+ bytes_read
+ length
)
2766 if (ed
->encryption
!= BTRFS_ENCRYPTION_NONE
) {
2767 WARN("Encryption not supported\n");
2768 Status
= STATUS_NOT_IMPLEMENTED
;
2772 if (ed
->encoding
!= BTRFS_ENCODING_NONE
) {
2773 WARN("Other encodings not supported\n");
2774 Status
= STATUS_NOT_IMPLEMENTED
;
2779 case EXTENT_TYPE_INLINE
:
2781 UINT64 off
= start
+ bytes_read
- ext
->offset
;
2784 if (ed
->compression
== BTRFS_COMPRESSION_NONE
) {
2785 read
= (UINT32
)min(min(len
, ext
->datalen
) - off
, length
);
2787 RtlCopyMemory(data
+ bytes_read
, &ed
->data
[off
], read
);
2788 } else if (ed
->compression
== BTRFS_COMPRESSION_ZLIB
|| ed
->compression
== BTRFS_COMPRESSION_LZO
) {
2791 UINT16 inlen
= ext
->datalen
- (UINT16
)offsetof(EXTENT_DATA
, data
[0]);
2793 if (ed
->decoded_size
== 0 || ed
->decoded_size
> 0xffffffff) {
2794 ERR("ed->decoded_size was invalid (%llx)\n", ed
->decoded_size
);
2795 Status
= STATUS_INTERNAL_ERROR
;
2799 read
= (UINT32
)min(ed
->decoded_size
- off
, length
);
2802 decomp
= ExAllocatePoolWithTag(NonPagedPool
, (UINT32
)ed
->decoded_size
, ALLOC_TAG
);
2804 ERR("out of memory\n");
2805 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2809 decomp_alloc
= TRUE
;
2811 decomp
= data
+ bytes_read
;
2812 decomp_alloc
= FALSE
;
2815 if (ed
->compression
== BTRFS_COMPRESSION_ZLIB
) {
2816 Status
= zlib_decompress(ed
->data
, inlen
, decomp
, (UINT32
)(read
+ off
));
2817 if (!NT_SUCCESS(Status
)) {
2818 ERR("zlib_decompress returned %08x\n", Status
);
2819 if (decomp_alloc
) ExFreePool(decomp
);
2822 } else if (ed
->compression
== BTRFS_COMPRESSION_LZO
) {
2823 if (inlen
< sizeof(UINT32
)) {
2824 ERR("extent data was truncated\n");
2825 Status
= STATUS_INTERNAL_ERROR
;
2826 if (decomp_alloc
) ExFreePool(decomp
);
2829 inlen
-= sizeof(UINT32
);
2831 Status
= lzo_decompress(ed
->data
+ sizeof(UINT32
), inlen
, decomp
, (UINT32
)(read
+ off
), sizeof(UINT32
));
2832 if (!NT_SUCCESS(Status
)) {
2833 ERR("lzo_decompress returned %08x\n", Status
);
2834 if (decomp_alloc
) ExFreePool(decomp
);
2840 RtlCopyMemory(data
+ bytes_read
, decomp
+ off
, read
);
2844 ERR("unhandled compression type %x\n", ed
->compression
);
2845 Status
= STATUS_NOT_IMPLEMENTED
;
2855 case EXTENT_TYPE_REGULAR
:
2857 UINT64 off
= start
+ bytes_read
- ext
->offset
;
2858 UINT32 to_read
, read
;
2860 BOOL mdl
= (Irp
&& Irp
->MdlAddress
) ? TRUE
: FALSE
;
2862 UINT32 bumpoff
= 0, *csum
;
2866 read
= (UINT32
)(len
- off
);
2867 if (read
> length
) read
= (UINT32
)length
;
2869 if (ed
->compression
== BTRFS_COMPRESSION_NONE
) {
2870 addr
= ed2
->address
+ ed2
->offset
+ off
;
2871 to_read
= (UINT32
)sector_align(read
, fcb
->Vcb
->superblock
.sector_size
);
2873 if (addr
% fcb
->Vcb
->superblock
.sector_size
> 0) {
2874 bumpoff
= addr
% fcb
->Vcb
->superblock
.sector_size
;
2876 to_read
= (UINT32
)sector_align(read
+ bumpoff
, fcb
->Vcb
->superblock
.sector_size
);
2879 addr
= ed2
->address
;
2880 to_read
= (UINT32
)sector_align(ed2
->size
, fcb
->Vcb
->superblock
.sector_size
);
2883 if (ed
->compression
== BTRFS_COMPRESSION_NONE
&& start
% fcb
->Vcb
->superblock
.sector_size
== 0 &&
2884 length
% fcb
->Vcb
->superblock
.sector_size
== 0) {
2885 buf
= data
+ bytes_read
;
2888 buf
= ExAllocatePoolWithTag(PagedPool
, to_read
, ALLOC_TAG
);
2892 ERR("out of memory\n");
2893 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2900 c
= get_chunk_from_address(fcb
->Vcb
, addr
);
2903 ERR("get_chunk_from_address(%llx) failed\n", addr
);
2912 if (ed
->compression
== BTRFS_COMPRESSION_NONE
)
2913 csum
= &ext
->csum
[off
/ fcb
->Vcb
->superblock
.sector_size
];
2919 Status
= read_data(fcb
->Vcb
, addr
, to_read
, csum
, FALSE
, buf
, c
, NULL
, Irp
, 0, mdl
,
2920 fcb
&& fcb
->Header
.Flags2
& FSRTL_FLAG2_IS_PAGING_FILE
? HighPagePriority
: NormalPagePriority
);
2921 if (!NT_SUCCESS(Status
)) {
2922 ERR("read_data returned %08x\n", Status
);
2930 if (ed
->compression
== BTRFS_COMPRESSION_NONE
) {
2932 RtlCopyMemory(data
+ bytes_read
, buf
+ bumpoff
, read
);
2934 UINT8
*decomp
= NULL
, *buf2
;
2935 ULONG outlen
, inlen
, off2
;
2936 UINT32 inpageoff
= 0;
2938 off2
= (ULONG
)(ed2
->offset
+ off
);
2940 inlen
= (ULONG
)ed2
->size
;
2942 if (ed
->compression
== BTRFS_COMPRESSION_LZO
) {
2943 ULONG inoff
= sizeof(UINT32
);
2945 inlen
-= sizeof(UINT32
);
2947 // If reading a few sectors in, skip to the interesting bit
2948 while (off2
> LINUX_PAGE_SIZE
) {
2951 if (inlen
< sizeof(UINT32
))
2954 partlen
= *(UINT32
*)(buf2
+ inoff
);
2956 if (partlen
< inlen
) {
2957 off2
-= LINUX_PAGE_SIZE
;
2958 inoff
+= partlen
+ sizeof(UINT32
);
2959 inlen
-= partlen
+ sizeof(UINT32
);
2961 if (LINUX_PAGE_SIZE
- (inoff
% LINUX_PAGE_SIZE
) < sizeof(UINT32
))
2962 inoff
= ((inoff
/ LINUX_PAGE_SIZE
) + 1) * LINUX_PAGE_SIZE
;
2967 buf2
= &buf2
[inoff
];
2968 inpageoff
= inoff
% LINUX_PAGE_SIZE
;
2972 outlen
= off2
+ min(read
, (UINT32
)(ed2
->num_bytes
- off
));
2974 decomp
= ExAllocatePoolWithTag(PagedPool
, outlen
, ALLOC_TAG
);
2976 ERR("out of memory\n");
2978 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2982 outlen
= min(read
, (UINT32
)(ed2
->num_bytes
- off
));
2984 if (ed
->compression
== BTRFS_COMPRESSION_ZLIB
) {
2985 Status
= zlib_decompress(buf2
, inlen
, decomp
? decomp
: (data
+ bytes_read
), outlen
);
2987 if (!NT_SUCCESS(Status
)) {
2988 ERR("zlib_decompress returned %08x\n", Status
);
2996 } else if (ed
->compression
== BTRFS_COMPRESSION_LZO
) {
2997 Status
= lzo_decompress(buf2
, inlen
, decomp
? decomp
: (data
+ bytes_read
), outlen
, inpageoff
);
2999 if (!NT_SUCCESS(Status
)) {
3000 ERR("lzo_decompress returned %08x\n", Status
);
3009 ERR("unsupported compression type %x\n", ed
->compression
);
3010 Status
= STATUS_NOT_SUPPORTED
;
3021 RtlCopyMemory(data
+ bytes_read
, decomp
+ off2
, (size_t)min(read
, ed2
->num_bytes
- off
));
3035 case EXTENT_TYPE_PREALLOC
:
3037 UINT64 off
= start
+ bytes_read
- ext
->offset
;
3038 UINT32 read
= (UINT32
)(len
- off
);
3040 if (read
> length
) read
= (UINT32
)length
;
3042 RtlZeroMemory(data
+ bytes_read
, read
);
3051 WARN("Unsupported extent data type %u\n", ed
->type
);
3052 Status
= STATUS_NOT_IMPLEMENTED
;
3056 last_end
= ext
->offset
+ len
;
3066 if (length
> 0 && start
+ bytes_read
< fcb
->inode_item
.st_size
) {
3067 UINT32 read
= (UINT32
)min(fcb
->inode_item
.st_size
- start
- bytes_read
, length
);
3069 RtlZeroMemory(data
+ bytes_read
, read
);
3075 Status
= STATUS_SUCCESS
;
3080 time2
= KeQueryPerformanceCounter(NULL
);
3082 fcb
->Vcb
->stats
.num_reads
++;
3083 fcb
->Vcb
->stats
.data_read
+= bytes_read
;
3084 fcb
->Vcb
->stats
.read_total_time
+= time2
.QuadPart
- time1
.QuadPart
;
3091 NTSTATUS
do_read(PIRP Irp
, BOOLEAN wait
, ULONG
* bytes_read
) {
3092 PIO_STACK_LOCATION IrpSp
= IoGetCurrentIrpStackLocation(Irp
);
3093 PFILE_OBJECT FileObject
= IrpSp
->FileObject
;
3094 fcb
* fcb
= FileObject
->FsContext
;
3096 ULONG length
= IrpSp
->Parameters
.Read
.Length
, addon
= 0;
3097 UINT64 start
= IrpSp
->Parameters
.Read
.ByteOffset
.QuadPart
;
3101 if (!fcb
|| !fcb
->Vcb
|| !fcb
->subvol
)
3102 return STATUS_INTERNAL_ERROR
;
3104 TRACE("file = %S (fcb = %p)\n", file_desc(FileObject
), fcb
);
3105 TRACE("offset = %llx, length = %x\n", start
, length
);
3106 TRACE("paging_io = %s, no cache = %s\n", Irp
->Flags
& IRP_PAGING_IO
? "TRUE" : "FALSE", Irp
->Flags
& IRP_NOCACHE
? "TRUE" : "FALSE");
3108 if (!fcb
->ads
&& fcb
->type
== BTRFS_TYPE_DIRECTORY
)
3109 return STATUS_INVALID_DEVICE_REQUEST
;
3111 if (!(Irp
->Flags
& IRP_PAGING_IO
) && !FsRtlCheckLockForReadAccess(&fcb
->lock
, Irp
)) {
3112 WARN("tried to read locked region\n");
3113 return STATUS_FILE_LOCK_CONFLICT
;
3117 TRACE("tried to read zero bytes\n");
3118 return STATUS_SUCCESS
;
3121 if (start
>= (UINT64
)fcb
->Header
.FileSize
.QuadPart
) {
3122 TRACE("tried to read with offset after file end (%llx >= %llx)\n", start
, fcb
->Header
.FileSize
.QuadPart
);
3123 return STATUS_END_OF_FILE
;
3126 TRACE("FileObject %p fcb %p FileSize = %llx st_size = %llx (%p)\n", FileObject
, fcb
, fcb
->Header
.FileSize
.QuadPart
, fcb
->inode_item
.st_size
, &fcb
->inode_item
.st_size
);
3128 if (Irp
->Flags
& IRP_NOCACHE
|| !(IrpSp
->MinorFunction
& IRP_MN_MDL
)) {
3129 data
= map_user_buffer(Irp
, fcb
->Header
.Flags2
& FSRTL_FLAG2_IS_PAGING_FILE
? HighPagePriority
: NormalPagePriority
);
3131 if (Irp
->MdlAddress
&& !data
) {
3132 ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
3133 return STATUS_INSUFFICIENT_RESOURCES
;
3136 if (start
>= (UINT64
)fcb
->Header
.ValidDataLength
.QuadPart
) {
3137 length
= (ULONG
)min(length
, min(start
+ length
, (UINT64
)fcb
->Header
.FileSize
.QuadPart
) - fcb
->Header
.ValidDataLength
.QuadPart
);
3138 RtlZeroMemory(data
, length
);
3139 Irp
->IoStatus
.Information
= *bytes_read
= length
;
3140 return STATUS_SUCCESS
;
3143 if (length
+ start
> (UINT64
)fcb
->Header
.ValidDataLength
.QuadPart
) {
3144 addon
= (ULONG
)(min(start
+ length
, (UINT64
)fcb
->Header
.FileSize
.QuadPart
) - fcb
->Header
.ValidDataLength
.QuadPart
);
3145 RtlZeroMemory(data
+ (fcb
->Header
.ValidDataLength
.QuadPart
- start
), addon
);
3146 length
= (ULONG
)(fcb
->Header
.ValidDataLength
.QuadPart
- start
);
3150 if (!(Irp
->Flags
& IRP_NOCACHE
)) {
3151 NTSTATUS Status
= STATUS_SUCCESS
;
3154 if (!FileObject
->PrivateCacheMap
) {
3157 ccfs
.AllocationSize
= fcb
->Header
.AllocationSize
;
3158 ccfs
.FileSize
= fcb
->Header
.FileSize
;
3159 ccfs
.ValidDataLength
= fcb
->Header
.ValidDataLength
;
3161 init_file_cache(FileObject
, &ccfs
);
3164 if (IrpSp
->MinorFunction
& IRP_MN_MDL
) {
3165 CcMdlRead(FileObject
,&IrpSp
->Parameters
.Read
.ByteOffset
, length
, &Irp
->MdlAddress
, &Irp
->IoStatus
);
3167 if (fCcCopyReadEx
) {
3168 TRACE("CcCopyReadEx(%p, %llx, %x, %u, %p, %p, %p, %p)\n", FileObject
, IrpSp
->Parameters
.Read
.ByteOffset
.QuadPart
,
3169 length
, wait
, data
, &Irp
->IoStatus
, Irp
->Tail
.Overlay
.Thread
);
3170 TRACE("sizes = %llx, %llx, %llx\n", fcb
->Header
.AllocationSize
, fcb
->Header
.FileSize
, fcb
->Header
.ValidDataLength
);
3171 if (!fCcCopyReadEx(FileObject
, &IrpSp
->Parameters
.Read
.ByteOffset
, length
, wait
, data
, &Irp
->IoStatus
, Irp
->Tail
.Overlay
.Thread
)) {
3172 TRACE("CcCopyReadEx could not wait\n");
3174 IoMarkIrpPending(Irp
);
3175 return STATUS_PENDING
;
3177 TRACE("CcCopyReadEx finished\n");
3179 TRACE("CcCopyRead(%p, %llx, %x, %u, %p, %p)\n", FileObject
, IrpSp
->Parameters
.Read
.ByteOffset
.QuadPart
, length
, wait
, data
, &Irp
->IoStatus
);
3180 TRACE("sizes = %llx, %llx, %llx\n", fcb
->Header
.AllocationSize
, fcb
->Header
.FileSize
, fcb
->Header
.ValidDataLength
);
3181 if (!CcCopyRead(FileObject
, &IrpSp
->Parameters
.Read
.ByteOffset
, length
, wait
, data
, &Irp
->IoStatus
)) {
3182 TRACE("CcCopyRead could not wait\n");
3184 IoMarkIrpPending(Irp
);
3185 return STATUS_PENDING
;
3187 TRACE("CcCopyRead finished\n");
3190 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
3191 Status
= _SEH2_GetExceptionCode();
3194 if (NT_SUCCESS(Status
)) {
3195 Status
= Irp
->IoStatus
.Status
;
3196 Irp
->IoStatus
.Information
+= addon
;
3197 *bytes_read
= (ULONG
)Irp
->IoStatus
.Information
;
3199 ERR("EXCEPTION - %08x\n", Status
);
3206 IoMarkIrpPending(Irp
);
3207 return STATUS_PENDING
;
3211 Status
= read_stream(fcb
, data
, start
, length
, bytes_read
);
3213 Status
= read_file(fcb
, data
, start
, length
, bytes_read
, Irp
);
3215 *bytes_read
+= addon
;
3216 TRACE("read %u bytes\n", *bytes_read
);
3218 Irp
->IoStatus
.Information
= *bytes_read
;
3220 if (diskacc
&& Status
!= STATUS_PENDING
) {
3221 PETHREAD thread
= NULL
;
3223 if (Irp
->Tail
.Overlay
.Thread
&& !IoIsSystemThread(Irp
->Tail
.Overlay
.Thread
))
3224 thread
= Irp
->Tail
.Overlay
.Thread
;
3225 else if (!IoIsSystemThread(PsGetCurrentThread()))
3226 thread
= PsGetCurrentThread();
3227 else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp
)
3228 thread
= PsGetCurrentThread();
3231 fPsUpdateDiskCounters(PsGetThreadProcess(thread
), *bytes_read
, 0, 1, 0, 0);
3238 _Dispatch_type_(IRP_MJ_READ
)
3239 _Function_class_(DRIVER_DISPATCH
)
3240 NTSTATUS
drv_read(PDEVICE_OBJECT DeviceObject
, PIRP Irp
) {
3241 device_extension
* Vcb
= DeviceObject
->DeviceExtension
;
3242 PIO_STACK_LOCATION IrpSp
= IoGetCurrentIrpStackLocation(Irp
);
3243 PFILE_OBJECT FileObject
= IrpSp
->FileObject
;
3244 ULONG bytes_read
= 0;
3249 BOOLEAN fcb_lock
= FALSE
, wait
;
3251 FsRtlEnterFileSystem();
3253 top_level
= is_top_level(Irp
);
3257 if (Vcb
&& Vcb
->type
== VCB_TYPE_VOLUME
) {
3258 Status
= vol_read(DeviceObject
, Irp
);
3260 } else if (!Vcb
|| Vcb
->type
!= VCB_TYPE_FS
) {
3261 Status
= STATUS_INVALID_PARAMETER
;
3265 Irp
->IoStatus
.Information
= 0;
3267 if (IrpSp
->MinorFunction
& IRP_MN_COMPLETE
) {
3268 CcMdlReadComplete(IrpSp
->FileObject
, Irp
->MdlAddress
);
3270 Irp
->MdlAddress
= NULL
;
3271 Status
= STATUS_SUCCESS
;
3276 fcb
= FileObject
->FsContext
;
3279 ERR("fcb was NULL\n");
3280 Status
= STATUS_INVALID_PARAMETER
;
3284 ccb
= FileObject
->FsContext2
;
3287 ERR("ccb was NULL\n");
3288 Status
= STATUS_INVALID_PARAMETER
;
3292 if (Irp
->RequestorMode
== UserMode
&& !(ccb
->access
& FILE_READ_DATA
)) {
3293 WARN("insufficient privileges\n");
3294 Status
= STATUS_ACCESS_DENIED
;
3298 if (fcb
== Vcb
->volume_fcb
) {
3299 TRACE("reading volume FCB\n");
3301 IoSkipCurrentIrpStackLocation(Irp
);
3303 Status
= IoCallDriver(Vcb
->Vpb
->RealDevice
, Irp
);
3308 wait
= IoIsOperationSynchronous(Irp
);
3310 // Don't offload jobs when doing paging IO - otherwise this can lead to
3311 // deadlocks in CcCopyRead.
3312 if (Irp
->Flags
& IRP_PAGING_IO
)
3315 if (!(Irp
->Flags
& IRP_PAGING_IO
) && FileObject
->SectionObjectPointer
->DataSectionObject
) {
3316 IO_STATUS_BLOCK iosb
;
3318 CcFlushCache(FileObject
->SectionObjectPointer
, &IrpSp
->Parameters
.Read
.ByteOffset
, IrpSp
->Parameters
.Read
.Length
, &iosb
);
3319 if (!NT_SUCCESS(iosb
.Status
)) {
3320 ERR("CcFlushCache returned %08x\n", iosb
.Status
);
3325 if (!ExIsResourceAcquiredSharedLite(fcb
->Header
.Resource
)) {
3326 if (!ExAcquireResourceSharedLite(fcb
->Header
.Resource
, wait
)) {
3327 Status
= STATUS_PENDING
;
3328 IoMarkIrpPending(Irp
);
3335 Status
= do_read(Irp
, wait
, &bytes_read
);
3338 ExReleaseResourceLite(fcb
->Header
.Resource
);
3341 if (FileObject
->Flags
& FO_SYNCHRONOUS_IO
&& !(Irp
->Flags
& IRP_PAGING_IO
))
3342 FileObject
->CurrentByteOffset
.QuadPart
= IrpSp
->Parameters
.Read
.ByteOffset
.QuadPart
+ (NT_SUCCESS(Status
) ? bytes_read
: 0);
3345 Irp
->IoStatus
.Status
= Status
;
3347 TRACE("Irp->IoStatus.Status = %08x\n", Irp
->IoStatus
.Status
);
3348 TRACE("Irp->IoStatus.Information = %lu\n", Irp
->IoStatus
.Information
);
3349 TRACE("returning %08x\n", Status
);
3351 if (Status
!= STATUS_PENDING
)
3352 IoCompleteRequest(Irp
, IO_NO_INCREMENT
);
3354 if (!add_thread_job(Vcb
, Irp
))
3360 IoSetTopLevelIrp(NULL
);
3362 FsRtlExitFileSystem();