1 /* Copyright (c) Mark Harmstone 2016-17
3 * This file is part of WinBtrfs.
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
18 #include "btrfs_drv.h"
22 enum read_data_status
{
23 ReadDataStatus_Pending
,
24 ReadDataStatus_Success
,
26 ReadDataStatus_MissingDevice
,
30 struct read_data_context
;
33 struct read_data_context
* context
;
38 enum read_data_status status
;
50 LONG num_stripes
, stripes_left
;
53 uint16_t firstoff
, startoffstripe
, sectors_per_stripe
;
56 read_data_stripe
* stripes
;
61 extern tPsUpdateDiskCounters fPsUpdateDiskCounters
;
62 extern tCcCopyReadEx fCcCopyReadEx
;
63 extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters
;
65 #define LZO_PAGE_SIZE 4096
67 _Function_class_(IO_COMPLETION_ROUTINE
)
68 static NTSTATUS __stdcall
read_data_completion(PDEVICE_OBJECT DeviceObject
, PIRP Irp
, PVOID conptr
) {
69 read_data_stripe
* stripe
= conptr
;
70 read_data_context
* context
= (read_data_context
*)stripe
->context
;
74 stripe
->iosb
= Irp
->IoStatus
;
76 if (NT_SUCCESS(Irp
->IoStatus
.Status
))
77 stripe
->status
= ReadDataStatus_Success
;
79 stripe
->status
= ReadDataStatus_Error
;
81 if (InterlockedDecrement(&context
->stripes_left
) == 0)
82 KeSetEvent(&context
->Event
, 0, false);
84 return STATUS_MORE_PROCESSING_REQUIRED
;
87 NTSTATUS
check_csum(device_extension
* Vcb
, uint8_t* data
, uint32_t sectors
, void* csum
) {
90 csum2
= ExAllocatePoolWithTag(PagedPool
, Vcb
->csum_size
* sectors
, ALLOC_TAG
);
92 ERR("out of memory\n");
93 return STATUS_INSUFFICIENT_RESOURCES
;
96 do_calc_job(Vcb
, data
, sectors
, csum2
);
98 if (RtlCompareMemory(csum2
, csum
, sectors
* Vcb
->csum_size
) != sectors
* Vcb
->csum_size
) {
100 return STATUS_CRC_ERROR
;
105 return STATUS_SUCCESS
;
108 void get_tree_checksum(device_extension
* Vcb
, tree_header
* th
, void* csum
) {
109 switch (Vcb
->superblock
.csum_type
) {
110 case CSUM_TYPE_CRC32C
:
111 *(uint32_t*)csum
= ~calc_crc32c(0xffffffff, (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
114 case CSUM_TYPE_XXHASH
:
115 *(uint64_t*)csum
= XXH64((uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
), 0);
118 case CSUM_TYPE_SHA256
:
119 calc_sha256(csum
, &th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
122 case CSUM_TYPE_BLAKE2
:
123 blake2b(csum
, BLAKE2_HASH_SIZE
, (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
128 bool check_tree_checksum(device_extension
* Vcb
, tree_header
* th
) {
129 switch (Vcb
->superblock
.csum_type
) {
130 case CSUM_TYPE_CRC32C
: {
131 uint32_t crc32
= ~calc_crc32c(0xffffffff, (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
133 if (crc32
== *((uint32_t*)th
->csum
))
136 WARN("hash was %08x, expected %08x\n", crc32
, *((uint32_t*)th
->csum
));
141 case CSUM_TYPE_XXHASH
: {
142 uint64_t hash
= XXH64((uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
), 0);
144 if (hash
== *((uint64_t*)th
->csum
))
147 WARN("hash was %I64x, expected %I64x\n", hash
, *((uint64_t*)th
->csum
));
152 case CSUM_TYPE_SHA256
: {
153 uint8_t hash
[SHA256_HASH_SIZE
];
155 calc_sha256(hash
, (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
157 if (RtlCompareMemory(hash
, th
, SHA256_HASH_SIZE
) == SHA256_HASH_SIZE
)
160 WARN("hash was invalid\n");
165 case CSUM_TYPE_BLAKE2
: {
166 uint8_t hash
[BLAKE2_HASH_SIZE
];
168 blake2b(hash
, sizeof(hash
), (uint8_t*)&th
->fs_uuid
, Vcb
->superblock
.node_size
- sizeof(th
->csum
));
170 if (RtlCompareMemory(hash
, th
, BLAKE2_HASH_SIZE
) == BLAKE2_HASH_SIZE
)
173 WARN("hash was invalid\n");
182 void get_sector_csum(device_extension
* Vcb
, void* buf
, void* csum
) {
183 switch (Vcb
->superblock
.csum_type
) {
184 case CSUM_TYPE_CRC32C
:
185 *(uint32_t*)csum
= ~calc_crc32c(0xffffffff, buf
, Vcb
->superblock
.sector_size
);
188 case CSUM_TYPE_XXHASH
:
189 *(uint64_t*)csum
= XXH64(buf
, Vcb
->superblock
.sector_size
, 0);
192 case CSUM_TYPE_SHA256
:
193 calc_sha256(csum
, buf
, Vcb
->superblock
.sector_size
);
196 case CSUM_TYPE_BLAKE2
:
197 blake2b(csum
, BLAKE2_HASH_SIZE
, buf
, Vcb
->superblock
.sector_size
);
202 bool check_sector_csum(device_extension
* Vcb
, void* buf
, void* csum
) {
203 switch (Vcb
->superblock
.csum_type
) {
204 case CSUM_TYPE_CRC32C
: {
205 uint32_t crc32
= ~calc_crc32c(0xffffffff, buf
, Vcb
->superblock
.sector_size
);
207 return *(uint32_t*)csum
== crc32
;
210 case CSUM_TYPE_XXHASH
: {
211 uint64_t hash
= XXH64(buf
, Vcb
->superblock
.sector_size
, 0);
213 return *(uint64_t*)csum
== hash
;
216 case CSUM_TYPE_SHA256
: {
217 uint8_t hash
[SHA256_HASH_SIZE
];
219 calc_sha256(hash
, buf
, Vcb
->superblock
.sector_size
);
221 return RtlCompareMemory(hash
, csum
, SHA256_HASH_SIZE
) == SHA256_HASH_SIZE
;
224 case CSUM_TYPE_BLAKE2
: {
225 uint8_t hash
[BLAKE2_HASH_SIZE
];
227 blake2b(hash
, sizeof(hash
), buf
, Vcb
->superblock
.sector_size
);
229 return RtlCompareMemory(hash
, csum
, BLAKE2_HASH_SIZE
) == BLAKE2_HASH_SIZE
;
236 static NTSTATUS
read_data_dup(device_extension
* Vcb
, uint8_t* buf
, uint64_t addr
, read_data_context
* context
, CHUNK_ITEM
* ci
,
237 device
** devices
, uint64_t generation
) {
239 bool checksum_error
= false;
240 uint16_t j
, stripe
= 0;
242 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&ci
[1];
244 for (j
= 0; j
< ci
->num_stripes
; j
++) {
245 if (context
->stripes
[j
].status
== ReadDataStatus_Error
) {
246 WARN("stripe %u returned error %08lx\n", j
, context
->stripes
[j
].iosb
.Status
);
247 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
248 return context
->stripes
[j
].iosb
.Status
;
249 } else if (context
->stripes
[j
].status
== ReadDataStatus_Success
) {
255 if (context
->stripes
[stripe
].status
!= ReadDataStatus_Success
)
256 return STATUS_INTERNAL_ERROR
;
259 tree_header
* th
= (tree_header
*)buf
;
261 if (th
->address
!= context
->address
|| !check_tree_checksum(Vcb
, th
)) {
262 checksum_error
= true;
263 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
264 } else if (generation
!= 0 && th
->generation
!= generation
) {
265 checksum_error
= true;
266 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_GENERATION_ERRORS
);
268 } else if (context
->csum
) {
269 Status
= check_csum(Vcb
, buf
, (ULONG
)context
->stripes
[stripe
].Irp
->IoStatus
.Information
/ context
->sector_size
, context
->csum
);
271 if (Status
== STATUS_CRC_ERROR
) {
272 checksum_error
= true;
273 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
274 } else if (!NT_SUCCESS(Status
)) {
275 ERR("check_csum returned %08lx\n", Status
);
281 return STATUS_SUCCESS
;
283 if (ci
->num_stripes
== 1)
284 return STATUS_CRC_ERROR
;
288 bool recovered
= false;
290 t2
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.node_size
, ALLOC_TAG
);
292 ERR("out of memory\n");
293 return STATUS_INSUFFICIENT_RESOURCES
;
296 for (j
= 0; j
< ci
->num_stripes
; j
++) {
297 if (j
!= stripe
&& devices
[j
] && devices
[j
]->devobj
) {
298 Status
= sync_read_phys(devices
[j
]->devobj
, devices
[j
]->fileobj
, cis
[j
].offset
+ context
->stripes
[stripe
].stripestart
,
299 Vcb
->superblock
.node_size
, (uint8_t*)t2
, false);
300 if (!NT_SUCCESS(Status
)) {
301 WARN("sync_read_phys returned %08lx\n", Status
);
302 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
304 bool checksum_error
= !check_tree_checksum(Vcb
, t2
);
306 if (t2
->address
== addr
&& !checksum_error
&& (generation
== 0 || t2
->generation
== generation
)) {
307 RtlCopyMemory(buf
, t2
, Vcb
->superblock
.node_size
);
308 ERR("recovering from checksum error at %I64x, device %I64x\n", addr
, devices
[stripe
]->devitem
.dev_id
);
311 if (!Vcb
->readonly
&& !devices
[stripe
]->readonly
) { // write good data over bad
312 Status
= write_data_phys(devices
[stripe
]->devobj
, devices
[stripe
]->fileobj
, cis
[stripe
].offset
+ context
->stripes
[stripe
].stripestart
,
313 t2
, Vcb
->superblock
.node_size
);
314 if (!NT_SUCCESS(Status
)) {
315 WARN("write_data_phys returned %08lx\n", Status
);
316 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
321 } else if (t2
->address
!= addr
|| checksum_error
)
322 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
324 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_GENERATION_ERRORS
);
330 ERR("unrecoverable checksum error at %I64x\n", addr
);
332 return STATUS_CRC_ERROR
;
337 ULONG sectors
= (ULONG
)context
->stripes
[stripe
].Irp
->IoStatus
.Information
/ Vcb
->superblock
.sector_size
;
339 void* ptr
= context
->csum
;
341 sector
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.sector_size
, ALLOC_TAG
);
343 ERR("out of memory\n");
344 return STATUS_INSUFFICIENT_RESOURCES
;
347 for (i
= 0; i
< sectors
; i
++) {
348 if (!check_sector_csum(Vcb
, buf
+ (i
* Vcb
->superblock
.sector_size
), ptr
)) {
349 bool recovered
= false;
351 for (j
= 0; j
< ci
->num_stripes
; j
++) {
352 if (j
!= stripe
&& devices
[j
] && devices
[j
]->devobj
) {
353 Status
= sync_read_phys(devices
[j
]->devobj
, devices
[j
]->fileobj
,
354 cis
[j
].offset
+ context
->stripes
[stripe
].stripestart
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
),
355 Vcb
->superblock
.sector_size
, sector
, false);
356 if (!NT_SUCCESS(Status
)) {
357 WARN("sync_read_phys returned %08lx\n", Status
);
358 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
360 if (check_sector_csum(Vcb
, sector
, ptr
)) {
361 RtlCopyMemory(buf
+ (i
* Vcb
->superblock
.sector_size
), sector
, Vcb
->superblock
.sector_size
);
362 ERR("recovering from checksum error at %I64x, device %I64x\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), devices
[stripe
]->devitem
.dev_id
);
365 if (!Vcb
->readonly
&& !devices
[stripe
]->readonly
) { // write good data over bad
366 Status
= write_data_phys(devices
[stripe
]->devobj
, devices
[stripe
]->fileobj
,
367 cis
[stripe
].offset
+ context
->stripes
[stripe
].stripestart
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
),
368 sector
, Vcb
->superblock
.sector_size
);
369 if (!NT_SUCCESS(Status
)) {
370 WARN("write_data_phys returned %08lx\n", Status
);
371 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
377 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
383 ERR("unrecoverable checksum error at %I64x\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
));
385 return STATUS_CRC_ERROR
;
389 ptr
= (uint8_t*)ptr
+ Vcb
->csum_size
;
395 return STATUS_SUCCESS
;
398 static NTSTATUS
read_data_raid0(device_extension
* Vcb
, uint8_t* buf
, uint64_t addr
, uint32_t length
, read_data_context
* context
,
399 CHUNK_ITEM
* ci
, device
** devices
, uint64_t generation
, uint64_t offset
) {
402 for (i
= 0; i
< ci
->num_stripes
; i
++) {
403 if (context
->stripes
[i
].status
== ReadDataStatus_Error
) {
404 WARN("stripe %I64u returned error %08lx\n", i
, context
->stripes
[i
].iosb
.Status
);
405 log_device_error(Vcb
, devices
[i
], BTRFS_DEV_STAT_READ_ERRORS
);
406 return context
->stripes
[i
].iosb
.Status
;
410 if (context
->tree
) { // shouldn't happen, as trees shouldn't cross stripe boundaries
411 tree_header
* th
= (tree_header
*)buf
;
412 bool checksum_error
= !check_tree_checksum(Vcb
, th
);
414 if (checksum_error
|| addr
!= th
->address
|| (generation
!= 0 && generation
!= th
->generation
)) {
418 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
, &off
, &stripe
);
420 ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr
, devices
[stripe
]->devitem
.dev_id
);
422 if (checksum_error
) {
423 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
424 return STATUS_CRC_ERROR
;
425 } else if (addr
!= th
->address
) {
426 WARN("address of tree was %I64x, not %I64x as expected\n", th
->address
, addr
);
427 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
428 return STATUS_CRC_ERROR
;
429 } else if (generation
!= 0 && generation
!= th
->generation
) {
430 WARN("generation of tree was %I64x, not %I64x as expected\n", th
->generation
, generation
);
431 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_GENERATION_ERRORS
);
432 return STATUS_CRC_ERROR
;
435 } else if (context
->csum
) {
438 Status
= check_csum(Vcb
, buf
, length
/ Vcb
->superblock
.sector_size
, context
->csum
);
440 if (Status
== STATUS_CRC_ERROR
) {
441 void* ptr
= context
->csum
;
443 for (i
= 0; i
< length
/ Vcb
->superblock
.sector_size
; i
++) {
444 if (!check_sector_csum(Vcb
, buf
+ (i
* Vcb
->superblock
.sector_size
), ptr
)) {
448 get_raid0_offset(addr
- offset
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), ci
->stripe_length
, ci
->num_stripes
, &off
, &stripe
);
450 ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr
, devices
[stripe
]->devitem
.dev_id
);
452 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
457 ptr
= (uint8_t*)ptr
+ Vcb
->csum_size
;
461 } else if (!NT_SUCCESS(Status
)) {
462 ERR("check_csum returned %08lx\n", Status
);
467 return STATUS_SUCCESS
;
470 static NTSTATUS
read_data_raid10(device_extension
* Vcb
, uint8_t* buf
, uint64_t addr
, uint32_t length
, read_data_context
* context
,
471 CHUNK_ITEM
* ci
, device
** devices
, uint64_t generation
, uint64_t offset
) {
475 bool checksum_error
= false;
476 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&ci
[1];
478 for (j
= 0; j
< ci
->num_stripes
; j
++) {
479 if (context
->stripes
[j
].status
== ReadDataStatus_Error
) {
480 WARN("stripe %u returned error %08lx\n", j
, context
->stripes
[j
].iosb
.Status
);
481 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
482 return context
->stripes
[j
].iosb
.Status
;
483 } else if (context
->stripes
[j
].status
== ReadDataStatus_Success
)
488 tree_header
* th
= (tree_header
*)buf
;
490 if (!check_tree_checksum(Vcb
, th
)) {
491 checksum_error
= true;
492 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
493 } else if (addr
!= th
->address
) {
494 WARN("address of tree was %I64x, not %I64x as expected\n", th
->address
, addr
);
495 checksum_error
= true;
496 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
497 } else if (generation
!= 0 && generation
!= th
->generation
) {
498 WARN("generation of tree was %I64x, not %I64x as expected\n", th
->generation
, generation
);
499 checksum_error
= true;
500 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_GENERATION_ERRORS
);
502 } else if (context
->csum
) {
503 Status
= check_csum(Vcb
, buf
, length
/ Vcb
->superblock
.sector_size
, context
->csum
);
505 if (Status
== STATUS_CRC_ERROR
)
506 checksum_error
= true;
507 else if (!NT_SUCCESS(Status
)) {
508 ERR("check_csum returned %08lx\n", Status
);
514 return STATUS_SUCCESS
;
519 uint16_t badsubstripe
= 0;
520 bool recovered
= false;
522 t2
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.node_size
, ALLOC_TAG
);
524 ERR("out of memory\n");
525 return STATUS_INSUFFICIENT_RESOURCES
;
528 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
/ ci
->sub_stripes
, &off
, &stripe
);
530 stripe
*= ci
->sub_stripes
;
532 for (j
= 0; j
< ci
->sub_stripes
; j
++) {
533 if (context
->stripes
[stripe
+ j
].status
== ReadDataStatus_Success
) {
539 for (j
= 0; j
< ci
->sub_stripes
; j
++) {
540 if (context
->stripes
[stripe
+ j
].status
!= ReadDataStatus_Success
&& devices
[stripe
+ j
] && devices
[stripe
+ j
]->devobj
) {
541 Status
= sync_read_phys(devices
[stripe
+ j
]->devobj
, devices
[stripe
+ j
]->fileobj
, cis
[stripe
+ j
].offset
+ off
,
542 Vcb
->superblock
.node_size
, (uint8_t*)t2
, false);
543 if (!NT_SUCCESS(Status
)) {
544 WARN("sync_read_phys returned %08lx\n", Status
);
545 log_device_error(Vcb
, devices
[stripe
+ j
], BTRFS_DEV_STAT_READ_ERRORS
);
547 bool checksum_error
= !check_tree_checksum(Vcb
, t2
);
549 if (t2
->address
== addr
&& !checksum_error
&& (generation
== 0 || t2
->generation
== generation
)) {
550 RtlCopyMemory(buf
, t2
, Vcb
->superblock
.node_size
);
551 ERR("recovering from checksum error at %I64x, device %I64x\n", addr
, devices
[stripe
+ j
]->devitem
.dev_id
);
554 if (!Vcb
->readonly
&& !devices
[stripe
+ badsubstripe
]->readonly
&& devices
[stripe
+ badsubstripe
]->devobj
) { // write good data over bad
555 Status
= write_data_phys(devices
[stripe
+ badsubstripe
]->devobj
, devices
[stripe
+ badsubstripe
]->fileobj
,
556 cis
[stripe
+ badsubstripe
].offset
+ off
, t2
, Vcb
->superblock
.node_size
);
557 if (!NT_SUCCESS(Status
)) {
558 WARN("write_data_phys returned %08lx\n", Status
);
559 log_device_error(Vcb
, devices
[stripe
+ badsubstripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
564 } else if (t2
->address
!= addr
|| checksum_error
)
565 log_device_error(Vcb
, devices
[stripe
+ j
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
567 log_device_error(Vcb
, devices
[stripe
+ j
], BTRFS_DEV_STAT_GENERATION_ERRORS
);
573 ERR("unrecoverable checksum error at %I64x\n", addr
);
575 return STATUS_CRC_ERROR
;
580 ULONG sectors
= length
/ Vcb
->superblock
.sector_size
;
582 void* ptr
= context
->csum
;
584 sector
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.sector_size
, ALLOC_TAG
);
586 ERR("out of memory\n");
587 return STATUS_INSUFFICIENT_RESOURCES
;
590 for (i
= 0; i
< sectors
; i
++) {
591 if (!check_sector_csum(Vcb
, buf
+ (i
* Vcb
->superblock
.sector_size
), ptr
)) {
593 uint16_t stripe2
, badsubstripe
= 0;
594 bool recovered
= false;
596 get_raid0_offset(addr
- offset
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), ci
->stripe_length
,
597 ci
->num_stripes
/ ci
->sub_stripes
, &off
, &stripe2
);
599 stripe2
*= ci
->sub_stripes
;
601 for (j
= 0; j
< ci
->sub_stripes
; j
++) {
602 if (context
->stripes
[stripe2
+ j
].status
== ReadDataStatus_Success
) {
608 log_device_error(Vcb
, devices
[stripe2
+ badsubstripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
610 for (j
= 0; j
< ci
->sub_stripes
; j
++) {
611 if (context
->stripes
[stripe2
+ j
].status
!= ReadDataStatus_Success
&& devices
[stripe2
+ j
] && devices
[stripe2
+ j
]->devobj
) {
612 Status
= sync_read_phys(devices
[stripe2
+ j
]->devobj
, devices
[stripe2
+ j
]->fileobj
, cis
[stripe2
+ j
].offset
+ off
,
613 Vcb
->superblock
.sector_size
, sector
, false);
614 if (!NT_SUCCESS(Status
)) {
615 WARN("sync_read_phys returned %08lx\n", Status
);
616 log_device_error(Vcb
, devices
[stripe2
+ j
], BTRFS_DEV_STAT_READ_ERRORS
);
618 if (check_sector_csum(Vcb
, sector
, ptr
)) {
619 RtlCopyMemory(buf
+ (i
* Vcb
->superblock
.sector_size
), sector
, Vcb
->superblock
.sector_size
);
620 ERR("recovering from checksum error at %I64x, device %I64x\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), devices
[stripe2
+ j
]->devitem
.dev_id
);
623 if (!Vcb
->readonly
&& !devices
[stripe2
+ badsubstripe
]->readonly
&& devices
[stripe2
+ badsubstripe
]->devobj
) { // write good data over bad
624 Status
= write_data_phys(devices
[stripe2
+ badsubstripe
]->devobj
, devices
[stripe2
+ badsubstripe
]->fileobj
,
625 cis
[stripe2
+ badsubstripe
].offset
+ off
, sector
, Vcb
->superblock
.sector_size
);
626 if (!NT_SUCCESS(Status
)) {
627 WARN("write_data_phys returned %08lx\n", Status
);
628 log_device_error(Vcb
, devices
[stripe2
+ badsubstripe
], BTRFS_DEV_STAT_READ_ERRORS
);
634 log_device_error(Vcb
, devices
[stripe2
+ j
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
640 ERR("unrecoverable checksum error at %I64x\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
));
642 return STATUS_CRC_ERROR
;
646 ptr
= (uint8_t*)ptr
+ Vcb
->csum_size
;
652 return STATUS_SUCCESS
;
655 static NTSTATUS
read_data_raid5(device_extension
* Vcb
, uint8_t* buf
, uint64_t addr
, uint32_t length
, read_data_context
* context
, CHUNK_ITEM
* ci
,
656 device
** devices
, uint64_t offset
, uint64_t generation
, chunk
* c
, bool degraded
) {
659 bool checksum_error
= false;
660 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&ci
[1];
662 bool no_success
= true;
664 for (j
= 0; j
< ci
->num_stripes
; j
++) {
665 if (context
->stripes
[j
].status
== ReadDataStatus_Error
) {
666 WARN("stripe %u returned error %08lx\n", j
, context
->stripes
[j
].iosb
.Status
);
667 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
668 return context
->stripes
[j
].iosb
.Status
;
669 } else if (context
->stripes
[j
].status
== ReadDataStatus_Success
) {
675 if (c
) { // check partial stripes
677 uint64_t ps_length
= (ci
->num_stripes
- 1) * ci
->stripe_length
;
679 ExAcquireResourceSharedLite(&c
->partial_stripes_lock
, true);
681 le
= c
->partial_stripes
.Flink
;
682 while (le
!= &c
->partial_stripes
) {
683 partial_stripe
* ps
= CONTAINING_RECORD(le
, partial_stripe
, list_entry
);
685 if (ps
->address
+ ps_length
> addr
&& ps
->address
< addr
+ length
) {
686 ULONG runlength
, index
;
688 runlength
= RtlFindFirstRunClear(&ps
->bmp
, &index
);
690 while (runlength
!= 0) {
692 uint64_t runstart
, runend
, start
, end
;
694 if (index
>= ps
->bmplen
)
697 if (index
+ runlength
>= ps
->bmplen
) {
698 runlength
= ps
->bmplen
- index
;
705 uint64_t runstart
= ps
->address
+ (index
* Vcb
->superblock
.sector_size
);
706 uint64_t runend
= runstart
+ (runlength
* Vcb
->superblock
.sector_size
);
707 uint64_t start
= max(runstart
, addr
);
708 uint64_t end
= min(runend
, addr
+ length
);
710 runstart
= ps
->address
+ (index
* Vcb
->superblock
.sector_size
);
711 runend
= runstart
+ (runlength
* Vcb
->superblock
.sector_size
);
712 start
= max(runstart
, addr
);
713 end
= min(runend
, addr
+ length
);
717 RtlCopyMemory(buf
+ start
- addr
, &ps
->data
[start
- ps
->address
], (ULONG
)(end
- start
));
719 runlength
= RtlFindNextForwardRunClear(&ps
->bmp
, index
+ runlength
, &index
);
721 } else if (ps
->address
>= addr
+ length
)
727 ExReleaseResourceLite(&c
->partial_stripes_lock
);
731 tree_header
* th
= (tree_header
*)buf
;
733 if (addr
!= th
->address
|| !check_tree_checksum(Vcb
, th
)) {
734 checksum_error
= true;
735 if (!no_success
&& !degraded
)
736 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
737 } else if (generation
!= 0 && generation
!= th
->generation
) {
738 checksum_error
= true;
739 if (!no_success
&& !degraded
)
740 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_GENERATION_ERRORS
);
742 } else if (context
->csum
) {
743 Status
= check_csum(Vcb
, buf
, length
/ Vcb
->superblock
.sector_size
, context
->csum
);
745 if (Status
== STATUS_CRC_ERROR
) {
747 WARN("checksum error\n");
748 checksum_error
= true;
749 } else if (!NT_SUCCESS(Status
)) {
750 ERR("check_csum returned %08lx\n", Status
);
754 checksum_error
= true;
757 return STATUS_SUCCESS
;
762 bool recovered
= false, first
= true, failed
= false;
765 t2
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.node_size
* 2, ALLOC_TAG
);
767 ERR("out of memory\n");
768 return STATUS_INSUFFICIENT_RESOURCES
;
771 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
- 1, &off
, &stripe
);
773 parity
= (((addr
- offset
) / ((ci
->num_stripes
- 1) * ci
->stripe_length
)) + ci
->num_stripes
- 1) % ci
->num_stripes
;
775 stripe
= (parity
+ stripe
+ 1) % ci
->num_stripes
;
777 for (j
= 0; j
< ci
->num_stripes
; j
++) {
779 if (devices
[j
] && devices
[j
]->devobj
) {
781 Status
= sync_read_phys(devices
[j
]->devobj
, devices
[j
]->fileobj
, cis
[j
].offset
+ off
, Vcb
->superblock
.node_size
, t2
, false);
782 if (!NT_SUCCESS(Status
)) {
783 ERR("sync_read_phys returned %08lx\n", Status
);
784 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
791 Status
= sync_read_phys(devices
[j
]->devobj
, devices
[j
]->fileobj
, cis
[j
].offset
+ off
, Vcb
->superblock
.node_size
, t2
+ Vcb
->superblock
.node_size
, false);
792 if (!NT_SUCCESS(Status
)) {
793 ERR("sync_read_phys returned %08lx\n", Status
);
794 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
799 do_xor(t2
, t2
+ Vcb
->superblock
.node_size
, Vcb
->superblock
.node_size
);
809 tree_header
* t3
= (tree_header
*)t2
;
811 if (t3
->address
== addr
&& check_tree_checksum(Vcb
, t3
) && (generation
== 0 || t3
->generation
== generation
)) {
812 RtlCopyMemory(buf
, t2
, Vcb
->superblock
.node_size
);
815 ERR("recovering from checksum error at %I64x, device %I64x\n", addr
, devices
[stripe
]->devitem
.dev_id
);
819 if (!Vcb
->readonly
&& devices
[stripe
] && !devices
[stripe
]->readonly
&& devices
[stripe
]->devobj
) { // write good data over bad
820 Status
= write_data_phys(devices
[stripe
]->devobj
, devices
[stripe
]->fileobj
, cis
[stripe
].offset
+ off
, t2
, Vcb
->superblock
.node_size
);
821 if (!NT_SUCCESS(Status
)) {
822 WARN("write_data_phys returned %08lx\n", Status
);
823 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
830 ERR("unrecoverable checksum error at %I64x\n", addr
);
832 return STATUS_CRC_ERROR
;
837 ULONG sectors
= length
/ Vcb
->superblock
.sector_size
;
839 void* ptr
= context
->csum
;
841 sector
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.sector_size
* 2, ALLOC_TAG
);
843 ERR("out of memory\n");
844 return STATUS_INSUFFICIENT_RESOURCES
;
847 for (i
= 0; i
< sectors
; i
++) {
851 get_raid0_offset(addr
- offset
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), ci
->stripe_length
,
852 ci
->num_stripes
- 1, &off
, &stripe
);
854 parity
= (((addr
- offset
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
)) / ((ci
->num_stripes
- 1) * ci
->stripe_length
)) + ci
->num_stripes
- 1) % ci
->num_stripes
;
856 stripe
= (parity
+ stripe
+ 1) % ci
->num_stripes
;
858 if (!devices
[stripe
] || !devices
[stripe
]->devobj
|| (ptr
&& !check_sector_csum(Vcb
, buf
+ (i
* Vcb
->superblock
.sector_size
), ptr
))) {
859 bool recovered
= false, first
= true, failed
= false;
861 if (devices
[stripe
] && devices
[stripe
]->devobj
)
862 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_READ_ERRORS
);
864 for (j
= 0; j
< ci
->num_stripes
; j
++) {
866 if (devices
[j
] && devices
[j
]->devobj
) {
868 Status
= sync_read_phys(devices
[j
]->devobj
, devices
[j
]->fileobj
, cis
[j
].offset
+ off
, Vcb
->superblock
.sector_size
, sector
, false);
869 if (!NT_SUCCESS(Status
)) {
870 ERR("sync_read_phys returned %08lx\n", Status
);
872 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
878 Status
= sync_read_phys(devices
[j
]->devobj
, devices
[j
]->fileobj
, cis
[j
].offset
+ off
, Vcb
->superblock
.sector_size
,
879 sector
+ Vcb
->superblock
.sector_size
, false);
880 if (!NT_SUCCESS(Status
)) {
881 ERR("sync_read_phys returned %08lx\n", Status
);
883 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
887 do_xor(sector
, sector
+ Vcb
->superblock
.sector_size
, Vcb
->superblock
.sector_size
);
897 if (!ptr
|| check_sector_csum(Vcb
, sector
, ptr
)) {
898 RtlCopyMemory(buf
+ (i
* Vcb
->superblock
.sector_size
), sector
, Vcb
->superblock
.sector_size
);
901 ERR("recovering from checksum error at %I64x, device %I64x\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), devices
[stripe
]->devitem
.dev_id
);
905 if (!Vcb
->readonly
&& devices
[stripe
] && !devices
[stripe
]->readonly
&& devices
[stripe
]->devobj
) { // write good data over bad
906 Status
= write_data_phys(devices
[stripe
]->devobj
, devices
[stripe
]->fileobj
, cis
[stripe
].offset
+ off
,
907 sector
, Vcb
->superblock
.sector_size
);
908 if (!NT_SUCCESS(Status
)) {
909 WARN("write_data_phys returned %08lx\n", Status
);
910 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
917 ERR("unrecoverable checksum error at %I64x\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
));
919 return STATUS_CRC_ERROR
;
924 ptr
= (uint8_t*)ptr
+ Vcb
->csum_size
;
930 return STATUS_SUCCESS
;
933 void raid6_recover2(uint8_t* sectors
, uint16_t num_stripes
, ULONG sector_size
, uint16_t missing1
, uint16_t missing2
, uint8_t* out
) {
934 if (missing1
== num_stripes
- 2 || missing2
== num_stripes
- 2) { // reconstruct from q and data
935 uint16_t missing
= missing1
== (num_stripes
- 2) ? missing2
: missing1
;
938 stripe
= num_stripes
- 3;
940 if (stripe
== missing
)
941 RtlZeroMemory(out
, sector_size
);
943 RtlCopyMemory(out
, sectors
+ (stripe
* sector_size
), sector_size
);
948 galois_double(out
, sector_size
);
950 if (stripe
!= missing
)
951 do_xor(out
, sectors
+ (stripe
* sector_size
), sector_size
);
952 } while (stripe
> 0);
954 do_xor(out
, sectors
+ ((num_stripes
- 1) * sector_size
), sector_size
);
957 galois_divpower(out
, (uint8_t)missing
, sector_size
);
958 } else { // reconstruct from p and q
959 uint16_t x
, y
, stripe
;
960 uint8_t gyx
, gx
, denom
, a
, b
, *p
, *q
, *pxy
, *qxy
;
963 stripe
= num_stripes
- 3;
965 pxy
= out
+ sector_size
;
968 if (stripe
== missing1
|| stripe
== missing2
) {
969 RtlZeroMemory(qxy
, sector_size
);
970 RtlZeroMemory(pxy
, sector_size
);
972 if (stripe
== missing1
)
977 RtlCopyMemory(qxy
, sectors
+ (stripe
* sector_size
), sector_size
);
978 RtlCopyMemory(pxy
, sectors
+ (stripe
* sector_size
), sector_size
);
984 galois_double(qxy
, sector_size
);
986 if (stripe
!= missing1
&& stripe
!= missing2
) {
987 do_xor(qxy
, sectors
+ (stripe
* sector_size
), sector_size
);
988 do_xor(pxy
, sectors
+ (stripe
* sector_size
), sector_size
);
989 } else if (stripe
== missing1
)
991 else if (stripe
== missing2
)
993 } while (stripe
> 0);
995 gyx
= gpow2(y
> x
? (y
-x
) : (255-x
+y
));
998 denom
= gdiv(1, gyx
^ 1);
999 a
= gmul(gyx
, denom
);
1000 b
= gmul(gx
, denom
);
1002 p
= sectors
+ ((num_stripes
- 2) * sector_size
);
1003 q
= sectors
+ ((num_stripes
- 1) * sector_size
);
1005 for (j
= 0; j
< sector_size
; j
++) {
1006 *qxy
= gmul(a
, *p
^ *pxy
) ^ gmul(b
, *q
^ *qxy
);
1014 do_xor(out
+ sector_size
, out
, sector_size
);
1015 do_xor(out
+ sector_size
, sectors
+ ((num_stripes
- 2) * sector_size
), sector_size
);
1019 static NTSTATUS
read_data_raid6(device_extension
* Vcb
, uint8_t* buf
, uint64_t addr
, uint32_t length
, read_data_context
* context
, CHUNK_ITEM
* ci
,
1020 device
** devices
, uint64_t offset
, uint64_t generation
, chunk
* c
, bool degraded
) {
1023 bool checksum_error
= false;
1024 CHUNK_ITEM_STRIPE
* cis
= (CHUNK_ITEM_STRIPE
*)&ci
[1];
1026 bool no_success
= true;
1028 for (j
= 0; j
< ci
->num_stripes
; j
++) {
1029 if (context
->stripes
[j
].status
== ReadDataStatus_Error
) {
1030 WARN("stripe %u returned error %08lx\n", j
, context
->stripes
[j
].iosb
.Status
);
1033 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
1034 return context
->stripes
[j
].iosb
.Status
;
1035 } else if (context
->stripes
[j
].status
== ReadDataStatus_Success
) {
1041 if (c
) { // check partial stripes
1043 uint64_t ps_length
= (ci
->num_stripes
- 2) * ci
->stripe_length
;
1045 ExAcquireResourceSharedLite(&c
->partial_stripes_lock
, true);
1047 le
= c
->partial_stripes
.Flink
;
1048 while (le
!= &c
->partial_stripes
) {
1049 partial_stripe
* ps
= CONTAINING_RECORD(le
, partial_stripe
, list_entry
);
1051 if (ps
->address
+ ps_length
> addr
&& ps
->address
< addr
+ length
) {
1052 ULONG runlength
, index
;
1054 runlength
= RtlFindFirstRunClear(&ps
->bmp
, &index
);
1056 while (runlength
!= 0) {
1058 uint64_t runstart
, runend
, start
, end
;
1060 if (index
>= ps
->bmplen
)
1063 if (index
+ runlength
>= ps
->bmplen
) {
1064 runlength
= ps
->bmplen
- index
;
1071 uint64_t runstart
= ps
->address
+ (index
* Vcb
->superblock
.sector_size
);
1072 uint64_t runend
= runstart
+ (runlength
* Vcb
->superblock
.sector_size
);
1073 uint64_t start
= max(runstart
, addr
);
1074 uint64_t end
= min(runend
, addr
+ length
);
1076 runstart
= ps
->address
+ (index
* Vcb
->superblock
.sector_size
);
1077 runend
= runstart
+ (runlength
* Vcb
->superblock
.sector_size
);
1078 start
= max(runstart
, addr
);
1079 end
= min(runend
, addr
+ length
);
1083 RtlCopyMemory(buf
+ start
- addr
, &ps
->data
[start
- ps
->address
], (ULONG
)(end
- start
));
1085 runlength
= RtlFindNextForwardRunClear(&ps
->bmp
, index
+ runlength
, &index
);
1087 } else if (ps
->address
>= addr
+ length
)
1093 ExReleaseResourceLite(&c
->partial_stripes_lock
);
1096 if (context
->tree
) {
1097 tree_header
* th
= (tree_header
*)buf
;
1099 if (addr
!= th
->address
|| !check_tree_checksum(Vcb
, th
)) {
1100 checksum_error
= true;
1101 if (!no_success
&& !degraded
&& devices
[stripe
])
1102 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1103 } else if (generation
!= 0 && generation
!= th
->generation
) {
1104 checksum_error
= true;
1105 if (!no_success
&& !degraded
&& devices
[stripe
])
1106 log_device_error(Vcb
, devices
[stripe
], BTRFS_DEV_STAT_GENERATION_ERRORS
);
1108 } else if (context
->csum
) {
1109 Status
= check_csum(Vcb
, buf
, length
/ Vcb
->superblock
.sector_size
, context
->csum
);
1111 if (Status
== STATUS_CRC_ERROR
) {
1113 WARN("checksum error\n");
1114 checksum_error
= true;
1115 } else if (!NT_SUCCESS(Status
)) {
1116 ERR("check_csum returned %08lx\n", Status
);
1119 } else if (degraded
)
1120 checksum_error
= true;
1122 if (!checksum_error
)
1123 return STATUS_SUCCESS
;
1125 if (context
->tree
) {
1127 uint16_t k
, physstripe
, parity1
, parity2
, error_stripe
;
1129 bool recovered
= false, failed
= false;
1130 ULONG num_errors
= 0;
1132 sector
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.node_size
* (ci
->num_stripes
+ 2), ALLOC_TAG
);
1134 ERR("out of memory\n");
1135 return STATUS_INSUFFICIENT_RESOURCES
;
1138 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
- 2, &off
, &stripe
);
1140 parity1
= (((addr
- offset
) / ((ci
->num_stripes
- 2) * ci
->stripe_length
)) + ci
->num_stripes
- 2) % ci
->num_stripes
;
1141 parity2
= (parity1
+ 1) % ci
->num_stripes
;
1143 physstripe
= (parity2
+ stripe
+ 1) % ci
->num_stripes
;
1145 j
= (parity2
+ 1) % ci
->num_stripes
;
1147 for (k
= 0; k
< ci
->num_stripes
- 1; k
++) {
1148 if (j
!= physstripe
) {
1149 if (devices
[j
] && devices
[j
]->devobj
) {
1150 Status
= sync_read_phys(devices
[j
]->devobj
, devices
[j
]->fileobj
, cis
[j
].offset
+ off
, Vcb
->superblock
.node_size
,
1151 sector
+ (k
* Vcb
->superblock
.node_size
), false);
1152 if (!NT_SUCCESS(Status
)) {
1153 ERR("sync_read_phys returned %08lx\n", Status
);
1154 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
1158 if (num_errors
> 1) {
1167 if (num_errors
> 1) {
1174 j
= (j
+ 1) % ci
->num_stripes
;
1178 if (num_errors
== 0) {
1179 tree_header
* th
= (tree_header
*)(sector
+ (stripe
* Vcb
->superblock
.node_size
));
1181 RtlCopyMemory(sector
+ (stripe
* Vcb
->superblock
.node_size
), sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.node_size
),
1182 Vcb
->superblock
.node_size
);
1184 for (j
= 0; j
< ci
->num_stripes
- 2; j
++) {
1186 do_xor(sector
+ (stripe
* Vcb
->superblock
.node_size
), sector
+ (j
* Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1189 if (th
->address
== addr
&& check_tree_checksum(Vcb
, th
) && (generation
== 0 || th
->generation
== generation
)) {
1190 RtlCopyMemory(buf
, sector
+ (stripe
* Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1192 if (devices
[physstripe
] && devices
[physstripe
]->devobj
)
1193 ERR("recovering from checksum error at %I64x, device %I64x\n", addr
, devices
[physstripe
]->devitem
.dev_id
);
1197 if (!Vcb
->readonly
&& devices
[physstripe
] && devices
[physstripe
]->devobj
&& !devices
[physstripe
]->readonly
) { // write good data over bad
1198 Status
= write_data_phys(devices
[physstripe
]->devobj
, devices
[physstripe
]->fileobj
, cis
[physstripe
].offset
+ off
,
1199 sector
+ (stripe
* Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1200 if (!NT_SUCCESS(Status
)) {
1201 WARN("write_data_phys returned %08lx\n", Status
);
1202 log_device_error(Vcb
, devices
[physstripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
1209 tree_header
* th
= (tree_header
*)(sector
+ (ci
->num_stripes
* Vcb
->superblock
.node_size
));
1210 bool read_q
= false;
1212 if (devices
[parity2
] && devices
[parity2
]->devobj
) {
1213 Status
= sync_read_phys(devices
[parity2
]->devobj
, devices
[parity2
]->fileobj
, cis
[parity2
].offset
+ off
,
1214 Vcb
->superblock
.node_size
, sector
+ ((ci
->num_stripes
- 1) * Vcb
->superblock
.node_size
), false);
1215 if (!NT_SUCCESS(Status
)) {
1216 ERR("sync_read_phys returned %08lx\n", Status
);
1217 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
1223 if (num_errors
== 1) {
1224 raid6_recover2(sector
, ci
->num_stripes
, Vcb
->superblock
.node_size
, stripe
, error_stripe
, sector
+ (ci
->num_stripes
* Vcb
->superblock
.node_size
));
1226 if (th
->address
== addr
&& check_tree_checksum(Vcb
, th
) && (generation
== 0 || th
->generation
== generation
))
1229 for (j
= 0; j
< ci
->num_stripes
- 1; j
++) {
1231 raid6_recover2(sector
, ci
->num_stripes
, Vcb
->superblock
.node_size
, stripe
, j
, sector
+ (ci
->num_stripes
* Vcb
->superblock
.node_size
));
1233 if (th
->address
== addr
&& check_tree_checksum(Vcb
, th
) && (generation
== 0 || th
->generation
== generation
)) {
1244 uint16_t error_stripe_phys
= (parity2
+ error_stripe
+ 1) % ci
->num_stripes
;
1246 if (devices
[physstripe
] && devices
[physstripe
]->devobj
)
1247 ERR("recovering from checksum error at %I64x, device %I64x\n", addr
, devices
[physstripe
]->devitem
.dev_id
);
1249 RtlCopyMemory(buf
, sector
+ (ci
->num_stripes
* Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1251 if (!Vcb
->readonly
&& devices
[physstripe
] && devices
[physstripe
]->devobj
&& !devices
[physstripe
]->readonly
) { // write good data over bad
1252 Status
= write_data_phys(devices
[physstripe
]->devobj
, devices
[physstripe
]->fileobj
, cis
[physstripe
].offset
+ off
,
1253 sector
+ (ci
->num_stripes
* Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1254 if (!NT_SUCCESS(Status
)) {
1255 WARN("write_data_phys returned %08lx\n", Status
);
1256 log_device_error(Vcb
, devices
[physstripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
1260 if (devices
[error_stripe_phys
] && devices
[error_stripe_phys
]->devobj
) {
1261 if (error_stripe
== ci
->num_stripes
- 2) {
1262 ERR("recovering from parity error at %I64x, device %I64x\n", addr
, devices
[error_stripe_phys
]->devitem
.dev_id
);
1264 log_device_error(Vcb
, devices
[error_stripe_phys
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1266 RtlZeroMemory(sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1268 for (j
= 0; j
< ci
->num_stripes
- 2; j
++) {
1270 do_xor(sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.node_size
), sector
+ (ci
->num_stripes
* Vcb
->superblock
.node_size
),
1271 Vcb
->superblock
.node_size
);
1273 do_xor(sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.node_size
), sector
+ (j
* Vcb
->superblock
.node_size
),
1274 Vcb
->superblock
.node_size
);
1278 ERR("recovering from checksum error at %I64x, device %I64x\n", addr
+ ((error_stripe
- stripe
) * ci
->stripe_length
),
1279 devices
[error_stripe_phys
]->devitem
.dev_id
);
1281 log_device_error(Vcb
, devices
[error_stripe_phys
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1283 RtlCopyMemory(sector
+ (error_stripe
* Vcb
->superblock
.node_size
),
1284 sector
+ ((ci
->num_stripes
+ 1) * Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1288 if (!Vcb
->readonly
&& devices
[error_stripe_phys
] && devices
[error_stripe_phys
]->devobj
&& !devices
[error_stripe_phys
]->readonly
) { // write good data over bad
1289 Status
= write_data_phys(devices
[error_stripe_phys
]->devobj
, devices
[error_stripe_phys
]->fileobj
, cis
[error_stripe_phys
].offset
+ off
,
1290 sector
+ (error_stripe
* Vcb
->superblock
.node_size
), Vcb
->superblock
.node_size
);
1291 if (!NT_SUCCESS(Status
)) {
1292 WARN("write_data_phys returned %08lx\n", Status
);
1293 log_device_error(Vcb
, devices
[error_stripe_phys
], BTRFS_DEV_STAT_WRITE_ERRORS
);
1301 ERR("unrecoverable checksum error at %I64x\n", addr
);
1303 return STATUS_CRC_ERROR
;
1308 ULONG sectors
= length
/ Vcb
->superblock
.sector_size
;
1310 void* ptr
= context
->csum
;
1312 sector
= ExAllocatePoolWithTag(NonPagedPool
, Vcb
->superblock
.sector_size
* (ci
->num_stripes
+ 2), ALLOC_TAG
);
1314 ERR("out of memory\n");
1315 return STATUS_INSUFFICIENT_RESOURCES
;
1318 for (i
= 0; i
< sectors
; i
++) {
1320 uint16_t physstripe
, parity1
, parity2
;
1322 get_raid0_offset(addr
- offset
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), ci
->stripe_length
,
1323 ci
->num_stripes
- 2, &off
, &stripe
);
1325 parity1
= (((addr
- offset
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
)) / ((ci
->num_stripes
- 2) * ci
->stripe_length
)) + ci
->num_stripes
- 2) % ci
->num_stripes
;
1326 parity2
= (parity1
+ 1) % ci
->num_stripes
;
1328 physstripe
= (parity2
+ stripe
+ 1) % ci
->num_stripes
;
1330 if (!devices
[physstripe
] || !devices
[physstripe
]->devobj
|| (context
->csum
&& !check_sector_csum(Vcb
, buf
+ (i
* Vcb
->superblock
.sector_size
), ptr
))) {
1331 uint16_t k
, error_stripe
;
1332 bool recovered
= false, failed
= false;
1333 ULONG num_errors
= 0;
1335 if (devices
[physstripe
] && devices
[physstripe
]->devobj
)
1336 log_device_error(Vcb
, devices
[physstripe
], BTRFS_DEV_STAT_READ_ERRORS
);
1338 j
= (parity2
+ 1) % ci
->num_stripes
;
1340 for (k
= 0; k
< ci
->num_stripes
- 1; k
++) {
1341 if (j
!= physstripe
) {
1342 if (devices
[j
] && devices
[j
]->devobj
) {
1343 Status
= sync_read_phys(devices
[j
]->devobj
, devices
[j
]->fileobj
, cis
[j
].offset
+ off
, Vcb
->superblock
.sector_size
,
1344 sector
+ (k
* Vcb
->superblock
.sector_size
), false);
1345 if (!NT_SUCCESS(Status
)) {
1346 ERR("sync_read_phys returned %08lx\n", Status
);
1347 log_device_error(Vcb
, devices
[j
], BTRFS_DEV_STAT_READ_ERRORS
);
1351 if (num_errors
> 1) {
1360 if (num_errors
> 1) {
1367 j
= (j
+ 1) % ci
->num_stripes
;
1371 if (num_errors
== 0) {
1372 RtlCopyMemory(sector
+ (stripe
* Vcb
->superblock
.sector_size
), sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1374 for (j
= 0; j
< ci
->num_stripes
- 2; j
++) {
1376 do_xor(sector
+ (stripe
* Vcb
->superblock
.sector_size
), sector
+ (j
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1379 if (!ptr
|| check_sector_csum(Vcb
, sector
+ (stripe
* Vcb
->superblock
.sector_size
), ptr
)) {
1380 RtlCopyMemory(buf
+ (i
* Vcb
->superblock
.sector_size
), sector
+ (stripe
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1382 if (devices
[physstripe
] && devices
[physstripe
]->devobj
)
1383 ERR("recovering from checksum error at %I64x, device %I64x\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
),
1384 devices
[physstripe
]->devitem
.dev_id
);
1388 if (!Vcb
->readonly
&& devices
[physstripe
] && devices
[physstripe
]->devobj
&& !devices
[physstripe
]->readonly
) { // write good data over bad
1389 Status
= write_data_phys(devices
[physstripe
]->devobj
, devices
[physstripe
]->fileobj
, cis
[physstripe
].offset
+ off
,
1390 sector
+ (stripe
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1391 if (!NT_SUCCESS(Status
)) {
1392 WARN("write_data_phys returned %08lx\n", Status
);
1393 log_device_error(Vcb
, devices
[physstripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
1400 bool read_q
= false;
1402 if (devices
[parity2
] && devices
[parity2
]->devobj
) {
1403 Status
= sync_read_phys(devices
[parity2
]->devobj
, devices
[parity2
]->fileobj
, cis
[parity2
].offset
+ off
,
1404 Vcb
->superblock
.sector_size
, sector
+ ((ci
->num_stripes
- 1) * Vcb
->superblock
.sector_size
), false);
1405 if (!NT_SUCCESS(Status
)) {
1406 ERR("sync_read_phys returned %08lx\n", Status
);
1407 log_device_error(Vcb
, devices
[parity2
], BTRFS_DEV_STAT_READ_ERRORS
);
1413 if (num_errors
== 1) {
1414 raid6_recover2(sector
, ci
->num_stripes
, Vcb
->superblock
.sector_size
, stripe
, error_stripe
, sector
+ (ci
->num_stripes
* Vcb
->superblock
.sector_size
));
1416 if (!devices
[physstripe
] || !devices
[physstripe
]->devobj
)
1419 recovered
= check_sector_csum(Vcb
, sector
+ (ci
->num_stripes
* Vcb
->superblock
.sector_size
), ptr
);
1421 for (j
= 0; j
< ci
->num_stripes
- 1; j
++) {
1423 raid6_recover2(sector
, ci
->num_stripes
, Vcb
->superblock
.sector_size
, stripe
, j
, sector
+ (ci
->num_stripes
* Vcb
->superblock
.sector_size
));
1425 if (check_sector_csum(Vcb
, sector
+ (ci
->num_stripes
* Vcb
->superblock
.sector_size
), ptr
)) {
1436 uint16_t error_stripe_phys
= (parity2
+ error_stripe
+ 1) % ci
->num_stripes
;
1438 if (devices
[physstripe
] && devices
[physstripe
]->devobj
)
1439 ERR("recovering from checksum error at %I64x, device %I64x\n",
1440 addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
), devices
[physstripe
]->devitem
.dev_id
);
1442 RtlCopyMemory(buf
+ (i
* Vcb
->superblock
.sector_size
), sector
+ (ci
->num_stripes
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1444 if (!Vcb
->readonly
&& devices
[physstripe
] && devices
[physstripe
]->devobj
&& !devices
[physstripe
]->readonly
) { // write good data over bad
1445 Status
= write_data_phys(devices
[physstripe
]->devobj
, devices
[physstripe
]->fileobj
, cis
[physstripe
].offset
+ off
,
1446 sector
+ (ci
->num_stripes
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1447 if (!NT_SUCCESS(Status
)) {
1448 WARN("write_data_phys returned %08lx\n", Status
);
1449 log_device_error(Vcb
, devices
[physstripe
], BTRFS_DEV_STAT_WRITE_ERRORS
);
1453 if (devices
[error_stripe_phys
] && devices
[error_stripe_phys
]->devobj
) {
1454 if (error_stripe
== ci
->num_stripes
- 2) {
1455 ERR("recovering from parity error at %I64x, device %I64x\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
),
1456 devices
[error_stripe_phys
]->devitem
.dev_id
);
1458 log_device_error(Vcb
, devices
[error_stripe_phys
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1460 RtlZeroMemory(sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1462 for (j
= 0; j
< ci
->num_stripes
- 2; j
++) {
1464 do_xor(sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.sector_size
), sector
+ (ci
->num_stripes
* Vcb
->superblock
.sector_size
),
1465 Vcb
->superblock
.sector_size
);
1467 do_xor(sector
+ ((ci
->num_stripes
- 2) * Vcb
->superblock
.sector_size
), sector
+ (j
* Vcb
->superblock
.sector_size
),
1468 Vcb
->superblock
.sector_size
);
1472 ERR("recovering from checksum error at %I64x, device %I64x\n",
1473 addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
) + ((error_stripe
- stripe
) * ci
->stripe_length
),
1474 devices
[error_stripe_phys
]->devitem
.dev_id
);
1476 log_device_error(Vcb
, devices
[error_stripe_phys
], BTRFS_DEV_STAT_CORRUPTION_ERRORS
);
1478 RtlCopyMemory(sector
+ (error_stripe
* Vcb
->superblock
.sector_size
),
1479 sector
+ ((ci
->num_stripes
+ 1) * Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1483 if (!Vcb
->readonly
&& devices
[error_stripe_phys
] && devices
[error_stripe_phys
]->devobj
&& !devices
[error_stripe_phys
]->readonly
) { // write good data over bad
1484 Status
= write_data_phys(devices
[error_stripe_phys
]->devobj
, devices
[error_stripe_phys
]->fileobj
, cis
[error_stripe_phys
].offset
+ off
,
1485 sector
+ (error_stripe
* Vcb
->superblock
.sector_size
), Vcb
->superblock
.sector_size
);
1486 if (!NT_SUCCESS(Status
)) {
1487 WARN("write_data_phys returned %08lx\n", Status
);
1488 log_device_error(Vcb
, devices
[error_stripe_phys
], BTRFS_DEV_STAT_WRITE_ERRORS
);
1496 ERR("unrecoverable checksum error at %I64x\n", addr
+ UInt32x32To64(i
, Vcb
->superblock
.sector_size
));
1498 return STATUS_CRC_ERROR
;
1503 ptr
= (uint8_t*)ptr
+ Vcb
->csum_size
;
1509 return STATUS_SUCCESS
;
1512 NTSTATUS
read_data(_In_ device_extension
* Vcb
, _In_
uint64_t addr
, _In_
uint32_t length
, _In_reads_bytes_opt_(length
*sizeof(uint32_t)/Vcb
->superblock
.sector_size
) void* csum
,
1513 _In_
bool is_tree
, _Out_writes_bytes_(length
) uint8_t* buf
, _In_opt_ chunk
* c
, _Out_opt_ chunk
** pc
, _In_opt_ PIRP Irp
, _In_
uint64_t generation
, _In_
bool file_read
,
1514 _In_ ULONG priority
) {
1516 CHUNK_ITEM_STRIPE
* cis
;
1517 read_data_context context
;
1518 uint64_t type
, offset
, total_reading
= 0;
1520 device
** devices
= NULL
;
1521 uint16_t i
, startoffstripe
, allowed_missing
, missing_devices
= 0;
1522 uint8_t* dummypage
= NULL
;
1523 PMDL dummy_mdl
= NULL
;
1525 uint64_t lockaddr
, locklen
;
1527 if (Vcb
->log_to_phys_loaded
) {
1529 c
= get_chunk_from_address(Vcb
, addr
);
1532 ERR("get_chunk_from_address failed\n");
1533 return STATUS_INTERNAL_ERROR
;
1539 devices
= c
->devices
;
1544 LIST_ENTRY
* le
= Vcb
->sys_chunks
.Flink
;
1549 while (le
!= &Vcb
->sys_chunks
) {
1550 sys_chunk
* sc
= CONTAINING_RECORD(le
, sys_chunk
, list_entry
);
1552 if (sc
->key
.obj_id
== 0x100 && sc
->key
.obj_type
== TYPE_CHUNK_ITEM
&& sc
->key
.offset
<= addr
) {
1553 CHUNK_ITEM
* chunk_item
= sc
->data
;
1555 if ((addr
- sc
->key
.offset
) < chunk_item
->size
&& chunk_item
->num_stripes
> 0) {
1557 offset
= sc
->key
.offset
;
1558 cis
= (CHUNK_ITEM_STRIPE
*)&chunk_item
[1];
1560 devices
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(device
*) * ci
->num_stripes
, ALLOC_TAG
);
1562 ERR("out of memory\n");
1563 return STATUS_INSUFFICIENT_RESOURCES
;
1566 for (i
= 0; i
< ci
->num_stripes
; i
++) {
1567 devices
[i
] = find_device_from_uuid(Vcb
, &cis
[i
].dev_uuid
);
1578 ERR("could not find chunk for %I64x in bootstrap\n", addr
);
1579 return STATUS_INTERNAL_ERROR
;
1586 if (ci
->type
& BLOCK_FLAG_DUPLICATE
) {
1587 type
= BLOCK_FLAG_DUPLICATE
;
1588 allowed_missing
= ci
->num_stripes
- 1;
1589 } else if (ci
->type
& BLOCK_FLAG_RAID0
) {
1590 type
= BLOCK_FLAG_RAID0
;
1591 allowed_missing
= 0;
1592 } else if (ci
->type
& BLOCK_FLAG_RAID1
) {
1593 type
= BLOCK_FLAG_DUPLICATE
;
1594 allowed_missing
= 1;
1595 } else if (ci
->type
& BLOCK_FLAG_RAID10
) {
1596 type
= BLOCK_FLAG_RAID10
;
1597 allowed_missing
= 1;
1598 } else if (ci
->type
& BLOCK_FLAG_RAID5
) {
1599 type
= BLOCK_FLAG_RAID5
;
1600 allowed_missing
= 1;
1601 } else if (ci
->type
& BLOCK_FLAG_RAID6
) {
1602 type
= BLOCK_FLAG_RAID6
;
1603 allowed_missing
= 2;
1604 } else if (ci
->type
& BLOCK_FLAG_RAID1C3
) {
1605 type
= BLOCK_FLAG_DUPLICATE
;
1606 allowed_missing
= 2;
1607 } else if (ci
->type
& BLOCK_FLAG_RAID1C4
) {
1608 type
= BLOCK_FLAG_DUPLICATE
;
1609 allowed_missing
= 3;
1611 type
= BLOCK_FLAG_DUPLICATE
;
1612 allowed_missing
= 0;
1615 cis
= (CHUNK_ITEM_STRIPE
*)&ci
[1];
1617 RtlZeroMemory(&context
, sizeof(read_data_context
));
1618 KeInitializeEvent(&context
.Event
, NotificationEvent
, false);
1620 context
.stripes
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(read_data_stripe
) * ci
->num_stripes
, ALLOC_TAG
);
1621 if (!context
.stripes
) {
1622 ERR("out of memory\n");
1623 return STATUS_INSUFFICIENT_RESOURCES
;
1626 if (c
&& (type
== BLOCK_FLAG_RAID5
|| type
== BLOCK_FLAG_RAID6
)) {
1627 get_raid56_lock_range(c
, addr
, length
, &lockaddr
, &locklen
);
1628 chunk_lock_range(Vcb
, c
, lockaddr
, locklen
);
1631 RtlZeroMemory(context
.stripes
, sizeof(read_data_stripe
) * ci
->num_stripes
);
1633 context
.buflen
= length
;
1634 context
.num_stripes
= ci
->num_stripes
;
1635 context
.stripes_left
= context
.num_stripes
;
1636 context
.sector_size
= Vcb
->superblock
.sector_size
;
1637 context
.csum
= csum
;
1638 context
.tree
= is_tree
;
1639 context
.type
= type
;
1641 if (type
== BLOCK_FLAG_RAID0
) {
1642 uint64_t startoff
, endoff
;
1643 uint16_t endoffstripe
, stripe
;
1644 uint32_t *stripeoff
, pos
;
1648 // FIXME - test this still works if page size isn't the same as sector size
1650 // This relies on the fact that MDLs are followed in memory by the page file numbers,
1651 // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0
1652 // data for you without doing a memcpy yourself.
1653 // MDLs are officially opaque, so this might very well break in future versions of Windows.
1655 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
, &startoff
, &startoffstripe
);
1656 get_raid0_offset(addr
+ length
- offset
- 1, ci
->stripe_length
, ci
->num_stripes
, &endoff
, &endoffstripe
);
1659 // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL
1660 // with duplicated dummy PFNs, which confuse check_csum. Ah well.
1661 // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested.
1663 context
.va
= ExAllocatePoolWithTag(NonPagedPool
, length
, ALLOC_TAG
);
1666 ERR("out of memory\n");
1667 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1673 master_mdl
= IoAllocateMdl(context
.va
, length
, false, false, NULL
);
1675 ERR("out of memory\n");
1676 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1680 Status
= STATUS_SUCCESS
;
1683 MmProbeAndLockPages(master_mdl
, KernelMode
, IoWriteAccess
);
1684 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
1685 Status
= _SEH2_GetExceptionCode();
1688 if (!NT_SUCCESS(Status
)) {
1689 ERR("MmProbeAndLockPages threw exception %08lx\n", Status
);
1690 IoFreeMdl(master_mdl
);
1694 pfns
= (PFN_NUMBER
*)(master_mdl
+ 1);
1696 for (i
= 0; i
< ci
->num_stripes
; i
++) {
1697 if (startoffstripe
> i
)
1698 context
.stripes
[i
].stripestart
= startoff
- (startoff
% ci
->stripe_length
) + ci
->stripe_length
;
1699 else if (startoffstripe
== i
)
1700 context
.stripes
[i
].stripestart
= startoff
;
1702 context
.stripes
[i
].stripestart
= startoff
- (startoff
% ci
->stripe_length
);
1704 if (endoffstripe
> i
)
1705 context
.stripes
[i
].stripeend
= endoff
- (endoff
% ci
->stripe_length
) + ci
->stripe_length
;
1706 else if (endoffstripe
== i
)
1707 context
.stripes
[i
].stripeend
= endoff
+ 1;
1709 context
.stripes
[i
].stripeend
= endoff
- (endoff
% ci
->stripe_length
);
1711 if (context
.stripes
[i
].stripestart
!= context
.stripes
[i
].stripeend
) {
1712 context
.stripes
[i
].mdl
= IoAllocateMdl(context
.va
, (ULONG
)(context
.stripes
[i
].stripeend
- context
.stripes
[i
].stripestart
), false, false, NULL
);
1714 if (!context
.stripes
[i
].mdl
) {
1715 ERR("IoAllocateMdl failed\n");
1716 MmUnlockPages(master_mdl
);
1717 IoFreeMdl(master_mdl
);
1718 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1724 stripeoff
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(uint32_t) * ci
->num_stripes
, ALLOC_TAG
);
1726 ERR("out of memory\n");
1727 MmUnlockPages(master_mdl
);
1728 IoFreeMdl(master_mdl
);
1729 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1733 RtlZeroMemory(stripeoff
, sizeof(uint32_t) * ci
->num_stripes
);
1736 stripe
= startoffstripe
;
1737 while (pos
< length
) {
1738 PFN_NUMBER
* stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
1741 uint32_t readlen
= (uint32_t)min(context
.stripes
[stripe
].stripeend
- context
.stripes
[stripe
].stripestart
, ci
->stripe_length
- (context
.stripes
[stripe
].stripestart
% ci
->stripe_length
));
1743 RtlCopyMemory(stripe_pfns
, pfns
, readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1745 stripeoff
[stripe
] += readlen
;
1747 } else if (length
- pos
< ci
->stripe_length
) {
1748 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (length
- pos
) * sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1752 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (ULONG
)(ci
->stripe_length
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
1754 stripeoff
[stripe
] += (uint32_t)ci
->stripe_length
;
1755 pos
+= (uint32_t)ci
->stripe_length
;
1758 stripe
= (stripe
+ 1) % ci
->num_stripes
;
1761 MmUnlockPages(master_mdl
);
1762 IoFreeMdl(master_mdl
);
1764 ExFreePool(stripeoff
);
1765 } else if (type
== BLOCK_FLAG_RAID10
) {
1766 uint64_t startoff
, endoff
;
1767 uint16_t endoffstripe
, j
, stripe
;
1771 uint32_t* stripeoff
, pos
;
1772 read_data_stripe
** stripes
;
1775 orig_ls
= c
->last_stripe
;
1779 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
/ ci
->sub_stripes
, &startoff
, &startoffstripe
);
1780 get_raid0_offset(addr
+ length
- offset
- 1, ci
->stripe_length
, ci
->num_stripes
/ ci
->sub_stripes
, &endoff
, &endoffstripe
);
1782 if ((ci
->num_stripes
% ci
->sub_stripes
) != 0) {
1783 ERR("chunk %I64x: num_stripes %x was not a multiple of sub_stripes %x!\n", offset
, ci
->num_stripes
, ci
->sub_stripes
);
1784 Status
= STATUS_INTERNAL_ERROR
;
1789 context
.va
= ExAllocatePoolWithTag(NonPagedPool
, length
, ALLOC_TAG
);
1792 ERR("out of memory\n");
1793 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1799 context
.firstoff
= (uint16_t)((startoff
% ci
->stripe_length
) / Vcb
->superblock
.sector_size
);
1800 context
.startoffstripe
= startoffstripe
;
1801 context
.sectors_per_stripe
= (uint16_t)(ci
->stripe_length
/ Vcb
->superblock
.sector_size
);
1803 startoffstripe
*= ci
->sub_stripes
;
1804 endoffstripe
*= ci
->sub_stripes
;
1807 c
->last_stripe
= (orig_ls
+ 1) % ci
->sub_stripes
;
1809 master_mdl
= IoAllocateMdl(context
.va
, length
, false, false, NULL
);
1811 ERR("out of memory\n");
1812 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1816 Status
= STATUS_SUCCESS
;
1819 MmProbeAndLockPages(master_mdl
, KernelMode
, IoWriteAccess
);
1820 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
1821 Status
= _SEH2_GetExceptionCode();
1824 if (!NT_SUCCESS(Status
)) {
1825 ERR("MmProbeAndLockPages threw exception %08lx\n", Status
);
1826 IoFreeMdl(master_mdl
);
1830 pfns
= (PFN_NUMBER
*)(master_mdl
+ 1);
1832 stripes
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(read_data_stripe
*) * ci
->num_stripes
/ ci
->sub_stripes
, ALLOC_TAG
);
1834 ERR("out of memory\n");
1835 MmUnlockPages(master_mdl
);
1836 IoFreeMdl(master_mdl
);
1837 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1841 RtlZeroMemory(stripes
, sizeof(read_data_stripe
*) * ci
->num_stripes
/ ci
->sub_stripes
);
1843 for (i
= 0; i
< ci
->num_stripes
; i
+= ci
->sub_stripes
) {
1844 uint64_t sstart
, send
;
1845 bool stripeset
= false;
1847 if (startoffstripe
> i
)
1848 sstart
= startoff
- (startoff
% ci
->stripe_length
) + ci
->stripe_length
;
1849 else if (startoffstripe
== i
)
1852 sstart
= startoff
- (startoff
% ci
->stripe_length
);
1854 if (endoffstripe
> i
)
1855 send
= endoff
- (endoff
% ci
->stripe_length
) + ci
->stripe_length
;
1856 else if (endoffstripe
== i
)
1859 send
= endoff
- (endoff
% ci
->stripe_length
);
1861 for (j
= 0; j
< ci
->sub_stripes
; j
++) {
1862 if (j
== orig_ls
&& devices
[i
+j
] && devices
[i
+j
]->devobj
) {
1863 context
.stripes
[i
+j
].stripestart
= sstart
;
1864 context
.stripes
[i
+j
].stripeend
= send
;
1865 stripes
[i
/ ci
->sub_stripes
] = &context
.stripes
[i
+j
];
1867 if (sstart
!= send
) {
1868 context
.stripes
[i
+j
].mdl
= IoAllocateMdl(context
.va
, (ULONG
)(send
- sstart
), false, false, NULL
);
1870 if (!context
.stripes
[i
+j
].mdl
) {
1871 ERR("IoAllocateMdl failed\n");
1872 MmUnlockPages(master_mdl
);
1873 IoFreeMdl(master_mdl
);
1874 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1881 context
.stripes
[i
+j
].status
= ReadDataStatus_Skip
;
1885 for (j
= 0; j
< ci
->sub_stripes
; j
++) {
1886 if (devices
[i
+j
] && devices
[i
+j
]->devobj
) {
1887 context
.stripes
[i
+j
].stripestart
= sstart
;
1888 context
.stripes
[i
+j
].stripeend
= send
;
1889 context
.stripes
[i
+j
].status
= ReadDataStatus_Pending
;
1890 stripes
[i
/ ci
->sub_stripes
] = &context
.stripes
[i
+j
];
1892 if (sstart
!= send
) {
1893 context
.stripes
[i
+j
].mdl
= IoAllocateMdl(context
.va
, (ULONG
)(send
- sstart
), false, false, NULL
);
1895 if (!context
.stripes
[i
+j
].mdl
) {
1896 ERR("IoAllocateMdl failed\n");
1897 MmUnlockPages(master_mdl
);
1898 IoFreeMdl(master_mdl
);
1899 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1910 ERR("could not find stripe to read\n");
1911 Status
= STATUS_DEVICE_NOT_READY
;
1917 stripeoff
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(uint32_t) * ci
->num_stripes
/ ci
->sub_stripes
, ALLOC_TAG
);
1919 ERR("out of memory\n");
1920 MmUnlockPages(master_mdl
);
1921 IoFreeMdl(master_mdl
);
1922 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1926 RtlZeroMemory(stripeoff
, sizeof(uint32_t) * ci
->num_stripes
/ ci
->sub_stripes
);
1929 stripe
= startoffstripe
/ ci
->sub_stripes
;
1930 while (pos
< length
) {
1931 PFN_NUMBER
* stripe_pfns
= (PFN_NUMBER
*)(stripes
[stripe
]->mdl
+ 1);
1934 uint32_t readlen
= (uint32_t)min(stripes
[stripe
]->stripeend
- stripes
[stripe
]->stripestart
,
1935 ci
->stripe_length
- (stripes
[stripe
]->stripestart
% ci
->stripe_length
));
1937 RtlCopyMemory(stripe_pfns
, pfns
, readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1939 stripeoff
[stripe
] += readlen
;
1941 } else if (length
- pos
< ci
->stripe_length
) {
1942 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (length
- pos
) * sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
1946 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (ULONG
)(ci
->stripe_length
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
1948 stripeoff
[stripe
] += (ULONG
)ci
->stripe_length
;
1949 pos
+= (ULONG
)ci
->stripe_length
;
1952 stripe
= (stripe
+ 1) % (ci
->num_stripes
/ ci
->sub_stripes
);
1955 MmUnlockPages(master_mdl
);
1956 IoFreeMdl(master_mdl
);
1958 ExFreePool(stripeoff
);
1959 ExFreePool(stripes
);
1960 } else if (type
== BLOCK_FLAG_DUPLICATE
) {
1964 orig_ls
= i
= c
->last_stripe
;
1968 while (!devices
[i
] || !devices
[i
]->devobj
) {
1969 i
= (i
+ 1) % ci
->num_stripes
;
1972 ERR("no devices available to service request\n");
1973 Status
= STATUS_DEVICE_NOT_READY
;
1979 c
->last_stripe
= (i
+ 1) % ci
->num_stripes
;
1981 context
.stripes
[i
].stripestart
= addr
- offset
;
1982 context
.stripes
[i
].stripeend
= context
.stripes
[i
].stripestart
+ length
;
1985 context
.va
= ExAllocatePoolWithTag(NonPagedPool
, length
, ALLOC_TAG
);
1988 ERR("out of memory\n");
1989 Status
= STATUS_INSUFFICIENT_RESOURCES
;
1993 context
.stripes
[i
].mdl
= IoAllocateMdl(context
.va
, length
, false, false, NULL
);
1994 if (!context
.stripes
[i
].mdl
) {
1995 ERR("IoAllocateMdl failed\n");
1996 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2000 MmBuildMdlForNonPagedPool(context
.stripes
[i
].mdl
);
2002 context
.stripes
[i
].mdl
= IoAllocateMdl(buf
, length
, false, false, NULL
);
2004 if (!context
.stripes
[i
].mdl
) {
2005 ERR("IoAllocateMdl failed\n");
2006 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2010 Status
= STATUS_SUCCESS
;
2013 MmProbeAndLockPages(context
.stripes
[i
].mdl
, KernelMode
, IoWriteAccess
);
2014 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
2015 Status
= _SEH2_GetExceptionCode();
2018 if (!NT_SUCCESS(Status
)) {
2019 ERR("MmProbeAndLockPages threw exception %08lx\n", Status
);
2023 } else if (type
== BLOCK_FLAG_RAID5
) {
2024 uint64_t startoff
, endoff
;
2025 uint16_t endoffstripe
, parity
;
2026 uint32_t *stripeoff
, pos
;
2028 PFN_NUMBER
*pfns
, dummy
;
2029 bool need_dummy
= false;
2031 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
- 1, &startoff
, &startoffstripe
);
2032 get_raid0_offset(addr
+ length
- offset
- 1, ci
->stripe_length
, ci
->num_stripes
- 1, &endoff
, &endoffstripe
);
2035 context
.va
= ExAllocatePoolWithTag(NonPagedPool
, length
, ALLOC_TAG
);
2038 ERR("out of memory\n");
2039 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2045 master_mdl
= IoAllocateMdl(context
.va
, length
, false, false, NULL
);
2047 ERR("out of memory\n");
2048 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2052 Status
= STATUS_SUCCESS
;
2055 MmProbeAndLockPages(master_mdl
, KernelMode
, IoWriteAccess
);
2056 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
2057 Status
= _SEH2_GetExceptionCode();
2060 if (!NT_SUCCESS(Status
)) {
2061 ERR("MmProbeAndLockPages threw exception %08lx\n", Status
);
2062 IoFreeMdl(master_mdl
);
2066 pfns
= (PFN_NUMBER
*)(master_mdl
+ 1);
2069 while (pos
< length
) {
2070 parity
= (((addr
- offset
+ pos
) / ((ci
->num_stripes
- 1) * ci
->stripe_length
)) + ci
->num_stripes
- 1) % ci
->num_stripes
;
2073 uint16_t stripe
= (parity
+ startoffstripe
+ 1) % ci
->num_stripes
;
2074 ULONG skip
, readlen
;
2077 while (stripe
!= parity
) {
2078 if (i
== startoffstripe
) {
2079 readlen
= min(length
, (ULONG
)(ci
->stripe_length
- (startoff
% ci
->stripe_length
)));
2081 context
.stripes
[stripe
].stripestart
= startoff
;
2082 context
.stripes
[stripe
].stripeend
= startoff
+ readlen
;
2089 readlen
= min(length
- pos
, (ULONG
)ci
->stripe_length
);
2091 context
.stripes
[stripe
].stripestart
= startoff
- (startoff
% ci
->stripe_length
);
2092 context
.stripes
[stripe
].stripeend
= context
.stripes
[stripe
].stripestart
+ readlen
;
2101 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2107 for (i
= 0; i
< startoffstripe
; i
++) {
2108 uint16_t stripe2
= (parity
+ i
+ 1) % ci
->num_stripes
;
2110 context
.stripes
[stripe2
].stripestart
= context
.stripes
[stripe2
].stripeend
= startoff
- (startoff
% ci
->stripe_length
) + ci
->stripe_length
;
2113 context
.stripes
[parity
].stripestart
= context
.stripes
[parity
].stripeend
= startoff
- (startoff
% ci
->stripe_length
) + ci
->stripe_length
;
2115 if (length
- pos
> ci
->num_stripes
* (ci
->num_stripes
- 1) * ci
->stripe_length
) {
2116 skip
= (ULONG
)(((length
- pos
) / (ci
->num_stripes
* (ci
->num_stripes
- 1) * ci
->stripe_length
)) - 1);
2118 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2119 context
.stripes
[i
].stripeend
+= skip
* ci
->num_stripes
* ci
->stripe_length
;
2122 pos
+= (uint32_t)(skip
* (ci
->num_stripes
- 1) * ci
->num_stripes
* ci
->stripe_length
);
2125 } else if (length
- pos
>= ci
->stripe_length
* (ci
->num_stripes
- 1)) {
2126 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2127 context
.stripes
[i
].stripeend
+= ci
->stripe_length
;
2130 pos
+= (uint32_t)(ci
->stripe_length
* (ci
->num_stripes
- 1));
2133 uint16_t stripe
= (parity
+ 1) % ci
->num_stripes
;
2136 while (stripe
!= parity
) {
2137 if (endoffstripe
== i
) {
2138 context
.stripes
[stripe
].stripeend
= endoff
+ 1;
2140 } else if (endoffstripe
> i
)
2141 context
.stripes
[stripe
].stripeend
= endoff
- (endoff
% ci
->stripe_length
) + ci
->stripe_length
;
2144 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2151 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2152 if (context
.stripes
[i
].stripestart
!= context
.stripes
[i
].stripeend
) {
2153 context
.stripes
[i
].mdl
= IoAllocateMdl(context
.va
, (ULONG
)(context
.stripes
[i
].stripeend
- context
.stripes
[i
].stripestart
),
2154 false, false, NULL
);
2156 if (!context
.stripes
[i
].mdl
) {
2157 ERR("IoAllocateMdl failed\n");
2158 MmUnlockPages(master_mdl
);
2159 IoFreeMdl(master_mdl
);
2160 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2167 dummypage
= ExAllocatePoolWithTag(NonPagedPool
, PAGE_SIZE
, ALLOC_TAG
);
2169 ERR("out of memory\n");
2170 MmUnlockPages(master_mdl
);
2171 IoFreeMdl(master_mdl
);
2172 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2176 dummy_mdl
= IoAllocateMdl(dummypage
, PAGE_SIZE
, false, false, NULL
);
2178 ERR("IoAllocateMdl failed\n");
2179 MmUnlockPages(master_mdl
);
2180 IoFreeMdl(master_mdl
);
2181 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2185 MmBuildMdlForNonPagedPool(dummy_mdl
);
2187 dummy
= *(PFN_NUMBER
*)(dummy_mdl
+ 1);
2190 stripeoff
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(uint32_t) * ci
->num_stripes
, ALLOC_TAG
);
2192 ERR("out of memory\n");
2193 MmUnlockPages(master_mdl
);
2194 IoFreeMdl(master_mdl
);
2195 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2199 RtlZeroMemory(stripeoff
, sizeof(uint32_t) * ci
->num_stripes
);
2203 while (pos
< length
) {
2204 PFN_NUMBER
* stripe_pfns
;
2206 parity
= (((addr
- offset
+ pos
) / ((ci
->num_stripes
- 1) * ci
->stripe_length
)) + ci
->num_stripes
- 1) % ci
->num_stripes
;
2209 uint16_t stripe
= (parity
+ startoffstripe
+ 1) % ci
->num_stripes
;
2210 uint32_t readlen
= min(length
- pos
, (uint32_t)min(context
.stripes
[stripe
].stripeend
- context
.stripes
[stripe
].stripestart
,
2211 ci
->stripe_length
- (context
.stripes
[stripe
].stripestart
% ci
->stripe_length
)));
2213 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2215 RtlCopyMemory(stripe_pfns
, pfns
, readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
2217 stripeoff
[stripe
] = readlen
;
2220 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2222 while (stripe
!= parity
) {
2223 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2224 readlen
= min(length
- pos
, (uint32_t)min(context
.stripes
[stripe
].stripeend
- context
.stripes
[stripe
].stripestart
, ci
->stripe_length
));
2229 RtlCopyMemory(stripe_pfns
, &pfns
[pos
>> PAGE_SHIFT
], readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
2231 stripeoff
[stripe
] = readlen
;
2234 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2236 } else if (length
- pos
>= ci
->stripe_length
* (ci
->num_stripes
- 1)) {
2237 uint16_t stripe
= (parity
+ 1) % ci
->num_stripes
;
2240 while (stripe
!= parity
) {
2241 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2243 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (ULONG
)(ci
->stripe_length
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
2245 stripeoff
[stripe
] += (uint32_t)ci
->stripe_length
;
2246 pos
+= (uint32_t)ci
->stripe_length
;
2248 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2251 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[parity
].mdl
+ 1);
2253 for (k
= 0; k
< ci
->stripe_length
>> PAGE_SHIFT
; k
++) {
2254 stripe_pfns
[stripeoff
[parity
] >> PAGE_SHIFT
] = dummy
;
2255 stripeoff
[parity
] += PAGE_SIZE
;
2258 uint16_t stripe
= (parity
+ 1) % ci
->num_stripes
;
2261 while (pos
< length
) {
2262 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2263 readlen
= min(length
- pos
, (ULONG
)min(context
.stripes
[stripe
].stripeend
- context
.stripes
[stripe
].stripestart
, ci
->stripe_length
));
2268 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
2270 stripeoff
[stripe
] += readlen
;
2273 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2278 MmUnlockPages(master_mdl
);
2279 IoFreeMdl(master_mdl
);
2281 ExFreePool(stripeoff
);
2282 } else if (type
== BLOCK_FLAG_RAID6
) {
2283 uint64_t startoff
, endoff
;
2284 uint16_t endoffstripe
, parity1
;
2285 uint32_t *stripeoff
, pos
;
2287 PFN_NUMBER
*pfns
, dummy
;
2288 bool need_dummy
= false;
2290 get_raid0_offset(addr
- offset
, ci
->stripe_length
, ci
->num_stripes
- 2, &startoff
, &startoffstripe
);
2291 get_raid0_offset(addr
+ length
- offset
- 1, ci
->stripe_length
, ci
->num_stripes
- 2, &endoff
, &endoffstripe
);
2294 context
.va
= ExAllocatePoolWithTag(NonPagedPool
, length
, ALLOC_TAG
);
2297 ERR("out of memory\n");
2298 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2304 master_mdl
= IoAllocateMdl(context
.va
, length
, false, false, NULL
);
2306 ERR("out of memory\n");
2307 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2311 Status
= STATUS_SUCCESS
;
2314 MmProbeAndLockPages(master_mdl
, KernelMode
, IoWriteAccess
);
2315 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
2316 Status
= _SEH2_GetExceptionCode();
2319 if (!NT_SUCCESS(Status
)) {
2320 ERR("MmProbeAndLockPages threw exception %08lx\n", Status
);
2321 IoFreeMdl(master_mdl
);
2325 pfns
= (PFN_NUMBER
*)(master_mdl
+ 1);
2328 while (pos
< length
) {
2329 parity1
= (((addr
- offset
+ pos
) / ((ci
->num_stripes
- 2) * ci
->stripe_length
)) + ci
->num_stripes
- 2) % ci
->num_stripes
;
2332 uint16_t stripe
= (parity1
+ startoffstripe
+ 2) % ci
->num_stripes
, parity2
;
2333 ULONG skip
, readlen
;
2336 while (stripe
!= parity1
) {
2337 if (i
== startoffstripe
) {
2338 readlen
= (ULONG
)min(length
, ci
->stripe_length
- (startoff
% ci
->stripe_length
));
2340 context
.stripes
[stripe
].stripestart
= startoff
;
2341 context
.stripes
[stripe
].stripeend
= startoff
+ readlen
;
2348 readlen
= min(length
- pos
, (ULONG
)ci
->stripe_length
);
2350 context
.stripes
[stripe
].stripestart
= startoff
- (startoff
% ci
->stripe_length
);
2351 context
.stripes
[stripe
].stripeend
= context
.stripes
[stripe
].stripestart
+ readlen
;
2360 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2366 for (i
= 0; i
< startoffstripe
; i
++) {
2367 uint16_t stripe2
= (parity1
+ i
+ 2) % ci
->num_stripes
;
2369 context
.stripes
[stripe2
].stripestart
= context
.stripes
[stripe2
].stripeend
= startoff
- (startoff
% ci
->stripe_length
) + ci
->stripe_length
;
2372 context
.stripes
[parity1
].stripestart
= context
.stripes
[parity1
].stripeend
= startoff
- (startoff
% ci
->stripe_length
) + ci
->stripe_length
;
2374 parity2
= (parity1
+ 1) % ci
->num_stripes
;
2375 context
.stripes
[parity2
].stripestart
= context
.stripes
[parity2
].stripeend
= startoff
- (startoff
% ci
->stripe_length
) + ci
->stripe_length
;
2377 if (length
- pos
> ci
->num_stripes
* (ci
->num_stripes
- 2) * ci
->stripe_length
) {
2378 skip
= (ULONG
)(((length
- pos
) / (ci
->num_stripes
* (ci
->num_stripes
- 2) * ci
->stripe_length
)) - 1);
2380 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2381 context
.stripes
[i
].stripeend
+= skip
* ci
->num_stripes
* ci
->stripe_length
;
2384 pos
+= (uint32_t)(skip
* (ci
->num_stripes
- 2) * ci
->num_stripes
* ci
->stripe_length
);
2387 } else if (length
- pos
>= ci
->stripe_length
* (ci
->num_stripes
- 2)) {
2388 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2389 context
.stripes
[i
].stripeend
+= ci
->stripe_length
;
2392 pos
+= (uint32_t)(ci
->stripe_length
* (ci
->num_stripes
- 2));
2395 uint16_t stripe
= (parity1
+ 2) % ci
->num_stripes
;
2398 while (stripe
!= parity1
) {
2399 if (endoffstripe
== i
) {
2400 context
.stripes
[stripe
].stripeend
= endoff
+ 1;
2402 } else if (endoffstripe
> i
)
2403 context
.stripes
[stripe
].stripeend
= endoff
- (endoff
% ci
->stripe_length
) + ci
->stripe_length
;
2406 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2413 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2414 if (context
.stripes
[i
].stripestart
!= context
.stripes
[i
].stripeend
) {
2415 context
.stripes
[i
].mdl
= IoAllocateMdl(context
.va
, (ULONG
)(context
.stripes
[i
].stripeend
- context
.stripes
[i
].stripestart
), false, false, NULL
);
2417 if (!context
.stripes
[i
].mdl
) {
2418 ERR("IoAllocateMdl failed\n");
2419 MmUnlockPages(master_mdl
);
2420 IoFreeMdl(master_mdl
);
2421 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2428 dummypage
= ExAllocatePoolWithTag(NonPagedPool
, PAGE_SIZE
, ALLOC_TAG
);
2430 ERR("out of memory\n");
2431 MmUnlockPages(master_mdl
);
2432 IoFreeMdl(master_mdl
);
2433 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2437 dummy_mdl
= IoAllocateMdl(dummypage
, PAGE_SIZE
, false, false, NULL
);
2439 ERR("IoAllocateMdl failed\n");
2440 MmUnlockPages(master_mdl
);
2441 IoFreeMdl(master_mdl
);
2442 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2446 MmBuildMdlForNonPagedPool(dummy_mdl
);
2448 dummy
= *(PFN_NUMBER
*)(dummy_mdl
+ 1);
2451 stripeoff
= ExAllocatePoolWithTag(NonPagedPool
, sizeof(uint32_t) * ci
->num_stripes
, ALLOC_TAG
);
2453 ERR("out of memory\n");
2454 MmUnlockPages(master_mdl
);
2455 IoFreeMdl(master_mdl
);
2456 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2460 RtlZeroMemory(stripeoff
, sizeof(uint32_t) * ci
->num_stripes
);
2464 while (pos
< length
) {
2465 PFN_NUMBER
* stripe_pfns
;
2467 parity1
= (((addr
- offset
+ pos
) / ((ci
->num_stripes
- 2) * ci
->stripe_length
)) + ci
->num_stripes
- 2) % ci
->num_stripes
;
2470 uint16_t stripe
= (parity1
+ startoffstripe
+ 2) % ci
->num_stripes
;
2471 uint32_t readlen
= min(length
- pos
, (uint32_t)min(context
.stripes
[stripe
].stripeend
- context
.stripes
[stripe
].stripestart
,
2472 ci
->stripe_length
- (context
.stripes
[stripe
].stripestart
% ci
->stripe_length
)));
2474 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2476 RtlCopyMemory(stripe_pfns
, pfns
, readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
2478 stripeoff
[stripe
] = readlen
;
2481 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2483 while (stripe
!= parity1
) {
2484 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2485 readlen
= (uint32_t)min(length
- pos
, min(context
.stripes
[stripe
].stripeend
- context
.stripes
[stripe
].stripestart
, ci
->stripe_length
));
2490 RtlCopyMemory(stripe_pfns
, &pfns
[pos
>> PAGE_SHIFT
], readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
2492 stripeoff
[stripe
] = readlen
;
2495 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2497 } else if (length
- pos
>= ci
->stripe_length
* (ci
->num_stripes
- 2)) {
2498 uint16_t stripe
= (parity1
+ 2) % ci
->num_stripes
;
2499 uint16_t parity2
= (parity1
+ 1) % ci
->num_stripes
;
2502 while (stripe
!= parity1
) {
2503 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2505 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], (ULONG
)(ci
->stripe_length
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
));
2507 stripeoff
[stripe
] += (uint32_t)ci
->stripe_length
;
2508 pos
+= (uint32_t)ci
->stripe_length
;
2510 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2513 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[parity1
].mdl
+ 1);
2515 for (k
= 0; k
< ci
->stripe_length
>> PAGE_SHIFT
; k
++) {
2516 stripe_pfns
[stripeoff
[parity1
] >> PAGE_SHIFT
] = dummy
;
2517 stripeoff
[parity1
] += PAGE_SIZE
;
2520 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[parity2
].mdl
+ 1);
2522 for (k
= 0; k
< ci
->stripe_length
>> PAGE_SHIFT
; k
++) {
2523 stripe_pfns
[stripeoff
[parity2
] >> PAGE_SHIFT
] = dummy
;
2524 stripeoff
[parity2
] += PAGE_SIZE
;
2527 uint16_t stripe
= (parity1
+ 2) % ci
->num_stripes
;
2530 while (pos
< length
) {
2531 stripe_pfns
= (PFN_NUMBER
*)(context
.stripes
[stripe
].mdl
+ 1);
2532 readlen
= (uint32_t)min(length
- pos
, min(context
.stripes
[stripe
].stripeend
- context
.stripes
[stripe
].stripestart
, ci
->stripe_length
));
2537 RtlCopyMemory(&stripe_pfns
[stripeoff
[stripe
] >> PAGE_SHIFT
], &pfns
[pos
>> PAGE_SHIFT
], readlen
* sizeof(PFN_NUMBER
) >> PAGE_SHIFT
);
2539 stripeoff
[stripe
] += readlen
;
2542 stripe
= (stripe
+ 1) % ci
->num_stripes
;
2547 MmUnlockPages(master_mdl
);
2548 IoFreeMdl(master_mdl
);
2550 ExFreePool(stripeoff
);
2553 context
.address
= addr
;
2555 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2556 if (!devices
[i
] || !devices
[i
]->devobj
|| context
.stripes
[i
].stripestart
== context
.stripes
[i
].stripeend
) {
2557 context
.stripes
[i
].status
= ReadDataStatus_MissingDevice
;
2558 context
.stripes_left
--;
2560 if (!devices
[i
] || !devices
[i
]->devobj
)
2565 if (missing_devices
> allowed_missing
) {
2566 ERR("not enough devices to service request (%u missing)\n", missing_devices
);
2567 Status
= STATUS_UNEXPECTED_IO_ERROR
;
2571 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2572 PIO_STACK_LOCATION IrpSp
;
2574 if (devices
[i
] && devices
[i
]->devobj
&& context
.stripes
[i
].stripestart
!= context
.stripes
[i
].stripeend
&& context
.stripes
[i
].status
!= ReadDataStatus_Skip
) {
2575 context
.stripes
[i
].context
= (struct read_data_context
*)&context
;
2577 if (type
== BLOCK_FLAG_RAID10
) {
2578 context
.stripes
[i
].stripenum
= i
/ ci
->sub_stripes
;
2582 context
.stripes
[i
].Irp
= IoAllocateIrp(devices
[i
]->devobj
->StackSize
, false);
2584 if (!context
.stripes
[i
].Irp
) {
2585 ERR("IoAllocateIrp failed\n");
2586 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2590 context
.stripes
[i
].Irp
= IoMakeAssociatedIrp(Irp
, devices
[i
]->devobj
->StackSize
);
2592 if (!context
.stripes
[i
].Irp
) {
2593 ERR("IoMakeAssociatedIrp failed\n");
2594 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2599 IrpSp
= IoGetNextIrpStackLocation(context
.stripes
[i
].Irp
);
2600 IrpSp
->MajorFunction
= IRP_MJ_READ
;
2601 IrpSp
->MinorFunction
= IRP_MN_NORMAL
;
2602 IrpSp
->FileObject
= devices
[i
]->fileobj
;
2604 if (devices
[i
]->devobj
->Flags
& DO_BUFFERED_IO
) {
2605 context
.stripes
[i
].Irp
->AssociatedIrp
.SystemBuffer
= ExAllocatePoolWithTag(NonPagedPool
, (ULONG
)(context
.stripes
[i
].stripeend
- context
.stripes
[i
].stripestart
), ALLOC_TAG
);
2606 if (!context
.stripes
[i
].Irp
->AssociatedIrp
.SystemBuffer
) {
2607 ERR("out of memory\n");
2608 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2612 context
.stripes
[i
].Irp
->Flags
|= IRP_BUFFERED_IO
| IRP_DEALLOCATE_BUFFER
| IRP_INPUT_OPERATION
;
2614 context
.stripes
[i
].Irp
->UserBuffer
= MmGetSystemAddressForMdlSafe(context
.stripes
[i
].mdl
, priority
);
2615 } else if (devices
[i
]->devobj
->Flags
& DO_DIRECT_IO
)
2616 context
.stripes
[i
].Irp
->MdlAddress
= context
.stripes
[i
].mdl
;
2618 context
.stripes
[i
].Irp
->UserBuffer
= MmGetSystemAddressForMdlSafe(context
.stripes
[i
].mdl
, priority
);
2620 IrpSp
->Parameters
.Read
.Length
= (ULONG
)(context
.stripes
[i
].stripeend
- context
.stripes
[i
].stripestart
);
2621 IrpSp
->Parameters
.Read
.ByteOffset
.QuadPart
= context
.stripes
[i
].stripestart
+ cis
[i
].offset
;
2623 total_reading
+= IrpSp
->Parameters
.Read
.Length
;
2625 context
.stripes
[i
].Irp
->UserIosb
= &context
.stripes
[i
].iosb
;
2627 IoSetCompletionRoutine(context
.stripes
[i
].Irp
, read_data_completion
, &context
.stripes
[i
], true, true, true);
2629 context
.stripes
[i
].status
= ReadDataStatus_Pending
;
2633 need_to_wait
= false;
2634 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2635 if (context
.stripes
[i
].status
!= ReadDataStatus_MissingDevice
&& context
.stripes
[i
].status
!= ReadDataStatus_Skip
) {
2636 IoCallDriver(devices
[i
]->devobj
, context
.stripes
[i
].Irp
);
2637 need_to_wait
= true;
2642 KeWaitForSingleObject(&context
.Event
, Executive
, KernelMode
, false, NULL
);
2645 fFsRtlUpdateDiskCounters(total_reading
, 0);
2647 // check if any of the devices return a "user-induced" error
2649 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2650 if (context
.stripes
[i
].status
== ReadDataStatus_Error
&& IoIsErrorUserInduced(context
.stripes
[i
].iosb
.Status
)) {
2651 Status
= context
.stripes
[i
].iosb
.Status
;
2656 if (type
== BLOCK_FLAG_RAID0
) {
2657 Status
= read_data_raid0(Vcb
, file_read
? context
.va
: buf
, addr
, length
, &context
, ci
, devices
, generation
, offset
);
2658 if (!NT_SUCCESS(Status
)) {
2659 ERR("read_data_raid0 returned %08lx\n", Status
);
2662 ExFreePool(context
.va
);
2668 RtlCopyMemory(buf
, context
.va
, length
);
2669 ExFreePool(context
.va
);
2671 } else if (type
== BLOCK_FLAG_RAID10
) {
2672 Status
= read_data_raid10(Vcb
, file_read
? context
.va
: buf
, addr
, length
, &context
, ci
, devices
, generation
, offset
);
2674 if (!NT_SUCCESS(Status
)) {
2675 ERR("read_data_raid10 returned %08lx\n", Status
);
2678 ExFreePool(context
.va
);
2684 RtlCopyMemory(buf
, context
.va
, length
);
2685 ExFreePool(context
.va
);
2687 } else if (type
== BLOCK_FLAG_DUPLICATE
) {
2688 Status
= read_data_dup(Vcb
, file_read
? context
.va
: buf
, addr
, &context
, ci
, devices
, generation
);
2689 if (!NT_SUCCESS(Status
)) {
2690 ERR("read_data_dup returned %08lx\n", Status
);
2693 ExFreePool(context
.va
);
2699 RtlCopyMemory(buf
, context
.va
, length
);
2700 ExFreePool(context
.va
);
2702 } else if (type
== BLOCK_FLAG_RAID5
) {
2703 Status
= read_data_raid5(Vcb
, file_read
? context
.va
: buf
, addr
, length
, &context
, ci
, devices
, offset
, generation
, c
, missing_devices
> 0 ? true : false);
2704 if (!NT_SUCCESS(Status
)) {
2705 ERR("read_data_raid5 returned %08lx\n", Status
);
2708 ExFreePool(context
.va
);
2714 RtlCopyMemory(buf
, context
.va
, length
);
2715 ExFreePool(context
.va
);
2717 } else if (type
== BLOCK_FLAG_RAID6
) {
2718 Status
= read_data_raid6(Vcb
, file_read
? context
.va
: buf
, addr
, length
, &context
, ci
, devices
, offset
, generation
, c
, missing_devices
> 0 ? true : false);
2719 if (!NT_SUCCESS(Status
)) {
2720 ERR("read_data_raid6 returned %08lx\n", Status
);
2723 ExFreePool(context
.va
);
2729 RtlCopyMemory(buf
, context
.va
, length
);
2730 ExFreePool(context
.va
);
2735 if (c
&& (type
== BLOCK_FLAG_RAID5
|| type
== BLOCK_FLAG_RAID6
))
2736 chunk_unlock_range(Vcb
, c
, lockaddr
, locklen
);
2739 IoFreeMdl(dummy_mdl
);
2742 ExFreePool(dummypage
);
2744 for (i
= 0; i
< ci
->num_stripes
; i
++) {
2745 if (context
.stripes
[i
].mdl
) {
2746 if (context
.stripes
[i
].mdl
->MdlFlags
& MDL_PAGES_LOCKED
)
2747 MmUnlockPages(context
.stripes
[i
].mdl
);
2749 IoFreeMdl(context
.stripes
[i
].mdl
);
2752 if (context
.stripes
[i
].Irp
)
2753 IoFreeIrp(context
.stripes
[i
].Irp
);
2756 ExFreePool(context
.stripes
);
2758 if (!Vcb
->log_to_phys_loaded
)
2759 ExFreePool(devices
);
2764 NTSTATUS
read_stream(fcb
* fcb
, uint8_t* data
, uint64_t start
, ULONG length
, ULONG
* pbr
) {
2767 TRACE("(%p, %p, %I64x, %lx, %p)\n", fcb
, data
, start
, length
, pbr
);
2771 if (start
>= fcb
->adsdata
.Length
) {
2772 TRACE("tried to read beyond end of stream\n");
2773 return STATUS_END_OF_FILE
;
2777 WARN("tried to read zero bytes\n");
2778 return STATUS_SUCCESS
;
2781 if (start
+ length
< fcb
->adsdata
.Length
)
2784 readlen
= fcb
->adsdata
.Length
- (ULONG
)start
;
2787 RtlCopyMemory(data
, fcb
->adsdata
.Buffer
+ start
, readlen
);
2789 if (pbr
) *pbr
= readlen
;
2791 return STATUS_SUCCESS
;
2798 uint64_t ed_num_bytes
;
2802 LIST_ENTRY list_entry
;
2814 uint8_t compression
;
2815 unsigned int num_extents
;
2816 read_part_extent extents
[1];
2820 LIST_ENTRY list_entry
;
2824 unsigned int offset
;
2828 NTSTATUS
read_file(fcb
* fcb
, uint8_t* data
, uint64_t start
, uint64_t length
, ULONG
* pbr
, PIRP Irp
) {
2830 uint32_t bytes_read
= 0;
2833 POOL_TYPE pool_type
;
2834 LIST_ENTRY read_parts
, calc_jobs
;
2836 TRACE("(%p, %p, %I64x, %I64x, %p)\n", fcb
, data
, start
, length
, pbr
);
2841 if (start
>= fcb
->inode_item
.st_size
) {
2842 WARN("Tried to read beyond end of file\n");
2843 return STATUS_END_OF_FILE
;
2846 InitializeListHead(&read_parts
);
2847 InitializeListHead(&calc_jobs
);
2849 pool_type
= fcb
->Header
.Flags2
& FSRTL_FLAG2_IS_PAGING_FILE
? NonPagedPool
: PagedPool
;
2851 le
= fcb
->extents
.Flink
;
2855 while (le
!= &fcb
->extents
) {
2857 extent
* ext
= CONTAINING_RECORD(le
, extent
, list_entry
);
2860 EXTENT_DATA
* ed
= &ext
->extent_data
;
2861 EXTENT_DATA2
* ed2
= (ed
->type
== EXTENT_TYPE_REGULAR
|| ed
->type
== EXTENT_TYPE_PREALLOC
) ? (EXTENT_DATA2
*)ed
->data
: NULL
;
2863 len
= ed2
? ed2
->num_bytes
: ed
->decoded_size
;
2865 if (ext
->offset
+ len
<= start
) {
2866 last_end
= ext
->offset
+ len
;
2870 if (ext
->offset
> last_end
&& ext
->offset
> start
+ bytes_read
) {
2871 uint32_t read
= (uint32_t)min(length
, ext
->offset
- max(start
, last_end
));
2873 RtlZeroMemory(data
+ bytes_read
, read
);
2878 if (length
== 0 || ext
->offset
> start
+ bytes_read
+ length
)
2881 if (ed
->encryption
!= BTRFS_ENCRYPTION_NONE
) {
2882 WARN("Encryption not supported\n");
2883 Status
= STATUS_NOT_IMPLEMENTED
;
2887 if (ed
->encoding
!= BTRFS_ENCODING_NONE
) {
2888 WARN("Other encodings not supported\n");
2889 Status
= STATUS_NOT_IMPLEMENTED
;
2894 case EXTENT_TYPE_INLINE
:
2896 uint64_t off
= start
+ bytes_read
- ext
->offset
;
2899 if (ed
->compression
== BTRFS_COMPRESSION_NONE
) {
2900 read
= (uint32_t)min(min(len
, ext
->datalen
) - off
, length
);
2902 RtlCopyMemory(data
+ bytes_read
, &ed
->data
[off
], read
);
2903 } else if (ed
->compression
== BTRFS_COMPRESSION_ZLIB
|| ed
->compression
== BTRFS_COMPRESSION_LZO
|| ed
->compression
== BTRFS_COMPRESSION_ZSTD
) {
2906 uint16_t inlen
= ext
->datalen
- (uint16_t)offsetof(EXTENT_DATA
, data
[0]);
2908 if (ed
->decoded_size
== 0 || ed
->decoded_size
> 0xffffffff) {
2909 ERR("ed->decoded_size was invalid (%I64x)\n", ed
->decoded_size
);
2910 Status
= STATUS_INTERNAL_ERROR
;
2914 read
= (uint32_t)min(ed
->decoded_size
- off
, length
);
2917 decomp
= ExAllocatePoolWithTag(NonPagedPool
, (uint32_t)ed
->decoded_size
, ALLOC_TAG
);
2919 ERR("out of memory\n");
2920 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2924 decomp_alloc
= true;
2926 decomp
= data
+ bytes_read
;
2927 decomp_alloc
= false;
2930 if (ed
->compression
== BTRFS_COMPRESSION_ZLIB
) {
2931 Status
= zlib_decompress(ed
->data
, inlen
, decomp
, (uint32_t)(read
+ off
));
2932 if (!NT_SUCCESS(Status
)) {
2933 ERR("zlib_decompress returned %08lx\n", Status
);
2934 if (decomp_alloc
) ExFreePool(decomp
);
2937 } else if (ed
->compression
== BTRFS_COMPRESSION_LZO
) {
2938 if (inlen
< sizeof(uint32_t)) {
2939 ERR("extent data was truncated\n");
2940 Status
= STATUS_INTERNAL_ERROR
;
2941 if (decomp_alloc
) ExFreePool(decomp
);
2944 inlen
-= sizeof(uint32_t);
2946 Status
= lzo_decompress(ed
->data
+ sizeof(uint32_t), inlen
, decomp
, (uint32_t)(read
+ off
), sizeof(uint32_t));
2947 if (!NT_SUCCESS(Status
)) {
2948 ERR("lzo_decompress returned %08lx\n", Status
);
2949 if (decomp_alloc
) ExFreePool(decomp
);
2952 } else if (ed
->compression
== BTRFS_COMPRESSION_ZSTD
) {
2953 Status
= zstd_decompress(ed
->data
, inlen
, decomp
, (uint32_t)(read
+ off
));
2954 if (!NT_SUCCESS(Status
)) {
2955 ERR("zstd_decompress returned %08lx\n", Status
);
2956 if (decomp_alloc
) ExFreePool(decomp
);
2962 RtlCopyMemory(data
+ bytes_read
, decomp
+ off
, read
);
2966 ERR("unhandled compression type %x\n", ed
->compression
);
2967 Status
= STATUS_NOT_IMPLEMENTED
;
2977 case EXTENT_TYPE_REGULAR
:
2981 rp
= ExAllocatePoolWithTag(pool_type
, sizeof(read_part
), ALLOC_TAG
);
2983 ERR("out of memory\n");
2984 Status
= STATUS_INSUFFICIENT_RESOURCES
;
2988 rp
->mdl
= (Irp
&& Irp
->MdlAddress
) ? true : false;
2989 rp
->extents
[0].off
= start
+ bytes_read
- ext
->offset
;
2991 rp
->num_extents
= 1;
2992 rp
->csum_free
= false;
2994 rp
->read
= (uint32_t)(len
- rp
->extents
[0].off
);
2995 if (rp
->read
> length
) rp
->read
= (uint32_t)length
;
2997 if (ed
->compression
== BTRFS_COMPRESSION_NONE
) {
2998 rp
->addr
= ed2
->address
+ ed2
->offset
+ rp
->extents
[0].off
;
2999 rp
->to_read
= (uint32_t)sector_align(rp
->read
, fcb
->Vcb
->superblock
.sector_size
);
3001 if (rp
->addr
% fcb
->Vcb
->superblock
.sector_size
> 0) {
3002 rp
->bumpoff
= rp
->addr
% fcb
->Vcb
->superblock
.sector_size
;
3003 rp
->addr
-= rp
->bumpoff
;
3004 rp
->to_read
= (uint32_t)sector_align(rp
->read
+ rp
->bumpoff
, fcb
->Vcb
->superblock
.sector_size
);
3007 rp
->addr
= ed2
->address
;
3008 rp
->to_read
= (uint32_t)sector_align(ed2
->size
, fcb
->Vcb
->superblock
.sector_size
);
3011 if (ed
->compression
== BTRFS_COMPRESSION_NONE
&& start
% fcb
->Vcb
->superblock
.sector_size
== 0 &&
3012 length
% fcb
->Vcb
->superblock
.sector_size
== 0) {
3013 rp
->buf
= data
+ bytes_read
;
3014 rp
->buf_free
= false;
3016 rp
->buf
= ExAllocatePoolWithTag(pool_type
, rp
->to_read
, ALLOC_TAG
);
3017 rp
->buf_free
= true;
3020 ERR("out of memory\n");
3021 Status
= STATUS_INSUFFICIENT_RESOURCES
;
3029 rp
->c
= get_chunk_from_address(fcb
->Vcb
, rp
->addr
);
3032 ERR("get_chunk_from_address(%I64x) failed\n", rp
->addr
);
3035 ExFreePool(rp
->buf
);
3043 if (ed
->compression
== BTRFS_COMPRESSION_NONE
) {
3044 rp
->csum
= (uint8_t*)ext
->csum
+ (fcb
->Vcb
->csum_size
* (rp
->extents
[0].off
/ fcb
->Vcb
->superblock
.sector_size
));
3046 rp
->csum
= ext
->csum
;
3050 rp
->data
= data
+ bytes_read
;
3051 rp
->compression
= ed
->compression
;
3052 rp
->extents
[0].ed_offset
= ed2
->offset
;
3053 rp
->extents
[0].ed_size
= ed2
->size
;
3054 rp
->extents
[0].ed_num_bytes
= ed2
->num_bytes
;
3056 InsertTailList(&read_parts
, &rp
->list_entry
);
3058 bytes_read
+= rp
->read
;
3064 case EXTENT_TYPE_PREALLOC
:
3066 uint64_t off
= start
+ bytes_read
- ext
->offset
;
3067 uint32_t read
= (uint32_t)(len
- off
);
3069 if (read
> length
) read
= (uint32_t)length
;
3071 RtlZeroMemory(data
+ bytes_read
, read
);
3080 WARN("Unsupported extent data type %u\n", ed
->type
);
3081 Status
= STATUS_NOT_IMPLEMENTED
;
3085 last_end
= ext
->offset
+ len
;
3095 if (!IsListEmpty(&read_parts
) && read_parts
.Flink
->Flink
!= &read_parts
) { // at least two entries in list
3096 read_part
* last_rp
= CONTAINING_RECORD(read_parts
.Flink
, read_part
, list_entry
);
3098 le
= read_parts
.Flink
->Flink
;
3099 while (le
!= &read_parts
) {
3100 LIST_ENTRY
* le2
= le
->Flink
;
3101 read_part
* rp
= CONTAINING_RECORD(le
, read_part
, list_entry
);
3103 // merge together runs
3104 if (rp
->compression
!= BTRFS_COMPRESSION_NONE
&& rp
->compression
== last_rp
->compression
&& rp
->addr
== last_rp
->addr
+ last_rp
->to_read
&&
3105 rp
->data
== (uint8_t*)last_rp
->data
+ last_rp
->read
&& rp
->c
== last_rp
->c
&& ((rp
->csum
&& last_rp
->csum
) || (!rp
->csum
&& !last_rp
->csum
))) {
3108 rp2
= ExAllocatePoolWithTag(pool_type
, offsetof(read_part
, extents
) + (sizeof(read_part_extent
) * (last_rp
->num_extents
+ 1)), ALLOC_TAG
);
3110 rp2
->addr
= last_rp
->addr
;
3111 rp2
->c
= last_rp
->c
;
3112 rp2
->read
= last_rp
->read
+ rp
->read
;
3113 rp2
->to_read
= last_rp
->to_read
+ rp
->to_read
;
3114 rp2
->csum_free
= false;
3116 if (last_rp
->csum
) {
3117 uint32_t sectors
= (last_rp
->to_read
+ rp
->to_read
) / fcb
->Vcb
->superblock
.sector_size
;
3119 rp2
->csum
= ExAllocatePoolWithTag(pool_type
, sectors
* fcb
->Vcb
->csum_size
, ALLOC_TAG
);
3121 ERR("out of memory\n");
3123 Status
= STATUS_INSUFFICIENT_RESOURCES
;
3127 RtlCopyMemory(rp2
->csum
, last_rp
->csum
, last_rp
->to_read
* fcb
->Vcb
->csum_size
/ fcb
->Vcb
->superblock
.sector_size
);
3128 RtlCopyMemory((uint8_t*)rp2
->csum
+ (last_rp
->to_read
* fcb
->Vcb
->csum_size
/ fcb
->Vcb
->superblock
.sector_size
), rp
->csum
,
3129 rp
->to_read
* fcb
->Vcb
->csum_size
/ fcb
->Vcb
->superblock
.sector_size
);
3131 rp2
->csum_free
= true;
3135 rp2
->buf
= ExAllocatePoolWithTag(pool_type
, rp2
->to_read
, ALLOC_TAG
);
3137 ERR("out of memory\n");
3140 ExFreePool(rp2
->csum
);
3143 Status
= STATUS_INSUFFICIENT_RESOURCES
;
3147 rp2
->buf_free
= true;
3150 rp2
->data
= last_rp
->data
;
3151 rp2
->compression
= last_rp
->compression
;
3152 rp2
->num_extents
= last_rp
->num_extents
+ 1;
3154 RtlCopyMemory(rp2
->extents
, last_rp
->extents
, last_rp
->num_extents
* sizeof(read_part_extent
));
3155 RtlCopyMemory(&rp2
->extents
[last_rp
->num_extents
], rp
->extents
, sizeof(read_part_extent
));
3157 InsertHeadList(le
->Blink
, &rp2
->list_entry
);
3160 ExFreePool(rp
->buf
);
3163 ExFreePool(rp
->csum
);
3165 RemoveEntryList(&rp
->list_entry
);
3169 if (last_rp
->buf_free
)
3170 ExFreePool(last_rp
->buf
);
3172 if (last_rp
->csum_free
)
3173 ExFreePool(last_rp
->csum
);
3175 RemoveEntryList(&last_rp
->list_entry
);
3177 ExFreePool(last_rp
);
3187 le
= read_parts
.Flink
;
3188 while (le
!= &read_parts
) {
3189 read_part
* rp
= CONTAINING_RECORD(le
, read_part
, list_entry
);
3191 Status
= read_data(fcb
->Vcb
, rp
->addr
, rp
->to_read
, rp
->csum
, false, rp
->buf
, rp
->c
, NULL
, Irp
, 0, rp
->mdl
,
3192 fcb
&& fcb
->Header
.Flags2
& FSRTL_FLAG2_IS_PAGING_FILE
? HighPagePriority
: NormalPagePriority
);
3193 if (!NT_SUCCESS(Status
)) {
3194 ERR("read_data returned %08lx\n", Status
);
3198 if (rp
->compression
== BTRFS_COMPRESSION_NONE
) {
3200 RtlCopyMemory(rp
->data
, rp
->buf
+ rp
->bumpoff
, rp
->read
);
3202 uint8_t* buf
= rp
->buf
;
3205 for (i
= 0; i
< rp
->num_extents
; i
++) {
3207 for (unsigned int i
= 0; i
< rp
->num_extents
; i
++) {
3208 #endif // __REACTOS__
3209 uint8_t *decomp
= NULL
, *buf2
;
3210 ULONG outlen
, inlen
, off2
;
3211 uint32_t inpageoff
= 0;
3214 off2
= (ULONG
)(rp
->extents
[i
].ed_offset
+ rp
->extents
[i
].off
);
3216 inlen
= (ULONG
)rp
->extents
[i
].ed_size
;
3218 if (rp
->compression
== BTRFS_COMPRESSION_LZO
) {
3219 ULONG inoff
= sizeof(uint32_t);
3221 inlen
-= sizeof(uint32_t);
3223 // If reading a few sectors in, skip to the interesting bit
3224 while (off2
> LZO_PAGE_SIZE
) {
3227 if (inlen
< sizeof(uint32_t))
3230 partlen
= *(uint32_t*)(buf2
+ inoff
);
3232 if (partlen
< inlen
) {
3233 off2
-= LZO_PAGE_SIZE
;
3234 inoff
+= partlen
+ sizeof(uint32_t);
3235 inlen
-= partlen
+ sizeof(uint32_t);
3237 if (LZO_PAGE_SIZE
- (inoff
% LZO_PAGE_SIZE
) < sizeof(uint32_t))
3238 inoff
= ((inoff
/ LZO_PAGE_SIZE
) + 1) * LZO_PAGE_SIZE
;
3243 buf2
= &buf2
[inoff
];
3244 inpageoff
= inoff
% LZO_PAGE_SIZE
;
3248 outlen
= off2
+ min(rp
->read
, (uint32_t)(rp
->extents
[i
].ed_num_bytes
- rp
->extents
[i
].off
));
3250 decomp
= ExAllocatePoolWithTag(pool_type
, outlen
, ALLOC_TAG
);
3252 ERR("out of memory\n");
3253 Status
= STATUS_INSUFFICIENT_RESOURCES
;
3257 outlen
= min(rp
->read
, (uint32_t)(rp
->extents
[i
].ed_num_bytes
- rp
->extents
[i
].off
));
3259 ccj
= (comp_calc_job
*)ExAllocatePoolWithTag(pool_type
, sizeof(comp_calc_job
), ALLOC_TAG
);
3261 ERR("out of memory\n");
3266 Status
= STATUS_INSUFFICIENT_RESOURCES
;
3270 Status
= add_calc_job_decomp(fcb
->Vcb
, rp
->compression
, buf2
, inlen
, decomp
? decomp
: rp
->data
, outlen
,
3271 inpageoff
, &ccj
->cj
);
3272 if (!NT_SUCCESS(Status
)) {
3273 ERR("add_calc_job_decomp returned %08lx\n", Status
);
3283 ccj
->data
= rp
->data
;
3284 ccj
->decomp
= decomp
;
3287 ccj
->length
= (size_t)min(rp
->read
, rp
->extents
[i
].ed_num_bytes
- rp
->extents
[i
].off
);
3289 InsertTailList(&calc_jobs
, &ccj
->list_entry
);
3291 buf
+= rp
->extents
[i
].ed_size
;
3292 rp
->data
= (uint8_t*)rp
->data
+ rp
->extents
[i
].ed_num_bytes
- rp
->extents
[i
].off
;
3293 rp
->read
-= (uint32_t)(rp
->extents
[i
].ed_num_bytes
- rp
->extents
[i
].off
);
3300 if (length
> 0 && start
+ bytes_read
< fcb
->inode_item
.st_size
) {
3301 uint32_t read
= (uint32_t)min(fcb
->inode_item
.st_size
- start
- bytes_read
, length
);
3303 RtlZeroMemory(data
+ bytes_read
, read
);
3309 Status
= STATUS_SUCCESS
;
3311 while (!IsListEmpty(&calc_jobs
)) {
3312 comp_calc_job
* ccj
= CONTAINING_RECORD(RemoveTailList(&calc_jobs
), comp_calc_job
, list_entry
);
3314 calc_thread_main(fcb
->Vcb
, ccj
->cj
);
3316 KeWaitForSingleObject(&ccj
->cj
->event
, Executive
, KernelMode
, false, NULL
);
3318 if (!NT_SUCCESS(ccj
->cj
->Status
))
3319 Status
= ccj
->cj
->Status
;
3322 RtlCopyMemory(ccj
->data
, (uint8_t*)ccj
->decomp
+ ccj
->offset
, ccj
->length
);
3323 ExFreePool(ccj
->decomp
);
3333 while (!IsListEmpty(&read_parts
)) {
3334 read_part
* rp
= CONTAINING_RECORD(RemoveHeadList(&read_parts
), read_part
, list_entry
);
3337 ExFreePool(rp
->buf
);
3340 ExFreePool(rp
->csum
);
3345 while (!IsListEmpty(&calc_jobs
)) {
3346 comp_calc_job
* ccj
= CONTAINING_RECORD(RemoveHeadList(&calc_jobs
), comp_calc_job
, list_entry
);
3348 KeWaitForSingleObject(&ccj
->cj
->event
, Executive
, KernelMode
, false, NULL
);
3351 ExFreePool(ccj
->decomp
);
3353 ExFreePool(ccj
->cj
);
/*
 * do_read — worker for the IRP_MJ_READ path: validates the request, serves it
 * from the cache manager when possible, otherwise maps the user buffer and
 * reads stream (ADS) or file data directly, zero-filling anything past
 * ValidDataLength. On success *bytes_read holds the byte count (including the
 * zero-filled "addon" tail) and Irp->IoStatus.Information is updated.
 *
 * NOTE(review): this capture is an extraction artifact — the original file's
 * line numbers are fused into the text and several original lines (closing
 * braces, some `if` guards) are missing; code below is kept byte-identical.
 */
NTSTATUS
do_read(PIRP Irp
, bool wait
, ULONG
* bytes_read
) {
/* Pull length and byte offset of the read out of the current IRP stack location. */
3362 PIO_STACK_LOCATION IrpSp
= IoGetCurrentIrpStackLocation(Irp
);
3363 PFILE_OBJECT FileObject
= IrpSp
->FileObject
;
3364 fcb
* fcb
= FileObject
->FsContext
;
3365 uint8_t* data
= NULL
;
3366 ULONG length
= IrpSp
->Parameters
.Read
.Length
, addon
= 0;
3367 uint64_t start
= IrpSp
->Parameters
.Read
.ByteOffset
.QuadPart
;
/* Reject IRPs that arrive without a fully initialised FCB / volume / subvolume. */
3371 if (!fcb
|| !fcb
->Vcb
|| !fcb
->subvol
)
3372 return STATUS_INTERNAL_ERROR
;
3374 TRACE("fcb = %p\n", fcb
);
3375 TRACE("offset = %I64x, length = %lx\n", start
, length
);
3376 TRACE("paging_io = %s, no cache = %s\n", Irp
->Flags
& IRP_PAGING_IO
? "true" : "false", Irp
->Flags
& IRP_NOCACHE
? "true" : "false");
/* Directories have no data stream to read (unless this FCB is an ADS). */
3378 if (!fcb
->ads
&& fcb
->type
== BTRFS_TYPE_DIRECTORY
)
3379 return STATUS_INVALID_DEVICE_REQUEST
;
/* Byte-range locks are only enforced for non-paging I/O. */
3381 if (!(Irp
->Flags
& IRP_PAGING_IO
) && !FsRtlCheckLockForReadAccess(&fcb
->lock
, Irp
)) {
3382 WARN("tried to read locked region\n");
3383 return STATUS_FILE_LOCK_CONFLICT
;
/* NOTE(review): the zero-length guard (presumably `if (length == 0)`) was
 * dropped from this capture — the next two lines are its body. */
3387 TRACE("tried to read zero bytes\n");
3388 return STATUS_SUCCESS
;
/* Reads starting at or after EOF get STATUS_END_OF_FILE without any I/O. */
3391 if (start
>= (uint64_t)fcb
->Header
.FileSize
.QuadPart
) {
3392 TRACE("tried to read with offset after file end (%I64x >= %I64x)\n", start
, fcb
->Header
.FileSize
.QuadPart
);
3393 return STATUS_END_OF_FILE
;
3396 TRACE("FileObject %p fcb %p FileSize = %I64x st_size = %I64x (%p)\n", FileObject
, fcb
, fcb
->Header
.FileSize
.QuadPart
, fcb
->inode_item
.st_size
, &fcb
->inode_item
.st_size
);
/* Non-cached path (or a non-MDL request): map the caller's buffer so we can
 * write into it directly; paging-file reads map at high priority. */
3398 if (Irp
->Flags
& IRP_NOCACHE
|| !(IrpSp
->MinorFunction
& IRP_MN_MDL
)) {
3399 data
= map_user_buffer(Irp
, fcb
->Header
.Flags2
& FSRTL_FLAG2_IS_PAGING_FILE
? HighPagePriority
: NormalPagePriority
);
3401 if (Irp
->MdlAddress
&& !data
) {
3402 ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
3403 return STATUS_INSUFFICIENT_RESOURCES
;
/* Everything at or past ValidDataLength reads as zeros: clamp the length to
 * the VDL..FileSize window, zero the buffer and return without touching disk. */
3406 if (start
>= (uint64_t)fcb
->Header
.ValidDataLength
.QuadPart
) {
3407 length
= (ULONG
)min(length
, min(start
+ length
, (uint64_t)fcb
->Header
.FileSize
.QuadPart
) - fcb
->Header
.ValidDataLength
.QuadPart
);
3408 RtlZeroMemory(data
, length
);
3409 Irp
->IoStatus
.Information
= *bytes_read
= length
;
3410 return STATUS_SUCCESS
;
/* Read straddles ValidDataLength: zero the tail beyond VDL ("addon"), shrink
 * the real read to end at VDL; addon is added back to *bytes_read at the end. */
3413 if (length
+ start
> (uint64_t)fcb
->Header
.ValidDataLength
.QuadPart
) {
3414 addon
= (ULONG
)(min(start
+ length
, (uint64_t)fcb
->Header
.FileSize
.QuadPart
) - fcb
->Header
.ValidDataLength
.QuadPart
);
3415 RtlZeroMemory(data
+ (fcb
->Header
.ValidDataLength
.QuadPart
- start
), addon
);
3416 length
= (ULONG
)(fcb
->Header
.ValidDataLength
.QuadPart
- start
);
/* Cached read path: hand the request to the cache manager. */
3420 if (!(Irp
->Flags
& IRP_NOCACHE
)) {
3421 NTSTATUS Status
= STATUS_SUCCESS
;
/* First cached read on this file object: initialise the cache map.
 * NOTE(review): ccfs is presumably a CC_FILE_SIZES declared on a line this
 * capture dropped — confirm against the full source. */
3424 if (!FileObject
->PrivateCacheMap
) {
3427 ccfs
.AllocationSize
= fcb
->Header
.AllocationSize
;
3428 ccfs
.FileSize
= fcb
->Header
.FileSize
;
3429 ccfs
.ValidDataLength
= fcb
->Header
.ValidDataLength
;
3431 init_file_cache(FileObject
, &ccfs
);
/* MDL-read minor code: let the cache manager build the MDL chain itself. */
3434 if (IrpSp
->MinorFunction
& IRP_MN_MDL
) {
3435 CcMdlRead(FileObject
,&IrpSp
->Parameters
.Read
.ByteOffset
, length
, &Irp
->MdlAddress
, &Irp
->IoStatus
);
/* Prefer CcCopyReadEx when available (takes the originating thread, so the
 * I/O is accounted to the right process); fall back to plain CcCopyRead. */
3437 if (fCcCopyReadEx
) {
3438 TRACE("CcCopyReadEx(%p, %I64x, %lx, %u, %p, %p, %p)\n", FileObject
, IrpSp
->Parameters
.Read
.ByteOffset
.QuadPart
,
3439 length
, wait
, data
, &Irp
->IoStatus
, Irp
->Tail
.Overlay
.Thread
);
3440 TRACE("sizes = %I64x, %I64x, %I64x\n", fcb
->Header
.AllocationSize
.QuadPart
, fcb
->Header
.FileSize
.QuadPart
, fcb
->Header
.ValidDataLength
.QuadPart
);
/* Cache miss and caller can't block: mark pending and bail out. */
3441 if (!fCcCopyReadEx(FileObject
, &IrpSp
->Parameters
.Read
.ByteOffset
, length
, wait
, data
, &Irp
->IoStatus
, Irp
->Tail
.Overlay
.Thread
)) {
3442 TRACE("CcCopyReadEx could not wait\n");
3444 IoMarkIrpPending(Irp
);
3445 return STATUS_PENDING
;
3447 TRACE("CcCopyReadEx finished\n");
3449 TRACE("CcCopyRead(%p, %I64x, %lx, %u, %p, %p)\n", FileObject
, IrpSp
->Parameters
.Read
.ByteOffset
.QuadPart
, length
, wait
, data
, &Irp
->IoStatus
);
3450 TRACE("sizes = %I64x, %I64x, %I64x\n", fcb
->Header
.AllocationSize
.QuadPart
, fcb
->Header
.FileSize
.QuadPart
, fcb
->Header
.ValidDataLength
.QuadPart
);
3451 if (!CcCopyRead(FileObject
, &IrpSp
->Parameters
.Read
.ByteOffset
, length
, wait
, data
, &Irp
->IoStatus
)) {
3452 TRACE("CcCopyRead could not wait\n");
3454 IoMarkIrpPending(Irp
);
3455 return STATUS_PENDING
;
3457 TRACE("CcCopyRead finished\n");
/* The cache-manager calls above run under SEH; an in-page error or similar
 * surfaces here as the exception code rather than crashing the system. */
3460 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER
) {
3461 Status
= _SEH2_GetExceptionCode();
/* Success: propagate the cache manager's status, count the zeroed tail. */
3464 if (NT_SUCCESS(Status
)) {
3465 Status
= Irp
->IoStatus
.Status
;
3466 Irp
->IoStatus
.Information
+= addon
;
3467 *bytes_read
= (ULONG
)Irp
->IoStatus
.Information
;
3469 ERR("EXCEPTION - %08lx\n", Status
);
/* NOTE(review): lines 3470–3475 are missing from this capture; this
 * pending/return pair is presumably the non-blocking fall-back path
 * (e.g. `if (!wait)`) — confirm against the full source. */
3476 IoMarkIrpPending(Irp
);
3477 return STATUS_PENDING
;
/* Non-cached data path. NOTE(review): the selector between the two calls
 * (presumably `if (fcb->ads) ... else ...`) was dropped from this capture:
 * ADS reads go through read_stream, regular file data through read_file. */
3481 Status
= read_stream(fcb
, data
, start
, length
, bytes_read
);
3483 if (!NT_SUCCESS(Status
))
3484 ERR("read_stream returned %08lx\n", Status
);
3486 Status
= read_file(fcb
, data
, start
, length
, bytes_read
, Irp
);
3488 if (!NT_SUCCESS(Status
))
3489 ERR("read_file returned %08lx\n", Status
);
/* Report the zero-filled tail beyond ValidDataLength as part of the read. */
3492 *bytes_read
+= addon
;
3493 TRACE("read %lu bytes\n", *bytes_read
);
3495 Irp
->IoStatus
.Information
= *bytes_read
;
/* Optional per-process disk accounting: attribute the read to the IRP's
 * originating thread if it is a user thread, otherwise to the current
 * thread when it plausibly issued the I/O. */
3497 if (diskacc
&& Status
!= STATUS_PENDING
) {
3498 PETHREAD thread
= NULL
;
3500 if (Irp
->Tail
.Overlay
.Thread
&& !IoIsSystemThread(Irp
->Tail
.Overlay
.Thread
))
3501 thread
= Irp
->Tail
.Overlay
.Thread
;
3502 else if (!IoIsSystemThread(PsGetCurrentThread()))
3503 thread
= PsGetCurrentThread();
3504 else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp
)
3505 thread
= PsGetCurrentThread();
3508 fPsUpdateDiskCounters(PsGetThreadProcess(thread
), *bytes_read
, 0, 1, 0, 0);
3515 _Dispatch_type_(IRP_MJ_READ
)
3516 _Function_class_(DRIVER_DISPATCH
)
3517 NTSTATUS __stdcall
drv_read(PDEVICE_OBJECT DeviceObject
, PIRP Irp
) {
3518 device_extension
* Vcb
= DeviceObject
->DeviceExtension
;
3519 PIO_STACK_LOCATION IrpSp
= IoGetCurrentIrpStackLocation(Irp
);
3520 PFILE_OBJECT FileObject
= IrpSp
->FileObject
;
3521 ULONG bytes_read
= 0;
3526 bool acquired_fcb_lock
= false, wait
;
3528 FsRtlEnterFileSystem();
3530 top_level
= is_top_level(Irp
);
3534 if (Vcb
&& Vcb
->type
== VCB_TYPE_VOLUME
) {
3535 Status
= vol_read(DeviceObject
, Irp
);
3537 } else if (!Vcb
|| Vcb
->type
!= VCB_TYPE_FS
) {
3538 Status
= STATUS_INVALID_PARAMETER
;
3542 Irp
->IoStatus
.Information
= 0;
3544 if (IrpSp
->MinorFunction
& IRP_MN_COMPLETE
) {
3545 CcMdlReadComplete(IrpSp
->FileObject
, Irp
->MdlAddress
);
3547 Irp
->MdlAddress
= NULL
;
3548 Status
= STATUS_SUCCESS
;
3553 fcb
= FileObject
->FsContext
;
3556 ERR("fcb was NULL\n");
3557 Status
= STATUS_INVALID_PARAMETER
;
3561 ccb
= FileObject
->FsContext2
;
3564 ERR("ccb was NULL\n");
3565 Status
= STATUS_INVALID_PARAMETER
;
3569 if (Irp
->RequestorMode
== UserMode
&& !(ccb
->access
& FILE_READ_DATA
)) {
3570 WARN("insufficient privileges\n");
3571 Status
= STATUS_ACCESS_DENIED
;
3575 if (fcb
== Vcb
->volume_fcb
) {
3576 TRACE("reading volume FCB\n");
3578 IoSkipCurrentIrpStackLocation(Irp
);
3580 Status
= IoCallDriver(Vcb
->Vpb
->RealDevice
, Irp
);
3585 if (!(Irp
->Flags
& IRP_PAGING_IO
))
3586 FsRtlCheckOplock(fcb_oplock(fcb
), Irp
, NULL
, NULL
, NULL
);
3588 wait
= IoIsOperationSynchronous(Irp
);
3590 // Don't offload jobs when doing paging IO - otherwise this can lead to
3591 // deadlocks in CcCopyRead.
3592 if (Irp
->Flags
& IRP_PAGING_IO
)
3595 if (!(Irp
->Flags
& IRP_PAGING_IO
) && FileObject
->SectionObjectPointer
&& FileObject
->SectionObjectPointer
->DataSectionObject
) {
3596 IO_STATUS_BLOCK iosb
;
3598 CcFlushCache(FileObject
->SectionObjectPointer
, &IrpSp
->Parameters
.Read
.ByteOffset
, IrpSp
->Parameters
.Read
.Length
, &iosb
);
3599 if (!NT_SUCCESS(iosb
.Status
)) {
3600 ERR("CcFlushCache returned %08lx\n", iosb
.Status
);
3605 if (!ExIsResourceAcquiredSharedLite(fcb
->Header
.Resource
)) {
3606 if (!ExAcquireResourceSharedLite(fcb
->Header
.Resource
, wait
)) {
3607 Status
= STATUS_PENDING
;
3608 IoMarkIrpPending(Irp
);
3612 acquired_fcb_lock
= true;
3615 Status
= do_read(Irp
, wait
, &bytes_read
);
3617 if (acquired_fcb_lock
)
3618 ExReleaseResourceLite(fcb
->Header
.Resource
);
3621 if (FileObject
->Flags
& FO_SYNCHRONOUS_IO
&& !(Irp
->Flags
& IRP_PAGING_IO
))
3622 FileObject
->CurrentByteOffset
.QuadPart
= IrpSp
->Parameters
.Read
.ByteOffset
.QuadPart
+ (NT_SUCCESS(Status
) ? bytes_read
: 0);
3625 Irp
->IoStatus
.Status
= Status
;
3627 TRACE("Irp->IoStatus.Status = %08lx\n", Irp
->IoStatus
.Status
);
3628 TRACE("Irp->IoStatus.Information = %Iu\n", Irp
->IoStatus
.Information
);
3629 TRACE("returning %08lx\n", Status
);
3631 if (Status
!= STATUS_PENDING
)
3632 IoCompleteRequest(Irp
, IO_NO_INCREMENT
);
3634 if (!add_thread_job(Vcb
, Irp
))
3635 Status
= do_read_job(Irp
);
3640 IoSetTopLevelIrp(NULL
);
3642 FsRtlExitFileSystem();