[BTRFS] Upgrade to 1.0.2
[reactos.git] / drivers / filesystems / btrfs / read.c
1 /* Copyright (c) Mark Harmstone 2016-17
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19
/* Outcome of the read issued for one stripe of a chunk.
 * read_data_completion moves a stripe to Success or Error once its IRP has
 * completed; the read_data_* validators then inspect these values. */
enum read_data_status {
    ReadDataStatus_Pending,
    ReadDataStatus_Success,       /* IRP completed with a success status */
    ReadDataStatus_Error,         /* IRP completed with a failure status */
    ReadDataStatus_MissingDevice,
    ReadDataStatus_Skip
};
27
28 struct read_data_context;
29
30 typedef struct {
31 struct read_data_context* context;
32 UINT16 stripenum;
33 BOOL rewrite;
34 PIRP Irp;
35 IO_STATUS_BLOCK iosb;
36 enum read_data_status status;
37 PMDL mdl;
38 UINT64 stripestart;
39 UINT64 stripeend;
40 } read_data_stripe;
41
42 typedef struct {
43 KEVENT Event;
44 NTSTATUS Status;
45 chunk* c;
46 UINT64 address;
47 UINT32 buflen;
48 LONG num_stripes, stripes_left;
49 UINT64 type;
50 UINT32 sector_size;
51 UINT16 firstoff, startoffstripe, sectors_per_stripe;
52 UINT32* csum;
53 BOOL tree;
54 read_data_stripe* stripes;
55 UINT8* va;
56 } read_data_context;
57
58 extern BOOL diskacc;
59 extern tPsUpdateDiskCounters fPsUpdateDiskCounters;
60 extern tCcCopyReadEx fCcCopyReadEx;
61 extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters;
62
63 #define LINUX_PAGE_SIZE 4096
64
65 _Function_class_(IO_COMPLETION_ROUTINE)
66 #ifdef __REACTOS__
67 static NTSTATUS NTAPI read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
68 #else
69 static NTSTATUS read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
70 #endif
71 read_data_stripe* stripe = conptr;
72 read_data_context* context = (read_data_context*)stripe->context;
73
74 UNUSED(DeviceObject);
75
76 stripe->iosb = Irp->IoStatus;
77
78 if (NT_SUCCESS(Irp->IoStatus.Status))
79 stripe->status = ReadDataStatus_Success;
80 else
81 stripe->status = ReadDataStatus_Error;
82
83 if (InterlockedDecrement(&context->stripes_left) == 0)
84 KeSetEvent(&context->Event, 0, FALSE);
85
86 return STATUS_MORE_PROCESSING_REQUIRED;
87 }
88
89 NTSTATUS check_csum(device_extension* Vcb, UINT8* data, UINT32 sectors, UINT32* csum) {
90 NTSTATUS Status;
91 calc_job* cj;
92 UINT32* csum2;
93
94 // From experimenting, it seems that 40 sectors is roughly the crossover
95 // point where offloading the crc32 calculation becomes worth it.
96
97 if (sectors < 40 || KeQueryActiveProcessorCount(NULL) < 2) {
98 ULONG j;
99
100 for (j = 0; j < sectors; j++) {
101 UINT32 crc32 = ~calc_crc32c(0xffffffff, data + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
102
103 if (crc32 != csum[j]) {
104 return STATUS_CRC_ERROR;
105 }
106 }
107
108 return STATUS_SUCCESS;
109 }
110
111 csum2 = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sectors, ALLOC_TAG);
112 if (!csum2) {
113 ERR("out of memory\n");
114 return STATUS_INSUFFICIENT_RESOURCES;
115 }
116
117 Status = add_calc_job(Vcb, data, sectors, csum2, &cj);
118 if (!NT_SUCCESS(Status)) {
119 ERR("add_calc_job returned %08x\n", Status);
120 ExFreePool(csum2);
121 return Status;
122 }
123
124 KeWaitForSingleObject(&cj->event, Executive, KernelMode, FALSE, NULL);
125
126 if (RtlCompareMemory(csum2, csum, sectors * sizeof(UINT32)) != sectors * sizeof(UINT32)) {
127 free_calc_job(cj);
128 ExFreePool(csum2);
129 return STATUS_CRC_ERROR;
130 }
131
132 free_calc_job(cj);
133 ExFreePool(csum2);
134
135 return STATUS_SUCCESS;
136 }
137
138 static NTSTATUS read_data_dup(device_extension* Vcb, UINT8* buf, UINT64 addr, read_data_context* context, CHUNK_ITEM* ci,
139 device** devices, UINT64 generation) {
140 ULONG i;
141 BOOL checksum_error = FALSE;
142 UINT16 j, stripe = 0;
143 NTSTATUS Status;
144 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
145
146 for (j = 0; j < ci->num_stripes; j++) {
147 if (context->stripes[j].status == ReadDataStatus_Error) {
148 WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
149 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
150 return context->stripes[j].iosb.Status;
151 } else if (context->stripes[j].status == ReadDataStatus_Success) {
152 stripe = j;
153 break;
154 }
155 }
156
157 if (context->stripes[stripe].status != ReadDataStatus_Success)
158 return STATUS_INTERNAL_ERROR;
159
160 if (context->tree) {
161 tree_header* th = (tree_header*)buf;
162 UINT32 crc32;
163
164 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum));
165
166 if (th->address != context->address || crc32 != *((UINT32*)th->csum)) {
167 checksum_error = TRUE;
168 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
169 } else if (generation != 0 && th->generation != generation) {
170 checksum_error = TRUE;
171 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
172 }
173 } else if (context->csum) {
174 #ifdef DEBUG_STATS
175 LARGE_INTEGER time1, time2;
176
177 time1 = KeQueryPerformanceCounter(NULL);
178 #endif
179 Status = check_csum(Vcb, buf, (ULONG)context->stripes[stripe].Irp->IoStatus.Information / context->sector_size, context->csum);
180
181 if (Status == STATUS_CRC_ERROR) {
182 checksum_error = TRUE;
183 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
184 } else if (!NT_SUCCESS(Status)) {
185 ERR("check_csum returned %08x\n", Status);
186 return Status;
187 }
188 #ifdef DEBUG_STATS
189 time2 = KeQueryPerformanceCounter(NULL);
190
191 Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
192 #endif
193 }
194
195 if (!checksum_error)
196 return STATUS_SUCCESS;
197
198 if (ci->num_stripes == 1)
199 return STATUS_CRC_ERROR;
200
201 if (context->tree) {
202 tree_header* t2;
203 BOOL recovered = FALSE;
204
205 t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
206 if (!t2) {
207 ERR("out of memory\n");
208 return STATUS_INSUFFICIENT_RESOURCES;
209 }
210
211 for (j = 0; j < ci->num_stripes; j++) {
212 if (j != stripe && devices[j] && devices[j]->devobj) {
213 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + context->stripes[stripe].stripestart, Vcb->superblock.node_size, (UINT8*)t2, FALSE);
214 if (!NT_SUCCESS(Status)) {
215 WARN("sync_read_phys returned %08x\n", Status);
216 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
217 } else {
218 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum));
219
220 if (t2->address == addr && crc32 == *((UINT32*)t2->csum) && (generation == 0 || t2->generation == generation)) {
221 RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
222 ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id);
223 recovered = TRUE;
224
225 if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
226 Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + context->stripes[stripe].stripestart,
227 t2, Vcb->superblock.node_size);
228 if (!NT_SUCCESS(Status)) {
229 WARN("write_data_phys returned %08x\n", Status);
230 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
231 }
232 }
233
234 break;
235 } else if (t2->address != addr || crc32 != *((UINT32*)t2->csum))
236 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
237 else
238 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_GENERATION_ERRORS);
239 }
240 }
241 }
242
243 if (!recovered) {
244 ERR("unrecoverable checksum error at %llx\n", addr);
245 ExFreePool(t2);
246 return STATUS_CRC_ERROR;
247 }
248
249 ExFreePool(t2);
250 } else {
251 ULONG sectors = (ULONG)context->stripes[stripe].Irp->IoStatus.Information / Vcb->superblock.sector_size;
252 UINT8* sector;
253
254 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
255 if (!sector) {
256 ERR("out of memory\n");
257 return STATUS_INSUFFICIENT_RESOURCES;
258 }
259
260 for (i = 0; i < sectors; i++) {
261 UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
262
263 if (context->csum[i] != crc32) {
264 BOOL recovered = FALSE;
265
266 for (j = 0; j < ci->num_stripes; j++) {
267 if (j != stripe && devices[j] && devices[j]->devobj) {
268 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
269 Vcb->superblock.sector_size, sector, FALSE);
270 if (!NT_SUCCESS(Status)) {
271 WARN("sync_read_phys returned %08x\n", Status);
272 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
273 } else {
274 UINT32 crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);
275
276 if (crc32b == context->csum[i]) {
277 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
278 ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);
279 recovered = TRUE;
280
281 if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
282 Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
283 sector, Vcb->superblock.sector_size);
284 if (!NT_SUCCESS(Status)) {
285 WARN("write_data_phys returned %08x\n", Status);
286 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
287 }
288 }
289
290 break;
291 } else
292 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
293 }
294 }
295 }
296
297 if (!recovered) {
298 ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
299 ExFreePool(sector);
300 return STATUS_CRC_ERROR;
301 }
302 }
303 }
304
305 ExFreePool(sector);
306 }
307
308 return STATUS_SUCCESS;
309 }
310
311 static NTSTATUS read_data_raid0(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context,
312 CHUNK_ITEM* ci, device** devices, UINT64 generation, UINT64 offset) {
313 UINT64 i;
314
315 for (i = 0; i < ci->num_stripes; i++) {
316 if (context->stripes[i].status == ReadDataStatus_Error) {
317 WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status);
318 log_device_error(Vcb, devices[i], BTRFS_DEV_STAT_READ_ERRORS);
319 return context->stripes[i].iosb.Status;
320 }
321 }
322
323 if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries
324 tree_header* th = (tree_header*)buf;
325 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
326
327 if (crc32 != *((UINT32*)th->csum) || addr != th->address || (generation != 0 && generation != th->generation)) {
328 UINT64 off;
329 UINT16 stripe;
330
331 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &off, &stripe);
332
333 ERR("unrecoverable checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id);
334
335 if (crc32 != *((UINT32*)th->csum)) {
336 WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
337 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
338 return STATUS_CRC_ERROR;
339 } else if (addr != th->address) {
340 WARN("address of tree was %llx, not %llx as expected\n", th->address, addr);
341 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
342 return STATUS_CRC_ERROR;
343 } else if (generation != 0 && generation != th->generation) {
344 WARN("generation of tree was %llx, not %llx as expected\n", th->generation, generation);
345 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
346 return STATUS_CRC_ERROR;
347 }
348 }
349 } else if (context->csum) {
350 NTSTATUS Status;
351 #ifdef DEBUG_STATS
352 LARGE_INTEGER time1, time2;
353
354 time1 = KeQueryPerformanceCounter(NULL);
355 #endif
356 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
357
358 if (Status == STATUS_CRC_ERROR) {
359 for (i = 0; i < length / Vcb->superblock.sector_size; i++) {
360 UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
361
362 if (context->csum[i] != crc32) {
363 UINT64 off;
364 UINT16 stripe;
365
366 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, ci->num_stripes, &off, &stripe);
367
368 ERR("unrecoverable checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id);
369
370 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
371
372 return Status;
373 }
374 }
375
376 return Status;
377 } else if (!NT_SUCCESS(Status)) {
378 ERR("check_csum returned %08x\n", Status);
379 return Status;
380 }
381 #ifdef DEBUG_STATS
382 time2 = KeQueryPerformanceCounter(NULL);
383
384 Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
385 #endif
386 }
387
388 return STATUS_SUCCESS;
389 }
390
391 static NTSTATUS read_data_raid10(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context,
392 CHUNK_ITEM* ci, device** devices, UINT64 generation, UINT64 offset) {
393 UINT64 i;
394 UINT16 j, stripe;
395 NTSTATUS Status;
396 BOOL checksum_error = FALSE;
397 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
398
399 for (j = 0; j < ci->num_stripes; j++) {
400 if (context->stripes[j].status == ReadDataStatus_Error) {
401 WARN("stripe %llu returned error %08x\n", j, context->stripes[j].iosb.Status);
402 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
403 return context->stripes[j].iosb.Status;
404 } else if (context->stripes[j].status == ReadDataStatus_Success)
405 stripe = j;
406 }
407
408 if (context->tree) {
409 tree_header* th = (tree_header*)buf;
410 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
411
412 if (crc32 != *((UINT32*)th->csum)) {
413 WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
414 checksum_error = TRUE;
415 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
416 } else if (addr != th->address) {
417 WARN("address of tree was %llx, not %llx as expected\n", th->address, addr);
418 checksum_error = TRUE;
419 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
420 } else if (generation != 0 && generation != th->generation) {
421 WARN("generation of tree was %llx, not %llx as expected\n", th->generation, generation);
422 checksum_error = TRUE;
423 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
424 }
425 } else if (context->csum) {
426 #ifdef DEBUG_STATS
427 LARGE_INTEGER time1, time2;
428
429 time1 = KeQueryPerformanceCounter(NULL);
430 #endif
431 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
432
433 if (Status == STATUS_CRC_ERROR)
434 checksum_error = TRUE;
435 else if (!NT_SUCCESS(Status)) {
436 ERR("check_csum returned %08x\n", Status);
437 return Status;
438 }
439 #ifdef DEBUG_STATS
440 time2 = KeQueryPerformanceCounter(NULL);
441
442 Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
443 #endif
444 }
445
446 if (!checksum_error)
447 return STATUS_SUCCESS;
448
449 if (context->tree) {
450 tree_header* t2;
451 UINT64 off;
452 UINT16 badsubstripe = 0;
453 BOOL recovered = FALSE;
454
455 t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
456 if (!t2) {
457 ERR("out of memory\n");
458 return STATUS_INSUFFICIENT_RESOURCES;
459 }
460
461 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &off, &stripe);
462
463 stripe *= ci->sub_stripes;
464
465 for (j = 0; j < ci->sub_stripes; j++) {
466 if (context->stripes[stripe + j].status == ReadDataStatus_Success) {
467 badsubstripe = j;
468 break;
469 }
470 }
471
472 for (j = 0; j < ci->sub_stripes; j++) {
473 if (context->stripes[stripe + j].status != ReadDataStatus_Success && devices[stripe + j] && devices[stripe + j]->devobj) {
474 Status = sync_read_phys(devices[stripe + j]->devobj, cis[stripe + j].offset + off,
475 Vcb->superblock.node_size, (UINT8*)t2, FALSE);
476 if (!NT_SUCCESS(Status)) {
477 WARN("sync_read_phys returned %08x\n", Status);
478 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_READ_ERRORS);
479 } else {
480 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum));
481
482 if (t2->address == addr && crc32 == *((UINT32*)t2->csum) && (generation == 0 || t2->generation == generation)) {
483 RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
484 ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe + j]->devitem.dev_id);
485 recovered = TRUE;
486
487 if (!Vcb->readonly && !devices[stripe + badsubstripe]->readonly && devices[stripe + badsubstripe]->devobj) { // write good data over bad
488 Status = write_data_phys(devices[stripe + badsubstripe]->devobj, cis[stripe + badsubstripe].offset + off,
489 t2, Vcb->superblock.node_size);
490 if (!NT_SUCCESS(Status)) {
491 WARN("write_data_phys returned %08x\n", Status);
492 log_device_error(Vcb, devices[stripe + badsubstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
493 }
494 }
495
496 break;
497 } else if (t2->address != addr || crc32 != *((UINT32*)t2->csum))
498 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
499 else
500 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_GENERATION_ERRORS);
501 }
502 }
503 }
504
505 if (!recovered) {
506 ERR("unrecoverable checksum error at %llx\n", addr);
507 ExFreePool(t2);
508 return STATUS_CRC_ERROR;
509 }
510
511 ExFreePool(t2);
512 } else {
513 ULONG sectors = length / Vcb->superblock.sector_size;
514 UINT8* sector;
515
516 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
517 if (!sector) {
518 ERR("out of memory\n");
519 return STATUS_INSUFFICIENT_RESOURCES;
520 }
521
522 for (i = 0; i < sectors; i++) {
523 UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
524
525 if (context->csum[i] != crc32) {
526 UINT64 off;
527 UINT16 stripe2, badsubstripe = 0;
528 BOOL recovered = FALSE;
529
530 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
531 ci->num_stripes / ci->sub_stripes, &off, &stripe2);
532
533 stripe2 *= ci->sub_stripes;
534
535 for (j = 0; j < ci->sub_stripes; j++) {
536 if (context->stripes[stripe2 + j].status == ReadDataStatus_Success) {
537 badsubstripe = j;
538 break;
539 }
540 }
541
542 log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
543
544 for (j = 0; j < ci->sub_stripes; j++) {
545 if (context->stripes[stripe2 + j].status != ReadDataStatus_Success && devices[stripe2 + j] && devices[stripe2 + j]->devobj) {
546 Status = sync_read_phys(devices[stripe2 + j]->devobj, cis[stripe2 + j].offset + off,
547 Vcb->superblock.sector_size, sector, FALSE);
548 if (!NT_SUCCESS(Status)) {
549 WARN("sync_read_phys returned %08x\n", Status);
550 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_READ_ERRORS);
551 } else {
552 UINT32 crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);
553
554 if (crc32b == context->csum[i]) {
555 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
556 ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe2 + j]->devitem.dev_id);
557 recovered = TRUE;
558
559 if (!Vcb->readonly && !devices[stripe2 + badsubstripe]->readonly && devices[stripe2 + badsubstripe]->devobj) { // write good data over bad
560 Status = write_data_phys(devices[stripe2 + badsubstripe]->devobj, cis[stripe2 + badsubstripe].offset + off,
561 sector, Vcb->superblock.sector_size);
562 if (!NT_SUCCESS(Status)) {
563 WARN("write_data_phys returned %08x\n", Status);
564 log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_READ_ERRORS);
565 }
566 }
567
568 break;
569 } else
570 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
571 }
572 }
573 }
574
575 if (!recovered) {
576 ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
577 ExFreePool(sector);
578 return STATUS_CRC_ERROR;
579 }
580 }
581 }
582
583 ExFreePool(sector);
584 }
585
586 return STATUS_SUCCESS;
587 }
588
589 static NTSTATUS read_data_raid5(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, CHUNK_ITEM* ci,
590 device** devices, UINT64 offset, UINT64 generation, chunk* c, BOOL degraded) {
591 ULONG i;
592 NTSTATUS Status;
593 BOOL checksum_error = FALSE;
594 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
595 UINT16 j, stripe;
596 BOOL no_success = TRUE;
597
598 for (j = 0; j < ci->num_stripes; j++) {
599 if (context->stripes[j].status == ReadDataStatus_Error) {
600 WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
601 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
602 return context->stripes[j].iosb.Status;
603 } else if (context->stripes[j].status == ReadDataStatus_Success) {
604 stripe = j;
605 no_success = FALSE;
606 }
607 }
608
609 if (c) { // check partial stripes
610 LIST_ENTRY* le;
611 UINT64 ps_length = (ci->num_stripes - 1) * ci->stripe_length;
612
613 ExAcquireResourceSharedLite(&c->partial_stripes_lock, TRUE);
614
615 le = c->partial_stripes.Flink;
616 while (le != &c->partial_stripes) {
617 partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
618
619 if (ps->address + ps_length > addr && ps->address < addr + length) {
620 ULONG runlength, index;
621
622 runlength = RtlFindFirstRunClear(&ps->bmp, &index);
623
624 while (runlength != 0) {
625 UINT64 runstart = ps->address + (index * Vcb->superblock.sector_size);
626 UINT64 runend = runstart + (runlength * Vcb->superblock.sector_size);
627 UINT64 start = max(runstart, addr);
628 UINT64 end = min(runend, addr + length);
629
630 if (end > start)
631 RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
632
633 runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
634 }
635 } else if (ps->address >= addr + length)
636 break;
637
638 le = le->Flink;
639 }
640
641 ExReleaseResourceLite(&c->partial_stripes_lock);
642 }
643
644 if (context->tree) {
645 tree_header* th = (tree_header*)buf;
646 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
647
648 if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
649 checksum_error = TRUE;
650 if (!no_success && !degraded)
651 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
652 } else if (generation != 0 && generation != th->generation) {
653 checksum_error = TRUE;
654 if (!no_success && !degraded)
655 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
656 }
657 } else if (context->csum) {
658 #ifdef DEBUG_STATS
659 LARGE_INTEGER time1, time2;
660
661 time1 = KeQueryPerformanceCounter(NULL);
662 #endif
663 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
664
665 if (Status == STATUS_CRC_ERROR) {
666 if (!degraded)
667 WARN("checksum error\n");
668 checksum_error = TRUE;
669 } else if (!NT_SUCCESS(Status)) {
670 ERR("check_csum returned %08x\n", Status);
671 return Status;
672 }
673
674 #ifdef DEBUG_STATS
675 time2 = KeQueryPerformanceCounter(NULL);
676
677 Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
678 #endif
679 } else if (degraded)
680 checksum_error = TRUE;
681
682 if (!checksum_error)
683 return STATUS_SUCCESS;
684
685 if (context->tree) {
686 UINT16 parity;
687 UINT64 off;
688 BOOL recovered = FALSE, first = TRUE, failed = FALSE;
689 UINT8* t2;
690
691 t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * 2, ALLOC_TAG);
692 if (!t2) {
693 ERR("out of memory\n");
694 return STATUS_INSUFFICIENT_RESOURCES;
695 }
696
697 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &off, &stripe);
698
699 parity = (((addr - offset) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
700
701 stripe = (parity + stripe + 1) % ci->num_stripes;
702
703 for (j = 0; j < ci->num_stripes; j++) {
704 if (j != stripe) {
705 if (devices[j] && devices[j]->devobj) {
706 if (first) {
707 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, t2, FALSE);
708 if (!NT_SUCCESS(Status)) {
709 ERR("sync_read_phys returned %08x\n", Status);
710 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
711 failed = TRUE;
712 break;
713 }
714
715 first = FALSE;
716 } else {
717 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, t2 + Vcb->superblock.node_size, FALSE);
718 if (!NT_SUCCESS(Status)) {
719 ERR("sync_read_phys returned %08x\n", Status);
720 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
721 failed = TRUE;
722 break;
723 }
724
725 do_xor(t2, t2 + Vcb->superblock.node_size, Vcb->superblock.node_size);
726 }
727 } else {
728 failed = TRUE;
729 break;
730 }
731 }
732 }
733
734 if (!failed) {
735 tree_header* t3 = (tree_header*)t2;
736 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t3->fs_uuid, Vcb->superblock.node_size - sizeof(t3->csum));
737
738 if (t3->address == addr && crc32 == *((UINT32*)t3->csum) && (generation == 0 || t3->generation == generation)) {
739 RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
740
741 if (!degraded)
742 ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id);
743
744 recovered = TRUE;
745
746 if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
747 Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + off, t2, Vcb->superblock.node_size);
748 if (!NT_SUCCESS(Status)) {
749 WARN("write_data_phys returned %08x\n", Status);
750 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
751 }
752 }
753 }
754 }
755
756 if (!recovered) {
757 ERR("unrecoverable checksum error at %llx\n", addr);
758 ExFreePool(t2);
759 return STATUS_CRC_ERROR;
760 }
761
762 ExFreePool(t2);
763 } else {
764 ULONG sectors = length / Vcb->superblock.sector_size;
765 UINT8* sector;
766
767 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * 2, ALLOC_TAG);
768 if (!sector) {
769 ERR("out of memory\n");
770 return STATUS_INSUFFICIENT_RESOURCES;
771 }
772
773 for (i = 0; i < sectors; i++) {
774 UINT16 parity;
775 UINT64 off;
776 UINT32 crc32;
777
778 if (context->csum)
779 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
780
781 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
782 ci->num_stripes - 1, &off, &stripe);
783
784 parity = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
785
786 stripe = (parity + stripe + 1) % ci->num_stripes;
787
788 if (!devices[stripe] || !devices[stripe]->devobj || (context->csum && context->csum[i] != crc32)) {
789 BOOL recovered = FALSE, first = TRUE, failed = FALSE;
790
791 if (devices[stripe] && devices[stripe]->devobj)
792 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_READ_ERRORS);
793
794 for (j = 0; j < ci->num_stripes; j++) {
795 if (j != stripe) {
796 if (devices[j] && devices[j]->devobj) {
797 if (first) {
798 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector, FALSE);
799 if (!NT_SUCCESS(Status)) {
800 ERR("sync_read_phys returned %08x\n", Status);
801 failed = TRUE;
802 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
803 break;
804 }
805
806 first = FALSE;
807 } else {
808 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector + Vcb->superblock.sector_size, FALSE);
809 if (!NT_SUCCESS(Status)) {
810 ERR("sync_read_phys returned %08x\n", Status);
811 failed = TRUE;
812 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
813 break;
814 }
815
816 do_xor(sector, sector + Vcb->superblock.sector_size, Vcb->superblock.sector_size);
817 }
818 } else {
819 failed = TRUE;
820 break;
821 }
822 }
823 }
824
825 if (!failed) {
826 if (context->csum)
827 crc32 = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);
828
829 if (!context->csum || crc32 == context->csum[i]) {
830 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
831
832 if (!degraded)
833 ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);
834
835 recovered = TRUE;
836
837 if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
838 Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + off,
839 sector, Vcb->superblock.sector_size);
840 if (!NT_SUCCESS(Status)) {
841 WARN("write_data_phys returned %08x\n", Status);
842 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
843 }
844 }
845 }
846 }
847
848 if (!recovered) {
849 ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
850 ExFreePool(sector);
851 return STATUS_CRC_ERROR;
852 }
853 }
854 }
855
856 ExFreePool(sector);
857 }
858
859 return STATUS_SUCCESS;
860 }
861
862 void raid6_recover2(UINT8* sectors, UINT16 num_stripes, ULONG sector_size, UINT16 missing1, UINT16 missing2, UINT8* out) {
863 if (missing1 == num_stripes - 2 || missing2 == num_stripes - 2) { // reconstruct from q and data
864 UINT16 missing = missing1 == (num_stripes - 2) ? missing2 : missing1;
865 UINT16 stripe;
866
867 stripe = num_stripes - 3;
868
869 if (stripe == missing)
870 RtlZeroMemory(out, sector_size);
871 else
872 RtlCopyMemory(out, sectors + (stripe * sector_size), sector_size);
873
874 do {
875 stripe--;
876
877 galois_double(out, sector_size);
878
879 if (stripe != missing)
880 do_xor(out, sectors + (stripe * sector_size), sector_size);
881 } while (stripe > 0);
882
883 do_xor(out, sectors + ((num_stripes - 1) * sector_size), sector_size);
884
885 if (missing != 0)
886 galois_divpower(out, (UINT8)missing, sector_size);
887 } else { // reconstruct from p and q
888 UINT16 x, y, stripe;
889 UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
890 UINT32 j;
891
892 stripe = num_stripes - 3;
893
894 pxy = out + sector_size;
895 qxy = out;
896
897 if (stripe == missing1 || stripe == missing2) {
898 RtlZeroMemory(qxy, sector_size);
899 RtlZeroMemory(pxy, sector_size);
900
901 if (stripe == missing1)
902 x = stripe;
903 else
904 y = stripe;
905 } else {
906 RtlCopyMemory(qxy, sectors + (stripe * sector_size), sector_size);
907 RtlCopyMemory(pxy, sectors + (stripe * sector_size), sector_size);
908 }
909
910 do {
911 stripe--;
912
913 galois_double(qxy, sector_size);
914
915 if (stripe != missing1 && stripe != missing2) {
916 do_xor(qxy, sectors + (stripe * sector_size), sector_size);
917 do_xor(pxy, sectors + (stripe * sector_size), sector_size);
918 } else if (stripe == missing1)
919 x = stripe;
920 else if (stripe == missing2)
921 y = stripe;
922 } while (stripe > 0);
923
924 gyx = gpow2(y > x ? (y-x) : (255-x+y));
925 gx = gpow2(255-x);
926
927 denom = gdiv(1, gyx ^ 1);
928 a = gmul(gyx, denom);
929 b = gmul(gx, denom);
930
931 p = sectors + ((num_stripes - 2) * sector_size);
932 q = sectors + ((num_stripes - 1) * sector_size);
933
934 for (j = 0; j < sector_size; j++) {
935 *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);
936
937 p++;
938 q++;
939 pxy++;
940 qxy++;
941 }
942
943 do_xor(out + sector_size, out, sector_size);
944 do_xor(out + sector_size, sectors + ((num_stripes - 2) * sector_size), sector_size);
945 }
946 }
947
948 static NTSTATUS read_data_raid6(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, CHUNK_ITEM* ci,
949 device** devices, UINT64 offset, UINT64 generation, chunk* c, BOOL degraded) {
950 NTSTATUS Status;
951 ULONG i;
952 BOOL checksum_error = FALSE;
953 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
954 UINT16 stripe, j;
955 BOOL no_success = TRUE;
956
957 for (j = 0; j < ci->num_stripes; j++) {
958 if (context->stripes[j].status == ReadDataStatus_Error) {
959 WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
960
961 if (devices[j])
962 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
963 return context->stripes[j].iosb.Status;
964 } else if (context->stripes[j].status == ReadDataStatus_Success) {
965 stripe = j;
966 no_success = FALSE;
967 }
968 }
969
970 if (c) { // check partial stripes
971 LIST_ENTRY* le;
972 UINT64 ps_length = (ci->num_stripes - 2) * ci->stripe_length;
973
974 ExAcquireResourceSharedLite(&c->partial_stripes_lock, TRUE);
975
976 le = c->partial_stripes.Flink;
977 while (le != &c->partial_stripes) {
978 partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
979
980 if (ps->address + ps_length > addr && ps->address < addr + length) {
981 ULONG runlength, index;
982
983 runlength = RtlFindFirstRunClear(&ps->bmp, &index);
984
985 while (runlength != 0) {
986 UINT64 runstart = ps->address + (index * Vcb->superblock.sector_size);
987 UINT64 runend = runstart + (runlength * Vcb->superblock.sector_size);
988 UINT64 start = max(runstart, addr);
989 UINT64 end = min(runend, addr + length);
990
991 if (end > start)
992 RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
993
994 runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
995 }
996 } else if (ps->address >= addr + length)
997 break;
998
999 le = le->Flink;
1000 }
1001
1002 ExReleaseResourceLite(&c->partial_stripes_lock);
1003 }
1004
1005 if (context->tree) {
1006 tree_header* th = (tree_header*)buf;
1007 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1008
1009 if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
1010 checksum_error = TRUE;
1011 if (!no_success && !degraded && devices[stripe])
1012 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1013 } else if (generation != 0 && generation != th->generation) {
1014 checksum_error = TRUE;
1015 if (!no_success && !degraded && devices[stripe])
1016 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
1017 }
1018 } else if (context->csum) {
1019 #ifdef DEBUG_STATS
1020 LARGE_INTEGER time1, time2;
1021
1022 time1 = KeQueryPerformanceCounter(NULL);
1023 #endif
1024 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
1025
1026 if (Status == STATUS_CRC_ERROR) {
1027 if (!degraded)
1028 WARN("checksum error\n");
1029 checksum_error = TRUE;
1030 } else if (!NT_SUCCESS(Status)) {
1031 ERR("check_csum returned %08x\n", Status);
1032 return Status;
1033 }
1034 #ifdef DEBUG_STATS
1035 time2 = KeQueryPerformanceCounter(NULL);
1036
1037 Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
1038 #endif
1039 } else if (degraded)
1040 checksum_error = TRUE;
1041
1042 if (!checksum_error)
1043 return STATUS_SUCCESS;
1044
1045 if (context->tree) {
1046 UINT8* sector;
1047 UINT16 k, physstripe, parity1, parity2, error_stripe;
1048 UINT64 off;
1049 BOOL recovered = FALSE, failed = FALSE;
1050 ULONG num_errors = 0;
1051
1052 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * (ci->num_stripes + 2), ALLOC_TAG);
1053 if (!sector) {
1054 ERR("out of memory\n");
1055 return STATUS_INSUFFICIENT_RESOURCES;
1056 }
1057
1058 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &off, &stripe);
1059
1060 parity1 = (((addr - offset) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1061 parity2 = (parity1 + 1) % ci->num_stripes;
1062
1063 physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1064
1065 j = (parity2 + 1) % ci->num_stripes;
1066
1067 for (k = 0; k < ci->num_stripes - 1; k++) {
1068 if (j != physstripe) {
1069 if (devices[j] && devices[j]->devobj) {
1070 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, sector + (k * Vcb->superblock.node_size), FALSE);
1071 if (!NT_SUCCESS(Status)) {
1072 ERR("sync_read_phys returned %08x\n", Status);
1073 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1074 num_errors++;
1075 error_stripe = k;
1076
1077 if (num_errors > 1) {
1078 failed = TRUE;
1079 break;
1080 }
1081 }
1082 } else {
1083 num_errors++;
1084 error_stripe = k;
1085
1086 if (num_errors > 1) {
1087 failed = TRUE;
1088 break;
1089 }
1090 }
1091 }
1092
1093 j = (j + 1) % ci->num_stripes;
1094 }
1095
1096 if (!failed) {
1097 if (num_errors == 0) {
1098 tree_header* th = (tree_header*)(sector + (stripe * Vcb->superblock.node_size));
1099 UINT32 crc32;
1100
1101 RtlCopyMemory(sector + (stripe * Vcb->superblock.node_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size),
1102 Vcb->superblock.node_size);
1103
1104 for (j = 0; j < ci->num_stripes - 2; j++) {
1105 if (j != stripe)
1106 do_xor(sector + (stripe * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), Vcb->superblock.node_size);
1107 }
1108
1109 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1110
1111 if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation)) {
1112 RtlCopyMemory(buf, sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1113
1114 if (devices[physstripe] && devices[physstripe]->devobj)
1115 ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[physstripe]->devitem.dev_id);
1116
1117 recovered = TRUE;
1118
1119 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1120 Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
1121 sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1122 if (!NT_SUCCESS(Status)) {
1123 WARN("write_data_phys returned %08x\n", Status);
1124 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1125 }
1126 }
1127 }
1128 }
1129
1130 if (!recovered) {
1131 UINT32 crc32;
1132 tree_header* th = (tree_header*)(sector + (ci->num_stripes * Vcb->superblock.node_size));
1133 BOOL read_q = FALSE;
1134
1135 if (devices[parity2] && devices[parity2]->devobj) {
1136 Status = sync_read_phys(devices[parity2]->devobj, cis[parity2].offset + off,
1137 Vcb->superblock.node_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.node_size), FALSE);
1138 if (!NT_SUCCESS(Status)) {
1139 ERR("sync_read_phys returned %08x\n", Status);
1140 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1141 } else
1142 read_q = TRUE;
1143 }
1144
1145 if (read_q) {
1146 if (num_errors == 1) {
1147 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.node_size));
1148
1149 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1150
1151 if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation))
1152 recovered = TRUE;
1153 } else {
1154 for (j = 0; j < ci->num_stripes - 1; j++) {
1155 if (j != stripe) {
1156 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.node_size));
1157
1158 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1159
1160 if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation)) {
1161 recovered = TRUE;
1162 error_stripe = j;
1163 break;
1164 }
1165 }
1166 }
1167 }
1168 }
1169
1170 if (recovered) {
1171 UINT16 error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1172
1173 if (devices[physstripe] && devices[physstripe]->devobj)
1174 ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[physstripe]->devitem.dev_id);
1175
1176 RtlCopyMemory(buf, sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1177
1178 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1179 Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
1180 sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1181 if (!NT_SUCCESS(Status)) {
1182 WARN("write_data_phys returned %08x\n", Status);
1183 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1184 }
1185 }
1186
1187 if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1188 if (error_stripe == ci->num_stripes - 2) {
1189 ERR("recovering from parity error at %llx, device %llx\n", addr, devices[error_stripe_phys]->devitem.dev_id);
1190
1191 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1192
1193 RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1194
1195 for (j = 0; j < ci->num_stripes - 2; j++) {
1196 if (j == stripe) {
1197 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (ci->num_stripes * Vcb->superblock.node_size),
1198 Vcb->superblock.node_size);
1199 } else {
1200 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size),
1201 Vcb->superblock.node_size);
1202 }
1203 }
1204 } else {
1205 ERR("recovering from checksum error at %llx, device %llx\n", addr + ((error_stripe - stripe) * ci->stripe_length),
1206 devices[error_stripe_phys]->devitem.dev_id);
1207
1208 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1209
1210 RtlCopyMemory(sector + (error_stripe * Vcb->superblock.node_size),
1211 sector + ((ci->num_stripes + 1) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1212 }
1213 }
1214
1215 if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1216 Status = write_data_phys(devices[error_stripe_phys]->devobj, cis[error_stripe_phys].offset + off,
1217 sector + (error_stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1218 if (!NT_SUCCESS(Status)) {
1219 WARN("write_data_phys returned %08x\n", Status);
1220 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
1221 }
1222 }
1223 }
1224 }
1225 }
1226
1227 if (!recovered) {
1228 ERR("unrecoverable checksum error at %llx\n", addr);
1229 ExFreePool(sector);
1230 return STATUS_CRC_ERROR;
1231 }
1232
1233 ExFreePool(sector);
1234 } else {
1235 ULONG sectors = length / Vcb->superblock.sector_size;
1236 UINT8* sector;
1237
1238 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * (ci->num_stripes + 2), ALLOC_TAG);
1239 if (!sector) {
1240 ERR("out of memory\n");
1241 return STATUS_INSUFFICIENT_RESOURCES;
1242 }
1243
1244 for (i = 0; i < sectors; i++) {
1245 UINT64 off;
1246 UINT16 physstripe, parity1, parity2;
1247 UINT32 crc32;
1248
1249 if (context->csum)
1250 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1251
1252 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
1253 ci->num_stripes - 2, &off, &stripe);
1254
1255 parity1 = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1256 parity2 = (parity1 + 1) % ci->num_stripes;
1257
1258 physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1259
1260 if (!devices[physstripe] || !devices[physstripe]->devobj || (context->csum && context->csum[i] != crc32)) {
1261 UINT16 k, error_stripe;
1262 BOOL recovered = FALSE, failed = FALSE;
1263 ULONG num_errors = 0;
1264
1265 if (devices[physstripe] && devices[physstripe]->devobj)
1266 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_READ_ERRORS);
1267
1268 j = (parity2 + 1) % ci->num_stripes;
1269
1270 for (k = 0; k < ci->num_stripes - 1; k++) {
1271 if (j != physstripe) {
1272 if (devices[j] && devices[j]->devobj) {
1273 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector + (k * Vcb->superblock.sector_size), FALSE);
1274 if (!NT_SUCCESS(Status)) {
1275 ERR("sync_read_phys returned %08x\n", Status);
1276 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1277 num_errors++;
1278 error_stripe = k;
1279
1280 if (num_errors > 1) {
1281 failed = TRUE;
1282 break;
1283 }
1284 }
1285 } else {
1286 num_errors++;
1287 error_stripe = k;
1288
1289 if (num_errors > 1) {
1290 failed = TRUE;
1291 break;
1292 }
1293 }
1294 }
1295
1296 j = (j + 1) % ci->num_stripes;
1297 }
1298
1299 if (!failed) {
1300 if (num_errors == 0) {
1301 RtlCopyMemory(sector + (stripe * Vcb->superblock.sector_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1302
1303 for (j = 0; j < ci->num_stripes - 2; j++) {
1304 if (j != stripe)
1305 do_xor(sector + (stripe * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1306 }
1307
1308 if (context->csum)
1309 crc32 = ~calc_crc32c(0xffffffff, sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1310
1311 if (!context->csum || crc32 == context->csum[i]) {
1312 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1313
1314 if (devices[physstripe] && devices[physstripe]->devobj)
1315 ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
1316 devices[physstripe]->devitem.dev_id);
1317
1318 recovered = TRUE;
1319
1320 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1321 Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
1322 sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1323 if (!NT_SUCCESS(Status)) {
1324 WARN("write_data_phys returned %08x\n", Status);
1325 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1326 }
1327 }
1328 }
1329 }
1330
1331 if (!recovered) {
1332 BOOL read_q = FALSE;
1333
1334 if (devices[parity2] && devices[parity2]->devobj) {
1335 Status = sync_read_phys(devices[parity2]->devobj, cis[parity2].offset + off,
1336 Vcb->superblock.sector_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.sector_size), FALSE);
1337 if (!NT_SUCCESS(Status)) {
1338 ERR("sync_read_phys returned %08x\n", Status);
1339 log_device_error(Vcb, devices[parity2], BTRFS_DEV_STAT_READ_ERRORS);
1340 } else
1341 read_q = TRUE;
1342 }
1343
1344 if (read_q) {
1345 if (num_errors == 1) {
1346 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.sector_size));
1347
1348 if (!devices[physstripe] || !devices[physstripe]->devobj)
1349 recovered = TRUE;
1350 else {
1351 crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1352
1353 if (crc32 == context->csum[i])
1354 recovered = TRUE;
1355 }
1356 } else {
1357 for (j = 0; j < ci->num_stripes - 1; j++) {
1358 if (j != stripe) {
1359 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.sector_size));
1360
1361 crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1362
1363 if (crc32 == context->csum[i]) {
1364 recovered = TRUE;
1365 error_stripe = j;
1366 break;
1367 }
1368 }
1369 }
1370 }
1371 }
1372
1373 if (recovered) {
1374 UINT16 error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1375
1376 if (devices[physstripe] && devices[physstripe]->devobj)
1377 ERR("recovering from checksum error at %llx, device %llx\n",
1378 addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[physstripe]->devitem.dev_id);
1379
1380 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1381
1382 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1383 Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
1384 sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1385 if (!NT_SUCCESS(Status)) {
1386 WARN("write_data_phys returned %08x\n", Status);
1387 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1388 }
1389 }
1390
1391 if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1392 if (error_stripe == ci->num_stripes - 2) {
1393 ERR("recovering from parity error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
1394 devices[error_stripe_phys]->devitem.dev_id);
1395
1396 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1397
1398 RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1399
1400 for (j = 0; j < ci->num_stripes - 2; j++) {
1401 if (j == stripe) {
1402 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size),
1403 Vcb->superblock.sector_size);
1404 } else {
1405 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size),
1406 Vcb->superblock.sector_size);
1407 }
1408 }
1409 } else {
1410 ERR("recovering from checksum error at %llx, device %llx\n",
1411 addr + UInt32x32To64(i, Vcb->superblock.sector_size) + ((error_stripe - stripe) * ci->stripe_length),
1412 devices[error_stripe_phys]->devitem.dev_id);
1413
1414 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1415
1416 RtlCopyMemory(sector + (error_stripe * Vcb->superblock.sector_size),
1417 sector + ((ci->num_stripes + 1) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1418 }
1419 }
1420
1421 if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1422 Status = write_data_phys(devices[error_stripe_phys]->devobj, cis[error_stripe_phys].offset + off,
1423 sector + (error_stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1424 if (!NT_SUCCESS(Status)) {
1425 WARN("write_data_phys returned %08x\n", Status);
1426 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
1427 }
1428 }
1429 }
1430 }
1431 }
1432
1433 if (!recovered) {
1434 ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
1435 ExFreePool(sector);
1436 return STATUS_CRC_ERROR;
1437 }
1438 }
1439 }
1440
1441 ExFreePool(sector);
1442 }
1443
1444 return STATUS_SUCCESS;
1445 }
1446
1447 NTSTATUS read_data(_In_ device_extension* Vcb, _In_ UINT64 addr, _In_ UINT32 length, _In_reads_bytes_opt_(length*sizeof(UINT32)/Vcb->superblock.sector_size) UINT32* csum,
1448 _In_ BOOL is_tree, _Out_writes_bytes_(length) UINT8* buf, _In_opt_ chunk* c, _Out_opt_ chunk** pc, _In_opt_ PIRP Irp, _In_ UINT64 generation, _In_ BOOL file_read,
1449 _In_ ULONG priority) {
1450 CHUNK_ITEM* ci;
1451 CHUNK_ITEM_STRIPE* cis;
1452 read_data_context context;
1453 UINT64 type, offset, total_reading = 0;
1454 NTSTATUS Status;
1455 device** devices = NULL;
1456 UINT16 i, startoffstripe, allowed_missing, missing_devices = 0;
1457 UINT8* dummypage = NULL;
1458 PMDL dummy_mdl = NULL;
1459 BOOL need_to_wait;
1460 UINT64 lockaddr, locklen;
1461 #ifdef DEBUG_STATS
1462 LARGE_INTEGER time1, time2;
1463 #endif
1464
1465 if (Vcb->log_to_phys_loaded) {
1466 if (!c) {
1467 c = get_chunk_from_address(Vcb, addr);
1468
1469 if (!c) {
1470 ERR("get_chunk_from_address failed\n");
1471 return STATUS_INTERNAL_ERROR;
1472 }
1473 }
1474
1475 ci = c->chunk_item;
1476 offset = c->offset;
1477 devices = c->devices;
1478
1479 if (pc)
1480 *pc = c;
1481 } else {
1482 LIST_ENTRY* le = Vcb->sys_chunks.Flink;
1483
1484 ci = NULL;
1485
1486 c = NULL;
1487 while (le != &Vcb->sys_chunks) {
1488 sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry);
1489
1490 if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) {
1491 CHUNK_ITEM* chunk_item = sc->data;
1492
1493 if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) {
1494 ci = chunk_item;
1495 offset = sc->key.offset;
1496 cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1];
1497
1498 devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG);
1499 if (!devices) {
1500 ERR("out of memory\n");
1501 return STATUS_INSUFFICIENT_RESOURCES;
1502 }
1503
1504 for (i = 0; i < ci->num_stripes; i++) {
1505 devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid);
1506 }
1507
1508 break;
1509 }
1510 }
1511
1512 le = le->Flink;
1513 }
1514
1515 if (!ci) {
1516 ERR("could not find chunk for %llx in bootstrap\n", addr);
1517 return STATUS_INTERNAL_ERROR;
1518 }
1519
1520 if (pc)
1521 *pc = NULL;
1522 }
1523
1524 if (ci->type & BLOCK_FLAG_DUPLICATE) {
1525 type = BLOCK_FLAG_DUPLICATE;
1526 allowed_missing = ci->num_stripes - 1;
1527 } else if (ci->type & BLOCK_FLAG_RAID0) {
1528 type = BLOCK_FLAG_RAID0;
1529 allowed_missing = 0;
1530 } else if (ci->type & BLOCK_FLAG_RAID1) {
1531 type = BLOCK_FLAG_DUPLICATE;
1532 allowed_missing = 1;
1533 } else if (ci->type & BLOCK_FLAG_RAID10) {
1534 type = BLOCK_FLAG_RAID10;
1535 allowed_missing = 1;
1536 } else if (ci->type & BLOCK_FLAG_RAID5) {
1537 type = BLOCK_FLAG_RAID5;
1538 allowed_missing = 1;
1539 } else if (ci->type & BLOCK_FLAG_RAID6) {
1540 type = BLOCK_FLAG_RAID6;
1541 allowed_missing = 2;
1542 } else { // SINGLE
1543 type = BLOCK_FLAG_DUPLICATE;
1544 allowed_missing = 0;
1545 }
1546
1547 cis = (CHUNK_ITEM_STRIPE*)&ci[1];
1548
1549 RtlZeroMemory(&context, sizeof(read_data_context));
1550 KeInitializeEvent(&context.Event, NotificationEvent, FALSE);
1551
1552 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG);
1553 if (!context.stripes) {
1554 ERR("out of memory\n");
1555 return STATUS_INSUFFICIENT_RESOURCES;
1556 }
1557
1558 if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) {
1559 get_raid56_lock_range(c, addr, length, &lockaddr, &locklen);
1560 chunk_lock_range(Vcb, c, lockaddr, locklen);
1561 }
1562
1563 RtlZeroMemory(context.stripes, sizeof(read_data_stripe) * ci->num_stripes);
1564
1565 context.buflen = length;
1566 context.num_stripes = ci->num_stripes;
1567 context.stripes_left = context.num_stripes;
1568 context.sector_size = Vcb->superblock.sector_size;
1569 context.csum = csum;
1570 context.tree = is_tree;
1571 context.type = type;
1572
1573 if (type == BLOCK_FLAG_RAID0) {
1574 UINT64 startoff, endoff;
1575 UINT16 endoffstripe, stripe;
1576 UINT32 *stripeoff, pos;
1577 PMDL master_mdl;
1578 PFN_NUMBER* pfns;
1579
1580 // FIXME - test this still works if page size isn't the same as sector size
1581
1582 // This relies on the fact that MDLs are followed in memory by the page file numbers,
1583 // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0
1584 // data for you without doing a memcpy yourself.
1585 // MDLs are officially opaque, so this might very well break in future versions of Windows.
1586
1587 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe);
1588 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe);
1589
1590 if (file_read) {
1591 // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL
1592 // with duplicated dummy PFNs, which confuse check_csum. Ah well.
1593 // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested.
1594
1595 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1596
1597 if (!context.va) {
1598 ERR("out of memory\n");
1599 Status = STATUS_INSUFFICIENT_RESOURCES;
1600 goto exit;
1601 }
1602 } else
1603 context.va = buf;
1604
1605 master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
1606 if (!master_mdl) {
1607 ERR("out of memory\n");
1608 Status = STATUS_INSUFFICIENT_RESOURCES;
1609 goto exit;
1610 }
1611
1612 Status = STATUS_SUCCESS;
1613
1614 _SEH2_TRY {
1615 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1616 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1617 Status = _SEH2_GetExceptionCode();
1618 } _SEH2_END;
1619
1620 if (!NT_SUCCESS(Status)) {
1621 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1622 IoFreeMdl(master_mdl);
1623 goto exit;
1624 }
1625
1626 pfns = (PFN_NUMBER*)(master_mdl + 1);
1627
1628 for (i = 0; i < ci->num_stripes; i++) {
1629 if (startoffstripe > i)
1630 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1631 else if (startoffstripe == i)
1632 context.stripes[i].stripestart = startoff;
1633 else
1634 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length);
1635
1636 if (endoffstripe > i)
1637 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1638 else if (endoffstripe == i)
1639 context.stripes[i].stripeend = endoff + 1;
1640 else
1641 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length);
1642
1643 if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
1644 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), FALSE, FALSE, NULL);
1645
1646 if (!context.stripes[i].mdl) {
1647 ERR("IoAllocateMdl failed\n");
1648 Status = STATUS_INSUFFICIENT_RESOURCES;
1649 goto exit;
1650 }
1651 }
1652 }
1653
1654 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG);
1655 if (!stripeoff) {
1656 ERR("out of memory\n");
1657 Status = STATUS_INSUFFICIENT_RESOURCES;
1658 goto exit;
1659 }
1660
1661 RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes);
1662
1663 pos = 0;
1664 stripe = startoffstripe;
1665 while (pos < length) {
1666 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
1667
1668 if (pos == 0) {
1669 UINT32 readlen = (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length));
1670
1671 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1672
1673 stripeoff[stripe] += readlen;
1674 pos += readlen;
1675 } else if (length - pos < ci->stripe_length) {
1676 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1677
1678 pos = length;
1679 } else {
1680 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1681
1682 stripeoff[stripe] += (UINT32)ci->stripe_length;
1683 pos += (UINT32)ci->stripe_length;
1684 }
1685
1686 stripe = (stripe + 1) % ci->num_stripes;
1687 }
1688
1689 MmUnlockPages(master_mdl);
1690 IoFreeMdl(master_mdl);
1691
1692 ExFreePool(stripeoff);
1693 } else if (type == BLOCK_FLAG_RAID10) {
1694 UINT64 startoff, endoff;
1695 UINT16 endoffstripe, j, stripe;
1696 ULONG orig_ls;
1697 PMDL master_mdl;
1698 PFN_NUMBER* pfns;
1699 UINT32* stripeoff, pos;
1700 read_data_stripe** stripes;
1701
1702 if (c)
1703 orig_ls = c->last_stripe;
1704 else
1705 orig_ls = 0;
1706
1707 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe);
1708 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe);
1709
1710 if ((ci->num_stripes % ci->sub_stripes) != 0) {
1711 ERR("chunk %llx: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes);
1712 Status = STATUS_INTERNAL_ERROR;
1713 goto exit;
1714 }
1715
1716 if (file_read) {
1717 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1718
1719 if (!context.va) {
1720 ERR("out of memory\n");
1721 Status = STATUS_INSUFFICIENT_RESOURCES;
1722 goto exit;
1723 }
1724 } else
1725 context.va = buf;
1726
1727 context.firstoff = (UINT16)((startoff % ci->stripe_length) / Vcb->superblock.sector_size);
1728 context.startoffstripe = startoffstripe;
1729 context.sectors_per_stripe = (UINT16)(ci->stripe_length / Vcb->superblock.sector_size);
1730
1731 startoffstripe *= ci->sub_stripes;
1732 endoffstripe *= ci->sub_stripes;
1733
1734 if (c)
1735 c->last_stripe = (orig_ls + 1) % ci->sub_stripes;
1736
1737 master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
1738 if (!master_mdl) {
1739 ERR("out of memory\n");
1740 Status = STATUS_INSUFFICIENT_RESOURCES;
1741 goto exit;
1742 }
1743
1744 Status = STATUS_SUCCESS;
1745
1746 _SEH2_TRY {
1747 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1748 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1749 Status = _SEH2_GetExceptionCode();
1750 } _SEH2_END;
1751
1752 if (!NT_SUCCESS(Status)) {
1753 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1754 IoFreeMdl(master_mdl);
1755 goto exit;
1756 }
1757
1758 pfns = (PFN_NUMBER*)(master_mdl + 1);
1759
1760 stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1761 if (!stripes) {
1762 ERR("out of memory\n");
1763 Status = STATUS_INSUFFICIENT_RESOURCES;
1764 goto exit;
1765 }
1766
1767 RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes);
1768
1769 for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
1770 UINT64 sstart, send;
1771 BOOL stripeset = FALSE;
1772
1773 if (startoffstripe > i)
1774 sstart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1775 else if (startoffstripe == i)
1776 sstart = startoff;
1777 else
1778 sstart = startoff - (startoff % ci->stripe_length);
1779
1780 if (endoffstripe > i)
1781 send = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1782 else if (endoffstripe == i)
1783 send = endoff + 1;
1784 else
1785 send = endoff - (endoff % ci->stripe_length);
1786
1787 for (j = 0; j < ci->sub_stripes; j++) {
1788 if (j == orig_ls && devices[i+j] && devices[i+j]->devobj) {
1789 context.stripes[i+j].stripestart = sstart;
1790 context.stripes[i+j].stripeend = send;
1791 stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1792
1793 if (sstart != send) {
1794 context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), FALSE, FALSE, NULL);
1795
1796 if (!context.stripes[i+j].mdl) {
1797 ERR("IoAllocateMdl failed\n");
1798 Status = STATUS_INSUFFICIENT_RESOURCES;
1799 goto exit;
1800 }
1801 }
1802
1803 stripeset = TRUE;
1804 } else
1805 context.stripes[i+j].status = ReadDataStatus_Skip;
1806 }
1807
1808 if (!stripeset) {
1809 for (j = 0; j < ci->sub_stripes; j++) {
1810 if (devices[i+j] && devices[i+j]->devobj) {
1811 context.stripes[i+j].stripestart = sstart;
1812 context.stripes[i+j].stripeend = send;
1813 context.stripes[i+j].status = ReadDataStatus_Pending;
1814 stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1815
1816 if (sstart != send) {
1817 context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), FALSE, FALSE, NULL);
1818
1819 if (!context.stripes[i+j].mdl) {
1820 ERR("IoAllocateMdl failed\n");
1821 Status = STATUS_INSUFFICIENT_RESOURCES;
1822 goto exit;
1823 }
1824 }
1825
1826 stripeset = TRUE;
1827 break;
1828 }
1829 }
1830
1831 if (!stripeset) {
1832 ERR("could not find stripe to read\n");
1833 Status = STATUS_DEVICE_NOT_READY;
1834 goto exit;
1835 }
1836 }
1837 }
1838
1839 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1840 if (!stripeoff) {
1841 ERR("out of memory\n");
1842 Status = STATUS_INSUFFICIENT_RESOURCES;
1843 goto exit;
1844 }
1845
1846 RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes);
1847
1848 pos = 0;
1849 stripe = startoffstripe / ci->sub_stripes;
1850 while (pos < length) {
1851 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripe]->mdl + 1);
1852
1853 if (pos == 0) {
1854 UINT32 readlen = (UINT32)min(stripes[stripe]->stripeend - stripes[stripe]->stripestart,
1855 ci->stripe_length - (stripes[stripe]->stripestart % ci->stripe_length));
1856
1857 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1858
1859 stripeoff[stripe] += readlen;
1860 pos += readlen;
1861 } else if (length - pos < ci->stripe_length) {
1862 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1863
1864 pos = length;
1865 } else {
1866 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1867
1868 stripeoff[stripe] += (ULONG)ci->stripe_length;
1869 pos += (ULONG)ci->stripe_length;
1870 }
1871
1872 stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes);
1873 }
1874
1875 MmUnlockPages(master_mdl);
1876 IoFreeMdl(master_mdl);
1877
1878 ExFreePool(stripeoff);
1879 ExFreePool(stripes);
1880 } else if (type == BLOCK_FLAG_DUPLICATE) {
1881 UINT64 orig_ls;
1882
1883 if (c)
1884 orig_ls = i = c->last_stripe;
1885 else
1886 orig_ls = i = 0;
1887
1888 while (!devices[i] || !devices[i]->devobj) {
1889 i = (i + 1) % ci->num_stripes;
1890
1891 if (i == orig_ls) {
1892 ERR("no devices available to service request\n");
1893 Status = STATUS_DEVICE_NOT_READY;
1894 goto exit;
1895 }
1896 }
1897
1898 if (c)
1899 c->last_stripe = (i + 1) % ci->num_stripes;
1900
1901 context.stripes[i].stripestart = addr - offset;
1902 context.stripes[i].stripeend = context.stripes[i].stripestart + length;
1903
1904 if (file_read) {
1905 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1906
1907 if (!context.va) {
1908 ERR("out of memory\n");
1909 Status = STATUS_INSUFFICIENT_RESOURCES;
1910 goto exit;
1911 }
1912
1913 context.stripes[i].mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
1914 if (!context.stripes[i].mdl) {
1915 ERR("IoAllocateMdl failed\n");
1916 Status = STATUS_INSUFFICIENT_RESOURCES;
1917 goto exit;
1918 }
1919
1920 MmBuildMdlForNonPagedPool(context.stripes[i].mdl);
1921 } else {
1922 context.stripes[i].mdl = IoAllocateMdl(buf, length, FALSE, FALSE, NULL);
1923
1924 if (!context.stripes[i].mdl) {
1925 ERR("IoAllocateMdl failed\n");
1926 Status = STATUS_INSUFFICIENT_RESOURCES;
1927 goto exit;
1928 }
1929
1930 Status = STATUS_SUCCESS;
1931
1932 _SEH2_TRY {
1933 MmProbeAndLockPages(context.stripes[i].mdl, KernelMode, IoWriteAccess);
1934 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1935 Status = _SEH2_GetExceptionCode();
1936 } _SEH2_END;
1937
1938 if (!NT_SUCCESS(Status)) {
1939 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1940 goto exit;
1941 }
1942 }
1943 } else if (type == BLOCK_FLAG_RAID5) {
1944 UINT64 startoff, endoff;
1945 UINT16 endoffstripe, parity;
1946 UINT32 *stripeoff, pos;
1947 PMDL master_mdl;
1948 PFN_NUMBER *pfns, dummy;
1949 BOOL need_dummy = FALSE;
1950
1951 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe);
1952 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe);
1953
1954 if (file_read) {
1955 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1956
1957 if (!context.va) {
1958 ERR("out of memory\n");
1959 Status = STATUS_INSUFFICIENT_RESOURCES;
1960 goto exit;
1961 }
1962 } else
1963 context.va = buf;
1964
1965 master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
1966 if (!master_mdl) {
1967 ERR("out of memory\n");
1968 Status = STATUS_INSUFFICIENT_RESOURCES;
1969 goto exit;
1970 }
1971
1972 Status = STATUS_SUCCESS;
1973
1974 _SEH2_TRY {
1975 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1976 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1977 Status = _SEH2_GetExceptionCode();
1978 } _SEH2_END;
1979
1980 if (!NT_SUCCESS(Status)) {
1981 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1982 IoFreeMdl(master_mdl);
1983 goto exit;
1984 }
1985
1986 pfns = (PFN_NUMBER*)(master_mdl + 1);
1987
1988 pos = 0;
1989 while (pos < length) {
1990 parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
1991
1992 if (pos == 0) {
1993 UINT16 stripe = (parity + startoffstripe + 1) % ci->num_stripes;
1994 ULONG skip, readlen;
1995
1996 i = startoffstripe;
1997 while (stripe != parity) {
1998 if (i == startoffstripe) {
1999 readlen = min(length, (ULONG)(ci->stripe_length - (startoff % ci->stripe_length)));
2000
2001 context.stripes[stripe].stripestart = startoff;
2002 context.stripes[stripe].stripeend = startoff + readlen;
2003
2004 pos += readlen;
2005
2006 if (pos == length)
2007 break;
2008 } else {
2009 readlen = min(length - pos, (ULONG)ci->stripe_length);
2010
2011 context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2012 context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2013
2014 pos += readlen;
2015
2016 if (pos == length)
2017 break;
2018 }
2019
2020 i++;
2021 stripe = (stripe + 1) % ci->num_stripes;
2022 }
2023
2024 if (pos == length)
2025 break;
2026
2027 for (i = 0; i < startoffstripe; i++) {
2028 UINT16 stripe2 = (parity + i + 1) % ci->num_stripes;
2029
2030 context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2031 }
2032
2033 context.stripes[parity].stripestart = context.stripes[parity].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2034
2035 if (length - pos > ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length) {
2036 skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length)) - 1);
2037
2038 for (i = 0; i < ci->num_stripes; i++) {
2039 context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2040 }
2041
2042 pos += (UINT32)(skip * (ci->num_stripes - 1) * ci->num_stripes * ci->stripe_length);
2043 need_dummy = TRUE;
2044 }
2045 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2046 for (i = 0; i < ci->num_stripes; i++) {
2047 context.stripes[i].stripeend += ci->stripe_length;
2048 }
2049
2050 pos += (UINT32)(ci->stripe_length * (ci->num_stripes - 1));
2051 need_dummy = TRUE;
2052 } else {
2053 UINT16 stripe = (parity + 1) % ci->num_stripes;
2054
2055 i = 0;
2056 while (stripe != parity) {
2057 if (endoffstripe == i) {
2058 context.stripes[stripe].stripeend = endoff + 1;
2059 break;
2060 } else if (endoffstripe > i)
2061 context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2062
2063 i++;
2064 stripe = (stripe + 1) % ci->num_stripes;
2065 }
2066
2067 break;
2068 }
2069 }
2070
2071 for (i = 0; i < ci->num_stripes; i++) {
2072 if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2073 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart),
2074 FALSE, FALSE, NULL);
2075
2076 if (!context.stripes[i].mdl) {
2077 ERR("IoAllocateMdl failed\n");
2078 Status = STATUS_INSUFFICIENT_RESOURCES;
2079 goto exit;
2080 }
2081 }
2082 }
2083
2084 if (need_dummy) {
2085 dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2086 if (!dummypage) {
2087 ERR("out of memory\n");
2088 Status = STATUS_INSUFFICIENT_RESOURCES;
2089 goto exit;
2090 }
2091
2092 dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, FALSE, FALSE, NULL);
2093 if (!dummy_mdl) {
2094 ERR("IoAllocateMdl failed\n");
2095 Status = STATUS_INSUFFICIENT_RESOURCES;
2096 goto exit;
2097 }
2098
2099 MmBuildMdlForNonPagedPool(dummy_mdl);
2100
2101 dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2102 }
2103
2104 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG);
2105 if (!stripeoff) {
2106 ERR("out of memory\n");
2107 Status = STATUS_INSUFFICIENT_RESOURCES;
2108 goto exit;
2109 }
2110
2111 RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes);
2112
2113 pos = 0;
2114
2115 while (pos < length) {
2116 PFN_NUMBER* stripe_pfns;
2117
2118 parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
2119
2120 if (pos == 0) {
2121 UINT16 stripe = (parity + startoffstripe + 1) % ci->num_stripes;
2122 UINT32 readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2123 ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2124
2125 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2126
2127 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2128
2129 stripeoff[stripe] = readlen;
2130 pos += readlen;
2131
2132 stripe = (stripe + 1) % ci->num_stripes;
2133
2134 while (stripe != parity) {
2135 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2136 readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2137
2138 if (readlen == 0)
2139 break;
2140
2141 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2142
2143 stripeoff[stripe] = readlen;
2144 pos += readlen;
2145
2146 stripe = (stripe + 1) % ci->num_stripes;
2147 }
2148 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2149 UINT16 stripe = (parity + 1) % ci->num_stripes;
2150 ULONG k;
2151
2152 while (stripe != parity) {
2153 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2154
2155 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2156
2157 stripeoff[stripe] += (UINT32)ci->stripe_length;
2158 pos += (UINT32)ci->stripe_length;
2159
2160 stripe = (stripe + 1) % ci->num_stripes;
2161 }
2162
2163 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity].mdl + 1);
2164
2165 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2166 stripe_pfns[stripeoff[parity] >> PAGE_SHIFT] = dummy;
2167 stripeoff[parity] += PAGE_SIZE;
2168 }
2169 } else {
2170 UINT16 stripe = (parity + 1) % ci->num_stripes;
2171 UINT32 readlen;
2172
2173 while (pos < length) {
2174 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2175 readlen = min(length - pos, (ULONG)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2176
2177 if (readlen == 0)
2178 break;
2179
2180 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2181
2182 stripeoff[stripe] += readlen;
2183 pos += readlen;
2184
2185 stripe = (stripe + 1) % ci->num_stripes;
2186 }
2187 }
2188 }
2189
2190 MmUnlockPages(master_mdl);
2191 IoFreeMdl(master_mdl);
2192
2193 ExFreePool(stripeoff);
2194 } else if (type == BLOCK_FLAG_RAID6) {
2195 UINT64 startoff, endoff;
2196 UINT16 endoffstripe, parity1;
2197 UINT32 *stripeoff, pos;
2198 PMDL master_mdl;
2199 PFN_NUMBER *pfns, dummy;
2200 BOOL need_dummy = FALSE;
2201
2202 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe);
2203 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe);
2204
2205 if (file_read) {
2206 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
2207
2208 if (!context.va) {
2209 ERR("out of memory\n");
2210 Status = STATUS_INSUFFICIENT_RESOURCES;
2211 goto exit;
2212 }
2213 } else
2214 context.va = buf;
2215
2216 master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
2217 if (!master_mdl) {
2218 ERR("out of memory\n");
2219 Status = STATUS_INSUFFICIENT_RESOURCES;
2220 goto exit;
2221 }
2222
2223 Status = STATUS_SUCCESS;
2224
2225 _SEH2_TRY {
2226 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
2227 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2228 Status = _SEH2_GetExceptionCode();
2229 } _SEH2_END;
2230
2231 if (!NT_SUCCESS(Status)) {
2232 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
2233 IoFreeMdl(master_mdl);
2234 goto exit;
2235 }
2236
2237 pfns = (PFN_NUMBER*)(master_mdl + 1);
2238
2239 pos = 0;
2240 while (pos < length) {
2241 parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2242
2243 if (pos == 0) {
2244 UINT16 stripe = (parity1 + startoffstripe + 2) % ci->num_stripes, parity2;
2245 ULONG skip, readlen;
2246
2247 i = startoffstripe;
2248 while (stripe != parity1) {
2249 if (i == startoffstripe) {
2250 readlen = (ULONG)min(length, ci->stripe_length - (startoff % ci->stripe_length));
2251
2252 context.stripes[stripe].stripestart = startoff;
2253 context.stripes[stripe].stripeend = startoff + readlen;
2254
2255 pos += readlen;
2256
2257 if (pos == length)
2258 break;
2259 } else {
2260 readlen = min(length - pos, (ULONG)ci->stripe_length);
2261
2262 context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2263 context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2264
2265 pos += readlen;
2266
2267 if (pos == length)
2268 break;
2269 }
2270
2271 i++;
2272 stripe = (stripe + 1) % ci->num_stripes;
2273 }
2274
2275 if (pos == length)
2276 break;
2277
2278 for (i = 0; i < startoffstripe; i++) {
2279 UINT16 stripe2 = (parity1 + i + 2) % ci->num_stripes;
2280
2281 context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2282 }
2283
2284 context.stripes[parity1].stripestart = context.stripes[parity1].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2285
2286 parity2 = (parity1 + 1) % ci->num_stripes;
2287 context.stripes[parity2].stripestart = context.stripes[parity2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2288
2289 if (length - pos > ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length) {
2290 skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length)) - 1);
2291
2292 for (i = 0; i < ci->num_stripes; i++) {
2293 context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2294 }
2295
2296 pos += (UINT32)(skip * (ci->num_stripes - 2) * ci->num_stripes * ci->stripe_length);
2297 need_dummy = TRUE;
2298 }
2299 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2300 for (i = 0; i < ci->num_stripes; i++) {
2301 context.stripes[i].stripeend += ci->stripe_length;
2302 }
2303
2304 pos += (UINT32)(ci->stripe_length * (ci->num_stripes - 2));
2305 need_dummy = TRUE;
2306 } else {
2307 UINT16 stripe = (parity1 + 2) % ci->num_stripes;
2308
2309 i = 0;
2310 while (stripe != parity1) {
2311 if (endoffstripe == i) {
2312 context.stripes[stripe].stripeend = endoff + 1;
2313 break;
2314 } else if (endoffstripe > i)
2315 context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2316
2317 i++;
2318 stripe = (stripe + 1) % ci->num_stripes;
2319 }
2320
2321 break;
2322 }
2323 }
2324
2325 for (i = 0; i < ci->num_stripes; i++) {
2326 if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2327 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), FALSE, FALSE, NULL);
2328
2329 if (!context.stripes[i].mdl) {
2330 ERR("IoAllocateMdl failed\n");
2331 Status = STATUS_INSUFFICIENT_RESOURCES;
2332 goto exit;
2333 }
2334 }
2335 }
2336
2337 if (need_dummy) {
2338 dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2339 if (!dummypage) {
2340 ERR("out of memory\n");
2341 Status = STATUS_INSUFFICIENT_RESOURCES;
2342 goto exit;
2343 }
2344
2345 dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, FALSE, FALSE, NULL);
2346 if (!dummy_mdl) {
2347 ERR("IoAllocateMdl failed\n");
2348 Status = STATUS_INSUFFICIENT_RESOURCES;
2349 goto exit;
2350 }
2351
2352 MmBuildMdlForNonPagedPool(dummy_mdl);
2353
2354 dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2355 }
2356
2357 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG);
2358 if (!stripeoff) {
2359 ERR("out of memory\n");
2360 Status = STATUS_INSUFFICIENT_RESOURCES;
2361 goto exit;
2362 }
2363
2364 RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes);
2365
2366 pos = 0;
2367
2368 while (pos < length) {
2369 PFN_NUMBER* stripe_pfns;
2370
2371 parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2372
2373 if (pos == 0) {
2374 UINT16 stripe = (parity1 + startoffstripe + 2) % ci->num_stripes;
2375 UINT32 readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2376 ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2377
2378 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2379
2380 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2381
2382 stripeoff[stripe] = readlen;
2383 pos += readlen;
2384
2385 stripe = (stripe + 1) % ci->num_stripes;
2386
2387 while (stripe != parity1) {
2388 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2389 readlen = (UINT32)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2390
2391 if (readlen == 0)
2392 break;
2393
2394 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2395
2396 stripeoff[stripe] = readlen;
2397 pos += readlen;
2398
2399 stripe = (stripe + 1) % ci->num_stripes;
2400 }
2401 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2402 UINT16 stripe = (parity1 + 2) % ci->num_stripes;
2403 UINT16 parity2 = (parity1 + 1) % ci->num_stripes;
2404 ULONG k;
2405
2406 while (stripe != parity1) {
2407 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2408
2409 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2410
2411 stripeoff[stripe] += (UINT32)ci->stripe_length;
2412 pos += (UINT32)ci->stripe_length;
2413
2414 stripe = (stripe + 1) % ci->num_stripes;
2415 }
2416
2417 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity1].mdl + 1);
2418
2419 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2420 stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT] = dummy;
2421 stripeoff[parity1] += PAGE_SIZE;
2422 }
2423
2424 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity2].mdl + 1);
2425
2426 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2427 stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT] = dummy;
2428 stripeoff[parity2] += PAGE_SIZE;
2429 }
2430 } else {
2431 UINT16 stripe = (parity1 + 2) % ci->num_stripes;
2432 UINT32 readlen;
2433
2434 while (pos < length) {
2435 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2436 readlen = (UINT32)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2437
2438 if (readlen == 0)
2439 break;
2440
2441 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2442
2443 stripeoff[stripe] += readlen;
2444 pos += readlen;
2445
2446 stripe = (stripe + 1) % ci->num_stripes;
2447 }
2448 }
2449 }
2450
2451 MmUnlockPages(master_mdl);
2452 IoFreeMdl(master_mdl);
2453
2454 ExFreePool(stripeoff);
2455 }
2456
2457 context.address = addr;
2458
2459 for (i = 0; i < ci->num_stripes; i++) {
2460 if (!devices[i] || !devices[i]->devobj || context.stripes[i].stripestart == context.stripes[i].stripeend) {
2461 context.stripes[i].status = ReadDataStatus_MissingDevice;
2462 context.stripes_left--;
2463
2464 if (!devices[i] || !devices[i]->devobj)
2465 missing_devices++;
2466 }
2467 }
2468
2469 if (missing_devices > allowed_missing) {
2470 ERR("not enough devices to service request (%u missing)\n", missing_devices);
2471 Status = STATUS_UNEXPECTED_IO_ERROR;
2472 goto exit;
2473 }
2474
2475 for (i = 0; i < ci->num_stripes; i++) {
2476 PIO_STACK_LOCATION IrpSp;
2477
2478 if (devices[i] && devices[i]->devobj && context.stripes[i].stripestart != context.stripes[i].stripeend && context.stripes[i].status != ReadDataStatus_Skip) {
2479 context.stripes[i].context = (struct read_data_context*)&context;
2480
2481 if (type == BLOCK_FLAG_RAID10) {
2482 context.stripes[i].stripenum = i / ci->sub_stripes;
2483 }
2484
2485 if (!Irp) {
2486 context.stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE);
2487
2488 if (!context.stripes[i].Irp) {
2489 ERR("IoAllocateIrp failed\n");
2490 Status = STATUS_INSUFFICIENT_RESOURCES;
2491 goto exit;
2492 }
2493 } else {
2494 context.stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize);
2495
2496 if (!context.stripes[i].Irp) {
2497 ERR("IoMakeAssociatedIrp failed\n");
2498 Status = STATUS_INSUFFICIENT_RESOURCES;
2499 goto exit;
2500 }
2501 }
2502
2503 IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
2504 IrpSp->MajorFunction = IRP_MJ_READ;
2505
2506 if (devices[i]->devobj->Flags & DO_BUFFERED_IO) {
2507 context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), ALLOC_TAG);
2508 if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
2509 ERR("out of memory\n");
2510 Status = STATUS_INSUFFICIENT_RESOURCES;
2511 goto exit;
2512 }
2513
2514 context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION;
2515
2516 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2517 } else if (devices[i]->devobj->Flags & DO_DIRECT_IO)
2518 context.stripes[i].Irp->MdlAddress = context.stripes[i].mdl;
2519 else
2520 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2521
2522 IrpSp->Parameters.Read.Length = (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart);
2523 IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].stripestart + cis[i].offset;
2524
2525 total_reading += IrpSp->Parameters.Read.Length;
2526
2527 context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
2528
2529 IoSetCompletionRoutine(context.stripes[i].Irp, read_data_completion, &context.stripes[i], TRUE, TRUE, TRUE);
2530
2531 context.stripes[i].status = ReadDataStatus_Pending;
2532 }
2533 }
2534
2535 #ifdef DEBUG_STATS
2536 if (!is_tree)
2537 time1 = KeQueryPerformanceCounter(NULL);
2538 #endif
2539
2540 need_to_wait = FALSE;
2541 for (i = 0; i < ci->num_stripes; i++) {
2542 if (context.stripes[i].status != ReadDataStatus_MissingDevice && context.stripes[i].status != ReadDataStatus_Skip) {
2543 IoCallDriver(devices[i]->devobj, context.stripes[i].Irp);
2544 need_to_wait = TRUE;
2545 }
2546 }
2547
2548 if (need_to_wait)
2549 KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);
2550
2551 #ifdef DEBUG_STATS
2552 if (!is_tree) {
2553 time2 = KeQueryPerformanceCounter(NULL);
2554
2555 Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart;
2556 }
2557 #endif
2558
2559 if (diskacc)
2560 fFsRtlUpdateDiskCounters(total_reading, 0);
2561
2562 // check if any of the devices return a "user-induced" error
2563
2564 for (i = 0; i < ci->num_stripes; i++) {
2565 if (context.stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context.stripes[i].iosb.Status)) {
2566 Status = context.stripes[i].iosb.Status;
2567 goto exit;
2568 }
2569 }
2570
2571 if (type == BLOCK_FLAG_RAID0) {
2572 Status = read_data_raid0(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2573 if (!NT_SUCCESS(Status)) {
2574 ERR("read_data_raid0 returned %08x\n", Status);
2575
2576 if (file_read)
2577 ExFreePool(context.va);
2578
2579 goto exit;
2580 }
2581
2582 if (file_read) {
2583 RtlCopyMemory(buf, context.va, length);
2584 ExFreePool(context.va);
2585 }
2586 } else if (type == BLOCK_FLAG_RAID10) {
2587 Status = read_data_raid10(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2588
2589 if (!NT_SUCCESS(Status)) {
2590 ERR("read_data_raid10 returned %08x\n", Status);
2591
2592 if (file_read)
2593 ExFreePool(context.va);
2594
2595 goto exit;
2596 }
2597
2598 if (file_read) {
2599 RtlCopyMemory(buf, context.va, length);
2600 ExFreePool(context.va);
2601 }
2602 } else if (type == BLOCK_FLAG_DUPLICATE) {
2603 Status = read_data_dup(Vcb, file_read ? context.va : buf, addr, &context, ci, devices, generation);
2604 if (!NT_SUCCESS(Status)) {
2605 ERR("read_data_dup returned %08x\n", Status);
2606
2607 if (file_read)
2608 ExFreePool(context.va);
2609
2610 goto exit;
2611 }
2612
2613 if (file_read) {
2614 RtlCopyMemory(buf, context.va, length);
2615 ExFreePool(context.va);
2616 }
2617 } else if (type == BLOCK_FLAG_RAID5) {
2618 Status = read_data_raid5(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? TRUE : FALSE);
2619 if (!NT_SUCCESS(Status)) {
2620 ERR("read_data_raid5 returned %08x\n", Status);
2621
2622 if (file_read)
2623 ExFreePool(context.va);
2624
2625 goto exit;
2626 }
2627
2628 if (file_read) {
2629 RtlCopyMemory(buf, context.va, length);
2630 ExFreePool(context.va);
2631 }
2632 } else if (type == BLOCK_FLAG_RAID6) {
2633 Status = read_data_raid6(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? TRUE : FALSE);
2634 if (!NT_SUCCESS(Status)) {
2635 ERR("read_data_raid6 returned %08x\n", Status);
2636
2637 if (file_read)
2638 ExFreePool(context.va);
2639
2640 goto exit;
2641 }
2642
2643 if (file_read) {
2644 RtlCopyMemory(buf, context.va, length);
2645 ExFreePool(context.va);
2646 }
2647 }
2648
2649 exit:
2650 if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6))
2651 chunk_unlock_range(Vcb, c, lockaddr, locklen);
2652
2653 if (dummy_mdl)
2654 IoFreeMdl(dummy_mdl);
2655
2656 if (dummypage)
2657 ExFreePool(dummypage);
2658
2659 for (i = 0; i < ci->num_stripes; i++) {
2660 if (context.stripes[i].mdl) {
2661 if (context.stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
2662 MmUnlockPages(context.stripes[i].mdl);
2663
2664 IoFreeMdl(context.stripes[i].mdl);
2665 }
2666
2667 if (context.stripes[i].Irp)
2668 IoFreeIrp(context.stripes[i].Irp);
2669 }
2670
2671 ExFreePool(context.stripes);
2672
2673 if (!Vcb->log_to_phys_loaded)
2674 ExFreePool(devices);
2675
2676 return Status;
2677 }
2678
2679 NTSTATUS read_stream(fcb* fcb, UINT8* data, UINT64 start, ULONG length, ULONG* pbr) {
2680 ULONG readlen;
2681
2682 TRACE("(%p, %p, %llx, %llx, %p)\n", fcb, data, start, length, pbr);
2683
2684 if (pbr) *pbr = 0;
2685
2686 if (start >= fcb->adsdata.Length) {
2687 TRACE("tried to read beyond end of stream\n");
2688 return STATUS_END_OF_FILE;
2689 }
2690
2691 if (length == 0) {
2692 WARN("tried to read zero bytes\n");
2693 return STATUS_SUCCESS;
2694 }
2695
2696 if (start + length < fcb->adsdata.Length)
2697 readlen = length;
2698 else
2699 readlen = fcb->adsdata.Length - (ULONG)start;
2700
2701 if (readlen > 0)
2702 RtlCopyMemory(data + start, fcb->adsdata.Buffer, readlen);
2703
2704 if (pbr) *pbr = readlen;
2705
2706 return STATUS_SUCCESS;
2707 }
2708
2709 NTSTATUS read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, ULONG* pbr, PIRP Irp) {
2710 NTSTATUS Status;
2711 EXTENT_DATA* ed;
2712 UINT32 bytes_read = 0;
2713 UINT64 last_end;
2714 LIST_ENTRY* le;
2715 #ifdef DEBUG_STATS
2716 LARGE_INTEGER time1, time2;
2717 #endif
2718
2719 TRACE("(%p, %p, %llx, %llx, %p)\n", fcb, data, start, length, pbr);
2720
2721 if (pbr)
2722 *pbr = 0;
2723
2724 if (start >= fcb->inode_item.st_size) {
2725 WARN("Tried to read beyond end of file\n");
2726 Status = STATUS_END_OF_FILE;
2727 goto exit;
2728 }
2729
2730 #ifdef DEBUG_STATS
2731 time1 = KeQueryPerformanceCounter(NULL);
2732 #endif
2733
2734 le = fcb->extents.Flink;
2735
2736 last_end = start;
2737
2738 while (le != &fcb->extents) {
2739 UINT64 len;
2740 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
2741 EXTENT_DATA2* ed2;
2742
2743 if (!ext->ignore) {
2744 ed = &ext->extent_data;
2745
2746 ed2 = (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) ? (EXTENT_DATA2*)ed->data : NULL;
2747
2748 len = ed2 ? ed2->num_bytes : ed->decoded_size;
2749
2750 if (ext->offset + len <= start) {
2751 last_end = ext->offset + len;
2752 goto nextitem;
2753 }
2754
2755 if (ext->offset > last_end && ext->offset > start + bytes_read) {
2756 UINT32 read = (UINT32)min(length, ext->offset - max(start, last_end));
2757
2758 RtlZeroMemory(data + bytes_read, read);
2759 bytes_read += read;
2760 length -= read;
2761 }
2762
2763 if (length == 0 || ext->offset > start + bytes_read + length)
2764 break;
2765
2766 if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
2767 WARN("Encryption not supported\n");
2768 Status = STATUS_NOT_IMPLEMENTED;
2769 goto exit;
2770 }
2771
2772 if (ed->encoding != BTRFS_ENCODING_NONE) {
2773 WARN("Other encodings not supported\n");
2774 Status = STATUS_NOT_IMPLEMENTED;
2775 goto exit;
2776 }
2777
2778 switch (ed->type) {
2779 case EXTENT_TYPE_INLINE:
2780 {
2781 UINT64 off = start + bytes_read - ext->offset;
2782 UINT32 read;
2783
2784 if (ed->compression == BTRFS_COMPRESSION_NONE) {
2785 read = (UINT32)min(min(len, ext->datalen) - off, length);
2786
2787 RtlCopyMemory(data + bytes_read, &ed->data[off], read);
2788 } else if (ed->compression == BTRFS_COMPRESSION_ZLIB || ed->compression == BTRFS_COMPRESSION_LZO) {
2789 UINT8* decomp;
2790 BOOL decomp_alloc;
2791 UINT16 inlen = ext->datalen - (UINT16)offsetof(EXTENT_DATA, data[0]);
2792
2793 if (ed->decoded_size == 0 || ed->decoded_size > 0xffffffff) {
2794 ERR("ed->decoded_size was invalid (%llx)\n", ed->decoded_size);
2795 Status = STATUS_INTERNAL_ERROR;
2796 goto exit;
2797 }
2798
2799 read = (UINT32)min(ed->decoded_size - off, length);
2800
2801 if (off > 0) {
2802 decomp = ExAllocatePoolWithTag(NonPagedPool, (UINT32)ed->decoded_size, ALLOC_TAG);
2803 if (!decomp) {
2804 ERR("out of memory\n");
2805 Status = STATUS_INSUFFICIENT_RESOURCES;
2806 goto exit;
2807 }
2808
2809 decomp_alloc = TRUE;
2810 } else {
2811 decomp = data + bytes_read;
2812 decomp_alloc = FALSE;
2813 }
2814
2815 if (ed->compression == BTRFS_COMPRESSION_ZLIB) {
2816 Status = zlib_decompress(ed->data, inlen, decomp, (UINT32)(read + off));
2817 if (!NT_SUCCESS(Status)) {
2818 ERR("zlib_decompress returned %08x\n", Status);
2819 if (decomp_alloc) ExFreePool(decomp);
2820 goto exit;
2821 }
2822 } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
2823 if (inlen < sizeof(UINT32)) {
2824 ERR("extent data was truncated\n");
2825 Status = STATUS_INTERNAL_ERROR;
2826 if (decomp_alloc) ExFreePool(decomp);
2827 goto exit;
2828 } else
2829 inlen -= sizeof(UINT32);
2830
2831 Status = lzo_decompress(ed->data + sizeof(UINT32), inlen, decomp, (UINT32)(read + off), sizeof(UINT32));
2832 if (!NT_SUCCESS(Status)) {
2833 ERR("lzo_decompress returned %08x\n", Status);
2834 if (decomp_alloc) ExFreePool(decomp);
2835 goto exit;
2836 }
2837 }
2838
2839 if (decomp_alloc) {
2840 RtlCopyMemory(data + bytes_read, decomp + off, read);
2841 ExFreePool(decomp);
2842 }
2843 } else {
2844 ERR("unhandled compression type %x\n", ed->compression);
2845 Status = STATUS_NOT_IMPLEMENTED;
2846 goto exit;
2847 }
2848
2849 bytes_read += read;
2850 length -= read;
2851
2852 break;
2853 }
2854
2855 case EXTENT_TYPE_REGULAR:
2856 {
2857 UINT64 off = start + bytes_read - ext->offset;
2858 UINT32 to_read, read;
2859 UINT8* buf;
2860 BOOL mdl = (Irp && Irp->MdlAddress) ? TRUE : FALSE;
2861 BOOL buf_free;
2862 UINT32 bumpoff = 0, *csum;
2863 UINT64 addr;
2864 chunk* c;
2865
2866 read = (UINT32)(len - off);
2867 if (read > length) read = (UINT32)length;
2868
2869 if (ed->compression == BTRFS_COMPRESSION_NONE) {
2870 addr = ed2->address + ed2->offset + off;
2871 to_read = (UINT32)sector_align(read, fcb->Vcb->superblock.sector_size);
2872
2873 if (addr % fcb->Vcb->superblock.sector_size > 0) {
2874 bumpoff = addr % fcb->Vcb->superblock.sector_size;
2875 addr -= bumpoff;
2876 to_read = (UINT32)sector_align(read + bumpoff, fcb->Vcb->superblock.sector_size);
2877 }
2878 } else {
2879 addr = ed2->address;
2880 to_read = (UINT32)sector_align(ed2->size, fcb->Vcb->superblock.sector_size);
2881 }
2882
2883 if (ed->compression == BTRFS_COMPRESSION_NONE && start % fcb->Vcb->superblock.sector_size == 0 &&
2884 length % fcb->Vcb->superblock.sector_size == 0) {
2885 buf = data + bytes_read;
2886 buf_free = FALSE;
2887 } else {
2888 buf = ExAllocatePoolWithTag(PagedPool, to_read, ALLOC_TAG);
2889 buf_free = TRUE;
2890
2891 if (!buf) {
2892 ERR("out of memory\n");
2893 Status = STATUS_INSUFFICIENT_RESOURCES;
2894 goto exit;
2895 }
2896
2897 mdl = FALSE;
2898 }
2899
2900 c = get_chunk_from_address(fcb->Vcb, addr);
2901
2902 if (!c) {
2903 ERR("get_chunk_from_address(%llx) failed\n", addr);
2904
2905 if (buf_free)
2906 ExFreePool(buf);
2907
2908 goto exit;
2909 }
2910
2911 if (ext->csum) {
2912 if (ed->compression == BTRFS_COMPRESSION_NONE)
2913 csum = &ext->csum[off / fcb->Vcb->superblock.sector_size];
2914 else
2915 csum = ext->csum;
2916 } else
2917 csum = NULL;
2918
2919 Status = read_data(fcb->Vcb, addr, to_read, csum, FALSE, buf, c, NULL, Irp, 0, mdl,
2920 fcb && fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
2921 if (!NT_SUCCESS(Status)) {
2922 ERR("read_data returned %08x\n", Status);
2923
2924 if (buf_free)
2925 ExFreePool(buf);
2926
2927 goto exit;
2928 }
2929
2930 if (ed->compression == BTRFS_COMPRESSION_NONE) {
2931 if (buf_free)
2932 RtlCopyMemory(data + bytes_read, buf + bumpoff, read);
2933 } else {
2934 UINT8 *decomp = NULL, *buf2;
2935 ULONG outlen, inlen, off2;
2936 UINT32 inpageoff = 0;
2937
2938 off2 = (ULONG)(ed2->offset + off);
2939 buf2 = buf;
2940 inlen = (ULONG)ed2->size;
2941
2942 if (ed->compression == BTRFS_COMPRESSION_LZO) {
2943 ULONG inoff = sizeof(UINT32);
2944
2945 inlen -= sizeof(UINT32);
2946
2947 // If reading a few sectors in, skip to the interesting bit
2948 while (off2 > LINUX_PAGE_SIZE) {
2949 UINT32 partlen;
2950
2951 if (inlen < sizeof(UINT32))
2952 break;
2953
2954 partlen = *(UINT32*)(buf2 + inoff);
2955
2956 if (partlen < inlen) {
2957 off2 -= LINUX_PAGE_SIZE;
2958 inoff += partlen + sizeof(UINT32);
2959 inlen -= partlen + sizeof(UINT32);
2960
2961 if (LINUX_PAGE_SIZE - (inoff % LINUX_PAGE_SIZE) < sizeof(UINT32))
2962 inoff = ((inoff / LINUX_PAGE_SIZE) + 1) * LINUX_PAGE_SIZE;
2963 } else
2964 break;
2965 }
2966
2967 buf2 = &buf2[inoff];
2968 inpageoff = inoff % LINUX_PAGE_SIZE;
2969 }
2970
2971 if (off2 != 0) {
2972 outlen = off2 + min(read, (UINT32)(ed2->num_bytes - off));
2973
2974 decomp = ExAllocatePoolWithTag(PagedPool, outlen, ALLOC_TAG);
2975 if (!decomp) {
2976 ERR("out of memory\n");
2977 ExFreePool(buf);
2978 Status = STATUS_INSUFFICIENT_RESOURCES;
2979 goto exit;
2980 }
2981 } else
2982 outlen = min(read, (UINT32)(ed2->num_bytes - off));
2983
2984 if (ed->compression == BTRFS_COMPRESSION_ZLIB) {
2985 Status = zlib_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen);
2986
2987 if (!NT_SUCCESS(Status)) {
2988 ERR("zlib_decompress returned %08x\n", Status);
2989 ExFreePool(buf);
2990
2991 if (decomp)
2992 ExFreePool(decomp);
2993
2994 goto exit;
2995 }
2996 } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
2997 Status = lzo_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen, inpageoff);
2998
2999 if (!NT_SUCCESS(Status)) {
3000 ERR("lzo_decompress returned %08x\n", Status);
3001 ExFreePool(buf);
3002
3003 if (decomp)
3004 ExFreePool(decomp);
3005
3006 goto exit;
3007 }
3008 } else {
3009 ERR("unsupported compression type %x\n", ed->compression);
3010 Status = STATUS_NOT_SUPPORTED;
3011
3012 ExFreePool(buf);
3013
3014 if (decomp)
3015 ExFreePool(decomp);
3016
3017 goto exit;
3018 }
3019
3020 if (decomp) {
3021 RtlCopyMemory(data + bytes_read, decomp + off2, (size_t)min(read, ed2->num_bytes - off));
3022 ExFreePool(decomp);
3023 }
3024 }
3025
3026 if (buf_free)
3027 ExFreePool(buf);
3028
3029 bytes_read += read;
3030 length -= read;
3031
3032 break;
3033 }
3034
3035 case EXTENT_TYPE_PREALLOC:
3036 {
3037 UINT64 off = start + bytes_read - ext->offset;
3038 UINT32 read = (UINT32)(len - off);
3039
3040 if (read > length) read = (UINT32)length;
3041
3042 RtlZeroMemory(data + bytes_read, read);
3043
3044 bytes_read += read;
3045 length -= read;
3046
3047 break;
3048 }
3049
3050 default:
3051 WARN("Unsupported extent data type %u\n", ed->type);
3052 Status = STATUS_NOT_IMPLEMENTED;
3053 goto exit;
3054 }
3055
3056 last_end = ext->offset + len;
3057
3058 if (length == 0)
3059 break;
3060 }
3061
3062 nextitem:
3063 le = le->Flink;
3064 }
3065
3066 if (length > 0 && start + bytes_read < fcb->inode_item.st_size) {
3067 UINT32 read = (UINT32)min(fcb->inode_item.st_size - start - bytes_read, length);
3068
3069 RtlZeroMemory(data + bytes_read, read);
3070
3071 bytes_read += read;
3072 length -= read;
3073 }
3074
3075 Status = STATUS_SUCCESS;
3076 if (pbr)
3077 *pbr = bytes_read;
3078
3079 #ifdef DEBUG_STATS
3080 time2 = KeQueryPerformanceCounter(NULL);
3081
3082 fcb->Vcb->stats.num_reads++;
3083 fcb->Vcb->stats.data_read += bytes_read;
3084 fcb->Vcb->stats.read_total_time += time2.QuadPart - time1.QuadPart;
3085 #endif
3086
3087 exit:
3088 return Status;
3089 }
3090
3091 NTSTATUS do_read(PIRP Irp, BOOLEAN wait, ULONG* bytes_read) {
3092 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3093 PFILE_OBJECT FileObject = IrpSp->FileObject;
3094 fcb* fcb = FileObject->FsContext;
3095 UINT8* data = NULL;
3096 ULONG length = IrpSp->Parameters.Read.Length, addon = 0;
3097 UINT64 start = IrpSp->Parameters.Read.ByteOffset.QuadPart;
3098
3099 *bytes_read = 0;
3100
3101 if (!fcb || !fcb->Vcb || !fcb->subvol)
3102 return STATUS_INTERNAL_ERROR;
3103
3104 TRACE("file = %S (fcb = %p)\n", file_desc(FileObject), fcb);
3105 TRACE("offset = %llx, length = %x\n", start, length);
3106 TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "TRUE" : "FALSE", Irp->Flags & IRP_NOCACHE ? "TRUE" : "FALSE");
3107
3108 if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY)
3109 return STATUS_INVALID_DEVICE_REQUEST;
3110
3111 if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) {
3112 WARN("tried to read locked region\n");
3113 return STATUS_FILE_LOCK_CONFLICT;
3114 }
3115
3116 if (length == 0) {
3117 TRACE("tried to read zero bytes\n");
3118 return STATUS_SUCCESS;
3119 }
3120
3121 if (start >= (UINT64)fcb->Header.FileSize.QuadPart) {
3122 TRACE("tried to read with offset after file end (%llx >= %llx)\n", start, fcb->Header.FileSize.QuadPart);
3123 return STATUS_END_OF_FILE;
3124 }
3125
3126 TRACE("FileObject %p fcb %p FileSize = %llx st_size = %llx (%p)\n", FileObject, fcb, fcb->Header.FileSize.QuadPart, fcb->inode_item.st_size, &fcb->inode_item.st_size);
3127
3128 if (Irp->Flags & IRP_NOCACHE || !(IrpSp->MinorFunction & IRP_MN_MDL)) {
3129 data = map_user_buffer(Irp, fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
3130
3131 if (Irp->MdlAddress && !data) {
3132 ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
3133 return STATUS_INSUFFICIENT_RESOURCES;
3134 }
3135
3136 if (start >= (UINT64)fcb->Header.ValidDataLength.QuadPart) {
3137 length = (ULONG)min(length, min(start + length, (UINT64)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
3138 RtlZeroMemory(data, length);
3139 Irp->IoStatus.Information = *bytes_read = length;
3140 return STATUS_SUCCESS;
3141 }
3142
3143 if (length + start > (UINT64)fcb->Header.ValidDataLength.QuadPart) {
3144 addon = (ULONG)(min(start + length, (UINT64)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
3145 RtlZeroMemory(data + (fcb->Header.ValidDataLength.QuadPart - start), addon);
3146 length = (ULONG)(fcb->Header.ValidDataLength.QuadPart - start);
3147 }
3148 }
3149
3150 if (!(Irp->Flags & IRP_NOCACHE)) {
3151 NTSTATUS Status = STATUS_SUCCESS;
3152
3153 _SEH2_TRY {
3154 if (!FileObject->PrivateCacheMap) {
3155 CC_FILE_SIZES ccfs;
3156
3157 ccfs.AllocationSize = fcb->Header.AllocationSize;
3158 ccfs.FileSize = fcb->Header.FileSize;
3159 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
3160
3161 init_file_cache(FileObject, &ccfs);
3162 }
3163
3164 if (IrpSp->MinorFunction & IRP_MN_MDL) {
3165 CcMdlRead(FileObject,&IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus);
3166 } else {
3167 if (fCcCopyReadEx) {
3168 TRACE("CcCopyReadEx(%p, %llx, %x, %u, %p, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart,
3169 length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread);
3170 TRACE("sizes = %llx, %llx, %llx\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength);
3171 if (!fCcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) {
3172 TRACE("CcCopyReadEx could not wait\n");
3173
3174 IoMarkIrpPending(Irp);
3175 return STATUS_PENDING;
3176 }
3177 TRACE("CcCopyReadEx finished\n");
3178 } else {
3179 TRACE("CcCopyRead(%p, %llx, %x, %u, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus);
3180 TRACE("sizes = %llx, %llx, %llx\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength);
3181 if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) {
3182 TRACE("CcCopyRead could not wait\n");
3183
3184 IoMarkIrpPending(Irp);
3185 return STATUS_PENDING;
3186 }
3187 TRACE("CcCopyRead finished\n");
3188 }
3189 }
3190 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
3191 Status = _SEH2_GetExceptionCode();
3192 } _SEH2_END;
3193
3194 if (NT_SUCCESS(Status)) {
3195 Status = Irp->IoStatus.Status;
3196 Irp->IoStatus.Information += addon;
3197 *bytes_read = (ULONG)Irp->IoStatus.Information;
3198 } else
3199 ERR("EXCEPTION - %08x\n", Status);
3200
3201 return Status;
3202 } else {
3203 NTSTATUS Status;
3204
3205 if (!wait) {
3206 IoMarkIrpPending(Irp);
3207 return STATUS_PENDING;
3208 }
3209
3210 if (fcb->ads)
3211 Status = read_stream(fcb, data, start, length, bytes_read);
3212 else
3213 Status = read_file(fcb, data, start, length, bytes_read, Irp);
3214
3215 *bytes_read += addon;
3216 TRACE("read %u bytes\n", *bytes_read);
3217
3218 Irp->IoStatus.Information = *bytes_read;
3219
3220 if (diskacc && Status != STATUS_PENDING) {
3221 PETHREAD thread = NULL;
3222
3223 if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread))
3224 thread = Irp->Tail.Overlay.Thread;
3225 else if (!IoIsSystemThread(PsGetCurrentThread()))
3226 thread = PsGetCurrentThread();
3227 else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp)
3228 thread = PsGetCurrentThread();
3229
3230 if (thread)
3231 fPsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0);
3232 }
3233
3234 return Status;
3235 }
3236 }
3237
3238 _Dispatch_type_(IRP_MJ_READ)
3239 _Function_class_(DRIVER_DISPATCH)
3240 NTSTATUS drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
3241 device_extension* Vcb = DeviceObject->DeviceExtension;
3242 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3243 PFILE_OBJECT FileObject = IrpSp->FileObject;
3244 ULONG bytes_read = 0;
3245 NTSTATUS Status;
3246 BOOL top_level;
3247 fcb* fcb;
3248 ccb* ccb;
3249 BOOLEAN fcb_lock = FALSE, wait;
3250
3251 FsRtlEnterFileSystem();
3252
3253 top_level = is_top_level(Irp);
3254
3255 TRACE("read\n");
3256
3257 if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
3258 Status = vol_read(DeviceObject, Irp);
3259 goto exit2;
3260 } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
3261 Status = STATUS_INVALID_PARAMETER;
3262 goto end;
3263 }
3264
3265 Irp->IoStatus.Information = 0;
3266
3267 if (IrpSp->MinorFunction & IRP_MN_COMPLETE) {
3268 CcMdlReadComplete(IrpSp->FileObject, Irp->MdlAddress);
3269
3270 Irp->MdlAddress = NULL;
3271 Status = STATUS_SUCCESS;
3272
3273 goto exit;
3274 }
3275
3276 fcb = FileObject->FsContext;
3277
3278 if (!fcb) {
3279 ERR("fcb was NULL\n");
3280 Status = STATUS_INVALID_PARAMETER;
3281 goto exit;
3282 }
3283
3284 ccb = FileObject->FsContext2;
3285
3286 if (!ccb) {
3287 ERR("ccb was NULL\n");
3288 Status = STATUS_INVALID_PARAMETER;
3289 goto exit;
3290 }
3291
3292 if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_READ_DATA)) {
3293 WARN("insufficient privileges\n");
3294 Status = STATUS_ACCESS_DENIED;
3295 goto exit;
3296 }
3297
3298 if (fcb == Vcb->volume_fcb) {
3299 TRACE("reading volume FCB\n");
3300
3301 IoSkipCurrentIrpStackLocation(Irp);
3302
3303 Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp);
3304
3305 goto exit2;
3306 }
3307
3308 wait = IoIsOperationSynchronous(Irp);
3309
3310 // Don't offload jobs when doing paging IO - otherwise this can lead to
3311 // deadlocks in CcCopyRead.
3312 if (Irp->Flags & IRP_PAGING_IO)
3313 wait = TRUE;
3314
3315 if (!(Irp->Flags & IRP_PAGING_IO) && FileObject->SectionObjectPointer->DataSectionObject) {
3316 IO_STATUS_BLOCK iosb;
3317
3318 CcFlushCache(FileObject->SectionObjectPointer, &IrpSp->Parameters.Read.ByteOffset, IrpSp->Parameters.Read.Length, &iosb);
3319 if (!NT_SUCCESS(iosb.Status)) {
3320 ERR("CcFlushCache returned %08x\n", iosb.Status);
3321 return iosb.Status;
3322 }
3323 }
3324
3325 if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) {
3326 if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) {
3327 Status = STATUS_PENDING;
3328 IoMarkIrpPending(Irp);
3329 goto exit;
3330 }
3331
3332 fcb_lock = TRUE;
3333 }
3334
3335 Status = do_read(Irp, wait, &bytes_read);
3336
3337 if (fcb_lock)
3338 ExReleaseResourceLite(fcb->Header.Resource);
3339
3340 exit:
3341 if (FileObject->Flags & FO_SYNCHRONOUS_IO && !(Irp->Flags & IRP_PAGING_IO))
3342 FileObject->CurrentByteOffset.QuadPart = IrpSp->Parameters.Read.ByteOffset.QuadPart + (NT_SUCCESS(Status) ? bytes_read : 0);
3343
3344 end:
3345 Irp->IoStatus.Status = Status;
3346
3347 TRACE("Irp->IoStatus.Status = %08x\n", Irp->IoStatus.Status);
3348 TRACE("Irp->IoStatus.Information = %lu\n", Irp->IoStatus.Information);
3349 TRACE("returning %08x\n", Status);
3350
3351 if (Status != STATUS_PENDING)
3352 IoCompleteRequest(Irp, IO_NO_INCREMENT);
3353 else {
3354 if (!add_thread_job(Vcb, Irp))
3355 do_read_job(Irp);
3356 }
3357
3358 exit2:
3359 if (top_level)
3360 IoSetTopLevelIrp(NULL);
3361
3362 FsRtlExitFileSystem();
3363
3364 return Status;
3365 }