5934821beb2208a8ecd4162389dd25c7cb508efa
[reactos.git] / drivers / filesystems / btrfs / read.c
1 /* Copyright (c) Mark Harmstone 2016-17
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19
/* Outcome of one per-stripe read submitted on behalf of a logical read. */
enum read_data_status {
    ReadDataStatus_Pending,       // IRP still in flight
    ReadDataStatus_Success,       // stripe read completed successfully
    ReadDataStatus_Error,         // stripe read completed with a failure status
    ReadDataStatus_MissingDevice, // backing device absent; no read was issued
    ReadDataStatus_Skip           // stripe not needed for this request
};
27
struct read_data_context;

/* Per-stripe bookkeeping for a multi-device read. One of these exists for
 * each stripe of the chunk being read; all of them point back at the shared
 * read_data_context. */
typedef struct {
    struct read_data_context* context; // owning read context
    uint16_t stripenum;                // index of this stripe within the chunk
    bool rewrite;                      // presumably: stripe should be rewritten after recovery — TODO confirm against callers
    PIRP Irp;                          // IRP issued to the stripe's device
    IO_STATUS_BLOCK iosb;              // copy of the IRP's final status (filled in by the completion routine)
    enum read_data_status status;      // result of this stripe's read
    PMDL mdl;                          // MDL describing this stripe's portion of the buffer
    uint64_t stripestart;              // byte offset within the stripe where the read starts
    uint64_t stripeend;                // byte offset within the stripe where the read ends
} read_data_stripe;
41
/* Shared state for one logical read that may fan out across several
 * devices/stripes. The completion routine decrements stripes_left and
 * signals Event when the last stripe finishes. */
typedef struct {
    KEVENT Event;                // signaled when stripes_left reaches zero
    NTSTATUS Status;
    chunk* c;                    // chunk the address falls in
    uint64_t address;            // logical (btrfs) address being read
    uint32_t buflen;             // length of the destination buffer, in bytes
    LONG num_stripes, stripes_left; // LONG so InterlockedDecrement can be used
    uint64_t type;               // chunk type flags (RAID profile)
    uint32_t sector_size;
    uint16_t firstoff, startoffstripe, sectors_per_stripe;
    uint32_t* csum;              // expected crc32c per sector, or NULL if none
    bool tree;                   // true if this is a metadata (tree) read
    read_data_stripe* stripes;   // array of num_stripes entries
    uint8_t* va;
} read_data_context;
57
58 extern bool diskacc;
59 extern tPsUpdateDiskCounters fPsUpdateDiskCounters;
60 extern tCcCopyReadEx fCcCopyReadEx;
61 extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters;
62
63 #define LZO_PAGE_SIZE 4096
64
65 _Function_class_(IO_COMPLETION_ROUTINE)
66 static NTSTATUS __stdcall read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
67 read_data_stripe* stripe = conptr;
68 read_data_context* context = (read_data_context*)stripe->context;
69
70 UNUSED(DeviceObject);
71
72 stripe->iosb = Irp->IoStatus;
73
74 if (NT_SUCCESS(Irp->IoStatus.Status))
75 stripe->status = ReadDataStatus_Success;
76 else
77 stripe->status = ReadDataStatus_Error;
78
79 if (InterlockedDecrement(&context->stripes_left) == 0)
80 KeSetEvent(&context->Event, 0, false);
81
82 return STATUS_MORE_PROCESSING_REQUIRED;
83 }
84
85 NTSTATUS check_csum(device_extension* Vcb, uint8_t* data, uint32_t sectors, uint32_t* csum) {
86 NTSTATUS Status;
87 calc_job* cj;
88 uint32_t* csum2;
89
90 // From experimenting, it seems that 40 sectors is roughly the crossover
91 // point where offloading the crc32 calculation becomes worth it.
92
93 if (sectors < 40 || get_num_of_processors() < 2) {
94 ULONG j;
95
96 for (j = 0; j < sectors; j++) {
97 uint32_t crc32 = ~calc_crc32c(0xffffffff, data + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
98
99 if (crc32 != csum[j]) {
100 return STATUS_CRC_ERROR;
101 }
102 }
103
104 return STATUS_SUCCESS;
105 }
106
107 csum2 = ExAllocatePoolWithTag(PagedPool, sizeof(uint32_t) * sectors, ALLOC_TAG);
108 if (!csum2) {
109 ERR("out of memory\n");
110 return STATUS_INSUFFICIENT_RESOURCES;
111 }
112
113 Status = add_calc_job(Vcb, data, sectors, csum2, &cj);
114 if (!NT_SUCCESS(Status)) {
115 ERR("add_calc_job returned %08x\n", Status);
116 ExFreePool(csum2);
117 return Status;
118 }
119
120 KeWaitForSingleObject(&cj->event, Executive, KernelMode, false, NULL);
121
122 if (RtlCompareMemory(csum2, csum, sectors * sizeof(uint32_t)) != sectors * sizeof(uint32_t)) {
123 free_calc_job(cj);
124 ExFreePool(csum2);
125 return STATUS_CRC_ERROR;
126 }
127
128 free_calc_job(cj);
129 ExFreePool(csum2);
130
131 return STATUS_SUCCESS;
132 }
133
/* Post-read verification and self-heal for DUP/RAID1-style chunks, where every
 * stripe holds a full copy of the data.
 *
 * Picks the first successfully-read stripe, verifies it (tree-header crc and
 * address for metadata, per-sector crc32c list for data), and on checksum
 * failure tries the other mirrors, copying good data into `buf` and — when the
 * volume is writable — rewriting the bad copy in place.
 *
 * Returns STATUS_SUCCESS, STATUS_CRC_ERROR if no mirror yields good data, or
 * an error from a helper. */
static NTSTATUS read_data_dup(device_extension* Vcb, uint8_t* buf, uint64_t addr, read_data_context* context, CHUNK_ITEM* ci,
                              device** devices, uint64_t generation) {
    ULONG i;
    bool checksum_error = false;
    uint16_t j, stripe = 0;
    NTSTATUS Status;
    CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; // stripe array follows the CHUNK_ITEM on disk

    // Fail fast on any hard I/O error; otherwise remember the first good stripe.
    for (j = 0; j < ci->num_stripes; j++) {
        if (context->stripes[j].status == ReadDataStatus_Error) {
            WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
            return context->stripes[j].iosb.Status;
        } else if (context->stripes[j].status == ReadDataStatus_Success) {
            stripe = j;
            break;
        }
    }

    if (context->stripes[stripe].status != ReadDataStatus_Success)
        return STATUS_INTERNAL_ERROR;

    if (context->tree) {
        // Metadata: validate the tree header's embedded checksum, address and
        // (optionally) transaction generation.
        tree_header* th = (tree_header*)buf;
        uint32_t crc32;

        crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, context->buflen - sizeof(th->csum));

        if (th->address != context->address || crc32 != *((uint32_t*)th->csum)) {
            checksum_error = true;
            log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (generation != 0 && th->generation != generation) {
            checksum_error = true;
            log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
        }
    } else if (context->csum) {
        // Data: verify every sector against the expected crc32c list.
        Status = check_csum(Vcb, buf, (ULONG)context->stripes[stripe].Irp->IoStatus.Information / context->sector_size, context->csum);

        if (Status == STATUS_CRC_ERROR) {
            checksum_error = true;
            log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (!NT_SUCCESS(Status)) {
            ERR("check_csum returned %08x\n", Status);
            return Status;
        }
    }

    if (!checksum_error)
        return STATUS_SUCCESS;

    // Single copy only — nothing to recover from.
    if (ci->num_stripes == 1)
        return STATUS_CRC_ERROR;

    if (context->tree) {
        tree_header* t2;
        bool recovered = false;

        t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
        if (!t2) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        // Try each other mirror until one passes the full header validation.
        // NB: all mirrors share the same stripestart, so stripe's offset is
        // valid for device j here.
        for (j = 0; j < ci->num_stripes; j++) {
            if (j != stripe && devices[j] && devices[j]->devobj) {
                Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + context->stripes[stripe].stripestart,
                                        Vcb->superblock.node_size, (uint8_t*)t2, false);
                if (!NT_SUCCESS(Status)) {
                    WARN("sync_read_phys returned %08x\n", Status);
                    log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                } else {
                    uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum));

                    if (t2->address == addr && crc32 == *((uint32_t*)t2->csum) && (generation == 0 || t2->generation == generation)) {
                        RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
                        ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
                        recovered = true;

                        if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
                            Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + context->stripes[stripe].stripestart,
                                                     t2, Vcb->superblock.node_size);
                            if (!NT_SUCCESS(Status)) {
                                WARN("write_data_phys returned %08x\n", Status);
                                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                            }
                        }

                        break;
                    } else if (t2->address != addr || crc32 != *((uint32_t*)t2->csum))
                        log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                    else
                        log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_GENERATION_ERRORS);
                }
            }
        }

        if (!recovered) {
            ERR("unrecoverable checksum error at %I64x\n", addr);
            ExFreePool(t2);
            return STATUS_CRC_ERROR;
        }

        ExFreePool(t2);
    } else {
        // Data: re-check sector by sector, pulling each bad sector from
        // another mirror individually.
        ULONG sectors = (ULONG)context->stripes[stripe].Irp->IoStatus.Information / Vcb->superblock.sector_size;
        uint8_t* sector;

        sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
        if (!sector) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        for (i = 0; i < sectors; i++) {
            uint32_t crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

            if (context->csum[i] != crc32) {
                bool recovered = false;

                for (j = 0; j < ci->num_stripes; j++) {
                    if (j != stripe && devices[j] && devices[j]->devobj) {
                        Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj,
                                                cis[j].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
                                                Vcb->superblock.sector_size, sector, false);
                        if (!NT_SUCCESS(Status)) {
                            WARN("sync_read_phys returned %08x\n", Status);
                            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                        } else {
                            uint32_t crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);

                            if (crc32b == context->csum[i]) {
                                RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
                                ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);
                                recovered = true;

                                if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
                                    Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj,
                                                             cis[stripe].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
                                                             sector, Vcb->superblock.sector_size);
                                    if (!NT_SUCCESS(Status)) {
                                        WARN("write_data_phys returned %08x\n", Status);
                                        log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                                    }
                                }

                                break;
                            } else
                                log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                        }
                    }
                }

                if (!recovered) {
                    ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
                    ExFreePool(sector);
                    return STATUS_CRC_ERROR;
                }
            }
        }

        ExFreePool(sector);
    }

    return STATUS_SUCCESS;
}
299
/* Post-read verification for RAID0 chunks. RAID0 keeps no redundancy, so a
 * checksum failure here is unrecoverable — this function only identifies the
 * failing device (via the raid0 address mapping) for diagnostics, then
 * returns STATUS_CRC_ERROR. */
static NTSTATUS read_data_raid0(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context,
                                CHUNK_ITEM* ci, device** devices, uint64_t generation, uint64_t offset) {
    uint64_t i;

    // Any hard I/O error on any stripe fails the whole read.
    for (i = 0; i < ci->num_stripes; i++) {
        if (context->stripes[i].status == ReadDataStatus_Error) {
            WARN("stripe %I64u returned error %08x\n", i, context->stripes[i].iosb.Status);
            log_device_error(Vcb, devices[i], BTRFS_DEV_STAT_READ_ERRORS);
            return context->stripes[i].iosb.Status;
        }
    }

    if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries
        tree_header* th = (tree_header*)buf;
        uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

        if (crc32 != *((uint32_t*)th->csum) || addr != th->address || (generation != 0 && generation != th->generation)) {
            uint64_t off;
            uint16_t stripe;

            // Map the logical address back to (stripe, offset) purely to name
            // the offending device in the log.
            get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &off, &stripe);

            ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);

            if (crc32 != *((uint32_t*)th->csum)) {
                WARN("crc32 was %08x, expected %08x\n", crc32, *((uint32_t*)th->csum));
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                return STATUS_CRC_ERROR;
            } else if (addr != th->address) {
                WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr);
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                return STATUS_CRC_ERROR;
            } else if (generation != 0 && generation != th->generation) {
                WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation);
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
                return STATUS_CRC_ERROR;
            }
        }
    } else if (context->csum) {
        NTSTATUS Status;

        Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);

        if (Status == STATUS_CRC_ERROR) {
            // Re-scan sector by sector only to attribute the error to a device.
            for (i = 0; i < length / Vcb->superblock.sector_size; i++) {
                uint32_t crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

                if (context->csum[i] != crc32) {
                    uint64_t off;
                    uint16_t stripe;

                    get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, ci->num_stripes, &off, &stripe);

                    ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);

                    log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                    return Status;
                }
            }

            return Status;
        } else if (!NT_SUCCESS(Status)) {
            ERR("check_csum returned %08x\n", Status);
            return Status;
        }
    }

    return STATUS_SUCCESS;
}
370
371 static NTSTATUS read_data_raid10(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context,
372 CHUNK_ITEM* ci, device** devices, uint64_t generation, uint64_t offset) {
373 uint64_t i;
374 uint16_t j, stripe;
375 NTSTATUS Status;
376 bool checksum_error = false;
377 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
378
379 for (j = 0; j < ci->num_stripes; j++) {
380 if (context->stripes[j].status == ReadDataStatus_Error) {
381 WARN("stripe %I64u returned error %08x\n", j, context->stripes[j].iosb.Status);
382 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
383 return context->stripes[j].iosb.Status;
384 } else if (context->stripes[j].status == ReadDataStatus_Success)
385 stripe = j;
386 }
387
388 if (context->tree) {
389 tree_header* th = (tree_header*)buf;
390 uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
391
392 if (crc32 != *((uint32_t*)th->csum)) {
393 WARN("crc32 was %08x, expected %08x\n", crc32, *((uint32_t*)th->csum));
394 checksum_error = true;
395 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
396 } else if (addr != th->address) {
397 WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr);
398 checksum_error = true;
399 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
400 } else if (generation != 0 && generation != th->generation) {
401 WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation);
402 checksum_error = true;
403 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
404 }
405 } else if (context->csum) {
406 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
407
408 if (Status == STATUS_CRC_ERROR)
409 checksum_error = true;
410 else if (!NT_SUCCESS(Status)) {
411 ERR("check_csum returned %08x\n", Status);
412 return Status;
413 }
414 }
415
416 if (!checksum_error)
417 return STATUS_SUCCESS;
418
419 if (context->tree) {
420 tree_header* t2;
421 uint64_t off;
422 uint16_t badsubstripe = 0;
423 bool recovered = false;
424
425 t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
426 if (!t2) {
427 ERR("out of memory\n");
428 return STATUS_INSUFFICIENT_RESOURCES;
429 }
430
431 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &off, &stripe);
432
433 stripe *= ci->sub_stripes;
434
435 for (j = 0; j < ci->sub_stripes; j++) {
436 if (context->stripes[stripe + j].status == ReadDataStatus_Success) {
437 badsubstripe = j;
438 break;
439 }
440 }
441
442 for (j = 0; j < ci->sub_stripes; j++) {
443 if (context->stripes[stripe + j].status != ReadDataStatus_Success && devices[stripe + j] && devices[stripe + j]->devobj) {
444 Status = sync_read_phys(devices[stripe + j]->devobj, devices[stripe + j]->fileobj, cis[stripe + j].offset + off,
445 Vcb->superblock.node_size, (uint8_t*)t2, false);
446 if (!NT_SUCCESS(Status)) {
447 WARN("sync_read_phys returned %08x\n", Status);
448 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_READ_ERRORS);
449 } else {
450 uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum));
451
452 if (t2->address == addr && crc32 == *((uint32_t*)t2->csum) && (generation == 0 || t2->generation == generation)) {
453 RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
454 ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe + j]->devitem.dev_id);
455 recovered = true;
456
457 if (!Vcb->readonly && !devices[stripe + badsubstripe]->readonly && devices[stripe + badsubstripe]->devobj) { // write good data over bad
458 Status = write_data_phys(devices[stripe + badsubstripe]->devobj, devices[stripe + badsubstripe]->fileobj,
459 cis[stripe + badsubstripe].offset + off, t2, Vcb->superblock.node_size);
460 if (!NT_SUCCESS(Status)) {
461 WARN("write_data_phys returned %08x\n", Status);
462 log_device_error(Vcb, devices[stripe + badsubstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
463 }
464 }
465
466 break;
467 } else if (t2->address != addr || crc32 != *((uint32_t*)t2->csum))
468 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
469 else
470 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_GENERATION_ERRORS);
471 }
472 }
473 }
474
475 if (!recovered) {
476 ERR("unrecoverable checksum error at %I64x\n", addr);
477 ExFreePool(t2);
478 return STATUS_CRC_ERROR;
479 }
480
481 ExFreePool(t2);
482 } else {
483 ULONG sectors = length / Vcb->superblock.sector_size;
484 uint8_t* sector;
485
486 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
487 if (!sector) {
488 ERR("out of memory\n");
489 return STATUS_INSUFFICIENT_RESOURCES;
490 }
491
492 for (i = 0; i < sectors; i++) {
493 uint32_t crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
494
495 if (context->csum[i] != crc32) {
496 uint64_t off;
497 uint16_t stripe2, badsubstripe = 0;
498 bool recovered = false;
499
500 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
501 ci->num_stripes / ci->sub_stripes, &off, &stripe2);
502
503 stripe2 *= ci->sub_stripes;
504
505 for (j = 0; j < ci->sub_stripes; j++) {
506 if (context->stripes[stripe2 + j].status == ReadDataStatus_Success) {
507 badsubstripe = j;
508 break;
509 }
510 }
511
512 log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
513
514 for (j = 0; j < ci->sub_stripes; j++) {
515 if (context->stripes[stripe2 + j].status != ReadDataStatus_Success && devices[stripe2 + j] && devices[stripe2 + j]->devobj) {
516 Status = sync_read_phys(devices[stripe2 + j]->devobj, devices[stripe2 + j]->fileobj, cis[stripe2 + j].offset + off,
517 Vcb->superblock.sector_size, sector, false);
518 if (!NT_SUCCESS(Status)) {
519 WARN("sync_read_phys returned %08x\n", Status);
520 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_READ_ERRORS);
521 } else {
522 uint32_t crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);
523
524 if (crc32b == context->csum[i]) {
525 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
526 ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe2 + j]->devitem.dev_id);
527 recovered = true;
528
529 if (!Vcb->readonly && !devices[stripe2 + badsubstripe]->readonly && devices[stripe2 + badsubstripe]->devobj) { // write good data over bad
530 Status = write_data_phys(devices[stripe2 + badsubstripe]->devobj, devices[stripe2 + badsubstripe]->fileobj,
531 cis[stripe2 + badsubstripe].offset + off, sector, Vcb->superblock.sector_size);
532 if (!NT_SUCCESS(Status)) {
533 WARN("write_data_phys returned %08x\n", Status);
534 log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_READ_ERRORS);
535 }
536 }
537
538 break;
539 } else
540 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
541 }
542 }
543 }
544
545 if (!recovered) {
546 ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
547 ExFreePool(sector);
548 return STATUS_CRC_ERROR;
549 }
550 }
551 }
552
553 ExFreePool(sector);
554 }
555
556 return STATUS_SUCCESS;
557 }
558
/* Post-read verification and parity-based recovery for RAID5 chunks.
 *
 * After overlaying any not-yet-flushed partial-stripe data from the chunk's
 * in-memory partial_stripes list, the buffer is verified (tree header for
 * metadata, per-sector crc32c for data). On failure, the missing/bad block is
 * reconstructed by XORing the corresponding blocks of every other stripe
 * (including parity), and — when the volume is writable — the reconstructed
 * data is written back over the bad copy.
 *
 * `degraded` indicates a device is missing, in which case checksum_error is
 * forced even without csums so the data is rebuilt from parity. */
static NTSTATUS read_data_raid5(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, CHUNK_ITEM* ci,
                                device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) {
    ULONG i;
    NTSTATUS Status;
    bool checksum_error = false;
    CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1]; // stripe array follows the CHUNK_ITEM
    uint16_t j, stripe;
    bool no_success = true;

    // Fail on any hard I/O error; note whether any stripe succeeded (stripe
    // is only meaningful when no_success is false).
    for (j = 0; j < ci->num_stripes; j++) {
        if (context->stripes[j].status == ReadDataStatus_Error) {
            WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
            return context->stripes[j].iosb.Status;
        } else if (context->stripes[j].status == ReadDataStatus_Success) {
            stripe = j;
            no_success = false;
        }
    }

    if (c) { // check partial stripes
        // Data in a partial stripe may be newer in memory than on disk;
        // overlay any clear (unwritten) runs that overlap this read.
        LIST_ENTRY* le;
        uint64_t ps_length = (ci->num_stripes - 1) * ci->stripe_length; // usable bytes per full stripe (one stripe is parity)

        ExAcquireResourceSharedLite(&c->partial_stripes_lock, true);

        le = c->partial_stripes.Flink;
        while (le != &c->partial_stripes) {
            partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);

            if (ps->address + ps_length > addr && ps->address < addr + length) {
                ULONG runlength, index;

                runlength = RtlFindFirstRunClear(&ps->bmp, &index);

                while (runlength != 0) {
#ifdef __REACTOS__
                    uint64_t runstart, runend, start, end;
#endif
                    if (index >= ps->bmplen)
                        break;

                    // Clamp the run to the bitmap's valid length.
                    if (index + runlength >= ps->bmplen) {
                        runlength = ps->bmplen - index;

                        if (runlength == 0)
                            break;
                    }

#ifndef __REACTOS__
                    uint64_t runstart = ps->address + (index * Vcb->superblock.sector_size);
                    uint64_t runend = runstart + (runlength * Vcb->superblock.sector_size);
                    uint64_t start = max(runstart, addr);
                    uint64_t end = min(runend, addr + length);
#else
                    runstart = ps->address + (index * Vcb->superblock.sector_size);
                    runend = runstart + (runlength * Vcb->superblock.sector_size);
                    start = max(runstart, addr);
                    end = min(runend, addr + length);
#endif

                    if (end > start)
                        RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));

                    runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
                }
            } else if (ps->address >= addr + length)
                break; // list is address-ordered; nothing further can overlap

            le = le->Flink;
        }

        ExReleaseResourceLite(&c->partial_stripes_lock);
    }

    if (context->tree) {
        // Metadata: validate header checksum, address and generation.
        tree_header* th = (tree_header*)buf;
        uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

        if (addr != th->address || crc32 != *((uint32_t*)th->csum)) {
            checksum_error = true;
            if (!no_success && !degraded)
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (generation != 0 && generation != th->generation) {
            checksum_error = true;
            if (!no_success && !degraded)
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
        }
    } else if (context->csum) {
        Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);

        if (Status == STATUS_CRC_ERROR) {
            if (!degraded)
                WARN("checksum error\n");
            checksum_error = true;
        } else if (!NT_SUCCESS(Status)) {
            ERR("check_csum returned %08x\n", Status);
            return Status;
        }
    } else if (degraded)
        // No csums to check, but a device is missing: force reconstruction.
        checksum_error = true;

    if (!checksum_error)
        return STATUS_SUCCESS;

    if (context->tree) {
        uint16_t parity;
        uint64_t off;
        bool recovered = false, first = true, failed = false;
        uint8_t* t2;

        // Two node-sized buffers: t2 accumulates the XOR, t2+node_size holds
        // each freshly-read block.
        t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * 2, ALLOC_TAG);
        if (!t2) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &off, &stripe);

        // Parity stripe rotates each full stripe; map data-stripe index to
        // the physical stripe index after the rotation.
        parity = (((addr - offset) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;

        stripe = (parity + stripe + 1) % ci->num_stripes;

        // XOR together every other stripe (data + parity) to reconstruct.
        for (j = 0; j < ci->num_stripes; j++) {
            if (j != stripe) {
                if (devices[j] && devices[j]->devobj) {
                    if (first) {
                        Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2, false);
                        if (!NT_SUCCESS(Status)) {
                            ERR("sync_read_phys returned %08x\n", Status);
                            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                            failed = true;
                            break;
                        }

                        first = false;
                    } else {
                        Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2 + Vcb->superblock.node_size, false);
                        if (!NT_SUCCESS(Status)) {
                            ERR("sync_read_phys returned %08x\n", Status);
                            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                            failed = true;
                            break;
                        }

                        do_xor(t2, t2 + Vcb->superblock.node_size, Vcb->superblock.node_size);
                    }
                } else {
                    // Two missing stripes: RAID5 cannot reconstruct.
                    failed = true;
                    break;
                }
            }
        }

        if (!failed) {
            // Validate the reconstructed node before accepting it.
            tree_header* t3 = (tree_header*)t2;
            uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&t3->fs_uuid, Vcb->superblock.node_size - sizeof(t3->csum));

            if (t3->address == addr && crc32 == *((uint32_t*)t3->csum) && (generation == 0 || t3->generation == generation)) {
                RtlCopyMemory(buf, t2, Vcb->superblock.node_size);

                if (!degraded)
                    ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);

                recovered = true;

                if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
                    Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off, t2, Vcb->superblock.node_size);
                    if (!NT_SUCCESS(Status)) {
                        WARN("write_data_phys returned %08x\n", Status);
                        log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                    }
                }
            }
        }

        if (!recovered) {
            ERR("unrecoverable checksum error at %I64x\n", addr);
            ExFreePool(t2);
            return STATUS_CRC_ERROR;
        }

        ExFreePool(t2);
    } else {
        // Data: reconstruct each bad/missing sector individually.
        ULONG sectors = length / Vcb->superblock.sector_size;
        uint8_t* sector;

        // Two sector-sized buffers: accumulator + scratch, as above.
        sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * 2, ALLOC_TAG);
        if (!sector) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        for (i = 0; i < sectors; i++) {
            uint16_t parity;
            uint64_t off;
            uint32_t crc32;

            // crc32 is only computed/used when csums are present.
            if (context->csum)
                crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

            get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
                             ci->num_stripes - 1, &off, &stripe);

            parity = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;

            stripe = (parity + stripe + 1) % ci->num_stripes;

            // Reconstruct if the device is gone or the sector's csum is bad.
            if (!devices[stripe] || !devices[stripe]->devobj || (context->csum && context->csum[i] != crc32)) {
                bool recovered = false, first = true, failed = false;

                if (devices[stripe] && devices[stripe]->devobj)
                    log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_READ_ERRORS);

                for (j = 0; j < ci->num_stripes; j++) {
                    if (j != stripe) {
                        if (devices[j] && devices[j]->devobj) {
                            if (first) {
                                Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size, sector, false);
                                if (!NT_SUCCESS(Status)) {
                                    ERR("sync_read_phys returned %08x\n", Status);
                                    failed = true;
                                    log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                                    break;
                                }

                                first = false;
                            } else {
                                Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size,
                                                        sector + Vcb->superblock.sector_size, false);
                                if (!NT_SUCCESS(Status)) {
                                    ERR("sync_read_phys returned %08x\n", Status);
                                    failed = true;
                                    log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                                    break;
                                }

                                do_xor(sector, sector + Vcb->superblock.sector_size, Vcb->superblock.sector_size);
                            }
                        } else {
                            failed = true;
                            break;
                        }
                    }
                }

                if (!failed) {
                    if (context->csum)
                        crc32 = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);

                    // Without csums (degraded read) the reconstruction is
                    // accepted unverified.
                    if (!context->csum || crc32 == context->csum[i]) {
                        RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);

                        if (!degraded)
                            ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);

                        recovered = true;

                        if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
                            Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off,
                                                     sector, Vcb->superblock.sector_size);
                            if (!NT_SUCCESS(Status)) {
                                WARN("write_data_phys returned %08x\n", Status);
                                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                            }
                        }
                    }
                }

                if (!recovered) {
                    ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
                    ExFreePool(sector);
                    return STATUS_CRC_ERROR;
                }
            }
        }

        ExFreePool(sector);
    }

    return STATUS_SUCCESS;
}
841
/* Reconstructs two missing RAID6 stripes.
 *
 * `sectors` holds num_stripes blocks of sector_size bytes laid out
 * [data 0 .. data n-3][P][Q], where P is at index num_stripes-2 and Q
 * (Reed-Solomon syndrome) at num_stripes-1. missing1/missing2 are the two
 * missing stripe indices; the reconstructed data is written to `out`
 * (two blocks when both missing stripes are data stripes). */
void raid6_recover2(uint8_t* sectors, uint16_t num_stripes, ULONG sector_size, uint16_t missing1, uint16_t missing2, uint8_t* out) {
    if (missing1 == num_stripes - 2 || missing2 == num_stripes - 2) { // reconstruct from q and data
        // P is one of the missing stripes, so recover the missing data
        // stripe from Q alone using Galois-field arithmetic.
        uint16_t missing = missing1 == (num_stripes - 2) ? missing2 : missing1;
        uint16_t stripe;

        stripe = num_stripes - 3; // highest data stripe index

        if (stripe == missing)
            RtlZeroMemory(out, sector_size);
        else
            RtlCopyMemory(out, sectors + (stripe * sector_size), sector_size);

        // Horner-style accumulation: double (multiply by g) then XOR each
        // remaining data stripe, skipping the missing one.
        do {
            stripe--;

            galois_double(out, sector_size);

            if (stripe != missing)
                do_xor(out, sectors + (stripe * sector_size), sector_size);
        } while (stripe > 0);

        // XOR in Q, then divide out g^missing to isolate the missing stripe.
        do_xor(out, sectors + ((num_stripes - 1) * sector_size), sector_size);

        if (missing != 0)
            galois_divpower(out, (uint8_t)missing, sector_size);
    } else { // reconstruct from p and q
        // Both missing stripes are data stripes: solve the 2x2 GF(2^8)
        // system using both P and Q.
        uint16_t x, y, stripe;
        uint8_t gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
        uint32_t j;

        stripe = num_stripes - 3;

        // pxy/qxy are the P and Q syndromes computed over the surviving
        // data stripes only.
        pxy = out + sector_size;
        qxy = out;

        if (stripe == missing1 || stripe == missing2) {
            RtlZeroMemory(qxy, sector_size);
            RtlZeroMemory(pxy, sector_size);

            if (stripe == missing1)
                x = stripe;
            else
                y = stripe;
        } else {
            RtlCopyMemory(qxy, sectors + (stripe * sector_size), sector_size);
            RtlCopyMemory(pxy, sectors + (stripe * sector_size), sector_size);
        }

        do {
            stripe--;

            galois_double(qxy, sector_size);

            if (stripe != missing1 && stripe != missing2) {
                do_xor(qxy, sectors + (stripe * sector_size), sector_size);
                do_xor(pxy, sectors + (stripe * sector_size), sector_size);
            } else if (stripe == missing1)
                x = stripe;
            else if (stripe == missing2)
                y = stripe;
        } while (stripe > 0);

        // NOTE(review): x and y are assigned only inside the loops above;
        // this relies on missing1 and missing2 both being data stripes
        // (< num_stripes - 2) and distinct — appears guaranteed by this
        // branch's condition, but worth confirming at call sites.

        // Standard RAID6 two-erasure coefficients (cf. the Linux kernel's
        // raid6 recovery): A = g^(y-x) / (g^(y-x) ^ 1), B = g^-x / (g^(y-x) ^ 1).
        gyx = gpow2(y > x ? (y-x) : (255-x+y));
        gx = gpow2(255-x);

        denom = gdiv(1, gyx ^ 1);
        a = gmul(gyx, denom);
        b = gmul(gx, denom);

        p = sectors + ((num_stripes - 2) * sector_size);
        q = sectors + ((num_stripes - 1) * sector_size);

        // Dx = A*(P ^ Pxy) ^ B*(Q ^ Qxy), computed byte by byte into qxy.
        for (j = 0; j < sector_size; j++) {
            *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);

            p++;
            q++;
            pxy++;
            qxy++;
        }

        // Dy = Dx ^ P ^ Pxy, written to the second output block.
        do_xor(out + sector_size, out, sector_size);
        do_xor(out + sector_size, sectors + ((num_stripes - 2) * sector_size), sector_size);
    }
}
927
928 static NTSTATUS read_data_raid6(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, CHUNK_ITEM* ci,
929 device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) {
930 NTSTATUS Status;
931 ULONG i;
932 bool checksum_error = false;
933 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
934 uint16_t stripe, j;
935 bool no_success = true;
936
937 for (j = 0; j < ci->num_stripes; j++) {
938 if (context->stripes[j].status == ReadDataStatus_Error) {
939 WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
940
941 if (devices[j])
942 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
943 return context->stripes[j].iosb.Status;
944 } else if (context->stripes[j].status == ReadDataStatus_Success) {
945 stripe = j;
946 no_success = false;
947 }
948 }
949
950 if (c) { // check partial stripes
951 LIST_ENTRY* le;
952 uint64_t ps_length = (ci->num_stripes - 2) * ci->stripe_length;
953
954 ExAcquireResourceSharedLite(&c->partial_stripes_lock, true);
955
956 le = c->partial_stripes.Flink;
957 while (le != &c->partial_stripes) {
958 partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
959
960 if (ps->address + ps_length > addr && ps->address < addr + length) {
961 ULONG runlength, index;
962
963 runlength = RtlFindFirstRunClear(&ps->bmp, &index);
964
965 while (runlength != 0) {
966 #ifdef __REACTOS__
967 uint64_t runstart, runend, start, end;
968 #endif
969 if (index >= ps->bmplen)
970 break;
971
972 if (index + runlength >= ps->bmplen) {
973 runlength = ps->bmplen - index;
974
975 if (runlength == 0)
976 break;
977 }
978
979 #ifndef __REACTOS__
980 uint64_t runstart = ps->address + (index * Vcb->superblock.sector_size);
981 uint64_t runend = runstart + (runlength * Vcb->superblock.sector_size);
982 uint64_t start = max(runstart, addr);
983 uint64_t end = min(runend, addr + length);
984 #else
985 runstart = ps->address + (index * Vcb->superblock.sector_size);
986 runend = runstart + (runlength * Vcb->superblock.sector_size);
987 start = max(runstart, addr);
988 end = min(runend, addr + length);
989 #endif
990
991 if (end > start)
992 RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
993
994 runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
995 }
996 } else if (ps->address >= addr + length)
997 break;
998
999 le = le->Flink;
1000 }
1001
1002 ExReleaseResourceLite(&c->partial_stripes_lock);
1003 }
1004
1005 if (context->tree) {
1006 tree_header* th = (tree_header*)buf;
1007 uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1008
1009 if (addr != th->address || crc32 != *((uint32_t*)th->csum)) {
1010 checksum_error = true;
1011 if (!no_success && !degraded && devices[stripe])
1012 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1013 } else if (generation != 0 && generation != th->generation) {
1014 checksum_error = true;
1015 if (!no_success && !degraded && devices[stripe])
1016 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
1017 }
1018 } else if (context->csum) {
1019 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
1020
1021 if (Status == STATUS_CRC_ERROR) {
1022 if (!degraded)
1023 WARN("checksum error\n");
1024 checksum_error = true;
1025 } else if (!NT_SUCCESS(Status)) {
1026 ERR("check_csum returned %08x\n", Status);
1027 return Status;
1028 }
1029 } else if (degraded)
1030 checksum_error = true;
1031
1032 if (!checksum_error)
1033 return STATUS_SUCCESS;
1034
1035 if (context->tree) {
1036 uint8_t* sector;
1037 uint16_t k, physstripe, parity1, parity2, error_stripe;
1038 uint64_t off;
1039 bool recovered = false, failed = false;
1040 ULONG num_errors = 0;
1041
1042 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * (ci->num_stripes + 2), ALLOC_TAG);
1043 if (!sector) {
1044 ERR("out of memory\n");
1045 return STATUS_INSUFFICIENT_RESOURCES;
1046 }
1047
1048 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &off, &stripe);
1049
1050 parity1 = (((addr - offset) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1051 parity2 = (parity1 + 1) % ci->num_stripes;
1052
1053 physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1054
1055 j = (parity2 + 1) % ci->num_stripes;
1056
1057 for (k = 0; k < ci->num_stripes - 1; k++) {
1058 if (j != physstripe) {
1059 if (devices[j] && devices[j]->devobj) {
1060 Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size,
1061 sector + (k * Vcb->superblock.node_size), false);
1062 if (!NT_SUCCESS(Status)) {
1063 ERR("sync_read_phys returned %08x\n", Status);
1064 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1065 num_errors++;
1066 error_stripe = k;
1067
1068 if (num_errors > 1) {
1069 failed = true;
1070 break;
1071 }
1072 }
1073 } else {
1074 num_errors++;
1075 error_stripe = k;
1076
1077 if (num_errors > 1) {
1078 failed = true;
1079 break;
1080 }
1081 }
1082 }
1083
1084 j = (j + 1) % ci->num_stripes;
1085 }
1086
1087 if (!failed) {
1088 if (num_errors == 0) {
1089 tree_header* th = (tree_header*)(sector + (stripe * Vcb->superblock.node_size));
1090 uint32_t crc32;
1091
1092 RtlCopyMemory(sector + (stripe * Vcb->superblock.node_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size),
1093 Vcb->superblock.node_size);
1094
1095 for (j = 0; j < ci->num_stripes - 2; j++) {
1096 if (j != stripe)
1097 do_xor(sector + (stripe * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), Vcb->superblock.node_size);
1098 }
1099
1100 crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1101
1102 if (th->address == addr && crc32 == *((uint32_t*)th->csum) && (generation == 0 || th->generation == generation)) {
1103 RtlCopyMemory(buf, sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1104
1105 if (devices[physstripe] && devices[physstripe]->devobj)
1106 ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id);
1107
1108 recovered = true;
1109
1110 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1111 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1112 sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1113 if (!NT_SUCCESS(Status)) {
1114 WARN("write_data_phys returned %08x\n", Status);
1115 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1116 }
1117 }
1118 }
1119 }
1120
1121 if (!recovered) {
1122 uint32_t crc32;
1123 tree_header* th = (tree_header*)(sector + (ci->num_stripes * Vcb->superblock.node_size));
1124 bool read_q = false;
1125
1126 if (devices[parity2] && devices[parity2]->devobj) {
1127 Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off,
1128 Vcb->superblock.node_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.node_size), false);
1129 if (!NT_SUCCESS(Status)) {
1130 ERR("sync_read_phys returned %08x\n", Status);
1131 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1132 } else
1133 read_q = true;
1134 }
1135
1136 if (read_q) {
1137 if (num_errors == 1) {
1138 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.node_size));
1139
1140 crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1141
1142 if (th->address == addr && crc32 == *((uint32_t*)th->csum) && (generation == 0 || th->generation == generation))
1143 recovered = true;
1144 } else {
1145 for (j = 0; j < ci->num_stripes - 1; j++) {
1146 if (j != stripe) {
1147 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.node_size));
1148
1149 crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1150
1151 if (th->address == addr && crc32 == *((uint32_t*)th->csum) && (generation == 0 || th->generation == generation)) {
1152 recovered = true;
1153 error_stripe = j;
1154 break;
1155 }
1156 }
1157 }
1158 }
1159 }
1160
1161 if (recovered) {
1162 uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1163
1164 if (devices[physstripe] && devices[physstripe]->devobj)
1165 ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id);
1166
1167 RtlCopyMemory(buf, sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1168
1169 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1170 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1171 sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1172 if (!NT_SUCCESS(Status)) {
1173 WARN("write_data_phys returned %08x\n", Status);
1174 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1175 }
1176 }
1177
1178 if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1179 if (error_stripe == ci->num_stripes - 2) {
1180 ERR("recovering from parity error at %I64x, device %I64x\n", addr, devices[error_stripe_phys]->devitem.dev_id);
1181
1182 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1183
1184 RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1185
1186 for (j = 0; j < ci->num_stripes - 2; j++) {
1187 if (j == stripe) {
1188 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (ci->num_stripes * Vcb->superblock.node_size),
1189 Vcb->superblock.node_size);
1190 } else {
1191 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size),
1192 Vcb->superblock.node_size);
1193 }
1194 }
1195 } else {
1196 ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((error_stripe - stripe) * ci->stripe_length),
1197 devices[error_stripe_phys]->devitem.dev_id);
1198
1199 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1200
1201 RtlCopyMemory(sector + (error_stripe * Vcb->superblock.node_size),
1202 sector + ((ci->num_stripes + 1) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1203 }
1204 }
1205
1206 if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1207 Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off,
1208 sector + (error_stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1209 if (!NT_SUCCESS(Status)) {
1210 WARN("write_data_phys returned %08x\n", Status);
1211 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
1212 }
1213 }
1214 }
1215 }
1216 }
1217
1218 if (!recovered) {
1219 ERR("unrecoverable checksum error at %I64x\n", addr);
1220 ExFreePool(sector);
1221 return STATUS_CRC_ERROR;
1222 }
1223
1224 ExFreePool(sector);
1225 } else {
1226 ULONG sectors = length / Vcb->superblock.sector_size;
1227 uint8_t* sector;
1228
1229 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * (ci->num_stripes + 2), ALLOC_TAG);
1230 if (!sector) {
1231 ERR("out of memory\n");
1232 return STATUS_INSUFFICIENT_RESOURCES;
1233 }
1234
1235 for (i = 0; i < sectors; i++) {
1236 uint64_t off;
1237 uint16_t physstripe, parity1, parity2;
1238 uint32_t crc32;
1239
1240 if (context->csum)
1241 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1242
1243 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
1244 ci->num_stripes - 2, &off, &stripe);
1245
1246 parity1 = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1247 parity2 = (parity1 + 1) % ci->num_stripes;
1248
1249 physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1250
1251 if (!devices[physstripe] || !devices[physstripe]->devobj || (context->csum && context->csum[i] != crc32)) {
1252 uint16_t k, error_stripe;
1253 bool recovered = false, failed = false;
1254 ULONG num_errors = 0;
1255
1256 if (devices[physstripe] && devices[physstripe]->devobj)
1257 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_READ_ERRORS);
1258
1259 j = (parity2 + 1) % ci->num_stripes;
1260
1261 for (k = 0; k < ci->num_stripes - 1; k++) {
1262 if (j != physstripe) {
1263 if (devices[j] && devices[j]->devobj) {
1264 Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size,
1265 sector + (k * Vcb->superblock.sector_size), false);
1266 if (!NT_SUCCESS(Status)) {
1267 ERR("sync_read_phys returned %08x\n", Status);
1268 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1269 num_errors++;
1270 error_stripe = k;
1271
1272 if (num_errors > 1) {
1273 failed = true;
1274 break;
1275 }
1276 }
1277 } else {
1278 num_errors++;
1279 error_stripe = k;
1280
1281 if (num_errors > 1) {
1282 failed = true;
1283 break;
1284 }
1285 }
1286 }
1287
1288 j = (j + 1) % ci->num_stripes;
1289 }
1290
1291 if (!failed) {
1292 if (num_errors == 0) {
1293 RtlCopyMemory(sector + (stripe * Vcb->superblock.sector_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1294
1295 for (j = 0; j < ci->num_stripes - 2; j++) {
1296 if (j != stripe)
1297 do_xor(sector + (stripe * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1298 }
1299
1300 if (context->csum)
1301 crc32 = ~calc_crc32c(0xffffffff, sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1302
1303 if (!context->csum || crc32 == context->csum[i]) {
1304 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1305
1306 if (devices[physstripe] && devices[physstripe]->devobj)
1307 ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
1308 devices[physstripe]->devitem.dev_id);
1309
1310 recovered = true;
1311
1312 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1313 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1314 sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1315 if (!NT_SUCCESS(Status)) {
1316 WARN("write_data_phys returned %08x\n", Status);
1317 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1318 }
1319 }
1320 }
1321 }
1322
1323 if (!recovered) {
1324 bool read_q = false;
1325
1326 if (devices[parity2] && devices[parity2]->devobj) {
1327 Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off,
1328 Vcb->superblock.sector_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.sector_size), false);
1329 if (!NT_SUCCESS(Status)) {
1330 ERR("sync_read_phys returned %08x\n", Status);
1331 log_device_error(Vcb, devices[parity2], BTRFS_DEV_STAT_READ_ERRORS);
1332 } else
1333 read_q = true;
1334 }
1335
1336 if (read_q) {
1337 if (num_errors == 1) {
1338 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.sector_size));
1339
1340 if (!devices[physstripe] || !devices[physstripe]->devobj)
1341 recovered = true;
1342 else {
1343 crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1344
1345 if (crc32 == context->csum[i])
1346 recovered = true;
1347 }
1348 } else {
1349 for (j = 0; j < ci->num_stripes - 1; j++) {
1350 if (j != stripe) {
1351 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.sector_size));
1352
1353 crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1354
1355 if (crc32 == context->csum[i]) {
1356 recovered = true;
1357 error_stripe = j;
1358 break;
1359 }
1360 }
1361 }
1362 }
1363 }
1364
1365 if (recovered) {
1366 uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1367
1368 if (devices[physstripe] && devices[physstripe]->devobj)
1369 ERR("recovering from checksum error at %I64x, device %I64x\n",
1370 addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[physstripe]->devitem.dev_id);
1371
1372 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1373
1374 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1375 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1376 sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1377 if (!NT_SUCCESS(Status)) {
1378 WARN("write_data_phys returned %08x\n", Status);
1379 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1380 }
1381 }
1382
1383 if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1384 if (error_stripe == ci->num_stripes - 2) {
1385 ERR("recovering from parity error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
1386 devices[error_stripe_phys]->devitem.dev_id);
1387
1388 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1389
1390 RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1391
1392 for (j = 0; j < ci->num_stripes - 2; j++) {
1393 if (j == stripe) {
1394 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size),
1395 Vcb->superblock.sector_size);
1396 } else {
1397 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size),
1398 Vcb->superblock.sector_size);
1399 }
1400 }
1401 } else {
1402 ERR("recovering from checksum error at %I64x, device %I64x\n",
1403 addr + UInt32x32To64(i, Vcb->superblock.sector_size) + ((error_stripe - stripe) * ci->stripe_length),
1404 devices[error_stripe_phys]->devitem.dev_id);
1405
1406 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1407
1408 RtlCopyMemory(sector + (error_stripe * Vcb->superblock.sector_size),
1409 sector + ((ci->num_stripes + 1) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1410 }
1411 }
1412
1413 if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1414 Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off,
1415 sector + (error_stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1416 if (!NT_SUCCESS(Status)) {
1417 WARN("write_data_phys returned %08x\n", Status);
1418 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
1419 }
1420 }
1421 }
1422 }
1423 }
1424
1425 if (!recovered) {
1426 ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
1427 ExFreePool(sector);
1428 return STATUS_CRC_ERROR;
1429 }
1430 }
1431 }
1432
1433 ExFreePool(sector);
1434 }
1435
1436 return STATUS_SUCCESS;
1437 }
1438
1439 NTSTATUS read_data(_In_ device_extension* Vcb, _In_ uint64_t addr, _In_ uint32_t length, _In_reads_bytes_opt_(length*sizeof(uint32_t)/Vcb->superblock.sector_size) uint32_t* csum,
1440 _In_ bool is_tree, _Out_writes_bytes_(length) uint8_t* buf, _In_opt_ chunk* c, _Out_opt_ chunk** pc, _In_opt_ PIRP Irp, _In_ uint64_t generation, _In_ bool file_read,
1441 _In_ ULONG priority) {
1442 CHUNK_ITEM* ci;
1443 CHUNK_ITEM_STRIPE* cis;
1444 read_data_context context;
1445 uint64_t type, offset, total_reading = 0;
1446 NTSTATUS Status;
1447 device** devices = NULL;
1448 uint16_t i, startoffstripe, allowed_missing, missing_devices = 0;
1449 uint8_t* dummypage = NULL;
1450 PMDL dummy_mdl = NULL;
1451 bool need_to_wait;
1452 uint64_t lockaddr, locklen;
1453
1454 if (Vcb->log_to_phys_loaded) {
1455 if (!c) {
1456 c = get_chunk_from_address(Vcb, addr);
1457
1458 if (!c) {
1459 ERR("get_chunk_from_address failed\n");
1460 return STATUS_INTERNAL_ERROR;
1461 }
1462 }
1463
1464 ci = c->chunk_item;
1465 offset = c->offset;
1466 devices = c->devices;
1467
1468 if (pc)
1469 *pc = c;
1470 } else {
1471 LIST_ENTRY* le = Vcb->sys_chunks.Flink;
1472
1473 ci = NULL;
1474
1475 c = NULL;
1476 while (le != &Vcb->sys_chunks) {
1477 sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry);
1478
1479 if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) {
1480 CHUNK_ITEM* chunk_item = sc->data;
1481
1482 if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) {
1483 ci = chunk_item;
1484 offset = sc->key.offset;
1485 cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1];
1486
1487 devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG);
1488 if (!devices) {
1489 ERR("out of memory\n");
1490 return STATUS_INSUFFICIENT_RESOURCES;
1491 }
1492
1493 for (i = 0; i < ci->num_stripes; i++) {
1494 devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid);
1495 }
1496
1497 break;
1498 }
1499 }
1500
1501 le = le->Flink;
1502 }
1503
1504 if (!ci) {
1505 ERR("could not find chunk for %I64x in bootstrap\n", addr);
1506 return STATUS_INTERNAL_ERROR;
1507 }
1508
1509 if (pc)
1510 *pc = NULL;
1511 }
1512
1513 if (ci->type & BLOCK_FLAG_DUPLICATE) {
1514 type = BLOCK_FLAG_DUPLICATE;
1515 allowed_missing = ci->num_stripes - 1;
1516 } else if (ci->type & BLOCK_FLAG_RAID0) {
1517 type = BLOCK_FLAG_RAID0;
1518 allowed_missing = 0;
1519 } else if (ci->type & BLOCK_FLAG_RAID1) {
1520 type = BLOCK_FLAG_DUPLICATE;
1521 allowed_missing = 1;
1522 } else if (ci->type & BLOCK_FLAG_RAID10) {
1523 type = BLOCK_FLAG_RAID10;
1524 allowed_missing = 1;
1525 } else if (ci->type & BLOCK_FLAG_RAID5) {
1526 type = BLOCK_FLAG_RAID5;
1527 allowed_missing = 1;
1528 } else if (ci->type & BLOCK_FLAG_RAID6) {
1529 type = BLOCK_FLAG_RAID6;
1530 allowed_missing = 2;
1531 } else { // SINGLE
1532 type = BLOCK_FLAG_DUPLICATE;
1533 allowed_missing = 0;
1534 }
1535
1536 cis = (CHUNK_ITEM_STRIPE*)&ci[1];
1537
1538 RtlZeroMemory(&context, sizeof(read_data_context));
1539 KeInitializeEvent(&context.Event, NotificationEvent, false);
1540
1541 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG);
1542 if (!context.stripes) {
1543 ERR("out of memory\n");
1544 return STATUS_INSUFFICIENT_RESOURCES;
1545 }
1546
1547 if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) {
1548 get_raid56_lock_range(c, addr, length, &lockaddr, &locklen);
1549 chunk_lock_range(Vcb, c, lockaddr, locklen);
1550 }
1551
1552 RtlZeroMemory(context.stripes, sizeof(read_data_stripe) * ci->num_stripes);
1553
1554 context.buflen = length;
1555 context.num_stripes = ci->num_stripes;
1556 context.stripes_left = context.num_stripes;
1557 context.sector_size = Vcb->superblock.sector_size;
1558 context.csum = csum;
1559 context.tree = is_tree;
1560 context.type = type;
1561
1562 if (type == BLOCK_FLAG_RAID0) {
1563 uint64_t startoff, endoff;
1564 uint16_t endoffstripe, stripe;
1565 uint32_t *stripeoff, pos;
1566 PMDL master_mdl;
1567 PFN_NUMBER* pfns;
1568
1569 // FIXME - test this still works if page size isn't the same as sector size
1570
1571 // This relies on the fact that MDLs are followed in memory by the page file numbers,
1572 // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0
1573 // data for you without doing a memcpy yourself.
1574 // MDLs are officially opaque, so this might very well break in future versions of Windows.
1575
1576 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe);
1577 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe);
1578
1579 if (file_read) {
1580 // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL
1581 // with duplicated dummy PFNs, which confuse check_csum. Ah well.
1582 // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested.
1583
1584 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1585
1586 if (!context.va) {
1587 ERR("out of memory\n");
1588 Status = STATUS_INSUFFICIENT_RESOURCES;
1589 goto exit;
1590 }
1591 } else
1592 context.va = buf;
1593
1594 master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1595 if (!master_mdl) {
1596 ERR("out of memory\n");
1597 Status = STATUS_INSUFFICIENT_RESOURCES;
1598 goto exit;
1599 }
1600
1601 Status = STATUS_SUCCESS;
1602
1603 _SEH2_TRY {
1604 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1605 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1606 Status = _SEH2_GetExceptionCode();
1607 } _SEH2_END;
1608
1609 if (!NT_SUCCESS(Status)) {
1610 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1611 IoFreeMdl(master_mdl);
1612 goto exit;
1613 }
1614
1615 pfns = (PFN_NUMBER*)(master_mdl + 1);
1616
1617 for (i = 0; i < ci->num_stripes; i++) {
1618 if (startoffstripe > i)
1619 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1620 else if (startoffstripe == i)
1621 context.stripes[i].stripestart = startoff;
1622 else
1623 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length);
1624
1625 if (endoffstripe > i)
1626 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1627 else if (endoffstripe == i)
1628 context.stripes[i].stripeend = endoff + 1;
1629 else
1630 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length);
1631
1632 if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
1633 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL);
1634
1635 if (!context.stripes[i].mdl) {
1636 ERR("IoAllocateMdl failed\n");
1637 MmUnlockPages(master_mdl);
1638 IoFreeMdl(master_mdl);
1639 Status = STATUS_INSUFFICIENT_RESOURCES;
1640 goto exit;
1641 }
1642 }
1643 }
1644
1645 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
1646 if (!stripeoff) {
1647 ERR("out of memory\n");
1648 MmUnlockPages(master_mdl);
1649 IoFreeMdl(master_mdl);
1650 Status = STATUS_INSUFFICIENT_RESOURCES;
1651 goto exit;
1652 }
1653
1654 RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
1655
1656 pos = 0;
1657 stripe = startoffstripe;
1658 while (pos < length) {
1659 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
1660
1661 if (pos == 0) {
1662 uint32_t readlen = (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length));
1663
1664 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1665
1666 stripeoff[stripe] += readlen;
1667 pos += readlen;
1668 } else if (length - pos < ci->stripe_length) {
1669 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1670
1671 pos = length;
1672 } else {
1673 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1674
1675 stripeoff[stripe] += (uint32_t)ci->stripe_length;
1676 pos += (uint32_t)ci->stripe_length;
1677 }
1678
1679 stripe = (stripe + 1) % ci->num_stripes;
1680 }
1681
1682 MmUnlockPages(master_mdl);
1683 IoFreeMdl(master_mdl);
1684
1685 ExFreePool(stripeoff);
1686 } else if (type == BLOCK_FLAG_RAID10) {
1687 uint64_t startoff, endoff;
1688 uint16_t endoffstripe, j, stripe;
1689 ULONG orig_ls;
1690 PMDL master_mdl;
1691 PFN_NUMBER* pfns;
1692 uint32_t* stripeoff, pos;
1693 read_data_stripe** stripes;
1694
1695 if (c)
1696 orig_ls = c->last_stripe;
1697 else
1698 orig_ls = 0;
1699
1700 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe);
1701 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe);
1702
1703 if ((ci->num_stripes % ci->sub_stripes) != 0) {
1704 ERR("chunk %I64x: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes);
1705 Status = STATUS_INTERNAL_ERROR;
1706 goto exit;
1707 }
1708
1709 if (file_read) {
1710 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1711
1712 if (!context.va) {
1713 ERR("out of memory\n");
1714 Status = STATUS_INSUFFICIENT_RESOURCES;
1715 goto exit;
1716 }
1717 } else
1718 context.va = buf;
1719
1720 context.firstoff = (uint16_t)((startoff % ci->stripe_length) / Vcb->superblock.sector_size);
1721 context.startoffstripe = startoffstripe;
1722 context.sectors_per_stripe = (uint16_t)(ci->stripe_length / Vcb->superblock.sector_size);
1723
1724 startoffstripe *= ci->sub_stripes;
1725 endoffstripe *= ci->sub_stripes;
1726
1727 if (c)
1728 c->last_stripe = (orig_ls + 1) % ci->sub_stripes;
1729
1730 master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1731 if (!master_mdl) {
1732 ERR("out of memory\n");
1733 Status = STATUS_INSUFFICIENT_RESOURCES;
1734 goto exit;
1735 }
1736
1737 Status = STATUS_SUCCESS;
1738
1739 _SEH2_TRY {
1740 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1741 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1742 Status = _SEH2_GetExceptionCode();
1743 } _SEH2_END;
1744
1745 if (!NT_SUCCESS(Status)) {
1746 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1747 IoFreeMdl(master_mdl);
1748 goto exit;
1749 }
1750
1751 pfns = (PFN_NUMBER*)(master_mdl + 1);
1752
1753 stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1754 if (!stripes) {
1755 ERR("out of memory\n");
1756 MmUnlockPages(master_mdl);
1757 IoFreeMdl(master_mdl);
1758 Status = STATUS_INSUFFICIENT_RESOURCES;
1759 goto exit;
1760 }
1761
1762 RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes);
1763
1764 for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
1765 uint64_t sstart, send;
1766 bool stripeset = false;
1767
1768 if (startoffstripe > i)
1769 sstart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1770 else if (startoffstripe == i)
1771 sstart = startoff;
1772 else
1773 sstart = startoff - (startoff % ci->stripe_length);
1774
1775 if (endoffstripe > i)
1776 send = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1777 else if (endoffstripe == i)
1778 send = endoff + 1;
1779 else
1780 send = endoff - (endoff % ci->stripe_length);
1781
1782 for (j = 0; j < ci->sub_stripes; j++) {
1783 if (j == orig_ls && devices[i+j] && devices[i+j]->devobj) {
1784 context.stripes[i+j].stripestart = sstart;
1785 context.stripes[i+j].stripeend = send;
1786 stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1787
1788 if (sstart != send) {
1789 context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL);
1790
1791 if (!context.stripes[i+j].mdl) {
1792 ERR("IoAllocateMdl failed\n");
1793 MmUnlockPages(master_mdl);
1794 IoFreeMdl(master_mdl);
1795 Status = STATUS_INSUFFICIENT_RESOURCES;
1796 goto exit;
1797 }
1798 }
1799
1800 stripeset = true;
1801 } else
1802 context.stripes[i+j].status = ReadDataStatus_Skip;
1803 }
1804
1805 if (!stripeset) {
1806 for (j = 0; j < ci->sub_stripes; j++) {
1807 if (devices[i+j] && devices[i+j]->devobj) {
1808 context.stripes[i+j].stripestart = sstart;
1809 context.stripes[i+j].stripeend = send;
1810 context.stripes[i+j].status = ReadDataStatus_Pending;
1811 stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1812
1813 if (sstart != send) {
1814 context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL);
1815
1816 if (!context.stripes[i+j].mdl) {
1817 ERR("IoAllocateMdl failed\n");
1818 MmUnlockPages(master_mdl);
1819 IoFreeMdl(master_mdl);
1820 Status = STATUS_INSUFFICIENT_RESOURCES;
1821 goto exit;
1822 }
1823 }
1824
1825 stripeset = true;
1826 break;
1827 }
1828 }
1829
1830 if (!stripeset) {
1831 ERR("could not find stripe to read\n");
1832 Status = STATUS_DEVICE_NOT_READY;
1833 goto exit;
1834 }
1835 }
1836 }
1837
1838 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1839 if (!stripeoff) {
1840 ERR("out of memory\n");
1841 MmUnlockPages(master_mdl);
1842 IoFreeMdl(master_mdl);
1843 Status = STATUS_INSUFFICIENT_RESOURCES;
1844 goto exit;
1845 }
1846
1847 RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes);
1848
1849 pos = 0;
1850 stripe = startoffstripe / ci->sub_stripes;
1851 while (pos < length) {
1852 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripe]->mdl + 1);
1853
1854 if (pos == 0) {
1855 uint32_t readlen = (uint32_t)min(stripes[stripe]->stripeend - stripes[stripe]->stripestart,
1856 ci->stripe_length - (stripes[stripe]->stripestart % ci->stripe_length));
1857
1858 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1859
1860 stripeoff[stripe] += readlen;
1861 pos += readlen;
1862 } else if (length - pos < ci->stripe_length) {
1863 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1864
1865 pos = length;
1866 } else {
1867 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1868
1869 stripeoff[stripe] += (ULONG)ci->stripe_length;
1870 pos += (ULONG)ci->stripe_length;
1871 }
1872
1873 stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes);
1874 }
1875
1876 MmUnlockPages(master_mdl);
1877 IoFreeMdl(master_mdl);
1878
1879 ExFreePool(stripeoff);
1880 ExFreePool(stripes);
1881 } else if (type == BLOCK_FLAG_DUPLICATE) {
1882 uint64_t orig_ls;
1883
1884 if (c)
1885 orig_ls = i = c->last_stripe;
1886 else
1887 orig_ls = i = 0;
1888
1889 while (!devices[i] || !devices[i]->devobj) {
1890 i = (i + 1) % ci->num_stripes;
1891
1892 if (i == orig_ls) {
1893 ERR("no devices available to service request\n");
1894 Status = STATUS_DEVICE_NOT_READY;
1895 goto exit;
1896 }
1897 }
1898
1899 if (c)
1900 c->last_stripe = (i + 1) % ci->num_stripes;
1901
1902 context.stripes[i].stripestart = addr - offset;
1903 context.stripes[i].stripeend = context.stripes[i].stripestart + length;
1904
1905 if (file_read) {
1906 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1907
1908 if (!context.va) {
1909 ERR("out of memory\n");
1910 Status = STATUS_INSUFFICIENT_RESOURCES;
1911 goto exit;
1912 }
1913
1914 context.stripes[i].mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1915 if (!context.stripes[i].mdl) {
1916 ERR("IoAllocateMdl failed\n");
1917 Status = STATUS_INSUFFICIENT_RESOURCES;
1918 goto exit;
1919 }
1920
1921 MmBuildMdlForNonPagedPool(context.stripes[i].mdl);
1922 } else {
1923 context.stripes[i].mdl = IoAllocateMdl(buf, length, false, false, NULL);
1924
1925 if (!context.stripes[i].mdl) {
1926 ERR("IoAllocateMdl failed\n");
1927 Status = STATUS_INSUFFICIENT_RESOURCES;
1928 goto exit;
1929 }
1930
1931 Status = STATUS_SUCCESS;
1932
1933 _SEH2_TRY {
1934 MmProbeAndLockPages(context.stripes[i].mdl, KernelMode, IoWriteAccess);
1935 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1936 Status = _SEH2_GetExceptionCode();
1937 } _SEH2_END;
1938
1939 if (!NT_SUCCESS(Status)) {
1940 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1941 goto exit;
1942 }
1943 }
1944 } else if (type == BLOCK_FLAG_RAID5) {
1945 uint64_t startoff, endoff;
1946 uint16_t endoffstripe, parity;
1947 uint32_t *stripeoff, pos;
1948 PMDL master_mdl;
1949 PFN_NUMBER *pfns, dummy;
1950 bool need_dummy = false;
1951
1952 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe);
1953 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe);
1954
1955 if (file_read) {
1956 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1957
1958 if (!context.va) {
1959 ERR("out of memory\n");
1960 Status = STATUS_INSUFFICIENT_RESOURCES;
1961 goto exit;
1962 }
1963 } else
1964 context.va = buf;
1965
1966 master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1967 if (!master_mdl) {
1968 ERR("out of memory\n");
1969 Status = STATUS_INSUFFICIENT_RESOURCES;
1970 goto exit;
1971 }
1972
1973 Status = STATUS_SUCCESS;
1974
1975 _SEH2_TRY {
1976 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1977 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1978 Status = _SEH2_GetExceptionCode();
1979 } _SEH2_END;
1980
1981 if (!NT_SUCCESS(Status)) {
1982 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1983 IoFreeMdl(master_mdl);
1984 goto exit;
1985 }
1986
1987 pfns = (PFN_NUMBER*)(master_mdl + 1);
1988
1989 pos = 0;
1990 while (pos < length) {
1991 parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
1992
1993 if (pos == 0) {
1994 uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes;
1995 ULONG skip, readlen;
1996
1997 i = startoffstripe;
1998 while (stripe != parity) {
1999 if (i == startoffstripe) {
2000 readlen = min(length, (ULONG)(ci->stripe_length - (startoff % ci->stripe_length)));
2001
2002 context.stripes[stripe].stripestart = startoff;
2003 context.stripes[stripe].stripeend = startoff + readlen;
2004
2005 pos += readlen;
2006
2007 if (pos == length)
2008 break;
2009 } else {
2010 readlen = min(length - pos, (ULONG)ci->stripe_length);
2011
2012 context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2013 context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2014
2015 pos += readlen;
2016
2017 if (pos == length)
2018 break;
2019 }
2020
2021 i++;
2022 stripe = (stripe + 1) % ci->num_stripes;
2023 }
2024
2025 if (pos == length)
2026 break;
2027
2028 for (i = 0; i < startoffstripe; i++) {
2029 uint16_t stripe2 = (parity + i + 1) % ci->num_stripes;
2030
2031 context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2032 }
2033
2034 context.stripes[parity].stripestart = context.stripes[parity].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2035
2036 if (length - pos > ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length) {
2037 skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length)) - 1);
2038
2039 for (i = 0; i < ci->num_stripes; i++) {
2040 context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2041 }
2042
2043 pos += (uint32_t)(skip * (ci->num_stripes - 1) * ci->num_stripes * ci->stripe_length);
2044 need_dummy = true;
2045 }
2046 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2047 for (i = 0; i < ci->num_stripes; i++) {
2048 context.stripes[i].stripeend += ci->stripe_length;
2049 }
2050
2051 pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 1));
2052 need_dummy = true;
2053 } else {
2054 uint16_t stripe = (parity + 1) % ci->num_stripes;
2055
2056 i = 0;
2057 while (stripe != parity) {
2058 if (endoffstripe == i) {
2059 context.stripes[stripe].stripeend = endoff + 1;
2060 break;
2061 } else if (endoffstripe > i)
2062 context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2063
2064 i++;
2065 stripe = (stripe + 1) % ci->num_stripes;
2066 }
2067
2068 break;
2069 }
2070 }
2071
2072 for (i = 0; i < ci->num_stripes; i++) {
2073 if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2074 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart),
2075 false, false, NULL);
2076
2077 if (!context.stripes[i].mdl) {
2078 ERR("IoAllocateMdl failed\n");
2079 MmUnlockPages(master_mdl);
2080 IoFreeMdl(master_mdl);
2081 Status = STATUS_INSUFFICIENT_RESOURCES;
2082 goto exit;
2083 }
2084 }
2085 }
2086
2087 if (need_dummy) {
2088 dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2089 if (!dummypage) {
2090 ERR("out of memory\n");
2091 MmUnlockPages(master_mdl);
2092 IoFreeMdl(master_mdl);
2093 Status = STATUS_INSUFFICIENT_RESOURCES;
2094 goto exit;
2095 }
2096
2097 dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL);
2098 if (!dummy_mdl) {
2099 ERR("IoAllocateMdl failed\n");
2100 MmUnlockPages(master_mdl);
2101 IoFreeMdl(master_mdl);
2102 Status = STATUS_INSUFFICIENT_RESOURCES;
2103 goto exit;
2104 }
2105
2106 MmBuildMdlForNonPagedPool(dummy_mdl);
2107
2108 dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2109 }
2110
2111 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
2112 if (!stripeoff) {
2113 ERR("out of memory\n");
2114 MmUnlockPages(master_mdl);
2115 IoFreeMdl(master_mdl);
2116 Status = STATUS_INSUFFICIENT_RESOURCES;
2117 goto exit;
2118 }
2119
2120 RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
2121
2122 pos = 0;
2123
2124 while (pos < length) {
2125 PFN_NUMBER* stripe_pfns;
2126
2127 parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
2128
2129 if (pos == 0) {
2130 uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes;
2131 uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2132 ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2133
2134 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2135
2136 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2137
2138 stripeoff[stripe] = readlen;
2139 pos += readlen;
2140
2141 stripe = (stripe + 1) % ci->num_stripes;
2142
2143 while (stripe != parity) {
2144 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2145 readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2146
2147 if (readlen == 0)
2148 break;
2149
2150 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2151
2152 stripeoff[stripe] = readlen;
2153 pos += readlen;
2154
2155 stripe = (stripe + 1) % ci->num_stripes;
2156 }
2157 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2158 uint16_t stripe = (parity + 1) % ci->num_stripes;
2159 ULONG k;
2160
2161 while (stripe != parity) {
2162 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2163
2164 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2165
2166 stripeoff[stripe] += (uint32_t)ci->stripe_length;
2167 pos += (uint32_t)ci->stripe_length;
2168
2169 stripe = (stripe + 1) % ci->num_stripes;
2170 }
2171
2172 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity].mdl + 1);
2173
2174 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2175 stripe_pfns[stripeoff[parity] >> PAGE_SHIFT] = dummy;
2176 stripeoff[parity] += PAGE_SIZE;
2177 }
2178 } else {
2179 uint16_t stripe = (parity + 1) % ci->num_stripes;
2180 uint32_t readlen;
2181
2182 while (pos < length) {
2183 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2184 readlen = min(length - pos, (ULONG)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2185
2186 if (readlen == 0)
2187 break;
2188
2189 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2190
2191 stripeoff[stripe] += readlen;
2192 pos += readlen;
2193
2194 stripe = (stripe + 1) % ci->num_stripes;
2195 }
2196 }
2197 }
2198
2199 MmUnlockPages(master_mdl);
2200 IoFreeMdl(master_mdl);
2201
2202 ExFreePool(stripeoff);
2203 } else if (type == BLOCK_FLAG_RAID6) {
2204 uint64_t startoff, endoff;
2205 uint16_t endoffstripe, parity1;
2206 uint32_t *stripeoff, pos;
2207 PMDL master_mdl;
2208 PFN_NUMBER *pfns, dummy;
2209 bool need_dummy = false;
2210
2211 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe);
2212 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe);
2213
2214 if (file_read) {
2215 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
2216
2217 if (!context.va) {
2218 ERR("out of memory\n");
2219 Status = STATUS_INSUFFICIENT_RESOURCES;
2220 goto exit;
2221 }
2222 } else
2223 context.va = buf;
2224
2225 master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
2226 if (!master_mdl) {
2227 ERR("out of memory\n");
2228 Status = STATUS_INSUFFICIENT_RESOURCES;
2229 goto exit;
2230 }
2231
2232 Status = STATUS_SUCCESS;
2233
2234 _SEH2_TRY {
2235 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
2236 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2237 Status = _SEH2_GetExceptionCode();
2238 } _SEH2_END;
2239
2240 if (!NT_SUCCESS(Status)) {
2241 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
2242 IoFreeMdl(master_mdl);
2243 goto exit;
2244 }
2245
2246 pfns = (PFN_NUMBER*)(master_mdl + 1);
2247
2248 pos = 0;
2249 while (pos < length) {
2250 parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2251
2252 if (pos == 0) {
2253 uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes, parity2;
2254 ULONG skip, readlen;
2255
2256 i = startoffstripe;
2257 while (stripe != parity1) {
2258 if (i == startoffstripe) {
2259 readlen = (ULONG)min(length, ci->stripe_length - (startoff % ci->stripe_length));
2260
2261 context.stripes[stripe].stripestart = startoff;
2262 context.stripes[stripe].stripeend = startoff + readlen;
2263
2264 pos += readlen;
2265
2266 if (pos == length)
2267 break;
2268 } else {
2269 readlen = min(length - pos, (ULONG)ci->stripe_length);
2270
2271 context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2272 context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2273
2274 pos += readlen;
2275
2276 if (pos == length)
2277 break;
2278 }
2279
2280 i++;
2281 stripe = (stripe + 1) % ci->num_stripes;
2282 }
2283
2284 if (pos == length)
2285 break;
2286
2287 for (i = 0; i < startoffstripe; i++) {
2288 uint16_t stripe2 = (parity1 + i + 2) % ci->num_stripes;
2289
2290 context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2291 }
2292
2293 context.stripes[parity1].stripestart = context.stripes[parity1].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2294
2295 parity2 = (parity1 + 1) % ci->num_stripes;
2296 context.stripes[parity2].stripestart = context.stripes[parity2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2297
2298 if (length - pos > ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length) {
2299 skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length)) - 1);
2300
2301 for (i = 0; i < ci->num_stripes; i++) {
2302 context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2303 }
2304
2305 pos += (uint32_t)(skip * (ci->num_stripes - 2) * ci->num_stripes * ci->stripe_length);
2306 need_dummy = true;
2307 }
2308 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2309 for (i = 0; i < ci->num_stripes; i++) {
2310 context.stripes[i].stripeend += ci->stripe_length;
2311 }
2312
2313 pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 2));
2314 need_dummy = true;
2315 } else {
2316 uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2317
2318 i = 0;
2319 while (stripe != parity1) {
2320 if (endoffstripe == i) {
2321 context.stripes[stripe].stripeend = endoff + 1;
2322 break;
2323 } else if (endoffstripe > i)
2324 context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2325
2326 i++;
2327 stripe = (stripe + 1) % ci->num_stripes;
2328 }
2329
2330 break;
2331 }
2332 }
2333
2334 for (i = 0; i < ci->num_stripes; i++) {
2335 if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2336 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL);
2337
2338 if (!context.stripes[i].mdl) {
2339 ERR("IoAllocateMdl failed\n");
2340 MmUnlockPages(master_mdl);
2341 IoFreeMdl(master_mdl);
2342 Status = STATUS_INSUFFICIENT_RESOURCES;
2343 goto exit;
2344 }
2345 }
2346 }
2347
2348 if (need_dummy) {
2349 dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2350 if (!dummypage) {
2351 ERR("out of memory\n");
2352 MmUnlockPages(master_mdl);
2353 IoFreeMdl(master_mdl);
2354 Status = STATUS_INSUFFICIENT_RESOURCES;
2355 goto exit;
2356 }
2357
2358 dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL);
2359 if (!dummy_mdl) {
2360 ERR("IoAllocateMdl failed\n");
2361 MmUnlockPages(master_mdl);
2362 IoFreeMdl(master_mdl);
2363 Status = STATUS_INSUFFICIENT_RESOURCES;
2364 goto exit;
2365 }
2366
2367 MmBuildMdlForNonPagedPool(dummy_mdl);
2368
2369 dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2370 }
2371
2372 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
2373 if (!stripeoff) {
2374 ERR("out of memory\n");
2375 MmUnlockPages(master_mdl);
2376 IoFreeMdl(master_mdl);
2377 Status = STATUS_INSUFFICIENT_RESOURCES;
2378 goto exit;
2379 }
2380
2381 RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
2382
2383 pos = 0;
2384
2385 while (pos < length) {
2386 PFN_NUMBER* stripe_pfns;
2387
2388 parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2389
2390 if (pos == 0) {
2391 uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes;
2392 uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2393 ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2394
2395 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2396
2397 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2398
2399 stripeoff[stripe] = readlen;
2400 pos += readlen;
2401
2402 stripe = (stripe + 1) % ci->num_stripes;
2403
2404 while (stripe != parity1) {
2405 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2406 readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2407
2408 if (readlen == 0)
2409 break;
2410
2411 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2412
2413 stripeoff[stripe] = readlen;
2414 pos += readlen;
2415
2416 stripe = (stripe + 1) % ci->num_stripes;
2417 }
2418 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2419 uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2420 uint16_t parity2 = (parity1 + 1) % ci->num_stripes;
2421 ULONG k;
2422
2423 while (stripe != parity1) {
2424 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2425
2426 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2427
2428 stripeoff[stripe] += (uint32_t)ci->stripe_length;
2429 pos += (uint32_t)ci->stripe_length;
2430
2431 stripe = (stripe + 1) % ci->num_stripes;
2432 }
2433
2434 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity1].mdl + 1);
2435
2436 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2437 stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT] = dummy;
2438 stripeoff[parity1] += PAGE_SIZE;
2439 }
2440
2441 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity2].mdl + 1);
2442
2443 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2444 stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT] = dummy;
2445 stripeoff[parity2] += PAGE_SIZE;
2446 }
2447 } else {
2448 uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2449 uint32_t readlen;
2450
2451 while (pos < length) {
2452 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2453 readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2454
2455 if (readlen == 0)
2456 break;
2457
2458 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2459
2460 stripeoff[stripe] += readlen;
2461 pos += readlen;
2462
2463 stripe = (stripe + 1) % ci->num_stripes;
2464 }
2465 }
2466 }
2467
2468 MmUnlockPages(master_mdl);
2469 IoFreeMdl(master_mdl);
2470
2471 ExFreePool(stripeoff);
2472 }
2473
2474 context.address = addr;
2475
2476 for (i = 0; i < ci->num_stripes; i++) {
2477 if (!devices[i] || !devices[i]->devobj || context.stripes[i].stripestart == context.stripes[i].stripeend) {
2478 context.stripes[i].status = ReadDataStatus_MissingDevice;
2479 context.stripes_left--;
2480
2481 if (!devices[i] || !devices[i]->devobj)
2482 missing_devices++;
2483 }
2484 }
2485
2486 if (missing_devices > allowed_missing) {
2487 ERR("not enough devices to service request (%u missing)\n", missing_devices);
2488 Status = STATUS_UNEXPECTED_IO_ERROR;
2489 goto exit;
2490 }
2491
2492 for (i = 0; i < ci->num_stripes; i++) {
2493 PIO_STACK_LOCATION IrpSp;
2494
2495 if (devices[i] && devices[i]->devobj && context.stripes[i].stripestart != context.stripes[i].stripeend && context.stripes[i].status != ReadDataStatus_Skip) {
2496 context.stripes[i].context = (struct read_data_context*)&context;
2497
2498 if (type == BLOCK_FLAG_RAID10) {
2499 context.stripes[i].stripenum = i / ci->sub_stripes;
2500 }
2501
2502 if (!Irp) {
2503 context.stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, false);
2504
2505 if (!context.stripes[i].Irp) {
2506 ERR("IoAllocateIrp failed\n");
2507 Status = STATUS_INSUFFICIENT_RESOURCES;
2508 goto exit;
2509 }
2510 } else {
2511 context.stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize);
2512
2513 if (!context.stripes[i].Irp) {
2514 ERR("IoMakeAssociatedIrp failed\n");
2515 Status = STATUS_INSUFFICIENT_RESOURCES;
2516 goto exit;
2517 }
2518 }
2519
2520 IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
2521 IrpSp->MajorFunction = IRP_MJ_READ;
2522 IrpSp->MinorFunction = IRP_MN_NORMAL;
2523 IrpSp->FileObject = devices[i]->fileobj;
2524
2525 if (devices[i]->devobj->Flags & DO_BUFFERED_IO) {
2526 context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), ALLOC_TAG);
2527 if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
2528 ERR("out of memory\n");
2529 Status = STATUS_INSUFFICIENT_RESOURCES;
2530 goto exit;
2531 }
2532
2533 context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION;
2534
2535 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2536 } else if (devices[i]->devobj->Flags & DO_DIRECT_IO)
2537 context.stripes[i].Irp->MdlAddress = context.stripes[i].mdl;
2538 else
2539 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2540
2541 IrpSp->Parameters.Read.Length = (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart);
2542 IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].stripestart + cis[i].offset;
2543
2544 total_reading += IrpSp->Parameters.Read.Length;
2545
2546 context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
2547
2548 IoSetCompletionRoutine(context.stripes[i].Irp, read_data_completion, &context.stripes[i], true, true, true);
2549
2550 context.stripes[i].status = ReadDataStatus_Pending;
2551 }
2552 }
2553
2554 need_to_wait = false;
2555 for (i = 0; i < ci->num_stripes; i++) {
2556 if (context.stripes[i].status != ReadDataStatus_MissingDevice && context.stripes[i].status != ReadDataStatus_Skip) {
2557 IoCallDriver(devices[i]->devobj, context.stripes[i].Irp);
2558 need_to_wait = true;
2559 }
2560 }
2561
2562 if (need_to_wait)
2563 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);
2564
2565 if (diskacc)
2566 fFsRtlUpdateDiskCounters(total_reading, 0);
2567
2568 // check if any of the devices return a "user-induced" error
2569
2570 for (i = 0; i < ci->num_stripes; i++) {
2571 if (context.stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context.stripes[i].iosb.Status)) {
2572 Status = context.stripes[i].iosb.Status;
2573 goto exit;
2574 }
2575 }
2576
2577 if (type == BLOCK_FLAG_RAID0) {
2578 Status = read_data_raid0(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2579 if (!NT_SUCCESS(Status)) {
2580 ERR("read_data_raid0 returned %08x\n", Status);
2581
2582 if (file_read)
2583 ExFreePool(context.va);
2584
2585 goto exit;
2586 }
2587
2588 if (file_read) {
2589 RtlCopyMemory(buf, context.va, length);
2590 ExFreePool(context.va);
2591 }
2592 } else if (type == BLOCK_FLAG_RAID10) {
2593 Status = read_data_raid10(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2594
2595 if (!NT_SUCCESS(Status)) {
2596 ERR("read_data_raid10 returned %08x\n", Status);
2597
2598 if (file_read)
2599 ExFreePool(context.va);
2600
2601 goto exit;
2602 }
2603
2604 if (file_read) {
2605 RtlCopyMemory(buf, context.va, length);
2606 ExFreePool(context.va);
2607 }
2608 } else if (type == BLOCK_FLAG_DUPLICATE) {
2609 Status = read_data_dup(Vcb, file_read ? context.va : buf, addr, &context, ci, devices, generation);
2610 if (!NT_SUCCESS(Status)) {
2611 ERR("read_data_dup returned %08x\n", Status);
2612
2613 if (file_read)
2614 ExFreePool(context.va);
2615
2616 goto exit;
2617 }
2618
2619 if (file_read) {
2620 RtlCopyMemory(buf, context.va, length);
2621 ExFreePool(context.va);
2622 }
2623 } else if (type == BLOCK_FLAG_RAID5) {
2624 Status = read_data_raid5(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? true : false);
2625 if (!NT_SUCCESS(Status)) {
2626 ERR("read_data_raid5 returned %08x\n", Status);
2627
2628 if (file_read)
2629 ExFreePool(context.va);
2630
2631 goto exit;
2632 }
2633
2634 if (file_read) {
2635 RtlCopyMemory(buf, context.va, length);
2636 ExFreePool(context.va);
2637 }
2638 } else if (type == BLOCK_FLAG_RAID6) {
2639 Status = read_data_raid6(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? true : false);
2640 if (!NT_SUCCESS(Status)) {
2641 ERR("read_data_raid6 returned %08x\n", Status);
2642
2643 if (file_read)
2644 ExFreePool(context.va);
2645
2646 goto exit;
2647 }
2648
2649 if (file_read) {
2650 RtlCopyMemory(buf, context.va, length);
2651 ExFreePool(context.va);
2652 }
2653 }
2654
2655 exit:
2656 if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6))
2657 chunk_unlock_range(Vcb, c, lockaddr, locklen);
2658
2659 if (dummy_mdl)
2660 IoFreeMdl(dummy_mdl);
2661
2662 if (dummypage)
2663 ExFreePool(dummypage);
2664
2665 for (i = 0; i < ci->num_stripes; i++) {
2666 if (context.stripes[i].mdl) {
2667 if (context.stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
2668 MmUnlockPages(context.stripes[i].mdl);
2669
2670 IoFreeMdl(context.stripes[i].mdl);
2671 }
2672
2673 if (context.stripes[i].Irp)
2674 IoFreeIrp(context.stripes[i].Irp);
2675 }
2676
2677 ExFreePool(context.stripes);
2678
2679 if (!Vcb->log_to_phys_loaded)
2680 ExFreePool(devices);
2681
2682 return Status;
2683 }
2684
2685 NTSTATUS read_stream(fcb* fcb, uint8_t* data, uint64_t start, ULONG length, ULONG* pbr) {
2686 ULONG readlen;
2687
2688 TRACE("(%p, %p, %I64x, %I64x, %p)\n", fcb, data, start, length, pbr);
2689
2690 if (pbr) *pbr = 0;
2691
2692 if (start >= fcb->adsdata.Length) {
2693 TRACE("tried to read beyond end of stream\n");
2694 return STATUS_END_OF_FILE;
2695 }
2696
2697 if (length == 0) {
2698 WARN("tried to read zero bytes\n");
2699 return STATUS_SUCCESS;
2700 }
2701
2702 if (start + length < fcb->adsdata.Length)
2703 readlen = length;
2704 else
2705 readlen = fcb->adsdata.Length - (ULONG)start;
2706
2707 if (readlen > 0)
2708 RtlCopyMemory(data + start, fcb->adsdata.Buffer, readlen);
2709
2710 if (pbr) *pbr = readlen;
2711
2712 return STATUS_SUCCESS;
2713 }
2714
2715 NTSTATUS read_file(fcb* fcb, uint8_t* data, uint64_t start, uint64_t length, ULONG* pbr, PIRP Irp) {
2716 NTSTATUS Status;
2717 EXTENT_DATA* ed;
2718 uint32_t bytes_read = 0;
2719 uint64_t last_end;
2720 LIST_ENTRY* le;
2721 POOL_TYPE pool_type;
2722
2723 TRACE("(%p, %p, %I64x, %I64x, %p)\n", fcb, data, start, length, pbr);
2724
2725 if (pbr)
2726 *pbr = 0;
2727
2728 if (start >= fcb->inode_item.st_size) {
2729 WARN("Tried to read beyond end of file\n");
2730 Status = STATUS_END_OF_FILE;
2731 goto exit;
2732 }
2733
2734 pool_type = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? NonPagedPool : PagedPool;
2735
2736 le = fcb->extents.Flink;
2737
2738 last_end = start;
2739
2740 while (le != &fcb->extents) {
2741 uint64_t len;
2742 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
2743 EXTENT_DATA2* ed2;
2744
2745 if (!ext->ignore) {
2746 ed = &ext->extent_data;
2747
2748 ed2 = (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) ? (EXTENT_DATA2*)ed->data : NULL;
2749
2750 len = ed2 ? ed2->num_bytes : ed->decoded_size;
2751
2752 if (ext->offset + len <= start) {
2753 last_end = ext->offset + len;
2754 goto nextitem;
2755 }
2756
2757 if (ext->offset > last_end && ext->offset > start + bytes_read) {
2758 uint32_t read = (uint32_t)min(length, ext->offset - max(start, last_end));
2759
2760 RtlZeroMemory(data + bytes_read, read);
2761 bytes_read += read;
2762 length -= read;
2763 }
2764
2765 if (length == 0 || ext->offset > start + bytes_read + length)
2766 break;
2767
2768 if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
2769 WARN("Encryption not supported\n");
2770 Status = STATUS_NOT_IMPLEMENTED;
2771 goto exit;
2772 }
2773
2774 if (ed->encoding != BTRFS_ENCODING_NONE) {
2775 WARN("Other encodings not supported\n");
2776 Status = STATUS_NOT_IMPLEMENTED;
2777 goto exit;
2778 }
2779
2780 switch (ed->type) {
2781 case EXTENT_TYPE_INLINE:
2782 {
2783 uint64_t off = start + bytes_read - ext->offset;
2784 uint32_t read;
2785
2786 if (ed->compression == BTRFS_COMPRESSION_NONE) {
2787 read = (uint32_t)min(min(len, ext->datalen) - off, length);
2788
2789 RtlCopyMemory(data + bytes_read, &ed->data[off], read);
2790 } else if (ed->compression == BTRFS_COMPRESSION_ZLIB || ed->compression == BTRFS_COMPRESSION_LZO || ed->compression == BTRFS_COMPRESSION_ZSTD) {
2791 uint8_t* decomp;
2792 bool decomp_alloc;
2793 uint16_t inlen = ext->datalen - (uint16_t)offsetof(EXTENT_DATA, data[0]);
2794
2795 if (ed->decoded_size == 0 || ed->decoded_size > 0xffffffff) {
2796 ERR("ed->decoded_size was invalid (%I64x)\n", ed->decoded_size);
2797 Status = STATUS_INTERNAL_ERROR;
2798 goto exit;
2799 }
2800
2801 read = (uint32_t)min(ed->decoded_size - off, length);
2802
2803 if (off > 0) {
2804 decomp = ExAllocatePoolWithTag(NonPagedPool, (uint32_t)ed->decoded_size, ALLOC_TAG);
2805 if (!decomp) {
2806 ERR("out of memory\n");
2807 Status = STATUS_INSUFFICIENT_RESOURCES;
2808 goto exit;
2809 }
2810
2811 decomp_alloc = true;
2812 } else {
2813 decomp = data + bytes_read;
2814 decomp_alloc = false;
2815 }
2816
2817 if (ed->compression == BTRFS_COMPRESSION_ZLIB) {
2818 Status = zlib_decompress(ed->data, inlen, decomp, (uint32_t)(read + off));
2819 if (!NT_SUCCESS(Status)) {
2820 ERR("zlib_decompress returned %08x\n", Status);
2821 if (decomp_alloc) ExFreePool(decomp);
2822 goto exit;
2823 }
2824 } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
2825 if (inlen < sizeof(uint32_t)) {
2826 ERR("extent data was truncated\n");
2827 Status = STATUS_INTERNAL_ERROR;
2828 if (decomp_alloc) ExFreePool(decomp);
2829 goto exit;
2830 } else
2831 inlen -= sizeof(uint32_t);
2832
2833 Status = lzo_decompress(ed->data + sizeof(uint32_t), inlen, decomp, (uint32_t)(read + off), sizeof(uint32_t));
2834 if (!NT_SUCCESS(Status)) {
2835 ERR("lzo_decompress returned %08x\n", Status);
2836 if (decomp_alloc) ExFreePool(decomp);
2837 goto exit;
2838 }
2839 } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) {
2840 Status = zstd_decompress(ed->data, inlen, decomp, (uint32_t)(read + off));
2841 if (!NT_SUCCESS(Status)) {
2842 ERR("zstd_decompress returned %08x\n", Status);
2843 if (decomp_alloc) ExFreePool(decomp);
2844 goto exit;
2845 }
2846 }
2847
2848 if (decomp_alloc) {
2849 RtlCopyMemory(data + bytes_read, decomp + off, read);
2850 ExFreePool(decomp);
2851 }
2852 } else {
2853 ERR("unhandled compression type %x\n", ed->compression);
2854 Status = STATUS_NOT_IMPLEMENTED;
2855 goto exit;
2856 }
2857
2858 bytes_read += read;
2859 length -= read;
2860
2861 break;
2862 }
2863
2864 case EXTENT_TYPE_REGULAR:
2865 {
2866 uint64_t off = start + bytes_read - ext->offset;
2867 uint32_t to_read, read;
2868 uint8_t* buf;
2869 bool mdl = (Irp && Irp->MdlAddress) ? true : false;
2870 bool buf_free;
2871 uint32_t bumpoff = 0, *csum;
2872 uint64_t addr;
2873 chunk* c;
2874
2875 read = (uint32_t)(len - off);
2876 if (read > length) read = (uint32_t)length;
2877
2878 if (ed->compression == BTRFS_COMPRESSION_NONE) {
2879 addr = ed2->address + ed2->offset + off;
2880 to_read = (uint32_t)sector_align(read, fcb->Vcb->superblock.sector_size);
2881
2882 if (addr % fcb->Vcb->superblock.sector_size > 0) {
2883 bumpoff = addr % fcb->Vcb->superblock.sector_size;
2884 addr -= bumpoff;
2885 to_read = (uint32_t)sector_align(read + bumpoff, fcb->Vcb->superblock.sector_size);
2886 }
2887 } else {
2888 addr = ed2->address;
2889 to_read = (uint32_t)sector_align(ed2->size, fcb->Vcb->superblock.sector_size);
2890 }
2891
2892 if (ed->compression == BTRFS_COMPRESSION_NONE && start % fcb->Vcb->superblock.sector_size == 0 &&
2893 length % fcb->Vcb->superblock.sector_size == 0) {
2894 buf = data + bytes_read;
2895 buf_free = false;
2896 } else {
2897 buf = ExAllocatePoolWithTag(pool_type, to_read, ALLOC_TAG);
2898 buf_free = true;
2899
2900 if (!buf) {
2901 ERR("out of memory\n");
2902 Status = STATUS_INSUFFICIENT_RESOURCES;
2903 goto exit;
2904 }
2905
2906 mdl = false;
2907 }
2908
2909 c = get_chunk_from_address(fcb->Vcb, addr);
2910
2911 if (!c) {
2912 ERR("get_chunk_from_address(%I64x) failed\n", addr);
2913
2914 if (buf_free)
2915 ExFreePool(buf);
2916
2917 goto exit;
2918 }
2919
2920 if (ext->csum) {
2921 if (ed->compression == BTRFS_COMPRESSION_NONE)
2922 csum = &ext->csum[off / fcb->Vcb->superblock.sector_size];
2923 else
2924 csum = ext->csum;
2925 } else
2926 csum = NULL;
2927
2928 Status = read_data(fcb->Vcb, addr, to_read, csum, false, buf, c, NULL, Irp, 0, mdl,
2929 fcb && fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
2930 if (!NT_SUCCESS(Status)) {
2931 ERR("read_data returned %08x\n", Status);
2932
2933 if (buf_free)
2934 ExFreePool(buf);
2935
2936 goto exit;
2937 }
2938
2939 if (ed->compression == BTRFS_COMPRESSION_NONE) {
2940 if (buf_free)
2941 RtlCopyMemory(data + bytes_read, buf + bumpoff, read);
2942 } else {
2943 uint8_t *decomp = NULL, *buf2;
2944 ULONG outlen, inlen, off2;
2945 uint32_t inpageoff = 0;
2946
2947 off2 = (ULONG)(ed2->offset + off);
2948 buf2 = buf;
2949 inlen = (ULONG)ed2->size;
2950
2951 if (ed->compression == BTRFS_COMPRESSION_LZO) {
2952 ULONG inoff = sizeof(uint32_t);
2953
2954 inlen -= sizeof(uint32_t);
2955
2956 // If reading a few sectors in, skip to the interesting bit
2957 while (off2 > LZO_PAGE_SIZE) {
2958 uint32_t partlen;
2959
2960 if (inlen < sizeof(uint32_t))
2961 break;
2962
2963 partlen = *(uint32_t*)(buf2 + inoff);
2964
2965 if (partlen < inlen) {
2966 off2 -= LZO_PAGE_SIZE;
2967 inoff += partlen + sizeof(uint32_t);
2968 inlen -= partlen + sizeof(uint32_t);
2969
2970 if (LZO_PAGE_SIZE - (inoff % LZO_PAGE_SIZE) < sizeof(uint32_t))
2971 inoff = ((inoff / LZO_PAGE_SIZE) + 1) * LZO_PAGE_SIZE;
2972 } else
2973 break;
2974 }
2975
2976 buf2 = &buf2[inoff];
2977 inpageoff = inoff % LZO_PAGE_SIZE;
2978 }
2979
2980 if (off2 != 0) {
2981 outlen = off2 + min(read, (uint32_t)(ed2->num_bytes - off));
2982
2983 decomp = ExAllocatePoolWithTag(pool_type, outlen, ALLOC_TAG);
2984 if (!decomp) {
2985 ERR("out of memory\n");
2986 ExFreePool(buf);
2987 Status = STATUS_INSUFFICIENT_RESOURCES;
2988 goto exit;
2989 }
2990 } else
2991 outlen = min(read, (uint32_t)(ed2->num_bytes - off));
2992
2993 if (ed->compression == BTRFS_COMPRESSION_ZLIB) {
2994 Status = zlib_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen);
2995
2996 if (!NT_SUCCESS(Status)) {
2997 ERR("zlib_decompress returned %08x\n", Status);
2998 ExFreePool(buf);
2999
3000 if (decomp)
3001 ExFreePool(decomp);
3002
3003 goto exit;
3004 }
3005 } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
3006 Status = lzo_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen, inpageoff);
3007
3008 if (!NT_SUCCESS(Status)) {
3009 ERR("lzo_decompress returned %08x\n", Status);
3010 ExFreePool(buf);
3011
3012 if (decomp)
3013 ExFreePool(decomp);
3014
3015 goto exit;
3016 }
3017 } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) {
3018 Status = zstd_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen);
3019
3020 if (!NT_SUCCESS(Status)) {
3021 ERR("zstd_decompress returned %08x\n", Status);
3022 ExFreePool(buf);
3023
3024 if (decomp)
3025 ExFreePool(decomp);
3026
3027 goto exit;
3028 }
3029 } else {
3030 ERR("unsupported compression type %x\n", ed->compression);
3031 Status = STATUS_NOT_SUPPORTED;
3032
3033 ExFreePool(buf);
3034
3035 if (decomp)
3036 ExFreePool(decomp);
3037
3038 goto exit;
3039 }
3040
3041 if (decomp) {
3042 RtlCopyMemory(data + bytes_read, decomp + off2, (size_t)min(read, ed2->num_bytes - off));
3043 ExFreePool(decomp);
3044 }
3045 }
3046
3047 if (buf_free)
3048 ExFreePool(buf);
3049
3050 bytes_read += read;
3051 length -= read;
3052
3053 break;
3054 }
3055
3056 case EXTENT_TYPE_PREALLOC:
3057 {
3058 uint64_t off = start + bytes_read - ext->offset;
3059 uint32_t read = (uint32_t)(len - off);
3060
3061 if (read > length) read = (uint32_t)length;
3062
3063 RtlZeroMemory(data + bytes_read, read);
3064
3065 bytes_read += read;
3066 length -= read;
3067
3068 break;
3069 }
3070
3071 default:
3072 WARN("Unsupported extent data type %u\n", ed->type);
3073 Status = STATUS_NOT_IMPLEMENTED;
3074 goto exit;
3075 }
3076
3077 last_end = ext->offset + len;
3078
3079 if (length == 0)
3080 break;
3081 }
3082
3083 nextitem:
3084 le = le->Flink;
3085 }
3086
3087 if (length > 0 && start + bytes_read < fcb->inode_item.st_size) {
3088 uint32_t read = (uint32_t)min(fcb->inode_item.st_size - start - bytes_read, length);
3089
3090 RtlZeroMemory(data + bytes_read, read);
3091
3092 bytes_read += read;
3093 length -= read;
3094 }
3095
3096 Status = STATUS_SUCCESS;
3097 if (pbr)
3098 *pbr = bytes_read;
3099
3100 exit:
3101 return Status;
3102 }
3103
// Common read worker for both cached and non-cached paths.  For cached reads
// it hands the request to the cache manager (CcCopyRead/CcMdlRead); for
// non-cached reads it calls read_stream/read_file directly.  bytes_read
// receives the number of bytes transferred (including any zero-filled tail
// past ValidDataLength).  May return STATUS_PENDING when wait is false and
// the operation cannot complete synchronously.
NTSTATUS do_read(PIRP Irp, bool wait, ULONG* bytes_read) {
    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
    PFILE_OBJECT FileObject = IrpSp->FileObject;
    fcb* fcb = FileObject->FsContext;
    uint8_t* data = NULL;
    ULONG length = IrpSp->Parameters.Read.Length, addon = 0;
    uint64_t start = IrpSp->Parameters.Read.ByteOffset.QuadPart;

    *bytes_read = 0;

    if (!fcb || !fcb->Vcb || !fcb->subvol)
        return STATUS_INTERNAL_ERROR;

    TRACE("file = %S (fcb = %p)\n", file_desc(FileObject), fcb);
    TRACE("offset = %I64x, length = %x\n", start, length);
    TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "true" : "false", Irp->Flags & IRP_NOCACHE ? "true" : "false");

    // Directories have no data stream to read (ADS reads are allowed).
    if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY)
        return STATUS_INVALID_DEVICE_REQUEST;

    // Byte-range locks don't apply to paging I/O.
    if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) {
        WARN("tried to read locked region\n");
        return STATUS_FILE_LOCK_CONFLICT;
    }

    if (length == 0) {
        TRACE("tried to read zero bytes\n");
        return STATUS_SUCCESS;
    }

    if (start >= (uint64_t)fcb->Header.FileSize.QuadPart) {
        TRACE("tried to read with offset after file end (%I64x >= %I64x)\n", start, fcb->Header.FileSize.QuadPart);
        return STATUS_END_OF_FILE;
    }

    TRACE("FileObject %p fcb %p FileSize = %I64x st_size = %I64x (%p)\n", FileObject, fcb, fcb->Header.FileSize.QuadPart, fcb->inode_item.st_size, &fcb->inode_item.st_size);

    // Map the user buffer unless this is a cached MDL read (in which case the
    // cache manager supplies the MDL itself).
    if (Irp->Flags & IRP_NOCACHE || !(IrpSp->MinorFunction & IRP_MN_MDL)) {
        data = map_user_buffer(Irp, fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);

        if (Irp->MdlAddress && !data) {
            ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        // Entirely past ValidDataLength: the whole read is zeroes.
        if (start >= (uint64_t)fcb->Header.ValidDataLength.QuadPart) {
            length = (ULONG)min(length, min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
            RtlZeroMemory(data, length);
            Irp->IoStatus.Information = *bytes_read = length;
            return STATUS_SUCCESS;
        }

        // Straddles ValidDataLength: zero the tail (addon bytes) now and
        // shorten the real read to the valid portion.
        if (length + start > (uint64_t)fcb->Header.ValidDataLength.QuadPart) {
            addon = (ULONG)(min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
            RtlZeroMemory(data + (fcb->Header.ValidDataLength.QuadPart - start), addon);
            length = (ULONG)(fcb->Header.ValidDataLength.QuadPart - start);
        }
    }

    if (!(Irp->Flags & IRP_NOCACHE)) {
        // Cached path: let the cache manager satisfy the read.
        NTSTATUS Status = STATUS_SUCCESS;

        _SEH2_TRY {
            // Initialize caching on first cached I/O for this file object.
            if (!FileObject->PrivateCacheMap) {
                CC_FILE_SIZES ccfs;

                ccfs.AllocationSize = fcb->Header.AllocationSize;
                ccfs.FileSize = fcb->Header.FileSize;
                ccfs.ValidDataLength = fcb->Header.ValidDataLength;

                init_file_cache(FileObject, &ccfs);
            }

            if (IrpSp->MinorFunction & IRP_MN_MDL) {
                CcMdlRead(FileObject,&IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus);
            } else {
                // Prefer CcCopyReadEx (Win8+) when available, so the read is
                // attributed to the originating thread.
                if (fCcCopyReadEx) {
                    TRACE("CcCopyReadEx(%p, %I64x, %x, %u, %p, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart,
                          length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread);
                    TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength);
                    if (!fCcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) {
                        TRACE("CcCopyReadEx could not wait\n");

                        IoMarkIrpPending(Irp);
                        return STATUS_PENDING;
                    }
                    TRACE("CcCopyReadEx finished\n");
                } else {
                    TRACE("CcCopyRead(%p, %I64x, %x, %u, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus);
                    TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength);
                    if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) {
                        TRACE("CcCopyRead could not wait\n");

                        IoMarkIrpPending(Irp);
                        return STATUS_PENDING;
                    }
                    TRACE("CcCopyRead finished\n");
                }
            }
        } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
            Status = _SEH2_GetExceptionCode();
        } _SEH2_END;

        if (NT_SUCCESS(Status)) {
            Status = Irp->IoStatus.Status;
            // Include the zero-filled tail in the byte count reported back.
            Irp->IoStatus.Information += addon;
            *bytes_read = (ULONG)Irp->IoStatus.Information;
        } else
            ERR("EXCEPTION - %08x\n", Status);

        return Status;
    } else {
        // Non-cached path: read directly from the volume.
        NTSTATUS Status;

        if (!wait) {
            IoMarkIrpPending(Irp);
            return STATUS_PENDING;
        }

        if (fcb->ads)
            Status = read_stream(fcb, data, start, length, bytes_read);
        else
            Status = read_file(fcb, data, start, length, bytes_read, Irp);

        *bytes_read += addon;
        TRACE("read %u bytes\n", *bytes_read);

        Irp->IoStatus.Information = *bytes_read;

        // Charge the read to the appropriate thread's disk counters, if the
        // PsUpdateDiskCounters API is available (see diskacc).
        if (diskacc && Status != STATUS_PENDING) {
            PETHREAD thread = NULL;

            if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread))
                thread = Irp->Tail.Overlay.Thread;
            else if (!IoIsSystemThread(PsGetCurrentThread()))
                thread = PsGetCurrentThread();
            else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp)
                thread = PsGetCurrentThread();

            if (thread)
                fPsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0);
        }

        return Status;
    }
}
3250
3251 _Dispatch_type_(IRP_MJ_READ)
3252 _Function_class_(DRIVER_DISPATCH)
3253 NTSTATUS __stdcall drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
3254 device_extension* Vcb = DeviceObject->DeviceExtension;
3255 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3256 PFILE_OBJECT FileObject = IrpSp->FileObject;
3257 ULONG bytes_read = 0;
3258 NTSTATUS Status;
3259 bool top_level;
3260 fcb* fcb;
3261 ccb* ccb;
3262 bool acquired_fcb_lock = false, wait;
3263
3264 FsRtlEnterFileSystem();
3265
3266 top_level = is_top_level(Irp);
3267
3268 TRACE("read\n");
3269
3270 if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
3271 Status = vol_read(DeviceObject, Irp);
3272 goto exit2;
3273 } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
3274 Status = STATUS_INVALID_PARAMETER;
3275 goto end;
3276 }
3277
3278 Irp->IoStatus.Information = 0;
3279
3280 if (IrpSp->MinorFunction & IRP_MN_COMPLETE) {
3281 CcMdlReadComplete(IrpSp->FileObject, Irp->MdlAddress);
3282
3283 Irp->MdlAddress = NULL;
3284 Status = STATUS_SUCCESS;
3285
3286 goto exit;
3287 }
3288
3289 fcb = FileObject->FsContext;
3290
3291 if (!fcb) {
3292 ERR("fcb was NULL\n");
3293 Status = STATUS_INVALID_PARAMETER;
3294 goto exit;
3295 }
3296
3297 ccb = FileObject->FsContext2;
3298
3299 if (!ccb) {
3300 ERR("ccb was NULL\n");
3301 Status = STATUS_INVALID_PARAMETER;
3302 goto exit;
3303 }
3304
3305 if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_READ_DATA)) {
3306 WARN("insufficient privileges\n");
3307 Status = STATUS_ACCESS_DENIED;
3308 goto exit;
3309 }
3310
3311 if (fcb == Vcb->volume_fcb) {
3312 TRACE("reading volume FCB\n");
3313
3314 IoSkipCurrentIrpStackLocation(Irp);
3315
3316 Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp);
3317
3318 goto exit2;
3319 }
3320
3321 wait = IoIsOperationSynchronous(Irp);
3322
3323 // Don't offload jobs when doing paging IO - otherwise this can lead to
3324 // deadlocks in CcCopyRead.
3325 if (Irp->Flags & IRP_PAGING_IO)
3326 wait = true;
3327
3328 if (!(Irp->Flags & IRP_PAGING_IO) && FileObject->SectionObjectPointer && FileObject->SectionObjectPointer->DataSectionObject) {
3329 IO_STATUS_BLOCK iosb;
3330
3331 CcFlushCache(FileObject->SectionObjectPointer, &IrpSp->Parameters.Read.ByteOffset, IrpSp->Parameters.Read.Length, &iosb);
3332 if (!NT_SUCCESS(iosb.Status)) {
3333 ERR("CcFlushCache returned %08x\n", iosb.Status);
3334 return iosb.Status;
3335 }
3336 }
3337
3338 if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) {
3339 if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) {
3340 Status = STATUS_PENDING;
3341 IoMarkIrpPending(Irp);
3342 goto exit;
3343 }
3344
3345 acquired_fcb_lock = true;
3346 }
3347
3348 Status = do_read(Irp, wait, &bytes_read);
3349
3350 if (acquired_fcb_lock)
3351 ExReleaseResourceLite(fcb->Header.Resource);
3352
3353 exit:
3354 if (FileObject->Flags & FO_SYNCHRONOUS_IO && !(Irp->Flags & IRP_PAGING_IO))
3355 FileObject->CurrentByteOffset.QuadPart = IrpSp->Parameters.Read.ByteOffset.QuadPart + (NT_SUCCESS(Status) ? bytes_read : 0);
3356
3357 end:
3358 Irp->IoStatus.Status = Status;
3359
3360 TRACE("Irp->IoStatus.Status = %08x\n", Irp->IoStatus.Status);
3361 TRACE("Irp->IoStatus.Information = %lu\n", Irp->IoStatus.Information);
3362 TRACE("returning %08x\n", Status);
3363
3364 if (Status != STATUS_PENDING)
3365 IoCompleteRequest(Irp, IO_NO_INCREMENT);
3366 else {
3367 if (!add_thread_job(Vcb, Irp))
3368 Status = do_read_job(Irp);
3369 }
3370
3371 exit2:
3372 if (top_level)
3373 IoSetTopLevelIrp(NULL);
3374
3375 FsRtlExitFileSystem();
3376
3377 return Status;
3378 }