ddeea66c7aa5cb3377c772db21b0d0291ec08fe4
[reactos.git] / drivers / filesystems / btrfs / read.c
1 /* Copyright (c) Mark Harmstone 2016-17
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19
// Outcome of one per-stripe read request. Set to Success/Error by
// read_data_completion; the RAID reassembly routines below inspect it to
// decide which stripes' data can be trusted.
enum read_data_status {
    ReadDataStatus_Pending,       // IRP issued, completion not yet seen
    ReadDataStatus_Success,       // device completed the read successfully
    ReadDataStatus_Error,         // device returned a failure status
    ReadDataStatus_MissingDevice, // presumably: stripe's device is absent (degraded) — set outside this chunk
    ReadDataStatus_Skip           // presumably: stripe not needed for this request — set outside this chunk
};
27
28 struct read_data_context;
29
// Per-stripe state for one device read issued as part of a multi-stripe
// request; the owning read_data_context holds an array of these.
typedef struct {
    struct read_data_context* context; // back-pointer to the owning request (used by the completion routine)
    UINT16 stripenum;                  // index of this stripe within the chunk
    BOOL rewrite;                      // NOTE(review): not referenced in this chunk — likely "write good data back" flag
    PIRP Irp;                          // read IRP sent to this stripe's device
    IO_STATUS_BLOCK iosb;              // device's status block, copied in read_data_completion
    enum read_data_status status;      // outcome of the read, set by read_data_completion
    PMDL mdl;                          // MDL describing this stripe's buffer
    UINT64 stripestart;                // start of the range read, relative to the stripe's offset on its device
    UINT64 stripeend;                  // end of that range
} read_data_stripe;
41
// State shared across all stripes of one logical read request.
typedef struct {
    KEVENT Event;                    // signalled when the last outstanding stripe completes
    NTSTATUS Status;                 // overall status of the request
    chunk* c;                        // chunk being read (may be NULL; used for partial-stripe checks)
    UINT64 address;                  // logical address of the read; checked against tree headers
    UINT32 buflen;                   // length of the output buffer
    LONG num_stripes, stripes_left;  // stripes_left is decremented atomically by each completion
    UINT64 type;                     // RAID type of the chunk
    UINT32 sector_size;              // sector size used for checksum granularity
    UINT16 firstoff, startoffstripe, sectors_per_stripe;
    UINT32* csum;                    // expected crc32c per sector, or NULL if data is unchecksummed
    BOOL tree;                       // TRUE when reading a metadata tree block (header-based validation)
    read_data_stripe* stripes;       // array of num_stripes per-device stripe states
    UINT8* va;                       // scratch/virtual address for the request buffer
} read_data_context;
57
58 extern BOOL diskacc;
59 extern tPsUpdateDiskCounters fPsUpdateDiskCounters;
60 extern tCcCopyReadEx fCcCopyReadEx;
61 extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters;
62
63 #define LINUX_PAGE_SIZE 4096
64
// I/O completion routine for the per-stripe read IRPs: records the stripe's
// result, and signals the context's event once every stripe has completed
// so the waiting thread can reassemble/verify the data.
_Function_class_(IO_COMPLETION_ROUTINE)
#ifdef __REACTOS__
static NTSTATUS NTAPI read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
#else
static NTSTATUS read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
#endif
    read_data_stripe* stripe = conptr;
    read_data_context* context = (read_data_context*)stripe->context;

    UNUSED(DeviceObject);

    // Preserve the device's status block for later inspection by the
    // RAID reassembly routines.
    stripe->iosb = Irp->IoStatus;

    if (NT_SUCCESS(Irp->IoStatus.Status))
        stripe->status = ReadDataStatus_Success;
    else
        stripe->status = ReadDataStatus_Error;

    // Wake the waiter only when the final outstanding stripe finishes.
    if (InterlockedDecrement(&context->stripes_left) == 0)
        KeSetEvent(&context->Event, 0, FALSE);

    // Stop completion processing here; the IRP is reclaimed by the issuer.
    return STATUS_MORE_PROCESSING_REQUIRED;
}
88
89 NTSTATUS check_csum(device_extension* Vcb, UINT8* data, UINT32 sectors, UINT32* csum) {
90 NTSTATUS Status;
91 calc_job* cj;
92 UINT32* csum2;
93
94 // From experimenting, it seems that 40 sectors is roughly the crossover
95 // point where offloading the crc32 calculation becomes worth it.
96
97 if (sectors < 40 || KeQueryActiveProcessorCount(NULL) < 2) {
98 ULONG j;
99
100 for (j = 0; j < sectors; j++) {
101 UINT32 crc32 = ~calc_crc32c(0xffffffff, data + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
102
103 if (crc32 != csum[j]) {
104 return STATUS_CRC_ERROR;
105 }
106 }
107
108 return STATUS_SUCCESS;
109 }
110
111 csum2 = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sectors, ALLOC_TAG);
112 if (!csum2) {
113 ERR("out of memory\n");
114 return STATUS_INSUFFICIENT_RESOURCES;
115 }
116
117 Status = add_calc_job(Vcb, data, sectors, csum2, &cj);
118 if (!NT_SUCCESS(Status)) {
119 ERR("add_calc_job returned %08x\n", Status);
120 ExFreePool(csum2);
121 return Status;
122 }
123
124 KeWaitForSingleObject(&cj->event, Executive, KernelMode, FALSE, NULL);
125
126 if (RtlCompareMemory(csum2, csum, sectors * sizeof(UINT32)) != sectors * sizeof(UINT32)) {
127 free_calc_job(cj);
128 ExFreePool(csum2);
129 return STATUS_CRC_ERROR;
130 }
131
132 free_calc_job(cj);
133 ExFreePool(csum2);
134
135 return STATUS_SUCCESS;
136 }
137
// Handles the result of a read from a DUP/RAID1-style chunk, where every
// stripe holds a complete copy of the data. Validates the copy that was
// read (tree header for metadata, per-sector crc32c for data); on a
// checksum failure it re-reads the other copies synchronously, and if a
// good copy is found, repairs the bad stripe by writing the good data back
// (unless the volume or device is read-only).
static NTSTATUS read_data_dup(device_extension* Vcb, UINT8* buf, UINT64 addr, read_data_context* context, CHUNK_ITEM* ci,
                              device** devices, UINT64 generation) {
    ULONG i;
    BOOL checksum_error = FALSE;
    UINT16 j, stripe = 0;
    NTSTATUS Status;
    CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];

    // Fail immediately on any device error; otherwise take the first
    // stripe that completed successfully.
    for (j = 0; j < ci->num_stripes; j++) {
        if (context->stripes[j].status == ReadDataStatus_Error) {
            WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
            return context->stripes[j].iosb.Status;
        } else if (context->stripes[j].status == ReadDataStatus_Success) {
            stripe = j;
            break;
        }
    }

    if (context->stripes[stripe].status != ReadDataStatus_Success)
        return STATUS_INTERNAL_ERROR;

    if (context->tree) {
        // Metadata: validate the tree header's address, checksum and
        // (when non-zero) expected generation.
        tree_header* th = (tree_header*)buf;
        UINT32 crc32;

        crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum));

        if (th->address != context->address || crc32 != *((UINT32*)th->csum)) {
            checksum_error = TRUE;
            log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (generation != 0 && th->generation != generation) {
            checksum_error = TRUE;
            log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
        }
    } else if (context->csum) {
#ifdef DEBUG_STATS
        LARGE_INTEGER time1, time2;

        time1 = KeQueryPerformanceCounter(NULL);
#endif
        // Data: verify per-sector checksums over however much was read.
        Status = check_csum(Vcb, buf, (ULONG)context->stripes[stripe].Irp->IoStatus.Information / context->sector_size, context->csum);

        if (Status == STATUS_CRC_ERROR) {
            checksum_error = TRUE;
            log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (!NT_SUCCESS(Status)) {
            ERR("check_csum returned %08x\n", Status);
            return Status;
        }
#ifdef DEBUG_STATS
        time2 = KeQueryPerformanceCounter(NULL);

        Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
#endif
    }

    if (!checksum_error)
        return STATUS_SUCCESS;

    // Only one copy exists, so there is nothing to recover from.
    if (ci->num_stripes == 1)
        return STATUS_CRC_ERROR;

    if (context->tree) {
        // Metadata recovery: try each other mirror in turn until one
        // yields a tree block with the right address/checksum/generation.
        tree_header* t2;
        BOOL recovered = FALSE;

        t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
        if (!t2) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        for (j = 0; j < ci->num_stripes; j++) {
            if (j != stripe && devices[j] && devices[j]->devobj) {
                Status = sync_read_phys(devices[j]->devobj, cis[j].offset + context->stripes[stripe].stripestart, Vcb->superblock.node_size, (UINT8*)t2, FALSE);
                if (!NT_SUCCESS(Status)) {
                    WARN("sync_read_phys returned %08x\n", Status);
                    log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                } else {
                    UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum));

                    if (t2->address == addr && crc32 == *((UINT32*)t2->csum) && (generation == 0 || t2->generation == generation)) {
                        RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
                        ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id);
                        recovered = TRUE;

                        if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
                            Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + context->stripes[stripe].stripestart,
                                                     t2, Vcb->superblock.node_size);
                            if (!NT_SUCCESS(Status)) {
                                WARN("write_data_phys returned %08x\n", Status);
                                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                            }
                        }

                        break;
                    } else if (t2->address != addr || crc32 != *((UINT32*)t2->csum))
                        log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                    else
                        log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_GENERATION_ERRORS);
                }
            }
        }

        if (!recovered) {
            ERR("unrecoverable checksum error at %llx\n", addr);
            ExFreePool(t2);
            return STATUS_CRC_ERROR;
        }

        ExFreePool(t2);
    } else {
        // Data recovery: only re-fetch the individual sectors that failed
        // their checksum, leaving good sectors untouched.
        ULONG sectors = (ULONG)context->stripes[stripe].Irp->IoStatus.Information / Vcb->superblock.sector_size;
        UINT8* sector;

        sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
        if (!sector) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        for (i = 0; i < sectors; i++) {
            UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

            if (context->csum[i] != crc32) {
                BOOL recovered = FALSE;

                for (j = 0; j < ci->num_stripes; j++) {
                    if (j != stripe && devices[j] && devices[j]->devobj) {
                        Status = sync_read_phys(devices[j]->devobj, cis[j].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
                                                Vcb->superblock.sector_size, sector, FALSE);
                        if (!NT_SUCCESS(Status)) {
                            WARN("sync_read_phys returned %08x\n", Status);
                            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                        } else {
                            UINT32 crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);

                            if (crc32b == context->csum[i]) {
                                RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
                                ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);
                                recovered = TRUE;

                                if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
                                    Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
                                                             sector, Vcb->superblock.sector_size);
                                    if (!NT_SUCCESS(Status)) {
                                        WARN("write_data_phys returned %08x\n", Status);
                                        log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                                    }
                                }

                                break;
                            } else
                                log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                        }
                    }
                }

                if (!recovered) {
                    ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
                    ExFreePool(sector);
                    return STATUS_CRC_ERROR;
                }
            }
        }

        ExFreePool(sector);
    }

    return STATUS_SUCCESS;
}
310
// Handles the result of a read from a RAID0 chunk. RAID0 has no redundancy,
// so a checksum failure here is unrecoverable; the function only works out
// which device held the bad data so the error can be attributed to it.
static NTSTATUS read_data_raid0(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context,
                                CHUNK_ITEM* ci, device** devices, UINT64 generation, UINT64 offset) {
    UINT64 i;

    // Any stripe-level device error fails the whole read.
    for (i = 0; i < ci->num_stripes; i++) {
        if (context->stripes[i].status == ReadDataStatus_Error) {
            WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status);
            log_device_error(Vcb, devices[i], BTRFS_DEV_STAT_READ_ERRORS);
            return context->stripes[i].iosb.Status;
        }
    }

    if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries
        tree_header* th = (tree_header*)buf;
        UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

        if (crc32 != *((UINT32*)th->csum) || addr != th->address || (generation != 0 && generation != th->generation)) {
            UINT64 off;
            UINT16 stripe;

            // Locate the stripe that held this tree block, purely for
            // error attribution.
            get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &off, &stripe);

            ERR("unrecoverable checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id);

            if (crc32 != *((UINT32*)th->csum)) {
                WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                return STATUS_CRC_ERROR;
            } else if (addr != th->address) {
                WARN("address of tree was %llx, not %llx as expected\n", th->address, addr);
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                return STATUS_CRC_ERROR;
            } else if (generation != 0 && generation != th->generation) {
                WARN("generation of tree was %llx, not %llx as expected\n", th->generation, generation);
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
                return STATUS_CRC_ERROR;
            }
        }
    } else if (context->csum) {
        NTSTATUS Status;
#ifdef DEBUG_STATS
        LARGE_INTEGER time1, time2;

        time1 = KeQueryPerformanceCounter(NULL);
#endif
        Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);

        if (Status == STATUS_CRC_ERROR) {
            // Re-scan sector by sector to identify the first bad sector
            // and hence the device to blame.
            for (i = 0; i < length / Vcb->superblock.sector_size; i++) {
                UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

                if (context->csum[i] != crc32) {
                    UINT64 off;
                    UINT16 stripe;

                    get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, ci->num_stripes, &off, &stripe);

                    ERR("unrecoverable checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id);

                    log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                    return Status;
                }
            }

            return Status;
        } else if (!NT_SUCCESS(Status)) {
            ERR("check_csum returned %08x\n", Status);
            return Status;
        }
#ifdef DEBUG_STATS
        time2 = KeQueryPerformanceCounter(NULL);

        Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
#endif
    }

    return STATUS_SUCCESS;
}
390
391 static NTSTATUS read_data_raid10(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context,
392 CHUNK_ITEM* ci, device** devices, UINT64 generation, UINT64 offset) {
393 UINT64 i;
394 UINT16 j, stripe;
395 NTSTATUS Status;
396 BOOL checksum_error = FALSE;
397 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
398
399 for (j = 0; j < ci->num_stripes; j++) {
400 if (context->stripes[j].status == ReadDataStatus_Error) {
401 WARN("stripe %llu returned error %08x\n", j, context->stripes[j].iosb.Status);
402 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
403 return context->stripes[j].iosb.Status;
404 } else if (context->stripes[j].status == ReadDataStatus_Success)
405 stripe = j;
406 }
407
408 if (context->tree) {
409 tree_header* th = (tree_header*)buf;
410 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
411
412 if (crc32 != *((UINT32*)th->csum)) {
413 WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
414 checksum_error = TRUE;
415 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
416 } else if (addr != th->address) {
417 WARN("address of tree was %llx, not %llx as expected\n", th->address, addr);
418 checksum_error = TRUE;
419 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
420 } else if (generation != 0 && generation != th->generation) {
421 WARN("generation of tree was %llx, not %llx as expected\n", th->generation, generation);
422 checksum_error = TRUE;
423 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
424 }
425 } else if (context->csum) {
426 #ifdef DEBUG_STATS
427 LARGE_INTEGER time1, time2;
428
429 time1 = KeQueryPerformanceCounter(NULL);
430 #endif
431 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
432
433 if (Status == STATUS_CRC_ERROR)
434 checksum_error = TRUE;
435 else if (!NT_SUCCESS(Status)) {
436 ERR("check_csum returned %08x\n", Status);
437 return Status;
438 }
439 #ifdef DEBUG_STATS
440 time2 = KeQueryPerformanceCounter(NULL);
441
442 Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
443 #endif
444 }
445
446 if (!checksum_error)
447 return STATUS_SUCCESS;
448
449 if (context->tree) {
450 tree_header* t2;
451 UINT64 off;
452 UINT16 badsubstripe = 0;
453 BOOL recovered = FALSE;
454
455 t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
456 if (!t2) {
457 ERR("out of memory\n");
458 return STATUS_INSUFFICIENT_RESOURCES;
459 }
460
461 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &off, &stripe);
462
463 stripe *= ci->sub_stripes;
464
465 for (j = 0; j < ci->sub_stripes; j++) {
466 if (context->stripes[stripe + j].status == ReadDataStatus_Success) {
467 badsubstripe = j;
468 break;
469 }
470 }
471
472 for (j = 0; j < ci->sub_stripes; j++) {
473 if (context->stripes[stripe + j].status != ReadDataStatus_Success && devices[stripe + j] && devices[stripe + j]->devobj) {
474 Status = sync_read_phys(devices[stripe + j]->devobj, cis[stripe + j].offset + off,
475 Vcb->superblock.node_size, (UINT8*)t2, FALSE);
476 if (!NT_SUCCESS(Status)) {
477 WARN("sync_read_phys returned %08x\n", Status);
478 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_READ_ERRORS);
479 } else {
480 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum));
481
482 if (t2->address == addr && crc32 == *((UINT32*)t2->csum) && (generation == 0 || t2->generation == generation)) {
483 RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
484 ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe + j]->devitem.dev_id);
485 recovered = TRUE;
486
487 if (!Vcb->readonly && !devices[stripe + badsubstripe]->readonly && devices[stripe + badsubstripe]->devobj) { // write good data over bad
488 Status = write_data_phys(devices[stripe + badsubstripe]->devobj, cis[stripe + badsubstripe].offset + off,
489 t2, Vcb->superblock.node_size);
490 if (!NT_SUCCESS(Status)) {
491 WARN("write_data_phys returned %08x\n", Status);
492 log_device_error(Vcb, devices[stripe + badsubstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
493 }
494 }
495
496 break;
497 } else if (t2->address != addr || crc32 != *((UINT32*)t2->csum))
498 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
499 else
500 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_GENERATION_ERRORS);
501 }
502 }
503 }
504
505 if (!recovered) {
506 ERR("unrecoverable checksum error at %llx\n", addr);
507 ExFreePool(t2);
508 return STATUS_CRC_ERROR;
509 }
510
511 ExFreePool(t2);
512 } else {
513 ULONG sectors = length / Vcb->superblock.sector_size;
514 UINT8* sector;
515
516 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
517 if (!sector) {
518 ERR("out of memory\n");
519 return STATUS_INSUFFICIENT_RESOURCES;
520 }
521
522 for (i = 0; i < sectors; i++) {
523 UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
524
525 if (context->csum[i] != crc32) {
526 UINT64 off;
527 UINT16 stripe2, badsubstripe = 0;
528 BOOL recovered = FALSE;
529
530 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
531 ci->num_stripes / ci->sub_stripes, &off, &stripe2);
532
533 stripe2 *= ci->sub_stripes;
534
535 for (j = 0; j < ci->sub_stripes; j++) {
536 if (context->stripes[stripe2 + j].status == ReadDataStatus_Success) {
537 badsubstripe = j;
538 break;
539 }
540 }
541
542 log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
543
544 for (j = 0; j < ci->sub_stripes; j++) {
545 if (context->stripes[stripe2 + j].status != ReadDataStatus_Success && devices[stripe2 + j] && devices[stripe2 + j]->devobj) {
546 Status = sync_read_phys(devices[stripe2 + j]->devobj, cis[stripe2 + j].offset + off,
547 Vcb->superblock.sector_size, sector, FALSE);
548 if (!NT_SUCCESS(Status)) {
549 WARN("sync_read_phys returned %08x\n", Status);
550 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_READ_ERRORS);
551 } else {
552 UINT32 crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);
553
554 if (crc32b == context->csum[i]) {
555 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
556 ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe2 + j]->devitem.dev_id);
557 recovered = TRUE;
558
559 if (!Vcb->readonly && !devices[stripe2 + badsubstripe]->readonly && devices[stripe2 + badsubstripe]->devobj) { // write good data over bad
560 Status = write_data_phys(devices[stripe2 + badsubstripe]->devobj, cis[stripe2 + badsubstripe].offset + off,
561 sector, Vcb->superblock.sector_size);
562 if (!NT_SUCCESS(Status)) {
563 WARN("write_data_phys returned %08x\n", Status);
564 log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_READ_ERRORS);
565 }
566 }
567
568 break;
569 } else
570 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
571 }
572 }
573 }
574
575 if (!recovered) {
576 ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
577 ExFreePool(sector);
578 return STATUS_CRC_ERROR;
579 }
580 }
581 }
582
583 ExFreePool(sector);
584 }
585
586 return STATUS_SUCCESS;
587 }
588
// Handles the result of a read from a RAID5 chunk. Overlays any pending
// partial-stripe data, validates checksums, and on failure (or when reading
// degraded with no checksum available) reconstructs the affected data by
// XORing the remaining stripes with the parity stripe, writing the
// reconstructed data back over the bad stripe where possible.
static NTSTATUS read_data_raid5(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, CHUNK_ITEM* ci,
                                device** devices, UINT64 offset, UINT64 generation, chunk* c, BOOL degraded) {
    ULONG i;
    NTSTATUS Status;
    BOOL checksum_error = FALSE;
    CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
    UINT16 j, stripe;
    BOOL no_success = TRUE;

    // Fail fast on device errors; note a successful stripe (stripe is only
    // read below when no_success is FALSE, i.e. after it has been set).
    for (j = 0; j < ci->num_stripes; j++) {
        if (context->stripes[j].status == ReadDataStatus_Error) {
            WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
            return context->stripes[j].iosb.Status;
        } else if (context->stripes[j].status == ReadDataStatus_Success) {
            stripe = j;
            no_success = FALSE;
        }
    }

    if (c) { // check partial stripes
        // Data belonging to not-yet-flushed partial stripes lives in memory,
        // not on disk; copy the overlapping runs over what was read.
        LIST_ENTRY* le;
        UINT64 ps_length = (ci->num_stripes - 1) * ci->stripe_length;

        ExAcquireResourceSharedLite(&c->partial_stripes_lock, TRUE);

        le = c->partial_stripes.Flink;
        while (le != &c->partial_stripes) {
            partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);

            if (ps->address + ps_length > addr && ps->address < addr + length) {
                ULONG runlength, index;

                // Clear bits in the bitmap mark sectors still held in memory.
                runlength = RtlFindFirstRunClear(&ps->bmp, &index);

                while (runlength != 0) {
                    UINT64 runstart = ps->address + (index * Vcb->superblock.sector_size);
                    UINT64 runend = runstart + (runlength * Vcb->superblock.sector_size);
                    UINT64 start = max(runstart, addr);
                    UINT64 end = min(runend, addr + length);

                    if (end > start)
                        RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));

                    runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
                }
            } else if (ps->address >= addr + length)
                break;

            le = le->Flink;
        }

        ExReleaseResourceLite(&c->partial_stripes_lock);
    }

    if (context->tree) {
        // Metadata: validate tree header address, checksum, generation.
        tree_header* th = (tree_header*)buf;
        UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));

        if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
            checksum_error = TRUE;
            if (!no_success && !degraded)
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (generation != 0 && generation != th->generation) {
            checksum_error = TRUE;
            if (!no_success && !degraded)
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
        }
    } else if (context->csum) {
#ifdef DEBUG_STATS
        LARGE_INTEGER time1, time2;

        time1 = KeQueryPerformanceCounter(NULL);
#endif
        Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);

        if (Status == STATUS_CRC_ERROR) {
            if (!degraded)
                WARN("checksum error\n");
            checksum_error = TRUE;
        } else if (!NT_SUCCESS(Status)) {
            ERR("check_csum returned %08x\n", Status);
            return Status;
        }

#ifdef DEBUG_STATS
        time2 = KeQueryPerformanceCounter(NULL);

        Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
#endif
    } else if (degraded)
        // No checksum to verify and a device is missing: assume the data
        // needs reconstructing from parity.
        checksum_error = TRUE;

    if (!checksum_error)
        return STATUS_SUCCESS;

    if (context->tree) {
        // Metadata reconstruction: XOR together every stripe except the one
        // holding the tree block, then validate the result.
        UINT16 parity;
        UINT64 off;
        BOOL recovered = FALSE, first = TRUE, failed = FALSE;
        UINT8* t2;

        // Two node-sized buffers: accumulator and per-stripe read target.
        t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * 2, ALLOC_TAG);
        if (!t2) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &off, &stripe);

        // Parity stripe rotates per row; data stripes are numbered from the
        // slot after the parity.
        parity = (((addr - offset) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;

        stripe = (parity + stripe + 1) % ci->num_stripes;

        for (j = 0; j < ci->num_stripes; j++) {
            if (j != stripe) {
                if (devices[j] && devices[j]->devobj) {
                    if (first) {
                        Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, t2, FALSE);
                        if (!NT_SUCCESS(Status)) {
                            ERR("sync_read_phys returned %08x\n", Status);
                            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                            failed = TRUE;
                            break;
                        }

                        first = FALSE;
                    } else {
                        Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, t2 + Vcb->superblock.node_size, FALSE);
                        if (!NT_SUCCESS(Status)) {
                            ERR("sync_read_phys returned %08x\n", Status);
                            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                            failed = TRUE;
                            break;
                        }

                        do_xor(t2, t2 + Vcb->superblock.node_size, Vcb->superblock.node_size);
                    }
                } else {
                    // Two missing devices: RAID5 can't reconstruct.
                    failed = TRUE;
                    break;
                }
            }
        }

        if (!failed) {
            tree_header* t3 = (tree_header*)t2;
            UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t3->fs_uuid, Vcb->superblock.node_size - sizeof(t3->csum));

            if (t3->address == addr && crc32 == *((UINT32*)t3->csum) && (generation == 0 || t3->generation == generation)) {
                RtlCopyMemory(buf, t2, Vcb->superblock.node_size);

                if (!degraded)
                    ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id);

                recovered = TRUE;

                if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
                    Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + off, t2, Vcb->superblock.node_size);
                    if (!NT_SUCCESS(Status)) {
                        WARN("write_data_phys returned %08x\n", Status);
                        log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                    }
                }
            }
        }

        if (!recovered) {
            ERR("unrecoverable checksum error at %llx\n", addr);
            ExFreePool(t2);
            return STATUS_CRC_ERROR;
        }

        ExFreePool(t2);
    } else {
        // Data reconstruction: per bad (or unreadable) sector, XOR the
        // corresponding sectors of all other stripes.
        ULONG sectors = length / Vcb->superblock.sector_size;
        UINT8* sector;

        // Two sector-sized buffers: accumulator and per-stripe read target.
        sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * 2, ALLOC_TAG);
        if (!sector) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        for (i = 0; i < sectors; i++) {
            UINT16 parity;
            UINT64 off;
            UINT32 crc32;

            if (context->csum)
                crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);

            get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
                             ci->num_stripes - 1, &off, &stripe);

            parity = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;

            stripe = (parity + stripe + 1) % ci->num_stripes;

            // Reconstruct if the sector's device is missing, or its data
            // failed the checksum.
            if (!devices[stripe] || !devices[stripe]->devobj || (context->csum && context->csum[i] != crc32)) {
                BOOL recovered = FALSE, first = TRUE, failed = FALSE;

                if (devices[stripe] && devices[stripe]->devobj)
                    log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_READ_ERRORS);

                for (j = 0; j < ci->num_stripes; j++) {
                    if (j != stripe) {
                        if (devices[j] && devices[j]->devobj) {
                            if (first) {
                                Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector, FALSE);
                                if (!NT_SUCCESS(Status)) {
                                    ERR("sync_read_phys returned %08x\n", Status);
                                    failed = TRUE;
                                    log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                                    break;
                                }

                                first = FALSE;
                            } else {
                                Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector + Vcb->superblock.sector_size, FALSE);
                                if (!NT_SUCCESS(Status)) {
                                    ERR("sync_read_phys returned %08x\n", Status);
                                    failed = TRUE;
                                    log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                                    break;
                                }

                                do_xor(sector, sector + Vcb->superblock.sector_size, Vcb->superblock.sector_size);
                            }
                        } else {
                            // A second device is missing: unrecoverable.
                            failed = TRUE;
                            break;
                        }
                    }
                }

                if (!failed) {
                    if (context->csum)
                        crc32 = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);

                    if (!context->csum || crc32 == context->csum[i]) {
                        RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);

                        if (!degraded)
                            ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);

                        recovered = TRUE;

                        if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
                            Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + off,
                                                     sector, Vcb->superblock.sector_size);
                            if (!NT_SUCCESS(Status)) {
                                WARN("write_data_phys returned %08x\n", Status);
                                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                            }
                        }
                    }
                }

                if (!recovered) {
                    ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
                    ExFreePool(sector);
                    return STATUS_CRC_ERROR;
                }
            }
        }

        ExFreePool(sector);
    }

    return STATUS_SUCCESS;
}
861
/* Reconstruct the contents of two missing RAID6 stripes for one sector.
 *
 * sectors points to num_stripes consecutive sector_size buffers laid out in
 * logical order: data stripes 0..num_stripes-3, then p (XOR parity) at index
 * num_stripes-2, then q (Galois-field parity) at index num_stripes-1.
 * missing1/missing2 are the logical indices of the two unreadable stripes.
 *
 * out receives the recovered data: when one of the missing stripes is p, out
 * is a single sector holding the missing data stripe (rebuilt from q); when
 * both missing stripes are data stripes, out holds two sectors - the stripe
 * at the lower index x in out[0..sector_size) and the one at index y in
 * out[sector_size..2*sector_size). Callers must size out accordingly. */
void raid6_recover2(UINT8* sectors, UINT16 num_stripes, ULONG sector_size, UINT16 missing1, UINT16 missing2, UINT8* out) {
    if (missing1 == num_stripes - 2 || missing2 == num_stripes - 2) { // reconstruct from q and data
        // p is one of the missing stripes, so only one data stripe needs rebuilding.
        UINT16 missing = missing1 == (num_stripes - 2) ? missing2 : missing1;
        UINT16 stripe;

        // Horner-style evaluation: fold the surviving data stripes into out,
        // multiplying by the Galois generator (galois_double) between terms.
        stripe = num_stripes - 3;

        if (stripe == missing)
            RtlZeroMemory(out, sector_size);
        else
            RtlCopyMemory(out, sectors + (stripe * sector_size), sector_size);

        do {
            stripe--;

            galois_double(out, sector_size);

            if (stripe != missing)
                do_xor(out, sectors + (stripe * sector_size), sector_size);
        } while (stripe > 0);

        // XOR in q, leaving g^missing * D_missing ...
        do_xor(out, sectors + ((num_stripes - 1) * sector_size), sector_size);

        // ... then divide by g^missing to get the data stripe itself.
        if (missing != 0)
            galois_divpower(out, (UINT8)missing, sector_size);
    } else { // reconstruct from p and q
        // Two data stripes are missing; solve the 2x2 GF(256) system using
        // both parities (see the Linux kernel's raid6 recovery algorithm).
        UINT16 x, y, stripe;
        UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
        UINT32 j;

        stripe = num_stripes - 3;

        // qxy/pxy accumulate the q-like and p-like syndromes over the
        // surviving data stripes, in the two halves of out.
        pxy = out + sector_size;
        qxy = out;

        if (stripe == missing1 || stripe == missing2) {
            RtlZeroMemory(qxy, sector_size);
            RtlZeroMemory(pxy, sector_size);

            if (stripe == missing1)
                x = stripe;
            else
                y = stripe;
        } else {
            RtlCopyMemory(qxy, sectors + (stripe * sector_size), sector_size);
            RtlCopyMemory(pxy, sectors + (stripe * sector_size), sector_size);
        }

        // Walk the remaining data stripes; by the end x == missing1 and
        // y == missing2 (both are < num_stripes - 2 in this branch, so both
        // are assigned here or above before being read below).
        do {
            stripe--;

            galois_double(qxy, sector_size);

            if (stripe != missing1 && stripe != missing2) {
                do_xor(qxy, sectors + (stripe * sector_size), sector_size);
                do_xor(pxy, sectors + (stripe * sector_size), sector_size);
            } else if (stripe == missing1)
                x = stripe;
            else if (stripe == missing2)
                y = stripe;
        } while (stripe > 0);

        // Coefficients for the closed-form two-erasure solution:
        // a = g^(y-x) / (g^(y-x) ^ 1), b = g^(-x) / (g^(y-x) ^ 1).
        gyx = gpow2(y > x ? (y-x) : (255-x+y));
        gx = gpow2(255-x);

        denom = gdiv(1, gyx ^ 1);
        a = gmul(gyx, denom);
        b = gmul(gx, denom);

        p = sectors + ((num_stripes - 2) * sector_size);
        q = sectors + ((num_stripes - 1) * sector_size);

        // D_x = a * (P ^ Pxy) ^ b * (Q ^ Qxy), computed byte by byte into out[0..].
        for (j = 0; j < sector_size; j++) {
            *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);

            p++;
            q++;
            pxy++;
            qxy++;
        }

        // D_y = D_x ^ Pxy ^ P, stored in the second half of out.
        do_xor(out + sector_size, out, sector_size);
        do_xor(out + sector_size, sectors + ((num_stripes - 2) * sector_size), sector_size);
    }
}
947
948 static NTSTATUS read_data_raid6(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, CHUNK_ITEM* ci,
949 device** devices, UINT64 offset, UINT64 generation, chunk* c, BOOL degraded) {
950 NTSTATUS Status;
951 ULONG i;
952 BOOL checksum_error = FALSE;
953 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
954 UINT16 stripe, j;
955 BOOL no_success = TRUE;
956
957 for (j = 0; j < ci->num_stripes; j++) {
958 if (context->stripes[j].status == ReadDataStatus_Error) {
959 WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
960
961 if (devices[j])
962 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
963 return context->stripes[j].iosb.Status;
964 } else if (context->stripes[j].status == ReadDataStatus_Success) {
965 stripe = j;
966 no_success = FALSE;
967 }
968 }
969
970 if (c) { // check partial stripes
971 LIST_ENTRY* le;
972 UINT64 ps_length = (ci->num_stripes - 2) * ci->stripe_length;
973
974 ExAcquireResourceSharedLite(&c->partial_stripes_lock, TRUE);
975
976 le = c->partial_stripes.Flink;
977 while (le != &c->partial_stripes) {
978 partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
979
980 if (ps->address + ps_length > addr && ps->address < addr + length) {
981 ULONG runlength, index;
982
983 runlength = RtlFindFirstRunClear(&ps->bmp, &index);
984
985 while (runlength != 0) {
986 UINT64 runstart = ps->address + (index * Vcb->superblock.sector_size);
987 UINT64 runend = runstart + (runlength * Vcb->superblock.sector_size);
988 UINT64 start = max(runstart, addr);
989 UINT64 end = min(runend, addr + length);
990
991 if (end > start)
992 RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
993
994 runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
995 }
996 } else if (ps->address >= addr + length)
997 break;
998
999 le = le->Flink;
1000 }
1001
1002 ExReleaseResourceLite(&c->partial_stripes_lock);
1003 }
1004
1005 if (context->tree) {
1006 tree_header* th = (tree_header*)buf;
1007 UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1008
1009 if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
1010 checksum_error = TRUE;
1011 if (!no_success && !degraded && devices[stripe])
1012 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1013 } else if (generation != 0 && generation != th->generation) {
1014 checksum_error = TRUE;
1015 if (!no_success && !degraded && devices[stripe])
1016 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
1017 }
1018 } else if (context->csum) {
1019 #ifdef DEBUG_STATS
1020 LARGE_INTEGER time1, time2;
1021
1022 time1 = KeQueryPerformanceCounter(NULL);
1023 #endif
1024 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
1025
1026 if (Status == STATUS_CRC_ERROR) {
1027 if (!degraded)
1028 WARN("checksum error\n");
1029 checksum_error = TRUE;
1030 } else if (!NT_SUCCESS(Status)) {
1031 ERR("check_csum returned %08x\n", Status);
1032 return Status;
1033 }
1034 #ifdef DEBUG_STATS
1035 time2 = KeQueryPerformanceCounter(NULL);
1036
1037 Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
1038 #endif
1039 } else if (degraded)
1040 checksum_error = TRUE;
1041
1042 if (!checksum_error)
1043 return STATUS_SUCCESS;
1044
1045 if (context->tree) {
1046 UINT8* sector;
1047 UINT16 k, physstripe, parity1, parity2, error_stripe;
1048 UINT64 off;
1049 BOOL recovered = FALSE, failed = FALSE;
1050 ULONG num_errors = 0;
1051
1052 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * (ci->num_stripes + 2), ALLOC_TAG);
1053 if (!sector) {
1054 ERR("out of memory\n");
1055 return STATUS_INSUFFICIENT_RESOURCES;
1056 }
1057
1058 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &off, &stripe);
1059
1060 parity1 = (((addr - offset) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1061 parity2 = (parity1 + 1) % ci->num_stripes;
1062
1063 physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1064
1065 j = (parity2 + 1) % ci->num_stripes;
1066
1067 for (k = 0; k < ci->num_stripes - 1; k++) {
1068 if (j != physstripe) {
1069 if (devices[j] && devices[j]->devobj) {
1070 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, sector + (k * Vcb->superblock.node_size), FALSE);
1071 if (!NT_SUCCESS(Status)) {
1072 ERR("sync_read_phys returned %08x\n", Status);
1073 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1074 num_errors++;
1075 error_stripe = k;
1076
1077 if (num_errors > 1) {
1078 failed = TRUE;
1079 break;
1080 }
1081 }
1082 } else {
1083 num_errors++;
1084 error_stripe = k;
1085
1086 if (num_errors > 1) {
1087 failed = TRUE;
1088 break;
1089 }
1090 }
1091 }
1092
1093 j = (j + 1) % ci->num_stripes;
1094 }
1095
1096 if (!failed) {
1097 if (num_errors == 0) {
1098 tree_header* th = (tree_header*)(sector + (stripe * Vcb->superblock.node_size));
1099 UINT32 crc32;
1100
1101 RtlCopyMemory(sector + (stripe * Vcb->superblock.node_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size),
1102 Vcb->superblock.node_size);
1103
1104 for (j = 0; j < ci->num_stripes - 2; j++) {
1105 if (j != stripe)
1106 do_xor(sector + (stripe * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), Vcb->superblock.node_size);
1107 }
1108
1109 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1110
1111 if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation)) {
1112 RtlCopyMemory(buf, sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1113
1114 if (devices[physstripe] && devices[physstripe]->devobj)
1115 ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[physstripe]->devitem.dev_id);
1116
1117 recovered = TRUE;
1118
1119 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1120 Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
1121 sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1122 if (!NT_SUCCESS(Status)) {
1123 WARN("write_data_phys returned %08x\n", Status);
1124 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1125 }
1126 }
1127 }
1128 }
1129
1130 if (!recovered) {
1131 UINT32 crc32;
1132 tree_header* th = (tree_header*)(sector + (ci->num_stripes * Vcb->superblock.node_size));
1133 BOOL read_q = FALSE;
1134
1135 if (devices[parity2] && devices[parity2]->devobj) {
1136 Status = sync_read_phys(devices[parity2]->devobj, cis[parity2].offset + off,
1137 Vcb->superblock.node_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.node_size), FALSE);
1138 if (!NT_SUCCESS(Status)) {
1139 ERR("sync_read_phys returned %08x\n", Status);
1140 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1141 } else
1142 read_q = TRUE;
1143 }
1144
1145 if (read_q) {
1146 if (num_errors == 1) {
1147 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.node_size));
1148
1149 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1150
1151 if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation))
1152 recovered = TRUE;
1153 } else {
1154 for (j = 0; j < ci->num_stripes - 1; j++) {
1155 if (j != stripe) {
1156 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.node_size));
1157
1158 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1159
1160 if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation)) {
1161 recovered = TRUE;
1162 error_stripe = j;
1163 break;
1164 }
1165 }
1166 }
1167 }
1168 }
1169
1170 if (recovered) {
1171 UINT16 error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1172
1173 if (devices[physstripe] && devices[physstripe]->devobj)
1174 ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[physstripe]->devitem.dev_id);
1175
1176 RtlCopyMemory(buf, sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1177
1178 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1179 Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
1180 sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1181 if (!NT_SUCCESS(Status)) {
1182 WARN("write_data_phys returned %08x\n", Status);
1183 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1184 }
1185 }
1186
1187 if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1188 if (error_stripe == ci->num_stripes - 2) {
1189 ERR("recovering from parity error at %llx, device %llx\n", addr, devices[error_stripe_phys]->devitem.dev_id);
1190
1191 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1192
1193 RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1194
1195 for (j = 0; j < ci->num_stripes - 2; j++) {
1196 if (j == stripe) {
1197 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (ci->num_stripes * Vcb->superblock.node_size),
1198 Vcb->superblock.node_size);
1199 } else {
1200 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size),
1201 Vcb->superblock.node_size);
1202 }
1203 }
1204 } else {
1205 ERR("recovering from checksum error at %llx, device %llx\n", addr + ((error_stripe - stripe) * ci->stripe_length),
1206 devices[error_stripe_phys]->devitem.dev_id);
1207
1208 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1209
1210 RtlCopyMemory(sector + (error_stripe * Vcb->superblock.node_size),
1211 sector + ((ci->num_stripes + 1) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1212 }
1213 }
1214
1215 if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1216 Status = write_data_phys(devices[error_stripe_phys]->devobj, cis[error_stripe_phys].offset + off,
1217 sector + (error_stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1218 if (!NT_SUCCESS(Status)) {
1219 WARN("write_data_phys returned %08x\n", Status);
1220 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
1221 }
1222 }
1223 }
1224 }
1225 }
1226
1227 if (!recovered) {
1228 ERR("unrecoverable checksum error at %llx\n", addr);
1229 ExFreePool(sector);
1230 return STATUS_CRC_ERROR;
1231 }
1232
1233 ExFreePool(sector);
1234 } else {
1235 ULONG sectors = length / Vcb->superblock.sector_size;
1236 UINT8* sector;
1237
1238 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * (ci->num_stripes + 2), ALLOC_TAG);
1239 if (!sector) {
1240 ERR("out of memory\n");
1241 return STATUS_INSUFFICIENT_RESOURCES;
1242 }
1243
1244 for (i = 0; i < sectors; i++) {
1245 UINT64 off;
1246 UINT16 physstripe, parity1, parity2;
1247 UINT32 crc32;
1248
1249 if (context->csum)
1250 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1251
1252 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
1253 ci->num_stripes - 2, &off, &stripe);
1254
1255 parity1 = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1256 parity2 = (parity1 + 1) % ci->num_stripes;
1257
1258 physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1259
1260 if (!devices[physstripe] || !devices[physstripe]->devobj || (context->csum && context->csum[i] != crc32)) {
1261 UINT16 k, error_stripe;
1262 BOOL recovered = FALSE, failed = FALSE;
1263 ULONG num_errors = 0;
1264
1265 if (devices[physstripe] && devices[physstripe]->devobj)
1266 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_READ_ERRORS);
1267
1268 j = (parity2 + 1) % ci->num_stripes;
1269
1270 for (k = 0; k < ci->num_stripes - 1; k++) {
1271 if (j != physstripe) {
1272 if (devices[j] && devices[j]->devobj) {
1273 Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector + (k * Vcb->superblock.sector_size), FALSE);
1274 if (!NT_SUCCESS(Status)) {
1275 ERR("sync_read_phys returned %08x\n", Status);
1276 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1277 num_errors++;
1278 error_stripe = k;
1279
1280 if (num_errors > 1) {
1281 failed = TRUE;
1282 break;
1283 }
1284 }
1285 } else {
1286 num_errors++;
1287 error_stripe = k;
1288
1289 if (num_errors > 1) {
1290 failed = TRUE;
1291 break;
1292 }
1293 }
1294 }
1295
1296 j = (j + 1) % ci->num_stripes;
1297 }
1298
1299 if (!failed) {
1300 if (num_errors == 0) {
1301 RtlCopyMemory(sector + (stripe * Vcb->superblock.sector_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1302
1303 for (j = 0; j < ci->num_stripes - 2; j++) {
1304 if (j != stripe)
1305 do_xor(sector + (stripe * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1306 }
1307
1308 if (context->csum)
1309 crc32 = ~calc_crc32c(0xffffffff, sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1310
1311 if (!context->csum || crc32 == context->csum[i]) {
1312 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1313
1314 if (devices[physstripe] && devices[physstripe]->devobj)
1315 ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
1316 devices[physstripe]->devitem.dev_id);
1317
1318 recovered = TRUE;
1319
1320 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1321 Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
1322 sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1323 if (!NT_SUCCESS(Status)) {
1324 WARN("write_data_phys returned %08x\n", Status);
1325 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1326 }
1327 }
1328 }
1329 }
1330
1331 if (!recovered) {
1332 BOOL read_q = FALSE;
1333
1334 if (devices[parity2] && devices[parity2]->devobj) {
1335 Status = sync_read_phys(devices[parity2]->devobj, cis[parity2].offset + off,
1336 Vcb->superblock.sector_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.sector_size), FALSE);
1337 if (!NT_SUCCESS(Status)) {
1338 ERR("sync_read_phys returned %08x\n", Status);
1339 log_device_error(Vcb, devices[parity2], BTRFS_DEV_STAT_READ_ERRORS);
1340 } else
1341 read_q = TRUE;
1342 }
1343
1344 if (read_q) {
1345 if (num_errors == 1) {
1346 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.sector_size));
1347
1348 if (!devices[physstripe] || !devices[physstripe]->devobj)
1349 recovered = TRUE;
1350 else {
1351 crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1352
1353 if (crc32 == context->csum[i])
1354 recovered = TRUE;
1355 }
1356 } else {
1357 for (j = 0; j < ci->num_stripes - 1; j++) {
1358 if (j != stripe) {
1359 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.sector_size));
1360
1361 crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1362
1363 if (crc32 == context->csum[i]) {
1364 recovered = TRUE;
1365 error_stripe = j;
1366 break;
1367 }
1368 }
1369 }
1370 }
1371 }
1372
1373 if (recovered) {
1374 UINT16 error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1375
1376 if (devices[physstripe] && devices[physstripe]->devobj)
1377 ERR("recovering from checksum error at %llx, device %llx\n",
1378 addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[physstripe]->devitem.dev_id);
1379
1380 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1381
1382 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1383 Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
1384 sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1385 if (!NT_SUCCESS(Status)) {
1386 WARN("write_data_phys returned %08x\n", Status);
1387 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1388 }
1389 }
1390
1391 if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1392 if (error_stripe == ci->num_stripes - 2) {
1393 ERR("recovering from parity error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
1394 devices[error_stripe_phys]->devitem.dev_id);
1395
1396 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1397
1398 RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1399
1400 for (j = 0; j < ci->num_stripes - 2; j++) {
1401 if (j == stripe) {
1402 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size),
1403 Vcb->superblock.sector_size);
1404 } else {
1405 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size),
1406 Vcb->superblock.sector_size);
1407 }
1408 }
1409 } else {
1410 ERR("recovering from checksum error at %llx, device %llx\n",
1411 addr + UInt32x32To64(i, Vcb->superblock.sector_size) + ((error_stripe - stripe) * ci->stripe_length),
1412 devices[error_stripe_phys]->devitem.dev_id);
1413
1414 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1415
1416 RtlCopyMemory(sector + (error_stripe * Vcb->superblock.sector_size),
1417 sector + ((ci->num_stripes + 1) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1418 }
1419 }
1420
1421 if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1422 Status = write_data_phys(devices[error_stripe_phys]->devobj, cis[error_stripe_phys].offset + off,
1423 sector + (error_stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1424 if (!NT_SUCCESS(Status)) {
1425 WARN("write_data_phys returned %08x\n", Status);
1426 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
1427 }
1428 }
1429 }
1430 }
1431 }
1432
1433 if (!recovered) {
1434 ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
1435 ExFreePool(sector);
1436 return STATUS_CRC_ERROR;
1437 }
1438 }
1439 }
1440
1441 ExFreePool(sector);
1442 }
1443
1444 return STATUS_SUCCESS;
1445 }
1446
1447 NTSTATUS read_data(_In_ device_extension* Vcb, _In_ UINT64 addr, _In_ UINT32 length, _In_reads_bytes_opt_(length*sizeof(UINT32)/Vcb->superblock.sector_size) UINT32* csum,
1448 _In_ BOOL is_tree, _Out_writes_bytes_(length) UINT8* buf, _In_opt_ chunk* c, _Out_opt_ chunk** pc, _In_opt_ PIRP Irp, _In_ UINT64 generation, _In_ BOOL file_read,
1449 _In_ ULONG priority) {
1450 CHUNK_ITEM* ci;
1451 CHUNK_ITEM_STRIPE* cis;
1452 read_data_context context;
1453 UINT64 type, offset, total_reading = 0;
1454 NTSTATUS Status;
1455 device** devices = NULL;
1456 UINT16 i, startoffstripe, allowed_missing, missing_devices = 0;
1457 UINT8* dummypage = NULL;
1458 PMDL dummy_mdl = NULL;
1459 BOOL need_to_wait;
1460 UINT64 lockaddr, locklen;
1461 #ifdef DEBUG_STATS
1462 LARGE_INTEGER time1, time2;
1463 #endif
1464
1465 if (Vcb->log_to_phys_loaded) {
1466 if (!c) {
1467 c = get_chunk_from_address(Vcb, addr);
1468
1469 if (!c) {
1470 ERR("get_chunk_from_address failed\n");
1471 return STATUS_INTERNAL_ERROR;
1472 }
1473 }
1474
1475 ci = c->chunk_item;
1476 offset = c->offset;
1477 devices = c->devices;
1478
1479 if (pc)
1480 *pc = c;
1481 } else {
1482 LIST_ENTRY* le = Vcb->sys_chunks.Flink;
1483
1484 ci = NULL;
1485
1486 c = NULL;
1487 while (le != &Vcb->sys_chunks) {
1488 sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry);
1489
1490 if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) {
1491 CHUNK_ITEM* chunk_item = sc->data;
1492
1493 if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) {
1494 ci = chunk_item;
1495 offset = sc->key.offset;
1496 cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1];
1497
1498 devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG);
1499 if (!devices) {
1500 ERR("out of memory\n");
1501 return STATUS_INSUFFICIENT_RESOURCES;
1502 }
1503
1504 for (i = 0; i < ci->num_stripes; i++) {
1505 devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid);
1506 }
1507
1508 break;
1509 }
1510 }
1511
1512 le = le->Flink;
1513 }
1514
1515 if (!ci) {
1516 ERR("could not find chunk for %llx in bootstrap\n", addr);
1517 return STATUS_INTERNAL_ERROR;
1518 }
1519
1520 if (pc)
1521 *pc = NULL;
1522 }
1523
1524 if (ci->type & BLOCK_FLAG_DUPLICATE) {
1525 type = BLOCK_FLAG_DUPLICATE;
1526 allowed_missing = ci->num_stripes - 1;
1527 } else if (ci->type & BLOCK_FLAG_RAID0) {
1528 type = BLOCK_FLAG_RAID0;
1529 allowed_missing = 0;
1530 } else if (ci->type & BLOCK_FLAG_RAID1) {
1531 type = BLOCK_FLAG_DUPLICATE;
1532 allowed_missing = 1;
1533 } else if (ci->type & BLOCK_FLAG_RAID10) {
1534 type = BLOCK_FLAG_RAID10;
1535 allowed_missing = 1;
1536 } else if (ci->type & BLOCK_FLAG_RAID5) {
1537 type = BLOCK_FLAG_RAID5;
1538 allowed_missing = 1;
1539 } else if (ci->type & BLOCK_FLAG_RAID6) {
1540 type = BLOCK_FLAG_RAID6;
1541 allowed_missing = 2;
1542 } else { // SINGLE
1543 type = BLOCK_FLAG_DUPLICATE;
1544 allowed_missing = 0;
1545 }
1546
1547 cis = (CHUNK_ITEM_STRIPE*)&ci[1];
1548
1549 RtlZeroMemory(&context, sizeof(read_data_context));
1550 KeInitializeEvent(&context.Event, NotificationEvent, FALSE);
1551
1552 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG);
1553 if (!context.stripes) {
1554 ERR("out of memory\n");
1555 return STATUS_INSUFFICIENT_RESOURCES;
1556 }
1557
1558 if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) {
1559 get_raid56_lock_range(c, addr, length, &lockaddr, &locklen);
1560 chunk_lock_range(Vcb, c, lockaddr, locklen);
1561 }
1562
1563 RtlZeroMemory(context.stripes, sizeof(read_data_stripe) * ci->num_stripes);
1564
1565 context.buflen = length;
1566 context.num_stripes = ci->num_stripes;
1567 context.stripes_left = context.num_stripes;
1568 context.sector_size = Vcb->superblock.sector_size;
1569 context.csum = csum;
1570 context.tree = is_tree;
1571 context.type = type;
1572
1573 if (type == BLOCK_FLAG_RAID0) {
1574 UINT64 startoff, endoff;
1575 UINT16 endoffstripe, stripe;
1576 UINT32 *stripeoff, pos;
1577 PMDL master_mdl;
1578 PFN_NUMBER* pfns;
1579
1580 // FIXME - test this still works if page size isn't the same as sector size
1581
1582 // This relies on the fact that MDLs are followed in memory by the page file numbers,
1583 // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0
1584 // data for you without doing a memcpy yourself.
1585 // MDLs are officially opaque, so this might very well break in future versions of Windows.
1586
1587 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe);
1588 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe);
1589
1590 if (file_read) {
1591 // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL
1592 // with duplicated dummy PFNs, which confuse check_csum. Ah well.
1593 // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested.
1594
1595 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1596
1597 if (!context.va) {
1598 ERR("out of memory\n");
1599 Status = STATUS_INSUFFICIENT_RESOURCES;
1600 goto exit;
1601 }
1602 } else
1603 context.va = buf;
1604
1605 master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
1606 if (!master_mdl) {
1607 ERR("out of memory\n");
1608 Status = STATUS_INSUFFICIENT_RESOURCES;
1609 goto exit;
1610 }
1611
1612 Status = STATUS_SUCCESS;
1613
1614 _SEH2_TRY {
1615 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1616 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1617 Status = _SEH2_GetExceptionCode();
1618 } _SEH2_END;
1619
1620 if (!NT_SUCCESS(Status)) {
1621 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1622 IoFreeMdl(master_mdl);
1623 goto exit;
1624 }
1625
1626 pfns = (PFN_NUMBER*)(master_mdl + 1);
1627
1628 for (i = 0; i < ci->num_stripes; i++) {
1629 if (startoffstripe > i)
1630 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1631 else if (startoffstripe == i)
1632 context.stripes[i].stripestart = startoff;
1633 else
1634 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length);
1635
1636 if (endoffstripe > i)
1637 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1638 else if (endoffstripe == i)
1639 context.stripes[i].stripeend = endoff + 1;
1640 else
1641 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length);
1642
1643 if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
1644 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), FALSE, FALSE, NULL);
1645
1646 if (!context.stripes[i].mdl) {
1647 ERR("IoAllocateMdl failed\n");
1648 Status = STATUS_INSUFFICIENT_RESOURCES;
1649 goto exit;
1650 }
1651 }
1652 }
1653
1654 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG);
1655 if (!stripeoff) {
1656 ERR("out of memory\n");
1657 Status = STATUS_INSUFFICIENT_RESOURCES;
1658 goto exit;
1659 }
1660
1661 RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes);
1662
1663 pos = 0;
1664 stripe = startoffstripe;
1665 while (pos < length) {
1666 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
1667
1668 if (pos == 0) {
1669 UINT32 readlen = (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length));
1670
1671 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1672
1673 stripeoff[stripe] += readlen;
1674 pos += readlen;
1675 } else if (length - pos < ci->stripe_length) {
1676 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1677
1678 pos = length;
1679 } else {
1680 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1681
1682 stripeoff[stripe] += (UINT32)ci->stripe_length;
1683 pos += (UINT32)ci->stripe_length;
1684 }
1685
1686 stripe = (stripe + 1) % ci->num_stripes;
1687 }
1688
1689 MmUnlockPages(master_mdl);
1690 IoFreeMdl(master_mdl);
1691
1692 ExFreePool(stripeoff);
1693 } else if (type == BLOCK_FLAG_RAID10) {
1694 UINT64 startoff, endoff;
1695 UINT16 endoffstripe, j, stripe;
1696 ULONG orig_ls;
1697 PMDL master_mdl;
1698 PFN_NUMBER* pfns;
1699 UINT32* stripeoff, pos;
1700 read_data_stripe** stripes;
1701
1702 if (c)
1703 orig_ls = c->last_stripe;
1704 else
1705 orig_ls = 0;
1706
1707 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe);
1708 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe);
1709
1710 if ((ci->num_stripes % ci->sub_stripes) != 0) {
1711 ERR("chunk %llx: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes);
1712 Status = STATUS_INTERNAL_ERROR;
1713 goto exit;
1714 }
1715
1716 if (file_read) {
1717 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1718
1719 if (!context.va) {
1720 ERR("out of memory\n");
1721 Status = STATUS_INSUFFICIENT_RESOURCES;
1722 goto exit;
1723 }
1724 } else
1725 context.va = buf;
1726
1727 context.firstoff = (UINT16)((startoff % ci->stripe_length) / Vcb->superblock.sector_size);
1728 context.startoffstripe = startoffstripe;
1729 context.sectors_per_stripe = (UINT16)(ci->stripe_length / Vcb->superblock.sector_size);
1730
1731 startoffstripe *= ci->sub_stripes;
1732 endoffstripe *= ci->sub_stripes;
1733
1734 if (c)
1735 c->last_stripe = (orig_ls + 1) % ci->sub_stripes;
1736
1737 master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
1738 if (!master_mdl) {
1739 ERR("out of memory\n");
1740 Status = STATUS_INSUFFICIENT_RESOURCES;
1741 goto exit;
1742 }
1743
1744 Status = STATUS_SUCCESS;
1745
1746 _SEH2_TRY {
1747 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1748 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1749 Status = _SEH2_GetExceptionCode();
1750 } _SEH2_END;
1751
1752 if (!NT_SUCCESS(Status)) {
1753 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1754 IoFreeMdl(master_mdl);
1755 goto exit;
1756 }
1757
1758 pfns = (PFN_NUMBER*)(master_mdl + 1);
1759
1760 stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1761 if (!stripes) {
1762 ERR("out of memory\n");
1763 Status = STATUS_INSUFFICIENT_RESOURCES;
1764 goto exit;
1765 }
1766
1767 RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes);
1768
1769 for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
1770 UINT64 sstart, send;
1771 BOOL stripeset = FALSE;
1772
1773 if (startoffstripe > i)
1774 sstart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1775 else if (startoffstripe == i)
1776 sstart = startoff;
1777 else
1778 sstart = startoff - (startoff % ci->stripe_length);
1779
1780 if (endoffstripe > i)
1781 send = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1782 else if (endoffstripe == i)
1783 send = endoff + 1;
1784 else
1785 send = endoff - (endoff % ci->stripe_length);
1786
1787 for (j = 0; j < ci->sub_stripes; j++) {
1788 if (j == orig_ls && devices[i+j] && devices[i+j]->devobj) {
1789 context.stripes[i+j].stripestart = sstart;
1790 context.stripes[i+j].stripeend = send;
1791 stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1792
1793 if (sstart != send) {
1794 context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), FALSE, FALSE, NULL);
1795
1796 if (!context.stripes[i+j].mdl) {
1797 ERR("IoAllocateMdl failed\n");
1798 Status = STATUS_INSUFFICIENT_RESOURCES;
1799 goto exit;
1800 }
1801 }
1802
1803 stripeset = TRUE;
1804 } else
1805 context.stripes[i+j].status = ReadDataStatus_Skip;
1806 }
1807
1808 if (!stripeset) {
1809 for (j = 0; j < ci->sub_stripes; j++) {
1810 if (devices[i+j] && devices[i+j]->devobj) {
1811 context.stripes[i+j].stripestart = sstart;
1812 context.stripes[i+j].stripeend = send;
1813 context.stripes[i+j].status = ReadDataStatus_Pending;
1814 stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1815
1816 if (sstart != send) {
1817 context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), FALSE, FALSE, NULL);
1818
1819 if (!context.stripes[i+j].mdl) {
1820 ERR("IoAllocateMdl failed\n");
1821 Status = STATUS_INSUFFICIENT_RESOURCES;
1822 goto exit;
1823 }
1824 }
1825
1826 stripeset = TRUE;
1827 break;
1828 }
1829 }
1830
1831 if (!stripeset) {
1832 ERR("could not find stripe to read\n");
1833 Status = STATUS_DEVICE_NOT_READY;
1834 goto exit;
1835 }
1836 }
1837 }
1838
1839 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1840 if (!stripeoff) {
1841 ERR("out of memory\n");
1842 Status = STATUS_INSUFFICIENT_RESOURCES;
1843 goto exit;
1844 }
1845
1846 RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes);
1847
1848 pos = 0;
1849 stripe = startoffstripe / ci->sub_stripes;
1850 while (pos < length) {
1851 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripe]->mdl + 1);
1852
1853 if (pos == 0) {
1854 UINT32 readlen = (UINT32)min(stripes[stripe]->stripeend - stripes[stripe]->stripestart,
1855 ci->stripe_length - (stripes[stripe]->stripestart % ci->stripe_length));
1856
1857 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1858
1859 stripeoff[stripe] += readlen;
1860 pos += readlen;
1861 } else if (length - pos < ci->stripe_length) {
1862 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1863
1864 pos = length;
1865 } else {
1866 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1867
1868 stripeoff[stripe] += (ULONG)ci->stripe_length;
1869 pos += (ULONG)ci->stripe_length;
1870 }
1871
1872 stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes);
1873 }
1874
1875 MmUnlockPages(master_mdl);
1876 IoFreeMdl(master_mdl);
1877
1878 ExFreePool(stripeoff);
1879 ExFreePool(stripes);
1880 } else if (type == BLOCK_FLAG_DUPLICATE) {
1881 UINT64 orig_ls;
1882
1883 if (c)
1884 orig_ls = i = c->last_stripe;
1885 else
1886 orig_ls = i = 0;
1887
1888 while (!devices[i] || !devices[i]->devobj) {
1889 i = (i + 1) % ci->num_stripes;
1890
1891 if (i == orig_ls) {
1892 ERR("no devices available to service request\n");
1893 Status = STATUS_DEVICE_NOT_READY;
1894 goto exit;
1895 }
1896 }
1897
1898 if (c)
1899 c->last_stripe = (i + 1) % ci->num_stripes;
1900
1901 context.stripes[i].stripestart = addr - offset;
1902 context.stripes[i].stripeend = context.stripes[i].stripestart + length;
1903
1904 if (file_read) {
1905 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1906
1907 if (!context.va) {
1908 ERR("out of memory\n");
1909 Status = STATUS_INSUFFICIENT_RESOURCES;
1910 goto exit;
1911 }
1912
1913 context.stripes[i].mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
1914 if (!context.stripes[i].mdl) {
1915 ERR("IoAllocateMdl failed\n");
1916 Status = STATUS_INSUFFICIENT_RESOURCES;
1917 goto exit;
1918 }
1919
1920 MmBuildMdlForNonPagedPool(context.stripes[i].mdl);
1921 } else {
1922 context.stripes[i].mdl = IoAllocateMdl(buf, length, FALSE, FALSE, NULL);
1923
1924 if (!context.stripes[i].mdl) {
1925 ERR("IoAllocateMdl failed\n");
1926 Status = STATUS_INSUFFICIENT_RESOURCES;
1927 goto exit;
1928 }
1929
1930 Status = STATUS_SUCCESS;
1931
1932 _SEH2_TRY {
1933 MmProbeAndLockPages(context.stripes[i].mdl, KernelMode, IoWriteAccess);
1934 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1935 Status = _SEH2_GetExceptionCode();
1936 } _SEH2_END;
1937
1938 if (!NT_SUCCESS(Status)) {
1939 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1940 goto exit;
1941 }
1942 }
1943 } else if (type == BLOCK_FLAG_RAID5) {
1944 UINT64 startoff, endoff;
1945 UINT16 endoffstripe, parity;
1946 UINT32 *stripeoff, pos;
1947 PMDL master_mdl;
1948 PFN_NUMBER *pfns, dummy;
1949 BOOL need_dummy = FALSE;
1950
1951 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe);
1952 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe);
1953
1954 if (file_read) {
1955 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1956
1957 if (!context.va) {
1958 ERR("out of memory\n");
1959 Status = STATUS_INSUFFICIENT_RESOURCES;
1960 goto exit;
1961 }
1962 } else
1963 context.va = buf;
1964
1965 master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
1966 if (!master_mdl) {
1967 ERR("out of memory\n");
1968 Status = STATUS_INSUFFICIENT_RESOURCES;
1969 goto exit;
1970 }
1971
1972 Status = STATUS_SUCCESS;
1973
1974 _SEH2_TRY {
1975 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1976 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1977 Status = _SEH2_GetExceptionCode();
1978 } _SEH2_END;
1979
1980 if (!NT_SUCCESS(Status)) {
1981 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1982 IoFreeMdl(master_mdl);
1983 goto exit;
1984 }
1985
1986 pfns = (PFN_NUMBER*)(master_mdl + 1);
1987
1988 pos = 0;
1989 while (pos < length) {
1990 parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
1991
1992 if (pos == 0) {
1993 UINT16 stripe = (parity + startoffstripe + 1) % ci->num_stripes;
1994 ULONG skip, readlen;
1995
1996 i = startoffstripe;
1997 while (stripe != parity) {
1998 if (i == startoffstripe) {
1999 readlen = min(length, (ULONG)(ci->stripe_length - (startoff % ci->stripe_length)));
2000
2001 context.stripes[stripe].stripestart = startoff;
2002 context.stripes[stripe].stripeend = startoff + readlen;
2003
2004 pos += readlen;
2005
2006 if (pos == length)
2007 break;
2008 } else {
2009 readlen = min(length - pos, (ULONG)ci->stripe_length);
2010
2011 context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2012 context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2013
2014 pos += readlen;
2015
2016 if (pos == length)
2017 break;
2018 }
2019
2020 i++;
2021 stripe = (stripe + 1) % ci->num_stripes;
2022 }
2023
2024 if (pos == length)
2025 break;
2026
2027 for (i = 0; i < startoffstripe; i++) {
2028 UINT16 stripe2 = (parity + i + 1) % ci->num_stripes;
2029
2030 context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2031 }
2032
2033 context.stripes[parity].stripestart = context.stripes[parity].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2034
2035 if (length - pos > ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length) {
2036 skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length)) - 1);
2037
2038 for (i = 0; i < ci->num_stripes; i++) {
2039 context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2040 }
2041
2042 pos += (UINT32)(skip * (ci->num_stripes - 1) * ci->num_stripes * ci->stripe_length);
2043 need_dummy = TRUE;
2044 }
2045 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2046 for (i = 0; i < ci->num_stripes; i++) {
2047 context.stripes[i].stripeend += ci->stripe_length;
2048 }
2049
2050 pos += (UINT32)(ci->stripe_length * (ci->num_stripes - 1));
2051 need_dummy = TRUE;
2052 } else {
2053 UINT16 stripe = (parity + 1) % ci->num_stripes;
2054
2055 i = 0;
2056 while (stripe != parity) {
2057 if (endoffstripe == i) {
2058 context.stripes[stripe].stripeend = endoff + 1;
2059 break;
2060 } else if (endoffstripe > i)
2061 context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2062
2063 i++;
2064 stripe = (stripe + 1) % ci->num_stripes;
2065 }
2066
2067 break;
2068 }
2069 }
2070
2071 for (i = 0; i < ci->num_stripes; i++) {
2072 if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2073 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart),
2074 FALSE, FALSE, NULL);
2075
2076 if (!context.stripes[i].mdl) {
2077 ERR("IoAllocateMdl failed\n");
2078 Status = STATUS_INSUFFICIENT_RESOURCES;
2079 goto exit;
2080 }
2081 }
2082 }
2083
2084 if (need_dummy) {
2085 dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2086 if (!dummypage) {
2087 ERR("out of memory\n");
2088 Status = STATUS_INSUFFICIENT_RESOURCES;
2089 goto exit;
2090 }
2091
2092 dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, FALSE, FALSE, NULL);
2093 if (!dummy_mdl) {
2094 ERR("IoAllocateMdl failed\n");
2095 Status = STATUS_INSUFFICIENT_RESOURCES;
2096 ExFreePool(dummypage);
2097 goto exit;
2098 }
2099
2100 MmBuildMdlForNonPagedPool(dummy_mdl);
2101
2102 dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2103 }
2104
2105 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG);
2106 if (!stripeoff) {
2107 ERR("out of memory\n");
2108 Status = STATUS_INSUFFICIENT_RESOURCES;
2109 goto exit;
2110 }
2111
2112 RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes);
2113
2114 pos = 0;
2115
2116 while (pos < length) {
2117 PFN_NUMBER* stripe_pfns;
2118
2119 parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
2120
2121 if (pos == 0) {
2122 UINT16 stripe = (parity + startoffstripe + 1) % ci->num_stripes;
2123 UINT32 readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2124 ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2125
2126 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2127
2128 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2129
2130 stripeoff[stripe] = readlen;
2131 pos += readlen;
2132
2133 stripe = (stripe + 1) % ci->num_stripes;
2134
2135 while (stripe != parity) {
2136 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2137 readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2138
2139 if (readlen == 0)
2140 break;
2141
2142 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2143
2144 stripeoff[stripe] = readlen;
2145 pos += readlen;
2146
2147 stripe = (stripe + 1) % ci->num_stripes;
2148 }
2149 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2150 UINT16 stripe = (parity + 1) % ci->num_stripes;
2151 ULONG k;
2152
2153 while (stripe != parity) {
2154 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2155
2156 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2157
2158 stripeoff[stripe] += (UINT32)ci->stripe_length;
2159 pos += (UINT32)ci->stripe_length;
2160
2161 stripe = (stripe + 1) % ci->num_stripes;
2162 }
2163
2164 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity].mdl + 1);
2165
2166 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2167 stripe_pfns[stripeoff[parity] >> PAGE_SHIFT] = dummy;
2168 stripeoff[parity] += PAGE_SIZE;
2169 }
2170 } else {
2171 UINT16 stripe = (parity + 1) % ci->num_stripes;
2172 UINT32 readlen;
2173
2174 while (pos < length) {
2175 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2176 readlen = min(length - pos, (ULONG)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2177
2178 if (readlen == 0)
2179 break;
2180
2181 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2182
2183 stripeoff[stripe] += readlen;
2184 pos += readlen;
2185
2186 stripe = (stripe + 1) % ci->num_stripes;
2187 }
2188 }
2189 }
2190
2191 MmUnlockPages(master_mdl);
2192 IoFreeMdl(master_mdl);
2193
2194 ExFreePool(stripeoff);
2195 } else if (type == BLOCK_FLAG_RAID6) {
2196 UINT64 startoff, endoff;
2197 UINT16 endoffstripe, parity1;
2198 UINT32 *stripeoff, pos;
2199 PMDL master_mdl;
2200 PFN_NUMBER *pfns, dummy;
2201 BOOL need_dummy = FALSE;
2202
2203 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe);
2204 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe);
2205
2206 if (file_read) {
2207 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
2208
2209 if (!context.va) {
2210 ERR("out of memory\n");
2211 Status = STATUS_INSUFFICIENT_RESOURCES;
2212 goto exit;
2213 }
2214 } else
2215 context.va = buf;
2216
2217 master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
2218 if (!master_mdl) {
2219 ERR("out of memory\n");
2220 Status = STATUS_INSUFFICIENT_RESOURCES;
2221 goto exit;
2222 }
2223
2224 Status = STATUS_SUCCESS;
2225
2226 _SEH2_TRY {
2227 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
2228 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2229 Status = _SEH2_GetExceptionCode();
2230 } _SEH2_END;
2231
2232 if (!NT_SUCCESS(Status)) {
2233 ERR("MmProbeAndLockPages threw exception %08x\n", Status);
2234 IoFreeMdl(master_mdl);
2235 goto exit;
2236 }
2237
2238 pfns = (PFN_NUMBER*)(master_mdl + 1);
2239
2240 pos = 0;
2241 while (pos < length) {
2242 parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2243
2244 if (pos == 0) {
2245 UINT16 stripe = (parity1 + startoffstripe + 2) % ci->num_stripes, parity2;
2246 ULONG skip, readlen;
2247
2248 i = startoffstripe;
2249 while (stripe != parity1) {
2250 if (i == startoffstripe) {
2251 readlen = (ULONG)min(length, ci->stripe_length - (startoff % ci->stripe_length));
2252
2253 context.stripes[stripe].stripestart = startoff;
2254 context.stripes[stripe].stripeend = startoff + readlen;
2255
2256 pos += readlen;
2257
2258 if (pos == length)
2259 break;
2260 } else {
2261 readlen = min(length - pos, (ULONG)ci->stripe_length);
2262
2263 context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2264 context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2265
2266 pos += readlen;
2267
2268 if (pos == length)
2269 break;
2270 }
2271
2272 i++;
2273 stripe = (stripe + 1) % ci->num_stripes;
2274 }
2275
2276 if (pos == length)
2277 break;
2278
2279 for (i = 0; i < startoffstripe; i++) {
2280 UINT16 stripe2 = (parity1 + i + 2) % ci->num_stripes;
2281
2282 context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2283 }
2284
2285 context.stripes[parity1].stripestart = context.stripes[parity1].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2286
2287 parity2 = (parity1 + 1) % ci->num_stripes;
2288 context.stripes[parity2].stripestart = context.stripes[parity2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2289
2290 if (length - pos > ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length) {
2291 skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length)) - 1);
2292
2293 for (i = 0; i < ci->num_stripes; i++) {
2294 context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2295 }
2296
2297 pos += (UINT32)(skip * (ci->num_stripes - 2) * ci->num_stripes * ci->stripe_length);
2298 need_dummy = TRUE;
2299 }
2300 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2301 for (i = 0; i < ci->num_stripes; i++) {
2302 context.stripes[i].stripeend += ci->stripe_length;
2303 }
2304
2305 pos += (UINT32)(ci->stripe_length * (ci->num_stripes - 2));
2306 need_dummy = TRUE;
2307 } else {
2308 UINT16 stripe = (parity1 + 2) % ci->num_stripes;
2309
2310 i = 0;
2311 while (stripe != parity1) {
2312 if (endoffstripe == i) {
2313 context.stripes[stripe].stripeend = endoff + 1;
2314 break;
2315 } else if (endoffstripe > i)
2316 context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2317
2318 i++;
2319 stripe = (stripe + 1) % ci->num_stripes;
2320 }
2321
2322 break;
2323 }
2324 }
2325
2326 for (i = 0; i < ci->num_stripes; i++) {
2327 if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2328 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), FALSE, FALSE, NULL);
2329
2330 if (!context.stripes[i].mdl) {
2331 ERR("IoAllocateMdl failed\n");
2332 Status = STATUS_INSUFFICIENT_RESOURCES;
2333 goto exit;
2334 }
2335 }
2336 }
2337
2338 if (need_dummy) {
2339 dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2340 if (!dummypage) {
2341 ERR("out of memory\n");
2342 Status = STATUS_INSUFFICIENT_RESOURCES;
2343 goto exit;
2344 }
2345
2346 dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, FALSE, FALSE, NULL);
2347 if (!dummy_mdl) {
2348 ERR("IoAllocateMdl failed\n");
2349 Status = STATUS_INSUFFICIENT_RESOURCES;
2350 ExFreePool(dummypage);
2351 goto exit;
2352 }
2353
2354 MmBuildMdlForNonPagedPool(dummy_mdl);
2355
2356 dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2357 }
2358
2359 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG);
2360 if (!stripeoff) {
2361 ERR("out of memory\n");
2362 Status = STATUS_INSUFFICIENT_RESOURCES;
2363 goto exit;
2364 }
2365
2366 RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes);
2367
2368 pos = 0;
2369
2370 while (pos < length) {
2371 PFN_NUMBER* stripe_pfns;
2372
2373 parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2374
2375 if (pos == 0) {
2376 UINT16 stripe = (parity1 + startoffstripe + 2) % ci->num_stripes;
2377 UINT32 readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2378 ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2379
2380 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2381
2382 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2383
2384 stripeoff[stripe] = readlen;
2385 pos += readlen;
2386
2387 stripe = (stripe + 1) % ci->num_stripes;
2388
2389 while (stripe != parity1) {
2390 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2391 readlen = (UINT32)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2392
2393 if (readlen == 0)
2394 break;
2395
2396 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2397
2398 stripeoff[stripe] = readlen;
2399 pos += readlen;
2400
2401 stripe = (stripe + 1) % ci->num_stripes;
2402 }
2403 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2404 UINT16 stripe = (parity1 + 2) % ci->num_stripes;
2405 UINT16 parity2 = (parity1 + 1) % ci->num_stripes;
2406 ULONG k;
2407
2408 while (stripe != parity1) {
2409 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2410
2411 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2412
2413 stripeoff[stripe] += (UINT32)ci->stripe_length;
2414 pos += (UINT32)ci->stripe_length;
2415
2416 stripe = (stripe + 1) % ci->num_stripes;
2417 }
2418
2419 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity1].mdl + 1);
2420
2421 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2422 stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT] = dummy;
2423 stripeoff[parity1] += PAGE_SIZE;
2424 }
2425
2426 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity2].mdl + 1);
2427
2428 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2429 stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT] = dummy;
2430 stripeoff[parity2] += PAGE_SIZE;
2431 }
2432 } else {
2433 UINT16 stripe = (parity1 + 2) % ci->num_stripes;
2434 UINT32 readlen;
2435
2436 while (pos < length) {
2437 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2438 readlen = (UINT32)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2439
2440 if (readlen == 0)
2441 break;
2442
2443 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2444
2445 stripeoff[stripe] += readlen;
2446 pos += readlen;
2447
2448 stripe = (stripe + 1) % ci->num_stripes;
2449 }
2450 }
2451 }
2452
2453 MmUnlockPages(master_mdl);
2454 IoFreeMdl(master_mdl);
2455
2456 ExFreePool(stripeoff);
2457 }
2458
2459 context.address = addr;
2460
2461 for (i = 0; i < ci->num_stripes; i++) {
2462 if (!devices[i] || !devices[i]->devobj || context.stripes[i].stripestart == context.stripes[i].stripeend) {
2463 context.stripes[i].status = ReadDataStatus_MissingDevice;
2464 context.stripes_left--;
2465
2466 if (!devices[i] || !devices[i]->devobj)
2467 missing_devices++;
2468 }
2469 }
2470
2471 if (missing_devices > allowed_missing) {
2472 ERR("not enough devices to service request (%u missing)\n", missing_devices);
2473 Status = STATUS_UNEXPECTED_IO_ERROR;
2474 goto exit;
2475 }
2476
2477 for (i = 0; i < ci->num_stripes; i++) {
2478 PIO_STACK_LOCATION IrpSp;
2479
2480 if (devices[i] && devices[i]->devobj && context.stripes[i].stripestart != context.stripes[i].stripeend && context.stripes[i].status != ReadDataStatus_Skip) {
2481 context.stripes[i].context = (struct read_data_context*)&context;
2482
2483 if (type == BLOCK_FLAG_RAID10) {
2484 context.stripes[i].stripenum = i / ci->sub_stripes;
2485 }
2486
2487 if (!Irp) {
2488 context.stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE);
2489
2490 if (!context.stripes[i].Irp) {
2491 ERR("IoAllocateIrp failed\n");
2492 Status = STATUS_INSUFFICIENT_RESOURCES;
2493 goto exit;
2494 }
2495 } else {
2496 context.stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize);
2497
2498 if (!context.stripes[i].Irp) {
2499 ERR("IoMakeAssociatedIrp failed\n");
2500 Status = STATUS_INSUFFICIENT_RESOURCES;
2501 goto exit;
2502 }
2503 }
2504
2505 IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
2506 IrpSp->MajorFunction = IRP_MJ_READ;
2507
2508 if (devices[i]->devobj->Flags & DO_BUFFERED_IO) {
2509 context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), ALLOC_TAG);
2510 if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
2511 ERR("out of memory\n");
2512 Status = STATUS_INSUFFICIENT_RESOURCES;
2513 goto exit;
2514 }
2515
2516 context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION;
2517
2518 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2519 } else if (devices[i]->devobj->Flags & DO_DIRECT_IO)
2520 context.stripes[i].Irp->MdlAddress = context.stripes[i].mdl;
2521 else
2522 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2523
2524 IrpSp->Parameters.Read.Length = (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart);
2525 IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].stripestart + cis[i].offset;
2526
2527 total_reading += IrpSp->Parameters.Read.Length;
2528
2529 context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
2530
2531 IoSetCompletionRoutine(context.stripes[i].Irp, read_data_completion, &context.stripes[i], TRUE, TRUE, TRUE);
2532
2533 context.stripes[i].status = ReadDataStatus_Pending;
2534 }
2535 }
2536
2537 #ifdef DEBUG_STATS
2538 if (!is_tree)
2539 time1 = KeQueryPerformanceCounter(NULL);
2540 #endif
2541
2542 need_to_wait = FALSE;
2543 for (i = 0; i < ci->num_stripes; i++) {
2544 if (context.stripes[i].status != ReadDataStatus_MissingDevice && context.stripes[i].status != ReadDataStatus_Skip) {
2545 IoCallDriver(devices[i]->devobj, context.stripes[i].Irp);
2546 need_to_wait = TRUE;
2547 }
2548 }
2549
2550 if (need_to_wait)
2551 KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);
2552
2553 #ifdef DEBUG_STATS
2554 if (!is_tree) {
2555 time2 = KeQueryPerformanceCounter(NULL);
2556
2557 Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart;
2558 }
2559 #endif
2560
2561 if (diskacc)
2562 fFsRtlUpdateDiskCounters(total_reading, 0);
2563
2564 // check if any of the devices return a "user-induced" error
2565
2566 for (i = 0; i < ci->num_stripes; i++) {
2567 if (context.stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context.stripes[i].iosb.Status)) {
2568 Status = context.stripes[i].iosb.Status;
2569 goto exit;
2570 }
2571 }
2572
2573 if (type == BLOCK_FLAG_RAID0) {
2574 Status = read_data_raid0(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2575 if (!NT_SUCCESS(Status)) {
2576 ERR("read_data_raid0 returned %08x\n", Status);
2577
2578 if (file_read)
2579 ExFreePool(context.va);
2580
2581 goto exit;
2582 }
2583
2584 if (file_read) {
2585 RtlCopyMemory(buf, context.va, length);
2586 ExFreePool(context.va);
2587 }
2588 } else if (type == BLOCK_FLAG_RAID10) {
2589 Status = read_data_raid10(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2590
2591 if (!NT_SUCCESS(Status)) {
2592 ERR("read_data_raid10 returned %08x\n", Status);
2593
2594 if (file_read)
2595 ExFreePool(context.va);
2596
2597 goto exit;
2598 }
2599
2600 if (file_read) {
2601 RtlCopyMemory(buf, context.va, length);
2602 ExFreePool(context.va);
2603 }
2604 } else if (type == BLOCK_FLAG_DUPLICATE) {
2605 Status = read_data_dup(Vcb, file_read ? context.va : buf, addr, &context, ci, devices, generation);
2606 if (!NT_SUCCESS(Status)) {
2607 ERR("read_data_dup returned %08x\n", Status);
2608
2609 if (file_read)
2610 ExFreePool(context.va);
2611
2612 goto exit;
2613 }
2614
2615 if (file_read) {
2616 RtlCopyMemory(buf, context.va, length);
2617 ExFreePool(context.va);
2618 }
2619 } else if (type == BLOCK_FLAG_RAID5) {
2620 Status = read_data_raid5(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? TRUE : FALSE);
2621 if (!NT_SUCCESS(Status)) {
2622 ERR("read_data_raid5 returned %08x\n", Status);
2623
2624 if (file_read)
2625 ExFreePool(context.va);
2626
2627 goto exit;
2628 }
2629
2630 if (file_read) {
2631 RtlCopyMemory(buf, context.va, length);
2632 ExFreePool(context.va);
2633 }
2634 } else if (type == BLOCK_FLAG_RAID6) {
2635 Status = read_data_raid6(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? TRUE : FALSE);
2636 if (!NT_SUCCESS(Status)) {
2637 ERR("read_data_raid6 returned %08x\n", Status);
2638
2639 if (file_read)
2640 ExFreePool(context.va);
2641
2642 goto exit;
2643 }
2644
2645 if (file_read) {
2646 RtlCopyMemory(buf, context.va, length);
2647 ExFreePool(context.va);
2648 }
2649 }
2650
2651 exit:
2652 if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6))
2653 chunk_unlock_range(Vcb, c, lockaddr, locklen);
2654
2655 if (dummy_mdl)
2656 IoFreeMdl(dummy_mdl);
2657
2658 if (dummypage)
2659 ExFreePool(dummypage);
2660
2661 for (i = 0; i < ci->num_stripes; i++) {
2662 if (context.stripes[i].mdl) {
2663 if (context.stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
2664 MmUnlockPages(context.stripes[i].mdl);
2665
2666 IoFreeMdl(context.stripes[i].mdl);
2667 }
2668
2669 if (context.stripes[i].Irp)
2670 IoFreeIrp(context.stripes[i].Irp);
2671 }
2672
2673 ExFreePool(context.stripes);
2674
2675 if (!Vcb->log_to_phys_loaded)
2676 ExFreePool(devices);
2677
2678 return Status;
2679 }
2680
2681 NTSTATUS read_stream(fcb* fcb, UINT8* data, UINT64 start, ULONG length, ULONG* pbr) {
2682 ULONG readlen;
2683
2684 TRACE("(%p, %p, %llx, %llx, %p)\n", fcb, data, start, length, pbr);
2685
2686 if (pbr) *pbr = 0;
2687
2688 if (start >= fcb->adsdata.Length) {
2689 TRACE("tried to read beyond end of stream\n");
2690 return STATUS_END_OF_FILE;
2691 }
2692
2693 if (length == 0) {
2694 WARN("tried to read zero bytes\n");
2695 return STATUS_SUCCESS;
2696 }
2697
2698 if (start + length < fcb->adsdata.Length)
2699 readlen = length;
2700 else
2701 readlen = fcb->adsdata.Length - (ULONG)start;
2702
2703 if (readlen > 0)
2704 RtlCopyMemory(data + start, fcb->adsdata.Buffer, readlen);
2705
2706 if (pbr) *pbr = readlen;
2707
2708 return STATUS_SUCCESS;
2709 }
2710
2711 NTSTATUS read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, ULONG* pbr, PIRP Irp) {
2712 NTSTATUS Status;
2713 EXTENT_DATA* ed;
2714 UINT32 bytes_read = 0;
2715 UINT64 last_end;
2716 LIST_ENTRY* le;
2717 #ifdef DEBUG_STATS
2718 LARGE_INTEGER time1, time2;
2719 #endif
2720
2721 TRACE("(%p, %p, %llx, %llx, %p)\n", fcb, data, start, length, pbr);
2722
2723 if (pbr)
2724 *pbr = 0;
2725
2726 if (start >= fcb->inode_item.st_size) {
2727 WARN("Tried to read beyond end of file\n");
2728 Status = STATUS_END_OF_FILE;
2729 goto exit;
2730 }
2731
2732 #ifdef DEBUG_STATS
2733 time1 = KeQueryPerformanceCounter(NULL);
2734 #endif
2735
2736 le = fcb->extents.Flink;
2737
2738 last_end = start;
2739
2740 while (le != &fcb->extents) {
2741 UINT64 len;
2742 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
2743 EXTENT_DATA2* ed2;
2744
2745 if (!ext->ignore) {
2746 ed = &ext->extent_data;
2747
2748 ed2 = (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) ? (EXTENT_DATA2*)ed->data : NULL;
2749
2750 len = ed2 ? ed2->num_bytes : ed->decoded_size;
2751
2752 if (ext->offset + len <= start) {
2753 last_end = ext->offset + len;
2754 goto nextitem;
2755 }
2756
2757 if (ext->offset > last_end && ext->offset > start + bytes_read) {
2758 UINT32 read = (UINT32)min(length, ext->offset - max(start, last_end));
2759
2760 RtlZeroMemory(data + bytes_read, read);
2761 bytes_read += read;
2762 length -= read;
2763 }
2764
2765 if (length == 0 || ext->offset > start + bytes_read + length)
2766 break;
2767
2768 if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
2769 WARN("Encryption not supported\n");
2770 Status = STATUS_NOT_IMPLEMENTED;
2771 goto exit;
2772 }
2773
2774 if (ed->encoding != BTRFS_ENCODING_NONE) {
2775 WARN("Other encodings not supported\n");
2776 Status = STATUS_NOT_IMPLEMENTED;
2777 goto exit;
2778 }
2779
2780 switch (ed->type) {
2781 case EXTENT_TYPE_INLINE:
2782 {
2783 UINT64 off = start + bytes_read - ext->offset;
2784 UINT32 read;
2785
2786 if (ed->compression == BTRFS_COMPRESSION_NONE) {
2787 read = (UINT32)min(min(len, ext->datalen) - off, length);
2788
2789 RtlCopyMemory(data + bytes_read, &ed->data[off], read);
2790 } else if (ed->compression == BTRFS_COMPRESSION_ZLIB || ed->compression == BTRFS_COMPRESSION_LZO) {
2791 UINT8* decomp;
2792 BOOL decomp_alloc;
2793 UINT16 inlen = ext->datalen - (UINT16)offsetof(EXTENT_DATA, data[0]);
2794
2795 if (ed->decoded_size == 0 || ed->decoded_size > 0xffffffff) {
2796 ERR("ed->decoded_size was invalid (%llx)\n", ed->decoded_size);
2797 Status = STATUS_INTERNAL_ERROR;
2798 goto exit;
2799 }
2800
2801 read = (UINT32)min(ed->decoded_size - off, length);
2802
2803 if (off > 0) {
2804 decomp = ExAllocatePoolWithTag(NonPagedPool, (UINT32)ed->decoded_size, ALLOC_TAG);
2805 if (!decomp) {
2806 ERR("out of memory\n");
2807 Status = STATUS_INSUFFICIENT_RESOURCES;
2808 goto exit;
2809 }
2810
2811 decomp_alloc = TRUE;
2812 } else {
2813 decomp = data + bytes_read;
2814 decomp_alloc = FALSE;
2815 }
2816
2817 if (ed->compression == BTRFS_COMPRESSION_ZLIB) {
2818 Status = zlib_decompress(ed->data, inlen, decomp, (UINT32)(read + off));
2819 if (!NT_SUCCESS(Status)) {
2820 ERR("zlib_decompress returned %08x\n", Status);
2821 if (decomp_alloc) ExFreePool(decomp);
2822 goto exit;
2823 }
2824 } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
2825 if (inlen < sizeof(UINT32)) {
2826 ERR("extent data was truncated\n");
2827 Status = STATUS_INTERNAL_ERROR;
2828 if (decomp_alloc) ExFreePool(decomp);
2829 goto exit;
2830 } else
2831 inlen -= sizeof(UINT32);
2832
2833 Status = lzo_decompress(ed->data + sizeof(UINT32), inlen, decomp, (UINT32)(read + off), sizeof(UINT32));
2834 if (!NT_SUCCESS(Status)) {
2835 ERR("lzo_decompress returned %08x\n", Status);
2836 if (decomp_alloc) ExFreePool(decomp);
2837 goto exit;
2838 }
2839 }
2840
2841 if (decomp_alloc) {
2842 RtlCopyMemory(data + bytes_read, decomp + off, read);
2843 ExFreePool(decomp);
2844 }
2845 } else {
2846 ERR("unhandled compression type %x\n", ed->compression);
2847 Status = STATUS_NOT_IMPLEMENTED;
2848 goto exit;
2849 }
2850
2851 bytes_read += read;
2852 length -= read;
2853
2854 break;
2855 }
2856
2857 case EXTENT_TYPE_REGULAR:
2858 {
2859 UINT64 off = start + bytes_read - ext->offset;
2860 UINT32 to_read, read;
2861 UINT8* buf;
2862 BOOL mdl = (Irp && Irp->MdlAddress) ? TRUE : FALSE;
2863 BOOL buf_free;
2864 UINT32 bumpoff = 0, *csum;
2865 UINT64 addr;
2866 chunk* c;
2867
2868 read = (UINT32)(len - off);
2869 if (read > length) read = (UINT32)length;
2870
2871 if (ed->compression == BTRFS_COMPRESSION_NONE) {
2872 addr = ed2->address + ed2->offset + off;
2873 to_read = (UINT32)sector_align(read, fcb->Vcb->superblock.sector_size);
2874
2875 if (addr % fcb->Vcb->superblock.sector_size > 0) {
2876 bumpoff = addr % fcb->Vcb->superblock.sector_size;
2877 addr -= bumpoff;
2878 to_read = (UINT32)sector_align(read + bumpoff, fcb->Vcb->superblock.sector_size);
2879 }
2880 } else {
2881 addr = ed2->address;
2882 to_read = (UINT32)sector_align(ed2->size, fcb->Vcb->superblock.sector_size);
2883 }
2884
2885 if (ed->compression == BTRFS_COMPRESSION_NONE && start % fcb->Vcb->superblock.sector_size == 0 &&
2886 length % fcb->Vcb->superblock.sector_size == 0) {
2887 buf = data + bytes_read;
2888 buf_free = FALSE;
2889 } else {
2890 buf = ExAllocatePoolWithTag(PagedPool, to_read, ALLOC_TAG);
2891 buf_free = TRUE;
2892
2893 if (!buf) {
2894 ERR("out of memory\n");
2895 Status = STATUS_INSUFFICIENT_RESOURCES;
2896 goto exit;
2897 }
2898
2899 mdl = FALSE;
2900 }
2901
2902 c = get_chunk_from_address(fcb->Vcb, addr);
2903
2904 if (!c) {
2905 ERR("get_chunk_from_address(%llx) failed\n", addr);
2906
2907 if (buf_free)
2908 ExFreePool(buf);
2909
2910 goto exit;
2911 }
2912
2913 if (ext->csum) {
2914 if (ed->compression == BTRFS_COMPRESSION_NONE)
2915 csum = &ext->csum[off / fcb->Vcb->superblock.sector_size];
2916 else
2917 csum = ext->csum;
2918 } else
2919 csum = NULL;
2920
2921 Status = read_data(fcb->Vcb, addr, to_read, csum, FALSE, buf, c, NULL, Irp, 0, mdl,
2922 fcb && fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
2923 if (!NT_SUCCESS(Status)) {
2924 ERR("read_data returned %08x\n", Status);
2925
2926 if (buf_free)
2927 ExFreePool(buf);
2928
2929 goto exit;
2930 }
2931
2932 if (ed->compression == BTRFS_COMPRESSION_NONE) {
2933 if (buf_free)
2934 RtlCopyMemory(data + bytes_read, buf + bumpoff, read);
2935 } else {
2936 UINT8 *decomp = NULL, *buf2;
2937 ULONG outlen, inlen, off2;
2938 UINT32 inpageoff = 0;
2939
2940 off2 = (ULONG)(ed2->offset + off);
2941 buf2 = buf;
2942 inlen = (ULONG)ed2->size;
2943
2944 if (ed->compression == BTRFS_COMPRESSION_LZO) {
2945 ULONG inoff = sizeof(UINT32);
2946
2947 inlen -= sizeof(UINT32);
2948
2949 // If reading a few sectors in, skip to the interesting bit
2950 while (off2 > LINUX_PAGE_SIZE) {
2951 UINT32 partlen;
2952
2953 if (inlen < sizeof(UINT32))
2954 break;
2955
2956 partlen = *(UINT32*)(buf2 + inoff);
2957
2958 if (partlen < inlen) {
2959 off2 -= LINUX_PAGE_SIZE;
2960 inoff += partlen + sizeof(UINT32);
2961 inlen -= partlen + sizeof(UINT32);
2962
2963 if (LINUX_PAGE_SIZE - (inoff % LINUX_PAGE_SIZE) < sizeof(UINT32))
2964 inoff = ((inoff / LINUX_PAGE_SIZE) + 1) * LINUX_PAGE_SIZE;
2965 } else
2966 break;
2967 }
2968
2969 buf2 = &buf2[inoff];
2970 inpageoff = inoff % LINUX_PAGE_SIZE;
2971 }
2972
2973 if (off2 != 0) {
2974 outlen = off2 + min(read, (UINT32)(ed2->num_bytes - off));
2975
2976 decomp = ExAllocatePoolWithTag(PagedPool, outlen, ALLOC_TAG);
2977 if (!decomp) {
2978 ERR("out of memory\n");
2979 ExFreePool(buf);
2980 Status = STATUS_INSUFFICIENT_RESOURCES;
2981 goto exit;
2982 }
2983 } else
2984 outlen = min(read, (UINT32)(ed2->num_bytes - off));
2985
2986 if (ed->compression == BTRFS_COMPRESSION_ZLIB) {
2987 Status = zlib_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen);
2988
2989 if (!NT_SUCCESS(Status)) {
2990 ERR("zlib_decompress returned %08x\n", Status);
2991 ExFreePool(buf);
2992
2993 if (decomp)
2994 ExFreePool(decomp);
2995
2996 goto exit;
2997 }
2998 } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
2999 Status = lzo_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen, inpageoff);
3000
3001 if (!NT_SUCCESS(Status)) {
3002 ERR("lzo_decompress returned %08x\n", Status);
3003 ExFreePool(buf);
3004
3005 if (decomp)
3006 ExFreePool(decomp);
3007
3008 goto exit;
3009 }
3010 } else {
3011 ERR("unsupported compression type %x\n", ed->compression);
3012 Status = STATUS_NOT_SUPPORTED;
3013
3014 ExFreePool(buf);
3015
3016 if (decomp)
3017 ExFreePool(decomp);
3018
3019 goto exit;
3020 }
3021
3022 if (decomp) {
3023 RtlCopyMemory(data + bytes_read, decomp + off2, (size_t)min(read, ed2->num_bytes - off));
3024 ExFreePool(decomp);
3025 }
3026 }
3027
3028 if (buf_free)
3029 ExFreePool(buf);
3030
3031 bytes_read += read;
3032 length -= read;
3033
3034 break;
3035 }
3036
3037 case EXTENT_TYPE_PREALLOC:
3038 {
3039 UINT64 off = start + bytes_read - ext->offset;
3040 UINT32 read = (UINT32)(len - off);
3041
3042 if (read > length) read = (UINT32)length;
3043
3044 RtlZeroMemory(data + bytes_read, read);
3045
3046 bytes_read += read;
3047 length -= read;
3048
3049 break;
3050 }
3051
3052 default:
3053 WARN("Unsupported extent data type %u\n", ed->type);
3054 Status = STATUS_NOT_IMPLEMENTED;
3055 goto exit;
3056 }
3057
3058 last_end = ext->offset + len;
3059
3060 if (length == 0)
3061 break;
3062 }
3063
3064 nextitem:
3065 le = le->Flink;
3066 }
3067
3068 if (length > 0 && start + bytes_read < fcb->inode_item.st_size) {
3069 UINT32 read = (UINT32)min(fcb->inode_item.st_size - start - bytes_read, length);
3070
3071 RtlZeroMemory(data + bytes_read, read);
3072
3073 bytes_read += read;
3074 length -= read;
3075 }
3076
3077 Status = STATUS_SUCCESS;
3078 if (pbr)
3079 *pbr = bytes_read;
3080
3081 #ifdef DEBUG_STATS
3082 time2 = KeQueryPerformanceCounter(NULL);
3083
3084 fcb->Vcb->stats.num_reads++;
3085 fcb->Vcb->stats.data_read += bytes_read;
3086 fcb->Vcb->stats.read_total_time += time2.QuadPart - time1.QuadPart;
3087 #endif
3088
3089 exit:
3090 return Status;
3091 }
3092
3093 NTSTATUS do_read(PIRP Irp, BOOLEAN wait, ULONG* bytes_read) {
3094 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3095 PFILE_OBJECT FileObject = IrpSp->FileObject;
3096 fcb* fcb = FileObject->FsContext;
3097 UINT8* data = NULL;
3098 ULONG length = IrpSp->Parameters.Read.Length, addon = 0;
3099 UINT64 start = IrpSp->Parameters.Read.ByteOffset.QuadPart;
3100
3101 *bytes_read = 0;
3102
3103 if (!fcb || !fcb->Vcb || !fcb->subvol)
3104 return STATUS_INTERNAL_ERROR;
3105
3106 TRACE("file = %S (fcb = %p)\n", file_desc(FileObject), fcb);
3107 TRACE("offset = %llx, length = %x\n", start, length);
3108 TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "TRUE" : "FALSE", Irp->Flags & IRP_NOCACHE ? "TRUE" : "FALSE");
3109
3110 if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY)
3111 return STATUS_INVALID_DEVICE_REQUEST;
3112
3113 if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) {
3114 WARN("tried to read locked region\n");
3115 return STATUS_FILE_LOCK_CONFLICT;
3116 }
3117
3118 if (length == 0) {
3119 TRACE("tried to read zero bytes\n");
3120 return STATUS_SUCCESS;
3121 }
3122
3123 if (start >= (UINT64)fcb->Header.FileSize.QuadPart) {
3124 TRACE("tried to read with offset after file end (%llx >= %llx)\n", start, fcb->Header.FileSize.QuadPart);
3125 return STATUS_END_OF_FILE;
3126 }
3127
3128 TRACE("FileObject %p fcb %p FileSize = %llx st_size = %llx (%p)\n", FileObject, fcb, fcb->Header.FileSize.QuadPart, fcb->inode_item.st_size, &fcb->inode_item.st_size);
3129
3130 if (Irp->Flags & IRP_NOCACHE || !(IrpSp->MinorFunction & IRP_MN_MDL)) {
3131 data = map_user_buffer(Irp, fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
3132
3133 if (Irp->MdlAddress && !data) {
3134 ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
3135 return STATUS_INSUFFICIENT_RESOURCES;
3136 }
3137
3138 if (start >= (UINT64)fcb->Header.ValidDataLength.QuadPart) {
3139 length = (ULONG)min(length, min(start + length, (UINT64)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
3140 RtlZeroMemory(data, length);
3141 Irp->IoStatus.Information = *bytes_read = length;
3142 return STATUS_SUCCESS;
3143 }
3144
3145 if (length + start > (UINT64)fcb->Header.ValidDataLength.QuadPart) {
3146 addon = (ULONG)(min(start + length, (UINT64)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
3147 RtlZeroMemory(data + (fcb->Header.ValidDataLength.QuadPart - start), addon);
3148 length = (ULONG)(fcb->Header.ValidDataLength.QuadPart - start);
3149 }
3150 }
3151
3152 if (!(Irp->Flags & IRP_NOCACHE)) {
3153 NTSTATUS Status = STATUS_SUCCESS;
3154
3155 _SEH2_TRY {
3156 if (!FileObject->PrivateCacheMap) {
3157 CC_FILE_SIZES ccfs;
3158
3159 ccfs.AllocationSize = fcb->Header.AllocationSize;
3160 ccfs.FileSize = fcb->Header.FileSize;
3161 ccfs.ValidDataLength = fcb->Header.ValidDataLength;
3162
3163 init_file_cache(FileObject, &ccfs);
3164 }
3165
3166 if (IrpSp->MinorFunction & IRP_MN_MDL) {
3167 CcMdlRead(FileObject,&IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus);
3168 } else {
3169 if (fCcCopyReadEx) {
3170 TRACE("CcCopyReadEx(%p, %llx, %x, %u, %p, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart,
3171 length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread);
3172 TRACE("sizes = %llx, %llx, %llx\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength);
3173 if (!fCcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) {
3174 TRACE("CcCopyReadEx could not wait\n");
3175
3176 IoMarkIrpPending(Irp);
3177 return STATUS_PENDING;
3178 }
3179 TRACE("CcCopyReadEx finished\n");
3180 } else {
3181 TRACE("CcCopyRead(%p, %llx, %x, %u, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus);
3182 TRACE("sizes = %llx, %llx, %llx\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength);
3183 if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) {
3184 TRACE("CcCopyRead could not wait\n");
3185
3186 IoMarkIrpPending(Irp);
3187 return STATUS_PENDING;
3188 }
3189 TRACE("CcCopyRead finished\n");
3190 }
3191 }
3192 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
3193 Status = _SEH2_GetExceptionCode();
3194 } _SEH2_END;
3195
3196 if (NT_SUCCESS(Status)) {
3197 Status = Irp->IoStatus.Status;
3198 Irp->IoStatus.Information += addon;
3199 *bytes_read = (ULONG)Irp->IoStatus.Information;
3200 } else
3201 ERR("EXCEPTION - %08x\n", Status);
3202
3203 return Status;
3204 } else {
3205 NTSTATUS Status;
3206
3207 if (!wait) {
3208 IoMarkIrpPending(Irp);
3209 return STATUS_PENDING;
3210 }
3211
3212 if (fcb->ads)
3213 Status = read_stream(fcb, data, start, length, bytes_read);
3214 else
3215 Status = read_file(fcb, data, start, length, bytes_read, Irp);
3216
3217 *bytes_read += addon;
3218 TRACE("read %u bytes\n", *bytes_read);
3219
3220 Irp->IoStatus.Information = *bytes_read;
3221
3222 if (diskacc && Status != STATUS_PENDING) {
3223 PETHREAD thread = NULL;
3224
3225 if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread))
3226 thread = Irp->Tail.Overlay.Thread;
3227 else if (!IoIsSystemThread(PsGetCurrentThread()))
3228 thread = PsGetCurrentThread();
3229 else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp)
3230 thread = PsGetCurrentThread();
3231
3232 if (thread)
3233 fPsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0);
3234 }
3235
3236 return Status;
3237 }
3238 }
3239
3240 _Dispatch_type_(IRP_MJ_READ)
3241 _Function_class_(DRIVER_DISPATCH)
3242 NTSTATUS drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
3243 device_extension* Vcb = DeviceObject->DeviceExtension;
3244 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3245 PFILE_OBJECT FileObject = IrpSp->FileObject;
3246 ULONG bytes_read = 0;