[BTRFS][UBTRFS][SHELLBTRFS] Upgrade to 1.7.2
[reactos.git] / drivers / filesystems / btrfs / read.c
1 /* Copyright (c) Mark Harmstone 2016-17
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19 #include "xxhash.h"
20 #include "crc32c.h"
21
/* Outcome of one stripe's read request, set by the completion routine
 * (or pre-set by the dispatcher for stripes that are not issued). */
enum read_data_status {
    ReadDataStatus_Pending,       // IRP issued but not yet completed
    ReadDataStatus_Success,       // read completed with a success status
    ReadDataStatus_Error,         // read completed with a failure status
    ReadDataStatus_MissingDevice, // presumably: device absent, read never issued — confirm against caller
    ReadDataStatus_Skip           // presumably: stripe not needed for this request — confirm against caller
};
29
struct read_data_context;

/* Per-stripe state for a multi-stripe read; one of these exists for each
 * stripe of the chunk being read (see read_data_context.stripes). */
typedef struct {
    struct read_data_context* context; // back-pointer to the owning read context
    uint16_t stripenum;
    bool rewrite;
    PIRP Irp;                          // the read IRP issued for this stripe
    IO_STATUS_BLOCK iosb;              // copied from the IRP on completion
    enum read_data_status status;
    PMDL mdl;
    uint64_t stripestart;              // byte offset within the stripe where the read starts
    uint64_t stripeend;                // byte offset within the stripe where the read ends
} read_data_stripe;
43
/* Shared state for one logical read spanning one or more stripes.
 * The completion routine decrements stripes_left and signals Event
 * when the last stripe finishes. */
typedef struct {
    KEVENT Event;                 // signalled when all stripe IRPs have completed
    NTSTATUS Status;
    chunk* c;
    uint64_t address;             // logical btrfs address being read
    uint32_t buflen;
    LONG num_stripes, stripes_left; // stripes_left is decremented interlocked on completion
    uint64_t type;                // chunk type flags (RAID level)
    uint32_t sector_size;
    uint16_t firstoff, startoffstripe, sectors_per_stripe;
    void* csum;                   // expected checksums for data reads; NULL if none
    bool tree;                    // true if reading a metadata tree block
    read_data_stripe* stripes;    // array of num_stripes per-stripe records
    uint8_t* va;
} read_data_context;
59
60 extern bool diskacc;
61 extern tPsUpdateDiskCounters fPsUpdateDiskCounters;
62 extern tCcCopyReadEx fCcCopyReadEx;
63 extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters;
64
65 #define LZO_PAGE_SIZE 4096
66
67 _Function_class_(IO_COMPLETION_ROUTINE)
68 static NTSTATUS __stdcall read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
69 read_data_stripe* stripe = conptr;
70 read_data_context* context = (read_data_context*)stripe->context;
71
72 UNUSED(DeviceObject);
73
74 stripe->iosb = Irp->IoStatus;
75
76 if (NT_SUCCESS(Irp->IoStatus.Status))
77 stripe->status = ReadDataStatus_Success;
78 else
79 stripe->status = ReadDataStatus_Error;
80
81 if (InterlockedDecrement(&context->stripes_left) == 0)
82 KeSetEvent(&context->Event, 0, false);
83
84 return STATUS_MORE_PROCESSING_REQUIRED;
85 }
86
87 NTSTATUS check_csum(device_extension* Vcb, uint8_t* data, uint32_t sectors, void* csum) {
88 void* csum2;
89
90 csum2 = ExAllocatePoolWithTag(PagedPool, Vcb->csum_size * sectors, ALLOC_TAG);
91 if (!csum2) {
92 ERR("out of memory\n");
93 return STATUS_INSUFFICIENT_RESOURCES;
94 }
95
96 do_calc_job(Vcb, data, sectors, csum2);
97
98 if (RtlCompareMemory(csum2, csum, sectors * Vcb->csum_size) != sectors * Vcb->csum_size) {
99 ExFreePool(csum2);
100 return STATUS_CRC_ERROR;
101 }
102
103 ExFreePool(csum2);
104
105 return STATUS_SUCCESS;
106 }
107
108 void get_tree_checksum(device_extension* Vcb, tree_header* th, void* csum) {
109 switch (Vcb->superblock.csum_type) {
110 case CSUM_TYPE_CRC32C:
111 *(uint32_t*)csum = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
112 break;
113
114 case CSUM_TYPE_XXHASH:
115 *(uint64_t*)csum = XXH64((uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum), 0);
116 break;
117
118 case CSUM_TYPE_SHA256:
119 calc_sha256(csum, &th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
120 break;
121
122 case CSUM_TYPE_BLAKE2:
123 blake2b(csum, BLAKE2_HASH_SIZE, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
124 break;
125 }
126 }
127
128 bool check_tree_checksum(device_extension* Vcb, tree_header* th) {
129 switch (Vcb->superblock.csum_type) {
130 case CSUM_TYPE_CRC32C: {
131 uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
132
133 if (crc32 == *((uint32_t*)th->csum))
134 return true;
135
136 WARN("hash was %08x, expected %08x\n", crc32, *((uint32_t*)th->csum));
137
138 break;
139 }
140
141 case CSUM_TYPE_XXHASH: {
142 uint64_t hash = XXH64((uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum), 0);
143
144 if (hash == *((uint64_t*)th->csum))
145 return true;
146
147 WARN("hash was %I64x, expected %I64x\n", hash, *((uint64_t*)th->csum));
148
149 break;
150 }
151
152 case CSUM_TYPE_SHA256: {
153 uint8_t hash[SHA256_HASH_SIZE];
154
155 calc_sha256(hash, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
156
157 if (RtlCompareMemory(hash, th, SHA256_HASH_SIZE) == SHA256_HASH_SIZE)
158 return true;
159
160 WARN("hash was invalid\n");
161
162 break;
163 }
164
165 case CSUM_TYPE_BLAKE2: {
166 uint8_t hash[BLAKE2_HASH_SIZE];
167
168 blake2b(hash, sizeof(hash), (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
169
170 if (RtlCompareMemory(hash, th, BLAKE2_HASH_SIZE) == BLAKE2_HASH_SIZE)
171 return true;
172
173 WARN("hash was invalid\n");
174
175 break;
176 }
177 }
178
179 return false;
180 }
181
182 void get_sector_csum(device_extension* Vcb, void* buf, void* csum) {
183 switch (Vcb->superblock.csum_type) {
184 case CSUM_TYPE_CRC32C:
185 *(uint32_t*)csum = ~calc_crc32c(0xffffffff, buf, Vcb->superblock.sector_size);
186 break;
187
188 case CSUM_TYPE_XXHASH:
189 *(uint64_t*)csum = XXH64(buf, Vcb->superblock.sector_size, 0);
190 break;
191
192 case CSUM_TYPE_SHA256:
193 calc_sha256(csum, buf, Vcb->superblock.sector_size);
194 break;
195
196 case CSUM_TYPE_BLAKE2:
197 blake2b(csum, BLAKE2_HASH_SIZE, buf, Vcb->superblock.sector_size);
198 break;
199 }
200 }
201
202 bool check_sector_csum(device_extension* Vcb, void* buf, void* csum) {
203 switch (Vcb->superblock.csum_type) {
204 case CSUM_TYPE_CRC32C: {
205 uint32_t crc32 = ~calc_crc32c(0xffffffff, buf, Vcb->superblock.sector_size);
206
207 return *(uint32_t*)csum == crc32;
208 }
209
210 case CSUM_TYPE_XXHASH: {
211 uint64_t hash = XXH64(buf, Vcb->superblock.sector_size, 0);
212
213 return *(uint64_t*)csum == hash;
214 }
215
216 case CSUM_TYPE_SHA256: {
217 uint8_t hash[SHA256_HASH_SIZE];
218
219 calc_sha256(hash, buf, Vcb->superblock.sector_size);
220
221 return RtlCompareMemory(hash, csum, SHA256_HASH_SIZE) == SHA256_HASH_SIZE;
222 }
223
224 case CSUM_TYPE_BLAKE2: {
225 uint8_t hash[BLAKE2_HASH_SIZE];
226
227 blake2b(hash, sizeof(hash), buf, Vcb->superblock.sector_size);
228
229 return RtlCompareMemory(hash, csum, BLAKE2_HASH_SIZE) == BLAKE2_HASH_SIZE;
230 }
231 }
232
233 return false;
234 }
235
/* Post-read verification and recovery for DUP/RAID1-style chunks, where each
 * stripe holds a complete copy of the data. Validates the copy that was read
 * (tree header + checksum for metadata, per-sector checksums for data); on a
 * checksum error, tries the remaining mirrors and, if a good copy is found,
 * patches `buf` and writes the good data back over the bad stripe. Returns
 * STATUS_CRC_ERROR if no mirror has valid data. */
static NTSTATUS read_data_dup(device_extension* Vcb, uint8_t* buf, uint64_t addr, read_data_context* context, CHUNK_ITEM* ci,
                              device** devices, uint64_t generation) {
    ULONG i;
    bool checksum_error = false;
    uint16_t j, stripe = 0;
    NTSTATUS Status;
    CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];

    // Fail immediately on any I/O error; otherwise pick the first successful stripe.
    for (j = 0; j < ci->num_stripes; j++) {
        if (context->stripes[j].status == ReadDataStatus_Error) {
            WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
            return context->stripes[j].iosb.Status;
        } else if (context->stripes[j].status == ReadDataStatus_Success) {
            stripe = j;
            break;
        }
    }

    if (context->stripes[stripe].status != ReadDataStatus_Success)
        return STATUS_INTERNAL_ERROR;

    if (context->tree) {
        // Metadata read: check the tree header's address, checksum, and
        // (when the caller supplied one) expected generation.
        tree_header* th = (tree_header*)buf;

        if (th->address != context->address || !check_tree_checksum(Vcb, th)) {
            checksum_error = true;
            log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (generation != 0 && th->generation != generation) {
            checksum_error = true;
            log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
        }
    } else if (context->csum) {
        // Data read with checksums: verify every sector actually transferred.
        Status = check_csum(Vcb, buf, (ULONG)context->stripes[stripe].Irp->IoStatus.Information / context->sector_size, context->csum);

        if (Status == STATUS_CRC_ERROR) {
            checksum_error = true;
            log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (!NT_SUCCESS(Status)) {
            ERR("check_csum returned %08lx\n", Status);
            return Status;
        }
    }

    if (!checksum_error)
        return STATUS_SUCCESS;

    // Only one copy exists, so there is nothing to recover from.
    if (ci->num_stripes == 1)
        return STATUS_CRC_ERROR;

    if (context->tree) {
        // Try the other mirrors for a valid copy of the whole tree block.
        tree_header* t2;
        bool recovered = false;

        t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
        if (!t2) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        for (j = 0; j < ci->num_stripes; j++) {
            if (j != stripe && devices[j] && devices[j]->devobj) {
                // Mirrors store the data at the same offset within each stripe.
                Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + context->stripes[stripe].stripestart,
                                        Vcb->superblock.node_size, (uint8_t*)t2, false);
                if (!NT_SUCCESS(Status)) {
                    WARN("sync_read_phys returned %08lx\n", Status);
                    log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                } else {
                    bool checksum_error = !check_tree_checksum(Vcb, t2);

                    if (t2->address == addr && !checksum_error && (generation == 0 || t2->generation == generation)) {
                        RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
                        ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
                        recovered = true;

                        if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
                            Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + context->stripes[stripe].stripestart,
                                                     t2, Vcb->superblock.node_size);
                            if (!NT_SUCCESS(Status)) {
                                WARN("write_data_phys returned %08lx\n", Status);
                                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                            }
                        }

                        break;
                    } else if (t2->address != addr || checksum_error)
                        log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                    else
                        log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_GENERATION_ERRORS);
                }
            }
        }

        if (!recovered) {
            ERR("unrecoverable checksum error at %I64x\n", addr);
            ExFreePool(t2);
            return STATUS_CRC_ERROR;
        }

        ExFreePool(t2);
    } else {
        // Data read: recover sector by sector, so that a single bad sector
        // doesn't force re-reading (or failing) the whole extent.
        ULONG sectors = (ULONG)context->stripes[stripe].Irp->IoStatus.Information / Vcb->superblock.sector_size;
        uint8_t* sector;
        void* ptr = context->csum;

        sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
        if (!sector) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        for (i = 0; i < sectors; i++) {
            if (!check_sector_csum(Vcb, buf + (i * Vcb->superblock.sector_size), ptr)) {
                bool recovered = false;

                for (j = 0; j < ci->num_stripes; j++) {
                    if (j != stripe && devices[j] && devices[j]->devobj) {
                        Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj,
                                                cis[j].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
                                                Vcb->superblock.sector_size, sector, false);
                        if (!NT_SUCCESS(Status)) {
                            WARN("sync_read_phys returned %08lx\n", Status);
                            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                        } else {
                            if (check_sector_csum(Vcb, sector, ptr)) {
                                RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
                                ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);
                                recovered = true;

                                if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
                                    Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj,
                                                             cis[stripe].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
                                                             sector, Vcb->superblock.sector_size);
                                    if (!NT_SUCCESS(Status)) {
                                        WARN("write_data_phys returned %08lx\n", Status);
                                        log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                                    }
                                }

                                break;
                            } else
                                log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                        }
                    }
                }

                if (!recovered) {
                    ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
                    ExFreePool(sector);
                    return STATUS_CRC_ERROR;
                }
            }

            // advance to the next sector's expected checksum
            ptr = (uint8_t*)ptr + Vcb->csum_size;
        }

        ExFreePool(sector);
    }

    return STATUS_SUCCESS;
}
397
/* Post-read verification for RAID0 chunks. RAID0 has no redundancy, so a
 * checksum failure is unrecoverable; the only extra work here is locating
 * which stripe/device the bad sector lived on so the error can be logged
 * against the right device. */
static NTSTATUS read_data_raid0(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context,
                                CHUNK_ITEM* ci, device** devices, uint64_t generation, uint64_t offset) {
    uint64_t i;

    // Any stripe-level I/O error fails the whole read.
    for (i = 0; i < ci->num_stripes; i++) {
        if (context->stripes[i].status == ReadDataStatus_Error) {
            WARN("stripe %I64u returned error %08lx\n", i, context->stripes[i].iosb.Status);
            log_device_error(Vcb, devices[i], BTRFS_DEV_STAT_READ_ERRORS);
            return context->stripes[i].iosb.Status;
        }
    }

    if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries
        tree_header* th = (tree_header*)buf;
        bool checksum_error = !check_tree_checksum(Vcb, th);

        if (checksum_error || addr != th->address || (generation != 0 && generation != th->generation)) {
            uint64_t off;
            uint16_t stripe;

            // Map the logical address back to its stripe to identify the device.
            get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &off, &stripe);

            ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);

            if (checksum_error) {
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                return STATUS_CRC_ERROR;
            } else if (addr != th->address) {
                WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr);
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
                return STATUS_CRC_ERROR;
            } else if (generation != 0 && generation != th->generation) {
                WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation);
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
                return STATUS_CRC_ERROR;
            }
        }
    } else if (context->csum) {
        NTSTATUS Status;

        Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);

        if (Status == STATUS_CRC_ERROR) {
            // Re-check sector by sector just to find and log the offending device.
            void* ptr = context->csum;

            for (i = 0; i < length / Vcb->superblock.sector_size; i++) {
                if (!check_sector_csum(Vcb, buf + (i * Vcb->superblock.sector_size), ptr)) {
                    uint64_t off;
                    uint16_t stripe;

                    get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, ci->num_stripes, &off, &stripe);

                    ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);

                    log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);

                    return Status;
                }

                ptr = (uint8_t*)ptr + Vcb->csum_size;
            }

            return Status;
        } else if (!NT_SUCCESS(Status)) {
            ERR("check_csum returned %08lx\n", Status);
            return Status;
        }
    }

    return STATUS_SUCCESS;
}
469
470 static NTSTATUS read_data_raid10(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context,
471 CHUNK_ITEM* ci, device** devices, uint64_t generation, uint64_t offset) {
472 uint64_t i;
473 uint16_t j, stripe;
474 NTSTATUS Status;
475 bool checksum_error = false;
476 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
477
478 for (j = 0; j < ci->num_stripes; j++) {
479 if (context->stripes[j].status == ReadDataStatus_Error) {
480 WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
481 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
482 return context->stripes[j].iosb.Status;
483 } else if (context->stripes[j].status == ReadDataStatus_Success)
484 stripe = j;
485 }
486
487 if (context->tree) {
488 tree_header* th = (tree_header*)buf;
489
490 if (!check_tree_checksum(Vcb, th)) {
491 checksum_error = true;
492 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
493 } else if (addr != th->address) {
494 WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr);
495 checksum_error = true;
496 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
497 } else if (generation != 0 && generation != th->generation) {
498 WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation);
499 checksum_error = true;
500 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
501 }
502 } else if (context->csum) {
503 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
504
505 if (Status == STATUS_CRC_ERROR)
506 checksum_error = true;
507 else if (!NT_SUCCESS(Status)) {
508 ERR("check_csum returned %08lx\n", Status);
509 return Status;
510 }
511 }
512
513 if (!checksum_error)
514 return STATUS_SUCCESS;
515
516 if (context->tree) {
517 tree_header* t2;
518 uint64_t off;
519 uint16_t badsubstripe = 0;
520 bool recovered = false;
521
522 t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
523 if (!t2) {
524 ERR("out of memory\n");
525 return STATUS_INSUFFICIENT_RESOURCES;
526 }
527
528 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &off, &stripe);
529
530 stripe *= ci->sub_stripes;
531
532 for (j = 0; j < ci->sub_stripes; j++) {
533 if (context->stripes[stripe + j].status == ReadDataStatus_Success) {
534 badsubstripe = j;
535 break;
536 }
537 }
538
539 for (j = 0; j < ci->sub_stripes; j++) {
540 if (context->stripes[stripe + j].status != ReadDataStatus_Success && devices[stripe + j] && devices[stripe + j]->devobj) {
541 Status = sync_read_phys(devices[stripe + j]->devobj, devices[stripe + j]->fileobj, cis[stripe + j].offset + off,
542 Vcb->superblock.node_size, (uint8_t*)t2, false);
543 if (!NT_SUCCESS(Status)) {
544 WARN("sync_read_phys returned %08lx\n", Status);
545 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_READ_ERRORS);
546 } else {
547 bool checksum_error = !check_tree_checksum(Vcb, t2);
548
549 if (t2->address == addr && !checksum_error && (generation == 0 || t2->generation == generation)) {
550 RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
551 ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe + j]->devitem.dev_id);
552 recovered = true;
553
554 if (!Vcb->readonly && !devices[stripe + badsubstripe]->readonly && devices[stripe + badsubstripe]->devobj) { // write good data over bad
555 Status = write_data_phys(devices[stripe + badsubstripe]->devobj, devices[stripe + badsubstripe]->fileobj,
556 cis[stripe + badsubstripe].offset + off, t2, Vcb->superblock.node_size);
557 if (!NT_SUCCESS(Status)) {
558 WARN("write_data_phys returned %08lx\n", Status);
559 log_device_error(Vcb, devices[stripe + badsubstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
560 }
561 }
562
563 break;
564 } else if (t2->address != addr || checksum_error)
565 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
566 else
567 log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_GENERATION_ERRORS);
568 }
569 }
570 }
571
572 if (!recovered) {
573 ERR("unrecoverable checksum error at %I64x\n", addr);
574 ExFreePool(t2);
575 return STATUS_CRC_ERROR;
576 }
577
578 ExFreePool(t2);
579 } else {
580 ULONG sectors = length / Vcb->superblock.sector_size;
581 uint8_t* sector;
582 void* ptr = context->csum;
583
584 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
585 if (!sector) {
586 ERR("out of memory\n");
587 return STATUS_INSUFFICIENT_RESOURCES;
588 }
589
590 for (i = 0; i < sectors; i++) {
591 if (!check_sector_csum(Vcb, buf + (i * Vcb->superblock.sector_size), ptr)) {
592 uint64_t off;
593 uint16_t stripe2, badsubstripe = 0;
594 bool recovered = false;
595
596 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
597 ci->num_stripes / ci->sub_stripes, &off, &stripe2);
598
599 stripe2 *= ci->sub_stripes;
600
601 for (j = 0; j < ci->sub_stripes; j++) {
602 if (context->stripes[stripe2 + j].status == ReadDataStatus_Success) {
603 badsubstripe = j;
604 break;
605 }
606 }
607
608 log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
609
610 for (j = 0; j < ci->sub_stripes; j++) {
611 if (context->stripes[stripe2 + j].status != ReadDataStatus_Success && devices[stripe2 + j] && devices[stripe2 + j]->devobj) {
612 Status = sync_read_phys(devices[stripe2 + j]->devobj, devices[stripe2 + j]->fileobj, cis[stripe2 + j].offset + off,
613 Vcb->superblock.sector_size, sector, false);
614 if (!NT_SUCCESS(Status)) {
615 WARN("sync_read_phys returned %08lx\n", Status);
616 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_READ_ERRORS);
617 } else {
618 if (check_sector_csum(Vcb, sector, ptr)) {
619 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
620 ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe2 + j]->devitem.dev_id);
621 recovered = true;
622
623 if (!Vcb->readonly && !devices[stripe2 + badsubstripe]->readonly && devices[stripe2 + badsubstripe]->devobj) { // write good data over bad
624 Status = write_data_phys(devices[stripe2 + badsubstripe]->devobj, devices[stripe2 + badsubstripe]->fileobj,
625 cis[stripe2 + badsubstripe].offset + off, sector, Vcb->superblock.sector_size);
626 if (!NT_SUCCESS(Status)) {
627 WARN("write_data_phys returned %08lx\n", Status);
628 log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_READ_ERRORS);
629 }
630 }
631
632 break;
633 } else
634 log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
635 }
636 }
637 }
638
639 if (!recovered) {
640 ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
641 ExFreePool(sector);
642 return STATUS_CRC_ERROR;
643 }
644 }
645
646 ptr = (uint8_t*)ptr + Vcb->csum_size;
647 }
648
649 ExFreePool(sector);
650 }
651
652 return STATUS_SUCCESS;
653 }
654
/* Post-read verification and recovery for RAID5 chunks. Overlays any
 * not-yet-flushed partial-stripe data from the chunk's in-memory list,
 * verifies checksums, and on failure (or in degraded mode) reconstructs the
 * missing/bad block by XORing the other data stripes with parity, rewriting
 * the bad copy where possible. Returns STATUS_CRC_ERROR if reconstruction
 * also fails verification. */
static NTSTATUS read_data_raid5(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, CHUNK_ITEM* ci,
                                device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) {
    ULONG i;
    NTSTATUS Status;
    bool checksum_error = false;
    CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
    uint16_t j, stripe;
    bool no_success = true;

    // Fail on any I/O error; otherwise remember a stripe that succeeded.
    for (j = 0; j < ci->num_stripes; j++) {
        if (context->stripes[j].status == ReadDataStatus_Error) {
            WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
            return context->stripes[j].iosb.Status;
        } else if (context->stripes[j].status == ReadDataStatus_Success) {
            stripe = j;
            no_success = false;
        }
    }

    if (c) { // check partial stripes
        // Overlay any pending partial-stripe writes that overlap this read,
        // since the on-disk copy may not yet contain them.
        LIST_ENTRY* le;
        uint64_t ps_length = (ci->num_stripes - 1) * ci->stripe_length;

        ExAcquireResourceSharedLite(&c->partial_stripes_lock, true);

        le = c->partial_stripes.Flink;
        while (le != &c->partial_stripes) {
            partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);

            if (ps->address + ps_length > addr && ps->address < addr + length) {
                ULONG runlength, index;

                // Clear bits in the bitmap mark sectors still held in memory.
                runlength = RtlFindFirstRunClear(&ps->bmp, &index);

                while (runlength != 0) {
#ifdef __REACTOS__
                    uint64_t runstart, runend, start, end;
#endif
                    if (index >= ps->bmplen)
                        break;

                    if (index + runlength >= ps->bmplen) {
                        runlength = ps->bmplen - index;

                        if (runlength == 0)
                            break;
                    }

#ifndef __REACTOS__
                    uint64_t runstart = ps->address + (index * Vcb->superblock.sector_size);
                    uint64_t runend = runstart + (runlength * Vcb->superblock.sector_size);
                    uint64_t start = max(runstart, addr);
                    uint64_t end = min(runend, addr + length);
#else
                    runstart = ps->address + (index * Vcb->superblock.sector_size);
                    runend = runstart + (runlength * Vcb->superblock.sector_size);
                    start = max(runstart, addr);
                    end = min(runend, addr + length);
#endif

                    if (end > start)
                        RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));

                    runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
                }
            } else if (ps->address >= addr + length)
                break;

            le = le->Flink;
        }

        ExReleaseResourceLite(&c->partial_stripes_lock);
    }

    if (context->tree) {
        // Metadata: validate header address/checksum/generation.
        tree_header* th = (tree_header*)buf;

        if (addr != th->address || !check_tree_checksum(Vcb, th)) {
            checksum_error = true;
            if (!no_success && !degraded)
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
        } else if (generation != 0 && generation != th->generation) {
            checksum_error = true;
            if (!no_success && !degraded)
                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
        }
    } else if (context->csum) {
        Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);

        if (Status == STATUS_CRC_ERROR) {
            if (!degraded)
                WARN("checksum error\n");
            checksum_error = true;
        } else if (!NT_SUCCESS(Status)) {
            ERR("check_csum returned %08lx\n", Status);
            return Status;
        }
    } else if (degraded)
        // No checksums to verify, but a device is missing, so the data
        // must be reconstructed from parity regardless.
        checksum_error = true;

    if (!checksum_error)
        return STATUS_SUCCESS;

    if (context->tree) {
        uint16_t parity;
        uint64_t off;
        bool recovered = false, first = true, failed = false;
        uint8_t* t2;

        // Two node-sized buffers: the running XOR and the current read.
        t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * 2, ALLOC_TAG);
        if (!t2) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &off, &stripe);

        // Locate the parity stripe for this row, then the physical stripe
        // holding the bad data.
        parity = (((addr - offset) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;

        stripe = (parity + stripe + 1) % ci->num_stripes;

        // XOR together all other stripes (incl. parity) to reconstruct.
        for (j = 0; j < ci->num_stripes; j++) {
            if (j != stripe) {
                if (devices[j] && devices[j]->devobj) {
                    if (first) {
                        Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2, false);
                        if (!NT_SUCCESS(Status)) {
                            ERR("sync_read_phys returned %08lx\n", Status);
                            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                            failed = true;
                            break;
                        }

                        first = false;
                    } else {
                        Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2 + Vcb->superblock.node_size, false);
                        if (!NT_SUCCESS(Status)) {
                            ERR("sync_read_phys returned %08lx\n", Status);
                            log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                            failed = true;
                            break;
                        }

                        do_xor(t2, t2 + Vcb->superblock.node_size, Vcb->superblock.node_size);
                    }
                } else {
                    // Two missing stripes - cannot reconstruct with single parity.
                    failed = true;
                    break;
                }
            }
        }

        if (!failed) {
            tree_header* t3 = (tree_header*)t2;

            if (t3->address == addr && check_tree_checksum(Vcb, t3) && (generation == 0 || t3->generation == generation)) {
                RtlCopyMemory(buf, t2, Vcb->superblock.node_size);

                if (!degraded)
                    ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);

                recovered = true;

                if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
                    Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off, t2, Vcb->superblock.node_size);
                    if (!NT_SUCCESS(Status)) {
                        WARN("write_data_phys returned %08lx\n", Status);
                        log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                    }
                }
            }
        }

        if (!recovered) {
            ERR("unrecoverable checksum error at %I64x\n", addr);
            ExFreePool(t2);
            return STATUS_CRC_ERROR;
        }

        ExFreePool(t2);
    } else {
        // Data: reconstruct sector by sector.
        ULONG sectors = length / Vcb->superblock.sector_size;
        uint8_t* sector;
        void* ptr = context->csum;

        // Two sector-sized buffers: the running XOR and the current read.
        sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * 2, ALLOC_TAG);
        if (!sector) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        for (i = 0; i < sectors; i++) {
            uint16_t parity;
            uint64_t off;

            get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
                             ci->num_stripes - 1, &off, &stripe);

            parity = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;

            stripe = (parity + stripe + 1) % ci->num_stripes;

            // Reconstruct if the device is missing, or if checksums exist
            // and this sector fails verification.
            if (!devices[stripe] || !devices[stripe]->devobj || (ptr && !check_sector_csum(Vcb, buf + (i * Vcb->superblock.sector_size), ptr))) {
                bool recovered = false, first = true, failed = false;

                if (devices[stripe] && devices[stripe]->devobj)
                    log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_READ_ERRORS);

                for (j = 0; j < ci->num_stripes; j++) {
                    if (j != stripe) {
                        if (devices[j] && devices[j]->devobj) {
                            if (first) {
                                Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size, sector, false);
                                if (!NT_SUCCESS(Status)) {
                                    ERR("sync_read_phys returned %08lx\n", Status);
                                    failed = true;
                                    log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                                    break;
                                }

                                first = false;
                            } else {
                                Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size,
                                                        sector + Vcb->superblock.sector_size, false);
                                if (!NT_SUCCESS(Status)) {
                                    ERR("sync_read_phys returned %08lx\n", Status);
                                    failed = true;
                                    log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
                                    break;
                                }

                                do_xor(sector, sector + Vcb->superblock.sector_size, Vcb->superblock.sector_size);
                            }
                        } else {
                            failed = true;
                            break;
                        }
                    }
                }

                if (!failed) {
                    // Without checksums (ptr == NULL) the reconstruction
                    // cannot be verified; accept it as-is.
                    if (!ptr || check_sector_csum(Vcb, sector, ptr)) {
                        RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);

                        if (!degraded)
                            ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);

                        recovered = true;

                        if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
                            Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off,
                                                     sector, Vcb->superblock.sector_size);
                            if (!NT_SUCCESS(Status)) {
                                WARN("write_data_phys returned %08lx\n", Status);
                                log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
                            }
                        }
                    }
                }

                if (!recovered) {
                    ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
                    ExFreePool(sector);
                    return STATUS_CRC_ERROR;
                }
            }

            if (ptr)
                ptr = (uint8_t*)ptr + Vcb->csum_size;
        }

        ExFreePool(sector);
    }

    return STATUS_SUCCESS;
}
932
/* Reconstruct missing RAID6 stripes using GF(2^8) arithmetic.
 *
 * sectors layout (one sector_size unit per stripe):
 *   slots 0 .. num_stripes-3  data stripes
 *   slot  num_stripes-2       p (xor parity)
 *   slot  num_stripes-1       q (Reed-Solomon syndrome)
 *
 * missing1/missing2 are the two stripe indices that could not be read.
 * If one of them is p (num_stripes - 2), the single missing data stripe is
 * rebuilt from q and the surviving data into out (one sector written).
 * Otherwise both missing stripes are data stripes and are rebuilt from p and
 * q together: out receives one reconstructed sector and out + sector_size
 * the other (two sectors written) — callers must size out accordingly.
 *
 * NOTE(review): in the p-and-q branch, x/y are only both assigned when both
 * missing indices are data stripes (< num_stripes - 2); callers appear to
 * guarantee this — confirm before adding new call sites. */
void raid6_recover2(uint8_t* sectors, uint16_t num_stripes, ULONG sector_size, uint16_t missing1, uint16_t missing2, uint8_t* out) {
    if (missing1 == num_stripes - 2 || missing2 == num_stripes - 2) { // reconstruct from q and data
        // The non-p missing stripe is the data stripe we must rebuild.
        uint16_t missing = missing1 == (num_stripes - 2) ? missing2 : missing1;
        uint16_t stripe;

        stripe = num_stripes - 3; // highest data stripe index

        if (stripe == missing)
            RtlZeroMemory(out, sector_size);
        else
            RtlCopyMemory(out, sectors + (stripe * sector_size), sector_size);

        // Horner-style accumulation of the q syndrome over the surviving
        // data stripes: each step multiplies the accumulator by g (galois_double)
        // before folding in the next lower stripe.
        do {
            stripe--;

            galois_double(out, sector_size);

            if (stripe != missing)
                do_xor(out, sectors + (stripe * sector_size), sector_size);
        } while (stripe > 0);

        // Fold in the stored q (slot num_stripes - 1); what remains is
        // g^missing * D_missing.
        do_xor(out, sectors + ((num_stripes - 1) * sector_size), sector_size);

        // Divide out the g^missing factor to recover the plain data.
        if (missing != 0)
            galois_divpower(out, (uint8_t)missing, sector_size);
    } else { // reconstruct from p and q
        uint16_t x, y, stripe;
        uint8_t gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
        uint32_t j;

        stripe = num_stripes - 3; // highest data stripe index

        // Build partial parities over the surviving data stripes only:
        // qxy (at out) accumulates the partial q, pxy (at out + sector_size)
        // the partial p.
        pxy = out + sector_size;
        qxy = out;

        if (stripe == missing1 || stripe == missing2) {
            RtlZeroMemory(qxy, sector_size);
            RtlZeroMemory(pxy, sector_size);

            if (stripe == missing1)
                x = stripe;
            else
                y = stripe;
        } else {
            RtlCopyMemory(qxy, sectors + (stripe * sector_size), sector_size);
            RtlCopyMemory(pxy, sectors + (stripe * sector_size), sector_size);
        }

        do {
            stripe--;

            galois_double(qxy, sector_size);

            if (stripe != missing1 && stripe != missing2) {
                do_xor(qxy, sectors + (stripe * sector_size), sector_size);
                do_xor(pxy, sectors + (stripe * sector_size), sector_size);
            } else if (stripe == missing1)
                x = stripe;
            else if (stripe == missing2)
                y = stripe;
        } while (stripe > 0);

        // Standard two-failure RAID6 solve (see H. P. Anvin, "The mathematics
        // of RAID-6"): A = g^(y-x) / (g^(y-x) ^ 1), B = g^(-x) / (g^(y-x) ^ 1).
        gyx = gpow2(y > x ? (y-x) : (255-x+y));
        gx = gpow2(255-x);

        denom = gdiv(1, gyx ^ 1);
        a = gmul(gyx, denom);
        b = gmul(gx, denom);

        p = sectors + ((num_stripes - 2) * sector_size);
        q = sectors + ((num_stripes - 1) * sector_size);

        // D_x = A * (P ^ Pxy) ^ B * (Q ^ Qxy), computed byte by byte into qxy.
        for (j = 0; j < sector_size; j++) {
            *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);

            p++;
            q++;
            pxy++;
            qxy++;
        }

        // D_y = D_x ^ Pxy ^ P — recover the second stripe from p parity.
        do_xor(out + sector_size, out, sector_size);
        do_xor(out + sector_size, sectors + ((num_stripes - 2) * sector_size), sector_size);
    }
}
1018
1019 static NTSTATUS read_data_raid6(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, CHUNK_ITEM* ci,
1020 device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) {
1021 NTSTATUS Status;
1022 ULONG i;
1023 bool checksum_error = false;
1024 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
1025 uint16_t stripe, j;
1026 bool no_success = true;
1027
1028 for (j = 0; j < ci->num_stripes; j++) {
1029 if (context->stripes[j].status == ReadDataStatus_Error) {
1030 WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
1031
1032 if (devices[j])
1033 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1034 return context->stripes[j].iosb.Status;
1035 } else if (context->stripes[j].status == ReadDataStatus_Success) {
1036 stripe = j;
1037 no_success = false;
1038 }
1039 }
1040
1041 if (c) { // check partial stripes
1042 LIST_ENTRY* le;
1043 uint64_t ps_length = (ci->num_stripes - 2) * ci->stripe_length;
1044
1045 ExAcquireResourceSharedLite(&c->partial_stripes_lock, true);
1046
1047 le = c->partial_stripes.Flink;
1048 while (le != &c->partial_stripes) {
1049 partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
1050
1051 if (ps->address + ps_length > addr && ps->address < addr + length) {
1052 ULONG runlength, index;
1053
1054 runlength = RtlFindFirstRunClear(&ps->bmp, &index);
1055
1056 while (runlength != 0) {
1057 #ifdef __REACTOS__
1058 uint64_t runstart, runend, start, end;
1059 #endif
1060 if (index >= ps->bmplen)
1061 break;
1062
1063 if (index + runlength >= ps->bmplen) {
1064 runlength = ps->bmplen - index;
1065
1066 if (runlength == 0)
1067 break;
1068 }
1069
1070 #ifndef __REACTOS__
1071 uint64_t runstart = ps->address + (index * Vcb->superblock.sector_size);
1072 uint64_t runend = runstart + (runlength * Vcb->superblock.sector_size);
1073 uint64_t start = max(runstart, addr);
1074 uint64_t end = min(runend, addr + length);
1075 #else
1076 runstart = ps->address + (index * Vcb->superblock.sector_size);
1077 runend = runstart + (runlength * Vcb->superblock.sector_size);
1078 start = max(runstart, addr);
1079 end = min(runend, addr + length);
1080 #endif
1081
1082 if (end > start)
1083 RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
1084
1085 runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
1086 }
1087 } else if (ps->address >= addr + length)
1088 break;
1089
1090 le = le->Flink;
1091 }
1092
1093 ExReleaseResourceLite(&c->partial_stripes_lock);
1094 }
1095
1096 if (context->tree) {
1097 tree_header* th = (tree_header*)buf;
1098
1099 if (addr != th->address || !check_tree_checksum(Vcb, th)) {
1100 checksum_error = true;
1101 if (!no_success && !degraded && devices[stripe])
1102 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1103 } else if (generation != 0 && generation != th->generation) {
1104 checksum_error = true;
1105 if (!no_success && !degraded && devices[stripe])
1106 log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
1107 }
1108 } else if (context->csum) {
1109 Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
1110
1111 if (Status == STATUS_CRC_ERROR) {
1112 if (!degraded)
1113 WARN("checksum error\n");
1114 checksum_error = true;
1115 } else if (!NT_SUCCESS(Status)) {
1116 ERR("check_csum returned %08lx\n", Status);
1117 return Status;
1118 }
1119 } else if (degraded)
1120 checksum_error = true;
1121
1122 if (!checksum_error)
1123 return STATUS_SUCCESS;
1124
1125 if (context->tree) {
1126 uint8_t* sector;
1127 uint16_t k, physstripe, parity1, parity2, error_stripe;
1128 uint64_t off;
1129 bool recovered = false, failed = false;
1130 ULONG num_errors = 0;
1131
1132 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * (ci->num_stripes + 2), ALLOC_TAG);
1133 if (!sector) {
1134 ERR("out of memory\n");
1135 return STATUS_INSUFFICIENT_RESOURCES;
1136 }
1137
1138 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &off, &stripe);
1139
1140 parity1 = (((addr - offset) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1141 parity2 = (parity1 + 1) % ci->num_stripes;
1142
1143 physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1144
1145 j = (parity2 + 1) % ci->num_stripes;
1146
1147 for (k = 0; k < ci->num_stripes - 1; k++) {
1148 if (j != physstripe) {
1149 if (devices[j] && devices[j]->devobj) {
1150 Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size,
1151 sector + (k * Vcb->superblock.node_size), false);
1152 if (!NT_SUCCESS(Status)) {
1153 ERR("sync_read_phys returned %08lx\n", Status);
1154 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1155 num_errors++;
1156 error_stripe = k;
1157
1158 if (num_errors > 1) {
1159 failed = true;
1160 break;
1161 }
1162 }
1163 } else {
1164 num_errors++;
1165 error_stripe = k;
1166
1167 if (num_errors > 1) {
1168 failed = true;
1169 break;
1170 }
1171 }
1172 }
1173
1174 j = (j + 1) % ci->num_stripes;
1175 }
1176
1177 if (!failed) {
1178 if (num_errors == 0) {
1179 tree_header* th = (tree_header*)(sector + (stripe * Vcb->superblock.node_size));
1180
1181 RtlCopyMemory(sector + (stripe * Vcb->superblock.node_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size),
1182 Vcb->superblock.node_size);
1183
1184 for (j = 0; j < ci->num_stripes - 2; j++) {
1185 if (j != stripe)
1186 do_xor(sector + (stripe * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), Vcb->superblock.node_size);
1187 }
1188
1189 if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation)) {
1190 RtlCopyMemory(buf, sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1191
1192 if (devices[physstripe] && devices[physstripe]->devobj)
1193 ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id);
1194
1195 recovered = true;
1196
1197 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1198 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1199 sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1200 if (!NT_SUCCESS(Status)) {
1201 WARN("write_data_phys returned %08lx\n", Status);
1202 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1203 }
1204 }
1205 }
1206 }
1207
1208 if (!recovered) {
1209 tree_header* th = (tree_header*)(sector + (ci->num_stripes * Vcb->superblock.node_size));
1210 bool read_q = false;
1211
1212 if (devices[parity2] && devices[parity2]->devobj) {
1213 Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off,
1214 Vcb->superblock.node_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.node_size), false);
1215 if (!NT_SUCCESS(Status)) {
1216 ERR("sync_read_phys returned %08lx\n", Status);
1217 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1218 } else
1219 read_q = true;
1220 }
1221
1222 if (read_q) {
1223 if (num_errors == 1) {
1224 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.node_size));
1225
1226 if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation))
1227 recovered = true;
1228 } else {
1229 for (j = 0; j < ci->num_stripes - 1; j++) {
1230 if (j != stripe) {
1231 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.node_size));
1232
1233 if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation)) {
1234 recovered = true;
1235 error_stripe = j;
1236 break;
1237 }
1238 }
1239 }
1240 }
1241 }
1242
1243 if (recovered) {
1244 uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1245
1246 if (devices[physstripe] && devices[physstripe]->devobj)
1247 ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id);
1248
1249 RtlCopyMemory(buf, sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1250
1251 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1252 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1253 sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1254 if (!NT_SUCCESS(Status)) {
1255 WARN("write_data_phys returned %08lx\n", Status);
1256 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1257 }
1258 }
1259
1260 if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1261 if (error_stripe == ci->num_stripes - 2) {
1262 ERR("recovering from parity error at %I64x, device %I64x\n", addr, devices[error_stripe_phys]->devitem.dev_id);
1263
1264 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1265
1266 RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1267
1268 for (j = 0; j < ci->num_stripes - 2; j++) {
1269 if (j == stripe) {
1270 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (ci->num_stripes * Vcb->superblock.node_size),
1271 Vcb->superblock.node_size);
1272 } else {
1273 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size),
1274 Vcb->superblock.node_size);
1275 }
1276 }
1277 } else {
1278 ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((error_stripe - stripe) * ci->stripe_length),
1279 devices[error_stripe_phys]->devitem.dev_id);
1280
1281 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1282
1283 RtlCopyMemory(sector + (error_stripe * Vcb->superblock.node_size),
1284 sector + ((ci->num_stripes + 1) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1285 }
1286 }
1287
1288 if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1289 Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off,
1290 sector + (error_stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1291 if (!NT_SUCCESS(Status)) {
1292 WARN("write_data_phys returned %08lx\n", Status);
1293 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
1294 }
1295 }
1296 }
1297 }
1298 }
1299
1300 if (!recovered) {
1301 ERR("unrecoverable checksum error at %I64x\n", addr);
1302 ExFreePool(sector);
1303 return STATUS_CRC_ERROR;
1304 }
1305
1306 ExFreePool(sector);
1307 } else {
1308 ULONG sectors = length / Vcb->superblock.sector_size;
1309 uint8_t* sector;
1310 void* ptr = context->csum;
1311
1312 sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * (ci->num_stripes + 2), ALLOC_TAG);
1313 if (!sector) {
1314 ERR("out of memory\n");
1315 return STATUS_INSUFFICIENT_RESOURCES;
1316 }
1317
1318 for (i = 0; i < sectors; i++) {
1319 uint64_t off;
1320 uint16_t physstripe, parity1, parity2;
1321
1322 get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
1323 ci->num_stripes - 2, &off, &stripe);
1324
1325 parity1 = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1326 parity2 = (parity1 + 1) % ci->num_stripes;
1327
1328 physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1329
1330 if (!devices[physstripe] || !devices[physstripe]->devobj || (context->csum && !check_sector_csum(Vcb, buf + (i * Vcb->superblock.sector_size), ptr))) {
1331 uint16_t k, error_stripe;
1332 bool recovered = false, failed = false;
1333 ULONG num_errors = 0;
1334
1335 if (devices[physstripe] && devices[physstripe]->devobj)
1336 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_READ_ERRORS);
1337
1338 j = (parity2 + 1) % ci->num_stripes;
1339
1340 for (k = 0; k < ci->num_stripes - 1; k++) {
1341 if (j != physstripe) {
1342 if (devices[j] && devices[j]->devobj) {
1343 Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size,
1344 sector + (k * Vcb->superblock.sector_size), false);
1345 if (!NT_SUCCESS(Status)) {
1346 ERR("sync_read_phys returned %08lx\n", Status);
1347 log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
1348 num_errors++;
1349 error_stripe = k;
1350
1351 if (num_errors > 1) {
1352 failed = true;
1353 break;
1354 }
1355 }
1356 } else {
1357 num_errors++;
1358 error_stripe = k;
1359
1360 if (num_errors > 1) {
1361 failed = true;
1362 break;
1363 }
1364 }
1365 }
1366
1367 j = (j + 1) % ci->num_stripes;
1368 }
1369
1370 if (!failed) {
1371 if (num_errors == 0) {
1372 RtlCopyMemory(sector + (stripe * Vcb->superblock.sector_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1373
1374 for (j = 0; j < ci->num_stripes - 2; j++) {
1375 if (j != stripe)
1376 do_xor(sector + (stripe * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1377 }
1378
1379 if (!ptr || check_sector_csum(Vcb, sector + (stripe * Vcb->superblock.sector_size), ptr)) {
1380 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1381
1382 if (devices[physstripe] && devices[physstripe]->devobj)
1383 ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
1384 devices[physstripe]->devitem.dev_id);
1385
1386 recovered = true;
1387
1388 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1389 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1390 sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1391 if (!NT_SUCCESS(Status)) {
1392 WARN("write_data_phys returned %08lx\n", Status);
1393 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1394 }
1395 }
1396 }
1397 }
1398
1399 if (!recovered) {
1400 bool read_q = false;
1401
1402 if (devices[parity2] && devices[parity2]->devobj) {
1403 Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off,
1404 Vcb->superblock.sector_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.sector_size), false);
1405 if (!NT_SUCCESS(Status)) {
1406 ERR("sync_read_phys returned %08lx\n", Status);
1407 log_device_error(Vcb, devices[parity2], BTRFS_DEV_STAT_READ_ERRORS);
1408 } else
1409 read_q = true;
1410 }
1411
1412 if (read_q) {
1413 if (num_errors == 1) {
1414 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.sector_size));
1415
1416 if (!devices[physstripe] || !devices[physstripe]->devobj)
1417 recovered = true;
1418 else
1419 recovered = check_sector_csum(Vcb, sector + (ci->num_stripes * Vcb->superblock.sector_size), ptr);
1420 } else {
1421 for (j = 0; j < ci->num_stripes - 1; j++) {
1422 if (j != stripe) {
1423 raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.sector_size));
1424
1425 if (check_sector_csum(Vcb, sector + (ci->num_stripes * Vcb->superblock.sector_size), ptr)) {
1426 recovered = true;
1427 error_stripe = j;
1428 break;
1429 }
1430 }
1431 }
1432 }
1433 }
1434
1435 if (recovered) {
1436 uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1437
1438 if (devices[physstripe] && devices[physstripe]->devobj)
1439 ERR("recovering from checksum error at %I64x, device %I64x\n",
1440 addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[physstripe]->devitem.dev_id);
1441
1442 RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1443
1444 if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1445 Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1446 sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1447 if (!NT_SUCCESS(Status)) {
1448 WARN("write_data_phys returned %08lx\n", Status);
1449 log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
1450 }
1451 }
1452
1453 if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1454 if (error_stripe == ci->num_stripes - 2) {
1455 ERR("recovering from parity error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
1456 devices[error_stripe_phys]->devitem.dev_id);
1457
1458 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1459
1460 RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1461
1462 for (j = 0; j < ci->num_stripes - 2; j++) {
1463 if (j == stripe) {
1464 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size),
1465 Vcb->superblock.sector_size);
1466 } else {
1467 do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size),
1468 Vcb->superblock.sector_size);
1469 }
1470 }
1471 } else {
1472 ERR("recovering from checksum error at %I64x, device %I64x\n",
1473 addr + UInt32x32To64(i, Vcb->superblock.sector_size) + ((error_stripe - stripe) * ci->stripe_length),
1474 devices[error_stripe_phys]->devitem.dev_id);
1475
1476 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1477
1478 RtlCopyMemory(sector + (error_stripe * Vcb->superblock.sector_size),
1479 sector + ((ci->num_stripes + 1) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1480 }
1481 }
1482
1483 if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1484 Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off,
1485 sector + (error_stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1486 if (!NT_SUCCESS(Status)) {
1487 WARN("write_data_phys returned %08lx\n", Status);
1488 log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
1489 }
1490 }
1491 }
1492 }
1493 }
1494
1495 if (!recovered) {
1496 ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
1497 ExFreePool(sector);
1498 return STATUS_CRC_ERROR;
1499 }
1500 }
1501
1502 if (ptr)
1503 ptr = (uint8_t*)ptr + Vcb->csum_size;
1504 }
1505
1506 ExFreePool(sector);
1507 }
1508
1509 return STATUS_SUCCESS;
1510 }
1511
1512 NTSTATUS read_data(_In_ device_extension* Vcb, _In_ uint64_t addr, _In_ uint32_t length, _In_reads_bytes_opt_(length*sizeof(uint32_t)/Vcb->superblock.sector_size) void* csum,
1513 _In_ bool is_tree, _Out_writes_bytes_(length) uint8_t* buf, _In_opt_ chunk* c, _Out_opt_ chunk** pc, _In_opt_ PIRP Irp, _In_ uint64_t generation, _In_ bool file_read,
1514 _In_ ULONG priority) {
1515 CHUNK_ITEM* ci;
1516 CHUNK_ITEM_STRIPE* cis;
1517 read_data_context context;
1518 uint64_t type, offset, total_reading = 0;
1519 NTSTATUS Status;
1520 device** devices = NULL;
1521 uint16_t i, startoffstripe, allowed_missing, missing_devices = 0;
1522 uint8_t* dummypage = NULL;
1523 PMDL dummy_mdl = NULL;
1524 bool need_to_wait;
1525 uint64_t lockaddr, locklen;
1526
1527 if (Vcb->log_to_phys_loaded) {
1528 if (!c) {
1529 c = get_chunk_from_address(Vcb, addr);
1530
1531 if (!c) {
1532 ERR("get_chunk_from_address failed\n");
1533 return STATUS_INTERNAL_ERROR;
1534 }
1535 }
1536
1537 ci = c->chunk_item;
1538 offset = c->offset;
1539 devices = c->devices;
1540
1541 if (pc)
1542 *pc = c;
1543 } else {
1544 LIST_ENTRY* le = Vcb->sys_chunks.Flink;
1545
1546 ci = NULL;
1547
1548 c = NULL;
1549 while (le != &Vcb->sys_chunks) {
1550 sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry);
1551
1552 if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) {
1553 CHUNK_ITEM* chunk_item = sc->data;
1554
1555 if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) {
1556 ci = chunk_item;
1557 offset = sc->key.offset;
1558 cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1];
1559
1560 devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG);
1561 if (!devices) {
1562 ERR("out of memory\n");
1563 return STATUS_INSUFFICIENT_RESOURCES;
1564 }
1565
1566 for (i = 0; i < ci->num_stripes; i++) {
1567 devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid);
1568 }
1569
1570 break;
1571 }
1572 }
1573
1574 le = le->Flink;
1575 }
1576
1577 if (!ci) {
1578 ERR("could not find chunk for %I64x in bootstrap\n", addr);
1579 return STATUS_INTERNAL_ERROR;
1580 }
1581
1582 if (pc)
1583 *pc = NULL;
1584 }
1585
1586 if (ci->type & BLOCK_FLAG_DUPLICATE) {
1587 type = BLOCK_FLAG_DUPLICATE;
1588 allowed_missing = ci->num_stripes - 1;
1589 } else if (ci->type & BLOCK_FLAG_RAID0) {
1590 type = BLOCK_FLAG_RAID0;
1591 allowed_missing = 0;
1592 } else if (ci->type & BLOCK_FLAG_RAID1) {
1593 type = BLOCK_FLAG_DUPLICATE;
1594 allowed_missing = 1;
1595 } else if (ci->type & BLOCK_FLAG_RAID10) {
1596 type = BLOCK_FLAG_RAID10;
1597 allowed_missing = 1;
1598 } else if (ci->type & BLOCK_FLAG_RAID5) {
1599 type = BLOCK_FLAG_RAID5;
1600 allowed_missing = 1;
1601 } else if (ci->type & BLOCK_FLAG_RAID6) {
1602 type = BLOCK_FLAG_RAID6;
1603 allowed_missing = 2;
1604 } else if (ci->type & BLOCK_FLAG_RAID1C3) {
1605 type = BLOCK_FLAG_DUPLICATE;
1606 allowed_missing = 2;
1607 } else if (ci->type & BLOCK_FLAG_RAID1C4) {
1608 type = BLOCK_FLAG_DUPLICATE;
1609 allowed_missing = 3;
1610 } else { // SINGLE
1611 type = BLOCK_FLAG_DUPLICATE;
1612 allowed_missing = 0;
1613 }
1614
1615 cis = (CHUNK_ITEM_STRIPE*)&ci[1];
1616
1617 RtlZeroMemory(&context, sizeof(read_data_context));
1618 KeInitializeEvent(&context.Event, NotificationEvent, false);
1619
1620 context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG);
1621 if (!context.stripes) {
1622 ERR("out of memory\n");
1623 return STATUS_INSUFFICIENT_RESOURCES;
1624 }
1625
1626 if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) {
1627 get_raid56_lock_range(c, addr, length, &lockaddr, &locklen);
1628 chunk_lock_range(Vcb, c, lockaddr, locklen);
1629 }
1630
1631 RtlZeroMemory(context.stripes, sizeof(read_data_stripe) * ci->num_stripes);
1632
1633 context.buflen = length;
1634 context.num_stripes = ci->num_stripes;
1635 context.stripes_left = context.num_stripes;
1636 context.sector_size = Vcb->superblock.sector_size;
1637 context.csum = csum;
1638 context.tree = is_tree;
1639 context.type = type;
1640
1641 if (type == BLOCK_FLAG_RAID0) {
1642 uint64_t startoff, endoff;
1643 uint16_t endoffstripe, stripe;
1644 uint32_t *stripeoff, pos;
1645 PMDL master_mdl;
1646 PFN_NUMBER* pfns;
1647
1648 // FIXME - test this still works if page size isn't the same as sector size
1649
1650 // This relies on the fact that MDLs are followed in memory by the page file numbers,
1651 // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0
1652 // data for you without doing a memcpy yourself.
1653 // MDLs are officially opaque, so this might very well break in future versions of Windows.
1654
1655 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe);
1656 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe);
1657
1658 if (file_read) {
1659 // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL
1660 // with duplicated dummy PFNs, which confuse check_csum. Ah well.
1661 // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested.
1662
1663 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1664
1665 if (!context.va) {
1666 ERR("out of memory\n");
1667 Status = STATUS_INSUFFICIENT_RESOURCES;
1668 goto exit;
1669 }
1670 } else
1671 context.va = buf;
1672
1673 master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1674 if (!master_mdl) {
1675 ERR("out of memory\n");
1676 Status = STATUS_INSUFFICIENT_RESOURCES;
1677 goto exit;
1678 }
1679
1680 Status = STATUS_SUCCESS;
1681
1682 _SEH2_TRY {
1683 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1684 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1685 Status = _SEH2_GetExceptionCode();
1686 } _SEH2_END;
1687
1688 if (!NT_SUCCESS(Status)) {
1689 ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1690 IoFreeMdl(master_mdl);
1691 goto exit;
1692 }
1693
1694 pfns = (PFN_NUMBER*)(master_mdl + 1);
1695
1696 for (i = 0; i < ci->num_stripes; i++) {
1697 if (startoffstripe > i)
1698 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1699 else if (startoffstripe == i)
1700 context.stripes[i].stripestart = startoff;
1701 else
1702 context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length);
1703
1704 if (endoffstripe > i)
1705 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1706 else if (endoffstripe == i)
1707 context.stripes[i].stripeend = endoff + 1;
1708 else
1709 context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length);
1710
1711 if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
1712 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL);
1713
1714 if (!context.stripes[i].mdl) {
1715 ERR("IoAllocateMdl failed\n");
1716 MmUnlockPages(master_mdl);
1717 IoFreeMdl(master_mdl);
1718 Status = STATUS_INSUFFICIENT_RESOURCES;
1719 goto exit;
1720 }
1721 }
1722 }
1723
1724 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
1725 if (!stripeoff) {
1726 ERR("out of memory\n");
1727 MmUnlockPages(master_mdl);
1728 IoFreeMdl(master_mdl);
1729 Status = STATUS_INSUFFICIENT_RESOURCES;
1730 goto exit;
1731 }
1732
1733 RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
1734
1735 pos = 0;
1736 stripe = startoffstripe;
1737 while (pos < length) {
1738 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
1739
1740 if (pos == 0) {
1741 uint32_t readlen = (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length));
1742
1743 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1744
1745 stripeoff[stripe] += readlen;
1746 pos += readlen;
1747 } else if (length - pos < ci->stripe_length) {
1748 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1749
1750 pos = length;
1751 } else {
1752 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1753
1754 stripeoff[stripe] += (uint32_t)ci->stripe_length;
1755 pos += (uint32_t)ci->stripe_length;
1756 }
1757
1758 stripe = (stripe + 1) % ci->num_stripes;
1759 }
1760
1761 MmUnlockPages(master_mdl);
1762 IoFreeMdl(master_mdl);
1763
1764 ExFreePool(stripeoff);
1765 } else if (type == BLOCK_FLAG_RAID10) {
1766 uint64_t startoff, endoff;
1767 uint16_t endoffstripe, j, stripe;
1768 ULONG orig_ls;
1769 PMDL master_mdl;
1770 PFN_NUMBER* pfns;
1771 uint32_t* stripeoff, pos;
1772 read_data_stripe** stripes;
1773
1774 if (c)
1775 orig_ls = c->last_stripe;
1776 else
1777 orig_ls = 0;
1778
1779 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe);
1780 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe);
1781
1782 if ((ci->num_stripes % ci->sub_stripes) != 0) {
1783 ERR("chunk %I64x: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes);
1784 Status = STATUS_INTERNAL_ERROR;
1785 goto exit;
1786 }
1787
1788 if (file_read) {
1789 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1790
1791 if (!context.va) {
1792 ERR("out of memory\n");
1793 Status = STATUS_INSUFFICIENT_RESOURCES;
1794 goto exit;
1795 }
1796 } else
1797 context.va = buf;
1798
1799 context.firstoff = (uint16_t)((startoff % ci->stripe_length) / Vcb->superblock.sector_size);
1800 context.startoffstripe = startoffstripe;
1801 context.sectors_per_stripe = (uint16_t)(ci->stripe_length / Vcb->superblock.sector_size);
1802
1803 startoffstripe *= ci->sub_stripes;
1804 endoffstripe *= ci->sub_stripes;
1805
1806 if (c)
1807 c->last_stripe = (orig_ls + 1) % ci->sub_stripes;
1808
1809 master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1810 if (!master_mdl) {
1811 ERR("out of memory\n");
1812 Status = STATUS_INSUFFICIENT_RESOURCES;
1813 goto exit;
1814 }
1815
1816 Status = STATUS_SUCCESS;
1817
1818 _SEH2_TRY {
1819 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1820 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1821 Status = _SEH2_GetExceptionCode();
1822 } _SEH2_END;
1823
1824 if (!NT_SUCCESS(Status)) {
1825 ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1826 IoFreeMdl(master_mdl);
1827 goto exit;
1828 }
1829
1830 pfns = (PFN_NUMBER*)(master_mdl + 1);
1831
1832 stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1833 if (!stripes) {
1834 ERR("out of memory\n");
1835 MmUnlockPages(master_mdl);
1836 IoFreeMdl(master_mdl);
1837 Status = STATUS_INSUFFICIENT_RESOURCES;
1838 goto exit;
1839 }
1840
1841 RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes);
1842
1843 for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
1844 uint64_t sstart, send;
1845 bool stripeset = false;
1846
1847 if (startoffstripe > i)
1848 sstart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1849 else if (startoffstripe == i)
1850 sstart = startoff;
1851 else
1852 sstart = startoff - (startoff % ci->stripe_length);
1853
1854 if (endoffstripe > i)
1855 send = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1856 else if (endoffstripe == i)
1857 send = endoff + 1;
1858 else
1859 send = endoff - (endoff % ci->stripe_length);
1860
1861 for (j = 0; j < ci->sub_stripes; j++) {
1862 if (j == orig_ls && devices[i+j] && devices[i+j]->devobj) {
1863 context.stripes[i+j].stripestart = sstart;
1864 context.stripes[i+j].stripeend = send;
1865 stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1866
1867 if (sstart != send) {
1868 context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL);
1869
1870 if (!context.stripes[i+j].mdl) {
1871 ERR("IoAllocateMdl failed\n");
1872 MmUnlockPages(master_mdl);
1873 IoFreeMdl(master_mdl);
1874 Status = STATUS_INSUFFICIENT_RESOURCES;
1875 goto exit;
1876 }
1877 }
1878
1879 stripeset = true;
1880 } else
1881 context.stripes[i+j].status = ReadDataStatus_Skip;
1882 }
1883
1884 if (!stripeset) {
1885 for (j = 0; j < ci->sub_stripes; j++) {
1886 if (devices[i+j] && devices[i+j]->devobj) {
1887 context.stripes[i+j].stripestart = sstart;
1888 context.stripes[i+j].stripeend = send;
1889 context.stripes[i+j].status = ReadDataStatus_Pending;
1890 stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1891
1892 if (sstart != send) {
1893 context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL);
1894
1895 if (!context.stripes[i+j].mdl) {
1896 ERR("IoAllocateMdl failed\n");
1897 MmUnlockPages(master_mdl);
1898 IoFreeMdl(master_mdl);
1899 Status = STATUS_INSUFFICIENT_RESOURCES;
1900 goto exit;
1901 }
1902 }
1903
1904 stripeset = true;
1905 break;
1906 }
1907 }
1908
1909 if (!stripeset) {
1910 ERR("could not find stripe to read\n");
1911 Status = STATUS_DEVICE_NOT_READY;
1912 goto exit;
1913 }
1914 }
1915 }
1916
1917 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1918 if (!stripeoff) {
1919 ERR("out of memory\n");
1920 MmUnlockPages(master_mdl);
1921 IoFreeMdl(master_mdl);
1922 Status = STATUS_INSUFFICIENT_RESOURCES;
1923 goto exit;
1924 }
1925
1926 RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes);
1927
1928 pos = 0;
1929 stripe = startoffstripe / ci->sub_stripes;
1930 while (pos < length) {
1931 PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripe]->mdl + 1);
1932
1933 if (pos == 0) {
1934 uint32_t readlen = (uint32_t)min(stripes[stripe]->stripeend - stripes[stripe]->stripestart,
1935 ci->stripe_length - (stripes[stripe]->stripestart % ci->stripe_length));
1936
1937 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1938
1939 stripeoff[stripe] += readlen;
1940 pos += readlen;
1941 } else if (length - pos < ci->stripe_length) {
1942 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1943
1944 pos = length;
1945 } else {
1946 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1947
1948 stripeoff[stripe] += (ULONG)ci->stripe_length;
1949 pos += (ULONG)ci->stripe_length;
1950 }
1951
1952 stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes);
1953 }
1954
1955 MmUnlockPages(master_mdl);
1956 IoFreeMdl(master_mdl);
1957
1958 ExFreePool(stripeoff);
1959 ExFreePool(stripes);
1960 } else if (type == BLOCK_FLAG_DUPLICATE) {
1961 uint64_t orig_ls;
1962
1963 if (c)
1964 orig_ls = i = c->last_stripe;
1965 else
1966 orig_ls = i = 0;
1967
1968 while (!devices[i] || !devices[i]->devobj) {
1969 i = (i + 1) % ci->num_stripes;
1970
1971 if (i == orig_ls) {
1972 ERR("no devices available to service request\n");
1973 Status = STATUS_DEVICE_NOT_READY;
1974 goto exit;
1975 }
1976 }
1977
1978 if (c)
1979 c->last_stripe = (i + 1) % ci->num_stripes;
1980
1981 context.stripes[i].stripestart = addr - offset;
1982 context.stripes[i].stripeend = context.stripes[i].stripestart + length;
1983
1984 if (file_read) {
1985 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1986
1987 if (!context.va) {
1988 ERR("out of memory\n");
1989 Status = STATUS_INSUFFICIENT_RESOURCES;
1990 goto exit;
1991 }
1992
1993 context.stripes[i].mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1994 if (!context.stripes[i].mdl) {
1995 ERR("IoAllocateMdl failed\n");
1996 Status = STATUS_INSUFFICIENT_RESOURCES;
1997 goto exit;
1998 }
1999
2000 MmBuildMdlForNonPagedPool(context.stripes[i].mdl);
2001 } else {
2002 context.stripes[i].mdl = IoAllocateMdl(buf, length, false, false, NULL);
2003
2004 if (!context.stripes[i].mdl) {
2005 ERR("IoAllocateMdl failed\n");
2006 Status = STATUS_INSUFFICIENT_RESOURCES;
2007 goto exit;
2008 }
2009
2010 Status = STATUS_SUCCESS;
2011
2012 _SEH2_TRY {
2013 MmProbeAndLockPages(context.stripes[i].mdl, KernelMode, IoWriteAccess);
2014 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2015 Status = _SEH2_GetExceptionCode();
2016 } _SEH2_END;
2017
2018 if (!NT_SUCCESS(Status)) {
2019 ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
2020 goto exit;
2021 }
2022 }
2023 } else if (type == BLOCK_FLAG_RAID5) {
2024 uint64_t startoff, endoff;
2025 uint16_t endoffstripe, parity;
2026 uint32_t *stripeoff, pos;
2027 PMDL master_mdl;
2028 PFN_NUMBER *pfns, dummy;
2029 bool need_dummy = false;
2030
2031 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe);
2032 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe);
2033
2034 if (file_read) {
2035 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
2036
2037 if (!context.va) {
2038 ERR("out of memory\n");
2039 Status = STATUS_INSUFFICIENT_RESOURCES;
2040 goto exit;
2041 }
2042 } else
2043 context.va = buf;
2044
2045 master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
2046 if (!master_mdl) {
2047 ERR("out of memory\n");
2048 Status = STATUS_INSUFFICIENT_RESOURCES;
2049 goto exit;
2050 }
2051
2052 Status = STATUS_SUCCESS;
2053
2054 _SEH2_TRY {
2055 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
2056 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2057 Status = _SEH2_GetExceptionCode();
2058 } _SEH2_END;
2059
2060 if (!NT_SUCCESS(Status)) {
2061 ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
2062 IoFreeMdl(master_mdl);
2063 goto exit;
2064 }
2065
2066 pfns = (PFN_NUMBER*)(master_mdl + 1);
2067
2068 pos = 0;
2069 while (pos < length) {
2070 parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
2071
2072 if (pos == 0) {
2073 uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes;
2074 ULONG skip, readlen;
2075
2076 i = startoffstripe;
2077 while (stripe != parity) {
2078 if (i == startoffstripe) {
2079 readlen = min(length, (ULONG)(ci->stripe_length - (startoff % ci->stripe_length)));
2080
2081 context.stripes[stripe].stripestart = startoff;
2082 context.stripes[stripe].stripeend = startoff + readlen;
2083
2084 pos += readlen;
2085
2086 if (pos == length)
2087 break;
2088 } else {
2089 readlen = min(length - pos, (ULONG)ci->stripe_length);
2090
2091 context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2092 context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2093
2094 pos += readlen;
2095
2096 if (pos == length)
2097 break;
2098 }
2099
2100 i++;
2101 stripe = (stripe + 1) % ci->num_stripes;
2102 }
2103
2104 if (pos == length)
2105 break;
2106
2107 for (i = 0; i < startoffstripe; i++) {
2108 uint16_t stripe2 = (parity + i + 1) % ci->num_stripes;
2109
2110 context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2111 }
2112
2113 context.stripes[parity].stripestart = context.stripes[parity].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2114
2115 if (length - pos > ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length) {
2116 skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length)) - 1);
2117
2118 for (i = 0; i < ci->num_stripes; i++) {
2119 context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2120 }
2121
2122 pos += (uint32_t)(skip * (ci->num_stripes - 1) * ci->num_stripes * ci->stripe_length);
2123 need_dummy = true;
2124 }
2125 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2126 for (i = 0; i < ci->num_stripes; i++) {
2127 context.stripes[i].stripeend += ci->stripe_length;
2128 }
2129
2130 pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 1));
2131 need_dummy = true;
2132 } else {
2133 uint16_t stripe = (parity + 1) % ci->num_stripes;
2134
2135 i = 0;
2136 while (stripe != parity) {
2137 if (endoffstripe == i) {
2138 context.stripes[stripe].stripeend = endoff + 1;
2139 break;
2140 } else if (endoffstripe > i)
2141 context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2142
2143 i++;
2144 stripe = (stripe + 1) % ci->num_stripes;
2145 }
2146
2147 break;
2148 }
2149 }
2150
2151 for (i = 0; i < ci->num_stripes; i++) {
2152 if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2153 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart),
2154 false, false, NULL);
2155
2156 if (!context.stripes[i].mdl) {
2157 ERR("IoAllocateMdl failed\n");
2158 MmUnlockPages(master_mdl);
2159 IoFreeMdl(master_mdl);
2160 Status = STATUS_INSUFFICIENT_RESOURCES;
2161 goto exit;
2162 }
2163 }
2164 }
2165
2166 if (need_dummy) {
2167 dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2168 if (!dummypage) {
2169 ERR("out of memory\n");
2170 MmUnlockPages(master_mdl);
2171 IoFreeMdl(master_mdl);
2172 Status = STATUS_INSUFFICIENT_RESOURCES;
2173 goto exit;
2174 }
2175
2176 dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL);
2177 if (!dummy_mdl) {
2178 ERR("IoAllocateMdl failed\n");
2179 MmUnlockPages(master_mdl);
2180 IoFreeMdl(master_mdl);
2181 Status = STATUS_INSUFFICIENT_RESOURCES;
2182 goto exit;
2183 }
2184
2185 MmBuildMdlForNonPagedPool(dummy_mdl);
2186
2187 dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2188 }
2189
2190 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
2191 if (!stripeoff) {
2192 ERR("out of memory\n");
2193 MmUnlockPages(master_mdl);
2194 IoFreeMdl(master_mdl);
2195 Status = STATUS_INSUFFICIENT_RESOURCES;
2196 goto exit;
2197 }
2198
2199 RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
2200
2201 pos = 0;
2202
2203 while (pos < length) {
2204 PFN_NUMBER* stripe_pfns;
2205
2206 parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
2207
2208 if (pos == 0) {
2209 uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes;
2210 uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2211 ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2212
2213 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2214
2215 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2216
2217 stripeoff[stripe] = readlen;
2218 pos += readlen;
2219
2220 stripe = (stripe + 1) % ci->num_stripes;
2221
2222 while (stripe != parity) {
2223 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2224 readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2225
2226 if (readlen == 0)
2227 break;
2228
2229 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2230
2231 stripeoff[stripe] = readlen;
2232 pos += readlen;
2233
2234 stripe = (stripe + 1) % ci->num_stripes;
2235 }
2236 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2237 uint16_t stripe = (parity + 1) % ci->num_stripes;
2238 ULONG k;
2239
2240 while (stripe != parity) {
2241 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2242
2243 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2244
2245 stripeoff[stripe] += (uint32_t)ci->stripe_length;
2246 pos += (uint32_t)ci->stripe_length;
2247
2248 stripe = (stripe + 1) % ci->num_stripes;
2249 }
2250
2251 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity].mdl + 1);
2252
2253 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2254 stripe_pfns[stripeoff[parity] >> PAGE_SHIFT] = dummy;
2255 stripeoff[parity] += PAGE_SIZE;
2256 }
2257 } else {
2258 uint16_t stripe = (parity + 1) % ci->num_stripes;
2259 uint32_t readlen;
2260
2261 while (pos < length) {
2262 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2263 readlen = min(length - pos, (ULONG)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2264
2265 if (readlen == 0)
2266 break;
2267
2268 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2269
2270 stripeoff[stripe] += readlen;
2271 pos += readlen;
2272
2273 stripe = (stripe + 1) % ci->num_stripes;
2274 }
2275 }
2276 }
2277
2278 MmUnlockPages(master_mdl);
2279 IoFreeMdl(master_mdl);
2280
2281 ExFreePool(stripeoff);
2282 } else if (type == BLOCK_FLAG_RAID6) {
2283 uint64_t startoff, endoff;
2284 uint16_t endoffstripe, parity1;
2285 uint32_t *stripeoff, pos;
2286 PMDL master_mdl;
2287 PFN_NUMBER *pfns, dummy;
2288 bool need_dummy = false;
2289
2290 get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe);
2291 get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe);
2292
2293 if (file_read) {
2294 context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
2295
2296 if (!context.va) {
2297 ERR("out of memory\n");
2298 Status = STATUS_INSUFFICIENT_RESOURCES;
2299 goto exit;
2300 }
2301 } else
2302 context.va = buf;
2303
2304 master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
2305 if (!master_mdl) {
2306 ERR("out of memory\n");
2307 Status = STATUS_INSUFFICIENT_RESOURCES;
2308 goto exit;
2309 }
2310
2311 Status = STATUS_SUCCESS;
2312
2313 _SEH2_TRY {
2314 MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
2315 } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2316 Status = _SEH2_GetExceptionCode();
2317 } _SEH2_END;
2318
2319 if (!NT_SUCCESS(Status)) {
2320 ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
2321 IoFreeMdl(master_mdl);
2322 goto exit;
2323 }
2324
2325 pfns = (PFN_NUMBER*)(master_mdl + 1);
2326
2327 pos = 0;
2328 while (pos < length) {
2329 parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2330
2331 if (pos == 0) {
2332 uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes, parity2;
2333 ULONG skip, readlen;
2334
2335 i = startoffstripe;
2336 while (stripe != parity1) {
2337 if (i == startoffstripe) {
2338 readlen = (ULONG)min(length, ci->stripe_length - (startoff % ci->stripe_length));
2339
2340 context.stripes[stripe].stripestart = startoff;
2341 context.stripes[stripe].stripeend = startoff + readlen;
2342
2343 pos += readlen;
2344
2345 if (pos == length)
2346 break;
2347 } else {
2348 readlen = min(length - pos, (ULONG)ci->stripe_length);
2349
2350 context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2351 context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2352
2353 pos += readlen;
2354
2355 if (pos == length)
2356 break;
2357 }
2358
2359 i++;
2360 stripe = (stripe + 1) % ci->num_stripes;
2361 }
2362
2363 if (pos == length)
2364 break;
2365
2366 for (i = 0; i < startoffstripe; i++) {
2367 uint16_t stripe2 = (parity1 + i + 2) % ci->num_stripes;
2368
2369 context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2370 }
2371
2372 context.stripes[parity1].stripestart = context.stripes[parity1].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2373
2374 parity2 = (parity1 + 1) % ci->num_stripes;
2375 context.stripes[parity2].stripestart = context.stripes[parity2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2376
2377 if (length - pos > ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length) {
2378 skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length)) - 1);
2379
2380 for (i = 0; i < ci->num_stripes; i++) {
2381 context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2382 }
2383
2384 pos += (uint32_t)(skip * (ci->num_stripes - 2) * ci->num_stripes * ci->stripe_length);
2385 need_dummy = true;
2386 }
2387 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2388 for (i = 0; i < ci->num_stripes; i++) {
2389 context.stripes[i].stripeend += ci->stripe_length;
2390 }
2391
2392 pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 2));
2393 need_dummy = true;
2394 } else {
2395 uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2396
2397 i = 0;
2398 while (stripe != parity1) {
2399 if (endoffstripe == i) {
2400 context.stripes[stripe].stripeend = endoff + 1;
2401 break;
2402 } else if (endoffstripe > i)
2403 context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2404
2405 i++;
2406 stripe = (stripe + 1) % ci->num_stripes;
2407 }
2408
2409 break;
2410 }
2411 }
2412
2413 for (i = 0; i < ci->num_stripes; i++) {
2414 if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2415 context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL);
2416
2417 if (!context.stripes[i].mdl) {
2418 ERR("IoAllocateMdl failed\n");
2419 MmUnlockPages(master_mdl);
2420 IoFreeMdl(master_mdl);
2421 Status = STATUS_INSUFFICIENT_RESOURCES;
2422 goto exit;
2423 }
2424 }
2425 }
2426
2427 if (need_dummy) {
2428 dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2429 if (!dummypage) {
2430 ERR("out of memory\n");
2431 MmUnlockPages(master_mdl);
2432 IoFreeMdl(master_mdl);
2433 Status = STATUS_INSUFFICIENT_RESOURCES;
2434 goto exit;
2435 }
2436
2437 dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL);
2438 if (!dummy_mdl) {
2439 ERR("IoAllocateMdl failed\n");
2440 MmUnlockPages(master_mdl);
2441 IoFreeMdl(master_mdl);
2442 Status = STATUS_INSUFFICIENT_RESOURCES;
2443 goto exit;
2444 }
2445
2446 MmBuildMdlForNonPagedPool(dummy_mdl);
2447
2448 dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2449 }
2450
2451 stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
2452 if (!stripeoff) {
2453 ERR("out of memory\n");
2454 MmUnlockPages(master_mdl);
2455 IoFreeMdl(master_mdl);
2456 Status = STATUS_INSUFFICIENT_RESOURCES;
2457 goto exit;
2458 }
2459
2460 RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
2461
2462 pos = 0;
2463
2464 while (pos < length) {
2465 PFN_NUMBER* stripe_pfns;
2466
2467 parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2468
2469 if (pos == 0) {
2470 uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes;
2471 uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2472 ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2473
2474 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2475
2476 RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2477
2478 stripeoff[stripe] = readlen;
2479 pos += readlen;
2480
2481 stripe = (stripe + 1) % ci->num_stripes;
2482
2483 while (stripe != parity1) {
2484 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2485 readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2486
2487 if (readlen == 0)
2488 break;
2489
2490 RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2491
2492 stripeoff[stripe] = readlen;
2493 pos += readlen;
2494
2495 stripe = (stripe + 1) % ci->num_stripes;
2496 }
2497 } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2498 uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2499 uint16_t parity2 = (parity1 + 1) % ci->num_stripes;
2500 ULONG k;
2501
2502 while (stripe != parity1) {
2503 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2504
2505 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2506
2507 stripeoff[stripe] += (uint32_t)ci->stripe_length;
2508 pos += (uint32_t)ci->stripe_length;
2509
2510 stripe = (stripe + 1) % ci->num_stripes;
2511 }
2512
2513 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity1].mdl + 1);
2514
2515 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2516 stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT] = dummy;
2517 stripeoff[parity1] += PAGE_SIZE;
2518 }
2519
2520 stripe_pfns = (PFN_NUMBER*)(context.stripes[parity2].mdl + 1);
2521
2522 for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2523 stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT] = dummy;
2524 stripeoff[parity2] += PAGE_SIZE;
2525 }
2526 } else {
2527 uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2528 uint32_t readlen;
2529
2530 while (pos < length) {
2531 stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2532 readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2533
2534 if (readlen == 0)
2535 break;
2536
2537 RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2538
2539 stripeoff[stripe] += readlen;
2540 pos += readlen;
2541
2542 stripe = (stripe + 1) % ci->num_stripes;
2543 }
2544 }
2545 }
2546
2547 MmUnlockPages(master_mdl);
2548 IoFreeMdl(master_mdl);
2549
2550 ExFreePool(stripeoff);
2551 }
2552
2553 context.address = addr;
2554
2555 for (i = 0; i < ci->num_stripes; i++) {
2556 if (!devices[i] || !devices[i]->devobj || context.stripes[i].stripestart == context.stripes[i].stripeend) {
2557 context.stripes[i].status = ReadDataStatus_MissingDevice;
2558 context.stripes_left--;
2559
2560 if (!devices[i] || !devices[i]->devobj)
2561 missing_devices++;
2562 }
2563 }
2564
2565 if (missing_devices > allowed_missing) {
2566 ERR("not enough devices to service request (%u missing)\n", missing_devices);
2567 Status = STATUS_UNEXPECTED_IO_ERROR;
2568 goto exit;
2569 }
2570
2571 for (i = 0; i < ci->num_stripes; i++) {
2572 PIO_STACK_LOCATION IrpSp;
2573
2574 if (devices[i] && devices[i]->devobj && context.stripes[i].stripestart != context.stripes[i].stripeend && context.stripes[i].status != ReadDataStatus_Skip) {
2575 context.stripes[i].context = (struct read_data_context*)&context;
2576
2577 if (type == BLOCK_FLAG_RAID10) {
2578 context.stripes[i].stripenum = i / ci->sub_stripes;
2579 }
2580
2581 if (!Irp) {
2582 context.stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, false);
2583
2584 if (!context.stripes[i].Irp) {
2585 ERR("IoAllocateIrp failed\n");
2586 Status = STATUS_INSUFFICIENT_RESOURCES;
2587 goto exit;
2588 }
2589 } else {
2590 context.stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize);
2591
2592 if (!context.stripes[i].Irp) {
2593 ERR("IoMakeAssociatedIrp failed\n");
2594 Status = STATUS_INSUFFICIENT_RESOURCES;
2595 goto exit;
2596 }
2597 }
2598
2599 IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
2600 IrpSp->MajorFunction = IRP_MJ_READ;
2601 IrpSp->MinorFunction = IRP_MN_NORMAL;
2602 IrpSp->FileObject = devices[i]->fileobj;
2603
2604 if (devices[i]->devobj->Flags & DO_BUFFERED_IO) {
2605 context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), ALLOC_TAG);
2606 if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
2607 ERR("out of memory\n");
2608 Status = STATUS_INSUFFICIENT_RESOURCES;
2609 goto exit;
2610 }
2611
2612 context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION;
2613
2614 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2615 } else if (devices[i]->devobj->Flags & DO_DIRECT_IO)
2616 context.stripes[i].Irp->MdlAddress = context.stripes[i].mdl;
2617 else
2618 context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2619
2620 IrpSp->Parameters.Read.Length = (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart);
2621 IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].stripestart + cis[i].offset;
2622
2623 total_reading += IrpSp->Parameters.Read.Length;
2624
2625 context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
2626
2627 IoSetCompletionRoutine(context.stripes[i].Irp, read_data_completion, &context.stripes[i], true, true, true);
2628
2629 context.stripes[i].status = ReadDataStatus_Pending;
2630 }
2631 }
2632
2633 need_to_wait = false;
2634 for (i = 0; i < ci->num_stripes; i++) {
2635 if (context.stripes[i].status != ReadDataStatus_MissingDevice && context.stripes[i].status != ReadDataStatus_Skip) {
2636 IoCallDriver(devices[i]->devobj, context.stripes[i].Irp);
2637 need_to_wait = true;
2638 }
2639 }
2640
2641 if (need_to_wait)
2642 KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);
2643
2644 if (diskacc)
2645 fFsRtlUpdateDiskCounters(total_reading, 0);
2646
    // check whether any of the devices returned a "user-induced" error
2648
2649 for (i = 0; i < ci->num_stripes; i++) {
2650 if (context.stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context.stripes[i].iosb.Status)) {
2651 Status = context.stripes[i].iosb.Status;
2652 goto exit;
2653 }
2654 }
2655
2656 if (type == BLOCK_FLAG_RAID0) {
2657 Status = read_data_raid0(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2658 if (!NT_SUCCESS(Status)) {
2659 ERR("read_data_raid0 returned %08lx\n", Status);
2660
2661 if (file_read)
2662 ExFreePool(context.va);
2663
2664 goto exit;
2665 }
2666
2667 if (file_read) {
2668 RtlCopyMemory(buf, context.va, length);
2669 ExFreePool(context.va);
2670 }
2671 } else if (type == BLOCK_FLAG_RAID10) {
2672 Status = read_data_raid10(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2673
2674 if (!NT_SUCCESS(Status)) {
2675 ERR("read_data_raid10 returned %08lx\n", Status);
2676
2677 if (file_read)
2678 ExFreePool(context.va);
2679
2680 goto exit;
2681 }
2682
2683 if (file_read) {
2684 RtlCopyMemory(buf, context.va, length);
2685 ExFreePool(context.va);
2686 }
2687 } else if (type == BLOCK_FLAG_DUPLICATE) {
2688 Status = read_data_dup(Vcb, file_read ? context.va : buf, addr, &context, ci, devices, generation);
2689 if (!NT_SUCCESS(Status)) {
2690 ERR("read_data_dup returned %08lx\n", Status);
2691
2692 if (file_read)
2693 ExFreePool(context.va);
2694
2695 goto exit;
2696 }
2697
2698 if (file_read) {
2699 RtlCopyMemory(buf, context.va, length);
2700 ExFreePool(context.va);
2701 }
2702 } else if (type == BLOCK_FLAG_RAID5) {
2703 Status = read_data_raid5(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? true : false);
2704 if (!NT_SUCCESS(Status)) {
2705 ERR("read_data_raid5 returned %08lx\n", Status);
2706
2707 if (file_read)
2708 ExFreePool(context.va);
2709
2710 goto exit;
2711 }
2712
2713 if (file_read) {
2714 RtlCopyMemory(buf, context.va, length);
2715 ExFreePool(context.va);
2716 }
2717 } else if (type == BLOCK_FLAG_RAID6) {
2718 Status = read_data_raid6(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? true : false);
2719 if (!NT_SUCCESS(Status)) {
2720 ERR("read_data_raid6 returned %08lx\n", Status);
2721
2722 if (file_read)
2723 ExFreePool(context.va);
2724
2725 goto exit;
2726 }
2727
2728 if (file_read) {
2729 RtlCopyMemory(buf, context.va, length);
2730 ExFreePool(context.va);
2731 }
2732 }
2733
2734 exit:
2735 if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6))
2736 chunk_unlock_range(Vcb, c, lockaddr, locklen);
2737
2738 if (dummy_mdl)
2739 IoFreeMdl(dummy_mdl);
2740
2741 if (dummypage)
2742 ExFreePool(dummypage);
2743
2744 for (i = 0; i < ci->num_stripes; i++) {
2745 if (context.stripes[i].mdl) {
2746 if (context.stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
2747 MmUnlockPages(context.stripes[i].mdl);
2748
2749 IoFreeMdl(context.stripes[i].mdl);
2750 }
2751
2752 if (context.stripes[i].Irp)
2753 IoFreeIrp(context.stripes[i].Irp);
2754 }
2755
2756 ExFreePool(context.stripes);
2757
2758 if (!Vcb->log_to_phys_loaded)
2759 ExFreePool(devices);
2760
2761 return Status;
2762 }
2763
2764 NTSTATUS read_stream(fcb* fcb, uint8_t* data, uint64_t start, ULONG length, ULONG* pbr) {
2765 ULONG readlen;
2766
2767 TRACE("(%p, %p, %I64x, %lx, %p)\n", fcb, data, start, length, pbr);
2768
2769 if (pbr) *pbr = 0;
2770
2771 if (start >= fcb->adsdata.Length) {
2772 TRACE("tried to read beyond end of stream\n");
2773 return STATUS_END_OF_FILE;
2774 }
2775
2776 if (length == 0) {
2777 WARN("tried to read zero bytes\n");
2778 return STATUS_SUCCESS;
2779 }
2780
2781 if (start + length < fcb->adsdata.Length)
2782 readlen = length;
2783 else
2784 readlen = fcb->adsdata.Length - (ULONG)start;
2785
2786 if (readlen > 0)
2787 RtlCopyMemory(data, fcb->adsdata.Buffer + start, readlen);
2788
2789 if (pbr) *pbr = readlen;
2790
2791 return STATUS_SUCCESS;
2792 }
2793
// One extent's worth of a read_part: a snapshot of the extent-item fields
// needed to service the read. Field names mirror EXTENT_DATA/EXTENT_DATA2;
// the consumer is read_file, below.
typedef struct {
    uint64_t off;          // file offset at which this extent's data appears
    uint64_t ed_size;      // presumably EXTENT_DATA2 size (on-disk extent size) — confirm in read_file
    uint64_t ed_offset;    // presumably EXTENT_DATA2 offset (offset into the extent) — confirm in read_file
    uint64_t ed_num_bytes; // presumably EXTENT_DATA2 num_bytes (logical bytes referenced) — confirm in read_file
} read_part_extent;
2800
// A single contiguous disk read queued up by read_file, possibly covering
// several adjacent extents. Allocated with space for num_extents entries in
// the trailing array (old-style [1] variable-length array idiom; the
// allocation is sized for num_extents elements).
typedef struct {
    LIST_ENTRY list_entry;       // links into read_file's local read_parts list
    uint64_t addr;               // address to read from — presumably a btrfs logical address; confirm against read_data callers
    chunk* c;                    // chunk containing addr
    uint32_t read;               // bytes to fetch from disk for this part
    uint32_t to_read;            // size of the buffer backing the read
    void* csum;                  // checksums covering the range, if any
    bool csum_free;              // whether csum was allocated here and must be freed
    uint8_t* buf;                // buffer the raw data is read into
    bool buf_free;               // whether buf was allocated here and must be freed
    uint32_t bumpoff;            // offset of wanted data within buf (read was rounded down to a sector boundary)
    bool mdl;                    // whether the destination is described by an MDL rather than a plain pointer
    void* data;                  // destination for the (decompressed) file data
    uint8_t compression;         // BTRFS_COMPRESSION_* value for the extents in this part
    unsigned int num_extents;    // number of valid entries in extents[]
    read_part_extent extents[1]; // variable-length trailing array, one entry per extent
} read_part;
2818
// Pending decompression job queued by read_file: cj decompresses into
// decomp (or directly into the caller's buffer when decomp is NULL); once
// the job completes, length bytes at offset within decomp are copied to data.
typedef struct {
    LIST_ENTRY list_entry;
    calc_job* cj;        // queued decompression calc job
    void* decomp;        // scratch buffer for decompressed output, or NULL if in-place
    void* data;          // final destination within the caller's buffer
    unsigned int offset; // offset into decomp at which the wanted data starts
    size_t length;       // number of bytes to copy from decomp to data
} comp_calc_job;
2827
2828 NTSTATUS read_file(fcb* fcb, uint8_t* data, uint64_t start, uint64_t length, ULONG* pbr, PIRP Irp) {
2829 NTSTATUS Status;
2830 uint32_t bytes_read = 0;
2831 uint64_t last_end;
2832 LIST_ENTRY* le;
2833 POOL_TYPE pool_type;
2834 LIST_ENTRY read_parts, calc_jobs;
2835
2836 TRACE("(%p, %p, %I64x, %I64x, %p)\n", fcb, data, start, length, pbr);
2837
2838 if (pbr)
2839 *pbr = 0;
2840
2841 if (start >= fcb->inode_item.st_size) {
2842 WARN("Tried to read beyond end of file\n");
2843 return STATUS_END_OF_FILE;
2844 }
2845
2846 InitializeListHead(&read_parts);
2847 InitializeListHead(&calc_jobs);
2848
2849 pool_type = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? NonPagedPool : PagedPool;
2850
2851 le = fcb->extents.Flink;
2852
2853 last_end = start;
2854
2855 while (le != &fcb->extents) {
2856 uint64_t len;
2857 extent* ext = CONTAINING_RECORD(le, extent, list_entry);
2858
2859 if (!ext->ignore) {
2860 EXTENT_DATA* ed = &ext->extent_data;
2861 EXTENT_DATA2* ed2 = (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) ? (EXTENT_DATA2*)ed->data : NULL;
2862
2863 len = ed2 ? ed2->num_bytes : ed->decoded_size;
2864
2865 if (ext->offset + len <= start) {
2866 last_end = ext->offset + len;
2867 goto nextitem;
2868 }
2869
2870 if (ext->offset > last_end && ext->offset > start + bytes_read) {
2871 uint32_t read = (uint32_t)min(length, ext->offset - max(start, last_end));
2872
2873 RtlZeroMemory(data + bytes_read, read);
2874 bytes_read += read;
2875 length -= read;
2876 }
2877
2878 if (length == 0 || ext->offset > start + bytes_read + length)
2879 break;
2880
2881 if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
2882 WARN("Encryption not supported\n");
2883 Status = STATUS_NOT_IMPLEMENTED;
2884 goto exit;
2885 }
2886
2887 if (ed->encoding != BTRFS_ENCODING_NONE) {
2888 WARN("Other encodings not supported\n");
2889 Status = STATUS_NOT_IMPLEMENTED;
2890 goto exit;
2891 }
2892
2893 switch (ed->type) {
2894 case EXTENT_TYPE_INLINE:
2895 {
2896 uint64_t off = start + bytes_read - ext->offset;
2897 uint32_t read;
2898
2899 if (ed->compression == BTRFS_COMPRESSION_NONE) {
2900 read = (uint32_t)min(min(len, ext->datalen) - off, length);
2901
2902 RtlCopyMemory(data + bytes_read, &ed->data[off], read);
2903 } else if (ed->compression == BTRFS_COMPRESSION_ZLIB || ed->compression == BTRFS_COMPRESSION_LZO || ed->compression == BTRFS_COMPRESSION_ZSTD) {
2904 uint8_t* decomp;
2905 bool decomp_alloc;
2906 uint16_t inlen = ext->datalen - (uint16_t)offsetof(EXTENT_DATA, data[0]);
2907
2908 if (ed->decoded_size == 0 || ed->decoded_size > 0xffffffff) {
2909 ERR("ed->decoded_size was invalid (%I64x)\n", ed->decoded_size);
2910 Status = STATUS_INTERNAL_ERROR;
2911 goto exit;
2912 }
2913
2914 read = (uint32_t)min(ed->decoded_size - off, length);
2915
2916 if (off > 0) {
2917 decomp = ExAllocatePoolWithTag(NonPagedPool, (uint32_t)ed->decoded_size, ALLOC_TAG);
2918 if (!decomp) {
2919 ERR("out of memory\n");
2920 Status = STATUS_INSUFFICIENT_RESOURCES;
2921 goto exit;
2922 }
2923
2924 decomp_alloc = true;
2925 } else {
2926 decomp = data + bytes_read;
2927 decomp_alloc = false;
2928 }
2929
2930 if (ed->compression == BTRFS_COMPRESSION_ZLIB) {
2931 Status = zlib_decompress(ed->data, inlen, decomp, (uint32_t)(read + off));
2932 if (!NT_SUCCESS(Status)) {
2933 ERR("zlib_decompress returned %08lx\n", Status);
2934 if (decomp_alloc) ExFreePool(decomp);
2935 goto exit;
2936 }
2937 } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
2938 if (inlen < sizeof(uint32_t)) {
2939 ERR("extent data was truncated\n");
2940 Status = STATUS_INTERNAL_ERROR;
2941 if (decomp_alloc) ExFreePool(decomp);
2942 goto exit;
2943 } else
2944 inlen -= sizeof(uint32_t);
2945
2946 Status = lzo_decompress(ed->data + sizeof(uint32_t), inlen, decomp, (uint32_t)(read + off), sizeof(uint32_t));
2947 if (!NT_SUCCESS(Status)) {
2948 ERR("lzo_decompress returned %08lx\n", Status);
2949 if (decomp_alloc) ExFreePool(decomp);
2950 goto exit;
2951 }
2952 } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) {
2953 Status = zstd_decompress(ed->data, inlen, decomp, (uint32_t)(read + off));
2954 if (!NT_SUCCESS(Status)) {
2955 ERR("zstd_decompress returned %08lx\n", Status);
2956 if (decomp_alloc) ExFreePool(decomp);
2957 goto exit;
2958 }
2959 }
2960
2961 if (decomp_alloc) {
2962 RtlCopyMemory(data + bytes_read, decomp + off, read);
2963 ExFreePool(decomp);
2964 }
2965 } else {
2966 ERR("unhandled compression type %x\n", ed->compression);
2967 Status = STATUS_NOT_IMPLEMENTED;
2968 goto exit;
2969 }
2970
2971 bytes_read += read;
2972 length -= read;
2973
2974 break;
2975 }
2976
2977 case EXTENT_TYPE_REGULAR:
2978 {
2979 read_part* rp;
2980
2981 rp = ExAllocatePoolWithTag(pool_type, sizeof(read_part), ALLOC_TAG);
2982 if (!rp) {
2983 ERR("out of memory\n");
2984 Status = STATUS_INSUFFICIENT_RESOURCES;
2985 goto exit;
2986 }
2987
2988 rp->mdl = (Irp && Irp->MdlAddress) ? true : false;
2989 rp->extents[0].off = start + bytes_read - ext->offset;
2990 rp->bumpoff = 0;
2991 rp->num_extents = 1;
2992 rp->csum_free = false;
2993
2994 rp->read = (uint32_t)(len - rp->extents[0].off);
2995 if (rp->read > length) rp->read = (uint32_t)length;
2996
2997 if (ed->compression == BTRFS_COMPRESSION_NONE) {
2998 rp->addr = ed2->address + ed2->offset + rp->extents[0].off;
2999 rp->to_read = (uint32_t)sector_align(rp->read, fcb->Vcb->superblock.sector_size);
3000
3001 if (rp->addr % fcb->Vcb->superblock.sector_size > 0) {
3002 rp->bumpoff = rp->addr % fcb->Vcb->superblock.sector_size;
3003 rp->addr -= rp->bumpoff;
3004 rp->to_read = (uint32_t)sector_align(rp->read + rp->bumpoff, fcb->Vcb->superblock.sector_size);
3005 }
3006 } else {
3007 rp->addr = ed2->address;
3008 rp->to_read = (uint32_t)sector_align(ed2->size, fcb->Vcb->superblock.sector_size);
3009 }
3010
3011 if (ed->compression == BTRFS_COMPRESSION_NONE && start % fcb->Vcb->superblock.sector_size == 0 &&
3012 length % fcb->Vcb->superblock.sector_size == 0) {
3013 rp->buf = data + bytes_read;
3014 rp->buf_free = false;
3015 } else {
3016 rp->buf = ExAllocatePoolWithTag(pool_type, rp->to_read, ALLOC_TAG);
3017 rp->buf_free = true;
3018
3019 if (!rp->buf) {
3020 ERR("out of memory\n");
3021 Status = STATUS_INSUFFICIENT_RESOURCES;
3022 ExFreePool(rp);
3023 goto exit;
3024 }
3025
3026 rp->mdl = false;
3027 }
3028
3029 rp->c = get_chunk_from_address(fcb->Vcb, rp->addr);
3030
3031 if (!rp->c) {
3032 ERR("get_chunk_from_address(%I64x) failed\n", rp->addr);
3033
3034 if (rp->buf_free)
3035 ExFreePool(rp->buf);
3036
3037 ExFreePool(rp);
3038
3039 goto exit;
3040 }
3041
3042 if (ext->csum) {
3043 if (ed->compression == BTRFS_COMPRESSION_NONE) {
3044 rp->csum = (uint8_t*)ext->csum + (fcb->Vcb->csum_size * (rp->extents[0].off / fcb->Vcb->superblock.sector_size));
3045 } else
3046 rp->csum = ext->csum;
3047 } else
3048 rp->csum = NULL;
3049
3050 rp->data = data + bytes_read;
3051 rp->compression = ed->compression;
3052 rp->extents[0].ed_offset = ed2->offset;
3053 rp->extents[0].ed_size = ed2->size;
3054 rp->extents[0].ed_num_bytes = ed2->num_bytes;
3055
3056 InsertTailList(&read_parts, &rp->list_entry);
3057
3058 bytes_read += rp->read;
3059 length -= rp->read;
3060
3061 break;
3062 }
3063
3064 case EXTENT_TYPE_PREALLOC:
3065 {
3066 uint64_t off = start + bytes_read - ext->offset;
3067 uint32_t read = (uint32_t)(len - off);
3068
3069 if (read > length) read = (uint32_t)length;
3070
3071 RtlZeroMemory(data + bytes_read, read);
3072
3073 bytes_read += read;
3074 length -= read;
3075
3076 break;
3077 }
3078
3079 default:
3080 WARN("Unsupported extent data type %u\n", ed->type);
3081 Status = STATUS_NOT_IMPLEMENTED;
3082 goto exit;
3083 }
3084
3085 last_end = ext->offset + len;
3086
3087 if (length == 0)
3088 break;
3089 }
3090
3091 nextitem:
3092 le = le->Flink;
3093 }
3094
3095 if (!IsListEmpty(&read_parts) && read_parts.Flink->Flink != &read_parts) { // at least two entries in list
3096 read_part* last_rp = CONTAINING_RECORD(read_parts.Flink, read_part, list_entry);
3097
3098 le = read_parts.Flink->Flink;
3099 while (le != &read_parts) {
3100 LIST_ENTRY* le2 = le->Flink;
3101 read_part* rp = CONTAINING_RECORD(le, read_part, list_entry);
3102
3103 // merge together runs
3104 if (rp->compression != BTRFS_COMPRESSION_NONE && rp->compression == last_rp->compression && rp->addr == last_rp->addr + last_rp->to_read &&
3105 rp->data == (uint8_t*)last_rp->data + last_rp->read && rp->c == last_rp->c && ((rp->csum && last_rp->csum) || (!rp->csum && !last_rp->csum))) {
3106 read_part* rp2;
3107
3108 rp2 = ExAllocatePoolWithTag(pool_type, offsetof(read_part, extents) + (sizeof(read_part_extent) * (last_rp->num_extents + 1)), ALLOC_TAG);
3109
3110 rp2->addr = last_rp->addr;
3111 rp2->c = last_rp->c;
3112 rp2->read = last_rp->read + rp->read;
3113 rp2->to_read = last_rp->to_read + rp->to_read;
3114 rp2->csum_free = false;
3115
3116 if (last_rp->csum) {
3117 uint32_t sectors = (last_rp->to_read + rp->to_read) / fcb->Vcb->superblock.sector_size;
3118
3119 rp2->csum = ExAllocatePoolWithTag(pool_type, sectors * fcb->Vcb->csum_size, ALLOC_TAG);
3120 if (!rp2->csum) {
3121 ERR("out of memory\n");
3122 ExFreePool(rp2);
3123 Status = STATUS_INSUFFICIENT_RESOURCES;
3124 goto exit;
3125 }
3126
3127 RtlCopyMemory(rp2->csum, last_rp->csum, last_rp->to_read * fcb->Vcb->csum_size / fcb->Vcb->superblock.sector_size);
3128 RtlCopyMemory((uint8_t*)rp2->csum + (last_rp->to_read * fcb->Vcb->csum_size / fcb->Vcb->superblock.sector_size), rp->csum,
3129 rp->to_read * fcb->Vcb->csum_size / fcb->Vcb->superblock.sector_size);
3130
3131 rp2->csum_free = true;
3132 } else
3133 rp2->csum = NULL;
3134
3135 rp2->buf = ExAllocatePoolWithTag(pool_type, rp2->to_read, ALLOC_TAG);
3136 if (!rp2->buf) {
3137 ERR("out of memory\n");
3138
3139 if (rp2->csum)
3140 ExFreePool(rp2->csum);
3141
3142 ExFreePool(rp2);
3143 Status = STATUS_INSUFFICIENT_RESOURCES;
3144 goto exit;
3145 }
3146
3147 rp2->buf_free = true;
3148 rp2->bumpoff = 0;
3149 rp2->mdl = false;
3150 rp2->data = last_rp->data;
3151 rp2->compression = last_rp->compression;
3152 rp2->num_extents = last_rp->num_extents + 1;
3153
3154 RtlCopyMemory(rp2->extents, last_rp->extents, last_rp->num_extents * sizeof(read_part_extent));
3155 RtlCopyMemory(&rp2->extents[last_rp->num_extents], rp->extents, sizeof(read_part_extent));
3156
3157 InsertHeadList(le->Blink, &rp2->list_entry);
3158
3159 if (rp->buf_free)
3160 ExFreePool(rp->buf);
3161
3162 if (rp->csum_free)
3163 ExFreePool(rp->csum);
3164
3165 RemoveEntryList(&rp->list_entry);
3166
3167 ExFreePool(rp);
3168
3169 if (last_rp->buf_free)
3170 ExFreePool(last_rp->buf);
3171
3172 if (last_rp->csum_free)
3173 ExFreePool(last_rp->csum);
3174
3175 RemoveEntryList(&last_rp->list_entry);
3176
3177 ExFreePool(last_rp);
3178
3179 last_rp = rp2;
3180 } else
3181 last_rp = rp;
3182
3183 le = le2;
3184 }
3185 }
3186
3187 le = read_parts.Flink;
3188 while (le != &read_parts) {
3189 read_part* rp = CONTAINING_RECORD(le, read_part, list_entry);
3190
3191 Status = read_data(fcb->Vcb, rp->addr, rp->to_read, rp->csum, false, rp->buf, rp->c, NULL, Irp, 0, rp->mdl,
3192 fcb && fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
3193 if (!NT_SUCCESS(Status)) {
3194 ERR("read_data returned %08lx\n", Status);
3195 goto exit;
3196 }
3197
3198 if (rp->compression == BTRFS_COMPRESSION_NONE) {
3199 if (rp->buf_free)
3200 RtlCopyMemory(rp->data, rp->buf + rp->bumpoff, rp->read);
3201 } else {
3202 uint8_t* buf = rp->buf;
3203 #ifdef __REACTOS__
3204 unsigned int i;
3205 for (i = 0; i < rp->num_extents; i++) {
3206 #else
3207 for (unsigned int i = 0; i < rp->num_extents; i++) {
3208 #endif // __REACTOS__
3209 uint8_t *decomp = NULL, *buf2;
3210 ULONG outlen, inlen, off2;
3211 uint32_t inpageoff = 0;
3212 comp_calc_job* ccj;
3213
3214 off2 = (ULONG)(rp->extents[i].ed_offset + rp->extents[i].off);
3215 buf2 = buf;
3216 inlen = (ULONG)rp->extents[i].ed_size;
3217
3218 if (rp->compression == BTRFS_COMPRESSION_LZO) {
3219 ULONG inoff = sizeof(uint32_t);
3220
3221 inlen -= sizeof(uint32_t);
3222
3223 // If reading a few sectors in, skip to the interesting bit
3224 while (off2 > LZO_PAGE_SIZE) {
3225 uint32_t partlen;
3226
3227 if (inlen < sizeof(uint32_t))
3228 break;
3229
3230 partlen = *(uint32_t*)(buf2 + inoff);
3231
3232 if (partlen < inlen) {
3233 off2 -= LZO_PAGE_SIZE;
3234 inoff += partlen + sizeof(uint32_t);
3235 inlen -= partlen + sizeof(uint32_t);
3236
3237 if (LZO_PAGE_SIZE - (inoff % LZO_PAGE_SIZE) < sizeof(uint32_t))
3238 inoff = ((inoff / LZO_PAGE_SIZE) + 1) * LZO_PAGE_SIZE;
3239 } else
3240 break;
3241 }
3242
3243 buf2 = &buf2[inoff];
3244 inpageoff = inoff % LZO_PAGE_SIZE;
3245 }
3246
3247 if (off2 != 0) {
3248 outlen = off2 + min(rp->read, (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off));
3249
3250 decomp = ExAllocatePoolWithTag(pool_type, outlen, ALLOC_TAG);
3251 if (!decomp) {
3252 ERR("out of memory\n");
3253 Status = STATUS_INSUFFICIENT_RESOURCES;
3254 goto exit;
3255 }
3256 } else
3257 outlen = min(rp->read, (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off));
3258
3259 ccj = (comp_calc_job*)ExAllocatePoolWithTag(pool_type, sizeof(comp_calc_job), ALLOC_TAG);
3260 if (!ccj) {
3261 ERR("out of memory\n");
3262
3263 if (decomp)
3264 ExFreePool(decomp);
3265
3266 Status = STATUS_INSUFFICIENT_RESOURCES;
3267 goto exit;
3268 }
3269
3270 Status = add_calc_job_decomp(fcb->Vcb, rp->compression, buf2, inlen, decomp ? decomp : rp->data, outlen,
3271 inpageoff, &ccj->cj);
3272 if (!NT_SUCCESS(Status)) {
3273 ERR("add_calc_job_decomp returned %08lx\n", Status);
3274
3275 if (decomp)
3276 ExFreePool(decomp);
3277
3278 ExFreePool(ccj);
3279
3280 goto exit;
3281 }
3282
3283 ccj->data = rp->data;
3284 ccj->decomp = decomp;
3285
3286 ccj->offset = off2;
3287 ccj->length = (size_t)min(rp->read, rp->extents[i].ed_num_bytes - rp->extents[i].off);
3288
3289 InsertTailList(&calc_jobs, &ccj->list_entry);
3290
3291 buf += rp->extents[i].ed_size;
3292 rp->data = (uint8_t*)rp->data + rp->extents[i].ed_num_bytes - rp->extents[i].off;
3293 rp->read -= (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off);
3294 }
3295 }
3296
3297 le = le->Flink;
3298 }
3299
3300 if (length > 0 && start + bytes_read < fcb->inode_item.st_size) {
3301 uint32_t read = (uint32_t)min(fcb->inode_item.st_size - start - bytes_read, length);
3302
3303 RtlZeroMemory(data + bytes_read, read);
3304
3305 bytes_read += read;
3306 length -= read;
3307 }
3308
3309 Status = STATUS_SUCCESS;
3310
3311 while (!IsListEmpty(&calc_jobs)) {
3312 comp_calc_job* ccj = CONTAINING_RECORD(RemoveTailList(&calc_jobs), comp_calc_job, list_entry);
3313
3314 calc_thread_main(fcb->Vcb, ccj->cj);
3315
3316 KeWaitForSingleObject(&ccj->cj->event, Executive, KernelMode, false, NULL);
3317
3318 if (!NT_SUCCESS(ccj->cj->Status))
3319 Status = ccj->cj->Status;
3320
3321 if (ccj->decomp) {
3322 RtlCopyMemory(ccj->data, (uint8_t*)ccj->decomp + ccj->offset, ccj->length);
3323 ExFreePool(ccj->decomp);
3324 }
3325
3326 ExFreePool(ccj);
3327 }
3328
3329 if (pbr)
3330 *pbr = bytes_read;
3331
3332 exit:
3333 while (!IsListEmpty(&read_parts)) {
3334 read_part* rp = CONTAINING_RECORD(RemoveHeadList(&read_parts), read_part, list_entry);
3335
3336 if (rp->buf_free)
3337 ExFreePool(rp->buf);
3338
3339 if (rp->csum_free)
3340 ExFreePool(rp->csum);
3341
3342 ExFreePool(rp);
3343 }
3344
3345 while (!IsListEmpty(&calc_jobs)) {
3346 comp_calc_job* ccj = CONTAINING_RECORD(RemoveHeadList(&calc_jobs), comp_calc_job, list_entry);
3347
3348 KeWaitForSingleObject(&ccj->cj->event, Executive, KernelMode, false, NULL);
3349
3350 if (ccj->decomp)
3351 ExFreePool(ccj->decomp);
3352
3353 ExFreePool(ccj->cj);
3354
3355 ExFreePool(ccj);
3356 }
3357
3358 return Status;
3359 }
3360
// Services an IRP_MJ_READ request for a file or stream. Cached reads go
// through the cache manager (CcCopyRead/CcCopyReadEx/CcMdlRead); non-cached
// reads call read_stream or read_file directly. Returns STATUS_PENDING when
// the operation cannot complete synchronously and wait is false.
NTSTATUS do_read(PIRP Irp, bool wait, ULONG* bytes_read) {
    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
    PFILE_OBJECT FileObject = IrpSp->FileObject;
    fcb* fcb = FileObject->FsContext;
    uint8_t* data = NULL;
    ULONG length = IrpSp->Parameters.Read.Length, addon = 0;
    uint64_t start = IrpSp->Parameters.Read.ByteOffset.QuadPart;

    *bytes_read = 0;

    if (!fcb || !fcb->Vcb || !fcb->subvol)
        return STATUS_INTERNAL_ERROR;

    TRACE("fcb = %p\n", fcb);
    TRACE("offset = %I64x, length = %lx\n", start, length);
    TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "true" : "false", Irp->Flags & IRP_NOCACHE ? "true" : "false");

    // Directories have no data stream to read (unless this is an ADS).
    if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY)
        return STATUS_INVALID_DEVICE_REQUEST;

    // Byte-range locks are not checked for paging I/O.
    if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) {
        WARN("tried to read locked region\n");
        return STATUS_FILE_LOCK_CONFLICT;
    }

    if (length == 0) {
        TRACE("tried to read zero bytes\n");
        return STATUS_SUCCESS;
    }

    if (start >= (uint64_t)fcb->Header.FileSize.QuadPart) {
        TRACE("tried to read with offset after file end (%I64x >= %I64x)\n", start, fcb->Header.FileSize.QuadPart);
        return STATUS_END_OF_FILE;
    }

    TRACE("FileObject %p fcb %p FileSize = %I64x st_size = %I64x (%p)\n", FileObject, fcb, fcb->Header.FileSize.QuadPart, fcb->inode_item.st_size, &fcb->inode_item.st_size);

    // Map the user buffer unless this is a cached MDL read, which has no
    // buffer to map until CcMdlRead provides one.
    if (Irp->Flags & IRP_NOCACHE || !(IrpSp->MinorFunction & IRP_MN_MDL)) {
        data = map_user_buffer(Irp, fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);

        if (Irp->MdlAddress && !data) {
            ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        // Reads entirely beyond valid data length are satisfied with zeroes.
        if (start >= (uint64_t)fcb->Header.ValidDataLength.QuadPart) {
            length = (ULONG)min(length, min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
            RtlZeroMemory(data, length);
            Irp->IoStatus.Information = *bytes_read = length;
            return STATUS_SUCCESS;
        }

        // Reads straddling VDL: zero the tail beyond VDL (addon) and shrink
        // the actual read to the valid portion.
        if (length + start > (uint64_t)fcb->Header.ValidDataLength.QuadPart) {
            addon = (ULONG)(min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
            RtlZeroMemory(data + (fcb->Header.ValidDataLength.QuadPart - start), addon);
            length = (ULONG)(fcb->Header.ValidDataLength.QuadPart - start);
        }
    }

    if (!(Irp->Flags & IRP_NOCACHE)) {
        // Cached read: go through the cache manager.
        NTSTATUS Status = STATUS_SUCCESS;

        _SEH2_TRY {
            // Initialize caching on first use of this file object.
            if (!FileObject->PrivateCacheMap) {
                CC_FILE_SIZES ccfs;

                ccfs.AllocationSize = fcb->Header.AllocationSize;
                ccfs.FileSize = fcb->Header.FileSize;
                ccfs.ValidDataLength = fcb->Header.ValidDataLength;

                init_file_cache(FileObject, &ccfs);
            }

            if (IrpSp->MinorFunction & IRP_MN_MDL) {
                CcMdlRead(FileObject,&IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus);
            } else {
                // Prefer CcCopyReadEx (Windows 8+) when available, which
                // attributes the I/O to the issuing thread.
                if (fCcCopyReadEx) {
                    TRACE("CcCopyReadEx(%p, %I64x, %lx, %u, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart,
                          length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread);
                    TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
                    if (!fCcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) {
                        TRACE("CcCopyReadEx could not wait\n");

                        IoMarkIrpPending(Irp);
                        return STATUS_PENDING;
                    }
                    TRACE("CcCopyReadEx finished\n");
                } else {
                    TRACE("CcCopyRead(%p, %I64x, %lx, %u, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus);
                    TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
                    if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) {
                        TRACE("CcCopyRead could not wait\n");

                        IoMarkIrpPending(Irp);
                        return STATUS_PENDING;
                    }
                    TRACE("CcCopyRead finished\n");
                }
            }
        } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
            Status = _SEH2_GetExceptionCode();
        } _SEH2_END;

        if (NT_SUCCESS(Status)) {
            Status = Irp->IoStatus.Status;
            // Account for the zeroed tail beyond VDL.
            Irp->IoStatus.Information += addon;
            *bytes_read = (ULONG)Irp->IoStatus.Information;
        } else
            ERR("EXCEPTION - %08lx\n", Status);

        return Status;
    } else {
        // Non-cached read: fetch directly from disk.
        NTSTATUS Status;

        if (!wait) {
            IoMarkIrpPending(Irp);
            return STATUS_PENDING;
        }

        if (fcb->ads) {
            Status = read_stream(fcb, data, start, length, bytes_read);

            if (!NT_SUCCESS(Status))
                ERR("read_stream returned %08lx\n", Status);
        } else {
            Status = read_file(fcb, data, start, length, bytes_read, Irp);

            if (!NT_SUCCESS(Status))
                ERR("read_file returned %08lx\n", Status);
        }

        *bytes_read += addon;
        TRACE("read %lu bytes\n", *bytes_read);

        Irp->IoStatus.Information = *bytes_read;

        // Charge the read to the appropriate thread's disk counters, if
        // the optional PsUpdateDiskCounters API is available.
        if (diskacc && Status != STATUS_PENDING) {
            PETHREAD thread = NULL;

            if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread))
                thread = Irp->Tail.Overlay.Thread;
            else if (!IoIsSystemThread(PsGetCurrentThread()))
                thread = PsGetCurrentThread();
            else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp)
                thread = PsGetCurrentThread();

            if (thread)
                fPsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0);
        }

        return Status;
    }
}
3514
3515 _Dispatch_type_(IRP_MJ_READ)
3516 _Function_class_(DRIVER_DISPATCH)
3517 NTSTATUS __stdcall drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
3518 device_extension* Vcb = DeviceObject->DeviceExtension;
3519 PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3520 PFILE_OBJECT FileObject = IrpSp->FileObject;
3521 ULONG bytes_read = 0;
3522 NTSTATUS Status;
3523 bool top_level;
3524 fcb* fcb;
3525 ccb* ccb;
3526 bool acquired_fcb_lock = false, wait;
3527
3528 FsRtlEnterFileSystem();
3529
3530 top_level = is_top_level(Irp);
3531
3532 TRACE("read\n");
3533
3534 if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
3535 Status = vol_read(DeviceObject, Irp);
3536 goto exit2;
3537 } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
3538 Status = STATUS_INVALID_PARAMETER;
3539 goto end;
3540 }
3541
3542 Irp->IoStatus.Information = 0;
3543
3544 if (IrpSp->MinorFunction & IRP_MN_COMPLETE) {
3545 CcMdlReadComplete(IrpSp->FileObject, Irp->MdlAddress);
3546
3547 Irp->MdlAddress = NULL;
3548 Status = STATUS_SUCCESS;
3549
3550 goto exit;
3551 }
3552
3553 fcb = FileObject->FsContext;
3554
3555 if (!fcb) {
3556 ERR("fcb was NULL\n");
3557 Status = STATUS_INVALID_PARAMETER;
3558 goto exit;
3559 }
3560
3561 ccb = FileObject->FsContext2;
3562
3563 if (!ccb) {
3564 ERR("ccb was NULL\n");
3565 Status = STATUS_INVALID_PARAMETER;
3566 goto exit;
3567 }
3568
3569 if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_READ_DATA)) {
3570 WARN("insufficient privileges\n");
3571 Status = STATUS_ACCESS_DENIED;
3572 goto exit;
3573 }
3574
3575 if (fcb == Vcb->volume_fcb) {
3576 TRACE("reading volume FCB\n");
3577
3578 IoSkipCurrentIrpStackLocation(Irp);
3579
3580 Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp);
3581
3582 goto exit2;
3583 }
3584
3585 if (!(Irp->Flags & IRP_PAGING_IO))
3586 FsRtlCheckOplock(fcb_oplock(fcb), Irp, NULL, NULL, NULL);
3587
3588 wait = IoIsOperationSynchronous(Irp);
3589
3590 // Don't offload jobs when doing paging IO - otherwise this can lead to
3591 // deadlocks in CcCopyRead.
3592 if (Irp->Flags & IRP_PAGING_IO)
3593 wait = true;
3594
3595 if (!(Irp->Flags & IRP_PAGING_IO) && FileObject->SectionObjectPointer && FileObject->SectionObjectPointer->DataSectionObject) {
3596 IO_STATUS_BLOCK iosb;
3597
3598 CcFlushCache(FileObject->SectionObjectPointer, &IrpSp->Parameters.Read.ByteOffset, IrpSp->Parameters.Read.Length, &iosb);
3599 if (!NT_SUCCESS(iosb.Status)) {
3600 ERR("CcFlushCache returned %08lx\n", iosb.Status);
3601 return iosb.Status;
3602 }
3603 }
3604
3605 if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) {
3606 if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) {
3607 Status = STATUS_PENDING;
3608 IoMarkIrpPending(Irp);
3609 goto exit;
3610 }
3611
3612 acquired_fcb_lock = true;
3613 }
3614
3615 Status = do_read(Irp, wait, &bytes_read);
3616
3617 if (acquired_fcb_lock)
3618 ExReleaseResourceLite(fcb->Header.Resource);
3619
3620 exit:
3621 if (FileObject->Flags & FO_SYNCHRONOUS_IO && !(Irp->Flags & IRP_PAGING_IO))
3622 FileObject->CurrentByteOffset.QuadPart = IrpSp->Parameters.Read.ByteOffset.QuadPart + (NT_SUCCESS(Status) ? bytes_read : 0);
3623
3624 end:
3625 Irp->IoStatus.Status = Status;
3626
3627 TRACE("Irp->IoStatus.Status = %08lx\n", Irp->IoStatus.Status);
3628 TRACE("Irp->IoStatus.Information = %Iu\n", Irp->IoStatus.Information);
3629 TRACE("returning %08lx\n", Status);
3630
3631 if (Status != STATUS_PENDING)
3632 IoCompleteRequest(Irp, IO_NO_INCREMENT);
3633 else {
3634 if (!add_thread_job(Vcb, Irp))
3635 Status = do_read_job(Irp);
3636 }
3637
3638 exit2:
3639 if (top_level)
3640 IoSetTopLevelIrp(NULL);
3641
3642 FsRtlExitFileSystem();
3643
3644 return Status;
3645 }