980089618a4ca08fce902b637dfcda255eef7a4b
[reactos.git] / drivers / filesystems / btrfs / balance.c
1 /* Copyright (c) Mark Harmstone 2016-17
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19 #include "btrfsioctl.h"
20 #include <ntddstor.h>
21
22 typedef struct {
23 UINT64 address;
24 UINT64 new_address;
25 tree_header* data;
26 EXTENT_ITEM* ei;
27 tree* t;
28 BOOL system;
29 LIST_ENTRY refs;
30 LIST_ENTRY list_entry;
31 } metadata_reloc;
32
33 typedef struct {
34 UINT8 type;
35 UINT64 hash;
36
37 union {
38 TREE_BLOCK_REF tbr;
39 SHARED_BLOCK_REF sbr;
40 };
41
42 metadata_reloc* parent;
43 BOOL top;
44 LIST_ENTRY list_entry;
45 } metadata_reloc_ref;
46
47 typedef struct {
48 UINT64 address;
49 UINT64 size;
50 UINT64 new_address;
51 chunk* newchunk;
52 EXTENT_ITEM* ei;
53 LIST_ENTRY refs;
54 LIST_ENTRY list_entry;
55 } data_reloc;
56
57 typedef struct {
58 UINT8 type;
59 UINT64 hash;
60
61 union {
62 EXTENT_DATA_REF edr;
63 SHARED_DATA_REF sdr;
64 };
65
66 metadata_reloc* parent;
67 LIST_ENTRY list_entry;
68 } data_reloc_ref;
69
70 #ifndef __REACTOS__
71 #ifndef _MSC_VER // not in mingw yet
72 #define DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED 0x80000000
73 #endif
74 #endif
75
76 #define BALANCE_UNIT 0x100000 // only read 1 MB at a time
77
78 static NTSTATUS add_metadata_reloc(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, LIST_ENTRY* items, traverse_ptr* tp,
79 BOOL skinny, metadata_reloc** mr2, chunk* c, LIST_ENTRY* rollback) {
80 NTSTATUS Status;
81 metadata_reloc* mr;
82 EXTENT_ITEM* ei;
83 UINT16 len;
84 UINT64 inline_rc;
85 UINT8* ptr;
86
87 mr = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc), ALLOC_TAG);
88 if (!mr) {
89 ERR("out of memory\n");
90 return STATUS_INSUFFICIENT_RESOURCES;
91 }
92
93 mr->address = tp->item->key.obj_id;
94 mr->data = NULL;
95 mr->ei = (EXTENT_ITEM*)tp->item->data;
96 mr->system = FALSE;
97 InitializeListHead(&mr->refs);
98
99 Status = delete_tree_item(Vcb, tp);
100 if (!NT_SUCCESS(Status)) {
101 ERR("delete_tree_item returned %08x\n", Status);
102 ExFreePool(mr);
103 return Status;
104 }
105
106 if (!c)
107 c = get_chunk_from_address(Vcb, tp->item->key.obj_id);
108
109 if (c) {
110 ExAcquireResourceExclusiveLite(&c->lock, TRUE);
111
112 c->used -= Vcb->superblock.node_size;
113
114 space_list_add(c, tp->item->key.obj_id, Vcb->superblock.node_size, rollback);
115
116 ExReleaseResourceLite(&c->lock);
117 }
118
119 ei = (EXTENT_ITEM*)tp->item->data;
120 inline_rc = 0;
121
122 len = tp->item->size - sizeof(EXTENT_ITEM);
123 ptr = (UINT8*)tp->item->data + sizeof(EXTENT_ITEM);
124 if (!skinny) {
125 len -= sizeof(EXTENT_ITEM2);
126 ptr += sizeof(EXTENT_ITEM2);
127 }
128
129 while (len > 0) {
130 UINT8 secttype = *ptr;
131 UINT16 sectlen = secttype == TYPE_TREE_BLOCK_REF ? sizeof(TREE_BLOCK_REF) : (secttype == TYPE_SHARED_BLOCK_REF ? sizeof(SHARED_BLOCK_REF) : 0);
132 metadata_reloc_ref* ref;
133
134 len--;
135
136 if (sectlen > len) {
137 ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, len, sectlen);
138 return STATUS_INTERNAL_ERROR;
139 }
140
141 if (sectlen == 0) {
142 ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, secttype);
143 return STATUS_INTERNAL_ERROR;
144 }
145
146 ref = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc_ref), ALLOC_TAG);
147 if (!ref) {
148 ERR("out of memory\n");
149 return STATUS_INSUFFICIENT_RESOURCES;
150 }
151
152 if (secttype == TYPE_TREE_BLOCK_REF) {
153 ref->type = TYPE_TREE_BLOCK_REF;
154 RtlCopyMemory(&ref->tbr, ptr + sizeof(UINT8), sizeof(TREE_BLOCK_REF));
155 inline_rc++;
156 } else if (secttype == TYPE_SHARED_BLOCK_REF) {
157 ref->type = TYPE_SHARED_BLOCK_REF;
158 RtlCopyMemory(&ref->sbr, ptr + sizeof(UINT8), sizeof(SHARED_BLOCK_REF));
159 inline_rc++;
160 } else {
161 ERR("unexpected tree type %x\n", secttype);
162 ExFreePool(ref);
163 return STATUS_INTERNAL_ERROR;
164 }
165
166 ref->parent = NULL;
167 ref->top = FALSE;
168 InsertTailList(&mr->refs, &ref->list_entry);
169
170 len -= sectlen;
171 ptr += sizeof(UINT8) + sectlen;
172 }
173
174 if (inline_rc < ei->refcount) { // look for non-inline entries
175 traverse_ptr tp2 = *tp, next_tp;
176
177 while (find_next_item(Vcb, &tp2, &next_tp, FALSE, NULL)) {
178 tp2 = next_tp;
179
180 if (tp2.item->key.obj_id == tp->item->key.obj_id) {
181 if (tp2.item->key.obj_type == TYPE_TREE_BLOCK_REF) {
182 metadata_reloc_ref* ref = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc_ref), ALLOC_TAG);
183 if (!ref) {
184 ERR("out of memory\n");
185 return STATUS_INSUFFICIENT_RESOURCES;
186 }
187
188 ref->type = TYPE_TREE_BLOCK_REF;
189 ref->tbr.offset = tp2.item->key.offset;
190 ref->parent = NULL;
191 ref->top = FALSE;
192 InsertTailList(&mr->refs, &ref->list_entry);
193
194 Status = delete_tree_item(Vcb, &tp2);
195 if (!NT_SUCCESS(Status)) {
196 ERR("delete_tree_item returned %08x\n", Status);
197 return Status;
198 }
199 } else if (tp2.item->key.obj_type == TYPE_SHARED_BLOCK_REF) {
200 metadata_reloc_ref* ref = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc_ref), ALLOC_TAG);
201 if (!ref) {
202 ERR("out of memory\n");
203 return STATUS_INSUFFICIENT_RESOURCES;
204 }
205
206 ref->type = TYPE_SHARED_BLOCK_REF;
207 ref->sbr.offset = tp2.item->key.offset;
208 ref->parent = NULL;
209 ref->top = FALSE;
210 InsertTailList(&mr->refs, &ref->list_entry);
211
212 Status = delete_tree_item(Vcb, &tp2);
213 if (!NT_SUCCESS(Status)) {
214 ERR("delete_tree_item returned %08x\n", Status);
215 return Status;
216 }
217 }
218 } else
219 break;
220 }
221 }
222
223 InsertTailList(items, &mr->list_entry);
224
225 if (mr2)
226 *mr2 = mr;
227
228 return STATUS_SUCCESS;
229 }
230
231 static NTSTATUS add_metadata_reloc_parent(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, LIST_ENTRY* items,
232 UINT64 address, metadata_reloc** mr2, LIST_ENTRY* rollback) {
233 LIST_ENTRY* le;
234 KEY searchkey;
235 traverse_ptr tp;
236 BOOL skinny = FALSE;
237 NTSTATUS Status;
238
239 le = items->Flink;
240 while (le != items) {
241 metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry);
242
243 if (mr->address == address) {
244 *mr2 = mr;
245 return STATUS_SUCCESS;
246 }
247
248 le = le->Flink;
249 }
250
251 searchkey.obj_id = address;
252 searchkey.obj_type = TYPE_METADATA_ITEM;
253 searchkey.offset = 0xffffffffffffffff;
254
255 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
256 if (!NT_SUCCESS(Status)) {
257 ERR("find_item returned %08x\n", Status);
258 return Status;
259 }
260
261 if (tp.item->key.obj_id == address && tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM))
262 skinny = TRUE;
263 else if (tp.item->key.obj_id == address && tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.offset == Vcb->superblock.node_size &&
264 tp.item->size >= sizeof(EXTENT_ITEM)) {
265 EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
266
267 if (!(ei->flags & EXTENT_ITEM_TREE_BLOCK)) {
268 ERR("EXTENT_ITEM for %llx found, but tree flag not set\n", address);
269 return STATUS_INTERNAL_ERROR;
270 }
271 } else {
272 ERR("could not find valid EXTENT_ITEM for address %llx\n", address);
273 return STATUS_INTERNAL_ERROR;
274 }
275
276 Status = add_metadata_reloc(Vcb, items, &tp, skinny, mr2, NULL, rollback);
277 if (!NT_SUCCESS(Status)) {
278 ERR("add_metadata_reloc returned %08x\n", Status);
279 return Status;
280 }
281
282 return STATUS_SUCCESS;
283 }
284
285 static void sort_metadata_reloc_refs(metadata_reloc* mr) {
286 LIST_ENTRY newlist, *le;
287
288 if (mr->refs.Flink == mr->refs.Blink) // 0 or 1 items
289 return;
290
291 // insertion sort
292
293 InitializeListHead(&newlist);
294
295 while (!IsListEmpty(&mr->refs)) {
296 metadata_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&mr->refs), metadata_reloc_ref, list_entry);
297 BOOL inserted = FALSE;
298
299 if (ref->type == TYPE_TREE_BLOCK_REF)
300 ref->hash = ref->tbr.offset;
301 else if (ref->type == TYPE_SHARED_BLOCK_REF)
302 ref->hash = ref->parent->new_address;
303
304 le = newlist.Flink;
305 while (le != &newlist) {
306 metadata_reloc_ref* ref2 = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry);
307
308 if (ref->type < ref2->type || (ref->type == ref2->type && ref->hash > ref2->hash)) {
309 InsertHeadList(le->Blink, &ref->list_entry);
310 inserted = TRUE;
311 break;
312 }
313
314 le = le->Flink;
315 }
316
317 if (!inserted)
318 InsertTailList(&newlist, &ref->list_entry);
319 }
320
321 newlist.Flink->Blink = &mr->refs;
322 newlist.Blink->Flink = &mr->refs;
323 mr->refs.Flink = newlist.Flink;
324 mr->refs.Blink = newlist.Blink;
325 }
326
327 static NTSTATUS add_metadata_reloc_extent_item(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, metadata_reloc* mr) {
328 NTSTATUS Status;
329 LIST_ENTRY* le;
330 UINT64 rc = 0;
331 UINT16 inline_len;
332 BOOL all_inline = TRUE;
333 metadata_reloc_ref* first_noninline = NULL;
334 EXTENT_ITEM* ei;
335 UINT8* ptr;
336
337 inline_len = sizeof(EXTENT_ITEM);
338 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA))
339 inline_len += sizeof(EXTENT_ITEM2);
340
341 sort_metadata_reloc_refs(mr);
342
343 le = mr->refs.Flink;
344 while (le != &mr->refs) {
345 metadata_reloc_ref* ref = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry);
346 UINT16 extlen = 0;
347
348 rc++;
349
350 if (ref->type == TYPE_TREE_BLOCK_REF)
351 extlen += sizeof(TREE_BLOCK_REF);
352 else if (ref->type == TYPE_SHARED_BLOCK_REF)
353 extlen += sizeof(SHARED_BLOCK_REF);
354
355 if (all_inline) {
356 if ((ULONG)(inline_len + 1 + extlen) > (Vcb->superblock.node_size >> 2)) {
357 all_inline = FALSE;
358 first_noninline = ref;
359 } else
360 inline_len += extlen + 1;
361 }
362
363 le = le->Flink;
364 }
365
366 ei = ExAllocatePoolWithTag(PagedPool, inline_len, ALLOC_TAG);
367 if (!ei) {
368 ERR("out of memory\n");
369 return STATUS_INSUFFICIENT_RESOURCES;
370 }
371
372 ei->refcount = rc;
373 ei->generation = mr->ei->generation;
374 ei->flags = mr->ei->flags;
375 ptr = (UINT8*)&ei[1];
376
377 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
378 EXTENT_ITEM2* ei2 = (EXTENT_ITEM2*)ptr;
379
380 ei2->firstitem = *(KEY*)&mr->data[1];
381 ei2->level = mr->data->level;
382
383 ptr += sizeof(EXTENT_ITEM2);
384 }
385
386 le = mr->refs.Flink;
387 while (le != &mr->refs) {
388 metadata_reloc_ref* ref = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry);
389
390 if (ref == first_noninline)
391 break;
392
393 *ptr = ref->type;
394 ptr++;
395
396 if (ref->type == TYPE_TREE_BLOCK_REF) {
397 TREE_BLOCK_REF* tbr = (TREE_BLOCK_REF*)ptr;
398
399 tbr->offset = ref->tbr.offset;
400
401 ptr += sizeof(TREE_BLOCK_REF);
402 } else if (ref->type == TYPE_SHARED_BLOCK_REF) {
403 SHARED_BLOCK_REF* sbr = (SHARED_BLOCK_REF*)ptr;
404
405 sbr->offset = ref->parent->new_address;
406
407 ptr += sizeof(SHARED_BLOCK_REF);
408 }
409
410 le = le->Flink;
411 }
412
413 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)
414 Status = insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_METADATA_ITEM, mr->data->level, ei, inline_len, NULL, NULL);
415 else
416 Status = insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, ei, inline_len, NULL, NULL);
417
418 if (!NT_SUCCESS(Status)) {
419 ERR("insert_tree_item returned %08x\n", Status);
420 ExFreePool(ei);
421 return Status;
422 }
423
424 if (!all_inline) {
425 le = &first_noninline->list_entry;
426
427 while (le != &mr->refs) {
428 metadata_reloc_ref* ref = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry);
429
430 if (ref->type == TYPE_TREE_BLOCK_REF) {
431 Status = insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_TREE_BLOCK_REF, ref->tbr.offset, NULL, 0, NULL, NULL);
432 if (!NT_SUCCESS(Status)) {
433 ERR("insert_tree_item returned %08x\n", Status);
434 return Status;
435 }
436 } else if (ref->type == TYPE_SHARED_BLOCK_REF) {
437 Status = insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_SHARED_BLOCK_REF, ref->parent->new_address, NULL, 0, NULL, NULL);
438 if (!NT_SUCCESS(Status)) {
439 ERR("insert_tree_item returned %08x\n", Status);
440 return Status;
441 }
442 }
443
444 le = le->Flink;
445 }
446 }
447
448 if (ei->flags & EXTENT_ITEM_SHARED_BACKREFS || mr->data->flags & HEADER_FLAG_SHARED_BACKREF || !(mr->data->flags & HEADER_FLAG_MIXED_BACKREF)) {
449 if (mr->data->level > 0) {
450 UINT16 i;
451 internal_node* in = (internal_node*)&mr->data[1];
452
453 for (i = 0; i < mr->data->num_items; i++) {
454 UINT64 sbrrc = find_extent_shared_tree_refcount(Vcb, in[i].address, mr->address, NULL);
455
456 if (sbrrc > 0) {
457 SHARED_BLOCK_REF sbr;
458
459 sbr.offset = mr->new_address;
460
461 Status = increase_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0, NULL);
462 if (!NT_SUCCESS(Status)) {
463 ERR("increase_extent_refcount returned %08x\n", Status);
464 return Status;
465 }
466
467 sbr.offset = mr->address;
468
469 Status = decrease_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
470 sbr.offset, FALSE, NULL);
471 if (!NT_SUCCESS(Status)) {
472 ERR("decrease_extent_refcount returned %08x\n", Status);
473 return Status;
474 }
475 }
476 }
477 } else {
478 UINT16 i;
479 leaf_node* ln = (leaf_node*)&mr->data[1];
480
481 for (i = 0; i < mr->data->num_items; i++) {
482 if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
483 EXTENT_DATA* ed = (EXTENT_DATA*)((UINT8*)mr->data + sizeof(tree_header) + ln[i].offset);
484
485 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
486 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
487
488 if (ed2->size > 0) { // not sparse
489 UINT32 sdrrc = find_extent_shared_data_refcount(Vcb, ed2->address, mr->address, NULL);
490
491 if (sdrrc > 0) {
492 SHARED_DATA_REF sdr;
493 chunk* c;
494
495 sdr.offset = mr->new_address;
496 sdr.count = sdrrc;
497
498 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0, NULL);
499 if (!NT_SUCCESS(Status)) {
500 ERR("increase_extent_refcount returned %08x\n", Status);
501 return Status;
502 }
503
504 sdr.offset = mr->address;
505
506 Status = decrease_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0,
507 sdr.offset, FALSE, NULL);
508 if (!NT_SUCCESS(Status)) {
509 ERR("decrease_extent_refcount returned %08x\n", Status);
510 return Status;
511 }
512
513 c = get_chunk_from_address(Vcb, ed2->address);
514
515 if (c) {
516 // check changed_extents
517
518 ExAcquireResourceExclusiveLite(&c->changed_extents_lock, TRUE);
519
520 le = c->changed_extents.Flink;
521
522 while (le != &c->changed_extents) {
523 changed_extent* ce = CONTAINING_RECORD(le, changed_extent, list_entry);
524
525 if (ce->address == ed2->address) {
526 LIST_ENTRY* le2;
527
528 le2 = ce->refs.Flink;
529 while (le2 != &ce->refs) {
530 changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
531
532 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == mr->address) {
533 cer->sdr.offset = mr->new_address;
534 break;
535 }
536
537 le2 = le2->Flink;
538 }
539
540 le2 = ce->old_refs.Flink;
541 while (le2 != &ce->old_refs) {
542 changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
543
544 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == mr->address) {
545 cer->sdr.offset = mr->new_address;
546 break;
547 }
548
549 le2 = le2->Flink;
550 }
551
552 break;
553 }
554
555 le = le->Flink;
556 }
557
558 ExReleaseResourceLite(&c->changed_extents_lock);
559 }
560 }
561 }
562 }
563 }
564 }
565 }
566 }
567
568 return STATUS_SUCCESS;
569 }
570
571 static NTSTATUS write_metadata_items(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, LIST_ENTRY* items,
572 LIST_ENTRY* data_items, chunk* c, LIST_ENTRY* rollback) {
573 LIST_ENTRY tree_writes, *le;
574 NTSTATUS Status;
575 traverse_ptr tp;
576 UINT8 level, max_level = 0;
577 chunk* newchunk = NULL;
578
579 InitializeListHead(&tree_writes);
580
581 le = items->Flink;
582 while (le != items) {
583 metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry);
584 LIST_ENTRY* le2;
585 chunk* pc;
586
587 mr->data = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
588 if (!mr->data) {
589 ERR("out of memory\n");
590 return STATUS_INSUFFICIENT_RESOURCES;
591 }
592
593 Status = read_data(Vcb, mr->address, Vcb->superblock.node_size, NULL, TRUE, (UINT8*)mr->data,
594 c && mr->address >= c->offset && mr->address < c->offset + c->chunk_item->size ? c : NULL, &pc, NULL, 0, FALSE, NormalPagePriority);
595 if (!NT_SUCCESS(Status)) {
596 ERR("read_data returned %08x\n", Status);
597 return Status;
598 }
599
600 if (pc->chunk_item->type & BLOCK_FLAG_SYSTEM)
601 mr->system = TRUE;
602
603 if (data_items && mr->data->level == 0) {
604 le2 = data_items->Flink;
605 while (le2 != data_items) {
606 data_reloc* dr = CONTAINING_RECORD(le2, data_reloc, list_entry);
607 leaf_node* ln = (leaf_node*)&mr->data[1];
608 UINT16 i;
609
610 for (i = 0; i < mr->data->num_items; i++) {
611 if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
612 EXTENT_DATA* ed = (EXTENT_DATA*)((UINT8*)mr->data + sizeof(tree_header) + ln[i].offset);
613
614 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
615 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
616
617 if (ed2->address == dr->address)
618 ed2->address = dr->new_address;
619 }
620 }
621 }
622
623 le2 = le2->Flink;
624 }
625 }
626
627 if (mr->data->level > max_level)
628 max_level = mr->data->level;
629
630 le2 = mr->refs.Flink;
631 while (le2 != &mr->refs) {
632 metadata_reloc_ref* ref = CONTAINING_RECORD(le2, metadata_reloc_ref, list_entry);
633
634 if (ref->type == TYPE_TREE_BLOCK_REF) {
635 KEY* firstitem;
636 root* r = NULL;
637 LIST_ENTRY* le3;
638 tree* t;
639
640 firstitem = (KEY*)&mr->data[1];
641
642 le3 = Vcb->roots.Flink;
643 while (le3 != &Vcb->roots) {
644 root* r2 = CONTAINING_RECORD(le3, root, list_entry);
645
646 if (r2->id == ref->tbr.offset) {
647 r = r2;
648 break;
649 }
650
651 le3 = le3->Flink;
652 }
653
654 if (!r) {
655 ERR("could not find subvol with id %llx\n", ref->tbr.offset);
656 return STATUS_INTERNAL_ERROR;
657 }
658
659 Status = find_item_to_level(Vcb, r, &tp, firstitem, FALSE, mr->data->level + 1, NULL);
660 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
661 ERR("find_item_to_level returned %08x\n", Status);
662 return Status;
663 }
664
665 t = tp.tree;
666 while (t && t->header.level < mr->data->level + 1) {
667 t = t->parent;
668 }
669
670 if (!t)
671 ref->top = TRUE;
672 else {
673 metadata_reloc* mr2;
674
675 Status = add_metadata_reloc_parent(Vcb, items, t->header.address, &mr2, rollback);
676 if (!NT_SUCCESS(Status)) {
677 ERR("add_metadata_reloc_parent returned %08x\n", Status);
678 return Status;
679 }
680
681 ref->parent = mr2;
682 }
683 } else if (ref->type == TYPE_SHARED_BLOCK_REF) {
684 metadata_reloc* mr2;
685
686 Status = add_metadata_reloc_parent(Vcb, items, ref->sbr.offset, &mr2, rollback);
687 if (!NT_SUCCESS(Status)) {
688 ERR("add_metadata_reloc_parent returned %08x\n", Status);
689 return Status;
690 }
691
692 ref->parent = mr2;
693 }
694
695 le2 = le2->Flink;
696 }
697
698 le = le->Flink;
699 }
700
701 le = items->Flink;
702 while (le != items) {
703 metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry);
704 LIST_ENTRY* le2;
705 UINT32 hash;
706
707 mr->t = NULL;
708
709 hash = calc_crc32c(0xffffffff, (UINT8*)&mr->address, sizeof(UINT64));
710
711 le2 = Vcb->trees_ptrs[hash >> 24];
712
713 if (le2) {
714 while (le2 != &Vcb->trees_hash) {
715 tree* t = CONTAINING_RECORD(le2, tree, list_entry_hash);
716
717 if (t->header.address == mr->address) {
718 mr->t = t;
719 break;
720 } else if (t->hash > hash)
721 break;
722
723 le2 = le2->Flink;
724 }
725 }
726
727 le = le->Flink;
728 }
729
730 for (level = 0; level <= max_level; level++) {
731 le = items->Flink;
732 while (le != items) {
733 metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry);
734
735 if (mr->data->level == level) {
736 BOOL done = FALSE;
737 LIST_ENTRY* le2;
738 tree_write* tw;
739 UINT64 flags;
740 tree* t3;
741
742 if (mr->system)
743 flags = Vcb->system_flags;
744 else if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS)
745 flags = Vcb->data_flags;
746 else
747 flags = Vcb->metadata_flags;
748
749 if (newchunk) {
750 ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE);
751
752 if (newchunk->chunk_item->type == flags && find_metadata_address_in_chunk(Vcb, newchunk, &mr->new_address)) {
753 newchunk->used += Vcb->superblock.node_size;
754 space_list_subtract(newchunk, FALSE, mr->new_address, Vcb->superblock.node_size, rollback);
755 done = TRUE;
756 }
757
758 ExReleaseResourceLite(&newchunk->lock);
759 }
760
761 if (!done) {
762 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
763
764 le2 = Vcb->chunks.Flink;
765 while (le2 != &Vcb->chunks) {
766 chunk* c2 = CONTAINING_RECORD(le2, chunk, list_entry);
767
768 if (!c2->readonly && !c2->reloc && c2 != newchunk && c2->chunk_item->type == flags) {
769 ExAcquireResourceExclusiveLite(&c2->lock, TRUE);
770
771 if ((c2->chunk_item->size - c2->used) >= Vcb->superblock.node_size) {
772 if (find_metadata_address_in_chunk(Vcb, c2, &mr->new_address)) {
773 c2->used += Vcb->superblock.node_size;
774 space_list_subtract(c2, FALSE, mr->new_address, Vcb->superblock.node_size, rollback);
775 ExReleaseResourceLite(&c2->lock);
776 newchunk = c2;
777 done = TRUE;
778 break;
779 }
780 }
781
782 ExReleaseResourceLite(&c2->lock);
783 }
784
785 le2 = le2->Flink;
786 }
787
788 // allocate new chunk if necessary
789 if (!done) {
790 Status = alloc_chunk(Vcb, flags, &newchunk, FALSE);
791
792 if (!NT_SUCCESS(Status)) {
793 ERR("alloc_chunk returned %08x\n", Status);
794 ExReleaseResourceLite(&Vcb->chunk_lock);
795 goto end;
796 }
797
798 ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE);
799
800 newchunk->balance_num = Vcb->balance.balance_num;
801
802 if (!find_metadata_address_in_chunk(Vcb, newchunk, &mr->new_address)) {
803 ExReleaseResourceLite(&newchunk->lock);
804 ExReleaseResourceLite(&Vcb->chunk_lock);
805 ERR("could not find address in new chunk\n");
806 Status = STATUS_DISK_FULL;
807 goto end;
808 } else {
809 newchunk->used += Vcb->superblock.node_size;
810 space_list_subtract(newchunk, FALSE, mr->new_address, Vcb->superblock.node_size, rollback);
811 }
812
813 ExReleaseResourceLite(&newchunk->lock);
814 }
815
816 ExReleaseResourceLite(&Vcb->chunk_lock);
817 }
818
819 // update parents
820 le2 = mr->refs.Flink;
821 while (le2 != &mr->refs) {
822 metadata_reloc_ref* ref = CONTAINING_RECORD(le2, metadata_reloc_ref, list_entry);
823
824 if (ref->parent) {
825 UINT16 i;
826 internal_node* in = (internal_node*)&ref->parent->data[1];
827
828 for (i = 0; i < ref->parent->data->num_items; i++) {
829 if (in[i].address == mr->address) {
830 in[i].address = mr->new_address;
831 break;
832 }
833 }
834
835 if (ref->parent->t) {
836 LIST_ENTRY* le3;
837
838 le3 = ref->parent->t->itemlist.Flink;
839 while (le3 != &ref->parent->t->itemlist) {
840 tree_data* td = CONTAINING_RECORD(le3, tree_data, list_entry);
841
842 if (!td->inserted && td->treeholder.address == mr->address)
843 td->treeholder.address = mr->new_address;
844
845 le3 = le3->Flink;
846 }
847 }
848 } else if (ref->top && ref->type == TYPE_TREE_BLOCK_REF) {
849 LIST_ENTRY* le3;
850 root* r = NULL;
851
852 // alter ROOT_ITEM
853
854 le3 = Vcb->roots.Flink;
855 while (le3 != &Vcb->roots) {
856 root* r2 = CONTAINING_RECORD(le3, root, list_entry);
857
858 if (r2->id == ref->tbr.offset) {
859 r = r2;
860 break;
861 }
862
863 le3 = le3->Flink;
864 }
865
866 if (r) {
867 r->treeholder.address = mr->new_address;
868
869 if (r == Vcb->root_root)
870 Vcb->superblock.root_tree_addr = mr->new_address;
871 else if (r == Vcb->chunk_root)
872 Vcb->superblock.chunk_tree_addr = mr->new_address;
873 else if (r->root_item.block_number == mr->address) {
874 KEY searchkey;
875 ROOT_ITEM* ri;
876
877 r->root_item.block_number = mr->new_address;
878
879 searchkey.obj_id = r->id;
880 searchkey.obj_type = TYPE_ROOT_ITEM;
881 searchkey.offset = 0xffffffffffffffff;
882
883 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
884 if (!NT_SUCCESS(Status)) {
885 ERR("find_item returned %08x\n", Status);
886 goto end;
887 }
888
889 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
890 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
891 Status = STATUS_INTERNAL_ERROR;
892 goto end;
893 }
894
895 ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
896 if (!ri) {
897 ERR("out of memory\n");
898 Status = STATUS_INSUFFICIENT_RESOURCES;
899 goto end;
900 }
901
902 RtlCopyMemory(ri, &r->root_item, sizeof(ROOT_ITEM));
903
904 Status = delete_tree_item(Vcb, &tp);
905 if (!NT_SUCCESS(Status)) {
906 ERR("delete_tree_item returned %08x\n", Status);
907 goto end;
908 }
909
910 Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, NULL);
911 if (!NT_SUCCESS(Status)) {
912 ERR("insert_tree_item returned %08x\n", Status);
913 goto end;
914 }
915 }
916 }
917 }
918
919 le2 = le2->Flink;
920 }
921
922 mr->data->address = mr->new_address;
923
924 t3 = mr->t;
925
926 while (t3) {
927 UINT8 h;
928 BOOL inserted;
929 tree* t4 = NULL;
930
931 // check if tree loaded more than once
932 if (t3->list_entry.Flink != &Vcb->trees_hash) {
933 tree* nt = CONTAINING_RECORD(t3->list_entry_hash.Flink, tree, list_entry_hash);
934
935 if (nt->header.address == t3->header.address)
936 t4 = nt;
937 }
938
939 t3->header.address = mr->new_address;
940
941 h = t3->hash >> 24;
942
943 if (Vcb->trees_ptrs[h] == &t3->list_entry_hash) {
944 if (t3->list_entry_hash.Flink == &Vcb->trees_hash)
945 Vcb->trees_ptrs[h] = NULL;
946 else {
947 tree* t2 = CONTAINING_RECORD(t3->list_entry_hash.Flink, tree, list_entry_hash);
948
949 if (t2->hash >> 24 == h)
950 Vcb->trees_ptrs[h] = &t2->list_entry_hash;
951 else
952 Vcb->trees_ptrs[h] = NULL;
953 }
954 }
955
956 RemoveEntryList(&t3->list_entry_hash);
957
958 t3->hash = calc_crc32c(0xffffffff, (UINT8*)&t3->header.address, sizeof(UINT64));
959 h = t3->hash >> 24;
960
961 if (!Vcb->trees_ptrs[h]) {
962 UINT8 h2 = h;
963
964 le2 = Vcb->trees_hash.Flink;
965
966 if (h2 > 0) {
967 h2--;
968 do {
969 if (Vcb->trees_ptrs[h2]) {
970 le2 = Vcb->trees_ptrs[h2];
971 break;
972 }
973
974 h2--;
975 } while (h2 > 0);
976 }
977 } else
978 le2 = Vcb->trees_ptrs[h];
979
980 inserted = FALSE;
981 while (le2 != &Vcb->trees_hash) {
982 tree* t2 = CONTAINING_RECORD(le2, tree, list_entry_hash);
983
984 if (t2->hash >= t3->hash) {
985 InsertHeadList(le2->Blink, &t3->list_entry_hash);
986 inserted = TRUE;
987 break;
988 }
989
990 le2 = le2->Flink;
991 }
992
993 if (!inserted)
994 InsertTailList(&Vcb->trees_hash, &t3->list_entry_hash);
995
996 if (!Vcb->trees_ptrs[h] || t3->list_entry_hash.Flink == Vcb->trees_ptrs[h])
997 Vcb->trees_ptrs[h] = &t3->list_entry_hash;
998
999 if (data_items && level == 0) {
1000 le2 = data_items->Flink;
1001
1002 while (le2 != data_items) {
1003 data_reloc* dr = CONTAINING_RECORD(le2, data_reloc, list_entry);
1004 LIST_ENTRY* le3 = t3->itemlist.Flink;
1005
1006 while (le3 != &t3->itemlist) {
1007 tree_data* td = CONTAINING_RECORD(le3, tree_data, list_entry);
1008
1009 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1010 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1011
1012 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
1013 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1014
1015 if (ed2->address == dr->address)
1016 ed2->address = dr->new_address;
1017 }
1018 }
1019
1020 le3 = le3->Flink;
1021 }
1022
1023 le2 = le2->Flink;
1024 }
1025 }
1026
1027 t3 = t4;
1028 }
1029
1030 *((UINT32*)mr->data) = ~calc_crc32c(0xffffffff, (UINT8*)&mr->data->fs_uuid, Vcb->superblock.node_size - sizeof(mr->data->csum));
1031
1032 tw = ExAllocatePoolWithTag(PagedPool, sizeof(tree_write), ALLOC_TAG);
1033 if (!tw) {
1034 ERR("out of memory\n");
1035 Status = STATUS_INSUFFICIENT_RESOURCES;
1036 goto end;
1037 }
1038
1039 tw->address = mr->new_address;
1040 tw->length = Vcb->superblock.node_size;
1041 tw->data = (UINT8*)mr->data;
1042
1043 if (IsListEmpty(&tree_writes))
1044 InsertTailList(&tree_writes, &tw->list_entry);
1045 else {
1046 BOOL inserted = FALSE;
1047
1048 le2 = tree_writes.Flink;
1049 while (le2 != &tree_writes) {
1050 tree_write* tw2 = CONTAINING_RECORD(le2, tree_write, list_entry);
1051
1052 if (tw2->address > tw->address) {
1053 InsertHeadList(le2->Blink, &tw->list_entry);
1054 inserted = TRUE;
1055 break;
1056 }
1057
1058 le2 = le2->Flink;
1059 }
1060
1061 if (!inserted)
1062 InsertTailList(&tree_writes, &tw->list_entry);
1063 }
1064 }
1065
1066 le = le->Flink;
1067 }
1068 }
1069
1070 Status = do_tree_writes(Vcb, &tree_writes, TRUE);
1071 if (!NT_SUCCESS(Status)) {
1072 ERR("do_tree_writes returned %08x\n", Status);
1073 goto end;
1074 }
1075
1076 le = items->Flink;
1077 while (le != items) {
1078 metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry);
1079
1080 Status = add_metadata_reloc_extent_item(Vcb, mr);
1081 if (!NT_SUCCESS(Status)) {
1082 ERR("add_metadata_reloc_extent_item returned %08x\n", Status);
1083 goto end;
1084 }
1085
1086 le = le->Flink;
1087 }
1088
1089 Status = STATUS_SUCCESS;
1090
1091 end:
1092 while (!IsListEmpty(&tree_writes)) {
1093 tree_write* tw = CONTAINING_RECORD(RemoveHeadList(&tree_writes), tree_write, list_entry);
1094 ExFreePool(tw);
1095 }
1096
1097 return Status;
1098 }
1099
1100 static NTSTATUS balance_metadata_chunk(device_extension* Vcb, chunk* c, BOOL* changed) {
1101 KEY searchkey;
1102 traverse_ptr tp;
1103 NTSTATUS Status;
1104 BOOL b;
1105 LIST_ENTRY items, rollback;
1106 UINT32 loaded = 0;
1107
1108 TRACE("chunk %llx\n", c->offset);
1109
1110 InitializeListHead(&rollback);
1111 InitializeListHead(&items);
1112
1113 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
1114
1115 searchkey.obj_id = c->offset;
1116 searchkey.obj_type = TYPE_METADATA_ITEM;
1117 searchkey.offset = 0xffffffffffffffff;
1118
1119 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
1120 if (!NT_SUCCESS(Status)) {
1121 ERR("find_item returned %08x\n", Status);
1122 goto end;
1123 }
1124
1125 do {
1126 traverse_ptr next_tp;
1127
1128 if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
1129 break;
1130
1131 if (tp.item->key.obj_id >= c->offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) {
1132 BOOL tree = FALSE, skinny = FALSE;
1133
1134 if (tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) {
1135 tree = TRUE;
1136 skinny = TRUE;
1137 } else if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.offset == Vcb->superblock.node_size &&
1138 tp.item->size >= sizeof(EXTENT_ITEM)) {
1139 EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
1140
1141 if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
1142 tree = TRUE;
1143 }
1144
1145 if (tree) {
1146 Status = add_metadata_reloc(Vcb, &items, &tp, skinny, NULL, c, &rollback);
1147
1148 if (!NT_SUCCESS(Status)) {
1149 ERR("add_metadata_reloc returned %08x\n", Status);
1150 goto end;
1151 }
1152
1153 loaded++;
1154
1155 if (loaded >= 64) // only do 64 at a time
1156 break;
1157 }
1158 }
1159
1160 b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
1161
1162 if (b)
1163 tp = next_tp;
1164 } while (b);
1165
1166 if (IsListEmpty(&items)) {
1167 *changed = FALSE;
1168 Status = STATUS_SUCCESS;
1169 goto end;
1170 } else
1171 *changed = TRUE;
1172
1173 Status = write_metadata_items(Vcb, &items, NULL, c, &rollback);
1174 if (!NT_SUCCESS(Status)) {
1175 ERR("write_metadata_items returned %08x\n", Status);
1176 goto end;
1177 }
1178
1179 Status = STATUS_SUCCESS;
1180
1181 Vcb->need_write = TRUE;
1182
1183 end:
1184 if (NT_SUCCESS(Status)) {
1185 Status = do_write(Vcb, NULL);
1186 if (!NT_SUCCESS(Status))
1187 ERR("do_write returned %08x\n", Status);
1188 }
1189
1190 if (NT_SUCCESS(Status))
1191 clear_rollback(&rollback);
1192 else
1193 do_rollback(Vcb, &rollback);
1194
1195 free_trees(Vcb);
1196
1197 ExReleaseResourceLite(&Vcb->tree_lock);
1198
1199 while (!IsListEmpty(&items)) {
1200 metadata_reloc* mr = CONTAINING_RECORD(RemoveHeadList(&items), metadata_reloc, list_entry);
1201
1202 while (!IsListEmpty(&mr->refs)) {
1203 metadata_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&mr->refs), metadata_reloc_ref, list_entry);
1204
1205 ExFreePool(ref);
1206 }
1207
1208 ExFreePool(mr);
1209 }
1210
1211 return Status;
1212 }
1213
1214 static NTSTATUS data_reloc_add_tree_edr(_Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, LIST_ENTRY* metadata_items,
1215 data_reloc* dr, EXTENT_DATA_REF* edr, LIST_ENTRY* rollback) {
1216 NTSTATUS Status;
1217 LIST_ENTRY* le;
1218 KEY searchkey;
1219 traverse_ptr tp;
1220 root* r = NULL;
1221 metadata_reloc* mr;
1222 UINT64 last_tree = 0;
1223 data_reloc_ref* ref;
1224
1225 le = Vcb->roots.Flink;
1226 while (le != &Vcb->roots) {
1227 root* r2 = CONTAINING_RECORD(le, root, list_entry);
1228
1229 if (r2->id == edr->root) {
1230 r = r2;
1231 break;
1232 }
1233
1234 le = le->Flink;
1235 }
1236
1237 if (!r) {
1238 ERR("could not find subvol %llx\n", edr->count);
1239 return STATUS_INTERNAL_ERROR;
1240 }
1241
1242 searchkey.obj_id = edr->objid;
1243 searchkey.obj_type = TYPE_EXTENT_DATA;
1244 searchkey.offset = 0;
1245
1246 Status = find_item(Vcb, r, &tp, &searchkey, FALSE, NULL);
1247 if (!NT_SUCCESS(Status)) {
1248 ERR("find_item returned %08x\n", Status);
1249 return Status;
1250 }
1251
1252 if (tp.item->key.obj_id < searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type < searchkey.obj_type)) {
1253 traverse_ptr tp2;
1254
1255 if (find_next_item(Vcb, &tp, &tp2, FALSE, NULL))
1256 tp = tp2;
1257 else {
1258 ERR("could not find EXTENT_DATA for inode %llx in root %llx\n", searchkey.obj_id, r->id);
1259 return STATUS_INTERNAL_ERROR;
1260 }
1261 }
1262
1263 ref = NULL;
1264
1265 while (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
1266 traverse_ptr tp2;
1267
1268 if (tp.item->size >= sizeof(EXTENT_DATA)) {
1269 EXTENT_DATA* ed = (EXTENT_DATA*)tp.item->data;
1270
1271 if ((ed->type == EXTENT_TYPE_PREALLOC || ed->type == EXTENT_TYPE_REGULAR) && tp.item->size >= offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2)) {
1272 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1273
1274 if (ed2->address == dr->address && ed2->size == dr->size && tp.item->key.offset - ed2->offset == edr->offset) {
1275 if (ref && last_tree == tp.tree->header.address)
1276 ref->edr.count++;
1277 else {
1278 ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG);
1279 if (!ref) {
1280 ERR("out of memory\n");
1281 return STATUS_INSUFFICIENT_RESOURCES;
1282 }
1283
1284 ref->type = TYPE_EXTENT_DATA_REF;
1285 RtlCopyMemory(&ref->edr, edr, sizeof(EXTENT_DATA_REF));
1286 ref->edr.count = 1;
1287
1288 Status = add_metadata_reloc_parent(Vcb, metadata_items, tp.tree->header.address, &mr, rollback);
1289 if (!NT_SUCCESS(Status)) {
1290 ERR("add_metadata_reloc_parent returned %08x\n", Status);
1291 ExFreePool(ref);
1292 return Status;
1293 }
1294
1295 last_tree = tp.tree->header.address;
1296 ref->parent = mr;
1297
1298 InsertTailList(&dr->refs, &ref->list_entry);
1299 }
1300 }
1301 }
1302 }
1303
1304 if (find_next_item(Vcb, &tp, &tp2, FALSE, NULL))
1305 tp = tp2;
1306 else
1307 break;
1308 }
1309
1310 return STATUS_SUCCESS;
1311 }
1312
1313 static NTSTATUS add_data_reloc(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, LIST_ENTRY* items, LIST_ENTRY* metadata_items,
1314 traverse_ptr* tp, chunk* c, LIST_ENTRY* rollback) {
1315 NTSTATUS Status;
1316 data_reloc* dr;
1317 EXTENT_ITEM* ei;
1318 UINT16 len;
1319 UINT64 inline_rc;
1320 UINT8* ptr;
1321
1322 dr = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc), ALLOC_TAG);
1323 if (!dr) {
1324 ERR("out of memory\n");
1325 return STATUS_INSUFFICIENT_RESOURCES;
1326 }
1327
1328 dr->address = tp->item->key.obj_id;
1329 dr->size = tp->item->key.offset;
1330 dr->ei = (EXTENT_ITEM*)tp->item->data;
1331 InitializeListHead(&dr->refs);
1332
1333 Status = delete_tree_item(Vcb, tp);
1334 if (!NT_SUCCESS(Status)) {
1335 ERR("delete_tree_item returned %08x\n", Status);
1336 return Status;
1337 }
1338
1339 if (!c)
1340 c = get_chunk_from_address(Vcb, tp->item->key.obj_id);
1341
1342 if (c) {
1343 ExAcquireResourceExclusiveLite(&c->lock, TRUE);
1344
1345 c->used -= tp->item->key.offset;
1346
1347 space_list_add(c, tp->item->key.obj_id, tp->item->key.offset, rollback);
1348
1349 ExReleaseResourceLite(&c->lock);
1350 }
1351
1352 ei = (EXTENT_ITEM*)tp->item->data;
1353 inline_rc = 0;
1354
1355 len = tp->item->size - sizeof(EXTENT_ITEM);
1356 ptr = (UINT8*)tp->item->data + sizeof(EXTENT_ITEM);
1357
1358 while (len > 0) {
1359 UINT8 secttype = *ptr;
1360 UINT16 sectlen = secttype == TYPE_EXTENT_DATA_REF ? sizeof(EXTENT_DATA_REF) : (secttype == TYPE_SHARED_DATA_REF ? sizeof(SHARED_DATA_REF) : 0);
1361
1362 len--;
1363
1364 if (sectlen > len) {
1365 ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, len, sectlen);
1366 return STATUS_INTERNAL_ERROR;
1367 }
1368
1369 if (sectlen == 0) {
1370 ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, secttype);
1371 return STATUS_INTERNAL_ERROR;
1372 }
1373
1374 if (secttype == TYPE_EXTENT_DATA_REF) {
1375 EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)(ptr + sizeof(UINT8));
1376
1377 inline_rc += edr->count;
1378
1379 Status = data_reloc_add_tree_edr(Vcb, metadata_items, dr, edr, rollback);
1380 if (!NT_SUCCESS(Status)) {
1381 ERR("data_reloc_add_tree_edr returned %08x\n", Status);
1382 return Status;
1383 }
1384 } else if (secttype == TYPE_SHARED_DATA_REF) {
1385 metadata_reloc* mr;
1386 data_reloc_ref* ref;
1387
1388 ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG);
1389 if (!ref) {
1390 ERR("out of memory\n");
1391 return STATUS_INSUFFICIENT_RESOURCES;
1392 }
1393
1394 ref->type = TYPE_SHARED_DATA_REF;
1395 RtlCopyMemory(&ref->sdr, ptr + sizeof(UINT8), sizeof(SHARED_DATA_REF));
1396 inline_rc += ref->sdr.count;
1397
1398 Status = add_metadata_reloc_parent(Vcb, metadata_items, ref->sdr.offset, &mr, rollback);
1399 if (!NT_SUCCESS(Status)) {
1400 ERR("add_metadata_reloc_parent returned %08x\n", Status);
1401 ExFreePool(ref);
1402 return Status;
1403 }
1404
1405 ref->parent = mr;
1406
1407 InsertTailList(&dr->refs, &ref->list_entry);
1408 } else {
1409 ERR("unexpected tree type %x\n", secttype);
1410 return STATUS_INTERNAL_ERROR;
1411 }
1412
1413
1414 len -= sectlen;
1415 ptr += sizeof(UINT8) + sectlen;
1416 }
1417
1418 if (inline_rc < ei->refcount) { // look for non-inline entries
1419 traverse_ptr tp2 = *tp, next_tp;
1420
1421 while (find_next_item(Vcb, &tp2, &next_tp, FALSE, NULL)) {
1422 tp2 = next_tp;
1423
1424 if (tp2.item->key.obj_id == tp->item->key.obj_id) {
1425 if (tp2.item->key.obj_type == TYPE_EXTENT_DATA_REF && tp2.item->size >= sizeof(EXTENT_DATA_REF)) {
1426 Status = data_reloc_add_tree_edr(Vcb, metadata_items, dr, (EXTENT_DATA_REF*)tp2.item->data, rollback);
1427 if (!NT_SUCCESS(Status)) {
1428 ERR("data_reloc_add_tree_edr returned %08x\n", Status);
1429 return Status;
1430 }
1431
1432 Status = delete_tree_item(Vcb, &tp2);
1433 if (!NT_SUCCESS(Status)) {
1434 ERR("delete_tree_item returned %08x\n", Status);
1435 return Status;
1436 }
1437 } else if (tp2.item->key.obj_type == TYPE_SHARED_DATA_REF && tp2.item->size >= sizeof(UINT32)) {
1438 metadata_reloc* mr;
1439 data_reloc_ref* ref;
1440
1441 ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG);
1442 if (!ref) {
1443 ERR("out of memory\n");
1444 return STATUS_INSUFFICIENT_RESOURCES;
1445 }
1446
1447 ref->type = TYPE_SHARED_DATA_REF;
1448 ref->sdr.offset = tp2.item->key.offset;
1449 ref->sdr.count = *((UINT32*)tp2.item->data);
1450
1451 Status = add_metadata_reloc_parent(Vcb, metadata_items, ref->sdr.offset, &mr, rollback);
1452 if (!NT_SUCCESS(Status)) {
1453 ERR("add_metadata_reloc_parent returned %08x\n", Status);
1454 ExFreePool(ref);
1455 return Status;
1456 }
1457
1458 ref->parent = mr;
1459 InsertTailList(&dr->refs, &ref->list_entry);
1460
1461 Status = delete_tree_item(Vcb, &tp2);
1462 if (!NT_SUCCESS(Status)) {
1463 ERR("delete_tree_item returned %08x\n", Status);
1464 return Status;
1465 }
1466 }
1467 } else
1468 break;
1469 }
1470 }
1471
1472 InsertTailList(items, &dr->list_entry);
1473
1474 return STATUS_SUCCESS;
1475 }
1476
1477 static void sort_data_reloc_refs(data_reloc* dr) {
1478 LIST_ENTRY newlist, *le;
1479
1480 if (IsListEmpty(&dr->refs))
1481 return;
1482
1483 // insertion sort
1484
1485 InitializeListHead(&newlist);
1486
1487 while (!IsListEmpty(&dr->refs)) {
1488 data_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&dr->refs), data_reloc_ref, list_entry);
1489 BOOL inserted = FALSE;
1490
1491 if (ref->type == TYPE_EXTENT_DATA_REF)
1492 ref->hash = get_extent_data_ref_hash2(ref->edr.root, ref->edr.objid, ref->edr.offset);
1493 else if (ref->type == TYPE_SHARED_DATA_REF)
1494 ref->hash = ref->parent->new_address;
1495
1496 le = newlist.Flink;
1497 while (le != &newlist) {
1498 data_reloc_ref* ref2 = CONTAINING_RECORD(le, data_reloc_ref, list_entry);
1499
1500 if (ref->type < ref2->type || (ref->type == ref2->type && ref->hash > ref2->hash)) {
1501 InsertHeadList(le->Blink, &ref->list_entry);
1502 inserted = TRUE;
1503 break;
1504 }
1505
1506 le = le->Flink;
1507 }
1508
1509 if (!inserted)
1510 InsertTailList(&newlist, &ref->list_entry);
1511 }
1512
1513 le = newlist.Flink;
1514 while (le != &newlist) {
1515 data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry);
1516
1517 if (le->Flink != &newlist) {
1518 data_reloc_ref* ref2 = CONTAINING_RECORD(le->Flink, data_reloc_ref, list_entry);
1519
1520 if (ref->type == TYPE_EXTENT_DATA_REF && ref2->type == TYPE_EXTENT_DATA_REF && ref->edr.root == ref2->edr.root &&
1521 ref->edr.objid == ref2->edr.objid && ref->edr.offset == ref2->edr.offset) {
1522 RemoveEntryList(&ref2->list_entry);
1523 ref->edr.count += ref2->edr.count;
1524 ExFreePool(ref2);
1525 continue;
1526 }
1527 }
1528
1529 le = le->Flink;
1530 }
1531
1532 newlist.Flink->Blink = &dr->refs;
1533 newlist.Blink->Flink = &dr->refs;
1534 dr->refs.Flink = newlist.Flink;
1535 dr->refs.Blink = newlist.Blink;
1536 }
1537
1538 static NTSTATUS add_data_reloc_extent_item(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, data_reloc* dr) {
1539 NTSTATUS Status;
1540 LIST_ENTRY* le;
1541 UINT64 rc = 0;
1542 UINT16 inline_len;
1543 BOOL all_inline = TRUE;
1544 data_reloc_ref* first_noninline = NULL;
1545 EXTENT_ITEM* ei;
1546 UINT8* ptr;
1547
1548 inline_len = sizeof(EXTENT_ITEM);
1549
1550 sort_data_reloc_refs(dr);
1551
1552 le = dr->refs.Flink;
1553 while (le != &dr->refs) {
1554 data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry);
1555 UINT16 extlen = 0;
1556
1557 if (ref->type == TYPE_EXTENT_DATA_REF) {
1558 extlen += sizeof(EXTENT_DATA_REF);
1559 rc += ref->edr.count;
1560 } else if (ref->type == TYPE_SHARED_DATA_REF) {
1561 extlen += sizeof(SHARED_DATA_REF);
1562 rc++;
1563 }
1564
1565 if (all_inline) {
1566 if ((ULONG)(inline_len + 1 + extlen) > (Vcb->superblock.node_size >> 2)) {
1567 all_inline = FALSE;
1568 first_noninline = ref;
1569 } else
1570 inline_len += extlen + 1;
1571 }
1572
1573 le = le->Flink;
1574 }
1575
1576 ei = ExAllocatePoolWithTag(PagedPool, inline_len, ALLOC_TAG);
1577 if (!ei) {
1578 ERR("out of memory\n");
1579 return STATUS_INSUFFICIENT_RESOURCES;
1580 }
1581
1582 ei->refcount = rc;
1583 ei->generation = dr->ei->generation;
1584 ei->flags = dr->ei->flags;
1585 ptr = (UINT8*)&ei[1];
1586
1587 le = dr->refs.Flink;
1588 while (le != &dr->refs) {
1589 data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry);
1590
1591 if (ref == first_noninline)
1592 break;
1593
1594 *ptr = ref->type;
1595 ptr++;
1596
1597 if (ref->type == TYPE_EXTENT_DATA_REF) {
1598 EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)ptr;
1599
1600 RtlCopyMemory(edr, &ref->edr, sizeof(EXTENT_DATA_REF));
1601
1602 ptr += sizeof(EXTENT_DATA_REF);
1603 } else if (ref->type == TYPE_SHARED_DATA_REF) {
1604 SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)ptr;
1605
1606 sdr->offset = ref->parent->new_address;
1607 sdr->count = ref->sdr.count;
1608
1609 ptr += sizeof(SHARED_DATA_REF);
1610 }
1611
1612 le = le->Flink;
1613 }
1614
1615 Status = insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_EXTENT_ITEM, dr->size, ei, inline_len, NULL, NULL);
1616 if (!NT_SUCCESS(Status)) {
1617 ERR("insert_tree_item returned %08x\n", Status);
1618 return Status;
1619 }
1620
1621 if (!all_inline) {
1622 le = &first_noninline->list_entry;
1623
1624 while (le != &dr->refs) {
1625 data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry);
1626
1627 if (ref->type == TYPE_EXTENT_DATA_REF) {
1628 EXTENT_DATA_REF* edr;
1629
1630 edr = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA_REF), ALLOC_TAG);
1631 if (!edr) {
1632 ERR("out of memory\n");
1633 return STATUS_INSUFFICIENT_RESOURCES;
1634 }
1635
1636 RtlCopyMemory(edr, &ref->edr, sizeof(EXTENT_DATA_REF));
1637
1638 Status = insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_EXTENT_DATA_REF, ref->hash, edr, sizeof(EXTENT_DATA_REF), NULL, NULL);
1639 if (!NT_SUCCESS(Status)) {
1640 ERR("insert_tree_item returned %08x\n", Status);
1641 return Status;
1642 }
1643 } else if (ref->type == TYPE_SHARED_DATA_REF) {
1644 UINT32* sdr;
1645
1646 sdr = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32), ALLOC_TAG);
1647 if (!sdr) {
1648 ERR("out of memory\n");
1649 return STATUS_INSUFFICIENT_RESOURCES;
1650 }
1651
1652 *sdr = ref->sdr.count;
1653
1654 Status = insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_SHARED_DATA_REF, ref->parent->new_address, sdr, sizeof(UINT32), NULL, NULL);
1655 if (!NT_SUCCESS(Status)) {
1656 ERR("insert_tree_item returned %08x\n", Status);
1657 return Status;
1658 }
1659 }
1660
1661 le = le->Flink;
1662 }
1663 }
1664
1665 return STATUS_SUCCESS;
1666 }
1667
1668 static NTSTATUS balance_data_chunk(device_extension* Vcb, chunk* c, BOOL* changed) {
1669 KEY searchkey;
1670 traverse_ptr tp;
1671 NTSTATUS Status;
1672 BOOL b;
1673 LIST_ENTRY items, metadata_items, rollback, *le;
1674 UINT64 loaded = 0, num_loaded = 0;
1675 chunk* newchunk = NULL;
1676 UINT8* data = NULL;
1677
1678 TRACE("chunk %llx\n", c->offset);
1679
1680 InitializeListHead(&rollback);
1681 InitializeListHead(&items);
1682 InitializeListHead(&metadata_items);
1683
1684 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
1685
1686 searchkey.obj_id = c->offset;
1687 searchkey.obj_type = TYPE_EXTENT_ITEM;
1688 searchkey.offset = 0xffffffffffffffff;
1689
1690 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
1691 if (!NT_SUCCESS(Status)) {
1692 ERR("find_item returned %08x\n", Status);
1693 goto end;
1694 }
1695
1696 do {
1697 traverse_ptr next_tp;
1698
1699 if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
1700 break;
1701
1702 if (tp.item->key.obj_id >= c->offset && tp.item->key.obj_type == TYPE_EXTENT_ITEM) {
1703 BOOL tree = FALSE;
1704
1705 if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) {
1706 EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
1707
1708 if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
1709 tree = TRUE;
1710 }
1711
1712 if (!tree) {
1713 Status = add_data_reloc(Vcb, &items, &metadata_items, &tp, c, &rollback);
1714
1715 if (!NT_SUCCESS(Status)) {
1716 ERR("add_data_reloc returned %08x\n", Status);
1717 goto end;
1718 }
1719
1720 loaded += tp.item->key.offset;
1721 num_loaded++;
1722
1723 if (loaded >= 0x1000000 || num_loaded >= 100) // only do so much at a time, so we don't block too obnoxiously
1724 break;
1725 }
1726 }
1727
1728 b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
1729
1730 if (b)
1731 tp = next_tp;
1732 } while (b);
1733
1734 if (IsListEmpty(&items)) {
1735 *changed = FALSE;
1736 Status = STATUS_SUCCESS;
1737 goto end;
1738 } else
1739 *changed = TRUE;
1740
1741 data = ExAllocatePoolWithTag(PagedPool, BALANCE_UNIT, ALLOC_TAG);
1742 if (!data) {
1743 ERR("out of memory\n");
1744 Status = STATUS_INSUFFICIENT_RESOURCES;
1745 goto end;
1746 }
1747
1748 le = items.Flink;
1749 while (le != &items) {
1750 data_reloc* dr = CONTAINING_RECORD(le, data_reloc, list_entry);
1751 BOOL done = FALSE;
1752 LIST_ENTRY* le2;
1753 UINT32* csum;
1754 RTL_BITMAP bmp;
1755 ULONG* bmparr;
1756 ULONG runlength, index, lastoff;
1757
1758 if (newchunk) {
1759 ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE);
1760
1761 if (find_data_address_in_chunk(Vcb, newchunk, dr->size, &dr->new_address)) {
1762 newchunk->used += dr->size;
1763 space_list_subtract(newchunk, FALSE, dr->new_address, dr->size, &rollback);
1764 done = TRUE;
1765 }
1766
1767 ExReleaseResourceLite(&newchunk->lock);
1768 }
1769
1770 if (!done) {
1771 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
1772
1773 le2 = Vcb->chunks.Flink;
1774 while (le2 != &Vcb->chunks) {
1775 chunk* c2 = CONTAINING_RECORD(le2, chunk, list_entry);
1776
1777 if (!c2->readonly && !c2->reloc && c2 != newchunk && c2->chunk_item->type == Vcb->data_flags) {
1778 ExAcquireResourceExclusiveLite(&c2->lock, TRUE);
1779
1780 if ((c2->chunk_item->size - c2->used) >= dr->size) {
1781 if (find_data_address_in_chunk(Vcb, c2, dr->size, &dr->new_address)) {
1782 c2->used += dr->size;
1783 space_list_subtract(c2, FALSE, dr->new_address, dr->size, &rollback);
1784 ExReleaseResourceLite(&c2->lock);
1785 newchunk = c2;
1786 done = TRUE;
1787 break;
1788 }
1789 }
1790
1791 ExReleaseResourceLite(&c2->lock);
1792 }
1793
1794 le2 = le2->Flink;
1795 }
1796
1797 // allocate new chunk if necessary
1798 if (!done) {
1799 Status = alloc_chunk(Vcb, Vcb->data_flags, &newchunk, FALSE);
1800
1801 if (!NT_SUCCESS(Status)) {
1802 ERR("alloc_chunk returned %08x\n", Status);
1803 ExReleaseResourceLite(&Vcb->chunk_lock);
1804 goto end;
1805 }
1806
1807 ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE);
1808
1809 newchunk->balance_num = Vcb->balance.balance_num;
1810
1811 if (!find_data_address_in_chunk(Vcb, newchunk, dr->size, &dr->new_address)) {
1812 ExReleaseResourceLite(&newchunk->lock);
1813 ExReleaseResourceLite(&Vcb->chunk_lock);
1814 ERR("could not find address in new chunk\n");
1815 Status = STATUS_DISK_FULL;
1816 goto end;
1817 } else {
1818 newchunk->used += dr->size;
1819 space_list_subtract(newchunk, FALSE, dr->new_address, dr->size, &rollback);
1820 }
1821
1822 ExReleaseResourceLite(&newchunk->lock);
1823 }
1824
1825 ExReleaseResourceLite(&Vcb->chunk_lock);
1826 }
1827
1828 dr->newchunk = newchunk;
1829
1830 bmparr = ExAllocatePoolWithTag(PagedPool, (ULONG)sector_align((dr->size / Vcb->superblock.sector_size) + 1, sizeof(ULONG)), ALLOC_TAG);
1831 if (!bmparr) {
1832 ERR("out of memory\n");
1833 Status = STATUS_INSUFFICIENT_RESOURCES;
1834 goto end;
1835 }
1836
1837 csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(dr->size * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG);
1838 if (!csum) {
1839 ERR("out of memory\n");
1840 ExFreePool(bmparr);
1841 Status = STATUS_INSUFFICIENT_RESOURCES;
1842 goto end;
1843 }
1844
1845 RtlInitializeBitMap(&bmp, bmparr, (ULONG)(dr->size / Vcb->superblock.sector_size));
1846 RtlSetAllBits(&bmp); // 1 = no csum, 0 = csum
1847
1848 searchkey.obj_id = EXTENT_CSUM_ID;
1849 searchkey.obj_type = TYPE_EXTENT_CSUM;
1850 searchkey.offset = dr->address;
1851
1852 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, NULL);
1853 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
1854 ERR("find_item returned %08x\n", Status);
1855 ExFreePool(csum);
1856 ExFreePool(bmparr);
1857 goto end;
1858 }
1859
1860 if (Status != STATUS_NOT_FOUND) {
1861 do {
1862 traverse_ptr next_tp;
1863
1864 if (tp.item->key.obj_type == TYPE_EXTENT_CSUM) {
1865 if (tp.item->key.offset >= dr->address + dr->size)
1866 break;
1867 else if (tp.item->size >= sizeof(UINT32) && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= dr->address) {
1868 UINT64 cs = max(dr->address, tp.item->key.offset);
1869 UINT64 ce = min(dr->address + dr->size, tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)));
1870
1871 RtlCopyMemory(csum + ((cs - dr->address) / Vcb->superblock.sector_size),
1872 tp.item->data + ((cs - tp.item->key.offset) * sizeof(UINT32) / Vcb->superblock.sector_size),
1873 (ULONG)((ce - cs) * sizeof(UINT32) / Vcb->superblock.sector_size));
1874
1875 RtlClearBits(&bmp, (ULONG)((cs - dr->address) / Vcb->superblock.sector_size), (ULONG)((ce - cs) / Vcb->superblock.sector_size));
1876
1877 if (ce == dr->address + dr->size)
1878 break;
1879 }
1880 }
1881
1882 if (find_next_item(Vcb, &tp, &next_tp, FALSE, NULL))
1883 tp = next_tp;
1884 else
1885 break;
1886 } while (TRUE);
1887 }
1888
1889 lastoff = 0;
1890 runlength = RtlFindFirstRunClear(&bmp, &index);
1891
1892 while (runlength != 0) {
1893 if (index > lastoff) {
1894 ULONG off = lastoff;
1895 ULONG size = index - lastoff;
1896
1897 // handle no csum run
1898 do {
1899 ULONG rl;
1900
1901 if (size * Vcb->superblock.sector_size > BALANCE_UNIT)
1902 rl = BALANCE_UNIT / Vcb->superblock.sector_size;
1903 else
1904 rl = size;
1905
1906 Status = read_data(Vcb, dr->address + (off * Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, NULL, FALSE, data,
1907 c, NULL, NULL, 0, FALSE, NormalPagePriority);
1908 if (!NT_SUCCESS(Status)) {
1909 ERR("read_data returned %08x\n", Status);
1910 ExFreePool(csum);
1911 ExFreePool(bmparr);
1912 goto end;
1913 }
1914
1915 Status = write_data_complete(Vcb, dr->new_address + (off * Vcb->superblock.sector_size), data, rl * Vcb->superblock.sector_size,
1916 NULL, newchunk, FALSE, 0, NormalPagePriority);
1917 if (!NT_SUCCESS(Status)) {
1918 ERR("write_data_complete returned %08x\n", Status);
1919 ExFreePool(csum);
1920 ExFreePool(bmparr);
1921 goto end;
1922 }
1923
1924 size -= rl;
1925 off += rl;
1926 } while (size > 0);
1927 }
1928
1929 add_checksum_entry(Vcb, dr->new_address + (index * Vcb->superblock.sector_size), runlength, &csum[index], NULL);
1930 add_checksum_entry(Vcb, dr->address + (index * Vcb->superblock.sector_size), runlength, NULL, NULL);
1931
1932 // handle csum run
1933 do {
1934 ULONG rl;
1935
1936 if (runlength * Vcb->superblock.sector_size > BALANCE_UNIT)
1937 rl = BALANCE_UNIT / Vcb->superblock.sector_size;
1938 else
1939 rl = runlength;
1940
1941 Status = read_data(Vcb, dr->address + (index * Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, &csum[index], FALSE, data,
1942 c, NULL, NULL, 0, FALSE, NormalPagePriority);
1943 if (!NT_SUCCESS(Status)) {
1944 ERR("read_data returned %08x\n", Status);
1945 ExFreePool(csum);
1946 ExFreePool(bmparr);
1947 goto end;
1948 }
1949
1950 Status = write_data_complete(Vcb, dr->new_address + (index * Vcb->superblock.sector_size), data, rl * Vcb->superblock.sector_size,
1951 NULL, newchunk, FALSE, 0, NormalPagePriority);
1952 if (!NT_SUCCESS(Status)) {
1953 ERR("write_data_complete returned %08x\n", Status);
1954 ExFreePool(csum);
1955 ExFreePool(bmparr);
1956 goto end;
1957 }
1958
1959 runlength -= rl;
1960 index += rl;
1961 } while (runlength > 0);
1962
1963 lastoff = index;
1964 runlength = RtlFindNextForwardRunClear(&bmp, index, &index);
1965 }
1966
1967 ExFreePool(csum);
1968 ExFreePool(bmparr);
1969
1970 // handle final nocsum run
1971 if (lastoff < dr->size / Vcb->superblock.sector_size) {
1972 ULONG off = lastoff;
1973 ULONG size = (ULONG)((dr->size / Vcb->superblock.sector_size) - lastoff);
1974
1975 do {
1976 ULONG rl;
1977
1978 if (size * Vcb->superblock.sector_size > BALANCE_UNIT)
1979 rl = BALANCE_UNIT / Vcb->superblock.sector_size;
1980 else
1981 rl = size;
1982
1983 Status = read_data(Vcb, dr->address + (off * Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, NULL, FALSE, data,
1984 c, NULL, NULL, 0, FALSE, NormalPagePriority);
1985 if (!NT_SUCCESS(Status)) {
1986 ERR("read_data returned %08x\n", Status);
1987 ExFreePool(csum);
1988 ExFreePool(bmparr);
1989 goto end;
1990 }
1991
1992 Status = write_data_complete(Vcb, dr->new_address + (off * Vcb->superblock.sector_size), data, rl * Vcb->superblock.sector_size,
1993 NULL, newchunk, FALSE, 0, NormalPagePriority);
1994 if (!NT_SUCCESS(Status)) {
1995 ERR("write_data_complete returned %08x\n", Status);
1996 ExFreePool(csum);
1997 ExFreePool(bmparr);
1998 goto end;
1999 }
2000
2001 size -= rl;
2002 off += rl;
2003 } while (size > 0);
2004 }
2005
2006 le = le->Flink;
2007 }
2008
2009 ExFreePool(data);
2010 data = NULL;
2011
2012 Status = write_metadata_items(Vcb, &metadata_items, &items, NULL, &rollback);
2013 if (!NT_SUCCESS(Status)) {
2014 ERR("write_metadata_items returned %08x\n", Status);
2015 goto end;
2016 }
2017
2018 le = items.Flink;
2019 while (le != &items) {
2020 data_reloc* dr = CONTAINING_RECORD(le, data_reloc, list_entry);
2021
2022 Status = add_data_reloc_extent_item(Vcb, dr);
2023 if (!NT_SUCCESS(Status)) {
2024 ERR("add_data_reloc_extent_item returned %08x\n", Status);
2025 goto end;
2026 }
2027
2028 le = le->Flink;
2029 }
2030
2031 le = c->changed_extents.Flink;
2032 while (le != &c->changed_extents) {
2033 LIST_ENTRY *le2, *le3;
2034 changed_extent* ce = CONTAINING_RECORD(le, changed_extent, list_entry);
2035
2036 le3 = le->Flink;
2037
2038 le2 = items.Flink;
2039 while (le2 != &items) {
2040 data_reloc* dr = CONTAINING_RECORD(le2, data_reloc, list_entry);
2041
2042 if (ce->address == dr->address) {
2043 ce->address = dr->new_address;
2044 RemoveEntryList(&ce->list_entry);
2045 InsertTailList(&dr->newchunk->changed_extents, &ce->list_entry);
2046 break;
2047 }
2048
2049 le2 = le2->Flink;
2050 }
2051
2052 le = le3;
2053 }
2054
2055 Status = STATUS_SUCCESS;
2056
2057 Vcb->need_write = TRUE;
2058
2059 end:
2060 if (NT_SUCCESS(Status)) {
2061 // update extents in cache inodes before we flush
2062 le = Vcb->chunks.Flink;
2063 while (le != &Vcb->chunks) {
2064 chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry);
2065
2066 if (c2->cache) {
2067 LIST_ENTRY* le2;
2068
2069 ExAcquireResourceExclusiveLite(c2->cache->Header.Resource, TRUE);
2070
2071 le2 = c2->cache->extents.Flink;
2072 while (le2 != &c2->cache->extents) {
2073 extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
2074
2075 if (!ext->ignore) {
2076 if (ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) {
2077 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
2078
2079 if (ed2->size > 0 && ed2->address >= c->offset && ed2->address < c->offset + c->chunk_item->size) {
2080 LIST_ENTRY* le3 = items.Flink;
2081 while (le3 != &items) {
2082 data_reloc* dr = CONTAINING_RECORD(le3, data_reloc, list_entry);
2083
2084 if (ed2->address == dr->address) {
2085 ed2->address = dr->new_address;
2086 break;
2087 }
2088
2089 le3 = le3->Flink;
2090 }
2091 }
2092 }
2093 }
2094
2095 le2 = le2->Flink;
2096 }
2097
2098 ExReleaseResourceLite(c2->cache->Header.Resource);
2099 }
2100
2101 le = le->Flink;
2102 }
2103
2104 Status = do_write(Vcb, NULL);
2105 if (!NT_SUCCESS(Status))
2106 ERR("do_write returned %08x\n", Status);
2107 }
2108
2109 if (NT_SUCCESS(Status)) {
2110 clear_rollback(&rollback);
2111
2112 // update open FCBs
2113 // FIXME - speed this up(?)
2114
2115 acquire_fcb_lock_shared(Vcb);
2116
2117 le = Vcb->all_fcbs.Flink;
2118 while (le != &Vcb->all_fcbs) {
2119 struct _fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_all);
2120 LIST_ENTRY* le2;
2121
2122 ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE);
2123
2124 le2 = fcb->extents.Flink;
2125 while (le2 != &fcb->extents) {
2126 extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
2127
2128 if (!ext->ignore) {
2129 if (ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) {
2130 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
2131
2132 if (ed2->size > 0 && ed2->address >= c->offset && ed2->address < c->offset + c->chunk_item->size) {
2133 LIST_ENTRY* le3 = items.Flink;
2134 while (le3 != &items) {
2135 data_reloc* dr = CONTAINING_RECORD(le3, data_reloc, list_entry);
2136
2137 if (ed2->address == dr->address) {
2138 ed2->address = dr->new_address;
2139 break;
2140 }
2141
2142 le3 = le3->Flink;
2143 }
2144 }
2145 }
2146 }
2147
2148 le2 = le2->Flink;
2149 }
2150
2151 ExReleaseResourceLite(fcb->Header.Resource);
2152
2153 le = le->Flink;
2154 }
2155
2156 release_fcb_lock(Vcb);
2157 } else
2158 do_rollback(Vcb, &rollback);
2159
2160 free_trees(Vcb);
2161
2162 ExReleaseResourceLite(&Vcb->tree_lock);
2163
2164 if (data)
2165 ExFreePool(data);
2166
2167 while (!IsListEmpty(&items)) {
2168 data_reloc* dr = CONTAINING_RECORD(RemoveHeadList(&items), data_reloc, list_entry);
2169
2170 while (!IsListEmpty(&dr->refs)) {
2171 data_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&dr->refs), data_reloc_ref, list_entry);
2172
2173 ExFreePool(ref);
2174 }
2175
2176 ExFreePool(dr);
2177 }
2178
2179 while (!IsListEmpty(&metadata_items)) {
2180 metadata_reloc* mr = CONTAINING_RECORD(RemoveHeadList(&metadata_items), metadata_reloc, list_entry);
2181
2182 while (!IsListEmpty(&mr->refs)) {
2183 metadata_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&mr->refs), metadata_reloc_ref, list_entry);
2184
2185 ExFreePool(ref);
2186 }
2187
2188 ExFreePool(mr);
2189 }
2190
2191 return Status;
2192 }
2193
2194 static __inline UINT64 get_chunk_dup_type(chunk* c) {
2195 if (c->chunk_item->type & BLOCK_FLAG_RAID0)
2196 return BLOCK_FLAG_RAID0;
2197 else if (c->chunk_item->type & BLOCK_FLAG_RAID1)
2198 return BLOCK_FLAG_RAID1;
2199 else if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE)
2200 return BLOCK_FLAG_DUPLICATE;
2201 else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
2202 return BLOCK_FLAG_RAID10;
2203 else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
2204 return BLOCK_FLAG_RAID5;
2205 else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
2206 return BLOCK_FLAG_RAID6;
2207 else
2208 return BLOCK_FLAG_SINGLE;
2209 }
2210
2211 static BOOL should_balance_chunk(device_extension* Vcb, UINT8 sort, chunk* c) {
2212 btrfs_balance_opts* opts;
2213
2214 opts = &Vcb->balance.opts[sort];
2215
2216 if (!(opts->flags & BTRFS_BALANCE_OPTS_ENABLED))
2217 return FALSE;
2218
2219 if (opts->flags & BTRFS_BALANCE_OPTS_PROFILES) {
2220 UINT64 type = get_chunk_dup_type(c);
2221
2222 if (!(type & opts->profiles))
2223 return FALSE;
2224 }
2225
2226 if (opts->flags & BTRFS_BALANCE_OPTS_DEVID) {
2227 UINT16 i;
2228 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
2229 BOOL b = FALSE;
2230
2231 for (i = 0; i < c->chunk_item->num_stripes; i++) {
2232 if (cis[i].dev_id == opts->devid) {
2233 b = TRUE;
2234 break;
2235 }
2236 }
2237
2238 if (!b)
2239 return FALSE;
2240 }
2241
2242 if (opts->flags & BTRFS_BALANCE_OPTS_DRANGE) {
2243 UINT16 i, factor;
2244 UINT64 physsize;
2245 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
2246 BOOL b = FALSE;
2247
2248 if (c->chunk_item->type & BLOCK_FLAG_RAID0)
2249 factor = c->chunk_item->num_stripes;
2250 else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
2251 factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
2252 else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
2253 factor = c->chunk_item->num_stripes - 1;
2254 else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
2255 factor = c->chunk_item->num_stripes - 2;
2256 else // SINGLE, DUPLICATE, RAID1
2257 factor = 1;
2258
2259 physsize = c->chunk_item->size / factor;
2260
2261 for (i = 0; i < c->chunk_item->num_stripes; i++) {
2262 if (cis[i].offset < opts->drange_end && cis[i].offset + physsize >= opts->drange_start &&
2263 (!(opts->flags & BTRFS_BALANCE_OPTS_DEVID) || cis[i].dev_id == opts->devid)) {
2264 b = TRUE;
2265 break;
2266 }
2267 }
2268
2269 if (!b)
2270 return FALSE;
2271 }
2272
2273 if (opts->flags & BTRFS_BALANCE_OPTS_VRANGE) {
2274 if (c->offset + c->chunk_item->size <= opts->vrange_start || c->offset > opts->vrange_end)
2275 return FALSE;
2276 }
2277
2278 if (opts->flags & BTRFS_BALANCE_OPTS_STRIPES) {
2279 if (c->chunk_item->num_stripes < opts->stripes_start || c->chunk_item->num_stripes < opts->stripes_end)
2280 return FALSE;
2281 }
2282
2283 if (opts->flags & BTRFS_BALANCE_OPTS_USAGE) {
2284 UINT64 usage = c->used * 100 / c->chunk_item->size;
2285
2286 // usage == 0 should mean completely empty, not just that usage rounds to 0%
2287 if (c->used > 0 && usage == 0)
2288 usage = 1;
2289
2290 if (usage < opts->usage_start || usage > opts->usage_end)
2291 return FALSE;
2292 }
2293
2294 if (opts->flags & BTRFS_BALANCE_OPTS_CONVERT && opts->flags & BTRFS_BALANCE_OPTS_SOFT) {
2295 UINT64 type = get_chunk_dup_type(c);
2296
2297 if (type == opts->convert)
2298 return FALSE;
2299 }
2300
2301 return TRUE;
2302 }
2303
2304 static void copy_balance_args(btrfs_balance_opts* opts, BALANCE_ARGS* args) {
2305 if (opts->flags & BTRFS_BALANCE_OPTS_PROFILES) {
2306 args->profiles = opts->profiles;
2307 args->flags |= BALANCE_ARGS_FLAGS_PROFILES;
2308 }
2309
2310 if (opts->flags & BTRFS_BALANCE_OPTS_USAGE) {
2311 if (args->usage_start == 0) {
2312 args->flags |= BALANCE_ARGS_FLAGS_USAGE_RANGE;
2313 args->usage_start = opts->usage_start;
2314 args->usage_end = opts->usage_end;
2315 } else {
2316 args->flags |= BALANCE_ARGS_FLAGS_USAGE;
2317 args->usage = opts->usage_end;
2318 }
2319 }
2320
2321 if (opts->flags & BTRFS_BALANCE_OPTS_DEVID) {
2322 args->devid = opts->devid;
2323 args->flags |= BALANCE_ARGS_FLAGS_DEVID;
2324 }
2325
2326 if (opts->flags & BTRFS_BALANCE_OPTS_DRANGE) {
2327 args->drange_start = opts->drange_start;
2328 args->drange_end = opts->drange_end;
2329 args->flags |= BALANCE_ARGS_FLAGS_DRANGE;
2330 }
2331
2332 if (opts->flags & BTRFS_BALANCE_OPTS_VRANGE) {
2333 args->vrange_start = opts->vrange_start;
2334 args->vrange_end = opts->vrange_end;
2335 args->flags |= BALANCE_ARGS_FLAGS_VRANGE;
2336 }
2337
2338 if (opts->flags & BTRFS_BALANCE_OPTS_CONVERT) {
2339 args->convert = opts->convert;
2340 args->flags |= BALANCE_ARGS_FLAGS_CONVERT;
2341
2342 if (opts->flags & BTRFS_BALANCE_OPTS_SOFT)
2343 args->flags |= BALANCE_ARGS_FLAGS_SOFT;
2344 }
2345
2346 if (opts->flags & BTRFS_BALANCE_OPTS_LIMIT) {
2347 if (args->limit_start == 0) {
2348 args->flags |= BALANCE_ARGS_FLAGS_LIMIT_RANGE;
2349 args->limit_start = (UINT32)opts->limit_start;
2350 args->limit_end = (UINT32)opts->limit_end;
2351 } else {
2352 args->flags |= BALANCE_ARGS_FLAGS_LIMIT;
2353 args->limit = opts->limit_end;
2354 }
2355 }
2356
2357 if (opts->flags & BTRFS_BALANCE_OPTS_STRIPES) {
2358 args->stripes_start = opts->stripes_start;
2359 args->stripes_end = opts->stripes_end;
2360 args->flags |= BALANCE_ARGS_FLAGS_STRIPES_RANGE;
2361 }
2362 }
2363
2364 static NTSTATUS add_balance_item(device_extension* Vcb) {
2365 KEY searchkey;
2366 traverse_ptr tp;
2367 NTSTATUS Status;
2368 BALANCE_ITEM* bi;
2369
2370 searchkey.obj_id = BALANCE_ITEM_ID;
2371 searchkey.obj_type = TYPE_TEMP_ITEM;
2372 searchkey.offset = 0;
2373
2374 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
2375
2376 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
2377 if (!NT_SUCCESS(Status)) {
2378 ERR("find_item returned %08x\n", Status);
2379 goto end;
2380 }
2381
2382 if (!keycmp(tp.item->key, searchkey)) {
2383 Status = delete_tree_item(Vcb, &tp);
2384 if (!NT_SUCCESS(Status)) {
2385 ERR("delete_tree_item returned %08x\n", Status);
2386 goto end;
2387 }
2388 }
2389
2390 bi = ExAllocatePoolWithTag(PagedPool, sizeof(BALANCE_ITEM), ALLOC_TAG);
2391 if (!bi) {
2392 ERR("out of memory\n");
2393 Status = STATUS_INSUFFICIENT_RESOURCES;
2394 goto end;
2395 }
2396
2397 RtlZeroMemory(bi, sizeof(BALANCE_ITEM));
2398
2399 if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED) {
2400 bi->flags |= BALANCE_FLAGS_DATA;
2401 copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bi->data);
2402 }
2403
2404 if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) {
2405 bi->flags |= BALANCE_FLAGS_METADATA;
2406 copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &bi->metadata);
2407 }
2408
2409 if (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED) {
2410 bi->flags |= BALANCE_FLAGS_SYSTEM;
2411 copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_SYSTEM], &bi->system);
2412 }
2413
2414 Status = insert_tree_item(Vcb, Vcb->root_root, BALANCE_ITEM_ID, TYPE_TEMP_ITEM, 0, bi, sizeof(BALANCE_ITEM), NULL, NULL);
2415 if (!NT_SUCCESS(Status)) {
2416 ERR("insert_tree_item returned %08x\n", Status);
2417 ExFreePool(bi);
2418 goto end;
2419 }
2420
2421 Status = STATUS_SUCCESS;
2422
2423 end:
2424 if (NT_SUCCESS(Status)) {
2425 Status = do_write(Vcb, NULL);
2426 if (!NT_SUCCESS(Status))
2427 ERR("do_write returned %08x\n", Status);
2428 }
2429
2430 free_trees(Vcb);
2431
2432 ExReleaseResourceLite(&Vcb->tree_lock);
2433
2434 return Status;
2435 }
2436
2437 static NTSTATUS remove_balance_item(device_extension* Vcb) {
2438 KEY searchkey;
2439 traverse_ptr tp;
2440 NTSTATUS Status;
2441
2442 searchkey.obj_id = BALANCE_ITEM_ID;
2443 searchkey.obj_type = TYPE_TEMP_ITEM;
2444 searchkey.offset = 0;
2445
2446 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
2447
2448 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
2449 if (!NT_SUCCESS(Status)) {
2450 ERR("find_item returned %08x\n", Status);
2451 goto end;
2452 }
2453
2454 if (!keycmp(tp.item->key, searchkey)) {
2455 Status = delete_tree_item(Vcb, &tp);
2456 if (!NT_SUCCESS(Status)) {
2457 ERR("delete_tree_item returned %08x\n", Status);
2458 goto end;
2459 }
2460
2461 Status = do_write(Vcb, NULL);
2462 if (!NT_SUCCESS(Status)) {
2463 ERR("do_write returned %08x\n", Status);
2464 goto end;
2465 }
2466
2467 free_trees(Vcb);
2468 }
2469
2470 Status = STATUS_SUCCESS;
2471
2472 end:
2473 ExReleaseResourceLite(&Vcb->tree_lock);
2474
2475 return Status;
2476 }
2477
2478 static void load_balance_args(btrfs_balance_opts* opts, BALANCE_ARGS* args) {
2479 opts->flags = BTRFS_BALANCE_OPTS_ENABLED;
2480
2481 if (args->flags & BALANCE_ARGS_FLAGS_PROFILES) {
2482 opts->flags |= BTRFS_BALANCE_OPTS_PROFILES;
2483 opts->profiles = args->profiles;
2484 }
2485
2486 if (args->flags & BALANCE_ARGS_FLAGS_USAGE) {
2487 opts->flags |= BTRFS_BALANCE_OPTS_USAGE;
2488
2489 opts->usage_start = 0;
2490 opts->usage_end = (UINT8)args->usage;
2491 } else if (args->flags & BALANCE_ARGS_FLAGS_USAGE_RANGE) {
2492 opts->flags |= BTRFS_BALANCE_OPTS_USAGE;
2493
2494 opts->usage_start = (UINT8)args->usage_start;
2495 opts->usage_end = (UINT8)args->usage_end;
2496 }
2497
2498 if (args->flags & BALANCE_ARGS_FLAGS_DEVID) {
2499 opts->flags |= BTRFS_BALANCE_OPTS_DEVID;
2500 opts->devid = args->devid;
2501 }
2502
2503 if (args->flags & BALANCE_ARGS_FLAGS_DRANGE) {
2504 opts->flags |= BTRFS_BALANCE_OPTS_DRANGE;
2505 opts->drange_start = args->drange_start;
2506 opts->drange_end = args->drange_end;
2507 }
2508
2509 if (args->flags & BALANCE_ARGS_FLAGS_VRANGE) {
2510 opts->flags |= BTRFS_BALANCE_OPTS_VRANGE;
2511 opts->vrange_start = args->vrange_start;
2512 opts->vrange_end = args->vrange_end;
2513 }
2514
2515 if (args->flags & BALANCE_ARGS_FLAGS_LIMIT) {
2516 opts->flags |= BTRFS_BALANCE_OPTS_LIMIT;
2517
2518 opts->limit_start = 0;
2519 opts->limit_end = args->limit;
2520 } else if (args->flags & BALANCE_ARGS_FLAGS_LIMIT_RANGE) {
2521 opts->flags |= BTRFS_BALANCE_OPTS_LIMIT;
2522
2523 opts->limit_start = args->limit_start;
2524 opts->limit_end = args->limit_end;
2525 }
2526
2527 if (args->flags & BALANCE_ARGS_FLAGS_STRIPES_RANGE) {
2528 opts->flags |= BTRFS_BALANCE_OPTS_STRIPES;
2529
2530 opts->stripes_start = (UINT16)args->stripes_start;
2531 opts->stripes_end = (UINT16)args->stripes_end;
2532 }
2533
2534 if (args->flags & BALANCE_ARGS_FLAGS_CONVERT) {
2535 opts->flags |= BTRFS_BALANCE_OPTS_CONVERT;
2536 opts->convert = args->convert;
2537
2538 if (args->flags & BALANCE_ARGS_FLAGS_SOFT)
2539 opts->flags |= BTRFS_BALANCE_OPTS_SOFT;
2540 }
2541 }
2542
2543 static NTSTATUS remove_superblocks(device* dev) {
2544 NTSTATUS Status;
2545 superblock* sb;
2546 int i = 0;
2547
2548 sb = ExAllocatePoolWithTag(PagedPool, sizeof(superblock), ALLOC_TAG);
2549 if (!sb) {
2550 ERR("out of memory\n");
2551 return STATUS_INSUFFICIENT_RESOURCES;
2552 }
2553
2554 RtlZeroMemory(sb, sizeof(superblock));
2555
2556 while (superblock_addrs[i] > 0 && dev->devitem.num_bytes >= superblock_addrs[i] + sizeof(superblock)) {
2557 Status = write_data_phys(dev->devobj, superblock_addrs[i], sb, sizeof(superblock));
2558
2559 if (!NT_SUCCESS(Status)) {
2560 ExFreePool(sb);
2561 return Status;
2562 }
2563
2564 i++;
2565 }
2566
2567 ExFreePool(sb);
2568
2569 return STATUS_SUCCESS;
2570 }
2571
2572 static NTSTATUS finish_removing_device(_Requires_exclusive_lock_held_(_Curr_->tree_lock) device_extension* Vcb, device* dev) {
2573 KEY searchkey;
2574 traverse_ptr tp;
2575 NTSTATUS Status;
2576 LIST_ENTRY* le;
2577 volume_device_extension* vde;
2578
2579 if (Vcb->need_write) {
2580 Status = do_write(Vcb, NULL);
2581
2582 if (!NT_SUCCESS(Status))
2583 ERR("do_write returned %08x\n", Status);
2584 } else
2585 Status = STATUS_SUCCESS;
2586
2587 free_trees(Vcb);
2588
2589 if (!NT_SUCCESS(Status))
2590 return Status;
2591
2592 // remove entry in chunk tree
2593
2594 searchkey.obj_id = 1;
2595 searchkey.obj_type = TYPE_DEV_ITEM;
2596 searchkey.offset = dev->devitem.dev_id;
2597
2598 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, NULL);
2599 if (!NT_SUCCESS(Status)) {
2600 ERR("find_item returned %08x\n", Status);
2601 return Status;
2602 }
2603
2604 if (!keycmp(searchkey, tp.item->key)) {
2605 Status = delete_tree_item(Vcb, &tp);
2606
2607 if (!NT_SUCCESS(Status)) {
2608 ERR("delete_tree_item returned %08x\n", Status);
2609 return Status;
2610 }
2611 }
2612
2613 // remove stats entry in device tree
2614
2615 searchkey.obj_id = 0;
2616 searchkey.obj_type = TYPE_DEV_STATS;
2617 searchkey.offset = dev->devitem.dev_id;
2618
2619 Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, NULL);
2620 if (!NT_SUCCESS(Status)) {
2621 ERR("find_item returned %08x\n", Status);
2622 return Status;
2623 }
2624
2625 if (!keycmp(searchkey, tp.item->key)) {
2626 Status = delete_tree_item(Vcb, &tp);
2627
2628 if (!NT_SUCCESS(Status)) {
2629 ERR("delete_tree_item returned %08x\n", Status);
2630 return Status;
2631 }
2632 }
2633
2634 // update superblock
2635
2636 Vcb->superblock.num_devices--;
2637 Vcb->superblock.total_bytes -= dev->devitem.num_bytes;
2638 Vcb->devices_loaded--;
2639
2640 RemoveEntryList(&dev->list_entry);
2641
2642 // flush
2643
2644 Status = do_write(Vcb, NULL);
2645 if (!NT_SUCCESS(Status))
2646 ERR("do_write returned %08x\n", Status);
2647
2648 free_trees(Vcb);
2649
2650 if (!NT_SUCCESS(Status))
2651 return Status;
2652
2653 if (!dev->readonly && dev->devobj) {
2654 Status = remove_superblocks(dev);
2655 if (!NT_SUCCESS(Status))
2656 WARN("remove_superblocks returned %08x\n", Status);
2657 }
2658
2659 // remove entry in volume list
2660
2661 vde = Vcb->vde;
2662
2663 if (dev->devobj) {
2664 pdo_device_extension* pdode = vde->pdode;
2665
2666 ExAcquireResourceExclusiveLite(&pdode->child_lock, TRUE);
2667
2668 le = pdode->children.Flink;
2669 while (le != &pdode->children) {
2670 volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry);
2671
2672 if (RtlCompareMemory(&dev->devitem.device_uuid, &vc->uuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) {
2673 PFILE_OBJECT FileObject;
2674 PDEVICE_OBJECT mountmgr;
2675 UNICODE_STRING mmdevpath;
2676
2677 pdode->children_loaded--;
2678
2679 if (vc->had_drive_letter) { // re-add entry to mountmgr
2680 RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME);
2681 Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &FileObject, &mountmgr);
2682 if (!NT_SUCCESS(Status))
2683 ERR("IoGetDeviceObjectPointer returned %08x\n", Status);
2684 else {
2685 MOUNTDEV_NAME mdn;
2686
2687 Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &mdn, sizeof(MOUNTDEV_NAME), TRUE, NULL);
2688 if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW)
2689 ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status);
2690 else {
2691 MOUNTDEV_NAME* mdn2;
2692 ULONG mdnsize = (ULONG)offsetof(MOUNTDEV_NAME, Name[0]) + mdn.NameLength;
2693
2694 mdn2 = ExAllocatePoolWithTag(PagedPool, mdnsize, ALLOC_TAG);
2695 if (!mdn2)
2696 ERR("out of memory\n");
2697 else {
2698 Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, mdn2, mdnsize, TRUE, NULL);
2699 if (!NT_SUCCESS(Status))
2700 ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status);
2701 else {
2702 UNICODE_STRING name;
2703
2704 name.Buffer = mdn2->Name;
2705 name.Length = name.MaximumLength = mdn2->NameLength;
2706
2707 Status = mountmgr_add_drive_letter(mountmgr, &name);
2708 if (!NT_SUCCESS(Status))
2709 WARN("mountmgr_add_drive_letter returned %08x\n", Status);
2710 }
2711
2712 ExFreePool(mdn2);
2713 }
2714 }
2715
2716 ObDereferenceObject(FileObject);
2717 }
2718 }
2719
2720 ExFreePool(vc->pnp_name.Buffer);
2721 RemoveEntryList(&vc->list_entry);
2722 ExFreePool(vc);
2723
2724 ObDereferenceObject(vc->fileobj);
2725
2726 break;
2727 }
2728
2729 le = le->Flink;
2730 }
2731
2732 if (pdode->children_loaded > 0 && vde->device->Characteristics & FILE_REMOVABLE_MEDIA) {
2733 vde->device->Characteristics &= ~FILE_REMOVABLE_MEDIA;
2734
2735 le = pdode->children.Flink;
2736 while (le != &pdode->children) {
2737 volume_child* vc = CONTAINING_RECORD(le, volume_child, list_entry);
2738
2739 if (vc->devobj->Characteristics & FILE_REMOVABLE_MEDIA) {
2740 vde->device->Characteristics |= FILE_REMOVABLE_MEDIA;
2741 break;
2742 }
2743
2744 le = le->Flink;
2745 }
2746 }
2747
2748 pdode->num_children = Vcb->superblock.num_devices;
2749
2750 ExReleaseResourceLite(&pdode->child_lock);
2751
2752 // free dev
2753
2754 if (dev->trim && !dev->readonly && !Vcb->options.no_trim)
2755 trim_whole_device(dev);
2756 }
2757
2758 while (!IsListEmpty(&dev->space)) {
2759 LIST_ENTRY* le2 = RemoveHeadList(&dev->space);
2760 space* s = CONTAINING_RECORD(le2, space, list_entry);
2761
2762 ExFreePool(s);
2763 }
2764
2765 ExFreePool(dev);
2766
2767 if (Vcb->trim) {
2768 Vcb->trim = FALSE;
2769
2770 le = Vcb->devices.Flink;
2771 while (le != &Vcb->devices) {
2772 device* dev2 = CONTAINING_RECORD(le, device, list_entry);
2773
2774 if (dev2->trim) {
2775 Vcb->trim = TRUE;
2776 break;
2777 }
2778
2779 le = le->Flink;
2780 }
2781 }
2782
2783 FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_CHANGE_SIZE);
2784
2785 return STATUS_SUCCESS;
2786 }
2787
2788 static void trim_unalloc_space(_Requires_lock_held_(_Curr_->tree_lock) device_extension* Vcb, device* dev) {
2789 DEVICE_MANAGE_DATA_SET_ATTRIBUTES* dmdsa;
2790 DEVICE_DATA_SET_RANGE* ranges;
2791 ULONG datalen, i;
2792 KEY searchkey;
2793 traverse_ptr tp;
2794 NTSTATUS Status;
2795 BOOL b;
2796 UINT64 lastoff = 0x100000; // don't TRIM the first megabyte, in case someone has been daft enough to install GRUB there
2797 LIST_ENTRY* le;
2798
2799 dev->num_trim_entries = 0;
2800
2801 searchkey.obj_id = dev->devitem.dev_id;
2802 searchkey.obj_type = TYPE_DEV_EXTENT;
2803 searchkey.offset = 0;
2804
2805 Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, NULL);
2806 if (!NT_SUCCESS(Status)) {
2807 ERR("find_item returned %08x\n", Status);
2808 return;
2809 }
2810
2811 do {
2812 traverse_ptr next_tp;
2813
2814 if (tp.item->key.obj_id == dev->devitem.dev_id && tp.item->key.obj_type == TYPE_DEV_EXTENT) {
2815 if (tp.item->size >= sizeof(DEV_EXTENT)) {
2816 DEV_EXTENT* de = (DEV_EXTENT*)tp.item->data;
2817
2818 if (tp.item->key.offset > lastoff)
2819 add_trim_entry_avoid_sb(Vcb, dev, lastoff, tp.item->key.offset - lastoff);
2820
2821 lastoff = tp.item->key.offset + de->length;
2822 } else {
2823 ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DEV_EXTENT));
2824 return;
2825 }
2826 }
2827
2828 b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
2829
2830 if (b) {
2831 tp = next_tp;
2832 if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type))
2833 break;
2834 }
2835 } while (b);
2836
2837 if (lastoff < dev->devitem.num_bytes)
2838 add_trim_entry_avoid_sb(Vcb, dev, lastoff, dev->devitem.num_bytes - lastoff);
2839
2840 if (dev->num_trim_entries == 0)
2841 return;
2842
2843 datalen = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(UINT64)) + (dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE));
2844
2845 dmdsa = ExAllocatePoolWithTag(PagedPool, datalen, ALLOC_TAG);
2846 if (!dmdsa) {
2847 ERR("out of memory\n");
2848 goto end;
2849 }
2850
2851 dmdsa->Size = sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES);
2852 dmdsa->Action = DeviceDsmAction_Trim;
2853 dmdsa->Flags = DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED;
2854 dmdsa->ParameterBlockOffset = 0;
2855 dmdsa->ParameterBlockLength = 0;
2856 dmdsa->DataSetRangesOffset = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(UINT64));
2857 dmdsa->DataSetRangesLength = dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE);
2858
2859 ranges = (DEVICE_DATA_SET_RANGE*)((UINT8*)dmdsa + dmdsa->DataSetRangesOffset);
2860
2861 i = 0;
2862 le = dev->trim_list.Flink;
2863 while (le != &dev->trim_list) {
2864 space* s = CONTAINING_RECORD(le, space, list_entry);
2865
2866 ranges[i].StartingOffset = s->address;
2867 ranges[i].LengthInBytes = s->size;
2868 i++;
2869
2870 le = le->Flink;
2871 }
2872
2873 Status = dev_ioctl(dev->devobj, IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES, dmdsa, datalen, NULL, 0, TRUE, NULL);
2874 if (!NT_SUCCESS(Status))
2875 WARN("IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES returned %08x\n", Status);
2876
2877 ExFreePool(dmdsa);
2878
2879 end:
2880 while (!IsListEmpty(&dev->trim_list)) {
2881 space* s = CONTAINING_RECORD(RemoveHeadList(&dev->trim_list), space, list_entry);
2882 ExFreePool(s);
2883 }
2884
2885 dev->num_trim_entries = 0;
2886 }
2887
2888 static NTSTATUS try_consolidation(device_extension* Vcb, UINT64 flags, chunk** newchunk) {
2889 NTSTATUS Status;
2890 BOOL changed;
2891 LIST_ENTRY* le;
2892 chunk* rc;
2893
2894 // FIXME - allow with metadata chunks?
2895
2896 while (TRUE) {
2897 rc = NULL;
2898
2899 ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE);
2900
2901 ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
2902
2903 // choose the least-used chunk we haven't looked at yet
2904 le = Vcb->chunks.Flink;
2905 while (le != &Vcb->chunks) {
2906 chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
2907
2908 // FIXME - skip full-size chunks over e.g. 90% full?
2909 if (c->chunk_item->type & BLOCK_FLAG_DATA && !c->readonly && c->balance_num != Vcb->balance.balance_num && (!rc || c->used < rc->used))
2910 rc = c;
2911
2912 le = le->Flink;
2913 }
2914
2915 ExReleaseResourceLite(&Vcb->chunk_lock);
2916
2917 if (!rc) {
2918 ExReleaseResourceLite(&Vcb->tree_lock);
2919 break;
2920 }
2921
2922 if (rc->list_entry_balance.Flink) {
2923 RemoveEntryList(&rc->list_entry_balance);
2924 Vcb->balance.chunks_left--;
2925 }
2926
2927 rc->list_entry_balance.Flink = (LIST_ENTRY*)1; // so it doesn't get dropped
2928 rc->reloc = TRUE;
2929
2930 ExReleaseResourceLite(&Vcb->tree_lock);
2931
2932 do {
2933 changed = FALSE;
2934
2935 Status = balance_data_chunk(Vcb, rc, &changed);
2936 if (!NT_SUCCESS(Status)) {
2937 ERR("balance_data_chunk returned %08x\n", Status);
2938 Vcb->balance.status = Status;
2939 rc->list_entry_balance.Flink = NULL;
2940 rc->reloc = FALSE;
2941 return Status;
2942 }
2943
2944 KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL);
2945
2946 if (Vcb->readonly)
2947 Vcb->balance.stopping = TRUE;
2948
2949 if (Vcb->balance.stopping)
2950 return STATUS_SUCCESS;
2951 } while (changed);
2952
2953 rc->list_entry_balance.Flink = NULL;
2954
2955 rc->changed = TRUE;
2956 rc->space_changed = TRUE;
2957 rc->balance_num = Vcb->balance.balance_num;
2958
2959 Status = do_write(Vcb, NULL);
2960 if (!NT_SUCCESS(Status)) {
2961 ERR("do_write returned %08x\n", Status);
2962 return Status;
2963 }
2964 }
2965
2966 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
2967
2968 Status = alloc_chunk(Vcb, flags, &rc, TRUE);
2969
2970 ExReleaseResourceLite(&Vcb->chunk_lock);
2971
2972 if (NT_SUCCESS(Status)) {
2973 *newchunk = rc;
2974 return Status;
2975 } else {
2976 ERR("alloc_chunk returned %08x\n", Status);
2977 return Status;
2978 }
2979 }
2980
2981 static NTSTATUS regenerate_space_list(device_extension* Vcb, device* dev) {
2982 LIST_ENTRY* le;
2983
2984 while (!IsListEmpty(&dev->space)) {
2985 space* s = CONTAINING_RECORD(RemoveHeadList(&dev->space), space, list_entry);
2986
2987 ExFreePool(s);
2988 }
2989
2990 // The Linux driver doesn't like to allocate chunks within the first megabyte of a device.
2991
2992 space_list_add2(&dev->space, NULL, 0x100000, dev->devitem.num_bytes - 0x100000, NULL, NULL);
2993
2994 le = Vcb->chunks.Flink;
2995 while (le != &Vcb->chunks) {
2996 UINT16 n;
2997 chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
2998 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
2999
3000 for (n = 0; n < c->chunk_item->num_stripes; n++) {
3001 UINT64 stripe_size = 0;
3002
3003 if (cis[n].dev_id == dev->devitem.dev_id) {
3004 if (stripe_size == 0) {
3005 UINT16 factor;
3006
3007 if (c->chunk_item->type & BLOCK_FLAG_RAID0)
3008 factor = c->chunk_item->num_stripes;
3009 else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
3010 factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
3011 else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
3012 factor = c->chunk_item->num_stripes - 1;
3013 else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
3014 factor = c->chunk_item->num_stripes - 2;
3015 else // SINGLE, DUP, RAID1
3016 factor = 1;
3017
3018 stripe_size = c->chunk_item->size / factor;
3019 }
3020
3021 space_list_subtract2(&dev->space, NULL, cis[n].offset, stripe_size, NULL, NULL);
3022 }
3023 }
3024
3025 le = le->Flink;
3026 }
3027
3028 return STATUS_SUCCESS;
3029 }
3030
3031 _Function_class_(KSTART_ROUTINE)
3032 #ifndef __REACTOS__
3033 void balance_thread(void* context) {
3034 #else
3035 void NTAPI balance_thread(void* context) {
3036 #endif
3037 device_extension* Vcb = (device_extension*)context;
3038 LIST_ENTRY chunks;
3039 LIST_ENTRY* le;
3040 UINT64 num_chunks[3], okay_metadata_chunks = 0, okay_data_chunks = 0, okay_system_chunks = 0;
3041 UINT64 old_data_flags = 0, old_metadata_flags = 0, old_system_flags = 0;
3042 NTSTATUS Status;
3043
3044 Vcb->balance.balance_num++;
3045
3046 Vcb->balance.stopping = FALSE;
3047 KeInitializeEvent(&Vcb->balance.finished, NotificationEvent, FALSE);
3048
3049 if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_CONVERT) {
3050 old_data_flags = Vcb->data_flags;
3051 Vcb->data_flags = BLOCK_FLAG_DATA | (Vcb->balance.opts[BALANCE_OPTS_DATA].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_DATA].convert);
3052
3053 FsRtlNotifyVolumeEvent(Vcb->root_file, FSRTL_VOLUME_CHANGE_SIZE);
3054 }
3055
3056 if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_CONVERT) {
3057 old_metadata_flags = Vcb->metadata_flags;
3058 Vcb->metadata_flags = BLOCK_FLAG_METADATA | (Vcb->balance.opts[BALANCE_OPTS_METADATA].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_METADATA].convert);
3059 }
3060
3061 if (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_CONVERT) {
3062 old_system_flags = Vcb->system_flags;
3063 Vcb->system_flags = BLOCK_FLAG_SYSTEM | (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_SYSTEM].convert);
3064 }
3065
3066 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS) {
3067 if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED)
3068 RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &Vcb->balance.opts[BALANCE_OPTS_DATA], sizeof(btrfs_balance_opts));
3069 else if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED)
3070 RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_DATA], &Vcb->balance.opts[BALANCE_OPTS_METADATA], sizeof(btrfs_balance_opts));
3071 }
3072
3073 num_chunks[0] = num_chunks[1] = num_chunks[2] = 0;
3074 Vcb->balance.total_chunks = Vcb->balance.chunks_left = 0;
3075
3076 InitializeListHead(&chunks);
3077
3078 // FIXME - what are we supposed to do with limit_start?
3079
3080 if (!Vcb->readonly) {
3081 if (!Vcb->balance.removing && !Vcb->balance.shrinking) {
3082 Status = add_balance_item(Vcb);
3083 if (!NT_SUCCESS(Status)) {
3084 ERR("add_balance_item returned %08x\n", Status);
3085 Vcb->balance.status = Status;
3086 goto end;
3087 }
3088 } else {
3089 if (Vcb->need_write) {
3090 Status = do_write(Vcb, NULL);
3091
3092 free_trees(Vcb);
3093
3094 if (!NT_SUCCESS(Status)) {
3095 ERR("do_write returned %08x\n", Status);
3096 Vcb->balance.status = Status;
3097 goto end;
3098 }
3099 }
3100 }
3101 }
3102
3103 KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL);
3104
3105 if (Vcb->balance.stopping)
3106 goto end;
3107
3108 ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
3109
3110 le = Vcb->chunks.Flink;
3111 while (le != &Vcb->chunks) {
3112 chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
3113 UINT8 sort;
3114
3115 ExAcquireResourceExclusiveLite(&c->lock, TRUE);
3116
3117 if (c->chunk_item->type & BLOCK_FLAG_DATA)
3118 sort = BALANCE_OPTS_DATA;
3119 else if (c->chunk_item->type & BLOCK_FLAG_METADATA)
3120 sort = BALANCE_OPTS_METADATA;
3121 else if (c->chunk_item->type & BLOCK_FLAG_SYSTEM)
3122 sort = BALANCE_OPTS_SYSTEM;
3123 else {
3124 ERR("unexpected chunk type %llx\n", c->chunk_item->type);
3125 ExReleaseResourceLite(&c->lock);
3126 break;
3127 }
3128
3129 if ((!(Vcb->balance.opts[sort].flags & BTRFS_BALANCE_OPTS_LIMIT) || num_chunks[sort] < Vcb->balance.opts[sort].limit_end) &&
3130 should_balance_chunk(Vcb, sort, c)) {
3131 InsertTailList(&chunks, &c->list_entry_balance);
3132
3133 num_chunks[sort]++;
3134 Vcb->balance.total_chunks++;
3135 Vcb->balance.chunks_left++;
3136 } else if (sort == BALANCE_OPTS_METADATA)
3137 okay_metadata_chunks++;
3138 else if (sort == BALANCE_OPTS_DATA)
3139 okay_data_chunks++;
3140 else if (sort == BALANCE_OPTS_SYSTEM)
3141 okay_system_chunks++;
3142
3143 if (!c->cache_loaded) {
3144 Status = load_cache_chunk(Vcb, c, NULL);
3145
3146 if (!NT_SUCCESS(Status)) {
3147 ERR("load_cache_chunk returned %08x\n", Status);
3148 Vcb->balance.status = Status;
3149 ExReleaseResourceLite(&c->lock);
3150 ExReleaseResourceLite(&Vcb->chunk_lock);
3151 goto end;
3152 }
3153 }
3154
3155 ExReleaseResourceLite(&c->lock);
3156
3157 le = le->Flink;
3158 }
3159
3160 ExReleaseResourceLite(&Vcb->chunk_lock);
3161
3162 // If we're doing a full balance, try and allocate a new chunk now, before we mess things up
3163 if (okay_metadata_chunks == 0 || okay_data_chunks == 0 || okay_system_chunks == 0) {
3164 BOOL consolidated = FALSE;
3165 chunk* c;
3166
3167 if (okay_metadata_chunks == 0) {
3168 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
3169
3170 Status = alloc_chunk(Vcb, Vcb->metadata_flags, &c, TRUE);
3171 if (NT_SUCCESS(Status))
3172 c->balance_num = Vcb->balance.balance_num;
3173 else if (Status != STATUS_DISK_FULL || consolidated) {
3174 ERR("alloc_chunk returned %08x\n", Status);
3175 ExReleaseResourceLite(&Vcb->chunk_lock);
3176 Vcb->balance.status = Status;
3177 goto end;
3178 }
3179
3180 ExReleaseResourceLite(&Vcb->chunk_lock);
3181
3182 if (Status == STATUS_DISK_FULL) {
3183 Status = try_consolidation(Vcb, Vcb->metadata_flags, &c);
3184 if (!NT_SUCCESS(Status)) {
3185 ERR("try_consolidation returned %08x\n", Status);
3186 Vcb->balance.status = Status;
3187 goto end;
3188 } else
3189 c->balance_num = Vcb->balance.balance_num;
3190
3191 consolidated = TRUE;
3192
3193 if (Vcb->balance.stopping)
3194 goto end;
3195 }
3196 }
3197
3198 if (okay_data_chunks == 0) {
3199 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
3200
3201 Status = alloc_chunk(Vcb, Vcb->data_flags, &c, TRUE);
3202 if (NT_SUCCESS(Status))
3203 c->balance_num = Vcb->balance.balance_num;
3204 else if (Status != STATUS_DISK_FULL || consolidated) {
3205 ERR("alloc_chunk returned %08x\n", Status);
3206 ExReleaseResourceLite(&Vcb->chunk_lock);
3207 Vcb->balance.status = Status;
3208 goto end;
3209 }
3210
3211 ExReleaseResourceLite(&Vcb->chunk_lock);
3212
3213 if (Status == STATUS_DISK_FULL) {
3214 Status = try_consolidation(Vcb, Vcb->data_flags, &c);
3215 if (!NT_SUCCESS(Status)) {
3216 ERR("try_consolidation returned %08x\n", Status);
3217 Vcb->balance.status = Status;
3218 goto end;
3219 } else
3220 c->balance_num = Vcb->balance.balance_num;
3221
3222 consolidated = TRUE;
3223
3224 if (Vcb->balance.stopping)
3225 goto end;
3226 }
3227 }
3228
3229 if (okay_system_chunks == 0) {
3230 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
3231
3232 Status = alloc_chunk(Vcb, Vcb->system_flags, &c, TRUE);
3233 if (NT_SUCCESS(Status))
3234 c->balance_num = Vcb->balance.balance_num;
3235 else if (Status != STATUS_DISK_FULL || consolidated) {
3236 ERR("alloc_chunk returned %08x\n", Status);
3237 ExReleaseResourceLite(&Vcb->chunk_lock);
3238 Vcb->balance.status = Status;
3239 goto end;
3240 }
3241
3242 ExReleaseResourceLite(&Vcb->chunk_lock);
3243
3244 if (Status == STATUS_DISK_FULL) {
3245 Status = try_consolidation(Vcb, Vcb->system_flags, &c);
3246 if (!NT_SUCCESS(Status)) {
3247 ERR("try_consolidation returned %08x\n", Status);
3248 Vcb->balance.status = Status;
3249 goto end;
3250 } else
3251 c->balance_num = Vcb->balance.balance_num;
3252
3253 consolidated = TRUE;
3254
3255 if (Vcb->balance.stopping)
3256 goto end;
3257 }
3258 }
3259 }
3260
3261 ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
3262
3263 le = chunks.Flink;
3264 while (le != &chunks) {
3265 chunk* c = CONTAINING_RECORD(le, chunk, list_entry_balance);
3266
3267 c->reloc = TRUE;
3268
3269 le = le->Flink;
3270 }
3271
3272 ExReleaseResourceLite(&Vcb->chunk_lock);
3273
3274 // do data chunks before metadata
3275 le = chunks.Flink;
3276 while (le != &chunks) {
3277 chunk* c = CONTAINING_RECORD(le, chunk, list_entry_balance);
3278 LIST_ENTRY* le2 = le->Flink;
3279
3280 if (c->chunk_item->type & BLOCK_FLAG_DATA) {
3281 BOOL changed;
3282
3283 do {
3284 changed = FALSE;
3285
3286 Status = balance_data_chunk(Vcb, c, &changed);
3287 if (!NT_SUCCESS(Status)) {
3288 ERR("balance_data_chunk returned %08x\n", Status);
3289 Vcb->balance.status = Status;
3290 goto end;
3291 }
3292
3293 KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL);
3294
3295 if (Vcb->readonly)
3296 Vcb->balance.stopping = TRUE;
3297
3298 if (Vcb->balance.stopping)
3299 break;
3300 } while (changed);
3301
3302 c->changed = TRUE;
3303 c->space_changed = TRUE;
3304 }
3305
3306 if (Vcb->balance.stopping)
3307 goto end;
3308
3309 if (c->chunk_item->type & BLOCK_FLAG_DATA &&
3310 (!(Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) || !(c->chunk_item->type & BLOCK_FLAG_METADATA))) {
3311 RemoveEntryList(&c->list_entry_balance);
3312 c->list_entry_balance.Flink = NULL;
3313
3314 Vcb->balance.chunks_left--;
3315 }
3316
3317 le = le2;
3318 }
3319
3320 // do metadata chunks
3321 while (!IsListEmpty(&chunks)) {
3322 chunk* c;
3323 BOOL changed;
3324
3325 le = RemoveHeadList(&chunks);
3326 c = CONTAINING_RECORD(le, chunk, list_entry_balance);
3327
3328 if (c->chunk_item->type & BLOCK_FLAG_METADATA || c->chunk_item->type & BLOCK_FLAG_SYSTEM) {
3329 do {
3330 Status = balance_metadata_chunk(Vcb, c, &changed);
3331 if (!NT_SUCCESS(Status)) {
3332 ERR("balance_metadata_chunk returned %08x\n", Status);
3333 Vcb->balance.status = Status;
3334 goto end;
3335 }
3336
3337 KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL);
3338
3339 if (Vcb->readonly)
3340 Vcb->balance.stopping = TRUE;
3341
3342 if (Vcb->balance.stopping)
3343 break;
3344 } while (changed);
3345
3346 c->changed = TRUE;
3347 c->space_changed = TRUE;
3348 }
3349
3350 if (Vcb->balance.stopping)
3351 break;
3352
3353 c->list_entry_balance.Flink = NULL;
3354
3355 Vcb->balance.chunks_left--;
3356 }
3357
3358 end:
3359 if (!Vcb->readonly) {
3360 if (Vcb->balance.stopping || !NT_SUCCESS(Vcb->balance.status)) {
3361 le = chunks.Flink;
3362 while (le != &chunks) {
3363 chunk* c = CONTAINING_RECORD(le, chunk, list_entry_balance);
3364 c->reloc = FALSE;
3365
3366 le = le->Flink;
3367 c->list_entry_balance.Flink = NULL;
3368 }
3369
3370 if (old_data_flags != 0)
3371 Vcb->data_flags = old_data_flags;
3372
3373 if (old_metadata_flags != 0)
3374 Vcb->metadata_flags = old_metadata_flags;
3375
3376 if (old_system_flags != 0)
3377 Vcb->system_flags = old_system_flags;
3378 }
3379
3380 if (Vcb->balance.removing) {
3381 device* dev = NULL;
3382
3383 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
3384
3385 le = Vcb->devices.Flink;
3386 while (le != &Vcb->devices) {
3387 device* dev2 = CONTAINING_RECORD(le, device, list_entry);
3388
3389 if (dev2->devitem.dev_id == Vcb->balance.opts[0].devid) {
3390 dev = dev2;
3391 break;
3392 }
3393
3394 le = le->Flink;
3395 }
3396
3397 if (dev) {
3398 if (Vcb->balance.chunks_left == 0) {
3399 Status = finish_removing_device(Vcb, dev);
3400
3401 if (!NT_SUCCESS(Status)) {
3402 ERR("finish_removing_device returned %08x\n", Status);
3403 dev->reloc = FALSE;
3404 }
3405 } else
3406 dev->reloc = FALSE;
3407 }
3408
3409 ExReleaseResourceLite(&Vcb->tree_lock);
3410 } else if (Vcb->balance.shrinking) {
3411 device* dev = NULL;
3412
3413 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
3414
3415 le = Vcb->devices.Flink;
3416 while (le != &Vcb->devices) {
3417 device* dev2 = CONTAINING_RECORD(le, device, list_entry);
3418
3419 if (dev2->devitem.dev_id == Vcb->balance.opts[0].devid) {
3420 dev = dev2;
3421 break;
3422 }
3423
3424 le = le->Flink;
3425 }
3426
3427 if (!dev) {
3428 ERR("could not find device %llx\n", Vcb->balance.opts[0].devid);
3429 Vcb->balance.status = STATUS_INTERNAL_ERROR;
3430 }
3431
3432 if (Vcb->balance.stopping || !NT_SUCCESS(Vcb->balance.status)) {
3433 if (dev) {
3434 Status = regenerate_space_list(Vcb, dev);
3435