[BTRFS]
[reactos.git] / reactos / drivers / filesystems / btrfs / balance.c
1 /* Copyright (c) Mark Harmstone 2016
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include "btrfs_drv.h"
19 #include "btrfsioctl.h"
20
/* One tree (metadata) block that is being moved to a new address. */
typedef struct {
    UINT64 address;         // current address of the block (obj_id of its extent item)
    UINT64 new_address;     // address picked for the relocated copy
    tree_header* data;      // node_size buffer with the block's contents; NULL until it has been read
    EXTENT_ITEM* ei;        // points into the data of the original extent-tree item (not owned)
    tree* t;                // cached in-memory tree with the same address, or NULL if none was found
    BOOL system;            // TRUE if the block was read from a chunk flagged BLOCK_FLAG_SYSTEM
    LIST_ENTRY refs;        // list of metadata_reloc_ref: the back-references to this block
    LIST_ENTRY list_entry;  // link in the list of blocks being relocated
} metadata_reloc;
31
/* One back-reference (inline or separate item) to a tree block being relocated. */
typedef struct {
    UINT8 type;             // TYPE_TREE_BLOCK_REF or TYPE_SHARED_BLOCK_REF; selects the union member

    union {
        TREE_BLOCK_REF tbr;     // valid when type == TYPE_TREE_BLOCK_REF
        SHARED_BLOCK_REF sbr;   // valid when type == TYPE_SHARED_BLOCK_REF
    };

    metadata_reloc* parent; // relocation entry of the parent tree block, or NULL if none was resolved
    BOOL top;               // TRUE if no parent tree was found for a TREE_BLOCK_REF (root gets updated instead)
    LIST_ENTRY list_entry;  // link in metadata_reloc.refs
} metadata_reloc_ref;
44
/* One data extent that is being moved to a new address. */
typedef struct {
    UINT64 address;         // current address of the extent (obj_id of its extent item)
    UINT64 size;            // length of the extent (offset field of its extent item key)
    UINT64 new_address;     // replacement address written into EXTENT_DATA2 entries that pointed at address
    chunk* newchunk;        // NOTE(review): presumably the chunk containing new_address - confirm against the data-balance code
    EXTENT_ITEM* ei;        // points into the data of the original extent-tree item (not owned)
    LIST_ENTRY refs;        // list of back-references to this extent
    LIST_ENTRY list_entry;  // link in the list of extents being relocated
} data_reloc;
54
/* One back-reference to a data extent being relocated.
 * NOTE(review): the code that fills this in is outside the visible part of the file;
 * the field notes below follow the parallel metadata_reloc_ref layout - confirm. */
typedef struct {
    UINT8 type;             // presumably TYPE_EXTENT_DATA_REF or TYPE_SHARED_DATA_REF; selects the union member

    union {
        EXTENT_DATA_REF edr;
        SHARED_DATA_REF sdr;
    };

    metadata_reloc* parent; // presumably the relocation entry of the leaf holding the reference - confirm
    LIST_ENTRY list_entry;  // link in data_reloc.refs
} data_reloc_ref;
66
// Defined in another translation unit.
// NOTE(review): presumably the global list of volumes and the resource guarding it - confirm at the definition.
extern LIST_ENTRY volumes;
extern ERESOURCE volumes_lock;
69
70 static NTSTATUS add_metadata_reloc(device_extension* Vcb, LIST_ENTRY* items, traverse_ptr* tp, BOOL skinny, metadata_reloc** mr2, chunk* c, LIST_ENTRY* rollback) {
71 metadata_reloc* mr;
72 EXTENT_ITEM* ei;
73 UINT16 len;
74 UINT64 inline_rc;
75 UINT8* ptr;
76
77 mr = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc), ALLOC_TAG);
78 if (!mr) {
79 ERR("out of memory\n");
80 return STATUS_INSUFFICIENT_RESOURCES;
81 }
82
83 mr->address = tp->item->key.obj_id;
84 mr->data = NULL;
85 mr->ei = (EXTENT_ITEM*)tp->item->data;
86 mr->system = FALSE;
87 InitializeListHead(&mr->refs);
88
89 delete_tree_item(Vcb, tp, rollback);
90
91 if (!c)
92 c = get_chunk_from_address(Vcb, tp->item->key.obj_id);
93
94 if (c) {
95 ExAcquireResourceExclusiveLite(&c->lock, TRUE);
96
97 decrease_chunk_usage(c, Vcb->superblock.node_size);
98
99 space_list_add(Vcb, c, TRUE, tp->item->key.obj_id, Vcb->superblock.node_size, rollback);
100
101 ExReleaseResourceLite(&c->lock);
102 }
103
104 ei = (EXTENT_ITEM*)tp->item->data;
105 inline_rc = 0;
106
107 len = tp->item->size - sizeof(EXTENT_ITEM);
108 ptr = (UINT8*)tp->item->data + sizeof(EXTENT_ITEM);
109 if (!skinny) {
110 len -= sizeof(EXTENT_ITEM2);
111 ptr += sizeof(EXTENT_ITEM2);
112 }
113
114 while (len > 0) {
115 UINT8 secttype = *ptr;
116 ULONG sectlen = secttype == TYPE_TREE_BLOCK_REF ? sizeof(TREE_BLOCK_REF) : (secttype == TYPE_SHARED_BLOCK_REF ? sizeof(SHARED_BLOCK_REF) : 0);
117 metadata_reloc_ref* ref;
118
119 len--;
120
121 if (sectlen > len) {
122 ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, len, sectlen);
123 return STATUS_INTERNAL_ERROR;
124 }
125
126 if (sectlen == 0) {
127 ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, secttype);
128 return STATUS_INTERNAL_ERROR;
129 }
130
131 ref = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc_ref), ALLOC_TAG);
132 if (!ref) {
133 ERR("out of memory\n");
134 return STATUS_INSUFFICIENT_RESOURCES;
135 }
136
137 if (secttype == TYPE_TREE_BLOCK_REF) {
138 ref->type = TYPE_TREE_BLOCK_REF;
139 RtlCopyMemory(&ref->tbr, ptr + sizeof(UINT8), sizeof(TREE_BLOCK_REF));
140 inline_rc++;
141 } else if (secttype == TYPE_SHARED_BLOCK_REF) {
142 ref->type = TYPE_SHARED_BLOCK_REF;
143 RtlCopyMemory(&ref->sbr, ptr + sizeof(UINT8), sizeof(SHARED_BLOCK_REF));
144 inline_rc++;
145 } else {
146 ERR("unexpected tree type %x\n", secttype);
147 ExFreePool(ref);
148 return STATUS_INTERNAL_ERROR;
149 }
150
151 ref->parent = NULL;
152 ref->top = FALSE;
153 InsertTailList(&mr->refs, &ref->list_entry);
154
155 len -= sectlen;
156 ptr += sizeof(UINT8) + sectlen;
157 }
158
159 if (inline_rc < ei->refcount) { // look for non-inline entries
160 traverse_ptr tp2 = *tp, next_tp;
161
162 while (find_next_item(Vcb, &tp2, &next_tp, FALSE, NULL)) {
163 tp2 = next_tp;
164
165 if (tp2.item->key.obj_id == tp->item->key.obj_id) {
166 if (tp2.item->key.obj_type == TYPE_TREE_BLOCK_REF && tp2.item->size >= sizeof(TREE_BLOCK_REF)) {
167 metadata_reloc_ref* ref = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc_ref), ALLOC_TAG);
168 if (!ref) {
169 ERR("out of memory\n");
170 return STATUS_INSUFFICIENT_RESOURCES;
171 }
172
173 ref->type = TYPE_TREE_BLOCK_REF;
174 RtlCopyMemory(&ref->tbr, tp2.item->data, sizeof(TREE_BLOCK_REF));
175 ref->parent = NULL;
176 ref->top = FALSE;
177 InsertTailList(&mr->refs, &ref->list_entry);
178
179 delete_tree_item(Vcb, &tp2, rollback);
180 } else if (tp2.item->key.obj_type == TYPE_SHARED_BLOCK_REF && tp2.item->size >= sizeof(SHARED_BLOCK_REF)) {
181 metadata_reloc_ref* ref = ExAllocatePoolWithTag(PagedPool, sizeof(metadata_reloc_ref), ALLOC_TAG);
182 if (!ref) {
183 ERR("out of memory\n");
184 return STATUS_INSUFFICIENT_RESOURCES;
185 }
186
187 ref->type = TYPE_SHARED_BLOCK_REF;
188 RtlCopyMemory(&ref->sbr, tp2.item->data, sizeof(SHARED_BLOCK_REF));
189 ref->parent = NULL;
190 ref->top = FALSE;
191 InsertTailList(&mr->refs, &ref->list_entry);
192
193 delete_tree_item(Vcb, &tp2, rollback);
194 }
195 } else
196 break;
197 }
198 }
199
200 InsertTailList(items, &mr->list_entry);
201
202 if (mr2)
203 *mr2 = mr;
204
205 return STATUS_SUCCESS;
206 }
207
208 static NTSTATUS add_metadata_reloc_parent(device_extension* Vcb, LIST_ENTRY* items, UINT64 address, metadata_reloc** mr2, LIST_ENTRY* rollback) {
209 LIST_ENTRY* le;
210 KEY searchkey;
211 traverse_ptr tp;
212 BOOL skinny = FALSE;
213 NTSTATUS Status;
214
215 le = items->Flink;
216 while (le != items) {
217 metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry);
218
219 if (mr->address == address) {
220 *mr2 = mr;
221 return STATUS_SUCCESS;
222 }
223
224 le = le->Flink;
225 }
226
227 searchkey.obj_id = address;
228 searchkey.obj_type = TYPE_METADATA_ITEM;
229 searchkey.offset = 0xffffffffffffffff;
230
231 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
232 if (!NT_SUCCESS(Status)) {
233 ERR("find_item returned %08x\n", Status);
234 return Status;
235 }
236
237 if (tp.item->key.obj_id == address && tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM))
238 skinny = TRUE;
239 else if (tp.item->key.obj_id == address && tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.offset == Vcb->superblock.node_size &&
240 tp.item->size >= sizeof(EXTENT_ITEM)) {
241 EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
242
243 if (!(ei->flags & EXTENT_ITEM_TREE_BLOCK)) {
244 ERR("EXTENT_ITEM for %llx found, but tree flag not set\n", address);
245 return STATUS_INTERNAL_ERROR;
246 }
247 } else {
248 ERR("could not find valid EXTENT_ITEM for address %llx\n", address);
249 return STATUS_INTERNAL_ERROR;
250 }
251
252 Status = add_metadata_reloc(Vcb, items, &tp, skinny, mr2, NULL, rollback);
253 if (!NT_SUCCESS(Status)) {
254 ERR("add_metadata_reloc returned %08x\n", Status);
255 return Status;
256 }
257
258 return STATUS_SUCCESS;
259 }
260
261 static NTSTATUS add_metadata_reloc_extent_item(device_extension* Vcb, metadata_reloc* mr, LIST_ENTRY* rollback) {
262 LIST_ENTRY* le;
263 UINT64 rc = 0;
264 UINT16 inline_len;
265 BOOL all_inline = TRUE;
266 metadata_reloc_ref* first_noninline = NULL;
267 EXTENT_ITEM* ei;
268 UINT8* ptr;
269
270 inline_len = sizeof(EXTENT_ITEM);
271 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA))
272 inline_len += sizeof(EXTENT_ITEM2);
273
274 le = mr->refs.Flink;
275 while (le != &mr->refs) {
276 metadata_reloc_ref* ref = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry);
277 ULONG extlen = 0;
278
279 rc++;
280
281 if (ref->type == TYPE_TREE_BLOCK_REF)
282 extlen += sizeof(TREE_BLOCK_REF);
283 else if (ref->type == TYPE_SHARED_BLOCK_REF)
284 extlen += sizeof(SHARED_BLOCK_REF);
285
286 if (all_inline) {
287 if (inline_len + 1 + extlen > Vcb->superblock.node_size / 4) {
288 all_inline = FALSE;
289 first_noninline = ref;
290 } else
291 inline_len += extlen + 1;
292 }
293
294 le = le->Flink;
295 }
296
297 ei = ExAllocatePoolWithTag(PagedPool, inline_len, ALLOC_TAG);
298 if (!ei) {
299 ERR("out of memory\n");
300 return STATUS_INSUFFICIENT_RESOURCES;
301 }
302
303 ei->refcount = rc;
304 ei->generation = mr->ei->generation;
305 ei->flags = mr->ei->flags;
306 ptr = (UINT8*)&ei[1];
307
308 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
309 EXTENT_ITEM2* ei2 = (EXTENT_ITEM2*)ptr;
310
311 ei2->firstitem = *(KEY*)&mr->data[1];
312 ei2->level = mr->data->level;
313
314 ptr += sizeof(EXTENT_ITEM2);
315 }
316
317 le = mr->refs.Flink;
318 while (le != &mr->refs) {
319 metadata_reloc_ref* ref = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry);
320
321 if (ref == first_noninline)
322 break;
323
324 *ptr = ref->type;
325 ptr++;
326
327 if (ref->type == TYPE_TREE_BLOCK_REF) {
328 TREE_BLOCK_REF* tbr = (TREE_BLOCK_REF*)ptr;
329
330 tbr->offset = ref->tbr.offset;
331
332 ptr += sizeof(TREE_BLOCK_REF);
333 } else if (ref->type == TYPE_SHARED_BLOCK_REF) {
334 SHARED_BLOCK_REF* sbr = (SHARED_BLOCK_REF*)ptr;
335
336 sbr->offset = ref->parent->new_address;
337
338 ptr += sizeof(SHARED_BLOCK_REF);
339 }
340
341 le = le->Flink;
342 }
343
344 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
345 if (!insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_METADATA_ITEM, mr->data->level, ei, inline_len, NULL, NULL, rollback)) {
346 ERR("insert_tree_item failed\n");
347 return STATUS_INTERNAL_ERROR;
348 }
349 } else {
350 if (!insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, ei, inline_len, NULL, NULL, rollback)) {
351 ERR("insert_tree_item failed\n");
352 return STATUS_INTERNAL_ERROR;
353 }
354 }
355
356 if (!all_inline) {
357 le = &first_noninline->list_entry;
358
359 while (le != &mr->refs) {
360 metadata_reloc_ref* ref = CONTAINING_RECORD(le, metadata_reloc_ref, list_entry);
361
362 if (ref->type == TYPE_TREE_BLOCK_REF) {
363 TREE_BLOCK_REF* tbr;
364
365 tbr = ExAllocatePoolWithTag(PagedPool, sizeof(TREE_BLOCK_REF), ALLOC_TAG);
366 if (!tbr) {
367 ERR("out of memory\n");
368 return STATUS_INSUFFICIENT_RESOURCES;
369 }
370
371 tbr->offset = ref->tbr.offset;
372
373 if (!insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_TREE_BLOCK_REF, tbr->offset, tbr, sizeof(TREE_BLOCK_REF), NULL, NULL, rollback)) {
374 ERR("insert_tree_item failed\n");
375 return STATUS_INTERNAL_ERROR;
376 }
377 } else if (ref->type == TYPE_SHARED_BLOCK_REF) {
378 SHARED_BLOCK_REF* sbr;
379
380 sbr = ExAllocatePoolWithTag(PagedPool, sizeof(SHARED_BLOCK_REF), ALLOC_TAG);
381 if (!sbr) {
382 ERR("out of memory\n");
383 return STATUS_INSUFFICIENT_RESOURCES;
384 }
385
386 sbr->offset = ref->parent->new_address;
387
388 if (!insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_SHARED_BLOCK_REF, sbr->offset, sbr, sizeof(SHARED_BLOCK_REF), NULL, NULL, rollback)) {
389 ERR("insert_tree_item failed\n");
390 return STATUS_INTERNAL_ERROR;
391 }
392 }
393
394 le = le->Flink;
395 }
396 }
397
398 if (ei->flags & EXTENT_ITEM_SHARED_BACKREFS || mr->data->flags & HEADER_FLAG_SHARED_BACKREF || !(mr->data->flags & HEADER_FLAG_MIXED_BACKREF)) {
399 if (mr->data->level > 0) {
400 UINT16 i;
401 internal_node* in = (internal_node*)&mr->data[1];
402
403 for (i = 0; i < mr->data->num_items; i++) {
404 UINT64 sbrrc = find_extent_shared_tree_refcount(Vcb, in[i].address, mr->address, NULL);
405
406 if (sbrrc > 0) {
407 NTSTATUS Status;
408 SHARED_BLOCK_REF sbr;
409
410 sbr.offset = mr->new_address;
411
412 Status = increase_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
413 NULL, rollback);
414 if (!NT_SUCCESS(Status)) {
415 ERR("increase_extent_refcount returned %08x\n", Status);
416 return Status;
417 }
418
419 sbr.offset = mr->address;
420
421 Status = decrease_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
422 sbr.offset, FALSE, NULL, rollback);
423 if (!NT_SUCCESS(Status)) {
424 ERR("decrease_extent_refcount returned %08x\n", Status);
425 return Status;
426 }
427 }
428 }
429 } else {
430 UINT16 i;
431 leaf_node* ln = (leaf_node*)&mr->data[1];
432
433 for (i = 0; i < mr->data->num_items; i++) {
434 if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
435 EXTENT_DATA* ed = (EXTENT_DATA*)((UINT8*)mr->data + sizeof(tree_header) + ln[i].offset);
436
437 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
438 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
439
440 if (ed2->size > 0) { // not sparse
441 UINT64 sdrrc = find_extent_shared_data_refcount(Vcb, ed2->address, mr->address, NULL);
442
443 if (sdrrc > 0) {
444 NTSTATUS Status;
445 SHARED_DATA_REF sdr;
446 chunk* c;
447
448 sdr.offset = mr->new_address;
449 sdr.count = sdrrc;
450
451 Status = increase_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0,
452 NULL, rollback);
453 if (!NT_SUCCESS(Status)) {
454 ERR("increase_extent_refcount returned %08x\n", Status);
455 return Status;
456 }
457
458 sdr.offset = mr->address;
459
460 Status = decrease_extent_refcount(Vcb, ed2->address, ed2->size, TYPE_SHARED_DATA_REF, &sdr, NULL, 0,
461 sdr.offset, FALSE, NULL, rollback);
462 if (!NT_SUCCESS(Status)) {
463 ERR("decrease_extent_refcount returned %08x\n", Status);
464 return Status;
465 }
466
467 c = get_chunk_from_address(Vcb, ed2->address);
468
469 if (c) {
470 // check changed_extents
471
472 ExAcquireResourceExclusiveLite(&c->changed_extents_lock, TRUE);
473
474 le = c->changed_extents.Flink;
475
476 while (le != &c->changed_extents) {
477 changed_extent* ce = CONTAINING_RECORD(le, changed_extent, list_entry);
478
479 if (ce->address == ed2->address) {
480 LIST_ENTRY* le2;
481
482 le2 = ce->refs.Flink;
483 while (le2 != &ce->refs) {
484 changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
485
486 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == mr->address) {
487 cer->sdr.offset = mr->new_address;
488 break;
489 }
490
491 le2 = le2->Flink;
492 }
493
494 le2 = ce->old_refs.Flink;
495 while (le2 != &ce->old_refs) {
496 changed_extent_ref* cer = CONTAINING_RECORD(le2, changed_extent_ref, list_entry);
497
498 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == mr->address) {
499 cer->sdr.offset = mr->new_address;
500 break;
501 }
502
503 le2 = le2->Flink;
504 }
505
506 break;
507 }
508
509 le = le->Flink;
510 }
511
512 ExReleaseResourceLite(&c->changed_extents_lock);
513 }
514 }
515 }
516 }
517 }
518 }
519 }
520 }
521
522 return STATUS_SUCCESS;
523 }
524
525 static NTSTATUS write_metadata_items(device_extension* Vcb, LIST_ENTRY* items, LIST_ENTRY* data_items, chunk* c, LIST_ENTRY* rollback) {
526 LIST_ENTRY tree_writes, *le;
527 NTSTATUS Status;
528 traverse_ptr tp;
529 UINT8 level, max_level = 0;
530 chunk* newchunk = NULL;
531
532 InitializeListHead(&tree_writes);
533
534 le = items->Flink;
535 while (le != items) {
536 metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry);
537 LIST_ENTRY* le2;
538 chunk* pc;
539
540 // ERR("address %llx\n", mr->address);
541
542 mr->data = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
543 if (!mr->data) {
544 ERR("out of memory\n");
545 return STATUS_INSUFFICIENT_RESOURCES;
546 }
547
548 Status = read_data(Vcb, mr->address, Vcb->superblock.node_size, NULL, TRUE, (UINT8*)mr->data,
549 c && mr->address >= c->offset && mr->address < c->offset + c->chunk_item->size ? c : NULL, &pc, NULL, FALSE);
550 if (!NT_SUCCESS(Status)) {
551 ERR("read_data returned %08x\n", Status);
552 return Status;
553 }
554
555 if (pc->chunk_item->type & BLOCK_FLAG_SYSTEM)
556 mr->system = TRUE;
557
558 if (data_items && mr->data->level == 0) {
559 LIST_ENTRY* le2 = data_items->Flink;
560 while (le2 != data_items) {
561 data_reloc* dr = CONTAINING_RECORD(le2, data_reloc, list_entry);
562 leaf_node* ln = (leaf_node*)&mr->data[1];
563 UINT16 i;
564
565 for (i = 0; i < mr->data->num_items; i++) {
566 if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
567 EXTENT_DATA* ed = (EXTENT_DATA*)((UINT8*)mr->data + sizeof(tree_header) + ln[i].offset);
568
569 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
570 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
571
572 if (ed2->address == dr->address)
573 ed2->address = dr->new_address;
574 }
575 }
576 }
577
578 le2 = le2->Flink;
579 }
580 }
581
582 if (mr->data->level > max_level)
583 max_level = mr->data->level;
584
585 le2 = mr->refs.Flink;
586 while (le2 != &mr->refs) {
587 metadata_reloc_ref* ref = CONTAINING_RECORD(le2, metadata_reloc_ref, list_entry);
588
589 if (ref->type == TYPE_TREE_BLOCK_REF) {
590 KEY* firstitem;
591 root* r = NULL;
592 LIST_ENTRY* le3;
593 tree* t;
594
595 firstitem = (KEY*)&mr->data[1];
596
597 le3 = Vcb->roots.Flink;
598 while (le3 != &Vcb->roots) {
599 root* r2 = CONTAINING_RECORD(le3, root, list_entry);
600
601 if (r2->id == ref->tbr.offset) {
602 r = r2;
603 break;
604 }
605
606 le3 = le3->Flink;
607 }
608
609 if (!r) {
610 ERR("could not find subvol with id %llx\n", ref->tbr.offset);
611 return STATUS_INTERNAL_ERROR;
612 }
613
614 Status = find_item_to_level(Vcb, r, &tp, firstitem, FALSE, mr->data->level + 1, NULL);
615 if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
616 ERR("find_item_to_level returned %08x\n", Status);
617 return Status;
618 }
619
620 t = tp.tree;
621 while (t && t->header.level < mr->data->level + 1) {
622 t = t->parent;
623 }
624
625 if (!t)
626 ref->top = TRUE;
627 else {
628 metadata_reloc* mr2;
629
630 Status = add_metadata_reloc_parent(Vcb, items, t->header.address, &mr2, rollback);
631 if (!NT_SUCCESS(Status)) {
632 ERR("add_metadata_reloc_parent returned %08x\n", Status);
633 return Status;
634 }
635
636 ref->parent = mr2;
637 }
638 } else if (ref->type == TYPE_SHARED_BLOCK_REF) {
639 metadata_reloc* mr2;
640
641 Status = add_metadata_reloc_parent(Vcb, items, ref->sbr.offset, &mr2, rollback);
642 if (!NT_SUCCESS(Status)) {
643 ERR("add_metadata_reloc_parent returned %08x\n", Status);
644 return Status;
645 }
646
647 ref->parent = mr2;
648 }
649
650 le2 = le2->Flink;
651 }
652
653 le = le->Flink;
654 }
655
656 le = items->Flink;
657 while (le != items) {
658 metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry);
659 LIST_ENTRY* le2;
660 UINT32 hash;
661
662 mr->t = NULL;
663
664 hash = calc_crc32c(0xffffffff, (UINT8*)&mr->address, sizeof(UINT64));
665
666 le2 = Vcb->trees_ptrs[hash >> 24];
667
668 if (le2) {
669 while (le2 != &Vcb->trees_hash) {
670 tree* t = CONTAINING_RECORD(le2, tree, list_entry_hash);
671
672 if (t->header.address == mr->address) {
673 mr->t = t;
674 break;
675 } else if (t->hash > hash)
676 break;
677
678 le2 = le2->Flink;
679 }
680 }
681
682 le = le->Flink;
683 }
684
685 for (level = 0; level <= max_level; level++) {
686 le = items->Flink;
687 while (le != items) {
688 metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry);
689
690 if (mr->data->level == level) {
691 BOOL done = FALSE;
692 LIST_ENTRY* le2;
693 tree_write* tw;
694 UINT64 flags;
695 tree* t3;
696
697 if (mr->system)
698 flags = Vcb->system_flags;
699 else if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS)
700 flags = Vcb->data_flags;
701 else
702 flags = Vcb->metadata_flags;
703
704 if (newchunk) {
705 ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE);
706
707 if (newchunk->chunk_item->type == flags && find_metadata_address_in_chunk(Vcb, newchunk, &mr->new_address)) {
708 increase_chunk_usage(newchunk, Vcb->superblock.node_size);
709 space_list_subtract(Vcb, newchunk, FALSE, mr->new_address, Vcb->superblock.node_size, rollback);
710 done = TRUE;
711 }
712
713 ExReleaseResourceLite(&newchunk->lock);
714 }
715
716 if (!done) {
717 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
718
719 le2 = Vcb->chunks.Flink;
720 while (le2 != &Vcb->chunks) {
721 chunk* c2 = CONTAINING_RECORD(le2, chunk, list_entry);
722
723 if (!c2->readonly && !c2->reloc && c2 != newchunk && c2->chunk_item->type == flags) {
724 ExAcquireResourceExclusiveLite(&c2->lock, TRUE);
725
726 if ((c2->chunk_item->size - c2->used) >= Vcb->superblock.node_size) {
727 if (find_metadata_address_in_chunk(Vcb, c2, &mr->new_address)) {
728 increase_chunk_usage(c2, Vcb->superblock.node_size);
729 space_list_subtract(Vcb, c2, FALSE, mr->new_address, Vcb->superblock.node_size, rollback);
730 ExReleaseResourceLite(&c2->lock);
731 newchunk = c2;
732 done = TRUE;
733 break;
734 }
735 }
736
737 ExReleaseResourceLite(&c2->lock);
738 }
739
740 le2 = le2->Flink;
741 }
742
743 // allocate new chunk if necessary
744 if (!done) {
745 newchunk = alloc_chunk(Vcb, flags);
746
747 if (!newchunk) {
748 ERR("could not allocate new chunk\n");
749 ExReleaseResourceLite(&Vcb->chunk_lock);
750 Status = STATUS_DISK_FULL;
751 goto end;
752 }
753
754 ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE);
755
756 if (!find_metadata_address_in_chunk(Vcb, newchunk, &mr->new_address)) {
757 ExReleaseResourceLite(&newchunk->lock);
758 ERR("could not find address in new chunk\n");
759 Status = STATUS_DISK_FULL;
760 goto end;
761 } else {
762 increase_chunk_usage(newchunk, Vcb->superblock.node_size);
763 space_list_subtract(Vcb, newchunk, FALSE, mr->new_address, Vcb->superblock.node_size, rollback);
764 }
765
766 ExReleaseResourceLite(&newchunk->lock);
767 }
768
769 ExReleaseResourceLite(&Vcb->chunk_lock);
770 }
771
772 // update parents
773 le2 = mr->refs.Flink;
774 while (le2 != &mr->refs) {
775 metadata_reloc_ref* ref = CONTAINING_RECORD(le2, metadata_reloc_ref, list_entry);
776
777 if (ref->parent) {
778 UINT16 i;
779 internal_node* in = (internal_node*)&ref->parent->data[1];
780
781 for (i = 0; i < ref->parent->data->num_items; i++) {
782 if (in[i].address == mr->address) {
783 in[i].address = mr->new_address;
784 break;
785 }
786 }
787
788 if (ref->parent->t) {
789 LIST_ENTRY* le3;
790
791 le3 = ref->parent->t->itemlist.Flink;
792 while (le3 != &ref->parent->t->itemlist) {
793 tree_data* td = CONTAINING_RECORD(le3, tree_data, list_entry);
794
795 if (!td->inserted && td->treeholder.address == mr->address)
796 td->treeholder.address = mr->new_address;
797
798 le3 = le3->Flink;
799 }
800 }
801 } else if (ref->top && ref->type == TYPE_TREE_BLOCK_REF) {
802 LIST_ENTRY* le3;
803 root* r = NULL;
804
805 // alter ROOT_ITEM
806
807 le3 = Vcb->roots.Flink;
808 while (le3 != &Vcb->roots) {
809 root* r2 = CONTAINING_RECORD(le3, root, list_entry);
810
811 if (r2->id == ref->tbr.offset) {
812 r = r2;
813 break;
814 }
815
816 le3 = le3->Flink;
817 }
818
819 if (r) {
820 r->treeholder.address = mr->new_address;
821
822 if (r == Vcb->root_root)
823 Vcb->superblock.root_tree_addr = mr->new_address;
824 else if (r == Vcb->chunk_root)
825 Vcb->superblock.chunk_tree_addr = mr->new_address;
826 else if (r->root_item.block_number == mr->address) {
827 KEY searchkey;
828 ROOT_ITEM* ri;
829
830 r->root_item.block_number = mr->new_address;
831
832 searchkey.obj_id = r->id;
833 searchkey.obj_type = TYPE_ROOT_ITEM;
834 searchkey.offset = 0xffffffffffffffff;
835
836 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
837 if (!NT_SUCCESS(Status)) {
838 ERR("find_item returned %08x\n", Status);
839 goto end;
840 }
841
842 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
843 ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
844 Status = STATUS_INTERNAL_ERROR;
845 goto end;
846 }
847
848 ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
849 if (!ri) {
850 ERR("out of memory\n");
851 Status = STATUS_INSUFFICIENT_RESOURCES;
852 goto end;
853 }
854
855 RtlCopyMemory(ri, &r->root_item, sizeof(ROOT_ITEM));
856
857 delete_tree_item(Vcb, &tp, rollback);
858
859 if (!insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, NULL, rollback)) {
860 ERR("insert_tree_item failed\n");
861 Status = STATUS_INTERNAL_ERROR;
862 goto end;
863 }
864 }
865 }
866 }
867
868 le2 = le2->Flink;
869 }
870
871 mr->data->address = mr->new_address;
872
873 t3 = mr->t;
874
875 while (t3) {
876 UINT8 h;
877 BOOL inserted;
878 tree* t4 = NULL;
879
880 // check if tree loaded more than once
881 if (t3->list_entry.Flink != &Vcb->trees_hash) {
882 tree* nt = CONTAINING_RECORD(t3->list_entry_hash.Flink, tree, list_entry_hash);
883
884 if (nt->header.address == t3->header.address)
885 t4 = nt;
886 }
887
888 t3->header.address = mr->new_address;
889
890 h = t3->hash >> 24;
891
892 if (Vcb->trees_ptrs[h] == &t3->list_entry_hash) {
893 if (t3->list_entry_hash.Flink == &Vcb->trees_hash)
894 Vcb->trees_ptrs[h] = NULL;
895 else {
896 tree* t2 = CONTAINING_RECORD(t3->list_entry_hash.Flink, tree, list_entry_hash);
897
898 if (t2->hash >> 24 == h)
899 Vcb->trees_ptrs[h] = &t2->list_entry_hash;
900 else
901 Vcb->trees_ptrs[h] = NULL;
902 }
903 }
904
905 RemoveEntryList(&t3->list_entry_hash);
906
907 t3->hash = calc_crc32c(0xffffffff, (UINT8*)&t3->header.address, sizeof(UINT64));
908 h = t3->hash >> 24;
909
910 if (!Vcb->trees_ptrs[h]) {
911 UINT8 h2 = h;
912
913 le2 = Vcb->trees_hash.Flink;
914
915 if (h2 > 0) {
916 h2--;
917 do {
918 if (Vcb->trees_ptrs[h2]) {
919 le2 = Vcb->trees_ptrs[h2];
920 break;
921 }
922
923 h2--;
924 } while (h2 > 0);
925 }
926 } else
927 le2 = Vcb->trees_ptrs[h];
928
929 inserted = FALSE;
930 while (le2 != &Vcb->trees_hash) {
931 tree* t2 = CONTAINING_RECORD(le2, tree, list_entry_hash);
932
933 if (t2->hash >= t3->hash) {
934 InsertHeadList(le2->Blink, &t3->list_entry_hash);
935 inserted = TRUE;
936 break;
937 }
938
939 le2 = le2->Flink;
940 }
941
942 if (!inserted)
943 InsertTailList(&Vcb->trees_hash, &t3->list_entry_hash);
944
945 if (!Vcb->trees_ptrs[h] || t3->list_entry_hash.Flink == Vcb->trees_ptrs[h])
946 Vcb->trees_ptrs[h] = &t3->list_entry_hash;
947
948 if (data_items && level == 0) {
949 le2 = data_items->Flink;
950
951 while (le2 != data_items) {
952 data_reloc* dr = CONTAINING_RECORD(le2, data_reloc, list_entry);
953 LIST_ENTRY* le3 = t3->itemlist.Flink;
954
955 while (le3 != &t3->itemlist) {
956 tree_data* td = CONTAINING_RECORD(le3, tree_data, list_entry);
957
958 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
959 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
960
961 if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
962 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
963
964 if (ed2->address == dr->address)
965 ed2->address = dr->new_address;
966 }
967 }
968
969 le3 = le3->Flink;
970 }
971
972 le2 = le2->Flink;
973 }
974 }
975
976 t3 = t4;
977 }
978
979 *((UINT32*)mr->data) = ~calc_crc32c(0xffffffff, (UINT8*)&mr->data->fs_uuid, Vcb->superblock.node_size - sizeof(mr->data->csum));
980
981 tw = ExAllocatePoolWithTag(PagedPool, sizeof(tree_write), ALLOC_TAG);
982 if (!tw) {
983 ERR("out of memory\n");
984 Status = STATUS_INSUFFICIENT_RESOURCES;
985 goto end;
986 }
987
988 tw->address = mr->new_address;
989 tw->length = Vcb->superblock.node_size;
990 tw->data = (UINT8*)mr->data;
991 tw->overlap = FALSE;
992
993 if (IsListEmpty(&tree_writes))
994 InsertTailList(&tree_writes, &tw->list_entry);
995 else {
996 BOOL inserted = FALSE;
997
998 le2 = tree_writes.Flink;
999 while (le2 != &tree_writes) {
1000 tree_write* tw2 = CONTAINING_RECORD(le2, tree_write, list_entry);
1001
1002 if (tw2->address > tw->address) {
1003 InsertHeadList(le2->Blink, &tw->list_entry);
1004 inserted = TRUE;
1005 break;
1006 }
1007
1008 le2 = le2->Flink;
1009 }
1010
1011 if (!inserted)
1012 InsertTailList(&tree_writes, &tw->list_entry);
1013 }
1014 }
1015
1016 le = le->Flink;
1017 }
1018 }
1019
1020 le = items->Flink;
1021 while (le != items) {
1022 metadata_reloc* mr = CONTAINING_RECORD(le, metadata_reloc, list_entry);
1023
1024 Status = add_metadata_reloc_extent_item(Vcb, mr, rollback);
1025 if (!NT_SUCCESS(Status)) {
1026 ERR("add_metadata_reloc_extent_item returned %08x\n", Status);
1027 goto end;
1028 }
1029
1030 le = le->Flink;
1031 }
1032
1033 Status = do_tree_writes(Vcb, &tree_writes, NULL);
1034 if (!NT_SUCCESS(Status)) {
1035 ERR("do_tree_writes returned %08x\n", Status);
1036 goto end;
1037 }
1038
1039 Status = STATUS_SUCCESS;
1040
1041 end:
1042 while (!IsListEmpty(&tree_writes)) {
1043 tree_write* tw = CONTAINING_RECORD(RemoveHeadList(&tree_writes), tree_write, list_entry);
1044 ExFreePool(tw);
1045 }
1046
1047 return Status;
1048 }
1049
1050 static NTSTATUS balance_metadata_chunk(device_extension* Vcb, chunk* c, BOOL* changed) {
1051 KEY searchkey;
1052 traverse_ptr tp;
1053 NTSTATUS Status;
1054 BOOL b;
1055 LIST_ENTRY items, rollback;
1056 UINT32 loaded = 0;
1057
1058 TRACE("chunk %llx\n", c->offset);
1059
1060 InitializeListHead(&rollback);
1061 InitializeListHead(&items);
1062
1063 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
1064
1065 searchkey.obj_id = c->offset;
1066 searchkey.obj_type = TYPE_METADATA_ITEM;
1067 searchkey.offset = 0xffffffffffffffff;
1068
1069 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
1070 if (!NT_SUCCESS(Status)) {
1071 ERR("find_item returned %08x\n", Status);
1072 goto end;
1073 }
1074
1075 do {
1076 traverse_ptr next_tp;
1077
1078 if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
1079 break;
1080
1081 if (tp.item->key.obj_id >= c->offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) {
1082 BOOL tree = FALSE, skinny = FALSE;
1083
1084 if (tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) {
1085 tree = TRUE;
1086 skinny = TRUE;
1087 } else if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.offset == Vcb->superblock.node_size &&
1088 tp.item->size >= sizeof(EXTENT_ITEM)) {
1089 EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
1090
1091 if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
1092 tree = TRUE;
1093 }
1094
1095 if (tree) {
1096 Status = add_metadata_reloc(Vcb, &items, &tp, skinny, NULL, c, &rollback);
1097
1098 if (!NT_SUCCESS(Status)) {
1099 ERR("add_metadata_reloc returned %08x\n", Status);
1100 goto end;
1101 }
1102
1103 loaded++;
1104
1105 if (loaded >= 64) // only do 64 at a time
1106 break;
1107 }
1108 }
1109
1110 b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
1111
1112 if (b)
1113 tp = next_tp;
1114 } while (b);
1115
1116 if (IsListEmpty(&items)) {
1117 *changed = FALSE;
1118 Status = STATUS_SUCCESS;
1119 goto end;
1120 } else
1121 *changed = TRUE;
1122
1123 Status = write_metadata_items(Vcb, &items, NULL, c, &rollback);
1124 if (!NT_SUCCESS(Status)) {
1125 ERR("write_metadata_items returned %08x\n", Status);
1126 goto end;
1127 }
1128
1129 Status = STATUS_SUCCESS;
1130
1131 Vcb->need_write = TRUE;
1132
1133 end:
1134 if (NT_SUCCESS(Status))
1135 clear_rollback(Vcb, &rollback);
1136 else
1137 do_rollback(Vcb, &rollback);
1138
1139 ExReleaseResourceLite(&Vcb->tree_lock);
1140
1141 while (!IsListEmpty(&items)) {
1142 metadata_reloc* mr = CONTAINING_RECORD(RemoveHeadList(&items), metadata_reloc, list_entry);
1143
1144 while (!IsListEmpty(&mr->refs)) {
1145 metadata_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&mr->refs), metadata_reloc_ref, list_entry);
1146
1147 ExFreePool(ref);
1148 }
1149
1150 ExFreePool(mr);
1151 }
1152
1153 return Status;
1154 }
1155
1156 static NTSTATUS add_data_reloc(device_extension* Vcb, LIST_ENTRY* items, LIST_ENTRY* metadata_items, traverse_ptr* tp, chunk* c, LIST_ENTRY* rollback) {
1157 data_reloc* dr;
1158 EXTENT_ITEM* ei;
1159 UINT16 len;
1160 UINT64 inline_rc;
1161 UINT8* ptr;
1162
1163 dr = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc), ALLOC_TAG);
1164 if (!dr) {
1165 ERR("out of memory\n");
1166 return STATUS_INSUFFICIENT_RESOURCES;
1167 }
1168
1169 dr->address = tp->item->key.obj_id;
1170 dr->size = tp->item->key.offset;
1171 dr->ei = (EXTENT_ITEM*)tp->item->data;
1172 InitializeListHead(&dr->refs);
1173
1174 delete_tree_item(Vcb, tp, rollback);
1175
1176 if (!c)
1177 c = get_chunk_from_address(Vcb, tp->item->key.obj_id);
1178
1179 if (c) {
1180 ExAcquireResourceExclusiveLite(&c->lock, TRUE);
1181
1182 decrease_chunk_usage(c, tp->item->key.offset);
1183
1184 space_list_add(Vcb, c, TRUE, tp->item->key.obj_id, tp->item->key.offset, rollback);
1185
1186 ExReleaseResourceLite(&c->lock);
1187 }
1188
1189 ei = (EXTENT_ITEM*)tp->item->data;
1190 inline_rc = 0;
1191
1192 len = tp->item->size - sizeof(EXTENT_ITEM);
1193 ptr = (UINT8*)tp->item->data + sizeof(EXTENT_ITEM);
1194
1195 while (len > 0) {
1196 UINT8 secttype = *ptr;
1197 ULONG sectlen = secttype == TYPE_EXTENT_DATA_REF ? sizeof(EXTENT_DATA_REF) : (secttype == TYPE_SHARED_DATA_REF ? sizeof(SHARED_DATA_REF) : 0);
1198 data_reloc_ref* ref;
1199 NTSTATUS Status;
1200 metadata_reloc* mr;
1201
1202 len--;
1203
1204 if (sectlen > len) {
1205 ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, len, sectlen);
1206 return STATUS_INTERNAL_ERROR;
1207 }
1208
1209 if (sectlen == 0) {
1210 ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, secttype);
1211 return STATUS_INTERNAL_ERROR;
1212 }
1213
1214 ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG);
1215 if (!ref) {
1216 ERR("out of memory\n");
1217 return STATUS_INSUFFICIENT_RESOURCES;
1218 }
1219
1220 if (secttype == TYPE_EXTENT_DATA_REF) {
1221 LIST_ENTRY* le;
1222 KEY searchkey;
1223 traverse_ptr tp3;
1224 root* r = NULL;
1225
1226 ref->type = TYPE_EXTENT_DATA_REF;
1227 RtlCopyMemory(&ref->edr, ptr + sizeof(UINT8), sizeof(EXTENT_DATA_REF));
1228 inline_rc += ref->edr.count;
1229
1230 le = Vcb->roots.Flink;
1231 while (le != &Vcb->roots) {
1232 root* r2 = CONTAINING_RECORD(le, root, list_entry);
1233
1234 if (r2->id == ref->edr.root) {
1235 r = r2;
1236 break;
1237 }
1238
1239 le = le->Flink;
1240 }
1241
1242 if (!r) {
1243 ERR("could not find subvol %llx\n", ref->edr.count);
1244 ExFreePool(ref);
1245 return STATUS_INTERNAL_ERROR;
1246 }
1247
1248 searchkey.obj_id = ref->edr.objid;
1249 searchkey.obj_type = TYPE_EXTENT_DATA;
1250 searchkey.offset = ref->edr.offset;
1251
1252 Status = find_item(Vcb, r, &tp3, &searchkey, FALSE, NULL);
1253 if (!NT_SUCCESS(Status)) {
1254 ERR("find_item returned %08x\n", Status);
1255 ExFreePool(ref);
1256 return Status;
1257 }
1258
1259 if (keycmp(tp3.item->key, searchkey)) {
1260 ERR("could not find (%llx,%x,%llx) in root %llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, r->id);
1261 ExFreePool(ref);
1262 return STATUS_INTERNAL_ERROR;
1263 }
1264
1265 Status = add_metadata_reloc_parent(Vcb, metadata_items, tp3.tree->header.address, &mr, rollback);
1266 if (!NT_SUCCESS(Status)) {
1267 ERR("add_metadata_reloc_parent returned %08x\n", Status);
1268 ExFreePool(ref);
1269 return Status;
1270 }
1271
1272 ref->parent = mr;
1273 } else if (secttype == TYPE_SHARED_DATA_REF) {
1274 ref->type = TYPE_SHARED_DATA_REF;
1275 RtlCopyMemory(&ref->sdr, ptr + sizeof(UINT8), sizeof(SHARED_DATA_REF));
1276 inline_rc += ref->sdr.count;
1277
1278 Status = add_metadata_reloc_parent(Vcb, metadata_items, ref->sdr.offset, &mr, rollback);
1279 if (!NT_SUCCESS(Status)) {
1280 ERR("add_metadata_reloc_parent returned %08x\n", Status);
1281 ExFreePool(ref);
1282 return Status;
1283 }
1284
1285 ref->parent = mr;
1286 } else {
1287 ERR("unexpected tree type %x\n", secttype);
1288 ExFreePool(ref);
1289 return STATUS_INTERNAL_ERROR;
1290 }
1291
1292 InsertTailList(&dr->refs, &ref->list_entry);
1293
1294 len -= sectlen;
1295 ptr += sizeof(UINT8) + sectlen;
1296 }
1297
1298 if (inline_rc < ei->refcount) { // look for non-inline entries
1299 traverse_ptr tp2 = *tp, next_tp;
1300
1301 while (find_next_item(Vcb, &tp2, &next_tp, FALSE, NULL)) {
1302 metadata_reloc* mr;
1303 NTSTATUS Status;
1304
1305 tp2 = next_tp;
1306
1307 if (tp2.item->key.obj_id == tp->item->key.obj_id) {
1308 if (tp2.item->key.obj_type == TYPE_EXTENT_DATA_REF && tp2.item->size >= sizeof(EXTENT_DATA_REF)) {
1309 data_reloc_ref* ref;
1310 LIST_ENTRY* le;
1311 KEY searchkey;
1312 traverse_ptr tp3;
1313 root* r = NULL;
1314
1315 ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG);
1316 if (!ref) {
1317 ERR("out of memory\n");
1318 return STATUS_INSUFFICIENT_RESOURCES;
1319 }
1320
1321 ref->type = TYPE_EXTENT_DATA_REF;
1322 RtlCopyMemory(&ref->edr, tp2.item->data, sizeof(EXTENT_DATA_REF));
1323
1324 le = Vcb->roots.Flink;
1325 while (le != &Vcb->roots) {
1326 root* r2 = CONTAINING_RECORD(le, root, list_entry);
1327
1328 if (r2->id == ref->edr.root) {
1329 r = r2;
1330 break;
1331 }
1332
1333 le = le->Flink;
1334 }
1335
1336 if (!r) {
1337 ERR("could not find subvol %llx\n", ref->edr.count);
1338 ExFreePool(ref);
1339 return STATUS_INTERNAL_ERROR;
1340 }
1341
1342 searchkey.obj_id = ref->edr.objid;
1343 searchkey.obj_type = TYPE_EXTENT_DATA;
1344 searchkey.offset = ref->edr.offset;
1345
1346 Status = find_item(Vcb, r, &tp3, &searchkey, FALSE, NULL);
1347 if (!NT_SUCCESS(Status)) {
1348 ERR("find_item returned %08x\n", Status);
1349 ExFreePool(ref);
1350 return Status;
1351 }
1352
1353 if (!keycmp(tp3.item->key, searchkey)) {
1354 ERR("could not find (%llx,%x,%llx) in root %llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, r->id);
1355 ExFreePool(ref);
1356 return STATUS_INTERNAL_ERROR;
1357 }
1358
1359 Status = add_metadata_reloc_parent(Vcb, metadata_items, tp3.tree->header.address, &mr, rollback);
1360 if (!NT_SUCCESS(Status)) {
1361 ERR("add_metadata_reloc_parent returned %08x\n", Status);
1362 ExFreePool(ref);
1363 return Status;
1364 }
1365
1366 ref->parent = mr;
1367 InsertTailList(&dr->refs, &ref->list_entry);
1368
1369 delete_tree_item(Vcb, &tp2, rollback);
1370 } else if (tp2.item->key.obj_type == TYPE_SHARED_DATA_REF && tp2.item->size >= sizeof(SHARED_DATA_REF)) {
1371 data_reloc_ref* ref = ExAllocatePoolWithTag(PagedPool, sizeof(data_reloc_ref), ALLOC_TAG);
1372 if (!ref) {
1373 ERR("out of memory\n");
1374 return STATUS_INSUFFICIENT_RESOURCES;
1375 }
1376
1377 ref->type = TYPE_SHARED_DATA_REF;
1378 RtlCopyMemory(&ref->sdr, tp2.item->data, sizeof(SHARED_DATA_REF));
1379
1380 Status = add_metadata_reloc_parent(Vcb, metadata_items, ref->sdr.offset, &mr, rollback);
1381 if (!NT_SUCCESS(Status)) {
1382 ERR("add_metadata_reloc_parent returned %08x\n", Status);
1383 ExFreePool(ref);
1384 return Status;
1385 }
1386
1387 ref->parent = mr;
1388 InsertTailList(&dr->refs, &ref->list_entry);
1389
1390 delete_tree_item(Vcb, &tp2, rollback);
1391 }
1392 } else
1393 break;
1394 }
1395 }
1396
1397 InsertTailList(items, &dr->list_entry);
1398
1399 return STATUS_SUCCESS;
1400 }
1401
1402 static NTSTATUS add_data_reloc_extent_item(device_extension* Vcb, data_reloc* dr, LIST_ENTRY* rollback) {
1403 LIST_ENTRY* le;
1404 UINT64 rc = 0;
1405 UINT16 inline_len;
1406 BOOL all_inline = TRUE;
1407 data_reloc_ref* first_noninline = NULL;
1408 EXTENT_ITEM* ei;
1409 UINT8* ptr;
1410
1411 inline_len = sizeof(EXTENT_ITEM);
1412
1413 le = dr->refs.Flink;
1414 while (le != &dr->refs) {
1415 data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry);
1416 ULONG extlen = 0;
1417
1418 rc++;
1419
1420 if (ref->type == TYPE_EXTENT_DATA_REF)
1421 extlen += sizeof(EXTENT_DATA_REF);
1422 else if (ref->type == TYPE_SHARED_DATA_REF)
1423 extlen += sizeof(SHARED_DATA_REF);
1424
1425 if (all_inline) {
1426 if (inline_len + 1 + extlen > Vcb->superblock.node_size / 4) {
1427 all_inline = FALSE;
1428 first_noninline = ref;
1429 } else
1430 inline_len += extlen + 1;
1431 }
1432
1433 le = le->Flink;
1434 }
1435
1436 ei = ExAllocatePoolWithTag(PagedPool, inline_len, ALLOC_TAG);
1437 if (!ei) {
1438 ERR("out of memory\n");
1439 return STATUS_INSUFFICIENT_RESOURCES;
1440 }
1441
1442 ei->refcount = rc;
1443 ei->generation = dr->ei->generation;
1444 ei->flags = dr->ei->flags;
1445 ptr = (UINT8*)&ei[1];
1446
1447 le = dr->refs.Flink;
1448 while (le != &dr->refs) {
1449 data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry);
1450
1451 if (ref == first_noninline)
1452 break;
1453
1454 *ptr = ref->type;
1455 ptr++;
1456
1457 if (ref->type == TYPE_EXTENT_DATA_REF) {
1458 EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)ptr;
1459
1460 RtlCopyMemory(edr, &ref->edr, sizeof(EXTENT_DATA_REF));
1461
1462 ptr += sizeof(EXTENT_DATA_REF);
1463 } else if (ref->type == TYPE_SHARED_DATA_REF) {
1464 SHARED_DATA_REF* sdr = (SHARED_DATA_REF*)ptr;
1465
1466 sdr->offset = ref->parent->new_address;
1467 sdr->count = ref->sdr.count;
1468
1469 ptr += sizeof(SHARED_DATA_REF);
1470 }
1471
1472 le = le->Flink;
1473 }
1474
1475 if (!insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_EXTENT_ITEM, dr->size, ei, inline_len, NULL, NULL, rollback)) {
1476 ERR("insert_tree_item failed\n");
1477 return STATUS_INTERNAL_ERROR;
1478 }
1479
1480 if (!all_inline) {
1481 le = &first_noninline->list_entry;
1482
1483 while (le != &dr->refs) {
1484 data_reloc_ref* ref = CONTAINING_RECORD(le, data_reloc_ref, list_entry);
1485
1486 if (ref->type == TYPE_EXTENT_DATA_REF) {
1487 EXTENT_DATA_REF* edr;
1488 UINT64 off;
1489
1490 edr = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA_REF), ALLOC_TAG);
1491 if (!edr) {
1492 ERR("out of memory\n");
1493 return STATUS_INSUFFICIENT_RESOURCES;
1494 }
1495
1496 RtlCopyMemory(edr, &ref->edr, sizeof(EXTENT_DATA_REF));
1497
1498 off = get_extent_data_ref_hash2(ref->edr.root, ref->edr.objid, ref->edr.offset);
1499
1500 if (!insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_EXTENT_DATA_REF, off, edr, sizeof(EXTENT_DATA_REF), NULL, NULL, rollback)) {
1501 ERR("insert_tree_item failed\n");
1502 return STATUS_INTERNAL_ERROR;
1503 }
1504 } else if (ref->type == TYPE_SHARED_DATA_REF) {
1505 SHARED_DATA_REF* sdr;
1506
1507 sdr = ExAllocatePoolWithTag(PagedPool, sizeof(SHARED_DATA_REF), ALLOC_TAG);
1508 if (!sdr) {
1509 ERR("out of memory\n");
1510 return STATUS_INSUFFICIENT_RESOURCES;
1511 }
1512
1513 sdr->offset = ref->parent->new_address;
1514 sdr->count = ref->sdr.count;
1515
1516 if (!insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_SHARED_DATA_REF, sdr->offset, sdr, sizeof(SHARED_DATA_REF), NULL, NULL, rollback)) {
1517 ERR("insert_tree_item failed\n");
1518 return STATUS_INTERNAL_ERROR;
1519 }
1520 }
1521
1522 le = le->Flink;
1523 }
1524 }
1525
1526 return STATUS_SUCCESS;
1527 }
1528
1529 static NTSTATUS balance_data_chunk(device_extension* Vcb, chunk* c, BOOL* changed) {
1530 KEY searchkey;
1531 traverse_ptr tp;
1532 NTSTATUS Status;
1533 BOOL b;
1534 LIST_ENTRY items, metadata_items, rollback, *le;
1535 UINT64 loaded = 0, num_loaded = 0;
1536 chunk* newchunk = NULL;
1537 UINT8* data = NULL;
1538
1539 TRACE("chunk %llx\n", c->offset);
1540
1541 InitializeListHead(&rollback);
1542 InitializeListHead(&items);
1543 InitializeListHead(&metadata_items);
1544
1545 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
1546
1547 searchkey.obj_id = c->offset;
1548 searchkey.obj_type = TYPE_EXTENT_ITEM;
1549 searchkey.offset = 0xffffffffffffffff;
1550
1551 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
1552 if (!NT_SUCCESS(Status)) {
1553 ERR("find_item returned %08x\n", Status);
1554 goto end;
1555 }
1556
1557 do {
1558 traverse_ptr next_tp;
1559
1560 if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
1561 break;
1562
1563 if (tp.item->key.obj_id >= c->offset && tp.item->key.obj_type == TYPE_EXTENT_ITEM) {
1564 BOOL tree = FALSE;
1565
1566 if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) {
1567 EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
1568
1569 if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
1570 tree = TRUE;
1571 }
1572
1573 if (!tree) {
1574 Status = add_data_reloc(Vcb, &items, &metadata_items, &tp, c, &rollback);
1575
1576 if (!NT_SUCCESS(Status)) {
1577 ERR("add_data_reloc returned %08x\n", Status);
1578 goto end;
1579 }
1580
1581 loaded += tp.item->key.offset;
1582 num_loaded++;
1583
1584 if (loaded >= 0x1000000 || num_loaded >= 100) // only do so much at a time, so we don't block too obnoxiously
1585 break;
1586 }
1587 }
1588
1589 b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
1590
1591 if (b)
1592 tp = next_tp;
1593 } while (b);
1594
1595 if (IsListEmpty(&items)) {
1596 *changed = FALSE;
1597 Status = STATUS_SUCCESS;
1598 goto end;
1599 } else
1600 *changed = TRUE;
1601
1602 data = ExAllocatePoolWithTag(PagedPool, 0x100000, ALLOC_TAG);
1603 if (!data) {
1604 ERR("out of memory\n");
1605 Status = STATUS_INSUFFICIENT_RESOURCES;
1606 goto end;
1607 }
1608
1609 le = items.Flink;
1610 while (le != &items) {
1611 data_reloc* dr = CONTAINING_RECORD(le, data_reloc, list_entry);
1612 BOOL done = FALSE;
1613 LIST_ENTRY* le2;
1614 UINT32* csum;
1615 UINT64 off;
1616
1617 if (newchunk) {
1618 ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE);
1619
1620 if (find_data_address_in_chunk(Vcb, newchunk, dr->size, &dr->new_address)) {
1621 increase_chunk_usage(newchunk, dr->size);
1622 space_list_subtract(Vcb, newchunk, FALSE, dr->new_address, dr->size, &rollback);
1623 done = TRUE;
1624 }
1625
1626 ExReleaseResourceLite(&newchunk->lock);
1627 }
1628
1629 if (!done) {
1630 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
1631
1632 le2 = Vcb->chunks.Flink;
1633 while (le2 != &Vcb->chunks) {
1634 chunk* c2 = CONTAINING_RECORD(le2, chunk, list_entry);
1635
1636 if (!c2->readonly && !c2->reloc && c2 != newchunk && c2->chunk_item->type == Vcb->data_flags) {
1637 ExAcquireResourceExclusiveLite(&c2->lock, TRUE);
1638
1639 if ((c2->chunk_item->size - c2->used) >= dr->size) {
1640 if (find_data_address_in_chunk(Vcb, c2, dr->size, &dr->new_address)) {
1641 increase_chunk_usage(c2, dr->size);
1642 space_list_subtract(Vcb, c2, FALSE, dr->new_address, dr->size, &rollback);
1643 ExReleaseResourceLite(&c2->lock);
1644 newchunk = c2;
1645 done = TRUE;
1646 break;
1647 }
1648 }
1649
1650 ExReleaseResourceLite(&c2->lock);
1651 }
1652
1653 le2 = le2->Flink;
1654 }
1655
1656 // allocate new chunk if necessary
1657 if (!done) {
1658 newchunk = alloc_chunk(Vcb, Vcb->data_flags);
1659
1660 if (!newchunk) {
1661 ERR("could not allocate new chunk\n");
1662 ExReleaseResourceLite(&Vcb->chunk_lock);
1663 Status = STATUS_DISK_FULL;
1664 goto end;
1665 }
1666
1667 ExAcquireResourceExclusiveLite(&newchunk->lock, TRUE);
1668
1669 if (!find_data_address_in_chunk(Vcb, newchunk, dr->size, &dr->new_address)) {
1670 ExReleaseResourceLite(&newchunk->lock);
1671 ERR("could not find address in new chunk\n");
1672 Status = STATUS_DISK_FULL;
1673 goto end;
1674 } else {
1675 increase_chunk_usage(newchunk, dr->size);
1676 space_list_subtract(Vcb, newchunk, FALSE, dr->new_address, dr->size, &rollback);
1677 }
1678
1679 ExReleaseResourceLite(&newchunk->lock);
1680 }
1681
1682 ExReleaseResourceLite(&Vcb->chunk_lock);
1683 }
1684
1685 dr->newchunk = newchunk;
1686
1687 csum = ExAllocatePoolWithTag(PagedPool, dr->size * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG);
1688 if (!csum) {
1689 ERR("out of memory\n");
1690 Status = STATUS_INSUFFICIENT_RESOURCES;
1691 goto end;
1692 }
1693
1694 Status = load_csum(Vcb, csum, dr->address, dr->size / Vcb->superblock.sector_size, NULL);
1695
1696 if (NT_SUCCESS(Status)) {
1697 add_checksum_entry(Vcb, dr->new_address, dr->size / Vcb->superblock.sector_size, csum, NULL, &rollback);
1698 add_checksum_entry(Vcb, dr->address, dr->size / Vcb->superblock.sector_size, NULL, NULL, &rollback);
1699 }
1700
1701 ExFreePool(csum);
1702
1703 off = 0;
1704
1705 while (off < dr->size) {
1706 ULONG ds = min(dr->size - off, 0x100000);
1707
1708 Status = read_data(Vcb, dr->address + off, ds, NULL, FALSE, data, c, NULL, NULL, FALSE);
1709 if (!NT_SUCCESS(Status)) {
1710 ERR("read_data returned %08x\n", Status);
1711 goto end;
1712 }
1713
1714 Status = write_data_complete(Vcb, dr->new_address + off, data, ds, NULL, newchunk);
1715 if (!NT_SUCCESS(Status)) {
1716 ERR("write_data_complete returned %08x\n", Status);
1717 goto end;
1718 }
1719
1720 off += ds;
1721 }
1722
1723 le = le->Flink;
1724 }
1725
1726 ExFreePool(data);
1727 data = NULL;
1728
1729 Status = write_metadata_items(Vcb, &metadata_items, &items, NULL, &rollback);
1730 if (!NT_SUCCESS(Status)) {
1731 ERR("write_metadata_items returned %08x\n", Status);
1732 goto end;
1733 }
1734
1735 le = items.Flink;
1736 while (le != &items) {
1737 data_reloc* dr = CONTAINING_RECORD(le, data_reloc, list_entry);
1738
1739 Status = add_data_reloc_extent_item(Vcb, dr, &rollback);
1740 if (!NT_SUCCESS(Status)) {
1741 ERR("add_data_reloc_extent_item returned %08x\n", Status);
1742 goto end;
1743 }
1744
1745 le = le->Flink;
1746 }
1747
1748 le = c->changed_extents.Flink;
1749 while (le != &c->changed_extents) {
1750 LIST_ENTRY *le2, *le3;
1751 changed_extent* ce = CONTAINING_RECORD(le, changed_extent, list_entry);
1752
1753 le3 = le->Flink;
1754
1755 le2 = items.Flink;
1756 while (le2 != &items) {
1757 data_reloc* dr = CONTAINING_RECORD(le2, data_reloc, list_entry);
1758
1759 if (ce->address == dr->address) {
1760 ce->address = dr->new_address;
1761 RemoveEntryList(&ce->list_entry);
1762 InsertTailList(&dr->newchunk->changed_extents, &ce->list_entry);
1763 break;
1764 }
1765
1766 le2 = le2->Flink;
1767 }
1768
1769 le = le3;
1770 }
1771
1772 // update open FCBs
1773 // FIXME - speed this up
1774
1775 ExAcquireResourceSharedLite(&Vcb->fcb_lock, TRUE);
1776
1777 le = Vcb->all_fcbs.Flink;
1778 while (le != &Vcb->all_fcbs) {
1779 struct _fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_all);
1780 LIST_ENTRY* le2;
1781
1782 ExAcquireResourceExclusiveLite(fcb->Header.Resource, TRUE);
1783
1784 le2 = fcb->extents.Flink;
1785 while (le2 != &fcb->extents) {
1786 extent* ext = CONTAINING_RECORD(le2, extent, list_entry);
1787
1788 if (!ext->ignore) {
1789 if (ext->data->type == EXTENT_TYPE_REGULAR || ext->data->type == EXTENT_TYPE_PREALLOC) {
1790 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->data->data;
1791
1792 if (ed2->size > 0 && ed2->address >= c->offset && ed2->address < c->offset + c->chunk_item->size) {
1793 LIST_ENTRY* le3 = items.Flink;
1794 while (le3 != &items) {
1795 data_reloc* dr = CONTAINING_RECORD(le3, data_reloc, list_entry);
1796
1797 if (ed2->address == dr->address) {
1798 ed2->address = dr->new_address;
1799 break;
1800 }
1801
1802 le3 = le3->Flink;
1803 }
1804 }
1805 }
1806 }
1807
1808 le2 = le2->Flink;
1809 }
1810
1811 ExReleaseResourceLite(fcb->Header.Resource);
1812
1813 le = le->Flink;
1814 }
1815
1816 ExReleaseResourceLite(&Vcb->fcb_lock);
1817
1818 Status = STATUS_SUCCESS;
1819
1820 Vcb->need_write = TRUE;
1821
1822 end:
1823 if (NT_SUCCESS(Status))
1824 clear_rollback(Vcb, &rollback);
1825 else
1826 do_rollback(Vcb, &rollback);
1827
1828 ExReleaseResourceLite(&Vcb->tree_lock);
1829
1830 if (data)
1831 ExFreePool(data);
1832
1833 while (!IsListEmpty(&items)) {
1834 data_reloc* dr = CONTAINING_RECORD(RemoveHeadList(&items), data_reloc, list_entry);
1835
1836 while (!IsListEmpty(&dr->refs)) {
1837 data_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&dr->refs), data_reloc_ref, list_entry);
1838
1839 ExFreePool(ref);
1840 }
1841
1842 ExFreePool(dr);
1843 }
1844
1845 while (!IsListEmpty(&metadata_items)) {
1846 metadata_reloc* mr = CONTAINING_RECORD(RemoveHeadList(&metadata_items), metadata_reloc, list_entry);
1847
1848 while (!IsListEmpty(&mr->refs)) {
1849 metadata_reloc_ref* ref = CONTAINING_RECORD(RemoveHeadList(&mr->refs), metadata_reloc_ref, list_entry);
1850
1851 ExFreePool(ref);
1852 }
1853
1854 ExFreePool(mr);
1855 }
1856
1857 return Status;
1858 }
1859
1860 static __inline UINT64 get_chunk_dup_type(chunk* c) {
1861 if (c->chunk_item->type & BLOCK_FLAG_RAID0)
1862 return BLOCK_FLAG_RAID0;
1863 else if (c->chunk_item->type & BLOCK_FLAG_RAID1)
1864 return BLOCK_FLAG_RAID1;
1865 else if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE)
1866 return BLOCK_FLAG_DUPLICATE;
1867 else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
1868 return BLOCK_FLAG_RAID10;
1869 else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
1870 return BLOCK_FLAG_RAID5;
1871 else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
1872 return BLOCK_FLAG_RAID6;
1873 else
1874 return BLOCK_FLAG_SINGLE;
1875 }
1876
1877 static BOOL should_balance_chunk(device_extension* Vcb, UINT8 sort, chunk* c) {
1878 btrfs_balance_opts* opts;
1879
1880 opts = &Vcb->balance.opts[sort];
1881
1882 if (!(opts->flags & BTRFS_BALANCE_OPTS_ENABLED))
1883 return FALSE;
1884
1885 if (opts->flags & BTRFS_BALANCE_OPTS_PROFILES) {
1886 UINT64 type = get_chunk_dup_type(c);
1887
1888 if (!(type & opts->profiles))
1889 return FALSE;
1890 }
1891
1892 if (opts->flags & BTRFS_BALANCE_OPTS_DEVID) {
1893 UINT16 i;
1894 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1895 BOOL b = FALSE;
1896
1897 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1898 if (cis[i].dev_id == opts->devid) {
1899 b = TRUE;
1900 break;
1901 }
1902 }
1903
1904 if (!b)
1905 return FALSE;
1906 }
1907
1908 if (opts->flags & BTRFS_BALANCE_OPTS_DRANGE) {
1909 UINT16 i, factor;
1910 UINT64 physsize;
1911 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1912 BOOL b = FALSE;
1913
1914 if (c->chunk_item->type & BLOCK_FLAG_RAID0)
1915 factor = c->chunk_item->num_stripes;
1916 else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
1917 factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
1918 else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
1919 factor = c->chunk_item->num_stripes - 1;
1920 else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
1921 factor = c->chunk_item->num_stripes - 2;
1922 else // SINGLE, DUPLICATE, RAID1
1923 factor = 1;
1924
1925 physsize = c->chunk_item->size / factor;
1926
1927 for (i = 0; i < c->chunk_item->num_stripes; i++) {
1928 if (cis[i].offset >= opts->drange_start && cis[i].offset + physsize < opts->drange_end) {
1929 b = TRUE;
1930 break;
1931 }
1932 }
1933
1934 if (!b)
1935 return FALSE;
1936 }
1937
1938 if (opts->flags & BTRFS_BALANCE_OPTS_VRANGE) {
1939 if (c->offset + c->chunk_item->size <= opts->vrange_start || c->offset > opts->vrange_end)
1940 return FALSE;
1941 }
1942
1943 if (opts->flags & BTRFS_BALANCE_OPTS_STRIPES) {
1944 if (c->chunk_item->num_stripes < opts->stripes_start || c->chunk_item->num_stripes < opts->stripes_end)
1945 return FALSE;
1946 }
1947
1948 if (opts->flags & BTRFS_BALANCE_OPTS_USAGE) {
1949 UINT64 usage = c->used * 100 / c->chunk_item->size;
1950
1951 // usage == 0 should mean completely empty, not just that usage rounds to 0%
1952 if (c->used > 0 && usage == 0)
1953 usage = 1;
1954
1955 if (usage < opts->usage_start || usage > opts->usage_end)
1956 return FALSE;
1957 }
1958
1959 if (opts->flags & BTRFS_BALANCE_OPTS_CONVERT && opts->flags & BTRFS_BALANCE_OPTS_SOFT) {
1960 UINT64 type = get_chunk_dup_type(c);
1961
1962 if (type == opts->convert)
1963 return FALSE;
1964 }
1965
1966 return TRUE;
1967 }
1968
1969 static void copy_balance_args(btrfs_balance_opts* opts, BALANCE_ARGS* args) {
1970 if (opts->flags & BTRFS_BALANCE_OPTS_PROFILES) {
1971 args->profiles = opts->profiles;
1972 args->flags |= BALANCE_ARGS_FLAGS_PROFILES;
1973 }
1974
1975 if (opts->flags & BTRFS_BALANCE_OPTS_USAGE) {
1976 if (args->usage_start == 0) {
1977 args->flags |= BALANCE_ARGS_FLAGS_USAGE_RANGE;
1978 args->usage_start = opts->usage_start;
1979 args->usage_end = opts->usage_end;
1980 } else {
1981 args->flags |= BALANCE_ARGS_FLAGS_USAGE;
1982 args->usage = opts->usage_end;
1983 }
1984 }
1985
1986 if (opts->flags & BTRFS_BALANCE_OPTS_DEVID) {
1987 args->devid = opts->devid;
1988 args->flags |= BALANCE_ARGS_FLAGS_DEVID;
1989 }
1990
1991 if (opts->flags & BTRFS_BALANCE_OPTS_DRANGE) {
1992 args->drange_start = opts->drange_start;
1993 args->drange_end = opts->drange_end;
1994 args->flags |= BALANCE_ARGS_FLAGS_DRANGE;
1995 }
1996
1997 if (opts->flags & BTRFS_BALANCE_OPTS_VRANGE) {
1998 args->vrange_start = opts->vrange_start;
1999 args->vrange_end = opts->vrange_end;
2000 args->flags |= BALANCE_ARGS_FLAGS_VRANGE;
2001 }
2002
2003 if (opts->flags & BTRFS_BALANCE_OPTS_CONVERT) {
2004 args->convert = opts->convert;
2005 args->flags |= BALANCE_ARGS_FLAGS_CONVERT;
2006
2007 if (opts->flags & BTRFS_BALANCE_OPTS_SOFT)
2008 args->flags |= BALANCE_ARGS_FLAGS_SOFT;
2009 }
2010
2011 if (opts->flags & BTRFS_BALANCE_OPTS_LIMIT) {
2012 if (args->limit_start == 0) {
2013 args->flags |= BALANCE_ARGS_FLAGS_LIMIT_RANGE;
2014 args->limit_start = opts->limit_start;
2015 args->limit_end = opts->limit_end;
2016 } else {
2017 args->flags |= BALANCE_ARGS_FLAGS_LIMIT;
2018 args->limit = opts->limit_end;
2019 }
2020 }
2021
2022 if (opts->flags & BTRFS_BALANCE_OPTS_STRIPES) {
2023 args->stripes_start = opts->stripes_start;
2024 args->stripes_end = opts->stripes_end;
2025 args->flags |= BALANCE_ARGS_FLAGS_STRIPES_RANGE;
2026 }
2027 }
2028
2029 static NTSTATUS add_balance_item(device_extension* Vcb) {
2030 LIST_ENTRY rollback;
2031 KEY searchkey;
2032 traverse_ptr tp;
2033 NTSTATUS Status;
2034 BALANCE_ITEM* bi;
2035
2036 InitializeListHead(&rollback);
2037
2038 searchkey.obj_id = BALANCE_ITEM_ID;
2039 searchkey.obj_type = TYPE_TEMP_ITEM;
2040 searchkey.offset = 0;
2041
2042 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
2043
2044 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
2045 if (!NT_SUCCESS(Status)) {
2046 ERR("find_item returned %08x\n", Status);
2047 goto end;
2048 }
2049
2050 if (!keycmp(tp.item->key, searchkey))
2051 delete_tree_item(Vcb, &tp, &rollback);
2052
2053 bi = ExAllocatePoolWithTag(PagedPool, sizeof(BALANCE_ITEM), ALLOC_TAG);
2054 if (!bi) {
2055 ERR("out of memory\n");
2056 Status = STATUS_INSUFFICIENT_RESOURCES;
2057 goto end;
2058 }
2059
2060 RtlZeroMemory(bi, sizeof(BALANCE_ITEM));
2061
2062 if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED) {
2063 bi->flags |= BALANCE_FLAGS_DATA;
2064 copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bi->data);
2065 }
2066
2067 if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) {
2068 bi->flags |= BALANCE_FLAGS_METADATA;
2069 copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &bi->metadata);
2070 }
2071
2072 if (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED) {
2073 bi->flags |= BALANCE_FLAGS_SYSTEM;
2074 copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_SYSTEM], &bi->system);
2075 }
2076
2077 if (!insert_tree_item(Vcb, Vcb->root_root, BALANCE_ITEM_ID, TYPE_TEMP_ITEM, 0, bi, sizeof(BALANCE_ITEM), NULL, NULL, &rollback)) {
2078 ERR("insert_tree_item failed\n");
2079 Status = STATUS_INTERNAL_ERROR;
2080 goto end;
2081 }
2082
2083 Status = STATUS_SUCCESS;
2084
2085 end:
2086 if (NT_SUCCESS(Status)) {
2087 do_write(Vcb, NULL, &rollback);
2088 free_trees(Vcb);
2089
2090 clear_rollback(Vcb, &rollback);
2091 } else
2092 do_rollback(Vcb, &rollback);
2093
2094 ExReleaseResourceLite(&Vcb->tree_lock);
2095
2096 return Status;
2097 }
2098
2099 static NTSTATUS remove_balance_item(device_extension* Vcb) {
2100 LIST_ENTRY rollback;
2101 KEY searchkey;
2102 traverse_ptr tp;
2103 NTSTATUS Status;
2104
2105 InitializeListHead(&rollback);
2106
2107 searchkey.obj_id = BALANCE_ITEM_ID;
2108 searchkey.obj_type = TYPE_TEMP_ITEM;
2109 searchkey.offset = 0;
2110
2111 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
2112
2113 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
2114 if (!NT_SUCCESS(Status)) {
2115 ERR("find_item returned %08x\n", Status);
2116 goto end;
2117 }
2118
2119 if (!keycmp(tp.item->key, searchkey)) {
2120 delete_tree_item(Vcb, &tp, &rollback);
2121
2122 do_write(Vcb, NULL, &rollback);
2123 free_trees(Vcb);
2124 }
2125
2126 Status = STATUS_SUCCESS;
2127
2128 end:
2129 if (NT_SUCCESS(Status))
2130 clear_rollback(Vcb, &rollback);
2131 else
2132 do_rollback(Vcb, &rollback);
2133
2134 ExReleaseResourceLite(&Vcb->tree_lock);
2135
2136 return Status;
2137 }
2138
2139 static void load_balance_args(btrfs_balance_opts* opts, BALANCE_ARGS* args) {
2140 opts->flags = BTRFS_BALANCE_OPTS_ENABLED;
2141
2142 if (args->flags & BALANCE_ARGS_FLAGS_PROFILES) {
2143 opts->flags |= BTRFS_BALANCE_OPTS_PROFILES;
2144 opts->profiles = args->profiles;
2145 }
2146
2147 if (args->flags & BALANCE_ARGS_FLAGS_USAGE) {
2148 opts->flags |= BTRFS_BALANCE_OPTS_USAGE;
2149
2150 opts->usage_start = 0;
2151 opts->usage_end = args->usage;
2152 } else if (args->flags & BALANCE_ARGS_FLAGS_USAGE_RANGE) {
2153 opts->flags |= BTRFS_BALANCE_OPTS_USAGE;
2154
2155 opts->usage_start = args->usage_start;
2156 opts->usage_end = args->usage_end;
2157 }
2158
2159 if (args->flags & BALANCE_ARGS_FLAGS_DEVID) {
2160 opts->flags |= BTRFS_BALANCE_OPTS_DEVID;
2161 opts->devid = args->devid;
2162 }
2163
2164 if (args->flags & BALANCE_ARGS_FLAGS_DRANGE) {
2165 opts->flags |= BTRFS_BALANCE_OPTS_DRANGE;
2166 opts->drange_start = args->drange_start;
2167 opts->drange_end = args->drange_end;
2168 }
2169
2170 if (args->flags & BALANCE_ARGS_FLAGS_VRANGE) {
2171 opts->flags |= BTRFS_BALANCE_OPTS_VRANGE;
2172 opts->vrange_start = args->vrange_start;
2173 opts->vrange_end = args->vrange_end;
2174 }
2175
2176 if (args->flags & BALANCE_ARGS_FLAGS_LIMIT) {
2177 opts->flags |= BTRFS_BALANCE_OPTS_LIMIT;
2178
2179 opts->limit_start = 0;
2180 opts->limit_end = args->limit;
2181 } else if (args->flags & BALANCE_ARGS_FLAGS_LIMIT_RANGE) {
2182 opts->flags |= BTRFS_BALANCE_OPTS_LIMIT;
2183
2184 opts->limit_start = args->limit_start;
2185 opts->limit_end = args->limit_end;
2186 }
2187
2188 if (args->flags & BALANCE_ARGS_FLAGS_STRIPES_RANGE) {
2189 opts->flags |= BTRFS_BALANCE_OPTS_STRIPES;
2190
2191 opts->stripes_start = args->stripes_start;
2192 opts->stripes_end = args->stripes_end;
2193 }
2194
2195 if (args->flags & BALANCE_ARGS_FLAGS_CONVERT) {
2196 opts->flags |= BTRFS_BALANCE_OPTS_CONVERT;
2197 opts->convert = args->convert;
2198
2199 if (args->flags & BALANCE_ARGS_FLAGS_SOFT)
2200 opts->flags |= BTRFS_BALANCE_OPTS_SOFT;
2201 }
2202 }
2203
2204 static NTSTATUS remove_superblocks(device* dev) {
2205 NTSTATUS Status;
2206 superblock* sb;
2207 int i = 0;
2208
2209 sb = ExAllocatePoolWithTag(PagedPool, sizeof(superblock), ALLOC_TAG);
2210 if (!sb) {
2211 ERR("out of memory\n");
2212 return STATUS_INSUFFICIENT_RESOURCES;
2213 }
2214
2215 RtlZeroMemory(sb, sizeof(superblock));
2216
2217 while (superblock_addrs[i] > 0 && dev->length >= superblock_addrs[i] + sizeof(superblock)) {
2218 Status = write_data_phys(dev->devobj, superblock_addrs[i], sb, sizeof(superblock));
2219
2220 if (!NT_SUCCESS(Status)) {
2221 ExFreePool(sb);
2222 return Status;
2223 }
2224
2225 i++;
2226 }
2227
2228 ExFreePool(sb);
2229
2230 return STATUS_SUCCESS;
2231 }
2232
2233 static NTSTATUS replace_mount_dev(device_extension* Vcb, device* dev, PDEVICE_OBJECT mountmgr, BOOL part0) {
2234 NTSTATUS Status;
2235 MOUNTDEV_NAME mdn, *mdn2 = NULL, *mdn3 = NULL;
2236 ULONG mdnsize, mmpsize;
2237 MOUNTMGR_MOUNT_POINT* mmp = NULL;
2238 MOUNTMGR_MOUNT_POINTS mmps, *mmps2 = NULL;
2239 ULONG i;
2240 UNICODE_STRING us;
2241
2242 // get old device name
2243
2244 Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &mdn, sizeof(MOUNTDEV_NAME), TRUE, NULL);
2245 if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) {
2246 ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status);
2247 return Status;
2248 }
2249
2250 mdnsize = offsetof(MOUNTDEV_NAME, Name[0]) + mdn.NameLength;
2251
2252 mdn2 = ExAllocatePoolWithTag(PagedPool, mdnsize, ALLOC_TAG);
2253 if (!mdn2) {
2254 ERR("out of memory\n");
2255 return STATUS_INSUFFICIENT_RESOURCES;
2256 }
2257
2258 Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, mdn2, mdnsize, TRUE, NULL);
2259 if (!NT_SUCCESS(Status)) {
2260 ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status);
2261 goto end;
2262 }
2263
2264 // get new device name
2265
2266 Status = dev_ioctl(first_device(Vcb)->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, &mdn, sizeof(MOUNTDEV_NAME), TRUE, NULL);
2267 if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) {
2268 ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status);
2269 goto end2;
2270 }
2271
2272 mdnsize = offsetof(MOUNTDEV_NAME, Name[0]) + mdn.NameLength;
2273
2274 mdn3 = ExAllocatePoolWithTag(PagedPool, mdnsize, ALLOC_TAG);
2275 if (!mdn3) {
2276 ERR("out of memory\n");
2277 Status = STATUS_INSUFFICIENT_RESOURCES;
2278 goto end2;
2279 }
2280
2281 Status = dev_ioctl(first_device(Vcb)->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, mdn3, mdnsize, TRUE, NULL);
2282 if (!NT_SUCCESS(Status)) {
2283 ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status);
2284 goto end2;
2285 }
2286
2287 // query and delete existing mount points
2288
2289 mmpsize = sizeof(MOUNTMGR_MOUNT_POINT) + mdn2->NameLength;
2290
2291 mmp = ExAllocatePoolWithTag(PagedPool, mmpsize, ALLOC_TAG);
2292 if (!mmp) {
2293 ERR("out of memory\n");
2294 Status = STATUS_INSUFFICIENT_RESOURCES;
2295 goto end2;
2296 }
2297
2298 RtlZeroMemory(mmp, sizeof(MOUNTMGR_MOUNT_POINT));
2299 mmp->DeviceNameOffset = sizeof(MOUNTMGR_MOUNT_POINT);
2300 mmp->DeviceNameLength = mdn2->NameLength;
2301 RtlCopyMemory(&mmp[1], mdn2->Name, mdn2->NameLength);
2302
2303 Status = dev_ioctl(mountmgr, IOCTL_MOUNTMGR_QUERY_POINTS, mmp, mmpsize, &mmps, mmpsize, TRUE, NULL);
2304 if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) {
2305 ERR("IOCTL_MOUNTMGR_QUERY_POINTS returned %08x\n", Status);
2306 goto end2;
2307 }
2308
2309 mmps2 = ExAllocatePoolWithTag(PagedPool, mmps.Size, ALLOC_TAG);
2310 if (!mmps2) {
2311 ERR("out of memory\n");
2312 Status = STATUS_INSUFFICIENT_RESOURCES;
2313 goto end2;
2314 }
2315
2316 Status = dev_ioctl(mountmgr, IOCTL_MOUNTMGR_DELETE_POINTS, mmp, mmpsize, mmps2, mmps.Size, TRUE, NULL);
2317 if (!NT_SUCCESS(Status) && Status != STATUS_BUFFER_OVERFLOW) {
2318 ERR("IOCTL_MOUNTMGR_DELETE_POINTS returned %08x\n", Status);
2319 goto end2;
2320 }
2321
2322 // re-create mount points
2323
2324 for (i = 0; i < mmps2->NumberOfMountPoints; i++) {
2325 if (mmps2->MountPoints[i].SymbolicLinkNameOffset != 0) {
2326 ULONG mcpilen;
2327 MOUNTMGR_CREATE_POINT_INPUT* mcpi;
2328
2329 mcpilen = sizeof(MOUNTMGR_CREATE_POINT_INPUT) + mmps2->MountPoints[i].SymbolicLinkNameLength + mdn3->NameLength;
2330
2331 mcpi = ExAllocatePoolWithTag(PagedPool, mcpilen, ALLOC_TAG);
2332 if (!mcpi) {
2333 ERR("out of memory\n");
2334 Status = STATUS_INSUFFICIENT_RESOURCES;
2335 goto end2;
2336 }
2337
2338 mcpi->SymbolicLinkNameOffset = sizeof(MOUNTMGR_CREATE_POINT_INPUT);
2339 mcpi->SymbolicLinkNameLength = mmps2->MountPoints[i].SymbolicLinkNameLength;
2340 mcpi->DeviceNameOffset = mcpi->SymbolicLinkNameOffset + mcpi->SymbolicLinkNameLength;
2341 mcpi->DeviceNameLength = mdn3->NameLength;
2342
2343 RtlCopyMemory((UINT8*)mcpi + mcpi->SymbolicLinkNameOffset, (UINT8*)mmps2 + mmps2->MountPoints[i].SymbolicLinkNameOffset,
2344 mcpi->SymbolicLinkNameLength);
2345 RtlCopyMemory((UINT8*)mcpi + mcpi->DeviceNameOffset, mdn3->Name, mdn3->NameLength);
2346
2347 Status = dev_ioctl(mountmgr, IOCTL_MOUNTMGR_CREATE_POINT, mcpi, mcpilen, NULL, 0, TRUE, NULL);
2348 if (!NT_SUCCESS(Status)) {
2349 ERR("IOCTL_MOUNTMGR_CREATE_POINT returned %08x\n", Status);
2350 ExFreePool(mcpi);
2351 goto end2;
2352 }
2353
2354 ExFreePool(mcpi);
2355 }
2356 }
2357
2358 Status = STATUS_SUCCESS;
2359
2360 end2:
2361 // re-add old device back to mountmgr
2362
2363 if (!part0) {
2364 us.Buffer = mdn2->Name;
2365 us.Length = us.MaximumLength = mdn2->NameLength;
2366
2367 add_volume(mountmgr, &us);
2368 }
2369
2370 end:
2371 if (mdn2)
2372 ExFreePool(mdn2);
2373
2374 if (mdn3)
2375 ExFreePool(mdn3);
2376
2377 if (mmp)
2378 ExFreePool(mmp);
2379
2380 if (mmps2)
2381 ExFreePool(mmps2);
2382
2383 return Status;
2384 }
2385
2386 static NTSTATUS finish_removing_device(device_extension* Vcb, device* dev) {
2387 KEY searchkey;
2388 traverse_ptr tp;
2389 NTSTATUS Status;
2390 LIST_ENTRY rollback, *le;
2391 BOOL first_dev, part0 = FALSE;
2392
2393 InitializeListHead(&rollback);
2394
2395 if (Vcb->need_write)
2396 do_write(Vcb, NULL, &rollback);
2397
2398 free_trees(Vcb);
2399
2400 clear_rollback(Vcb, &rollback);
2401
2402 // remove entry in chunk tree
2403
2404 searchkey.obj_id = 1;
2405 searchkey.obj_type = TYPE_DEV_ITEM;
2406 searchkey.offset = dev->devitem.dev_id;
2407
2408 Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, NULL);
2409 if (!NT_SUCCESS(Status)) {
2410 ERR("find_item returned %08x\n", Status);
2411 return Status;
2412 }
2413
2414 if (!keycmp(searchkey, tp.item->key))
2415 delete_tree_item(Vcb, &tp, &rollback);
2416
2417 // remove stats entry in device tree
2418
2419 searchkey.obj_id = 0;
2420 searchkey.obj_type = TYPE_DEV_STATS;
2421 searchkey.offset = dev->devitem.dev_id;
2422
2423 Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, NULL);
2424 if (!NT_SUCCESS(Status)) {
2425 ERR("find_item returned %08x\n", Status);
2426 return Status;
2427 }
2428
2429 if (!keycmp(searchkey, tp.item->key))
2430 delete_tree_item(Vcb, &tp, &rollback);
2431
2432 // update superblock
2433
2434 Vcb->superblock.num_devices--;
2435 Vcb->superblock.total_bytes -= dev->devitem.num_bytes;
2436 Vcb->devices_loaded--;
2437
2438 first_dev = first_device(Vcb) == dev;
2439
2440 RemoveEntryList(&dev->list_entry);
2441
2442 // flush
2443
2444 do_write(Vcb, NULL, &rollback);
2445
2446 free_trees(Vcb);
2447
2448 clear_rollback(Vcb, &rollback);
2449
2450 if (!dev->readonly) {
2451 Status = remove_superblocks(dev);
2452 if (!NT_SUCCESS(Status))
2453 WARN("remove_superblocks returned %08x\n", Status);
2454 }
2455
2456 // remove entry in volume list
2457
2458 ExAcquireResourceExclusiveLite(&volumes_lock, TRUE);
2459
2460 le = volumes.Flink;
2461 while (le != &volumes) {
2462 volume* v = CONTAINING_RECORD(le, volume, list_entry);
2463
2464 if (RtlCompareMemory(&Vcb->superblock.uuid, &v->fsuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID) &&
2465 RtlCompareMemory(&dev->devitem.device_uuid, &v->devuuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) {
2466 PFILE_OBJECT FileObject;
2467 PDEVICE_OBJECT mountmgr;
2468 UNICODE_STRING mmdevpath;
2469
2470 RemoveEntryList(&v->list_entry);
2471
2472 // re-add entry to mountmgr
2473
2474 if (!first_dev && v->part_num != 0) {
2475 RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME);
2476 Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &FileObject, &mountmgr);
2477 if (!NT_SUCCESS(Status))
2478 ERR("IoGetDeviceObjectPointer returned %08x\n", Status);
2479 else {
2480 add_volume(mountmgr, &v->devpath);
2481 ObDereferenceObject(FileObject);
2482 }
2483 }
2484
2485 part0 = v->part_num == 0 ? TRUE : FALSE;
2486
2487 if (v->devpath.Buffer)
2488 ExFreePool(v->devpath.Buffer);
2489
2490 ExFreePool(v);
2491 break;
2492 }
2493
2494 le = le->Flink;
2495 }
2496
2497 ExReleaseResourceLite(&volumes_lock);
2498
2499 if (first_dev) {
2500 PDEVICE_OBJECT DeviceObject, olddev;
2501 device* newfirstdev;
2502 PFILE_OBJECT FileObject;
2503 UNICODE_STRING mmdevpath;
2504 PDEVICE_OBJECT mountmgr;
2505
2506 DeviceObject = Vcb->Vpb->DeviceObject;
2507
2508 olddev = DeviceObject->Vpb->RealDevice;
2509 newfirstdev = first_device(Vcb);
2510
2511 ObReferenceObject(newfirstdev->devobj);
2512 DeviceObject->Vpb->RealDevice = newfirstdev->devobj;
2513 ObDereferenceObject(olddev);
2514
2515 RtlInitUnicodeString(&mmdevpath, MOUNTMGR_DEVICE_NAME);
2516 Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &FileObject, &mountmgr);
2517 if (!NT_SUCCESS(Status))
2518 ERR("IoGetDeviceObjectPointer returned %08x\n", Status);
2519 else {
2520 Status = replace_mount_dev(Vcb, dev, mountmgr, part0);
2521 if (!NT_SUCCESS(Status))
2522 ERR("replace_mount_dev returned %08x\n", Status);
2523
2524 ObDereferenceObject(FileObject);
2525 }
2526
2527 }
2528
2529 // free dev
2530
2531 ObDereferenceObject(dev->devobj);
2532
2533 while (!IsListEmpty(&dev->space)) {
2534 LIST_ENTRY* le2 = RemoveHeadList(&dev->space);
2535 space* s = CONTAINING_RECORD(le2, space, list_entry);
2536
2537 ExFreePool(s);
2538 }
2539
2540 ExFreePool(dev);
2541
2542 return STATUS_SUCCESS;
2543 }
2544
2545 #ifndef __REACTOS__
2546 static void balance_thread(void* context) {
2547 #else
2548 static void NTAPI balance_thread(void* context) {
2549 #endif
2550 device_extension* Vcb = (device_extension*)context;
2551 LIST_ENTRY chunks;
2552 LIST_ENTRY* le;
2553 UINT64 num_chunks[3];
2554 NTSTATUS Status;
2555
2556 Vcb->balance.stopping = FALSE;
2557 Vcb->balance.cancelling = FALSE;
2558 KeInitializeEvent(&Vcb->balance.finished, NotificationEvent, FALSE);
2559
2560 if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_CONVERT)
2561 Vcb->data_flags = BLOCK_FLAG_DATA | (Vcb->balance.opts[BALANCE_OPTS_DATA].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_DATA].convert);
2562
2563 if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_CONVERT)
2564 Vcb->metadata_flags = BLOCK_FLAG_METADATA | (Vcb->balance.opts[BALANCE_OPTS_METADATA].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_METADATA].convert);
2565
2566 if (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_CONVERT)
2567 Vcb->system_flags = BLOCK_FLAG_SYSTEM | (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_SYSTEM].convert);
2568
2569 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS) {
2570 if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED)
2571 RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &Vcb->balance.opts[BALANCE_OPTS_DATA], sizeof(btrfs_balance_opts));
2572 else if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED)
2573 RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_DATA], &Vcb->balance.opts[BALANCE_OPTS_METADATA], sizeof(btrfs_balance_opts));
2574 }
2575
2576 // FIXME - what are we supposed to do with limit_start?
2577
2578 if (!Vcb->readonly) {
2579 if (!Vcb->balance.removing) {
2580 Status = add_balance_item(Vcb);
2581 if (!NT_SUCCESS(Status)) {
2582 ERR("add_balance_item returned %08x\n", Status);
2583 goto end;
2584 }
2585 } else {
2586 if (Vcb->need_write) {
2587 LIST_ENTRY rollback;
2588
2589 InitializeListHead(&rollback);
2590 do_write(Vcb, NULL, &rollback);
2591 free_trees(Vcb);
2592
2593 clear_rollback(Vcb, &rollback);
2594 }
2595 }
2596 }
2597
2598 num_chunks[0] = num_chunks[1] = num_chunks[2] = 0;
2599 Vcb->balance.total_chunks = 0;
2600
2601 InitializeListHead(&chunks);
2602
2603 KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL);
2604
2605 if (Vcb->balance.stopping)
2606 goto end;
2607
2608 ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
2609
2610 le = Vcb->chunks.Flink;
2611 while (le != &Vcb->chunks) {
2612 chunk* c = CONTAINING_RECORD(le, chunk, list_entry);
2613 UINT8 sort;
2614
2615 ExAcquireResourceExclusiveLite(&c->lock, TRUE);
2616
2617 if (c->chunk_item->type & BLOCK_FLAG_DATA)
2618 sort = BALANCE_OPTS_DATA;
2619 else if (c->chunk_item->type & BLOCK_FLAG_METADATA)
2620 sort = BALANCE_OPTS_METADATA;
2621 else if (c->chunk_item->type & BLOCK_FLAG_SYSTEM)
2622 sort = BALANCE_OPTS_SYSTEM;
2623 else {
2624 ERR("unexpected chunk type %llx\n", c->chunk_item->type);
2625 ExReleaseResourceLite(&c->lock);
2626 break;
2627 }
2628
2629 if ((!(Vcb->balance.opts[sort].flags & BTRFS_BALANCE_OPTS_LIMIT) || num_chunks[sort] < Vcb->balance.opts[sort].limit_end) &&
2630 should_balance_chunk(Vcb, sort, c)) {
2631 c->reloc = TRUE;
2632
2633 InsertTailList(&chunks, &c->list_entry_balance);
2634
2635 num_chunks[sort]++;
2636 Vcb->balance.total_chunks++;
2637 }
2638
2639 ExReleaseResourceLite(&c->lock);
2640
2641 le = le->Flink;
2642 }
2643
2644 ExReleaseResourceLite(&Vcb->chunk_lock);
2645
2646 Vcb->balance.chunks_left = Vcb->balance.total_chunks;
2647
2648 // do data chunks before metadata
2649 le = chunks.Flink;
2650 while (le != &chunks) {
2651 chunk* c = CONTAINING_RECORD(le, chunk, list_entry_balance);
2652 LIST_ENTRY* le2 = le->Flink;
2653
2654 if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2655 NTSTATUS Status;
2656 BOOL changed;
2657
2658 do {
2659 changed = FALSE;
2660
2661 FsRtlEnterFileSystem();
2662
2663 Status = balance_data_chunk(Vcb, c, &changed);
2664
2665 FsRtlExitFileSystem();
2666
2667 if (!NT_SUCCESS(Status)) {
2668 ERR("balance_data_chunk returned %08x\n", Status);
2669 Vcb->balance.status = Status;
2670 goto end;
2671 }
2672
2673 KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL);
2674
2675 if (Vcb->balance.stopping)
2676 break;
2677 } while (changed);
2678
2679 if (!c->list_entry_changed.Flink)
2680 InsertTailList(&Vcb->chunks_changed, &c->list_entry_changed);
2681 }
2682
2683 if (Vcb->balance.stopping) {
2684 while (le != &chunks) {
2685 c = CONTAINING_RECORD(le, chunk, list_entry_balance);
2686 c->reloc = FALSE;
2687
2688 le = le->Flink;
2689 }
2690 goto end;
2691 }
2692
2693 if (c->chunk_item->type & BLOCK_FLAG_DATA &&
2694 (!(Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) || !(c->chunk_item->type & BLOCK_FLAG_METADATA))) {
2695 RemoveEntryList(&c->list_entry_balance);
2696 c->list_entry_balance.Flink = NULL;
2697
2698 Vcb->balance.chunks_left--;
2699 }
2700
2701 le = le2;
2702 }
2703
2704 // do metadata chunks
2705 while (!IsListEmpty(&chunks)) {
2706 chunk* c;
2707 NTSTATUS Status;
2708 BOOL changed;
2709
2710 le = RemoveHeadList(&chunks);
2711 c = CONTAINING_RECORD(le, chunk, list_entry_balance);
2712
2713 if (c->chunk_item->type & BLOCK_FLAG_METADATA || c->chunk_item->type & BLOCK_FLAG_SYSTEM) {
2714 do {
2715 FsRtlEnterFileSystem();
2716
2717 Status = balance_metadata_chunk(Vcb, c, &changed);
2718
2719 FsRtlExitFileSystem();
2720
2721 if (!NT_SUCCESS(Status)) {
2722 ERR("balance_metadata_chunk returned %08x\n", Status);
2723 Vcb->balance.status = Status;
2724 goto end;
2725 }
2726
2727 KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL);
2728
2729 if (Vcb->balance.stopping)
2730 break;
2731 } while (changed);
2732
2733 if (!c->list_entry_changed.Flink)
2734 InsertTailList(&Vcb->chunks_changed, &c->list_entry_changed);
2735 }
2736
2737 if (Vcb->balance.stopping) {
2738 while (le != &chunks) {
2739 c = CONTAINING_RECORD(le, chunk, list_entry_balance);
2740 c->reloc = FALSE;
2741
2742 le = le->Flink;
2743 c->list_entry_balance.Flink = NULL;
2744 }
2745 break;
2746 }
2747
2748 c->list_entry_balance.Flink = NULL;
2749
2750 Vcb->balance.chunks_left--;
2751 }
2752
2753 end:
2754 if (!Vcb->readonly) {
2755 if (!Vcb->balance.removing) {
2756 FsRtlEnterFileSystem();
2757 Status = remove_balance_item(Vcb);
2758 FsRtlExitFileSystem();
2759
2760 if (!NT_SUCCESS(Status)) {
2761 ERR("remove_balance_item returned %08x\n", Status);
2762 goto end;
2763 }
2764 } else {
2765 device* dev = NULL;
2766
2767 FsRtlEnterFileSystem();
2768 ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
2769
2770 le = Vcb->devices.Flink;
2771 while (le != &Vcb->devices) {
2772 device* dev2 = CONTAINING_RECORD(le, device, list_entry);
2773
2774 if (dev2->devitem.dev_id == Vcb->balance.opts[0].devid) {
2775 dev = dev2;
2776 break;
2777 }
2778
2779 le = le->Flink;
2780 }
2781
2782 if (dev) {
2783 if (Vcb->balance.chunks_left == 0) {
2784 Status = finish_removing_device(Vcb, dev);
2785
2786 if (!NT_SUCCESS(Status)) {
2787 ERR("finish_removing_device returned %08x\n", Status);
2788 dev->reloc = FALSE;
2789 }
2790 } else
2791 dev->reloc = FALSE;
2792 }
2793
2794 ExReleaseResourceLite(&Vcb->tree_lock);
2795 FsRtlExitFileSystem();
2796 }
2797 }
2798
2799 ZwClose(Vcb->balance.thread);
2800 Vcb->balance.thread = NULL;
2801
2802 KeSetEvent(&Vcb->balance.finished, 0, FALSE);
2803 }
2804
2805 NTSTATUS start_balance(device_extension* Vcb, void* data, ULONG length, KPROCESSOR_MODE processor_mode) {
2806 NTSTATUS Status;
2807 btrfs_start_balance* bsb = (btrfs_start_balance*)data;
2808 UINT8 i;
2809
2810 if (length < sizeof(btrfs_start_balance) || !data)
2811 return STATUS_INVALID_PARAMETER;
2812
2813 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
2814 return STATUS_PRIVILEGE_NOT_HELD;
2815
2816 if (Vcb->balance.thread) {
2817 WARN("balance already running\n");
2818 return STATUS_DEVICE_NOT_READY;
2819 }
2820
2821 if (Vcb->readonly)
2822 return STATUS_MEDIA_WRITE_PROTECTED;
2823
2824 if (!(bsb->opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED) &&
2825 !(bsb->opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) &&
2826 !(bsb->opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED))
2827 return STATUS_SUCCESS;
2828
2829 for (i = 0; i < 3; i++) {
2830 if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_ENABLED) {
2831 if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_PROFILES) {
2832 bsb->opts[i].profiles &= BLOCK_FLAG_RAID0 | BLOCK_FLAG_RAID1 | BLOCK_FLAG_DUPLICATE | BLOCK_FLAG_RAID10 |
2833 BLOCK_FLAG_RAID5 | BLOCK_FLAG_RAID6 | BLOCK_FLAG_SINGLE;
2834
2835 if (bsb->opts[i].profiles == 0)
2836 return STATUS_INVALID_PARAMETER;
2837 }
2838
2839 if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_DEVID) {
2840 if (bsb->opts[i].devid == 0)
2841 return STATUS_INVALID_PARAMETER;
2842 }
2843
2844 if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_DRANGE) {
2845 if (bsb->opts[i].drange_start > bsb->opts[i].drange_end)
2846 return STATUS_INVALID_PARAMETER;
2847 }
2848
2849 if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_VRANGE) {
2850 if (bsb->opts[i].vrange_start > bsb->opts[i].vrange_end)
2851 return STATUS_INVALID_PARAMETER;
2852 }
2853
2854 if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_LIMIT) {
2855 bsb->opts[i].limit_start = max(1, bsb->opts[i].limit_start);
2856 bsb->opts[i].limit_end = max(1, bsb->opts[i].limit_end);
2857
2858 if (bsb->opts[i].limit_start > bsb->opts[i].limit_end)
2859 return STATUS_INVALID_PARAMETER;
2860 }
2861
2862 if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_STRIPES) {
2863 bsb->opts[i].stripes_start = max(1, bsb->opts[i].stripes_start);
2864 bsb->opts[i].stripes_end = max(1, bsb->opts[i].stripes_end);
2865
2866 if (bsb->opts[i].stripes_start > bsb->opts[i].stripes_end)
2867 return STATUS_INVALID_PARAMETER;
2868 }
2869
2870 if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_USAGE) {
2871 bsb->opts[i].usage_start = min(100, bsb->opts[i].stripes_start);
2872 bsb->opts[i].usage_end = min(100, bsb->opts[i].stripes_end);
2873
2874 if (bsb->opts[i].stripes_start > bsb->opts[i].stripes_end)
2875 return STATUS_INVALID_PARAMETER;
2876 }
2877
2878 if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_CONVERT) {
2879 if (bsb->opts[i].convert != BLOCK_FLAG_RAID0 && bsb->opts[i].convert != BLOCK_FLAG_RAID1 &&
2880 bsb->opts[i].convert != BLOCK_FLAG_DUPLICATE && bsb->opts[i].convert != BLOCK_FLAG_RAID10 &&
2881 bsb->opts[i].convert != BLOCK_FLAG_RAID5 && bsb->opts[i].convert != BLOCK_FLAG_RAID6 &&
2882 bsb->opts[i].convert != BLOCK_FLAG_SINGLE)
2883 return STATUS_INVALID_PARAMETER;
2884 }
2885 }
2886 }
2887
2888 RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bsb->opts[BALANCE_OPTS_DATA], sizeof(btrfs_balance_opts));
2889 RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &bsb->opts[BALANCE_OPTS_METADATA], sizeof(btrfs_balance_opts));
2890 RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_SYSTEM], &bsb->opts[BALANCE_OPTS_SYSTEM], sizeof(btrfs_balance_opts));
2891
2892 Vcb->balance.paused = FALSE;
2893 Vcb->balance.removing = FALSE;
2894 Vcb->balance.status = STATUS_SUCCESS;
2895 KeInitializeEvent(&Vcb->balance.event, NotificationEvent, !Vcb->balance.paused);
2896
2897 Status = PsCreateSystemThread(&Vcb->balance.thread, 0, NULL, NULL, NULL, balance_thread, Vcb);
2898 if (!NT_SUCCESS(Status)) {
2899 ERR("PsCreateSystemThread returned %08x\n", Status);
2900 return Status;
2901 }
2902
2903 return STATUS_SUCCESS;
2904 }
2905
2906 NTSTATUS look_for_balance_item(device_extension* Vcb) {
2907 LIST_ENTRY rollback;
2908 KEY searchkey;
2909 traverse_ptr tp;
2910 NTSTATUS Status;
2911 BALANCE_ITEM* bi;
2912 int i;
2913
2914 InitializeListHead(&rollback);
2915
2916 searchkey.obj_id = BALANCE_ITEM_ID;
2917 searchkey.obj_type = TYPE_TEMP_ITEM;
2918 searchkey.offset = 0;
2919
2920 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
2921 if (!NT_SUCCESS(Status)) {
2922 ERR("find_item returned %08x\n", Status);
2923 return Status;
2924 }
2925
2926 if (keycmp(tp.item->key, searchkey)) {
2927 TRACE("no balance item found\n");
2928 return STATUS_NOT_FOUND;
2929 }
2930
2931 if (tp.item->size < sizeof(BALANCE_ITEM)) {
2932 WARN("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
2933 tp.item->size, sizeof(BALANCE_ITEM));
2934 return STATUS_INTERNAL_ERROR;
2935 }
2936
2937 bi = (BALANCE_ITEM*)tp.item->data;
2938
2939 if (bi->flags & BALANCE_FLAGS_DATA)
2940 load_balance_args(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bi->data);
2941
2942 if (bi->flags & BALANCE_FLAGS_METADATA)
2943 load_balance_args(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &bi->metadata);
2944
2945 if (bi->flags & BALANCE_FLAGS_SYSTEM)
2946 load_balance_args(&Vcb->balance.opts[BALANCE_OPTS_SYSTEM], &bi->system);
2947
2948 // do the heuristics that Linux driver does
2949
2950 for (i = 0; i < 3; i++) {
2951 if (Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_ENABLED) {
2952 // if converting, don't redo chunks already done
2953
2954 if (Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_CONVERT)
2955 Vcb->balance.opts[i].flags |= BTRFS_BALANCE_OPTS_SOFT;
2956
2957 // don't balance chunks more than 90% filled - presumably these
2958 // have already been done
2959
2960 if (!(Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_USAGE) &&
2961 !(Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_CONVERT)
2962 ) {
2963 Vcb->balance.opts[i].flags |= BTRFS_BALANCE_OPTS_USAGE;
2964 Vcb->balance.opts[i].usage_start = 0;
2965 Vcb->balance.opts[i].usage_end = 90;
2966 }
2967 }
2968 }
2969
2970 if (Vcb->readonly || Vcb->options.skip_balance)
2971 Vcb->balance.paused = TRUE;
2972 else
2973 Vcb->balance.paused = FALSE;
2974
2975 Vcb->balance.removing = FALSE;
2976 Vcb->balance.status = STATUS_SUCCESS;
2977 KeInitializeEvent(&Vcb->balance.event, NotificationEvent, !Vcb->balance.paused);
2978
2979 Status = PsCreateSystemThread(&Vcb->balance.thread, 0, NULL, NULL, NULL, balance_thread, Vcb);
2980 if (!NT_SUCCESS(Status)) {
2981 ERR("PsCreateSystemThread returned %08x\n", Status);
2982 return Status;
2983 }
2984
2985 return STATUS_SUCCESS;
2986 }
2987
2988 NTSTATUS query_balance(device_extension* Vcb, void* data, ULONG length) {
2989 btrfs_query_balance* bqb = (btrfs_query_balance*)data;
2990
2991 if (length < sizeof(btrfs_query_balance) || !data)
2992 return STATUS_INVALID_PARAMETER;
2993
2994 if (!Vcb->balance.thread) {
2995 bqb->status = BTRFS_BALANCE_STOPPED;
2996
2997 if (!NT_SUCCESS(Vcb->balance.status)) {
2998 bqb->status |= BTRFS_BALANCE_ERROR;
2999 bqb->error = Vcb->balance.status;
3000 }
3001
3002 return STATUS_SUCCESS;
3003 }
3004
3005 bqb->status = Vcb->balance.paused ? BTRFS_BALANCE_PAUSED : BTRFS_BALANCE_RUNNING;
3006
3007 if (Vcb->balance.removing)
3008 bqb->status |= BTRFS_BALANCE_REMOVAL;
3009
3010 if (!NT_SUCCESS(Vcb->balance.status))
3011 bqb->status |= BTRFS_BALANCE_ERROR;
3012
3013 bqb->chunks_left = Vcb->balance.chunks_left;
3014 bqb->total_chunks = Vcb->balance.total_chunks;
3015 bqb->error = Vcb->balance.status;
3016 RtlCopyMemory(&bqb->data_opts, &Vcb->balance.opts[BALANCE_OPTS_DATA], sizeof(btrfs_balance_opts));
3017 RtlCopyMemory(&bqb->metadata_opts, &Vcb->balance.opts[BALANCE_OPTS_METADATA], sizeof(btrfs_balance_opts));
3018 RtlCopyMemory(&bqb->system_opts, &Vcb->balance.opts[BALANCE_OPTS_SYSTEM], sizeof(btrfs_balance_opts));
3019
3020 return STATUS_SUCCESS;
3021 }
3022
3023 NTSTATUS pause_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
3024 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3025 return STATUS_PRIVILEGE_NOT_HELD;
3026
3027 if (!Vcb->balance.thread)
3028 return STATUS_DEVICE_NOT_READY;
3029
3030 if (Vcb->balance.paused)
3031 return STATUS_DEVICE_NOT_READY;
3032
3033 Vcb->balance.paused = TRUE;
3034 KeClearEvent(&Vcb->balance.event);
3035
3036 return STATUS_SUCCESS;
3037 }
3038
3039 NTSTATUS resume_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
3040 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3041 return STATUS_PRIVILEGE_NOT_HELD;
3042
3043 if (!Vcb->balance.thread)
3044 return STATUS_DEVICE_NOT_READY;
3045
3046 if (!Vcb->balance.paused)
3047 return STATUS_DEVICE_NOT_READY;
3048
3049 if (Vcb->readonly)
3050 return STATUS_MEDIA_WRITE_PROTECTED;
3051
3052 Vcb->balance.paused = FALSE;
3053 KeSetEvent(&Vcb->balance.event, 0, FALSE);
3054
3055 return STATUS_SUCCESS;
3056 }
3057
3058 NTSTATUS stop_balance(device_extension* Vcb, KPROCESSOR_MODE processor_mode) {
3059 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3060 return STATUS_PRIVILEGE_NOT_HELD;
3061
3062 if (!Vcb->balance.thread)
3063 return STATUS_DEVICE_NOT_READY;
3064
3065 Vcb->balance.paused = FALSE;
3066 Vcb->balance.stopping = TRUE;
3067 Vcb->balance.cancelling = TRUE;
3068 Vcb->balance.status = STATUS_SUCCESS;
3069 KeSetEvent(&Vcb->balance.event, 0, FALSE);
3070
3071 return STATUS_SUCCESS;
3072 }
3073
3074 NTSTATUS remove_device(device_extension* Vcb, void* data, ULONG length, KPROCESSOR_MODE processor_mode) {
3075 UINT64 devid;
3076 LIST_ENTRY* le;
3077 device* dev = NULL;
3078 NTSTATUS Status;
3079 int i;
3080 UINT64 num_rw_devices;
3081
3082 TRACE("(%p, %p, %x)\n", Vcb, data, length);
3083
3084 if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3085 return STATUS_PRIVILEGE_NOT_HELD;
3086
3087 if (length < sizeof(UINT64))
3088 return STATUS_INVALID_PARAMETER;
3089
3090 devid = *(UINT64*)data;
3091
3092 ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE);
3093
3094 if (Vcb->readonly) {
3095 ExReleaseResourceLite(&Vcb->tree_lock);
3096 return STATUS_MEDIA_WRITE_PROTECTED;
3097 }
3098
3099 num_rw_devices = 0;
3100
3101 le = Vcb->devices.Flink;
3102 while (le != &Vcb->devices) {
3103 device* dev2 = CONTAINING_RECORD(le, device, list_entry);
3104
3105 if (dev2->devitem.dev_id == devid)
3106 dev = dev2;
3107
3108 if (!dev2->readonly)
3109 num_rw_devices++;
3110
3111 le = le->Flink;
3112 }
3113
3114 if (!dev) {
3115 ExReleaseResourceLite(&Vcb->tree_lock);
3116 WARN("device %llx not found\n", devid);
3117 return STATUS_NOT_FOUND;
3118 }
3119
3120 if (!dev->readonly) {
3121 if (num_rw_devices == 1) {
3122 ExReleaseResourceLite(&Vcb->tree_lock);
3123 WARN("not removing last non-readonly device\n");
3124 return STATUS_INVALID_PARAMETER;
3125 }
3126
3127 if (num_rw_devices == 4 &&
3128 ((Vcb->data_flags & BLOCK_FLAG_RAID10 || Vcb->metadata_flags & BLOCK_FLAG_RAID10 || Vcb->system_flags & BLOCK_FLAG_RAID10) ||
3129 (Vcb->data_flags & BLOCK_FLAG_RAID6 || Vcb->metadata_flags & BLOCK_FLAG_RAID6 || Vcb->system_flags & BLOCK_FLAG_RAID6))
3130 ) {
3131 ExReleaseResourceLite(&Vcb->tree_lock);
3132 ERR("would not be enough devices to satisfy RAID requirement (RAID6/10)\n");
3133 return STATUS_CANNOT_DELETE;
3134 }
3135
3136 if (num_rw_devices == 3 && (Vcb->data_flags & BLOCK_FLAG_RAID5 || Vcb->metadata_flags & BLOCK_FLAG_RAID5 || Vcb->system_flags & BLOCK_FLAG_RAID5)) {
3137 ExReleaseResourceLite(&Vcb->tree_lock);
3138 ERR("would not be enough devices to satisfy RAID requirement (RAID5)\n");
3139 return STATUS_CANNOT_DELETE;
3140 }
3141
3142 if (num_rw_devices == 2 &&
3143 ((Vcb->data_flags & BLOCK_FLAG_RAID0 || Vcb->metadata_flags & BLOCK_FLAG_RAID0 || Vcb->system_flags & BLOCK_FLAG_RAID0) ||
3144 (Vcb->data_flags & BLOCK_FLAG_RAID1 || Vcb->metadata_flags & BLOCK_FLAG_RAID1 || Vcb->system_flags & BLOCK_FLAG_RAID1))
3145 ) {
3146 ExReleaseResourceLite(&Vcb->tree_lock);
3147 ERR("would not be enough devices to satisfy RAID requirement (RAID0/1)\n");
3148 return STATUS_CANNOT_DELETE;
3149 }
3150 }
3151
3152 ExReleaseResourceLite(&Vcb->tree_lock);
3153
3154 if (Vcb->balance.thread) {
3155 WARN("balance already running\n");
3156 return STATUS_DEVICE_NOT_READY;
3157 }
3158
3159 dev->reloc = TRUE;
3160
3161 RtlZeroMemory(Vcb->balance.opts, sizeof(btrfs_balance_opts) * 3);
3162
3163 for (i = 0; i < 3; i++) {
3164 Vcb->balance.opts[i].flags = BTRFS_BALANCE_OPTS_ENABLED | BTRFS_BALANCE_OPTS_DEVID;
3165 Vcb->balance.opts[i].devid = devid;
3166 }
3167
3168 Vcb->balance.paused = FALSE;
3169 Vcb->balance.removing = TRUE;
3170 KeInitializeEvent(&Vcb->balance.event, NotificationEvent, !Vcb->balance.paused);
3171
3172 Status = PsCreateSystemThread(&Vcb->balance.thread, 0, NULL, NULL, NULL, balance_thread, Vcb);
3173 if (!NT_SUCCESS(Status)) {
3174 ERR("PsCreateSystemThread returned %08x\n", Status);
3175 dev->reloc = FALSE;
3176 return Status;
3177 }
3178
3179 return STATUS_SUCCESS;
3180 }