[NFSD] [TELNET] Remove the hardcoded directory paths C:\ReactOS
[reactos.git] / base / services / nfsd / pnfs_layout.c
1 /* NFSv4.1 client for Windows
2 * Copyright © 2012 The Regents of the University of Michigan
3 *
4 * Olga Kornievskaia <aglo@umich.edu>
5 * Casey Bodley <cbodley@umich.edu>
6 *
7 * This library is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or (at
10 * your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful, but
13 * without any warranty; without even the implied warranty of merchantability
14 * or fitness for a particular purpose. See the GNU Lesser General Public
15 * License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this library; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 */
21
22 #include <stdio.h>
23
24 #include "nfs41_ops.h"
25 #include "nfs41_callback.h"
26 #include "util.h"
27 #include "daemon_debug.h"
28
29
30 #define FLLVL 2 /* dprintf level for file layout logging */
31
32
33 /* pnfs_layout_list */
34 struct pnfs_layout_list {
35 struct list_entry head;
36 CRITICAL_SECTION lock;
37 };
38
/* convert a list_entry pointer back into the structure that embeds it */
#define state_entry(pos) \
    list_container(pos, pnfs_layout_state, entry)
#define layout_entry(pos) \
    list_container(pos, pnfs_layout, entry)
#define file_layout_entry(pos) \
    list_container(pos, pnfs_file_layout, layout.entry)
42
43 static enum pnfs_status layout_state_create(
44 IN const nfs41_fh *meta_fh,
45 OUT pnfs_layout_state **layout_out)
46 {
47 pnfs_layout_state *layout;
48 enum pnfs_status status = PNFS_SUCCESS;
49
50 layout = calloc(1, sizeof(pnfs_layout_state));
51 if (layout == NULL) {
52 status = PNFSERR_RESOURCES;
53 goto out;
54 }
55
56 fh_copy(&layout->meta_fh, meta_fh);
57 list_init(&layout->layouts);
58 list_init(&layout->recalls);
59 InitializeSRWLock(&layout->lock);
60 InitializeConditionVariable(&layout->cond);
61
62 *layout_out = layout;
63 out:
64 return status;
65 }
66
67 static void file_layout_free(
68 IN pnfs_file_layout *layout)
69 {
70 if (layout->device) pnfs_file_device_put(layout->device);
71 free(layout->filehandles.arr);
72 free(layout);
73 }
74
75 static void layout_state_free_layouts(
76 IN pnfs_layout_state *state)
77 {
78 struct list_entry *entry, *tmp;
79 list_for_each_tmp(entry, tmp, &state->layouts)
80 file_layout_free(file_layout_entry(entry));
81 list_init(&state->layouts);
82 }
83
84 static void layout_state_free_recalls(
85 IN pnfs_layout_state *state)
86 {
87 struct list_entry *entry, *tmp;
88 list_for_each_tmp(entry, tmp, &state->recalls)
89 free(layout_entry(entry));
90 list_init(&state->recalls);
91 }
92
93 static void layout_state_free(
94 IN pnfs_layout_state *state)
95 {
96 layout_state_free_layouts(state);
97 layout_state_free_recalls(state);
98 free(state);
99 }
100
101 static int layout_entry_compare(
102 IN const struct list_entry *entry,
103 IN const void *value)
104 {
105 const pnfs_layout_state *layout = state_entry(entry);
106 const nfs41_fh *meta_fh = (const nfs41_fh*)value;
107 const nfs41_fh *layout_fh = (const nfs41_fh*)&layout->meta_fh;
108 const uint32_t diff = layout_fh->len - meta_fh->len;
109 return diff ? diff : memcmp(layout_fh->fh, meta_fh->fh, meta_fh->len);
110 }
111
112 static enum pnfs_status layout_entry_find(
113 IN struct pnfs_layout_list *layouts,
114 IN const nfs41_fh *meta_fh,
115 OUT struct list_entry **entry_out)
116 {
117 *entry_out = list_search(&layouts->head, meta_fh, layout_entry_compare);
118 return *entry_out ? PNFS_SUCCESS : PNFSERR_NO_LAYOUT;
119 }
120
121 enum pnfs_status pnfs_layout_list_create(
122 OUT struct pnfs_layout_list **layouts_out)
123 {
124 struct pnfs_layout_list *layouts;
125 enum pnfs_status status = PNFS_SUCCESS;
126
127 layouts = calloc(1, sizeof(struct pnfs_layout_list));
128 if (layouts == NULL) {
129 status = PNFSERR_RESOURCES;
130 goto out;
131 }
132 list_init(&layouts->head);
133 InitializeCriticalSection(&layouts->lock);
134 *layouts_out = layouts;
135 out:
136 return status;
137 }
138
139 void pnfs_layout_list_free(
140 IN struct pnfs_layout_list *layouts)
141 {
142 struct list_entry *entry, *tmp;
143
144 EnterCriticalSection(&layouts->lock);
145
146 list_for_each_tmp(entry, tmp, &layouts->head)
147 layout_state_free(state_entry(entry));
148
149 LeaveCriticalSection(&layouts->lock);
150 DeleteCriticalSection(&layouts->lock);
151 free(layouts);
152 }
153
154 static enum pnfs_status layout_state_find_or_create(
155 IN struct pnfs_layout_list *layouts,
156 IN const nfs41_fh *meta_fh,
157 OUT pnfs_layout_state **layout_out)
158 {
159 struct list_entry *entry;
160 enum pnfs_status status;
161
162 dprintf(FLLVL, "--> layout_state_find_or_create()\n");
163
164 EnterCriticalSection(&layouts->lock);
165
166 /* search for an existing layout */
167 status = layout_entry_find(layouts, meta_fh, &entry);
168 if (status) {
169 /* create a new layout */
170 pnfs_layout_state *layout;
171 status = layout_state_create(meta_fh, &layout);
172 if (status == PNFS_SUCCESS) {
173 /* add it to the list */
174 list_add_head(&layouts->head, &layout->entry);
175 *layout_out = layout;
176
177 dprintf(FLLVL, "<-- layout_state_find_or_create() "
178 "returning new layout %p\n", layout);
179 } else {
180 dprintf(FLLVL, "<-- layout_state_find_or_create() "
181 "returning %s\n", pnfs_error_string(status));
182 }
183 } else {
184 *layout_out = state_entry(entry);
185
186 dprintf(FLLVL, "<-- layout_state_find_or_create() "
187 "returning existing layout %p\n", *layout_out);
188 }
189
190 LeaveCriticalSection(&layouts->lock);
191 return status;
192 }
193
194 static enum pnfs_status layout_state_find_and_delete(
195 IN struct pnfs_layout_list *layouts,
196 IN const nfs41_fh *meta_fh)
197 {
198 struct list_entry *entry;
199 enum pnfs_status status;
200
201 dprintf(FLLVL, "--> layout_state_find_and_delete()\n");
202
203 EnterCriticalSection(&layouts->lock);
204
205 status = layout_entry_find(layouts, meta_fh, &entry);
206 if (status == PNFS_SUCCESS) {
207 list_remove(entry);
208 layout_state_free(state_entry(entry));
209 }
210
211 LeaveCriticalSection(&layouts->lock);
212
213 dprintf(FLLVL, "<-- layout_state_find_and_delete() "
214 "returning %s\n", pnfs_error_string(status));
215 return status;
216 }
217
218
219 /* pnfs_file_layout */
220 static uint64_t range_max(
221 IN const pnfs_layout *layout)
222 {
223 uint64_t result = layout->offset + layout->length;
224 return result < layout->offset ? NFS4_UINT64_MAX : result;
225 }
226
227 static bool_t layout_sanity_check(
228 IN pnfs_file_layout *layout)
229 {
230 /* prevent div/0 */
231 if (layout->layout.length == 0 ||
232 layout->layout.iomode < PNFS_IOMODE_READ ||
233 layout->layout.iomode > PNFS_IOMODE_RW ||
234 layout_unit_size(layout) == 0)
235 return FALSE;
236
237 /* put a cap on layout.length to prevent overflow */
238 layout->layout.length = range_max(&layout->layout) - layout->layout.offset;
239 return TRUE;
240 }
241
242 static int layout_filehandles_cmp(
243 IN const pnfs_file_layout_handles *lhs,
244 IN const pnfs_file_layout_handles *rhs)
245 {
246 const uint32_t diff = rhs->count - lhs->count;
247 return diff ? diff : memcmp(rhs->arr, lhs->arr,
248 rhs->count * sizeof(nfs41_path_fh));
249 }
250
251 static bool_t layout_merge_segments(
252 IN pnfs_file_layout *to,
253 IN pnfs_file_layout *from)
254 {
255 const uint64_t to_max = range_max(&to->layout);
256 const uint64_t from_max = range_max(&from->layout);
257
258 /* cannot merge a segment with itself */
259 if (to == from)
260 return FALSE;
261
262 /* the ranges must meet or overlap */
263 if (to_max < from->layout.offset || from_max < to->layout.offset)
264 return FALSE;
265
266 /* the following fields must match: */
267 if (to->layout.iomode != from->layout.iomode ||
268 to->layout.type != from->layout.type ||
269 layout_filehandles_cmp(&to->filehandles, &from->filehandles) != 0 ||
270 memcmp(to->deviceid, from->deviceid, PNFS_DEVICEID_SIZE) != 0 ||
271 to->pattern_offset != from->pattern_offset ||
272 to->first_index != from->first_index ||
273 to->util != from->util)
274 return FALSE;
275
276 dprintf(FLLVL, "merging layout range {%llu, %llu} with {%llu, %llu}\n",
277 to->layout.offset, to->layout.length,
278 from->layout.offset, from->layout.length);
279
280 /* calculate the union of the two ranges */
281 to->layout.offset = min(to->layout.offset, from->layout.offset);
282 to->layout.length = max(to_max, from_max) - to->layout.offset;
283 return TRUE;
284 }
285
286 static enum pnfs_status layout_state_merge(
287 IN pnfs_layout_state *state,
288 IN pnfs_file_layout *from)
289 {
290 struct list_entry *entry, *tmp;
291 pnfs_file_layout *to;
292 enum pnfs_status status = PNFSERR_NO_LAYOUT;
293
294 /* attempt to merge the new segment with each existing segment */
295 list_for_each_tmp(entry, tmp, &state->layouts) {
296 to = file_layout_entry(entry);
297 if (!layout_merge_segments(to, from))
298 continue;
299
300 /* on success, remove/free the new segment */
301 list_remove(&from->layout.entry);
302 file_layout_free(from);
303 status = PNFS_SUCCESS;
304
305 /* because the existing segment 'to' has grown, we may
306 * be able to merge it with later segments */
307 from = to;
308
309 /* but if there could be io threads referencing this segment,
310 * we can't free it until io is finished */
311 if (state->io_count)
312 break;
313 }
314 return status;
315 }
316
317 static void layout_ordered_insert(
318 IN pnfs_layout_state *state,
319 IN pnfs_layout *layout)
320 {
321 struct list_entry *entry;
322 list_for_each(entry, &state->layouts) {
323 pnfs_layout *existing = layout_entry(entry);
324
325 /* maintain an order of increasing offset */
326 if (existing->offset < layout->offset)
327 continue;
328
329 /* when offsets are equal, prefer a longer segment first */
330 if (existing->offset == layout->offset &&
331 existing->length > layout->length)
332 continue;
333
334 list_add(&layout->entry, existing->entry.prev, &existing->entry);
335 return;
336 }
337
338 list_add_tail(&state->layouts, &layout->entry);
339 }
340
341 static enum pnfs_status layout_update_range(
342 IN OUT pnfs_layout_state *state,
343 IN const struct list_entry *layouts)
344 {
345 struct list_entry *entry, *tmp;
346 pnfs_file_layout *layout;
347 enum pnfs_status status = PNFSERR_NO_LAYOUT;
348
349 list_for_each_tmp(entry, tmp, layouts) {
350 layout = file_layout_entry(entry);
351
352 /* don't know what to do with non-file layouts */
353 if (layout->layout.type != PNFS_LAYOUTTYPE_FILE)
354 continue;
355
356 if (!layout_sanity_check(layout)) {
357 file_layout_free(layout);
358 continue;
359 }
360
361 /* attempt to merge the range with existing segments */
362 status = layout_state_merge(state, layout);
363 if (status) {
364 dprintf(FLLVL, "saving new layout:\n");
365 dprint_layout(FLLVL, layout);
366
367 layout_ordered_insert(state, &layout->layout);
368 status = PNFS_SUCCESS;
369 }
370 }
371 return status;
372 }
373
374 static enum pnfs_status layout_update_stateid(
375 IN OUT pnfs_layout_state *state,
376 IN const stateid4 *stateid)
377 {
378 enum pnfs_status status = PNFS_SUCCESS;
379
380 if (state->stateid.seqid == 0) {
381 /* save a new layout stateid */
382 memcpy(&state->stateid, stateid, sizeof(stateid4));
383 } else if (memcmp(&state->stateid.other, stateid->other,
384 NFS4_STATEID_OTHER) == 0) {
385 /* update an existing layout stateid */
386 state->stateid.seqid = stateid->seqid;
387 } else {
388 status = PNFSERR_NO_LAYOUT;
389 }
390 return status;
391 }
392
393 static enum pnfs_status layout_update(
394 IN OUT pnfs_layout_state *state,
395 IN const pnfs_layoutget_res_ok *layoutget_res)
396 {
397 enum pnfs_status status;
398
399 /* update the layout ranges held by the client */
400 status = layout_update_range(state, &layoutget_res->layouts);
401 if (status) {
402 eprintf("LAYOUTGET didn't return any file layouts\n");
403 goto out;
404 }
405 /* update the layout stateid */
406 status = layout_update_stateid(state, &layoutget_res->stateid);
407 if (status) {
408 eprintf("LAYOUTGET returned a new stateid when we already had one\n");
409 goto out;
410 }
411 /* if a previous LAYOUTGET set return_on_close, don't overwrite it */
412 if (!state->return_on_close)
413 state->return_on_close = layoutget_res->return_on_close;
414 out:
415 return status;
416 }
417
418 static enum pnfs_status file_layout_fetch(
419 IN OUT pnfs_layout_state *state,
420 IN nfs41_session *session,
421 IN nfs41_path_fh *meta_file,
422 IN stateid_arg *stateid,
423 IN enum pnfs_iomode iomode,
424 IN uint64_t offset,
425 IN uint64_t minlength,
426 IN uint64_t length)
427 {
428 pnfs_layoutget_res_ok layoutget_res = { 0 };
429 enum pnfs_status pnfsstat = PNFS_SUCCESS;
430 enum nfsstat4 nfsstat;
431
432 dprintf(FLLVL, "--> file_layout_fetch(%s, seqid=%u)\n",
433 pnfs_iomode_string(iomode), state->stateid.seqid);
434
435 list_init(&layoutget_res.layouts);
436
437 /* drop the lock during the rpc call */
438 ReleaseSRWLockExclusive(&state->lock);
439 nfsstat = pnfs_rpc_layoutget(session, meta_file, stateid,
440 iomode, offset, minlength, length, &layoutget_res);
441 AcquireSRWLockExclusive(&state->lock);
442
443 if (nfsstat) {
444 dprintf(FLLVL, "pnfs_rpc_layoutget() failed with %s\n",
445 nfs_error_string(nfsstat));
446 pnfsstat = PNFSERR_NOT_SUPPORTED;
447 }
448
449 switch (nfsstat) {
450 case NFS4_OK:
451 /* use the LAYOUTGET results to update our view of the layout */
452 pnfsstat = layout_update(state, &layoutget_res);
453 break;
454
455 case NFS4ERR_BADIOMODE:
456 /* don't try RW again */
457 if (iomode == PNFS_IOMODE_RW)
458 state->status |= PNFS_LAYOUT_NOT_RW;
459 break;
460
461 case NFS4ERR_LAYOUTUNAVAILABLE:
462 case NFS4ERR_UNKNOWN_LAYOUTTYPE:
463 case NFS4ERR_BADLAYOUT:
464 /* don't try again at all */
465 state->status |= PNFS_LAYOUT_UNAVAILABLE;
466 break;
467 }
468
469 dprintf(FLLVL, "<-- file_layout_fetch() returning %s\n",
470 pnfs_error_string(pnfsstat));
471 return pnfsstat;
472 }
473
474 /* returns PNFS_SUCCESS if the client holds valid layouts that cover
475 * the entire range requested. otherwise, returns PNFS_PENDING and
476 * sets 'offset_missing' to the lowest offset that is not covered */
477 static enum pnfs_status layout_coverage_status(
478 IN pnfs_layout_state *state,
479 IN enum pnfs_iomode iomode,
480 IN uint64_t offset,
481 IN uint64_t length,
482 OUT uint64_t *offset_missing)
483 {
484 uint64_t position = offset;
485 struct list_entry *entry;
486
487 list_for_each(entry, &state->layouts) {
488 /* if the current position intersects with a compatible
489 * layout, move the position to the end of that layout */
490 pnfs_layout *layout = layout_entry(entry);
491 if (layout->iomode >= iomode &&
492 layout->offset <= position &&
493 position < layout->offset + layout->length)
494 position = layout->offset + layout->length;
495 }
496
497 if (position >= offset + length)
498 return PNFS_SUCCESS;
499
500 *offset_missing = position;
501 return PNFS_PENDING;
502 }
503
504 static enum pnfs_status layout_fetch(
505 IN pnfs_layout_state *state,
506 IN nfs41_session *session,
507 IN nfs41_path_fh *meta_file,
508 IN stateid_arg *stateid,
509 IN enum pnfs_iomode iomode,
510 IN uint64_t offset,
511 IN uint64_t length)
512 {
513 stateid_arg layout_stateid = { 0 };
514 enum pnfs_status status = PNFS_PENDING;
515
516 /* check for previous errors from LAYOUTGET */
517 if ((state->status & PNFS_LAYOUT_UNAVAILABLE) ||
518 ((state->status & PNFS_LAYOUT_NOT_RW) && iomode == PNFS_IOMODE_RW)) {
519 status = PNFSERR_NO_LAYOUT;
520 goto out;
521 }
522
523 /* wait for any pending LAYOUTGETs/LAYOUTRETURNs */
524 while (state->pending)
525 SleepConditionVariableSRW(&state->cond, &state->lock, INFINITE, 0);
526 state->pending = TRUE;
527
528 /* if there's an existing layout stateid, use it */
529 if (state->stateid.seqid) {
530 memcpy(&layout_stateid.stateid, &state->stateid, sizeof(stateid4));
531 layout_stateid.type = STATEID_LAYOUT;
532 stateid = &layout_stateid;
533 }
534
535 if ((state->status & PNFS_LAYOUT_NOT_RW) == 0) {
536 /* try to get a RW layout first */
537 status = file_layout_fetch(state, session, meta_file,
538 stateid, PNFS_IOMODE_RW, offset, length, NFS4_UINT64_MAX);
539 }
540
541 if (status && iomode == PNFS_IOMODE_READ) {
542 /* fall back on READ if necessary */
543 status = file_layout_fetch(state, session, meta_file,
544 stateid, iomode, offset, length, NFS4_UINT64_MAX);
545 }
546
547 state->pending = FALSE;
548 WakeConditionVariable(&state->cond);
549 out:
550 return status;
551 }
552
553 static enum pnfs_status device_status(
554 IN pnfs_layout_state *state,
555 IN uint64_t offset,
556 IN uint64_t length,
557 OUT unsigned char *deviceid)
558 {
559 struct list_entry *entry;
560 enum pnfs_status status = PNFS_SUCCESS;
561
562 list_for_each(entry, &state->layouts) {
563 pnfs_file_layout *layout = file_layout_entry(entry);
564
565 if (layout->device == NULL) {
566 /* copy missing deviceid */
567 memcpy(deviceid, layout->deviceid, PNFS_DEVICEID_SIZE);
568 status = PNFS_PENDING;
569 break;
570 }
571 }
572 return status;
573 }
574
575 static void device_assign(
576 IN pnfs_layout_state *state,
577 IN const unsigned char *deviceid,
578 IN pnfs_file_device *device)
579 {
580 struct list_entry *entry;
581 list_for_each(entry, &state->layouts) {
582 pnfs_file_layout *layout = file_layout_entry(entry);
583
584 /* assign the device to any matching layouts */
585 if (layout->device == NULL &&
586 memcmp(layout->deviceid, deviceid, PNFS_DEVICEID_SIZE) == 0) {
587 layout->device = device;
588
589 /* XXX: only assign the device to a single segment, because
590 * pnfs_file_device_get() only gives us a single reference */
591 break;
592 }
593 }
594 }
595
596 static enum pnfs_status device_fetch(
597 IN pnfs_layout_state *state,
598 IN nfs41_session *session,
599 IN unsigned char *deviceid)
600 {
601 pnfs_file_device *device;
602 enum pnfs_status status;
603
604 /* drop the layoutstate lock for the rpc call */
605 ReleaseSRWLockExclusive(&state->lock);
606 status = pnfs_file_device_get(session,
607 session->client->devices, deviceid, &device);
608 AcquireSRWLockExclusive(&state->lock);
609
610 if (status == PNFS_SUCCESS)
611 device_assign(state, deviceid, device);
612 return status;
613 }
614
615
616 /* nfs41_open_state */
617 static enum pnfs_status client_supports_pnfs(
618 IN nfs41_client *client)
619 {
620 enum pnfs_status status;
621 AcquireSRWLockShared(&client->exid_lock);
622 status = client->roles & EXCHGID4_FLAG_USE_PNFS_MDS
623 ? PNFS_SUCCESS : PNFSERR_NOT_SUPPORTED;
624 ReleaseSRWLockShared(&client->exid_lock);
625 return status;
626 }
627
628 static enum pnfs_status fs_supports_layout(
629 IN const nfs41_superblock *superblock,
630 IN enum pnfs_layout_type type)
631 {
632 const uint32_t flag = 1 << (type - 1);
633 return (superblock->layout_types & flag) == 0
634 ? PNFSERR_NOT_SUPPORTED : PNFS_SUCCESS;
635 }
636
637 static enum pnfs_status open_state_layout_cached(
638 IN nfs41_open_state *state,
639 OUT pnfs_layout_state **layout_out)
640 {
641 enum pnfs_status status = PNFSERR_NO_LAYOUT;
642
643 if (state->layout) {
644 status = PNFS_SUCCESS;
645 *layout_out = state->layout;
646
647 dprintf(FLLVL, "pnfs_open_state_layout() found "
648 "cached layout %p\n", *layout_out);
649 }
650 return status;
651 }
652
653 enum pnfs_status pnfs_layout_state_open(
654 IN nfs41_open_state *state,
655 OUT pnfs_layout_state **layout_out)
656 {
657 struct pnfs_layout_list *layouts = state->session->client->layouts;
658 nfs41_session *session = state->session;
659 pnfs_layout_state *layout;
660 enum pnfs_status status;
661
662 dprintf(FLLVL, "--> pnfs_layout_state_open()\n");
663
664 status = client_supports_pnfs(session->client);
665 if (status)
666 goto out;
667 status = fs_supports_layout(state->file.fh.superblock, PNFS_LAYOUTTYPE_FILE);
668 if (status)
669 goto out;
670
671 /* under shared lock, check open state for cached layouts */
672 AcquireSRWLockShared(&state->lock);
673 status = open_state_layout_cached(state, &layout);
674 ReleaseSRWLockShared(&state->lock);
675
676 if (status) {
677 /* under exclusive lock, find or create a layout for this file */
678 AcquireSRWLockExclusive(&state->lock);
679
680 status = open_state_layout_cached(state, &layout);
681 if (status) {
682 status = layout_state_find_or_create(layouts, &state->file.fh, &layout);
683 if (status == PNFS_SUCCESS) {
684 LONG open_count = InterlockedIncrement(&layout->open_count);
685 state->layout = layout;
686
687 dprintf(FLLVL, "pnfs_layout_state_open() caching layout %p "
688 "(%u opens)\n", state->layout, open_count);
689 }
690 }
691
692 ReleaseSRWLockExclusive(&state->lock);
693
694 if (status)
695 goto out;
696 }
697
698 *layout_out = layout;
699 out:
700 dprintf(FLLVL, "<-- pnfs_layout_state_open() returning %s\n",
701 pnfs_error_string(status));
702 return status;
703 }
704
705 /* expects caller to hold an exclusive lock on pnfs_layout_state */
706 enum pnfs_status pnfs_layout_state_prepare(
707 IN pnfs_layout_state *state,
708 IN nfs41_session *session,
709 IN nfs41_path_fh *meta_file,
710 IN stateid_arg *stateid,
711 IN enum pnfs_iomode iomode,
712 IN uint64_t offset,
713 IN uint64_t length)
714 {
715 unsigned char deviceid[PNFS_DEVICEID_SIZE];
716 struct list_entry *entry;
717 uint64_t missing;
718 enum pnfs_status status;
719
720 /* fail if the range intersects any pending recalls */
721 list_for_each(entry, &state->recalls) {
722 const pnfs_layout *recall = layout_entry(entry);
723 if (offset <= recall->offset + recall->length
724 && recall->offset <= offset + length) {
725 status = PNFSERR_LAYOUT_RECALLED;
726 goto out;
727 }
728 }
729
730 /* if part of the given range is not covered by a layout,
731 * attempt to fetch it with LAYOUTGET */
732 status = layout_coverage_status(state, iomode, offset, length, &missing);
733 if (status == PNFS_PENDING) {
734 status = layout_fetch(state, session, meta_file, stateid,
735 iomode, missing, offset + length - missing);
736
737 /* return pending because layout_fetch() dropped the lock */
738 if (status == PNFS_SUCCESS)
739 status = PNFS_PENDING;
740 goto out;
741 }
742
743 /* if any layouts in the range are missing device info,
744 * fetch them with GETDEVICEINFO */
745 status = device_status(state, offset, length, deviceid);
746 if (status == PNFS_PENDING) {
747 status = device_fetch(state, session, deviceid);
748
749 /* return pending because device_fetch() dropped the lock */
750 if (status == PNFS_SUCCESS)
751 status = PNFS_PENDING;
752 goto out;
753 }
754 out:
755 return status;
756 }
757
758 static enum pnfs_status layout_return_status(
759 IN const pnfs_layout_state *state)
760 {
761 /* return the layout if we have a stateid */
762 return state->stateid.seqid ? PNFS_SUCCESS : PNFS_PENDING;
763 }
764
765 static enum pnfs_status file_layout_return(
766 IN nfs41_session *session,
767 IN nfs41_path_fh *file,
768 IN pnfs_layout_state *state)
769 {
770 enum pnfs_status status;
771 enum nfsstat4 nfsstat;
772
773 dprintf(FLLVL, "--> file_layout_return()\n");
774
775 /* under shared lock, determine whether we need to return the layout */
776 AcquireSRWLockShared(&state->lock);
777 status = layout_return_status(state);
778 ReleaseSRWLockShared(&state->lock);
779
780 if (status != PNFS_PENDING)
781 goto out;
782
783 /* under exclusive lock, return the layout and reset status flags */
784 AcquireSRWLockExclusive(&state->lock);
785
786 /* wait for any pending LAYOUTGETs/LAYOUTRETURNs */
787 while (state->pending)
788 SleepConditionVariableSRW(&state->cond, &state->lock, INFINITE, 0);
789 state->pending = TRUE;
790
791 status = layout_return_status(state);
792 if (status == PNFS_PENDING) {
793 pnfs_layoutreturn_res layoutreturn_res = { 0 };
794 stateid4 stateid;
795 memcpy(&stateid, &state->stateid, sizeof(stateid));
796
797 /* drop the lock during the rpc call */
798 ReleaseSRWLockExclusive(&state->lock);
799 nfsstat = pnfs_rpc_layoutreturn(session, file, PNFS_LAYOUTTYPE_FILE,
800 PNFS_IOMODE_ANY, 0, NFS4_UINT64_MAX, &stateid, &layoutreturn_res);
801 AcquireSRWLockExclusive(&state->lock);
802
803 if (nfsstat) {
804 eprintf("pnfs_rpc_layoutreturn() failed with %s\n",
805 nfs_error_string(nfsstat));
806 status = PNFSERR_NO_LAYOUT;
807 } else {
808 status = PNFS_SUCCESS;
809
810 /* update the layout range held by the client */
811 layout_state_free_layouts(state);
812
813 /* 12.5.3. Layout Stateid: Once a client has no more
814 * layouts on a file, the layout stateid is no longer
815 * valid and MUST NOT be used. */
816 ZeroMemory(&state->stateid, sizeof(stateid4));
817 }
818 }
819
820 state->pending = FALSE;
821 WakeConditionVariable(&state->cond);
822 ReleaseSRWLockExclusive(&state->lock);
823
824 out:
825 dprintf(FLLVL, "<-- file_layout_return() returning %s\n",
826 pnfs_error_string(status));
827 return status;
828 }
829
830 void pnfs_layout_state_close(
831 IN nfs41_session *session,
832 IN nfs41_open_state *state,
833 IN bool_t remove)
834 {
835 pnfs_layout_state *layout;
836 bool_t return_layout;
837 enum pnfs_status status;
838
839 AcquireSRWLockExclusive(&state->lock);
840 layout = state->layout;
841 state->layout = NULL;
842 ReleaseSRWLockExclusive(&state->lock);
843
844 if (layout) {
845 LONG open_count = InterlockedDecrement(&layout->open_count);
846
847 AcquireSRWLockShared(&layout->lock);
848 /* only return on close if it's the last close */
849 return_layout = layout->return_on_close && (open_count <= 0);
850 ReleaseSRWLockShared(&layout->lock);
851
852 if (return_layout) {
853 status = file_layout_return(session, &state->file, layout);
854 if (status)
855 eprintf("file_layout_return() failed with %s\n",
856 pnfs_error_string(status));
857 }
858 }
859
860 if (remove && session->client->layouts) {
861 /* free the layout when the file is removed */
862 layout_state_find_and_delete(session->client->layouts, &state->file.fh);
863 }
864 }
865
866
867 /* pnfs_layout_recall */
868 struct layout_recall {
869 pnfs_layout layout;
870 bool_t changed;
871 };
872 #define recall_entry(pos) list_container(pos, struct layout_recall, layout.entry)
873
874 static bool_t layout_recall_compatible(
875 IN const pnfs_layout *layout,
876 IN const pnfs_layout *recall)
877 {
878 return layout->type == recall->type
879 && layout->offset <= (recall->offset + recall->length)
880 && recall->offset <= (layout->offset + layout->length)
881 && (recall->iomode == PNFS_IOMODE_ANY ||
882 layout->iomode == recall->iomode);
883 }
884
885 static pnfs_file_layout* layout_allocate_copy(
886 IN const pnfs_file_layout *existing)
887 {
888 /* allocate a segment to cover the end of the range */
889 pnfs_file_layout *layout = calloc(1, sizeof(pnfs_file_layout));
890 if (layout == NULL)
891 goto out;
892
893 memcpy(layout, existing, sizeof(pnfs_file_layout));
894
895 /* XXX: don't use the device from existing layout;
896 * we need to get a reference for ourselves */
897 layout->device = NULL;
898
899 /* allocate a copy of the filehandle array */
900 layout->filehandles.arr = calloc(layout->filehandles.count,
901 sizeof(nfs41_path_fh));
902 if (layout->filehandles.arr == NULL)
903 goto out_free;
904
905 memcpy(layout->filehandles.arr, existing->filehandles.arr,
906 layout->filehandles.count * sizeof(nfs41_path_fh));
907 out:
908 return layout;
909
910 out_free:
911 file_layout_free(layout);
912 layout = NULL;
913 goto out;
914 }
915
916 static void layout_recall_range(
917 IN pnfs_layout_state *state,
918 IN const pnfs_layout *recall)
919 {
920 struct list_entry *entry, *tmp;
921 list_for_each_tmp(entry, tmp, &state->layouts) {
922 pnfs_file_layout *layout = file_layout_entry(entry);
923 const uint64_t layout_end = layout->layout.offset + layout->layout.length;
924
925 if (!layout_recall_compatible(&layout->layout, recall))
926 continue;
927
928 if (recall->offset > layout->layout.offset) {
929 /* segment starts before recall; shrink length */
930 layout->layout.length = recall->offset - layout->layout.offset;
931
932 if (layout_end > recall->offset + recall->length) {
933 /* middle chunk of the segment is recalled;
934 * allocate a new segment to cover the end */
935 pnfs_file_layout *remainder = layout_allocate_copy(layout);
936 if (remainder == NULL) {
937 /* silently ignore allocation errors here. behave
938 * as if we 'forgot' this last segment */
939 } else {
940 layout->layout.offset = recall->offset + recall->length;
941 layout->layout.length = layout_end - layout->layout.offset;
942 layout_ordered_insert(state, &remainder->layout);
943 }
944 }
945 } else {
946 /* segment starts after recall */
947 if (layout_end <= recall->offset + recall->length) {
948 /* entire segment is recalled */
949 list_remove(&layout->layout.entry);
950 file_layout_free(layout);
951 } else {
952 /* beginning of segment is recalled; shrink offset/length */
953 layout->layout.offset = recall->offset + recall->length;
954 layout->layout.length = layout_end - layout->layout.offset;
955 }
956 }
957 }
958 }
959
960 static void layout_state_deferred_recalls(
961 IN pnfs_layout_state *state)
962 {
963 struct list_entry *entry, *tmp;
964 list_for_each_tmp(entry, tmp, &state->recalls) {
965 /* process each deferred layout recall */
966 pnfs_layout *recall = layout_entry(entry);
967 layout_recall_range(state, recall);
968
969 /* remove/free the recall entry */
970 list_remove(&recall->entry);
971 free(recall);
972 }
973 }
974
975 static void layout_recall_entry_init(
976 OUT struct layout_recall *lrc,
977 IN const struct cb_layoutrecall_args *recall)
978 {
979 list_init(&lrc->layout.entry);
980 if (recall->recall.type == PNFS_RETURN_FILE) {
981 lrc->layout.offset = recall->recall.args.file.offset;
982 lrc->layout.length = recall->recall.args.file.length;
983 } else {
984 lrc->layout.offset = 0;
985 lrc->layout.length = NFS4_UINT64_MAX;
986 }
987 lrc->layout.iomode = recall->iomode;
988 lrc->layout.type = PNFS_LAYOUTTYPE_FILE;
989 lrc->changed = recall->changed;
990 }
991
992 static enum pnfs_status layout_recall_merge(
993 IN struct list_entry *list,
994 IN pnfs_layout *from)
995 {
996 struct list_entry *entry, *tmp;
997 enum pnfs_status status = PNFSERR_NO_LAYOUT;
998
999 /* attempt to merge the new recall with each existing recall */
1000 list_for_each_tmp(entry, tmp, list) {
1001 pnfs_layout *to = layout_entry(entry);
1002 const uint64_t to_max = to->offset + to->length;
1003 const uint64_t from_max = from->offset + from->length;
1004
1005 /* the ranges must meet or overlap */
1006 if (to_max < from->offset || from_max < to->offset)
1007 continue;
1008
1009 /* the following fields must match: */
1010 if (to->iomode != from->iomode || to->type != from->type)
1011 continue;
1012
1013 dprintf(FLLVL, "merging recalled range {%llu, %llu} with {%llu, %llu}\n",
1014 to->offset, to->length, from->offset, from->length);
1015
1016 /* calculate the union of the two ranges */
1017 to->offset = min(to->offset, from->offset);
1018 to->length = max(to_max, from_max) - to->offset;
1019
1020 /* on success, remove/free the new segment */
1021 list_remove(&from->entry);
1022 free(from);
1023 status = PNFS_SUCCESS;
1024
1025 /* because the existing segment 'to' has grown, we may
1026 * be able to merge it with later segments */
1027 from = to;
1028 }
1029 return status;
1030 }
1031
1032 static enum pnfs_status file_layout_recall(
1033 IN pnfs_layout_state *state,
1034 IN const struct cb_layoutrecall_args *recall)
1035 {
1036 const stateid4 *stateid = &recall->recall.args.file.stateid;
1037 enum pnfs_status status = PNFS_SUCCESS;
1038
1039 /* under an exclusive lock, flag the layout as recalled */
1040 AcquireSRWLockExclusive(&state->lock);
1041
1042 if (state->stateid.seqid == 0) {
1043 /* return NOMATCHINGLAYOUT if it wasn't actually granted */
1044 status = PNFSERR_NO_LAYOUT;
1045 goto out;
1046 }
1047
1048 if (recall->recall.type == PNFS_RETURN_FILE) {
1049 /* detect races between CB_LAYOUTRECALL and LAYOUTGET/LAYOUTRETURN */
1050 if (stateid->seqid > state->stateid.seqid + 1) {
1051 /* the server has processed an outstanding LAYOUTGET or
1052 * LAYOUTRETURN; we must return ERR_DELAY until we get the
1053 * response and update our view of the layout */
1054 status = PNFS_PENDING;
1055 goto out;
1056 }
1057
1058 /* save the updated seqid */
1059 state->stateid.seqid = stateid->seqid;
1060 }
1061
1062 if (state->io_count) {
1063 /* save an entry for this recall, and process it once io finishes */
1064 struct layout_recall *lrc = calloc(1, sizeof(struct layout_recall));
1065 if (lrc == NULL) {
1066 /* on failure to allocate, we'll have to respond
1067 * to the CB_LAYOUTRECALL with NFS4ERR_DELAY */
1068 status = PNFS_PENDING;
1069 goto out;
1070 }
1071 layout_recall_entry_init(lrc, recall);
1072 if (layout_recall_merge(&state->recalls, &lrc->layout) != PNFS_SUCCESS)
1073 list_add_tail(&state->recalls, &lrc->layout.entry);
1074 } else {
1075 /* if there is no pending io, process the recall immediately */
1076 struct layout_recall lrc = { 0 };
1077 layout_recall_entry_init(&lrc, recall);
1078 layout_recall_range(state, &lrc.layout);
1079 }
1080 out:
1081 ReleaseSRWLockExclusive(&state->lock);
1082 return status;
1083 }
1084
1085 static enum pnfs_status file_layout_recall_file(
1086 IN nfs41_client *client,
1087 IN const struct cb_layoutrecall_args *recall)
1088 {
1089 struct list_entry *entry;
1090 enum pnfs_status status;
1091
1092 dprintf(FLLVL, "--> file_layout_recall_file()\n");
1093
1094 EnterCriticalSection(&client->layouts->lock);
1095
1096 status = layout_entry_find(client->layouts, &recall->recall.args.file.fh, &entry);
1097 if (status == PNFS_SUCCESS)
1098 status = file_layout_recall(state_entry(entry), recall);
1099
1100 LeaveCriticalSection(&client->layouts->lock);
1101
1102 dprintf(FLLVL, "<-- file_layout_recall_file() returning %s\n",
1103 pnfs_error_string(status));
1104 return status;
1105 }
1106
1107 static bool_t fsid_matches(
1108 IN const nfs41_fsid *lhs,
1109 IN const nfs41_fsid *rhs)
1110 {
1111 return lhs->major == rhs->major && lhs->minor == rhs->minor;
1112 }
1113
1114 static enum pnfs_status file_layout_recall_fsid(
1115 IN nfs41_client *client,
1116 IN const struct cb_layoutrecall_args *recall)
1117 {
1118 struct list_entry *entry;
1119 pnfs_layout_state *state;
1120 nfs41_fh *fh;
1121 enum pnfs_status status = PNFSERR_NO_LAYOUT;
1122
1123 dprintf(FLLVL, "--> file_layout_recall_fsid(%llu, %llu)\n",
1124 recall->recall.args.fsid.major, recall->recall.args.fsid.minor);
1125
1126 EnterCriticalSection(&client->layouts->lock);
1127
1128 list_for_each(entry, &client->layouts->head) {
1129 state = state_entry(entry);
1130 /* no locks needed to read layout.meta_fh or superblock.fsid,
1131 * because they are only written once on creation */
1132 fh = &state->meta_fh;
1133 if (fsid_matches(&recall->recall.args.fsid, &fh->superblock->fsid))
1134 status = file_layout_recall(state, recall);
1135 }
1136
1137 LeaveCriticalSection(&client->layouts->lock);
1138
1139 /* bulk recalls require invalidation of cached device info */
1140 pnfs_file_device_list_invalidate(client->devices);
1141
1142 dprintf(FLLVL, "<-- file_layout_recall_fsid() returning %s\n",
1143 pnfs_error_string(status));
1144 return status;
1145 }
1146
1147 static enum pnfs_status file_layout_recall_all(
1148 IN nfs41_client *client,
1149 IN const struct cb_layoutrecall_args *recall)
1150 {
1151 struct list_entry *entry;
1152 enum pnfs_status status = PNFSERR_NO_LAYOUT;
1153
1154 dprintf(FLLVL, "--> file_layout_recall_all()\n");
1155
1156 EnterCriticalSection(&client->layouts->lock);
1157
1158 list_for_each(entry, &client->layouts->head)
1159 status = file_layout_recall(state_entry(entry), recall);
1160
1161 LeaveCriticalSection(&client->layouts->lock);
1162
1163 /* bulk recalls require invalidation of cached device info */
1164 pnfs_file_device_list_invalidate(client->devices);
1165
1166 dprintf(FLLVL, "<-- file_layout_recall_all() returning %s\n",
1167 pnfs_error_string(status));
1168 return status;
1169 }
1170
1171 enum pnfs_status pnfs_file_layout_recall(
1172 IN nfs41_client *client,
1173 IN const struct cb_layoutrecall_args *recall)
1174 {
1175 enum pnfs_status status = PNFS_SUCCESS;
1176
1177 dprintf(FLLVL, "--> pnfs_file_layout_recall(%u, %s, %u)\n",
1178 recall->recall.type, pnfs_iomode_string(recall->iomode),
1179 recall->changed);
1180
1181 if (recall->type != PNFS_LAYOUTTYPE_FILE) {
1182 dprintf(FLLVL, "invalid layout type %u (%s)!\n",
1183 recall->type, pnfs_layout_type_string(recall->type));
1184 status = PNFSERR_NOT_SUPPORTED;
1185 goto out;
1186 }
1187
1188 switch (recall->recall.type) {
1189 case PNFS_RETURN_FILE:
1190 status = file_layout_recall_file(client, recall);
1191 break;
1192 case PNFS_RETURN_FSID:
1193 status = file_layout_recall_fsid(client, recall);
1194 break;
1195 case PNFS_RETURN_ALL:
1196 status = file_layout_recall_all(client, recall);
1197 break;
1198
1199 default:
1200 dprintf(FLLVL, "invalid return type %u!\n", recall->recall);
1201 status = PNFSERR_NOT_SUPPORTED;
1202 goto out;
1203 }
1204 out:
1205 dprintf(FLLVL, "<-- pnfs_file_layout_recall() returning %s\n",
1206 pnfs_error_string(status));
1207 return status;
1208 }
1209
1210 /* expects caller to hold a shared lock on pnfs_layout_state */
1211 enum pnfs_status pnfs_layout_recall_status(
1212 IN const pnfs_layout_state *state,
1213 IN const pnfs_layout *layout)
1214 {
1215 struct list_entry *entry;
1216 enum pnfs_status status = PNFS_SUCCESS;
1217
1218 /* search for a pending recall that intersects with the given segment */
1219 list_for_each(entry, &state->recalls) {
1220 const struct layout_recall *recall = recall_entry(entry);
1221 if (!layout_recall_compatible(layout, &recall->layout))
1222 continue;
1223
1224 if (recall->changed)
1225 status = PNFSERR_LAYOUT_CHANGED;
1226 else
1227 status = PNFSERR_LAYOUT_RECALLED;
1228 break;
1229 }
1230 return status;
1231 }
1232
1233 void pnfs_layout_recall_fenced(
1234 IN pnfs_layout_state *state,
1235 IN const pnfs_layout *layout)
1236 {
1237 struct layout_recall *lrc = calloc(1, sizeof(struct layout_recall));
1238 if (lrc == NULL)
1239 return;
1240
1241 AcquireSRWLockExclusive(&state->lock);
1242
1243 list_init(&lrc->layout.entry);
1244 lrc->layout.offset = layout->offset;
1245 lrc->layout.length = layout->length;
1246 lrc->layout.iomode = layout->iomode;
1247 lrc->layout.type = layout->type;
1248 lrc->changed = TRUE;
1249
1250 if (layout_recall_merge(&state->recalls, &lrc->layout) != PNFS_SUCCESS)
1251 list_add_tail(&state->recalls, &lrc->layout.entry);
1252
1253 ReleaseSRWLockExclusive(&state->lock);
1254 }
1255
1256 /* expects caller to hold an exclusive lock on pnfs_layout_state */
1257 void pnfs_layout_io_start(
1258 IN pnfs_layout_state *state)
1259 {
1260 /* take a reference on the layout, so that it won't be recalled
1261 * until all io is finished */
1262 state->io_count++;
1263 dprintf(FLLVL, "pnfs_layout_io_start(): count -> %u\n",
1264 state->io_count);
1265 }
1266
1267 void pnfs_layout_io_finished(
1268 IN pnfs_layout_state *state)
1269 {
1270 AcquireSRWLockExclusive(&state->lock);
1271
1272 /* return the reference to signify that an io request is finished */
1273 state->io_count--;
1274 dprintf(FLLVL, "pnfs_layout_io_finished() count -> %u\n",
1275 state->io_count);
1276
1277 if (state->io_count > 0) /* more io pending */
1278 goto out_unlock;
1279
1280 /* once all io is finished, process any layout recalls */
1281 layout_state_deferred_recalls(state);
1282
1283 /* finish any segment merging that was delayed during io */
1284 if (!list_empty(&state->layouts))
1285 layout_state_merge(state, file_layout_entry(state->layouts.next));
1286
1287 out_unlock:
1288 ReleaseSRWLockExclusive(&state->lock);
1289 }