1 /* NFSv4.1 client for Windows
2 * Copyright © 2012 The Regents of the University of Michigan
4 * Olga Kornievskaia <aglo@umich.edu>
5 * Casey Bodley <cbodley@umich.edu>
7 * This library is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or (at
10 * your option) any later version.
12 * This library is distributed in the hope that it will be useful, but
13 * without any warranty; without even the implied warranty of merchantability
14 * or fitness for a particular purpose. See the GNU Lesser General Public
15 * License for more details.
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this library; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "nfs41_ops.h"
25 #include "nfs41_callback.h"
27 #include "daemon_debug.h"
30 #define FLLVL 2 /* dprintf level for file layout logging */
33 /* pnfs_layout_list */
34 struct pnfs_layout_list
{
35 struct list_entry head
;
36 CRITICAL_SECTION lock
;
39 #define state_entry(pos) list_container(pos, pnfs_layout_state, entry)
40 #define layout_entry(pos) list_container(pos, pnfs_layout, entry)
41 #define file_layout_entry(pos) list_container(pos, pnfs_file_layout, layout.entry)
43 static enum pnfs_status
layout_state_create(
44 IN
const nfs41_fh
*meta_fh
,
45 OUT pnfs_layout_state
**layout_out
)
47 pnfs_layout_state
*layout
;
48 enum pnfs_status status
= PNFS_SUCCESS
;
50 layout
= calloc(1, sizeof(pnfs_layout_state
));
52 status
= PNFSERR_RESOURCES
;
56 fh_copy(&layout
->meta_fh
, meta_fh
);
57 list_init(&layout
->layouts
);
58 list_init(&layout
->recalls
);
59 InitializeSRWLock(&layout
->lock
);
60 InitializeConditionVariable(&layout
->cond
);
67 static void file_layout_free(
68 IN pnfs_file_layout
*layout
)
70 if (layout
->device
) pnfs_file_device_put(layout
->device
);
71 free(layout
->filehandles
.arr
);
75 static void layout_state_free_layouts(
76 IN pnfs_layout_state
*state
)
78 struct list_entry
*entry
, *tmp
;
79 list_for_each_tmp(entry
, tmp
, &state
->layouts
)
80 file_layout_free(file_layout_entry(entry
));
81 list_init(&state
->layouts
);
84 static void layout_state_free_recalls(
85 IN pnfs_layout_state
*state
)
87 struct list_entry
*entry
, *tmp
;
88 list_for_each_tmp(entry
, tmp
, &state
->recalls
)
89 free(layout_entry(entry
));
90 list_init(&state
->recalls
);
93 static void layout_state_free(
94 IN pnfs_layout_state
*state
)
96 layout_state_free_layouts(state
);
97 layout_state_free_recalls(state
);
101 static int layout_entry_compare(
102 IN
const struct list_entry
*entry
,
103 IN
const void *value
)
105 const pnfs_layout_state
*layout
= state_entry(entry
);
106 const nfs41_fh
*meta_fh
= (const nfs41_fh
*)value
;
107 const nfs41_fh
*layout_fh
= (const nfs41_fh
*)&layout
->meta_fh
;
108 const uint32_t diff
= layout_fh
->len
- meta_fh
->len
;
109 return diff
? diff
: memcmp(layout_fh
->fh
, meta_fh
->fh
, meta_fh
->len
);
112 static enum pnfs_status
layout_entry_find(
113 IN
struct pnfs_layout_list
*layouts
,
114 IN
const nfs41_fh
*meta_fh
,
115 OUT
struct list_entry
**entry_out
)
117 *entry_out
= list_search(&layouts
->head
, meta_fh
, layout_entry_compare
);
118 return *entry_out
? PNFS_SUCCESS
: PNFSERR_NO_LAYOUT
;
121 enum pnfs_status
pnfs_layout_list_create(
122 OUT
struct pnfs_layout_list
**layouts_out
)
124 struct pnfs_layout_list
*layouts
;
125 enum pnfs_status status
= PNFS_SUCCESS
;
127 layouts
= calloc(1, sizeof(struct pnfs_layout_list
));
128 if (layouts
== NULL
) {
129 status
= PNFSERR_RESOURCES
;
132 list_init(&layouts
->head
);
133 InitializeCriticalSection(&layouts
->lock
);
134 *layouts_out
= layouts
;
139 void pnfs_layout_list_free(
140 IN
struct pnfs_layout_list
*layouts
)
142 struct list_entry
*entry
, *tmp
;
144 EnterCriticalSection(&layouts
->lock
);
146 list_for_each_tmp(entry
, tmp
, &layouts
->head
)
147 layout_state_free(state_entry(entry
));
149 LeaveCriticalSection(&layouts
->lock
);
150 DeleteCriticalSection(&layouts
->lock
);
154 static enum pnfs_status
layout_state_find_or_create(
155 IN
struct pnfs_layout_list
*layouts
,
156 IN
const nfs41_fh
*meta_fh
,
157 OUT pnfs_layout_state
**layout_out
)
159 struct list_entry
*entry
;
160 enum pnfs_status status
;
162 dprintf(FLLVL
, "--> layout_state_find_or_create()\n");
164 EnterCriticalSection(&layouts
->lock
);
166 /* search for an existing layout */
167 status
= layout_entry_find(layouts
, meta_fh
, &entry
);
169 /* create a new layout */
170 pnfs_layout_state
*layout
;
171 status
= layout_state_create(meta_fh
, &layout
);
172 if (status
== PNFS_SUCCESS
) {
173 /* add it to the list */
174 list_add_head(&layouts
->head
, &layout
->entry
);
175 *layout_out
= layout
;
177 dprintf(FLLVL
, "<-- layout_state_find_or_create() "
178 "returning new layout %p\n", layout
);
180 dprintf(FLLVL
, "<-- layout_state_find_or_create() "
181 "returning %s\n", pnfs_error_string(status
));
184 *layout_out
= state_entry(entry
);
186 dprintf(FLLVL
, "<-- layout_state_find_or_create() "
187 "returning existing layout %p\n", *layout_out
);
190 LeaveCriticalSection(&layouts
->lock
);
194 static enum pnfs_status
layout_state_find_and_delete(
195 IN
struct pnfs_layout_list
*layouts
,
196 IN
const nfs41_fh
*meta_fh
)
198 struct list_entry
*entry
;
199 enum pnfs_status status
;
201 dprintf(FLLVL
, "--> layout_state_find_and_delete()\n");
203 EnterCriticalSection(&layouts
->lock
);
205 status
= layout_entry_find(layouts
, meta_fh
, &entry
);
206 if (status
== PNFS_SUCCESS
) {
208 layout_state_free(state_entry(entry
));
211 LeaveCriticalSection(&layouts
->lock
);
213 dprintf(FLLVL
, "<-- layout_state_find_and_delete() "
214 "returning %s\n", pnfs_error_string(status
));
219 /* pnfs_file_layout */
220 static uint64_t range_max(
221 IN
const pnfs_layout
*layout
)
223 uint64_t result
= layout
->offset
+ layout
->length
;
224 return result
< layout
->offset
? NFS4_UINT64_MAX
: result
;
227 static bool_t
layout_sanity_check(
228 IN pnfs_file_layout
*layout
)
231 if (layout
->layout
.length
== 0 ||
232 layout
->layout
.iomode
< PNFS_IOMODE_READ
||
233 layout
->layout
.iomode
> PNFS_IOMODE_RW
||
234 layout_unit_size(layout
) == 0)
237 /* put a cap on layout.length to prevent overflow */
238 layout
->layout
.length
= range_max(&layout
->layout
) - layout
->layout
.offset
;
242 static int layout_filehandles_cmp(
243 IN
const pnfs_file_layout_handles
*lhs
,
244 IN
const pnfs_file_layout_handles
*rhs
)
246 const uint32_t diff
= rhs
->count
- lhs
->count
;
247 return diff
? diff
: memcmp(rhs
->arr
, lhs
->arr
,
248 rhs
->count
* sizeof(nfs41_path_fh
));
251 static bool_t
layout_merge_segments(
252 IN pnfs_file_layout
*to
,
253 IN pnfs_file_layout
*from
)
255 const uint64_t to_max
= range_max(&to
->layout
);
256 const uint64_t from_max
= range_max(&from
->layout
);
258 /* cannot merge a segment with itself */
262 /* the ranges must meet or overlap */
263 if (to_max
< from
->layout
.offset
|| from_max
< to
->layout
.offset
)
266 /* the following fields must match: */
267 if (to
->layout
.iomode
!= from
->layout
.iomode
||
268 to
->layout
.type
!= from
->layout
.type
||
269 layout_filehandles_cmp(&to
->filehandles
, &from
->filehandles
) != 0 ||
270 memcmp(to
->deviceid
, from
->deviceid
, PNFS_DEVICEID_SIZE
) != 0 ||
271 to
->pattern_offset
!= from
->pattern_offset
||
272 to
->first_index
!= from
->first_index
||
273 to
->util
!= from
->util
)
276 dprintf(FLLVL
, "merging layout range {%llu, %llu} with {%llu, %llu}\n",
277 to
->layout
.offset
, to
->layout
.length
,
278 from
->layout
.offset
, from
->layout
.length
);
280 /* calculate the union of the two ranges */
281 to
->layout
.offset
= min(to
->layout
.offset
, from
->layout
.offset
);
282 to
->layout
.length
= max(to_max
, from_max
) - to
->layout
.offset
;
286 static enum pnfs_status
layout_state_merge(
287 IN pnfs_layout_state
*state
,
288 IN pnfs_file_layout
*from
)
290 struct list_entry
*entry
, *tmp
;
291 pnfs_file_layout
*to
;
292 enum pnfs_status status
= PNFSERR_NO_LAYOUT
;
294 /* attempt to merge the new segment with each existing segment */
295 list_for_each_tmp(entry
, tmp
, &state
->layouts
) {
296 to
= file_layout_entry(entry
);
297 if (!layout_merge_segments(to
, from
))
300 /* on success, remove/free the new segment */
301 list_remove(&from
->layout
.entry
);
302 file_layout_free(from
);
303 status
= PNFS_SUCCESS
;
305 /* because the existing segment 'to' has grown, we may
306 * be able to merge it with later segments */
309 /* but if there could be io threads referencing this segment,
310 * we can't free it until io is finished */
317 static void layout_ordered_insert(
318 IN pnfs_layout_state
*state
,
319 IN pnfs_layout
*layout
)
321 struct list_entry
*entry
;
322 list_for_each(entry
, &state
->layouts
) {
323 pnfs_layout
*existing
= layout_entry(entry
);
325 /* maintain an order of increasing offset */
326 if (existing
->offset
< layout
->offset
)
329 /* when offsets are equal, prefer a longer segment first */
330 if (existing
->offset
== layout
->offset
&&
331 existing
->length
> layout
->length
)
334 list_add(&layout
->entry
, existing
->entry
.prev
, &existing
->entry
);
338 list_add_tail(&state
->layouts
, &layout
->entry
);
341 static enum pnfs_status
layout_update_range(
342 IN OUT pnfs_layout_state
*state
,
343 IN
const struct list_entry
*layouts
)
345 struct list_entry
*entry
, *tmp
;
346 pnfs_file_layout
*layout
;
347 enum pnfs_status status
= PNFSERR_NO_LAYOUT
;
349 list_for_each_tmp(entry
, tmp
, layouts
) {
350 layout
= file_layout_entry(entry
);
352 /* don't know what to do with non-file layouts */
353 if (layout
->layout
.type
!= PNFS_LAYOUTTYPE_FILE
)
356 if (!layout_sanity_check(layout
)) {
357 file_layout_free(layout
);
361 /* attempt to merge the range with existing segments */
362 status
= layout_state_merge(state
, layout
);
364 dprintf(FLLVL
, "saving new layout:\n");
365 dprint_layout(FLLVL
, layout
);
367 layout_ordered_insert(state
, &layout
->layout
);
368 status
= PNFS_SUCCESS
;
374 static enum pnfs_status
layout_update_stateid(
375 IN OUT pnfs_layout_state
*state
,
376 IN
const stateid4
*stateid
)
378 enum pnfs_status status
= PNFS_SUCCESS
;
380 if (state
->stateid
.seqid
== 0) {
381 /* save a new layout stateid */
382 memcpy(&state
->stateid
, stateid
, sizeof(stateid4
));
383 } else if (memcmp(&state
->stateid
.other
, stateid
->other
,
384 NFS4_STATEID_OTHER
) == 0) {
385 /* update an existing layout stateid */
386 state
->stateid
.seqid
= stateid
->seqid
;
388 status
= PNFSERR_NO_LAYOUT
;
393 static enum pnfs_status
layout_update(
394 IN OUT pnfs_layout_state
*state
,
395 IN
const pnfs_layoutget_res_ok
*layoutget_res
)
397 enum pnfs_status status
;
399 /* update the layout ranges held by the client */
400 status
= layout_update_range(state
, &layoutget_res
->layouts
);
402 eprintf("LAYOUTGET didn't return any file layouts\n");
405 /* update the layout stateid */
406 status
= layout_update_stateid(state
, &layoutget_res
->stateid
);
408 eprintf("LAYOUTGET returned a new stateid when we already had one\n");
411 /* if a previous LAYOUTGET set return_on_close, don't overwrite it */
412 if (!state
->return_on_close
)
413 state
->return_on_close
= layoutget_res
->return_on_close
;
418 static enum pnfs_status
file_layout_fetch(
419 IN OUT pnfs_layout_state
*state
,
420 IN nfs41_session
*session
,
421 IN nfs41_path_fh
*meta_file
,
422 IN stateid_arg
*stateid
,
423 IN
enum pnfs_iomode iomode
,
425 IN
uint64_t minlength
,
428 pnfs_layoutget_res_ok layoutget_res
= { 0 };
429 enum pnfs_status pnfsstat
= PNFS_SUCCESS
;
430 enum nfsstat4 nfsstat
;
432 dprintf(FLLVL
, "--> file_layout_fetch(%s, seqid=%u)\n",
433 pnfs_iomode_string(iomode
), state
->stateid
.seqid
);
435 list_init(&layoutget_res
.layouts
);
437 /* drop the lock during the rpc call */
438 ReleaseSRWLockExclusive(&state
->lock
);
439 nfsstat
= pnfs_rpc_layoutget(session
, meta_file
, stateid
,
440 iomode
, offset
, minlength
, length
, &layoutget_res
);
441 AcquireSRWLockExclusive(&state
->lock
);
444 dprintf(FLLVL
, "pnfs_rpc_layoutget() failed with %s\n",
445 nfs_error_string(nfsstat
));
446 pnfsstat
= PNFSERR_NOT_SUPPORTED
;
451 /* use the LAYOUTGET results to update our view of the layout */
452 pnfsstat
= layout_update(state
, &layoutget_res
);
455 case NFS4ERR_BADIOMODE
:
456 /* don't try RW again */
457 if (iomode
== PNFS_IOMODE_RW
)
458 state
->status
|= PNFS_LAYOUT_NOT_RW
;
461 case NFS4ERR_LAYOUTUNAVAILABLE
:
462 case NFS4ERR_UNKNOWN_LAYOUTTYPE
:
463 case NFS4ERR_BADLAYOUT
:
464 /* don't try again at all */
465 state
->status
|= PNFS_LAYOUT_UNAVAILABLE
;
469 dprintf(FLLVL
, "<-- file_layout_fetch() returning %s\n",
470 pnfs_error_string(pnfsstat
));
474 /* returns PNFS_SUCCESS if the client holds valid layouts that cover
475 * the entire range requested. otherwise, returns PNFS_PENDING and
476 * sets 'offset_missing' to the lowest offset that is not covered */
477 static enum pnfs_status
layout_coverage_status(
478 IN pnfs_layout_state
*state
,
479 IN
enum pnfs_iomode iomode
,
482 OUT
uint64_t *offset_missing
)
484 uint64_t position
= offset
;
485 struct list_entry
*entry
;
487 list_for_each(entry
, &state
->layouts
) {
488 /* if the current position intersects with a compatible
489 * layout, move the position to the end of that layout */
490 pnfs_layout
*layout
= layout_entry(entry
);
491 if (layout
->iomode
>= iomode
&&
492 layout
->offset
<= position
&&
493 position
< layout
->offset
+ layout
->length
)
494 position
= layout
->offset
+ layout
->length
;
497 if (position
>= offset
+ length
)
500 *offset_missing
= position
;
504 static enum pnfs_status
layout_fetch(
505 IN pnfs_layout_state
*state
,
506 IN nfs41_session
*session
,
507 IN nfs41_path_fh
*meta_file
,
508 IN stateid_arg
*stateid
,
509 IN
enum pnfs_iomode iomode
,
513 stateid_arg layout_stateid
= { 0 };
514 enum pnfs_status status
= PNFS_PENDING
;
516 /* check for previous errors from LAYOUTGET */
517 if ((state
->status
& PNFS_LAYOUT_UNAVAILABLE
) ||
518 ((state
->status
& PNFS_LAYOUT_NOT_RW
) && iomode
== PNFS_IOMODE_RW
)) {
519 status
= PNFSERR_NO_LAYOUT
;
523 /* wait for any pending LAYOUTGETs/LAYOUTRETURNs */
524 while (state
->pending
)
525 SleepConditionVariableSRW(&state
->cond
, &state
->lock
, INFINITE
, 0);
526 state
->pending
= TRUE
;
528 /* if there's an existing layout stateid, use it */
529 if (state
->stateid
.seqid
) {
530 memcpy(&layout_stateid
.stateid
, &state
->stateid
, sizeof(stateid4
));
531 layout_stateid
.type
= STATEID_LAYOUT
;
532 stateid
= &layout_stateid
;
535 if ((state
->status
& PNFS_LAYOUT_NOT_RW
) == 0) {
536 /* try to get a RW layout first */
537 status
= file_layout_fetch(state
, session
, meta_file
,
538 stateid
, PNFS_IOMODE_RW
, offset
, length
, NFS4_UINT64_MAX
);
541 if (status
&& iomode
== PNFS_IOMODE_READ
) {
542 /* fall back on READ if necessary */
543 status
= file_layout_fetch(state
, session
, meta_file
,
544 stateid
, iomode
, offset
, length
, NFS4_UINT64_MAX
);
547 state
->pending
= FALSE
;
548 WakeConditionVariable(&state
->cond
);
553 static enum pnfs_status
device_status(
554 IN pnfs_layout_state
*state
,
557 OUT
unsigned char *deviceid
)
559 struct list_entry
*entry
;
560 enum pnfs_status status
= PNFS_SUCCESS
;
562 list_for_each(entry
, &state
->layouts
) {
563 pnfs_file_layout
*layout
= file_layout_entry(entry
);
565 if (layout
->device
== NULL
) {
566 /* copy missing deviceid */
567 memcpy(deviceid
, layout
->deviceid
, PNFS_DEVICEID_SIZE
);
568 status
= PNFS_PENDING
;
575 static void device_assign(
576 IN pnfs_layout_state
*state
,
577 IN
const unsigned char *deviceid
,
578 IN pnfs_file_device
*device
)
580 struct list_entry
*entry
;
581 list_for_each(entry
, &state
->layouts
) {
582 pnfs_file_layout
*layout
= file_layout_entry(entry
);
584 /* assign the device to any matching layouts */
585 if (layout
->device
== NULL
&&
586 memcmp(layout
->deviceid
, deviceid
, PNFS_DEVICEID_SIZE
) == 0) {
587 layout
->device
= device
;
589 /* XXX: only assign the device to a single segment, because
590 * pnfs_file_device_get() only gives us a single reference */
596 static enum pnfs_status
device_fetch(
597 IN pnfs_layout_state
*state
,
598 IN nfs41_session
*session
,
599 IN
unsigned char *deviceid
)
601 pnfs_file_device
*device
;
602 enum pnfs_status status
;
604 /* drop the layoutstate lock for the rpc call */
605 ReleaseSRWLockExclusive(&state
->lock
);
606 status
= pnfs_file_device_get(session
,
607 session
->client
->devices
, deviceid
, &device
);
608 AcquireSRWLockExclusive(&state
->lock
);
610 if (status
== PNFS_SUCCESS
)
611 device_assign(state
, deviceid
, device
);
616 /* nfs41_open_state */
617 static enum pnfs_status
client_supports_pnfs(
618 IN nfs41_client
*client
)
620 enum pnfs_status status
;
621 AcquireSRWLockShared(&client
->exid_lock
);
622 status
= client
->roles
& EXCHGID4_FLAG_USE_PNFS_MDS
623 ? PNFS_SUCCESS
: PNFSERR_NOT_SUPPORTED
;
624 ReleaseSRWLockShared(&client
->exid_lock
);
628 static enum pnfs_status
fs_supports_layout(
629 IN
const nfs41_superblock
*superblock
,
630 IN
enum pnfs_layout_type type
)
632 const uint32_t flag
= 1 << (type
- 1);
633 return (superblock
->layout_types
& flag
) == 0
634 ? PNFSERR_NOT_SUPPORTED
: PNFS_SUCCESS
;
637 static enum pnfs_status
open_state_layout_cached(
638 IN nfs41_open_state
*state
,
639 OUT pnfs_layout_state
**layout_out
)
641 enum pnfs_status status
= PNFSERR_NO_LAYOUT
;
644 status
= PNFS_SUCCESS
;
645 *layout_out
= state
->layout
;
647 dprintf(FLLVL
, "pnfs_open_state_layout() found "
648 "cached layout %p\n", *layout_out
);
653 enum pnfs_status
pnfs_layout_state_open(
654 IN nfs41_open_state
*state
,
655 OUT pnfs_layout_state
**layout_out
)
657 struct pnfs_layout_list
*layouts
= state
->session
->client
->layouts
;
658 nfs41_session
*session
= state
->session
;
659 pnfs_layout_state
*layout
;
660 enum pnfs_status status
;
662 dprintf(FLLVL
, "--> pnfs_layout_state_open()\n");
664 status
= client_supports_pnfs(session
->client
);
667 status
= fs_supports_layout(state
->file
.fh
.superblock
, PNFS_LAYOUTTYPE_FILE
);
671 /* under shared lock, check open state for cached layouts */
672 AcquireSRWLockShared(&state
->lock
);
673 status
= open_state_layout_cached(state
, &layout
);
674 ReleaseSRWLockShared(&state
->lock
);
677 /* under exclusive lock, find or create a layout for this file */
678 AcquireSRWLockExclusive(&state
->lock
);
680 status
= open_state_layout_cached(state
, &layout
);
682 status
= layout_state_find_or_create(layouts
, &state
->file
.fh
, &layout
);
683 if (status
== PNFS_SUCCESS
) {
684 LONG open_count
= InterlockedIncrement(&layout
->open_count
);
685 state
->layout
= layout
;
687 dprintf(FLLVL
, "pnfs_layout_state_open() caching layout %p "
688 "(%u opens)\n", state
->layout
, open_count
);
692 ReleaseSRWLockExclusive(&state
->lock
);
698 *layout_out
= layout
;
700 dprintf(FLLVL
, "<-- pnfs_layout_state_open() returning %s\n",
701 pnfs_error_string(status
));
705 /* expects caller to hold an exclusive lock on pnfs_layout_state */
706 enum pnfs_status
pnfs_layout_state_prepare(
707 IN pnfs_layout_state
*state
,
708 IN nfs41_session
*session
,
709 IN nfs41_path_fh
*meta_file
,
710 IN stateid_arg
*stateid
,
711 IN
enum pnfs_iomode iomode
,
715 unsigned char deviceid
[PNFS_DEVICEID_SIZE
];
716 struct list_entry
*entry
;
718 enum pnfs_status status
;
720 /* fail if the range intersects any pending recalls */
721 list_for_each(entry
, &state
->recalls
) {
722 const pnfs_layout
*recall
= layout_entry(entry
);
723 if (offset
<= recall
->offset
+ recall
->length
724 && recall
->offset
<= offset
+ length
) {
725 status
= PNFSERR_LAYOUT_RECALLED
;
730 /* if part of the given range is not covered by a layout,
731 * attempt to fetch it with LAYOUTGET */
732 status
= layout_coverage_status(state
, iomode
, offset
, length
, &missing
);
733 if (status
== PNFS_PENDING
) {
734 status
= layout_fetch(state
, session
, meta_file
, stateid
,
735 iomode
, missing
, offset
+ length
- missing
);
737 /* return pending because layout_fetch() dropped the lock */
738 if (status
== PNFS_SUCCESS
)
739 status
= PNFS_PENDING
;
743 /* if any layouts in the range are missing device info,
744 * fetch them with GETDEVICEINFO */
745 status
= device_status(state
, offset
, length
, deviceid
);
746 if (status
== PNFS_PENDING
) {
747 status
= device_fetch(state
, session
, deviceid
);
749 /* return pending because device_fetch() dropped the lock */
750 if (status
== PNFS_SUCCESS
)
751 status
= PNFS_PENDING
;
758 static enum pnfs_status
layout_return_status(
759 IN
const pnfs_layout_state
*state
)
761 /* return the layout if we have a stateid */
762 return state
->stateid
.seqid
? PNFS_SUCCESS
: PNFS_PENDING
;
765 static enum pnfs_status
file_layout_return(
766 IN nfs41_session
*session
,
767 IN nfs41_path_fh
*file
,
768 IN pnfs_layout_state
*state
)
770 enum pnfs_status status
;
771 enum nfsstat4 nfsstat
;
773 dprintf(FLLVL
, "--> file_layout_return()\n");
775 /* under shared lock, determine whether we need to return the layout */
776 AcquireSRWLockShared(&state
->lock
);
777 status
= layout_return_status(state
);
778 ReleaseSRWLockShared(&state
->lock
);
780 if (status
!= PNFS_PENDING
)
783 /* under exclusive lock, return the layout and reset status flags */
784 AcquireSRWLockExclusive(&state
->lock
);
786 /* wait for any pending LAYOUTGETs/LAYOUTRETURNs */
787 while (state
->pending
)
788 SleepConditionVariableSRW(&state
->cond
, &state
->lock
, INFINITE
, 0);
789 state
->pending
= TRUE
;
791 status
= layout_return_status(state
);
792 if (status
== PNFS_PENDING
) {
793 pnfs_layoutreturn_res layoutreturn_res
= { 0 };
795 memcpy(&stateid
, &state
->stateid
, sizeof(stateid
));
797 /* drop the lock during the rpc call */
798 ReleaseSRWLockExclusive(&state
->lock
);
799 nfsstat
= pnfs_rpc_layoutreturn(session
, file
, PNFS_LAYOUTTYPE_FILE
,
800 PNFS_IOMODE_ANY
, 0, NFS4_UINT64_MAX
, &stateid
, &layoutreturn_res
);
801 AcquireSRWLockExclusive(&state
->lock
);
804 eprintf("pnfs_rpc_layoutreturn() failed with %s\n",
805 nfs_error_string(nfsstat
));
806 status
= PNFSERR_NO_LAYOUT
;
808 status
= PNFS_SUCCESS
;
810 /* update the layout range held by the client */
811 layout_state_free_layouts(state
);
813 /* 12.5.3. Layout Stateid: Once a client has no more
814 * layouts on a file, the layout stateid is no longer
815 * valid and MUST NOT be used. */
816 ZeroMemory(&state
->stateid
, sizeof(stateid4
));
820 state
->pending
= FALSE
;
821 WakeConditionVariable(&state
->cond
);
822 ReleaseSRWLockExclusive(&state
->lock
);
825 dprintf(FLLVL
, "<-- file_layout_return() returning %s\n",
826 pnfs_error_string(status
));
830 void pnfs_layout_state_close(
831 IN nfs41_session
*session
,
832 IN nfs41_open_state
*state
,
835 pnfs_layout_state
*layout
;
836 bool_t return_layout
;
837 enum pnfs_status status
;
839 AcquireSRWLockExclusive(&state
->lock
);
840 layout
= state
->layout
;
841 state
->layout
= NULL
;
842 ReleaseSRWLockExclusive(&state
->lock
);
845 LONG open_count
= InterlockedDecrement(&layout
->open_count
);
847 AcquireSRWLockShared(&layout
->lock
);
848 /* only return on close if it's the last close */
849 return_layout
= layout
->return_on_close
&& (open_count
<= 0);
850 ReleaseSRWLockShared(&layout
->lock
);
853 status
= file_layout_return(session
, &state
->file
, layout
);
855 eprintf("file_layout_return() failed with %s\n",
856 pnfs_error_string(status
));
860 if (remove
&& session
->client
->layouts
) {
861 /* free the layout when the file is removed */
862 layout_state_find_and_delete(session
->client
->layouts
, &state
->file
.fh
);
867 /* pnfs_layout_recall */
868 struct layout_recall
{
872 #define recall_entry(pos) list_container(pos, struct layout_recall, layout.entry)
874 static bool_t
layout_recall_compatible(
875 IN
const pnfs_layout
*layout
,
876 IN
const pnfs_layout
*recall
)
878 return layout
->type
== recall
->type
879 && layout
->offset
<= (recall
->offset
+ recall
->length
)
880 && recall
->offset
<= (layout
->offset
+ layout
->length
)
881 && (recall
->iomode
== PNFS_IOMODE_ANY
||
882 layout
->iomode
== recall
->iomode
);
885 static pnfs_file_layout
* layout_allocate_copy(
886 IN
const pnfs_file_layout
*existing
)
888 /* allocate a segment to cover the end of the range */
889 pnfs_file_layout
*layout
= calloc(1, sizeof(pnfs_file_layout
));
893 memcpy(layout
, existing
, sizeof(pnfs_file_layout
));
895 /* XXX: don't use the device from existing layout;
896 * we need to get a reference for ourselves */
897 layout
->device
= NULL
;
899 /* allocate a copy of the filehandle array */
900 layout
->filehandles
.arr
= calloc(layout
->filehandles
.count
,
901 sizeof(nfs41_path_fh
));
902 if (layout
->filehandles
.arr
== NULL
)
905 memcpy(layout
->filehandles
.arr
, existing
->filehandles
.arr
,
906 layout
->filehandles
.count
* sizeof(nfs41_path_fh
));
911 file_layout_free(layout
);
916 static void layout_recall_range(
917 IN pnfs_layout_state
*state
,
918 IN
const pnfs_layout
*recall
)
920 struct list_entry
*entry
, *tmp
;
921 list_for_each_tmp(entry
, tmp
, &state
->layouts
) {
922 pnfs_file_layout
*layout
= file_layout_entry(entry
);
923 const uint64_t layout_end
= layout
->layout
.offset
+ layout
->layout
.length
;
925 if (!layout_recall_compatible(&layout
->layout
, recall
))
928 if (recall
->offset
> layout
->layout
.offset
) {
929 /* segment starts before recall; shrink length */
930 layout
->layout
.length
= recall
->offset
- layout
->layout
.offset
;
932 if (layout_end
> recall
->offset
+ recall
->length
) {
933 /* middle chunk of the segment is recalled;
934 * allocate a new segment to cover the end */
935 pnfs_file_layout
*remainder
= layout_allocate_copy(layout
);
936 if (remainder
== NULL
) {
937 /* silently ignore allocation errors here. behave
938 * as if we 'forgot' this last segment */
940 layout
->layout
.offset
= recall
->offset
+ recall
->length
;
941 layout
->layout
.length
= layout_end
- layout
->layout
.offset
;
942 layout_ordered_insert(state
, &remainder
->layout
);
946 /* segment starts after recall */
947 if (layout_end
<= recall
->offset
+ recall
->length
) {
948 /* entire segment is recalled */
949 list_remove(&layout
->layout
.entry
);
950 file_layout_free(layout
);
952 /* beginning of segment is recalled; shrink offset/length */
953 layout
->layout
.offset
= recall
->offset
+ recall
->length
;
954 layout
->layout
.length
= layout_end
- layout
->layout
.offset
;
960 static void layout_state_deferred_recalls(
961 IN pnfs_layout_state
*state
)
963 struct list_entry
*entry
, *tmp
;
964 list_for_each_tmp(entry
, tmp
, &state
->recalls
) {
965 /* process each deferred layout recall */
966 pnfs_layout
*recall
= layout_entry(entry
);
967 layout_recall_range(state
, recall
);
969 /* remove/free the recall entry */
970 list_remove(&recall
->entry
);
975 static void layout_recall_entry_init(
976 OUT
struct layout_recall
*lrc
,
977 IN
const struct cb_layoutrecall_args
*recall
)
979 list_init(&lrc
->layout
.entry
);
980 if (recall
->recall
.type
== PNFS_RETURN_FILE
) {
981 lrc
->layout
.offset
= recall
->recall
.args
.file
.offset
;
982 lrc
->layout
.length
= recall
->recall
.args
.file
.length
;
984 lrc
->layout
.offset
= 0;
985 lrc
->layout
.length
= NFS4_UINT64_MAX
;
987 lrc
->layout
.iomode
= recall
->iomode
;
988 lrc
->layout
.type
= PNFS_LAYOUTTYPE_FILE
;
989 lrc
->changed
= recall
->changed
;
992 static enum pnfs_status
layout_recall_merge(
993 IN
struct list_entry
*list
,
994 IN pnfs_layout
*from
)
996 struct list_entry
*entry
, *tmp
;
997 enum pnfs_status status
= PNFSERR_NO_LAYOUT
;
999 /* attempt to merge the new recall with each existing recall */
1000 list_for_each_tmp(entry
, tmp
, list
) {
1001 pnfs_layout
*to
= layout_entry(entry
);
1002 const uint64_t to_max
= to
->offset
+ to
->length
;
1003 const uint64_t from_max
= from
->offset
+ from
->length
;
1005 /* the ranges must meet or overlap */
1006 if (to_max
< from
->offset
|| from_max
< to
->offset
)
1009 /* the following fields must match: */
1010 if (to
->iomode
!= from
->iomode
|| to
->type
!= from
->type
)
1013 dprintf(FLLVL
, "merging recalled range {%llu, %llu} with {%llu, %llu}\n",
1014 to
->offset
, to
->length
, from
->offset
, from
->length
);
1016 /* calculate the union of the two ranges */
1017 to
->offset
= min(to
->offset
, from
->offset
);
1018 to
->length
= max(to_max
, from_max
) - to
->offset
;
1020 /* on success, remove/free the new segment */
1021 list_remove(&from
->entry
);
1023 status
= PNFS_SUCCESS
;
1025 /* because the existing segment 'to' has grown, we may
1026 * be able to merge it with later segments */
1032 static enum pnfs_status
file_layout_recall(
1033 IN pnfs_layout_state
*state
,
1034 IN
const struct cb_layoutrecall_args
*recall
)
1036 const stateid4
*stateid
= &recall
->recall
.args
.file
.stateid
;
1037 enum pnfs_status status
= PNFS_SUCCESS
;
1039 /* under an exclusive lock, flag the layout as recalled */
1040 AcquireSRWLockExclusive(&state
->lock
);
1042 if (state
->stateid
.seqid
== 0) {
1043 /* return NOMATCHINGLAYOUT if it wasn't actually granted */
1044 status
= PNFSERR_NO_LAYOUT
;
1048 if (recall
->recall
.type
== PNFS_RETURN_FILE
) {
1049 /* detect races between CB_LAYOUTRECALL and LAYOUTGET/LAYOUTRETURN */
1050 if (stateid
->seqid
> state
->stateid
.seqid
+ 1) {
1051 /* the server has processed an outstanding LAYOUTGET or
1052 * LAYOUTRETURN; we must return ERR_DELAY until we get the
1053 * response and update our view of the layout */
1054 status
= PNFS_PENDING
;
1058 /* save the updated seqid */
1059 state
->stateid
.seqid
= stateid
->seqid
;
1062 if (state
->io_count
) {
1063 /* save an entry for this recall, and process it once io finishes */
1064 struct layout_recall
*lrc
= calloc(1, sizeof(struct layout_recall
));
1066 /* on failure to allocate, we'll have to respond
1067 * to the CB_LAYOUTRECALL with NFS4ERR_DELAY */
1068 status
= PNFS_PENDING
;
1071 layout_recall_entry_init(lrc
, recall
);
1072 if (layout_recall_merge(&state
->recalls
, &lrc
->layout
) != PNFS_SUCCESS
)
1073 list_add_tail(&state
->recalls
, &lrc
->layout
.entry
);
1075 /* if there is no pending io, process the recall immediately */
1076 struct layout_recall lrc
= { 0 };
1077 layout_recall_entry_init(&lrc
, recall
);
1078 layout_recall_range(state
, &lrc
.layout
);
1081 ReleaseSRWLockExclusive(&state
->lock
);
1085 static enum pnfs_status
file_layout_recall_file(
1086 IN nfs41_client
*client
,
1087 IN
const struct cb_layoutrecall_args
*recall
)
1089 struct list_entry
*entry
;
1090 enum pnfs_status status
;
1092 dprintf(FLLVL
, "--> file_layout_recall_file()\n");
1094 EnterCriticalSection(&client
->layouts
->lock
);
1096 status
= layout_entry_find(client
->layouts
, &recall
->recall
.args
.file
.fh
, &entry
);
1097 if (status
== PNFS_SUCCESS
)
1098 status
= file_layout_recall(state_entry(entry
), recall
);
1100 LeaveCriticalSection(&client
->layouts
->lock
);
1102 dprintf(FLLVL
, "<-- file_layout_recall_file() returning %s\n",
1103 pnfs_error_string(status
));
1107 static bool_t
fsid_matches(
1108 IN
const nfs41_fsid
*lhs
,
1109 IN
const nfs41_fsid
*rhs
)
1111 return lhs
->major
== rhs
->major
&& lhs
->minor
== rhs
->minor
;
1114 static enum pnfs_status
file_layout_recall_fsid(
1115 IN nfs41_client
*client
,
1116 IN
const struct cb_layoutrecall_args
*recall
)
1118 struct list_entry
*entry
;
1119 pnfs_layout_state
*state
;
1121 enum pnfs_status status
= PNFSERR_NO_LAYOUT
;
1123 dprintf(FLLVL
, "--> file_layout_recall_fsid(%llu, %llu)\n",
1124 recall
->recall
.args
.fsid
.major
, recall
->recall
.args
.fsid
.minor
);
1126 EnterCriticalSection(&client
->layouts
->lock
);
1128 list_for_each(entry
, &client
->layouts
->head
) {
1129 state
= state_entry(entry
);
1130 /* no locks needed to read layout.meta_fh or superblock.fsid,
1131 * because they are only written once on creation */
1132 fh
= &state
->meta_fh
;
1133 if (fsid_matches(&recall
->recall
.args
.fsid
, &fh
->superblock
->fsid
))
1134 status
= file_layout_recall(state
, recall
);
1137 LeaveCriticalSection(&client
->layouts
->lock
);
1139 /* bulk recalls require invalidation of cached device info */
1140 pnfs_file_device_list_invalidate(client
->devices
);
1142 dprintf(FLLVL
, "<-- file_layout_recall_fsid() returning %s\n",
1143 pnfs_error_string(status
));
1147 static enum pnfs_status
file_layout_recall_all(
1148 IN nfs41_client
*client
,
1149 IN
const struct cb_layoutrecall_args
*recall
)
1151 struct list_entry
*entry
;
1152 enum pnfs_status status
= PNFSERR_NO_LAYOUT
;
1154 dprintf(FLLVL
, "--> file_layout_recall_all()\n");
1156 EnterCriticalSection(&client
->layouts
->lock
);
1158 list_for_each(entry
, &client
->layouts
->head
)
1159 status
= file_layout_recall(state_entry(entry
), recall
);
1161 LeaveCriticalSection(&client
->layouts
->lock
);
1163 /* bulk recalls require invalidation of cached device info */
1164 pnfs_file_device_list_invalidate(client
->devices
);
1166 dprintf(FLLVL
, "<-- file_layout_recall_all() returning %s\n",
1167 pnfs_error_string(status
));
1171 enum pnfs_status
pnfs_file_layout_recall(
1172 IN nfs41_client
*client
,
1173 IN
const struct cb_layoutrecall_args
*recall
)
1175 enum pnfs_status status
= PNFS_SUCCESS
;
1177 dprintf(FLLVL
, "--> pnfs_file_layout_recall(%u, %s, %u)\n",
1178 recall
->recall
.type
, pnfs_iomode_string(recall
->iomode
),
1181 if (recall
->type
!= PNFS_LAYOUTTYPE_FILE
) {
1182 dprintf(FLLVL
, "invalid layout type %u (%s)!\n",
1183 recall
->type
, pnfs_layout_type_string(recall
->type
));
1184 status
= PNFSERR_NOT_SUPPORTED
;
1188 switch (recall
->recall
.type
) {
1189 case PNFS_RETURN_FILE
:
1190 status
= file_layout_recall_file(client
, recall
);
1192 case PNFS_RETURN_FSID
:
1193 status
= file_layout_recall_fsid(client
, recall
);
1195 case PNFS_RETURN_ALL
:
1196 status
= file_layout_recall_all(client
, recall
);
1200 dprintf(FLLVL
, "invalid return type %u!\n", recall
->recall
);
1201 status
= PNFSERR_NOT_SUPPORTED
;
1205 dprintf(FLLVL
, "<-- pnfs_file_layout_recall() returning %s\n",
1206 pnfs_error_string(status
));
1210 /* expects caller to hold a shared lock on pnfs_layout_state */
1211 enum pnfs_status
pnfs_layout_recall_status(
1212 IN
const pnfs_layout_state
*state
,
1213 IN
const pnfs_layout
*layout
)
1215 struct list_entry
*entry
;
1216 enum pnfs_status status
= PNFS_SUCCESS
;
1218 /* search for a pending recall that intersects with the given segment */
1219 list_for_each(entry
, &state
->recalls
) {
1220 const struct layout_recall
*recall
= recall_entry(entry
);
1221 if (!layout_recall_compatible(layout
, &recall
->layout
))
1224 if (recall
->changed
)
1225 status
= PNFSERR_LAYOUT_CHANGED
;
1227 status
= PNFSERR_LAYOUT_RECALLED
;
1233 void pnfs_layout_recall_fenced(
1234 IN pnfs_layout_state
*state
,
1235 IN
const pnfs_layout
*layout
)
1237 struct layout_recall
*lrc
= calloc(1, sizeof(struct layout_recall
));
1241 AcquireSRWLockExclusive(&state
->lock
);
1243 list_init(&lrc
->layout
.entry
);
1244 lrc
->layout
.offset
= layout
->offset
;
1245 lrc
->layout
.length
= layout
->length
;
1246 lrc
->layout
.iomode
= layout
->iomode
;
1247 lrc
->layout
.type
= layout
->type
;
1248 lrc
->changed
= TRUE
;
1250 if (layout_recall_merge(&state
->recalls
, &lrc
->layout
) != PNFS_SUCCESS
)
1251 list_add_tail(&state
->recalls
, &lrc
->layout
.entry
);
1253 ReleaseSRWLockExclusive(&state
->lock
);
1256 /* expects caller to hold an exclusive lock on pnfs_layout_state */
1257 void pnfs_layout_io_start(
1258 IN pnfs_layout_state
*state
)
1260 /* take a reference on the layout, so that it won't be recalled
1261 * until all io is finished */
1263 dprintf(FLLVL
, "pnfs_layout_io_start(): count -> %u\n",
1267 void pnfs_layout_io_finished(
1268 IN pnfs_layout_state
*state
)
1270 AcquireSRWLockExclusive(&state
->lock
);
1272 /* return the reference to signify that an io request is finished */
1274 dprintf(FLLVL
, "pnfs_layout_io_finished() count -> %u\n",
1277 if (state
->io_count
> 0) /* more io pending */
1280 /* once all io is finished, process any layout recalls */
1281 layout_state_deferred_recalls(state
);
1283 /* finish any segment merging that was delayed during io */
1284 if (!list_empty(&state
->layouts
))
1285 layout_state_merge(state
, file_layout_entry(state
->layouts
.next
));
1288 ReleaseSRWLockExclusive(&state
->lock
);