/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2009 Rick Macklem, University of Guelph
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * These functions implement the client side state handling for NFSv4.
 * NFSv4 state handling:
 * - A lockowner is used to determine lock contention, so it
 *   corresponds directly to a Posix pid. (1 to 1 mapping)
 * - The correct granularity of an OpenOwner is not nearly so
 *   obvious. An OpenOwner does the following:
 *   - provides a serial sequencing of Open/Close/Lock-with-new-lockowner
 *   - is used to check for Open/Share contention (not applicable to
 *     this client, since all Opens are Deny_None)
 *   As such, I considered both extremes.
 *   1 OpenOwner per ClientID - Simple to manage, but fully serializes
 *   all Open, Close and Lock (with a new lockowner) Ops.
 *   1 OpenOwner for each Open - This one results in an OpenConfirm for
 *   every Open, for most servers.
 *   So, I chose to use the same mapping as I did for LockOwners.
 *   The main concern here is that you can end up with multiple Opens
 *   for the same File Handle, but on different OpenOwners (opens
 *   inherited from parents, grandparents...) and you do not know
 *   which of these the vnodeop close applies to. This is handled by
 *   delaying the Close Op(s) until all of the Opens have been closed.
 *   (It is not yet obvious if this is the correct granularity.)
 * - How the code handles serialization:
 *   - For the ClientId, it uses an exclusive lock while getting its
 *     SetClientId and during recovery. Otherwise, it uses a shared
 *     lock via a reference count.
 *   - For the rest of the data structures, it uses an SMP mutex
 *     (once the nfs client is SMP safe) and doesn't sleep while
 *     manipulating the linked lists.
 *   - The serialization of Open/Close/Lock/LockU falls out in the
 *     "wash", since OpenOwners and LockOwners are both mapped from
 *     Posix pid. In other words, there is only one Posix pid using
 *     any given owner, so that owner is serialized. (If you change
 *     the granularity of the OpenOwner, then code must be added to
 *     serialize Ops on the OpenOwner.)
 * - When to get rid of OpenOwners and LockOwners.
 *   - The function nfscl_cleanup_common() is executed after a process exits.
 *     It goes through the client list looking for all Open and Lock Owners.
 *     When one is found, it is marked "defunct" or in the case of
 *     an OpenOwner without any Opens, freed.
 *     The renew thread scans for defunct Owners and gets rid of them,
 *     if it can. The LockOwners will also be deleted when the
 *     associated Open is closed.
 *   - If the LockU or Close Op(s) fail during close in a way
 *     that could be recovered upon retry, they are relinked to the
 *     ClientId's defunct open list and retried by the renew thread
 *     until they succeed or an unmount/recovery occurs.
 *     (Since we are done with them, they do not need to be recovered.)
 */

#include <fs/nfs/nfsport.h>

/*
 * Global variables
 */
extern struct nfsstatsv1 nfsstatsv1;
extern struct nfsreqhead nfsd_reqq;
extern u_int32_t newnfs_false, newnfs_true;
extern int nfscl_debuglevel;
extern int nfscl_enablecallb;
extern int nfs_numnfscbd;
NFSREQSPINLOCK;
NFSCLSTATEMUTEX;
int nfscl_inited = 0;
struct nfsclhead nfsclhead;	/* Head of clientid list */
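/*
 * Limits above which the renew thread starts trimming unused
 * delegations and layouts.
 */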
int nfscl_deleghighwater = NFSCLDELEGHIGHWATER;
int nfscl_layouthighwater = NFSCLLAYOUTHIGHWATER;

static int nfscl_delegcnt = 0;
static int nfscl_layoutcnt = 0;
static int nfscl_getopen(struct nfsclownerhead *, struct nfsclopenhash *,
    u_int8_t *, int, u_int8_t *, u_int8_t *, u_int32_t,
    struct nfscllockowner **, struct nfsclopen **);
static bool nfscl_checkown(struct nfsclowner *, struct nfsclopen *, uint8_t *,
    uint8_t *, struct nfscllockowner **, struct nfsclopen **,
    struct nfsclopen **);
static void nfscl_clrelease(struct nfsclclient *);
static void nfscl_unlinkopen(struct nfsclopen *);
static void nfscl_cleanclient(struct nfsclclient *);
static void nfscl_expireclient(struct nfsclclient *, struct nfsmount *,
    struct ucred *, NFSPROC_T *);
static int nfscl_expireopen(struct nfsclclient *, struct nfsclopen *,
    struct nfsmount *, struct ucred *, NFSPROC_T *);
static void nfscl_recover(struct nfsclclient *, bool *, struct ucred *,
    NFSPROC_T *);
static void nfscl_insertlock(struct nfscllockowner *, struct nfscllock *,
    struct nfscllock *, int);
static int nfscl_updatelock(struct nfscllockowner *, struct nfscllock **,
    struct nfscllock **, int);
static void nfscl_delegreturnall(struct nfsclclient *, NFSPROC_T *,
    struct nfscldeleghead *);
static u_int32_t nfscl_nextcbident(void);
static mount_t nfscl_getmnt(int, uint8_t *, u_int32_t, struct nfsclclient **);
static struct nfsclclient *nfscl_getclnt(u_int32_t);
static struct nfsclclient *nfscl_getclntsess(uint8_t *);
static struct nfscldeleg *nfscl_finddeleg(struct nfsclclient *, u_int8_t *,
    int);
static void nfscl_retoncloselayout(vnode_t, struct nfsclclient *, uint8_t *,
    int, struct nfsclrecalllayout **, struct nfscllayout **);
static void nfscl_reldevinfo_locked(struct nfscldevinfo *);
static struct nfscllayout *nfscl_findlayout(struct nfsclclient *, u_int8_t *,
    int);
static struct nfscldevinfo *nfscl_finddevinfo(struct nfsclclient *, uint8_t *);
static int nfscl_checkconflict(struct nfscllockownerhead *, struct nfscllock *,
    u_int8_t *, struct nfscllock **);
static void nfscl_freealllocks(struct nfscllockownerhead *, int);
static int nfscl_localconflict(struct nfsclclient *, u_int8_t *, int,
    struct nfscllock *, u_int8_t *, struct nfscldeleg *, struct nfscllock **);
static void nfscl_newopen(struct nfsclclient *, struct nfscldeleg *,
    struct nfsclowner **, struct nfsclowner **, struct nfsclopen **,
    struct nfsclopen **, u_int8_t *, u_int8_t *, int, struct ucred *, int *);
static int nfscl_moveopen(vnode_t , struct nfsclclient *,
    struct nfsmount *, struct nfsclopen *, struct nfsclowner *,
    struct nfscldeleg *, struct ucred *, NFSPROC_T *);
static void nfscl_totalrecall(struct nfsclclient *);
static int nfscl_relock(vnode_t , struct nfsclclient *, struct nfsmount *,
    struct nfscllockowner *, struct nfscllock *, struct ucred *, NFSPROC_T *);
static int nfscl_tryopen(struct nfsmount *, vnode_t , u_int8_t *, int,
    u_int8_t *, int, u_int32_t, struct nfsclopen *, u_int8_t *, int,
    struct nfscldeleg **, int, u_int32_t, struct ucred *, NFSPROC_T *);
static int nfscl_trylock(struct nfsmount *, vnode_t , u_int8_t *,
    int, struct nfscllockowner *, int, int, u_int64_t, u_int64_t, short,
    struct ucred *, NFSPROC_T *);
static int nfsrpc_reopen(struct nfsmount *, u_int8_t *, int, u_int32_t,
    struct nfsclopen *, struct nfscldeleg **, struct ucred *, NFSPROC_T *);
static void nfscl_freedeleg(struct nfscldeleghead *, struct nfscldeleg *,
    bool);
static int nfscl_errmap(struct nfsrv_descript *, u_int32_t);
static void nfscl_cleanup_common(struct nfsclclient *, u_int8_t *);
static int nfscl_recalldeleg(struct nfsclclient *, struct nfsmount *,
    struct nfscldeleg *, vnode_t, struct ucred *, NFSPROC_T *, int,
    vnode_t *);
static void nfscl_freeopenowner(struct nfsclowner *, int);
static void nfscl_cleandeleg(struct nfscldeleg *);
static void nfscl_emptylockowner(struct nfscllockowner *,
    struct nfscllockownerfhhead *);
static void nfscl_mergeflayouts(struct nfsclflayouthead *,
    struct nfsclflayouthead *);
static int nfscl_layoutrecall(int, struct nfscllayout *, uint32_t, uint64_t,
    uint64_t, uint32_t, uint32_t, uint32_t, char *, struct nfsclrecalllayout *);
static int nfscl_seq(uint32_t, uint32_t);
static void nfscl_layoutreturn(struct nfsmount *, struct nfscllayout *,
    struct ucred *, NFSPROC_T *);
static void nfscl_dolayoutcommit(struct nfsmount *, struct nfscllayout *,
    struct ucred *, NFSPROC_T *);
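
/*
 * Lists of the errors that each callback operation may return,
 * terminated by 0.  nfscl_cberrmap[] maps a callback procedure
 * number to its list, for use by nfscl_errmap().
 */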

static short nfscberr_null[] = {
	0,
	0,
};

static short nfscberr_getattr[] = {
	NFSERR_RESOURCE,
	NFSERR_BADHANDLE,
	NFSERR_BADXDR,
	NFSERR_RESOURCE,
	NFSERR_SERVERFAULT,
	0,
};

static short nfscberr_recall[] = {
	NFSERR_RESOURCE,
	NFSERR_BADHANDLE,
	NFSERR_BADSTATEID,
	NFSERR_BADXDR,
	NFSERR_RESOURCE,
	NFSERR_SERVERFAULT,
	0,
};

static short *nfscl_cberrmap[] = {
	nfscberr_null,
	nfscberr_null,
	nfscberr_null,
	nfscberr_getattr,
	nfscberr_recall
};

#define	NETFAMILY(clp) \
		(((clp)->nfsc_flags & NFSCLFLAGS_AFINET6) ? AF_INET6 : AF_INET)

/*
 * Called for an open operation.
 * If the nfhp argument is NULL, just get an openowner.
 */
int
nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg,
    struct ucred *cred, NFSPROC_T *p, struct nfsclowner **owpp,
    struct nfsclopen **opp, int *newonep, int *retp, int lockit, bool firstref)
{
	struct nfsclclient *clp;
	struct nfsclowner *owp, *nowp;
	struct nfsclopen *op = NULL, *nop = NULL;
	struct nfscldeleg *dp;
	struct nfsclownerhead *ohp;
	u_int8_t own[NFSV4CL_LOCKNAMELEN];
	int ret;

	if (newonep != NULL)
		*newonep = 0;
	if (opp != NULL)
		*opp = NULL;
	if (owpp != NULL)
		*owpp = NULL;

	/*
	 * Might need one or both of these, so MALLOC them now, to
	 * avoid a tsleep() in MALLOC later.
	 */
	nowp = malloc(sizeof (struct nfsclowner),
	    M_NFSCLOWNER, M_WAITOK);
	if (nfhp != NULL) {
	    nop = malloc(sizeof (struct nfsclopen) +
		fhlen - 1, M_NFSCLOPEN, M_WAITOK);
	    nop->nfso_hash.le_prev = NULL;
	}
	ret = nfscl_getcl(vp->v_mount, cred, p, false, firstref, &clp);
	if (ret != 0) {
		free(nowp, M_NFSCLOWNER);
		if (nop != NULL)
			free(nop, M_NFSCLOPEN);
		return (ret);
	}

	/*
	 * Get the Open iff it already exists.
	 * If none found, add the new one or return error, depending upon
	 * "create".
	 */
	NFSLOCKCLSTATE();
	dp = NULL;
	/* First check the delegation list */
	if (nfhp != NULL && usedeleg) {
		LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) {
			if (dp->nfsdl_fhlen == fhlen &&
			    !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) {
				if (!(amode & NFSV4OPEN_ACCESSWRITE) ||
				    (dp->nfsdl_flags & NFSCLDL_WRITE))
					break;
				dp = NULL;
				break;
			}
		}
	}

	/* For NFSv4.1/4.2 and this option, use a single open_owner. */
	if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount)))
		nfscl_filllockowner(NULL, own, F_POSIX);
	else
		nfscl_filllockowner(p->td_proc, own, F_POSIX);
	if (dp != NULL)
		ohp = &dp->nfsdl_owner;
	else
		ohp = &clp->nfsc_owner;
	/* Now, search for an openowner */
	LIST_FOREACH(owp, ohp, nfsow_list) {
		if (!NFSBCMP(owp->nfsow_owner, own, NFSV4CL_LOCKNAMELEN))
			break;
	}

	/*
	 * Create a new open, as required.
	 */
	nfscl_newopen(clp, dp, &owp, &nowp, &op, &nop, own, nfhp, fhlen,
	    cred, newonep);

	/*
	 * Now, check the mode on the open and return the appropriate
	 * value.
	 */
	if (retp != NULL) {
		if (nfhp != NULL && dp != NULL && nop == NULL)
			/* new local open on delegation */
			*retp = NFSCLOPEN_SETCRED;
		else
			*retp = NFSCLOPEN_OK;
	}
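	/*
	 * If the open is already held with a weaker access mode, upgrade
	 * nfso_mode.  Without a delegation, the caller is told to do the
	 * Open RPC (NFSCLOPEN_DOOPEN) so that the server sees the upgrade.
	 */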
	if (op != NULL && (amode & ~(op->nfso_mode))) {
		op->nfso_mode |= amode;
		if (retp != NULL && dp == NULL)
			*retp = NFSCLOPEN_DOOPEN;
	}

	/*
	 * Serialize modifications to the open owner for multiple threads
	 * within the same process using a read/write sleep lock.
	 * For NFSv4.1 and a single OpenOwner, allow concurrent open operations
	 * by acquiring a shared lock.  The close operations still use an
	 * exclusive lock for this case.
	 */
	if (lockit != 0) {
		if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount))) {
			/*
			 * Get a shared lock on the OpenOwner, but first
			 * wait for any pending exclusive lock, so that the
			 * exclusive locker gets priority.
			 */
			nfsv4_lock(&owp->nfsow_rwlock, 0, NULL,
			    NFSCLSTATEMUTEXPTR, NULL);
			nfsv4_getref(&owp->nfsow_rwlock, NULL,
			    NFSCLSTATEMUTEXPTR, NULL);
		} else
			nfscl_lockexcl(&owp->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
	}
	NFSUNLOCKCLSTATE();
	if (nowp != NULL)
		free(nowp, M_NFSCLOWNER);
	if (nop != NULL)
		free(nop, M_NFSCLOPEN);
	if (owpp != NULL)
		*owpp = owp;
	if (opp != NULL)
		*opp = op;
	return (0);
}

/*
 * Create a new open, as required.
 */
static void
nfscl_newopen(struct nfsclclient *clp, struct nfscldeleg *dp,
    struct nfsclowner **owpp, struct nfsclowner **nowpp, struct nfsclopen **opp,
    struct nfsclopen **nopp, u_int8_t *own, u_int8_t *fhp, int fhlen,
    struct ucred *cred, int *newonep)
{
	struct nfsclowner *owp = *owpp, *nowp;
	struct nfsclopen *op, *nop;

	if (nowpp != NULL)
		nowp = *nowpp;
	else
		nowp = NULL;
	if (nopp != NULL)
		nop = *nopp;
	else
		nop = NULL;
	if (owp == NULL && nowp != NULL) {
		NFSBCOPY(own, nowp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
		LIST_INIT(&nowp->nfsow_open);
		nowp->nfsow_clp = clp;
		nowp->nfsow_seqid = 0;
		nowp->nfsow_defunct = 0;
		nfscl_lockinit(&nowp->nfsow_rwlock);
		if (dp != NULL) {
			nfsstatsv1.cllocalopenowners++;
			LIST_INSERT_HEAD(&dp->nfsdl_owner, nowp, nfsow_list);
		} else {
			nfsstatsv1.clopenowners++;
			LIST_INSERT_HEAD(&clp->nfsc_owner, nowp, nfsow_list);
		}
		owp = *owpp = nowp;
		*nowpp = NULL;
		if (newonep != NULL)
			*newonep = 1;
	}

	 /* If an fhp has been specified, create an Open as well. */
	if (fhp != NULL) {
		/* and look for the correct open, based upon FH */
		LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
			if (op->nfso_fhlen == fhlen &&
			    !NFSBCMP(op->nfso_fh, fhp, fhlen))
				break;
		}
		if (op == NULL && nop != NULL) {
			nop->nfso_own = owp;
			nop->nfso_mode = 0;
			nop->nfso_opencnt = 0;
			nop->nfso_posixlock = 1;
			nop->nfso_fhlen = fhlen;
			NFSBCOPY(fhp, nop->nfso_fh, fhlen);
			LIST_INIT(&nop->nfso_lock);
			nop->nfso_stateid.seqid = 0;
			nop->nfso_stateid.other[0] = 0;
			nop->nfso_stateid.other[1] = 0;
			nop->nfso_stateid.other[2] = 0;
			KASSERT(cred != NULL, ("%s: cred NULL\n", __func__));
			newnfs_copyincred(cred, &nop->nfso_cred);
			if (dp != NULL) {
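				/*
				 * A local open on a delegation; move the
				 * delegation to the front of its list and
				 * push its timestamp out, since it is in
				 * use again.
				 */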
				TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
				TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
				    nfsdl_list);
				dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
				nfsstatsv1.cllocalopens++;
			} else {
				LIST_INSERT_HEAD(NFSCLOPENHASH(clp, fhp, fhlen),
				    nop, nfso_hash);
				nfsstatsv1.clopens++;
			}
			LIST_INSERT_HEAD(&owp->nfsow_open, nop, nfso_list);
			*opp = nop;
			*nopp = NULL;
			if (newonep != NULL)
				*newonep = 1;
		} else {
			*opp = op;
		}
	}
}

/*
 * Called to find/add a delegation to a client.
 */
int
nfscl_deleg(mount_t mp, struct nfsclclient *clp, u_int8_t *nfhp,
    int fhlen, struct ucred *cred, NFSPROC_T *p, struct nfscldeleg **dpp)
{
	struct nfscldeleg *dp = *dpp, *tdp;
	struct nfsmount *nmp;

	KASSERT(mp != NULL, ("nfscl_deleg: mp NULL"));
	nmp = VFSTONFS(mp);
	/*
	 * First, if we have received a Read delegation for a file on a
	 * read/write file system, just return it, because they aren't
	 * useful, imho.
	 */
	if (dp != NULL && !NFSMNT_RDONLY(mp) &&
	    (dp->nfsdl_flags & NFSCLDL_READ)) {
		nfscl_trydelegreturn(dp, cred, nmp, p);
		free(dp, M_NFSCLDELEG);
		*dpp = NULL;
		return (0);
	}

	/*
	 * Since a delegation might be added to the mount,
	 * set NFSMNTP_DELEGISSUED now.  If a delegation already
	 * exists, setting this flag is harmless.
	 */
	NFSLOCKMNT(nmp);
	nmp->nm_privflag |= NFSMNTP_DELEGISSUED;
	NFSUNLOCKMNT(nmp);

	/* Look for the correct deleg, based upon FH */
	NFSLOCKCLSTATE();
	tdp = nfscl_finddeleg(clp, nfhp, fhlen);
	if (tdp == NULL) {
		if (dp == NULL) {
			NFSUNLOCKCLSTATE();
			return (NFSERR_BADSTATEID);
		}
		*dpp = NULL;
		TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
		LIST_INSERT_HEAD(NFSCLDELEGHASH(clp, nfhp, fhlen), dp,
		    nfsdl_hash);
		dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
		nfsstatsv1.cldelegates++;
		nfscl_delegcnt++;
	} else {
		/*
		 * Delegation already exists, what do we do if a new one??
		 */
		if (dp != NULL) {
			printf("Deleg already exists!\n");
			free(dp, M_NFSCLDELEG);
			*dpp = NULL;
		} else {
			*dpp = tdp;
		}
	}
	NFSUNLOCKCLSTATE();
	return (0);
}

/*
 * Find a delegation for this file handle. Return NULL upon failure.
 */
static struct nfscldeleg *
nfscl_finddeleg(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
{
	struct nfscldeleg *dp;

	LIST_FOREACH(dp, NFSCLDELEGHASH(clp, fhp, fhlen), nfsdl_hash) {
	    if (dp->nfsdl_fhlen == fhlen &&
		!NFSBCMP(dp->nfsdl_fh, fhp, fhlen))
		break;
	}
	return (dp);
}

/*
 * Get a stateid for an I/O operation. First, look for an open and iff
 * found, return either a lockowner stateid or the open stateid.
 * If no Open is found, just return error and the special stateid of all zeros.
 */
int
nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode,
    int fords, struct ucred *cred, NFSPROC_T *p, nfsv4stateid_t *stateidp,
    void **lckpp)
{
	struct nfsclclient *clp;
	struct nfsclopen *op = NULL, *top;
	struct nfsclopenhash *oph;
	struct nfscllockowner *lp;
	struct nfscldeleg *dp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	u_int8_t own[NFSV4CL_LOCKNAMELEN], lockown[NFSV4CL_LOCKNAMELEN];
	int error;
	bool done;

	*lckpp = NULL;
	/*
	 * Initially, just set the special stateid of all zeros.
	 * (Don't do this for a DS, since the special stateid can't be used.)
	 */
	if (fords == 0) {
		stateidp->seqid = 0;
		stateidp->other[0] = 0;
		stateidp->other[1] = 0;
		stateidp->other[2] = 0;
	}
	if (vp->v_type != VREG)
		return (EISDIR);
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);

	/*
	 * For "oneopenown" mounts, first check for a cached open in the
	 * NFS vnode, that can be used as a stateid.  This can only be
	 * done if no delegations have been issued to the mount and no
	 * byte range file locking has been done for the file.
	 */
	if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp) && fords == 0) {
		NFSLOCKMNT(nmp);
		NFSLOCKNODE(np);
		if ((nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0 &&
		    (np->n_flag & NMIGHTBELOCKED) == 0 &&
		    np->n_openstateid != NULL) {
			stateidp->seqid = 0;
			stateidp->other[0] =
			    np->n_openstateid->nfso_stateid.other[0];
			stateidp->other[1] =
			    np->n_openstateid->nfso_stateid.other[1];
			stateidp->other[2] =
			    np->n_openstateid->nfso_stateid.other[2];
			NFSUNLOCKNODE(np);
			NFSUNLOCKMNT(nmp);
			return (0);
		}
		NFSUNLOCKNODE(np);
		NFSUNLOCKMNT(nmp);
	}

	NFSLOCKCLSTATE();
	clp = nfscl_findcl(nmp);
	if (clp == NULL) {
		NFSUNLOCKCLSTATE();
		return (EACCES);
	}

	/*
	 * Wait for recovery to complete.
	 */
	while ((clp->nfsc_flags & NFSCLFLAGS_RECVRINPROG))
		(void) nfsmsleep(&clp->nfsc_flags, NFSCLSTATEMUTEXPTR,
		    PZERO, "nfsrecvr", NULL);

	/*
	 * First, look for a delegation.
	 */
	LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) {
		if (dp->nfsdl_fhlen == fhlen &&
		    !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) {
			if (!(mode & NFSV4OPEN_ACCESSWRITE) ||
			    (dp->nfsdl_flags & NFSCLDL_WRITE)) {
				if (NFSHASNFSV4N(nmp))
					stateidp->seqid = 0;
				else
					stateidp->seqid =
					    dp->nfsdl_stateid.seqid;
				stateidp->other[0] = dp->nfsdl_stateid.other[0];
				stateidp->other[1] = dp->nfsdl_stateid.other[1];
				stateidp->other[2] = dp->nfsdl_stateid.other[2];
				if (!(np->n_flag & NDELEGRECALL)) {
					TAILQ_REMOVE(&clp->nfsc_deleg, dp,
					    nfsdl_list);
					TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp,
					    nfsdl_list);
					dp->nfsdl_timestamp = NFSD_MONOSEC +
					    120;
					dp->nfsdl_rwlock.nfslock_usecnt++;
					*lckpp = (void *)&dp->nfsdl_rwlock;
				}
				NFSUNLOCKCLSTATE();
				return (0);
			}
			break;
		}
	}

	if (p != NULL) {
		/*
		 * If p != NULL, we want to search the parentage tree
		 * for a matching OpenOwner and use that.
		 */
		if (NFSHASONEOPENOWN(VFSTONFS(vp->v_mount)))
			nfscl_filllockowner(NULL, own, F_POSIX);
		else
			nfscl_filllockowner(p->td_proc, own, F_POSIX);
		nfscl_filllockowner(p->td_proc, lockown, F_POSIX);
		lp = NULL;
		error = nfscl_getopen(NULL, clp->nfsc_openhash, nfhp, fhlen,
		    own, lockown, mode, &lp, &op);
		if (error == 0 && lp != NULL && fords == 0) {
			/* Don't return a lock stateid for a DS. */
			if (NFSHASNFSV4N(nmp))
				stateidp->seqid = 0;
			else
				stateidp->seqid = lp->nfsl_stateid.seqid;
			stateidp->other[0] =
			    lp->nfsl_stateid.other[0];
			stateidp->other[1] =
			    lp->nfsl_stateid.other[1];
			stateidp->other[2] =
			    lp->nfsl_stateid.other[2];
			NFSUNLOCKCLSTATE();
			return (0);
		}
	}
	if (op == NULL) {
		/* If not found, just look for any OpenOwner that will work. */
		top = NULL;
		done = false;
		oph = NFSCLOPENHASH(clp, nfhp, fhlen);
		LIST_FOREACH(op, oph, nfso_hash) {
			if (op->nfso_fhlen == fhlen &&
			    !NFSBCMP(op->nfso_fh, nfhp, fhlen)) {
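				/*
				 * Remember a write open that might be
				 * usable for a read, in case no open with
				 * a matching mode is found.
				 */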
				if (top == NULL && (op->nfso_mode &
				    NFSV4OPEN_ACCESSWRITE) != 0 &&
				    (mode & NFSV4OPEN_ACCESSREAD) != 0)
					top = op;
				if ((mode & op->nfso_mode) == mode) {
					/* LRU order the hash list. */
					LIST_REMOVE(op, nfso_hash);
					LIST_INSERT_HEAD(oph, op, nfso_hash);
					done = true;
					break;
				}
			}
		}
		if (!done) {
			NFSCL_DEBUG(2, "openmode top=%p\n", top);
			if (top == NULL || NFSHASOPENMODE(nmp)) {
				NFSUNLOCKCLSTATE();
				return (ENOENT);
			} else
				op = top;
		}
		/*
		 * For read aheads or write behinds, use the open cred.
		 * A read ahead or write behind is indicated by p == NULL.
		 */
		if (p == NULL)
			newnfs_copycred(&op->nfso_cred, cred);
	}

	/*
	 * No lock stateid, so return the open stateid.
	 */
	if (NFSHASNFSV4N(nmp))
		stateidp->seqid = 0;
	else
		stateidp->seqid = op->nfso_stateid.seqid;
	stateidp->other[0] = op->nfso_stateid.other[0];
	stateidp->other[1] = op->nfso_stateid.other[1];
	stateidp->other[2] = op->nfso_stateid.other[2];
	NFSUNLOCKCLSTATE();
	return (0);
}

/*
 * Search for a matching file, mode and, optionally, lockowner.
 */
static int
nfscl_getopen(struct nfsclownerhead *ohp, struct nfsclopenhash *ohashp,
    u_int8_t *nfhp, int fhlen, u_int8_t *openown, u_int8_t *lockown,
    u_int32_t mode, struct nfscllockowner **lpp, struct nfsclopen **opp)
{
	struct nfsclowner *owp;
	struct nfsclopen *op, *rop, *rop2;
	struct nfsclopenhash *oph;
	bool keep_looping;

	KASSERT(ohp == NULL || ohashp == NULL, ("nfscl_getopen: "
	    "only one of ohp and ohashp can be set"));
	if (lpp != NULL)
		*lpp = NULL;
	/*
	 * rop will be set to the open to be returned. There are three
	 * variants of this, all for an open of the correct file:
	 * 1 - A match of lockown.
	 * 2 - A match of the openown, when no lockown match exists.
	 * 3 - A match for any open, if no openown or lockown match exists.
	 * Looking for #2 over #3 probably isn't necessary, but since
	 * RFC3530 is vague w.r.t. the relationship between openowners and
	 * lockowners, I think this is the safer way to go.
	 */
	rop = NULL;
	rop2 = NULL;
	keep_looping = true;
	/* Search the client list */
	if (ohashp == NULL) {
		/* Search the local opens on the delegation. */
		LIST_FOREACH(owp, ohp, nfsow_list) {
			/* and look for the correct open */
			LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
				if (op->nfso_fhlen == fhlen &&
				    !NFSBCMP(op->nfso_fh, nfhp, fhlen)
				    && (op->nfso_mode & mode) == mode)
					keep_looping = nfscl_checkown(owp, op, openown,
					    lockown, lpp, &rop, &rop2);
				if (!keep_looping)
					break;
			}
			if (!keep_looping)
				break;
		}
	} else {
		/* Search for matching opens on the hash list. */
		oph = &ohashp[NFSCLOPENHASHFUNC(nfhp, fhlen)];
		LIST_FOREACH(op, oph, nfso_hash) {
			if (op->nfso_fhlen == fhlen &&
			    !NFSBCMP(op->nfso_fh, nfhp, fhlen)
			    && (op->nfso_mode & mode) == mode)
				keep_looping = nfscl_checkown(op->nfso_own, op,
				    openown, lockown, lpp, &rop, &rop2);
			if (!keep_looping) {
				/* LRU order the hash list. */
				LIST_REMOVE(op, nfso_hash);
				LIST_INSERT_HEAD(oph, op, nfso_hash);
				break;
			}
		}
	}
	if (rop == NULL)
		rop = rop2;
	if (rop == NULL)
		return (EBADF);
	*opp = rop;
	return (0);
}

/* Check for an owner match. */
static bool
nfscl_checkown(struct nfsclowner *owp, struct nfsclopen *op, uint8_t *openown,
    uint8_t *lockown, struct nfscllockowner **lpp, struct nfsclopen **ropp,
    struct nfsclopen **ropp2)
{
	struct nfscllockowner *lp;
	bool keep_looping;

	keep_looping = true;
	if (lpp != NULL) {
		/* Now look for a matching lockowner. */
		LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
			if (!NFSBCMP(lp->nfsl_owner, lockown,
			    NFSV4CL_LOCKNAMELEN)) {
				*lpp = lp;
				*ropp = op;
				return (false);
			}
		}
	}
	if (*ropp == NULL && !NFSBCMP(owp->nfsow_owner, openown,
	    NFSV4CL_LOCKNAMELEN)) {
		*ropp = op;
		if (lpp == NULL)
			keep_looping = false;
	}
	if (*ropp2 == NULL)
		*ropp2 = op;
	return (keep_looping);
}

/*
 * Release use of an open owner. Called when open operations are done
 * with the open owner.
 */
void
nfscl_ownerrelease(struct nfsmount *nmp, struct nfsclowner *owp,
    __unused int error, __unused int candelete, int unlocked)
{

	if (owp == NULL)
		return;
	NFSLOCKCLSTATE();
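	/*
	 * For "oneopenown" mounts nfscl_open() only shared locked the
	 * open owner, so release that reference; otherwise drop the
	 * exclusive lock.
	 */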
	if (unlocked == 0) {
		if (NFSHASONEOPENOWN(nmp))
			nfsv4_relref(&owp->nfsow_rwlock);
		else
			nfscl_lockunlock(&owp->nfsow_rwlock);
	}
	nfscl_clrelease(owp->nfsow_clp);
	NFSUNLOCKCLSTATE();
}

/*
 * Release use of an open structure under an open owner.
 */
void
nfscl_openrelease(struct nfsmount *nmp, struct nfsclopen *op, int error,
    int candelete)
{
	struct nfsclclient *clp;
	struct nfsclowner *owp;

	if (op == NULL)
		return;
	NFSLOCKCLSTATE();
	owp = op->nfso_own;
	if (NFSHASONEOPENOWN(nmp))
		nfsv4_relref(&owp->nfsow_rwlock);
	else
		nfscl_lockunlock(&owp->nfsow_rwlock);
	clp = owp->nfsow_clp;
	if (error && candelete && op->nfso_opencnt == 0)
		nfscl_freeopen(op, 0, true);
	nfscl_clrelease(clp);
	NFSUNLOCKCLSTATE();
}

/*
 * Called to get a clientid structure. It will optionally lock the
 * client data structures to do the SetClientId/SetClientId_confirm,
 * but will release that lock and return the clientid with a reference
 * count on it.
 * If the "cred" argument is NULL, a new clientid should not be created.
 * If the "p" argument is NULL, a SetClientID/SetClientIDConfirm cannot
 * be done.
 * It always returns *clpp with a reference count on it, unless returning
 * an error.
 */
int
nfscl_getcl(struct mount *mp, struct ucred *cred, NFSPROC_T *p,
    bool tryminvers, bool firstref, struct nfsclclient **clpp)
{
	struct nfsclclient *clp;
	struct nfsclclient *newclp = NULL;
	struct nfsmount *nmp;
	char uuid[HOSTUUIDLEN];
	int igotlock = 0, error, trystalecnt, clidinusedelay, i;
	u_int16_t idlen = 0;

	nmp = VFSTONFS(mp);
	if (cred != NULL) {
		getcredhostuuid(cred, uuid, sizeof uuid);
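		/*
		 * The client ID is built from the host UUID plus a 64 bit
		 * value (16 random bytes are used instead when no UUID is
		 * set), so size the allocation accordingly.
		 */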
		idlen = strlen(uuid);
		if (idlen > 0)
			idlen += sizeof (u_int64_t);
		else
			idlen += sizeof (u_int64_t) + 16; /* 16 random bytes */
		newclp = malloc(
		    sizeof (struct nfsclclient) + idlen - 1, M_NFSCLCLIENT,
		    M_WAITOK | M_ZERO);
	}
	NFSLOCKCLSTATE();
	/*
	 * If a forced dismount is already in progress, don't
	 * allocate a new clientid and get out now. For the case where
	 * clp != NULL, this is a harmless optimization.
	 */
	if (NFSCL_FORCEDISM(mp)) {
		NFSUNLOCKCLSTATE();
		if (newclp != NULL)
			free(newclp, M_NFSCLCLIENT);
		return (EBADF);
	}
	clp = nmp->nm_clp;
	if (clp == NULL) {
		if (newclp == NULL) {
			NFSUNLOCKCLSTATE();
			return (EACCES);
		}
		clp = newclp;
		clp->nfsc_idlen = idlen;
		LIST_INIT(&clp->nfsc_owner);
		TAILQ_INIT(&clp->nfsc_deleg);
		TAILQ_INIT(&clp->nfsc_layout);
		LIST_INIT(&clp->nfsc_devinfo);
		for (i = 0; i < NFSCLDELEGHASHSIZE; i++)
			LIST_INIT(&clp->nfsc_deleghash[i]);
		for (i = 0; i < NFSCLOPENHASHSIZE; i++)
			LIST_INIT(&clp->nfsc_openhash[i]);
		for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++)
			LIST_INIT(&clp->nfsc_layouthash[i]);
		clp->nfsc_flags = NFSCLFLAGS_INITED;
		clp->nfsc_clientidrev = 1;
		clp->nfsc_cbident = nfscl_nextcbident();
		nfscl_fillclid(nmp->nm_clval, uuid, clp->nfsc_id,
		    clp->nfsc_idlen);
		LIST_INSERT_HEAD(&nfsclhead, clp, nfsc_list);
		nmp->nm_clp = clp;
		clp->nfsc_nmp = nmp;
	} else {
		if (newclp != NULL)
			free(newclp, M_NFSCLCLIENT);
	}
	while ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0 && !igotlock &&
	    !NFSCL_FORCEDISM(mp))
		igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL,
		    NFSCLSTATEMUTEXPTR, mp);
	if (igotlock == 0) {
		/*
		 * Call nfsv4_lock() with "iwantlock == 0" on the firstref so
		 * that it will wait for a pending exclusive lock request.
		 * This gives the exclusive lock request priority over this
		 * shared lock request.
		 * An exclusive lock on nfsc_lock is used mainly for server
		 * crash recoveries and delegation recalls.
		 */
		if (firstref)
			nfsv4_lock(&clp->nfsc_lock, 0, NULL, NFSCLSTATEMUTEXPTR,
			    mp);
		nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
	}
	if (igotlock == 0 && NFSCL_FORCEDISM(mp)) {
		/*
		 * Both nfsv4_lock() and nfsv4_getref() know to check
		 * for NFSCL_FORCEDISM() and return without sleeping to
		 * wait for the exclusive lock to be released, since it
		 * might be held by nfscl_umount() and we need to get out
		 * now for that case and not wait until nfscl_umount()
		 * releases it.
		 */
		NFSUNLOCKCLSTATE();
		return (EBADF);
	}
	NFSUNLOCKCLSTATE();

	/*
	 * If it needs a clientid, do the setclientid now.
	 */
	if ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0) {
		if (!igotlock)
			panic("nfscl_clget");
		if (p == NULL || cred == NULL) {
			NFSLOCKCLSTATE();
			nfsv4_unlock(&clp->nfsc_lock, 0);
			NFSUNLOCKCLSTATE();
			return (EACCES);
		}
		/*
		 * If RFC3530 Sec. 14.2.33 is taken literally,
		 * NFSERR_CLIDINUSE will be returned persistently for the
		 * case where a new mount of the same file system is using
		 * a different principal. In practice, NFSERR_CLIDINUSE is
		 * only returned when there is outstanding unexpired state
		 * on the clientid. As such, try for twice the lease
		 * interval, if we know what that is. Otherwise, make a
		 * wild ass guess.
		 * The case of returning NFSERR_STALECLIENTID is far less
		 * likely, but might occur if there is a significant delay
		 * between doing the SetClientID and SetClientIDConfirm Ops,
		 * such that the server throws away the clientid before
		 * receiving the SetClientIDConfirm.
		 */
		if (clp->nfsc_renew > 0)
			clidinusedelay = NFSCL_LEASE(clp->nfsc_renew) * 2;
		else
			clidinusedelay = 120;
		trystalecnt = 3;
		do {
			error = nfsrpc_setclient(nmp, clp, 0, NULL, cred, p);
			if (error == NFSERR_STALECLIENTID ||
			    error == NFSERR_STALEDONTRECOVER ||
			    error == NFSERR_BADSESSION ||
			    error == NFSERR_CLIDINUSE) {