ffs_vfsops.c 70.5 KB
Newer Older
1
/*-
2
3
 * SPDX-License-Identifier: BSD-3-Clause
 *
Rodney W. Grimes's avatar
Rodney W. Grimes committed
4
5
6
7
8
9
10
11
12
13
14
 * Copyright (c) 1989, 1991, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
Warner Losh's avatar
Warner Losh committed
15
 * 3. Neither the name of the University nor the names of its contributors
Rodney W. Grimes's avatar
Rodney W. Grimes committed
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
31
 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
Rodney W. Grimes's avatar
Rodney W. Grimes committed
32
33
 */

David E. O'Brien's avatar
David E. O'Brien committed
34
35
36
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

37
#include "opt_quota.h"
38
#include "opt_ufs.h"
39
#include "opt_ffs.h"
40
#include "opt_ddb.h"
41

Rodney W. Grimes's avatar
Rodney W. Grimes committed
42
#include <sys/param.h>
43
#include <sys/gsb_crc32.h>
Rodney W. Grimes's avatar
Rodney W. Grimes committed
44
45
#include <sys/systm.h>
#include <sys/namei.h>
46
#include <sys/priv.h>
Rodney W. Grimes's avatar
Rodney W. Grimes committed
47
#include <sys/proc.h>
48
#include <sys/taskqueue.h>
Rodney W. Grimes's avatar
Rodney W. Grimes committed
49
#include <sys/kernel.h>
50
#include <sys/ktr.h>
Rodney W. Grimes's avatar
Rodney W. Grimes committed
51
52
#include <sys/vnode.h>
#include <sys/mount.h>
53
#include <sys/bio.h>
Rodney W. Grimes's avatar
Rodney W. Grimes committed
54
#include <sys/buf.h>
55
#include <sys/conf.h>
56
#include <sys/fcntl.h>
57
#include <sys/ioccom.h>
Rodney W. Grimes's avatar
Rodney W. Grimes committed
58
#include <sys/malloc.h>
59
#include <sys/mutex.h>
60
#include <sys/rwlock.h>
61
#include <sys/sysctl.h>
62
#include <sys/vmmeter.h>
63

64
65
#include <security/mac/mac_framework.h>

66
#include <ufs/ufs/dir.h>
67
#include <ufs/ufs/extattr.h>
68
#include <ufs/ufs/gjournal.h>
Rodney W. Grimes's avatar
Rodney W. Grimes committed
69
70
71
72
73
74
75
76
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

77
#include <vm/vm.h>
78
#include <vm/uma.h>
79
80
#include <vm/vm_page.h>

81
82
83
#include <geom/geom.h>
#include <geom/geom_vfs.h>

84
85
#include <ddb/ddb.h>

86
static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
87
VFS_SMR_DECLARE;
88

Poul-Henning Kamp's avatar
Poul-Henning Kamp committed
89
static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
90
91
static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
		    ufs2_daddr_t);
Poul-Henning Kamp's avatar
Poul-Henning Kamp committed
92
static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
93
static int	ffs_sync_lazy(struct mount *mp);
94
95
static int	ffs_use_bread(void *devfd, off_t loc, void **bufp, int size);
static int	ffs_use_bwrite(void *devfd, off_t loc, void *buf, int size);
96

97
98
static vfs_init_t ffs_init;
static vfs_uninit_t ffs_uninit;
99
static vfs_extattrctl_t ffs_extattrctl;
100
static vfs_cmount_t ffs_cmount;
101
static vfs_unmount_t ffs_unmount;
102
static vfs_mount_t ffs_mount;
103
104
105
static vfs_statfs_t ffs_statfs;
static vfs_fhtovp_t ffs_fhtovp;
static vfs_sync_t ffs_sync;
Rodney W. Grimes's avatar
Rodney W. Grimes committed
106

Eivind Eklund's avatar
Eivind Eklund committed
107
static struct vfsops ufs_vfsops = {
108
109
110
	.vfs_extattrctl =	ffs_extattrctl,
	.vfs_fhtovp =		ffs_fhtovp,
	.vfs_init =		ffs_init,
111
112
	.vfs_mount =		ffs_mount,
	.vfs_cmount =		ffs_cmount,
113
	.vfs_quotactl =		ufs_quotactl,
Mateusz Guzik's avatar
Mateusz Guzik committed
114
115
	.vfs_root =		vfs_cache_root,
	.vfs_cachedroot =	ufs_root,
116
117
118
119
120
	.vfs_statfs =		ffs_statfs,
	.vfs_sync =		ffs_sync,
	.vfs_uninit =		ffs_uninit,
	.vfs_unmount =		ffs_unmount,
	.vfs_vget =		ffs_vget,
121
	.vfs_susp_clean =	process_deferred_inactive,
Rodney W. Grimes's avatar
Rodney W. Grimes committed
122
123
};

124
VFS_SET(ufs_vfsops, ufs, 0);
125
MODULE_VERSION(ufs, 1);
126

127
static b_strategy_t ffs_geom_strategy;
128
static b_write_t ffs_bufwrite;
129
130

static struct buf_ops ffs_ops = {
131
	.bop_name =	"FFS",
132
	.bop_write =	ffs_bufwrite,
133
	.bop_strategy =	ffs_geom_strategy,
134
	.bop_sync =	bufsync,
135
136
137
138
139
#ifdef NO_FFS_SNAPSHOT
	.bop_bdflush =	bufbdflush,
#else
	.bop_bdflush =	ffs_bdflush,
#endif
140
141
};

142
143
144
145
146
147
/*
 * Note that userquota and groupquota options are not currently used
 * by UFS/FFS code and generally mount(8) does not pass those options
 * from userland, but they can be passed by loader(8) via
 * vfs.root.mountfrom.options.
 */
148
static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr",
149
    "noclusterw", "noexec", "export", "force", "from", "groupquota",
150
    "multilabel", "nfsv4acls", "fsckpid", "snapshot", "nosuid", "suiddir",
151
    "nosymfollow", "sync", "union", "userquota", "untrusted", NULL };
152

153
154
155
156
157
158
static int ffs_enxio_enable = 1;
SYSCTL_DECL(_vfs_ffs);
SYSCTL_INT(_vfs_ffs, OID_AUTO, enxio_enable, CTLFLAG_RWTUN,
    &ffs_enxio_enable, 0,
    "enable mapping of other disk I/O errors to ENXIO");

159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
/*
 * Read the block of "vp" that holds byte "offset" and return its buffer
 * through "bpp".  The logical block number and the read size are derived
 * from the inode's filesystem via lblkno() and blksize().
 *
 * If "res" is not NULL, it is set to the buffer's data pointer advanced
 * by blkoff(fs, offset).
 *
 * Returns 0 on success.  If bread() fails its error is returned and
 * "*bpp" is left NULL.
 */
static int
ffs_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp)
{
	struct inode *dp;
	struct fs *sb;
	struct buf *blk;
	ufs_lbn_t lblk;
	int error, nbytes;

	/* Callers see NULL unless the read below succeeds. */
	*bpp = NULL;

	dp = VTOI(vp);
	sb = ITOFS(dp);
	lblk = lblkno(sb, offset);
	nbytes = blksize(sb, dp, lblk);

	error = bread(vp, lblk, nbytes, NOCRED, &blk);
	if (error != 0)
		return (error);

	*bpp = blk;
	if (res != NULL)
		*res = (char *)blk->b_data + blkoff(sb, offset);
	return (0);
}

/*
 * Copy the on-disk inode "ino" out of buffer "bp" into the dinode that
 * hangs off "ip", then mirror the commonly used fields (mode, link
 * counts, size, flags, generation, uid, gid) into the in-core inode.
 *
 * For UFS2 the dinode check-hash is verified in the buffer before it is
 * copied.  A verification error is returned (after logging) unless
 * ffs_fsfail_cleanup() reports that a forced-unmount cleanup is in
 * progress, in which case the inode is loaded anyway.
 *
 * Returns 0 on success or the check-hash verification error.
 */
static int
ffs_load_inode(struct buf *bp, struct inode *ip, struct fs *fs, ino_t ino)
{
	struct ufs1_dinode *d1;
	struct ufs2_dinode *d2, *ondisk;
	int error;

	if (!I_IS_UFS1(ip)) {
		/* Validate the copy in the buffer before trusting it. */
		ondisk = (struct ufs2_dinode *)bp->b_data +
		    ino_to_fsbo(fs, ino);
		error = ffs_verify_dinode_ckhash(fs, ondisk);
		if (error != 0 && !ffs_fsfail_cleanup(ITOUMP(ip), error)) {
			printf("%s: inode %jd: check-hash failed\n",
			    fs->fs_fsmnt, (intmax_t)ino);
			return (error);
		}
		d2 = ip->i_din2;
		*d2 = *ondisk;
		ip->i_mode = d2->di_mode;
		ip->i_nlink = d2->di_nlink;
		ip->i_effnlink = d2->di_nlink;
		ip->i_size = d2->di_size;
		ip->i_flags = d2->di_flags;
		ip->i_gen = d2->di_gen;
		ip->i_uid = d2->di_uid;
		ip->i_gid = d2->di_gid;
		return (0);
	}

	/* UFS1: no check-hash is verified on this path, copy directly. */
	d1 = ip->i_din1;
	*d1 = *((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, ino));
	ip->i_mode = d1->di_mode;
	ip->i_nlink = d1->di_nlink;
	ip->i_effnlink = d1->di_nlink;
	ip->i_size = d1->di_size;
	ip->i_flags = d1->di_flags;
	ip->i_gen = d1->di_gen;
	ip->i_uid = d1->di_uid;
	ip->i_gid = d1->di_gid;
	return (0);
}

/*
 * Verify that a filesystem block number is a valid data block.
 * This routine is only called on untrusted filesystems (asserted below).
 *
 * "daddr" is the first fragment of the candidate block and "blksize" its
 * size in bytes; "inum" is used only for the diagnostic message.
 *
 * Returns 0 if the block is acceptable, otherwise EINTEGRITY.  On
 * failure a message is sent with uprintf(), rate-limited through
 * ppsratecheck() on the per-mount integrity-message state.
 *
 * Locking: the UFS mutex may or may not be held on entry.  It is taken
 * here if needed to protect the rate-limit state, dropped around
 * uprintf(), and on return is in the same state (held or not) as it was
 * on entry.
 */
static int
ffs_check_blkno(struct mount *mp, ino_t inum, ufs2_daddr_t daddr, int blksize)
{
	struct fs *fs;
	struct ufsmount *ump;
	ufs2_daddr_t end_daddr;
	int cg, havemtx;

	KASSERT((mp->mnt_flag & MNT_UNTRUSTED) != 0,
	    ("ffs_check_blkno called on a trusted file system"));
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	cg = dtog(fs, daddr);
	end_daddr = daddr + numfrags(fs, blksize);
	/*
	 * Verify that the block number is a valid data block. Also check
	 * that it does not point to an inode block or a superblock. Accept
	 * blocks that are unallocated (0) or part of snapshot metadata
	 * (BLK_NOCOPY or BLK_SNAP).
	 *
	 * Thus, the block must be in a valid range for the filesystem and
	 * either in the space before a backup superblock (except the first
	 * cylinder group where that space is used by the bootstrap code) or
	 * after the inode blocks and before the end of the cylinder group.
	 */
	if ((uint64_t)daddr <= BLK_SNAP ||
	    ((uint64_t)end_daddr <= fs->fs_size &&
	    ((cg > 0 && end_daddr <= cgsblock(fs, cg)) ||
	    (daddr >= cgdmin(fs, cg) &&
	    end_daddr <= cgbase(fs, cg) + fs->fs_fpg))))
		return (0);
	/* Remember whether the caller already holds the UFS mutex. */
	if ((havemtx = mtx_owned(UFS_MTX(ump))) == 0)
		UFS_LOCK(ump);
	if (ppsratecheck(&ump->um_last_integritymsg,
	    &ump->um_secs_integritymsg, 1)) {
		/* Do not hold the mutex across uprintf(). */
		UFS_UNLOCK(ump);
		/*
		 * %jd takes an intmax_t; cast explicitly so the arguments
		 * match the format regardless of how ino_t and
		 * ufs2_daddr_t are defined.
		 */
		uprintf("\n%s: inode %jd, out-of-range indirect block "
		    "number %jd\n", mp->mnt_stat.f_mntonname,
		    (intmax_t)inum, (intmax_t)daddr);
		/* Restore the caller's lock state. */
		if (havemtx)
			UFS_LOCK(ump);
	} else if (!havemtx)
		UFS_UNLOCK(ump);
	return (EINTEGRITY);
}

/*
 * Task handler that initiates a forcible unmount.
 * Used to unmount filesystems whose underlying media has gone away.
 *
 * "v" is the struct fsfail_task that was queued; it carries the fsid of
 * the filesystem to unmount and is freed here in every case.  "pending"
 * is not used.
 */
static void
ffs_fsfail_unmount(void *v, int pending)
{
	struct fsfail_task *task_ctx = v;
	struct mount *failed_mp;

	/*
	 * Find our mount and get a ref on it, then try to unmount.
	 * Nothing is unmounted if the fsid no longer resolves to a mount.
	 */
	failed_mp = vfs_getvfs(&task_ctx->fsid);
	if (failed_mp != NULL)
		dounmount(failed_mp, MNT_FORCE, curthread);

	free(task_ctx, M_UFSMNT);
}

/*
 * On first ENXIO error, start a task that forcibly unmounts the filesystem.
 *
 * This is the unlocked entry point: it takes the UFS mutex around a call
 * to ffs_fsfail_cleanup_locked() and passes its result through.
 *
 * Return true if a cleanup is in progress.
 */
int
ffs_fsfail_cleanup(struct ufsmount *ump, int error)
{
	int in_progress;

	UFS_LOCK(ump);
	in_progress = ffs_fsfail_cleanup_locked(ump, error);
	UFS_UNLOCK(ump);

	return (in_progress);
}

int
ffs_fsfail_cleanup_locked(struct ufsmount *ump, int error)
{
	struct fsfail_task *etp;
	struct task *tp;

	mtx_assert(UFS_MTX(ump), MA_OWNED);
	if (error == ENXIO && (ump->um_flags & UM_FSFAIL_CLEANUP) == 0) {
		ump->um_flags |= UM_FSFAIL_CLEANUP;
		/*
		 * Queue an async forced unmount.
		 */
		etp = ump->um_fsfail_task;
		ump->um_fsfail_task = NULL;
		if (etp != NULL) {
			tp = &etp->task;
			TASK_INIT(tp, 0, ffs_fsfail_unmount, etp);
			taskqueue_enqueue(taskqueue_thread, tp);
			printf("UFS: forcibly unmounting %s from %s\n",
			    ump->um_mountp->mnt_stat.f_mntfromname,
			    ump->um_mountp->mnt_stat.f_mntonname);
		}
	}
	return ((ump->um_flags & UM_FSFAIL_CLEANUP) != 0);
}

/*
 * Wrapper used during ENXIO cleanup to allocate empty buffers when
 * the kernel is unable to read the real one. They are needed so that
 * the soft updates code can use them to unwind its dependencies.
 *
 * The arguments after "ump" are passed through to breadn_flags(), with
 * GB_CVTENXIO added to "flags".  If the read fails and
 * ffs_fsfail_cleanup() reports that a forced-unmount cleanup is in
 * progress, a buffer for the same block is obtained with getblkx() and
 * zero-filled in place of the unreadable data.
 *
 * Returns 0 with "*bpp" set on success (including the zero-filled
 * substitute case); otherwise returns the error.  If the substitute
 * buffer cannot be obtained, "*bpp" is set to NULL.
 */
int
ffs_breadz(struct ufsmount *ump, struct vnode *vp, daddr_t lblkno,
    daddr_t dblkno, int size, daddr_t *rablkno, int *rabsize, int cnt,
    struct ucred *cred, int flags, void (*ckhashfunc)(struct buf *),
    struct buf **bpp)
{
	int error;

	flags |= GB_CVTENXIO;
	error = breadn_flags(vp, lblkno, dblkno, size, rablkno, rabsize, cnt,
	    cred, flags, ckhashfunc, bpp);
	if (error != 0 && ffs_fsfail_cleanup(ump, error)) {
		error = getblkx(vp, lblkno, dblkno, size, 0, 0, flags, bpp);
		KASSERT(error == 0, ("getblkx failed"));
		/*
		 * KASSERT may be compiled out; never touch *bpp unless
		 * getblkx() actually handed back a buffer.
		 */
		if (error == 0)
			vfs_bio_bzero_buf(*bpp, 0, size);
		else
			*bpp = NULL;
	}
	return (error);
}

370
static int
371
ffs_mount(struct mount *mp)
Rodney W. Grimes's avatar
Rodney W. Grimes committed
372
{
373
	struct vnode *devvp, *odevvp;
374
	struct thread *td;
Kevin Lo's avatar
Kevin Lo committed
375
	struct ufsmount *ump = NULL;
Poul-Henning Kamp's avatar
Poul-Henning Kamp committed
376
	struct fs *fs;
377
	pid_t fsckpid = 0;
378
	int error, error1, flags;
379
	uint64_t mntorflags, saved_mnt_flag;
380
	accmode_t accmode;
381
	struct nameidata ndp;
382
	char *fspec;
383
	bool mounted_softdep;
Rodney W. Grimes's avatar
Rodney W. Grimes committed
384

385
	td = curthread;
386
387
	if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
		return (EINVAL);
388
389
390
391
392
393
394
395
396
397
	if (uma_inode == NULL) {
		uma_inode = uma_zcreate("FFS inode",
		    sizeof(struct inode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
		uma_ufs1 = uma_zcreate("FFS1 dinode",
		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
		uma_ufs2 = uma_zcreate("FFS2 dinode",
		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
398
		VFS_SMR_ZONE_SET(uma_inode);
399
	}
400

401
402
403
	vfs_deleteopt(mp->mnt_optnew, "groupquota");
	vfs_deleteopt(mp->mnt_optnew, "userquota");

404
405
406
	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
	if (error)
		return (error);
Rodney W. Grimes's avatar
Rodney W. Grimes committed
407

408
	mntorflags = 0;
409
410
411
	if (vfs_getopt(mp->mnt_optnew, "untrusted", NULL, NULL) == 0)
		mntorflags |= MNT_UNTRUSTED;

412
	if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
413
		mntorflags |= MNT_ACLS;
414

415
	if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) {
416
		mntorflags |= MNT_SNAPSHOT;
417
418
419
420
421
		/*
		 * Once we have set the MNT_SNAPSHOT flag, do not
		 * persist "snapshot" in the options list.
		 */
		vfs_deleteopt(mp->mnt_optnew, "snapshot");
Konstantin Belousov's avatar
Konstantin Belousov committed
422
		vfs_deleteopt(mp->mnt_opt, "snapshot");
423
	}
424

425
426
427
428
429
430
431
432
433
434
435
	if (vfs_getopt(mp->mnt_optnew, "fsckpid", NULL, NULL) == 0 &&
	    vfs_scanopt(mp->mnt_optnew, "fsckpid", "%d", &fsckpid) == 1) {
		/*
		 * Once we have set the restricted PID, do not
		 * persist "fsckpid" in the options list.
		 */
		vfs_deleteopt(mp->mnt_optnew, "fsckpid");
		vfs_deleteopt(mp->mnt_opt, "fsckpid");
		if (mp->mnt_flag & MNT_UPDATE) {
			if (VFSTOUFS(mp)->um_fs->fs_ronly == 0 &&
			     vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) {
436
437
				vfs_mount_error(mp,
				    "Checker enable: Must be read-only");
438
439
440
				return (EINVAL);
			}
		} else if (vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) {
441
442
			vfs_mount_error(mp,
			    "Checker enable: Must be read-only");
443
444
445
446
447
448
449
			return (EINVAL);
		}
		/* Set to -1 if we are done */
		if (fsckpid == 0)
			fsckpid = -1;
	}

450
451
	if (vfs_getopt(mp->mnt_optnew, "nfsv4acls", NULL, NULL) == 0) {
		if (mntorflags & MNT_ACLS) {
452
453
454
			vfs_mount_error(mp,
			    "\"acls\" and \"nfsv4acls\" options "
			    "are mutually exclusive");
455
456
457
458
459
			return (EINVAL);
		}
		mntorflags |= MNT_NFS4ACLS;
	}

460
	MNT_ILOCK(mp);
461
	mp->mnt_kern_flag &= ~MNTK_FPLOOKUP;
462
	mp->mnt_flag |= mntorflags;
463
	MNT_IUNLOCK(mp);
Rodney W. Grimes's avatar
Rodney W. Grimes committed
464
465
466
467
468
469
470
	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
471
		odevvp = ump->um_odevvp;
472
		devvp = ump->um_devvp;
473
474
475
476
477
478
479
480
481
482
483
484
		if (fsckpid == -1 && ump->um_fsckpid > 0) {
			if ((error = ffs_flushfiles(mp, WRITECLOSE, td)) != 0 ||
			    (error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0)
				return (error);
			g_topology_lock();
			/*
			 * Return to normal read-only mode.
			 */
			error = g_access(ump->um_cp, 0, -1, 0);
			g_topology_unlock();
			ump->um_fsckpid = 0;
		}
485
486
		if (fs->fs_ronly == 0 &&
		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
487
			/*
488
			 * Flush any dirty data and suspend filesystem.
489
			 */
490
			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
491
				return (error);
492
493
494
			error = vfs_write_suspend_umnt(mp);
			if (error != 0)
				return (error);
495
496
497
498
499
500
501
502
503
504

			fs->fs_ronly = 1;
			if (MOUNTEDSOFTDEP(mp)) {
				MNT_ILOCK(mp);
				mp->mnt_flag &= ~MNT_SOFTDEP;
				MNT_IUNLOCK(mp);
				mounted_softdep = true;
			} else
				mounted_softdep = false;

505
506
507
508
			/*
			 * Check for and optionally get rid of files open
			 * for writing.
			 */
Rodney W. Grimes's avatar
Rodney W. Grimes committed
509
510
511
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
512
			if (mounted_softdep) {
Julian Elischer's avatar
Julian Elischer committed
513
				error = softdep_flushfiles(mp, flags, td);
514
			} else {
Julian Elischer's avatar
Julian Elischer committed
515
				error = ffs_flushfiles(mp, flags, td);
516
			}
517
			if (error) {
518
519
520
521
522
523
				fs->fs_ronly = 0;
				if (mounted_softdep) {
					MNT_ILOCK(mp);
					mp->mnt_flag |= MNT_SOFTDEP;
					MNT_IUNLOCK(mp);
				}
524
				vfs_write_resume(mp, 0);
525
526
				return (error);
			}
527

528
529
			if (fs->fs_pendingblocks != 0 ||
			    fs->fs_pendinginodes != 0) {
530
531
				printf("WARNING: %s Update error: blocks %jd "
				    "files %d\n", fs->fs_fsmnt, 
532
				    (intmax_t)fs->fs_pendingblocks,
533
534
535
536
				    fs->fs_pendinginodes);
				fs->fs_pendingblocks = 0;
				fs->fs_pendinginodes = 0;
			}
537
			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
538
				fs->fs_clean = 1;
539
			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
540
541
				fs->fs_ronly = 0;
				fs->fs_clean = 0;
542
543
544
545
546
				if (mounted_softdep) {
					MNT_ILOCK(mp);
					mp->mnt_flag |= MNT_SOFTDEP;
					MNT_IUNLOCK(mp);
				}
547
				vfs_write_resume(mp, 0);
548
549
				return (error);
			}
550
			if (mounted_softdep)
551
				softdep_unmount(mp);
552
			g_topology_lock();
553
554
555
556
			/*
			 * Drop our write and exclusive access.
			 */
			g_access(ump->um_cp, 0, -1, -1);
557
			g_topology_unlock();
558
			MNT_ILOCK(mp);
559
			mp->mnt_flag |= MNT_RDONLY;
560
			MNT_IUNLOCK(mp);
561
562
563
564
			/*
			 * Allow the writers to note that filesystem
			 * is ro now.
			 */
565
			vfs_write_resume(mp, 0);
566
		}
567
		if ((mp->mnt_flag & MNT_RELOAD) &&
568
		    (error = ffs_reload(mp, td, 0)) != 0)
569
			return (error);
570
571
		if (fs->fs_ronly &&
		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
572
573
574
575
			/*
			 * If we are running a checker, do not allow upgrade.
			 */
			if (ump->um_fsckpid > 0) {
576
577
				vfs_mount_error(mp,
				    "Active checker, cannot upgrade to write");
578
579
				return (EINVAL);
			}
580
581
582
583
			/*
			 * If upgrade to read-write by non-root, then verify
			 * that user has necessary permissions on the device.
			 */
584
585
			vn_lock(odevvp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_ACCESS(odevvp, VREAD | VWRITE,
586
587
588
			    td->td_ucred, td);
			if (error)
				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
589
			VOP_UNLOCK(odevvp);
590
591
			if (error) {
				return (error);
592
			}
593
			fs->fs_flags &= ~FS_UNCLEAN;
Bruce Evans's avatar
Bruce Evans committed
594
			if (fs->fs_clean == 0) {
595
				fs->fs_flags |= FS_UNCLEAN;
596
				if ((mp->mnt_flag & MNT_FORCE) ||
597
598
				    ((fs->fs_flags &
				     (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
599
				     (fs->fs_flags & FS_DOSOFTDEP))) {
600
601
					printf("WARNING: %s was not properly "
					   "dismounted\n", fs->fs_fsmnt);
Bruce Evans's avatar
Bruce Evans committed
602
				} else {
603
604
605
606
607
608
609
					vfs_mount_error(mp,
					   "R/W mount of %s denied. %s.%s",
					   fs->fs_fsmnt,
					   "Filesystem is not clean - run fsck",
					   (fs->fs_flags & FS_SUJ) == 0 ? "" :
					   " Forced mount will invalidate"
					   " journal contents");
610
					return (EPERM);
Bruce Evans's avatar
Bruce Evans committed
611
612
				}
			}
613
614
			g_topology_lock();
			/*
615
			 * Request exclusive write access.
616
			 */
617
			error = g_access(ump->um_cp, 0, 1, 1);
618
619
620
			g_topology_unlock();
			if (error)
				return (error);
621
622
			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
				return (error);
623
624
625
			error = vfs_write_suspend_umnt(mp);
			if (error != 0)
				return (error);
626
			fs->fs_ronly = 0;
627
			MNT_ILOCK(mp);
628
629
630
631
632
			saved_mnt_flag = MNT_RDONLY;
			if (MOUNTEDSOFTDEP(mp) && (mp->mnt_flag &
			    MNT_ASYNC) != 0)
				saved_mnt_flag |= MNT_ASYNC;
			mp->mnt_flag &= ~saved_mnt_flag;
633
			MNT_IUNLOCK(mp);
634
			fs->fs_mtime = time_second;
635
			/* check to see if we need to start softdep */
636
			if ((fs->fs_flags & FS_DOSOFTDEP) &&
637
			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
638
639
640
641
642
				fs->fs_ronly = 1;
				MNT_ILOCK(mp);
				mp->mnt_flag |= saved_mnt_flag;
				MNT_IUNLOCK(mp);
				vfs_write_resume(mp, 0);
643
				return (error);
644
			}
645
646
			fs->fs_clean = 0;
			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
647
				fs->fs_ronly = 1;
648
649
				if ((fs->fs_flags & FS_DOSOFTDEP) != 0)
					softdep_unmount(mp);
650
651
652
653
				MNT_ILOCK(mp);
				mp->mnt_flag |= saved_mnt_flag;
				MNT_IUNLOCK(mp);
				vfs_write_resume(mp, 0);
654
655
				return (error);
			}
656
657
			if (fs->fs_snapinum[0] != 0)
				ffs_snapshot_mount(mp);
658
			vfs_write_resume(mp, 0);
659
		}
660
661
662
663
		/*
		 * Soft updates is incompatible with "async",
		 * so if we are doing softupdates stop the user
		 * from setting the async flag in an update.
664
		 * Softdep_mount() clears it in an initial mount
665
666
		 * or ro->rw remount.
		 */
667
		if (MOUNTEDSOFTDEP(mp)) {
668
669
			/* XXX: Reset too late ? */
			MNT_ILOCK(mp);
670
			mp->mnt_flag &= ~MNT_ASYNC;
671
672
			MNT_IUNLOCK(mp);
		}
673
674
675
		/*
		 * Keep MNT_ACLS flag if it is stored in superblock.
		 */
676
677
678
		if ((fs->fs_flags & FS_ACLS) != 0) {
			/* XXX: Set too late ? */
			MNT_ILOCK(mp);
679
			mp->mnt_flag |= MNT_ACLS;
680
681
			MNT_IUNLOCK(mp);
		}
682

683
684
685
686
687
688
		if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
			/* XXX: Set too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_NFS4ACLS;
			MNT_IUNLOCK(mp);
		}
689
690
691
692
693
694
		/*
		 * If this is a request from fsck to clean up the filesystem,
		 * then allow the specified pid to proceed.
		 */
		if (fsckpid > 0) {
			if (ump->um_fsckpid != 0) {
695
696
				vfs_mount_error(mp,
				    "Active checker already running on %s",
697
698
699
				    fs->fs_fsmnt);
				return (EINVAL);
			}
700
			KASSERT(MOUNTEDSOFTDEP(mp) == 0,
701
702
703
704
705
706
707
708
			    ("soft updates enabled on read-only file system"));
			g_topology_lock();
			/*
			 * Request write access.
			 */
			error = g_access(ump->um_cp, 0, 1, 0);
			g_topology_unlock();
			if (error) {
709
710
				vfs_mount_error(mp,
				    "Checker activation failed on %s",
711
712
713
714
715
716
717
718
719
720
721
				    fs->fs_fsmnt);
				return (error);
			}
			ump->um_fsckpid = fsckpid;
			if (fs->fs_snapinum[0] != 0)
				ffs_snapshot_mount(mp);
			fs->fs_mtime = time_second;
			fs->fs_fmod = 1;
			fs->fs_clean = 0;
			(void) ffs_sbupdate(ump, MNT_WAIT, 0);
		}
722

723
724
725
726
		/*
		 * If this is a snapshot request, take the snapshot.
		 */
		if (mp->mnt_flag & MNT_SNAPSHOT)
727
			return (ffs_snapshot(mp, fspec));
728
729
730
731
732

		/*
		 * Must not call namei() while owning busy ref.
		 */
		vfs_unbusy(mp);
Rodney W. Grimes's avatar
Rodney W. Grimes committed
733
	}
734

Rodney W. Grimes's avatar
Rodney W. Grimes committed
735
736
	/*
	 * Not an update, or updating the name: look up the name
Bruce Evans's avatar
Bruce Evans committed
737
	 * and verify that it refers to a sensible disk device.
Rodney W. Grimes's avatar
Rodney W. Grimes committed
738
	 */
739
	NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
740
741
742
743
744
745
746
747
748
749
750
751
	error = namei(&ndp);
	if ((mp->mnt_flag & MNT_UPDATE) != 0) {
		/*
		 * Unmount does not start if MNT_UPDATE is set.  Mount
		 * update busies mp before setting MNT_UPDATE.  We
		 * must be able to retain our busy ref successfully,
		 * without sleep.
		 */
		error1 = vfs_busy(mp, MBF_NOWAIT);
		MPASS(error1 == 0);
	}
	if (error != 0)
752
		return (error);
753
754
	NDFREE(&ndp, NDF_ONLY_PNBUF);
	devvp = ndp.ni_vp;
755
	if (!vn_isdisk_error(devvp, &error)) {
756
		vput(devvp);
757
758
		return (error);
	}
759
760
761
762
763

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
764
	accmode = VREAD;
765
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
766
767
		accmode |= VWRITE;
	error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
768
769
770
771
772
	if (error)
		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
	if (error) {
		vput(devvp);
		return (error);
773
774
	}

775
776
	if (mp->mnt_flag & MNT_UPDATE) {
		/*
777
778
		 * Update only
		 *
779
780
		 * If it's not the same vnode, or at least the same device
		 * then it's not correct.
781
782
		 */

783
		if (devvp->v_rdev != ump->um_devvp->v_rdev)
784
			error = EINVAL;	/* needs translation */
785
		vput(devvp);
786
787
		if (error)
			return (error);
788
789
	} else {
		/*
790
791
792
793
794
		 * New mount
		 *
		 * We need the name for the mount point (also used for
		 * "last mounted on") copied in. If an error occurs,
		 * the mount point is discarded by the upper level code.
Konstantin Belousov's avatar
Konstantin Belousov committed
795
		 * Note that vfs_mount_alloc() populates f_mntonname for us.
796
		 */
Poul-Henning Kamp's avatar
Poul-Henning Kamp committed
797
		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
798
799
800
			vrele(devvp);
			return (error);
		}
801
		if (fsckpid > 0) {
802
			KASSERT(MOUNTEDSOFTDEP(mp) == 0,
803
804
805
806
807
808
809
810
811
812
			    ("soft updates enabled on read-only file system"));
			ump = VFSTOUFS(mp);
			fs = ump->um_fs;
			g_topology_lock();
			/*
			 * Request write access.
			 */
			error = g_access(ump->um_cp, 0, 1, 0);
			g_topology_unlock();
			if (error) {
813
814
				printf("WARNING: %s: Checker activation "
				    "failed\n", fs->fs_fsmnt);
815
816
817
818
819
820
821
822
823
			} else { 
				ump->um_fsckpid = fsckpid;
				if (fs->fs_snapinum[0] != 0)
					ffs_snapshot_mount(mp);
				fs->fs_mtime = time_second;
				fs->fs_clean = 0;
				(void) ffs_sbupdate(ump, MNT_WAIT, 0);
			}
		}
Rodney W. Grimes's avatar
Rodney W. Grimes committed
824
	}
825
826
827
828
829
830
831

	MNT_ILOCK(mp);
	/*
	 * This is racy versus lookup, see ufs_fplookup_vexec for details.
	 */
	if ((mp->mnt_kern_flag & MNTK_FPLOOKUP) != 0)
		panic("MNTK_FPLOOKUP set on mount %p when it should not be", mp);
832
	if ((mp->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS | MNT_UNION)) == 0)
833
834
835
		mp->mnt_kern_flag |= MNTK_FPLOOKUP;
	MNT_IUNLOCK(mp);

836
	vfs_mountedfrom(mp, fspec);
837
	return (0);
Rodney W. Grimes's avatar
Rodney W. Grimes committed
838
839
}

840
841
842
843
844
/*
 * Compatibility with old mount system call.
 */

static int
845
ffs_cmount(struct mntarg *ma, void *data, uint64_t flags)
846
847
848
849
850
851
852
853
854
855
856
{
	struct ufs_args args;
	int error;

	if (data == NULL)
		return (EINVAL);
	error = copyin(data, &args, sizeof args);
	if (error)
		return (error);

	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
857
	ma = mount_arg(ma, "export", &args.export, sizeof(args.export));
858
859
860
861
862
	error = kernel_mount(ma, flags);

	return (error);
}

Rodney W. Grimes's avatar
Rodney W. Grimes committed
863
864
/*
 * Reload all incore data for a filesystem (used after running fsck on
865
866
 * the root filesystem and finding things to fix). If the 'force' flag
 * is 0, the filesystem must be mounted read-only.
Rodney W. Grimes's avatar
Rodney W. Grimes committed
867
868
869
870
871
872
 *
 * Things to do to update the mount:
 *	1) invalidate all cached meta-data.
 *	2) re-read superblock from disk.
 *	3) re-read summary information from disk.
 *	4) invalidate all inactive vnodes.
873
874
875
876
 *	5) clear MNTK_SUSPEND2 and MNTK_SUSPENDED flags, allowing secondary
 *	   writers, if requested.
 *	6) invalidate all cached file data.
 *	7) re-read inode data for all active vnodes.
Rodney W. Grimes's avatar
Rodney W. Grimes committed
877
 */
878
int
879
ffs_reload(struct mount *mp, struct thread *td, int flags)
Rodney W. Grimes's avatar
Rodney W. Grimes committed
880
{
881
	struct vnode *vp, *mvp, *devvp;
Rodney W. Grimes's avatar
Rodney W. Grimes committed
882
	struct inode *ip;
883
	void *space;
Rodney W. Grimes's avatar
Rodney W. Grimes committed
884
	struct buf *bp;
885
	struct fs *fs, *newfs;
886
	struct ufsmount *ump;
887
	ufs2_daddr_t sblockloc;
888
889
	int i, blks, error;
	u_long size;
890
	int32_t *lp;
Rodney W. Grimes's avatar
Rodney W. Grimes committed
891

892
	ump = VFSTOUFS(mp);
893
894

	MNT_ILOCK(mp);
895
	if ((mp->mnt_flag & MNT_RDONLY) == 0 && (flags & FFSR_FORCE) == 0) {
896
897
898
899
		MNT_IUNLOCK(mp);
		return (EINVAL);
	}
	MNT_IUNLOCK(mp);
900

Rodney W. Grimes's avatar
Rodney W. Grimes committed
901
902
903
	/*
	 * Step 1: invalidate all cached meta-data.
	 */
904
	devvp = VFSTOUFS(mp)->um_devvp;
905
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
906
	if (vinvalbuf(devvp, 0, 0, 0) != 0)
Rodney W. Grimes's avatar
Rodney W. Grimes committed
907
		panic("ffs_reload: dirty1");
908
	VOP_UNLOCK(devvp);
909

Rodney W. Grimes's avatar
Rodney W. Grimes committed
910
911
912
	/*
	 * Step 2: re-read superblock from disk.
	 */
913
	fs = VFSTOUFS(mp)->um_fs;
914
	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
915
	    NOCRED, &bp)) != 0)
Rodney W. Grimes's avatar
Rodney W. Grimes committed
916
		return (error);
917
	newfs = (struct fs *)bp->b_data;
918
919
920
921
	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
	     newfs->fs_magic != FS_UFS2_MAGIC) ||
	    newfs->fs_bsize > MAXBSIZE ||
	    newfs->fs_bsize < sizeof(struct fs)) {
922
923
			brelse(bp);
			return (EIO);		/* XXX needs translation */
Rodney W. Grimes's avatar
Rodney W. Grimes committed
924
	}
925
	/*
926
927
928
	 * Preserve the summary information, read-only status, and
	 * superblock location by copying these fields into our new
	 * superblock before using it to update the existing superblock.
929
	 */
930
	newfs->fs_si = fs->fs_si;
931
	newfs->fs_ronly = fs->fs_ronly;
932
	sblockloc = fs->fs_sblockloc;
933
	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
Rodney W. Grimes's avatar
Rodney W. Grimes committed
934
	brelse(bp);
935
	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
936
	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
937
	UFS_LOCK(ump);
938
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
939
940
		printf("WARNING: %s: reload pending error: blocks %jd "
		    "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
941
		    fs->fs_pendinginodes);
942
943
944
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
945
	UFS_UNLOCK(ump);
946

Rodney W. Grimes's avatar
Rodney W. Grimes committed
947
948
949
	/*
	 * Step 3: re-read summary information from disk.
	 */
950
951
952
953
954
955
	size = fs->fs_cssize;
	blks = howmany(size, fs->fs_fsize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	size += fs->fs_ncg * sizeof(u_int8_t);
	free(fs->fs_csp, M_UFSMNT);
956
	space = malloc(size, M_UFSMNT, M_WAITOK);
957
	fs->fs_csp = space;
Rodney W. Grimes's avatar
Rodney W. Grimes committed
958
959
960
961
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
962
963
964
		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
		    NOCRED, &bp);
		if (error)
Rodney W. Grimes's avatar
Rodney W. Grimes committed
965
			return (error);
966
967
		bcopy(bp->b_data, space, (u_int)size);
		space = (char *)space + size;
Rodney W. Grimes's avatar
Rodney W. Grimes committed
968
969
		brelse(bp);
	}
970
971
972
973
	/*
	 * We no longer know anything about clusters per cylinder group.
	 */
	if (fs->fs_contigsumsize > 0) {
974
		fs->fs_maxcluster = lp = space;
975
976
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
977
		space = lp;
978
	}
979
980
981
	size = fs->fs_ncg * sizeof(u_int8_t);
	fs->fs_contigdirs = (u_int8_t *)space;
	bzero(fs->fs_contigdirs, size);
982
983
984
985
986
987
	if ((flags & FFSR_UNSUSPEND) != 0) {
		MNT_ILOCK(mp);
		mp->mnt_kern_flag &= ~(MNTK_SUSPENDED | MNTK_SUSPEND2);
		wakeup(&mp->mnt_flag);
		MNT_IUNLOCK(mp);
	}
988

Rodney W. Grimes's avatar
Rodney W. Grimes committed
989
loop:
990
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
991
992
993
994
995
996
997
		/*
		 * Skip syncer vnode.
		 */
		if (vp->v_type == VNON) {
			VI_UNLOCK(vp);
			continue;
		}
Rodney W. Grimes's avatar
Rodney W. Grimes committed
998
		/*
999
		 * Step 4: invalidate all cached file data.
Rodney W. Grimes's avatar
Rodney W. Grimes committed
1000
		 */