Commit 19bd0d9c authored by Konstantin Belousov
Browse files

Implement address space guards.

A guard, requested by the MAP_GUARD mmap(2) flag, prevents the reuse of
the allocated address space, but does not allow instantiation of the
pages in the range.  It is useful for more explicit support of the usual
two-stage reserve-then-commit allocators, since it prevents accidental
instantiation of the mapping, e.g. by mprotect(2).

Use guards to reimplement the stack grow code.  Explicitly track the stack
grow area with the guard, including the stack guard page.  On stack
grow, a trivial shift of the guard map entry and stack map entry limits
performs the stack expansion.  Move the code that detects stack growth and
calls vm_map_growstack() from vm_fault() into vm_map_lookup().

As a result, it is impossible to get a random mapping to occur in the
stack grow area, or to overlap the stack guard page.

Enable stack guard page by default.

Reviewed by:	alc, markj
Man page update reviewed by:	alc, bjk, emaste, markj, pho
Tested by:	pho, Qualys
Sponsored by:	The FreeBSD Foundation
MFC after:	1 week
Differential revision:	https://reviews.freebsd.org/D11306 (man pages)
parent 546bb2d7
......@@ -199,6 +199,21 @@ In contrast, if
.Dv MAP_EXCL
is specified, the request will fail if a mapping
already exists within the range.
.It Dv MAP_GUARD
Instead of a mapping, create a guard of the specified size.
Guards allow a process to create reservations in its address space,
which can later be replaced by actual mappings.
.Pp
.Fa mmap
will not create mappings in the address range of a guard unless
the request specifies
.Dv MAP_FIXED .
Guards can be destroyed with
.Xr munmap 2 .
Any memory access by a thread to the guarded range results
in the delivery of a
.Dv SIGSEGV
signal to that thread.
.It Dv MAP_NOCORE
Region is not included in a core file.
.It Dv MAP_NOSYNC
......@@ -303,6 +318,7 @@ must include at least
.Dv PROT_READ
and
.Dv PROT_WRITE .
.Pp
This option creates
a memory region that grows to at most
.Fa len
......@@ -313,6 +329,10 @@ stack top is the starting address returned by the call, plus
bytes.
The bottom of the stack at maximum growth is the starting
address returned by the call.
The system uses guards to prevent the inadvertent use of
regions into which stacks created with
.Dv MAP_STACK
will automatically grow, without mapping the whole stack in advance.
.El
.Pp
The
......@@ -406,6 +426,7 @@ were specified.
.It Bq Er EINVAL
None of
.Dv MAP_ANON ,
.Dv MAP_GUARD ,
.Dv MAP_PRIVATE ,
.Dv MAP_SHARED ,
or
......@@ -455,6 +476,25 @@ were specified, but the requested region is already used by a mapping.
was specified, but
.Dv MAP_FIXED
was not.
.It Bq Er EINVAL
.Dv MAP_GUARD
was specified, but the
.Fa offset
argument was not zero, the
.Fa fd
argument was not -1, or the
.Fa prot
argument was not
.Dv PROT_NONE .
.It Bq Er EINVAL
.Dv MAP_GUARD
was specified together with one of the flags
.Dv MAP_ANON ,
.Dv MAP_PREFAULT ,
.Dv MAP_PREFAULT_READ ,
.Dv MAP_PRIVATE ,
.Dv MAP_SHARED ,
or
.Dv MAP_STACK .
.It Bq Er ENODEV
.Dv MAP_ANON
has not been specified and
......
......@@ -28,7 +28,7 @@
.\" @(#)munmap.2 8.3 (Berkeley) 5/27/94
.\" $FreeBSD$
.\"
.Dd May 27, 1994
.Dd June 22, 2017
.Dt MUNMAP 2
.Os
.Sh NAME
......@@ -44,7 +44,7 @@
The
.Fn munmap
system call
deletes the mappings for the specified address range,
deletes the mappings and guards for the specified address range,
and causes further references to addresses within the range
to generate invalid memory references.
.Sh RETURN VALUES
......
......@@ -90,6 +90,7 @@
/*
* Extended flags
*/
#define MAP_GUARD 0x00002000 /* reserve but don't map address range */
#define MAP_EXCL 0x00004000 /* for MAP_FIXED, fail if address is used */
#define MAP_NOCORE 0x00020000 /* dont include these pages in a coredump */
#define MAP_PREFAULT_READ 0x00040000 /* prefault mapping for reading */
......
......@@ -58,7 +58,7 @@
* in the range 5 to 9.
*/
#undef __FreeBSD_version
#define __FreeBSD_version 1200034 /* Master, propagated to newvers */
#define __FreeBSD_version 1200035 /* Master, propagated to newvers */
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
......@@ -76,12 +76,13 @@
#undef __FreeBSD_kernel__
#define __FreeBSD_kernel__
#ifdef _KERNEL
#if defined(_KERNEL) || defined(IN_RTLD)
#define P_OSREL_SIGWAIT 700000
#define P_OSREL_SIGSEGV 700004
#define P_OSREL_MAP_ANON 800104
#define P_OSREL_MAP_FSTRICT 1100036
#define P_OSREL_SHUTDOWN_ENOTCONN 1100077
#define P_OSREL_MAP_GUARD 1200035
#define P_OSREL_MAJOR(x) ((x) / 100000)
#endif
......
......@@ -78,6 +78,7 @@ typedef u_char vm_prot_t; /* protection codes */
#define VM_PROT_WRITE ((vm_prot_t) 0x02)
#define VM_PROT_EXECUTE ((vm_prot_t) 0x04)
#define VM_PROT_COPY ((vm_prot_t) 0x08) /* copy-on-read */
#define VM_PROT_FAULT_LOOKUP ((vm_prot_t) 0x010)
#define VM_PROT_ALL (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)
#define VM_PROT_RW (VM_PROT_READ|VM_PROT_WRITE)
......
......@@ -495,13 +495,12 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
int locked, nera, result, rv;
u_char behavior;
boolean_t wired; /* Passed by reference. */
bool dead, growstack, hardfault, is_first_object_locked;
bool dead, hardfault, is_first_object_locked;
VM_CNT_INC(v_vm_faults);
fs.vp = NULL;
faultcount = 0;
nera = -1;
growstack = true;
hardfault = false;
RetryFault:;
......@@ -511,17 +510,10 @@ RetryFault:;
* search.
*/
fs.map = map;
result = vm_map_lookup(&fs.map, vaddr, fault_type, &fs.entry,
&fs.first_object, &fs.first_pindex, &prot, &wired);
result = vm_map_lookup(&fs.map, vaddr, fault_type |
VM_PROT_FAULT_LOOKUP, &fs.entry, &fs.first_object,
&fs.first_pindex, &prot, &wired);
if (result != KERN_SUCCESS) {
if (growstack && result == KERN_INVALID_ADDRESS &&
map != kernel_map) {
result = vm_map_growstack(curproc, vaddr);
if (result != KERN_SUCCESS)
return (KERN_FAILURE);
growstack = false;
goto RetryFault;
}
unlock_vp(&fs);
return (result);
}
......@@ -547,6 +539,8 @@ RetryFault:;
goto RetryFault;
}
MPASS((fs.entry->eflags & MAP_ENTRY_GUARD) == 0);
if (wired)
fault_type = prot | (fault_type & VM_PROT_COPY);
else
......
This diff is collapsed.
......@@ -103,7 +103,6 @@ struct vm_map_entry {
struct vm_map_entry *right; /* right child in binary search tree */
vm_offset_t start; /* start address */
vm_offset_t end; /* end address */
vm_offset_t avail_ssize; /* amt can grow if this is a stack */
vm_offset_t next_read; /* vaddr of the next sequential read */
vm_size_t adj_free; /* amount of adjacent free space */
vm_size_t max_free; /* max free space in subtree */
......@@ -142,6 +141,9 @@ struct vm_map_entry {
#define MAP_ENTRY_WIRE_SKIPPED 0x4000
#define MAP_ENTRY_VN_WRITECNT 0x8000 /* writeable vnode mapping */
#define MAP_ENTRY_GUARD 0x10000
#define MAP_ENTRY_STACK_GAP_DN 0x20000
#define MAP_ENTRY_STACK_GAP_UP 0x40000
#ifdef _KERNEL
static __inline u_char
......@@ -315,6 +317,7 @@ long vmspace_resident_count(struct vmspace *vmspace);
#define MAP_PREFAULT_PARTIAL 0x0010
#define MAP_DISABLE_SYNCER 0x0020
#define MAP_CHECK_EXCL 0x0040
#define MAP_CREATE_GUARD 0x0080
#define MAP_DISABLE_COREDUMP 0x0100
#define MAP_PREFAULT_MADVISE 0x0200 /* from (user) madvise request */
#define MAP_VN_WRITECOUNT 0x0400
......@@ -322,6 +325,8 @@ long vmspace_resident_count(struct vmspace *vmspace);
#define MAP_STACK_GROWS_UP 0x2000
#define MAP_ACC_CHARGED 0x4000
#define MAP_ACC_NO_CHARGE 0x8000
#define MAP_CREATE_STACK_GAP_UP 0x10000
#define MAP_CREATE_STACK_GAP_DN 0x20000
/*
* vm_fault option flags
......@@ -387,7 +392,6 @@ int vm_map_submap (vm_map_t, vm_offset_t, vm_offset_t, vm_map_t);
int vm_map_sync(vm_map_t, vm_offset_t, vm_offset_t, boolean_t, boolean_t);
int vm_map_madvise (vm_map_t, vm_offset_t, vm_offset_t, int);
int vm_map_stack (vm_map_t, vm_offset_t, vm_size_t, vm_prot_t, vm_prot_t, int);
int vm_map_growstack (struct proc *p, vm_offset_t addr);
int vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
int flags);
int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
......
......@@ -226,7 +226,7 @@ kern_mmap(struct thread *td, uintptr_t addr0, size_t size, int prot, int flags,
}
if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_HASSEMAPHORE |
MAP_STACK | MAP_NOSYNC | MAP_ANON | MAP_EXCL | MAP_NOCORE |
MAP_PREFAULT_READ |
MAP_PREFAULT_READ | MAP_GUARD |
#ifdef MAP_32BIT
MAP_32BIT |
#endif
......@@ -239,6 +239,10 @@ kern_mmap(struct thread *td, uintptr_t addr0, size_t size, int prot, int flags,
if (prot != PROT_NONE &&
(prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) != 0)
return (EINVAL);
if ((flags & MAP_GUARD) != 0 && (prot != PROT_NONE || fd != -1 ||
pos != 0 || (flags & (MAP_SHARED | MAP_PRIVATE | MAP_PREFAULT |
MAP_PREFAULT_READ | MAP_ANON | MAP_STACK)) != 0))
return (EINVAL);
/*
* Align the file position to a page boundary,
......@@ -314,7 +318,10 @@ kern_mmap(struct thread *td, uintptr_t addr0, size_t size, int prot, int flags,
* returns an error earlier.
*/
error = 0;
} else if (flags & MAP_ANON) {
} else if ((flags & MAP_GUARD) != 0) {
error = vm_mmap_object(&vms->vm_map, &addr, size, VM_PROT_NONE,
VM_PROT_NONE, flags, NULL, pos, FALSE, td);
} else if ((flags & MAP_ANON) != 0) {
/*
* Mapping blank space is trivial.
*
......@@ -1511,6 +1518,8 @@ vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
}
if ((flags & MAP_EXCL) != 0)
docow |= MAP_CHECK_EXCL;
if ((flags & MAP_GUARD) != 0)
docow |= MAP_CREATE_GUARD;
if (fitit) {
if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment