From fa7dd9c5bc8e36e355cfe61a87e27779f539eef1 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Mon, 16 Dec 2002 19:24:43 +0000 Subject: [PATCH] Change the way ELF coredumps are handled. Instead of unconditionally skipping read-only pages, which can result in valuable non-text-related data not getting dumped, the ELF loader and the dynamic loader now mark read-only text pages NOCORE and the coredump code only checks (primarily) for complete inaccessibility of the page or NOCORE being set. Certain applications which map large amounts of read-only data will produce much larger cores. A new sysctl has been added, debug.elf_legacy_coredump, which will revert to the old behavior. This commit represents collaborative work by all parties involved. The PR contains a program demonstrating the problem. PR: kern/45994 Submitted by: "Peter Edwards" , Archie Cobbs Reviewed by: jdp, dillon MFC after: 7 days --- libexec/rtld-elf/map_object.c | 29 +++++++++++++++++++----- sys/kern/imgact_elf.c | 42 ++++++++++++++++++++++++++--------- sys/vm/vm.h | 1 + 3 files changed, 55 insertions(+), 17 deletions(-) diff --git a/libexec/rtld-elf/map_object.c b/libexec/rtld-elf/map_object.c index a0220ef2078b..7d45662dda73 100644 --- a/libexec/rtld-elf/map_object.c +++ b/libexec/rtld-elf/map_object.c @@ -38,7 +38,8 @@ #include "debug.h" #include "rtld.h" -static int protflags(int); /* Elf flags -> mmap protection */ +static int convert_prot(int); /* Elf flags -> mmap protection */ +static int convert_flags(int); /* Elf flags -> mmap flags */ /* * Map a shared object into memory. The "fd" argument is a file descriptor, @@ -75,6 +76,7 @@ map_object(int fd, const char *path, const struct stat *sb) Elf_Addr data_vlimit; caddr_t data_addr; int data_prot; + int data_flags; Elf_Addr clear_vaddr; caddr_t clear_addr; caddr_t clear_page; @@ -189,8 +191,8 @@ map_object(int fd, const char *path, const struct stat *sb) mapsize = base_vlimit - base_vaddr; base_addr = u.hdr.e_type == ET_EXEC ? 
(caddr_t) base_vaddr : NULL; - mapbase = mmap(base_addr, mapsize, protflags(segs[0]->p_flags), - MAP_PRIVATE, fd, base_offset); + mapbase = mmap(base_addr, mapsize, convert_prot(segs[0]->p_flags), + convert_flags(segs[0]->p_flags), fd, base_offset); if (mapbase == (caddr_t) -1) { _rtld_error("%s: mmap of entire address space failed: %s", path, strerror(errno)); @@ -209,10 +211,11 @@ map_object(int fd, const char *path, const struct stat *sb) data_vaddr = trunc_page(segs[i]->p_vaddr); data_vlimit = round_page(segs[i]->p_vaddr + segs[i]->p_filesz); data_addr = mapbase + (data_vaddr - base_vaddr); - data_prot = protflags(segs[i]->p_flags); + data_prot = convert_prot(segs[i]->p_flags); + data_flags = convert_flags(segs[i]->p_flags) | MAP_FIXED; /* Do not call mmap on the first segment - this is redundant */ if (i && mmap(data_addr, data_vlimit - data_vaddr, data_prot, - MAP_PRIVATE|MAP_FIXED, fd, data_offset) == (caddr_t) -1) { + data_flags, fd, data_offset) == (caddr_t) -1) { _rtld_error("%s: mmap of data failed: %s", path, strerror(errno)); return NULL; } @@ -315,7 +318,7 @@ obj_new(void) * flags for MMAP. */ static int -protflags(int elfflags) +convert_prot(int elfflags) { int prot = 0; if (elfflags & PF_R) @@ -326,3 +329,17 @@ protflags(int elfflags) prot |= PROT_EXEC; return prot; } + +static int +convert_flags(int elfflags) +{ + int flags = MAP_PRIVATE; /* All mappings are private */ + + /* + * Readonly mappings are marked "MAP_NOCORE", because they can be + * reconstructed by a debugger. 
+ */ + if (!(elfflags & PF_W)) + flags |= MAP_NOCORE; + return flags; +} diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index 27bcfbefeea1..44d4bf097aea 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -87,6 +87,9 @@ SYSCTL_INT(_debug, OID_AUTO, elf32_trace, CTLFLAG_RW, &elf_trace, 0, ""); #else SYSCTL_INT(_debug, OID_AUTO, elf64_trace, CTLFLAG_RW, &elf_trace, 0, ""); #endif +static int elf_legacy_coredump = 0; +SYSCTL_INT(_debug, OID_AUTO, elf_legacy_coredump, CTLFLAG_RW, + &elf_legacy_coredump, 0, ""); static Elf_Brandinfo *elf_brand_list[MAX_BRANDS]; extern int fallback_elf_brand; @@ -349,7 +352,7 @@ __elfN(load_section)(struct proc *p, struct vmspace *vmspace, { size_t map_len; vm_offset_t map_addr; - int error, rv; + int error, rv, cow; size_t copy_len; vm_offset_t file_addr; vm_offset_t data_buf = 0; @@ -392,6 +395,11 @@ __elfN(load_section)(struct proc *p, struct vmspace *vmspace, if (map_len != 0) { vm_object_reference(object); + + /* cow flags: don't dump readonly sections in core */ + cow = MAP_COPY_ON_WRITE | MAP_PREFAULT | + (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP); + rv = __elfN(map_insert)(&vmspace->vm_map, object, file_addr, /* file offset */ @@ -399,7 +407,7 @@ __elfN(load_section)(struct proc *p, struct vmspace *vmspace, map_addr + map_len,/* virtual end */ prot, VM_PROT_ALL, - MAP_COPY_ON_WRITE | MAP_PREFAULT); + cow); if (rv != KERN_SUCCESS) { vm_object_deallocate(object); return (EINVAL); @@ -1042,17 +1050,29 @@ each_writable_segment(p, func, closure) entry = entry->next) { vm_object_t obj; - if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) || - (entry->protection & (VM_PROT_READ|VM_PROT_WRITE)) != - (VM_PROT_READ|VM_PROT_WRITE)) - continue; + /* + * Don't dump inaccessible mappings, deal with legacy + * coredump mode. + * + * Note that read-only segments related to the elf binary + * are marked MAP_ENTRY_NOCOREDUMP now so we no longer + * need to arbitrarily ignore such segments. 
+ */ + if (elf_legacy_coredump) { + if ((entry->protection & VM_PROT_RW) != VM_PROT_RW) + continue; + } else { + if ((entry->protection & VM_PROT_ALL) == 0) + continue; + } /* - ** Dont include memory segment in the coredump if - ** MAP_NOCORE is set in mmap(2) or MADV_NOCORE in - ** madvise(2). - */ - if (entry->eflags & MAP_ENTRY_NOCOREDUMP) + * Don't include memory segment in the coredump if + * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in + * madvise(2). Do not dump submaps (i.e. parts of the + * kernel map). + */ + if (entry->eflags & (MAP_ENTRY_NOCOREDUMP|MAP_ENTRY_IS_SUB_MAP)) continue; if ((obj = entry->object.vm_object) == NULL) diff --git a/sys/vm/vm.h b/sys/vm/vm.h index ffbcd489fa06..55818ef31978 100644 --- a/sys/vm/vm.h +++ b/sys/vm/vm.h @@ -81,6 +81,7 @@ typedef u_char vm_prot_t; /* protection codes */ #define VM_PROT_OVERRIDE_WRITE ((vm_prot_t) 0x08) /* copy-on-write */ #define VM_PROT_ALL (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE) +#define VM_PROT_RW (VM_PROT_READ|VM_PROT_WRITE) #define VM_PROT_DEFAULT VM_PROT_ALL union vm_map_object;