From cc43c38c8757a7d9b18141df42a04314f8ed6227 Mon Sep 17 00:00:00 2001 From: Robert Watson Date: Sun, 2 Dec 2007 10:10:27 +0000 Subject: [PATCH] Add two new sysctls in support of the forthcoming procstat(1) to support its -f and -v arguments: kern.proc.filedesc - dump file descriptor information for a process, if debugging is permitted, including socket addresses, open flags, file offsets, file paths, etc. kern.proc.vmmap - dump virtual memory mapping information for a process, if debugging is permitted, including layout and information on underlying objects, such as the type of object and path. These provide a superset of the information historically available through the now-deprecated procfs(4), and are intended to be exported in an ABI-robust form. --- sys/kern/kern_descrip.c | 181 +++++++++++++++++++++++++++++++++++++++- sys/kern/kern_proc.c | 160 ++++++++++++++++++++++++++++++++++- sys/sys/sysctl.h | 3 + sys/sys/user.h | 93 ++++++++++++++++++++- 4 files changed, 434 insertions(+), 3 deletions(-) diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index ef1e0e1c13a9..274522f14c21 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -59,6 +60,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -68,6 +70,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -2405,7 +2408,7 @@ filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, s } /* - * Get file structures. + * Get file structures globally. */ static int sysctl_kern_file(SYSCTL_HANDLER_ARGS) @@ -2488,6 +2491,182 @@ sysctl_kern_file(SYSCTL_HANDLER_ARGS) SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_kern_file, "S,xfile", "Entire file table"); +/* + * Get per-process file descriptors for use by procstat(1), et al. 
+ *
+ * arg1 points at the sysctl name vector; name[0] is the pid of the target
+ * process.  One struct kinfo_file is copied out with SYSCTL_OUT() for every
+ * non-NULL slot in the target's descriptor array.
+ *
+ * Returns ESRCH if the pid is not found, the p_candebug() error if the
+ * caller may not debug the target, ENOENT if the target has no descriptor
+ * table, and otherwise 0 or the error that made SYSCTL_OUT() fail.
+ */
+static int
+sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
+{
+	char *fullpath, *freepath;
+	struct kinfo_file *kif;
+	struct filedesc *fdp;
+	int error, i, *name;
+	struct socket *so;
+	struct vnode *vp;
+	struct file *fp;
+	struct proc *p;
+	int vfslocked;
+
+	name = (int *)arg1;
+	if ((p = pfind((pid_t)name[0])) == NULL)
+		return (ESRCH);
+	if ((error = p_candebug(curthread, p))) {
+		PROC_UNLOCK(p);
+		return (error);
+	}
+	fdp = fdhold(p);
+	PROC_UNLOCK(p);
+	/*
+	 * fdhold() yields NULL when the process has no descriptor table
+	 * (any longer); there is nothing to hold, walk or drop in that case.
+	 */
+	if (fdp == NULL)
+		return (ENOENT);
+	kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK);
+	FILEDESC_SLOCK(fdp);
+	for (i = 0; i < fdp->fd_nfiles; i++) {
+		if ((fp = fdp->fd_ofiles[i]) == NULL)
+			continue;
+		bzero(kif, sizeof(*kif));
+		kif->kf_structsize = sizeof(*kif);
+		FILE_LOCK(fp);
+		vp = NULL;
+		so = NULL;
+		kif->kf_fd = i;
+		switch (fp->f_type) {
+		case DTYPE_VNODE:
+			kif->kf_type = KF_TYPE_VNODE;
+			vp = fp->f_vnode;
+			vref(vp);
+			break;
+
+		case DTYPE_SOCKET:
+			kif->kf_type = KF_TYPE_SOCKET;
+			so = fp->f_data;
+			break;
+
+		case DTYPE_PIPE:
+			kif->kf_type = KF_TYPE_PIPE;
+			break;
+
+		case DTYPE_FIFO:
+			kif->kf_type = KF_TYPE_FIFO;
+			vp = fp->f_vnode;
+			vref(vp);
+			break;
+
+		case DTYPE_KQUEUE:
+			kif->kf_type = KF_TYPE_KQUEUE;
+			break;
+
+		case DTYPE_CRYPTO:
+			kif->kf_type = KF_TYPE_CRYPTO;
+			break;
+
+		case DTYPE_MQUEUE:
+			kif->kf_type = KF_TYPE_MQUEUE;
+			break;
+
+		default:
+			kif->kf_type = KF_TYPE_UNKNOWN;
+			break;
+		}
+		kif->kf_ref_count = fp->f_count;
+		/* Translate kernel f_flag bits into the exported KF_FLAG_*. */
+		if (fp->f_flag & FREAD)
+			kif->kf_flags |= KF_FLAG_READ;
+		if (fp->f_flag & FWRITE)
+			kif->kf_flags |= KF_FLAG_WRITE;
+		if (fp->f_flag & FAPPEND)
+			kif->kf_flags |= KF_FLAG_APPEND;
+		if (fp->f_flag & FASYNC)
+			kif->kf_flags |= KF_FLAG_ASYNC;
+		if (fp->f_flag & FFSYNC)
+			kif->kf_flags |= KF_FLAG_FSYNC;
+		if (fp->f_flag & FNONBLOCK)
+			kif->kf_flags |= KF_FLAG_NONBLOCK;
+		if (fp->f_flag & O_DIRECT)
+			kif->kf_flags |= KF_FLAG_DIRECT;
+		if (fp->f_flag & FHASLOCK)
+			kif->kf_flags |= KF_FLAG_HASLOCK;
+		kif->kf_offset = fp->f_offset;
+		FILE_UNLOCK(fp);
+		if (vp != NULL) {
+			switch (vp->v_type) {
+			case VNON:
+				kif->kf_vnode_type = KF_VTYPE_VNON;
+				break;
+			case VREG:
+				kif->kf_vnode_type = KF_VTYPE_VREG;
+				break;
+			case VDIR:
+				kif->kf_vnode_type = KF_VTYPE_VDIR;
+				break;
+			case VBLK:
+				kif->kf_vnode_type = KF_VTYPE_VBLK;
+				break;
+			case VCHR:
+				kif->kf_vnode_type = KF_VTYPE_VCHR;
+				break;
+			case VLNK:
+				kif->kf_vnode_type = KF_VTYPE_VLNK;
+				break;
+			case VSOCK:
+				kif->kf_vnode_type = KF_VTYPE_VSOCK;
+				break;
+			case VFIFO:
+				kif->kf_vnode_type = KF_VTYPE_VFIFO;
+				break;
+			case VBAD:
+				kif->kf_vnode_type = KF_VTYPE_VBAD;
+				break;
+			default:
+				kif->kf_vnode_type = KF_VTYPE_UNKNOWN;
+				break;
+			}
+			/*
+			 * It is OK to drop the filedesc lock here as we will
+			 * re-validate and re-evaluate its properties when
+			 * the loop continues.
+			 */
+			freepath = NULL;
+			fullpath = "-";
+			FILEDESC_SUNLOCK(fdp);
+			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread);
+			vn_fullpath(curthread, vp, &fullpath, &freepath);
+			/* vput() releases both the lock and the vref() above. */
+			vput(vp);
+			VFS_UNLOCK_GIANT(vfslocked);
+			strlcpy(kif->kf_path, fullpath,
+			    sizeof(kif->kf_path));
+			if (freepath != NULL)
+				free(freepath, M_TEMP);
+			FILEDESC_SLOCK(fdp);
+		}
+		if (so != NULL) {
+			struct sockaddr *sa;
+
+			/*
+			 * A successful pru_sockaddr()/pru_peeraddr() hands
+			 * back an M_SONAME allocation.  Free it on every
+			 * success, including when the address is too large
+			 * to fit and is therefore left zeroed in kif.
+			 */
+			if (so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa)
+			    == 0) {
+				if (sa->sa_len <= sizeof(kif->kf_sa_local))
+					bcopy(sa, &kif->kf_sa_local,
+					    sa->sa_len);
+				free(sa, M_SONAME);
+			}
+			if (so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa)
+			    == 0) {
+				if (sa->sa_len <= sizeof(kif->kf_sa_peer))
+					bcopy(sa, &kif->kf_sa_peer,
+					    sa->sa_len);
+				free(sa, M_SONAME);
+			}
+			kif->kf_sock_domain =
+			    so->so_proto->pr_domain->dom_family;
+			kif->kf_sock_type = so->so_type;
+			kif->kf_sock_protocol = so->so_proto->pr_protocol;
+		}
+		error = SYSCTL_OUT(req, kif, sizeof(*kif));
+		if (error)
+			break;
+	}
+	FILEDESC_SUNLOCK(fdp);
+	fddrop(fdp);
+	free(kif, M_TEMP);
+	/* Propagate a failed copyout instead of reporting success. */
+	return (error);
+}
+
+static SYSCTL_NODE(_kern_proc, KERN_PROC_FILEDESC, filedesc, CTLFLAG_RD,
+    sysctl_kern_proc_filedesc, "Process filedesc entries");
+
 #ifdef DDB
/* * For the purposes of debugging, generate a human-readable string for the diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index 0677d7c12a69..d2f9c0e87cd3 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -32,6 +32,7 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_ddb.h" #include "opt_ktrace.h" #include "opt_kstack_pages.h" @@ -40,9 +41,11 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include +#include #include #include #include @@ -60,10 +63,15 @@ __FBSDID("$FreeBSD$"); #include #endif +#ifdef DDB +#include +#endif + #include #include #include #include +#include #include MALLOC_DEFINE(M_PGRP, "pgrp", "process group header"); @@ -1284,8 +1292,155 @@ sysctl_kern_proc_sv_name(SYSCTL_HANDLER_ARGS) return (sysctl_handle_string(oidp, sv_name, 0, req)); } +/* + * Get per-process VM map entries for use by procstat(1), et al. name[0] + * (via arg1) is the target pid. One struct kinfo_vmentry is copied out + * with SYSCTL_OUT() per map entry; submap entries are skipped. Returns + * ESRCH if the pid is not found, the p_candebug() error if debugging is + * not permitted, and otherwise the result of the last SYSCTL_OUT(). + */ +static int +sysctl_kern_proc_vmmap(SYSCTL_HANDLER_ARGS) +{ + vm_map_entry_t entry, tmp_entry; + unsigned int last_timestamp; + char *fullpath, *freepath; + struct kinfo_vmentry *kve; + int error, *name; + struct vnode *vp; + struct proc *p; + vm_map_t map; -static SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD, 0, "Process table"); + name = (int *)arg1; + if ((p = pfind((pid_t)name[0])) == NULL) + return (ESRCH); + if ((error = p_candebug(curthread, p))) { + PROC_UNLOCK(p); + return (error); + } + _PHOLD(p); + PROC_UNLOCK(p); + + kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK); + + map = &p->p_vmspace->vm_map; /* XXXRW: More locking required? 
*/ + vm_map_lock_read(map); + for (entry = map->header.next; entry != &map->header; + entry = entry->next) { + vm_object_t obj, tobj, lobj; + vm_offset_t addr; + int vfslocked; + + if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) + continue; + + bzero(kve, sizeof(*kve)); + kve->kve_structsize = sizeof(*kve); + + kve->kve_private_resident = 0; + obj = entry->object.vm_object; + if (obj != NULL) { + VM_OBJECT_LOCK(obj); + if (obj->shadow_count == 1) + kve->kve_private_resident = + obj->resident_page_count; + } + kve->kve_resident = 0; + addr = entry->start; + while (addr < entry->end) { + if (pmap_extract(map->pmap, addr)) + kve->kve_resident++; + addr += PAGE_SIZE; + } + + /* + * Descend to the last object in the backing_object chain. + * obj stays locked throughout; each deeper object is + * locked before the previous one is unlocked, so on exit + * lobj (if any) is locked as well. + */ + for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) { + if (tobj != obj) + VM_OBJECT_LOCK(tobj); + if (lobj != obj) + VM_OBJECT_UNLOCK(lobj); + lobj = tobj; + } + + freepath = NULL; + fullpath = ""; + if (lobj) { + vp = NULL; + switch(lobj->type) { + case OBJT_DEFAULT: + kve->kve_type = KVME_TYPE_DEFAULT; + break; + case OBJT_VNODE: + kve->kve_type = KVME_TYPE_VNODE; + vp = lobj->handle; + vref(vp); + break; + case OBJT_SWAP: + kve->kve_type = KVME_TYPE_SWAP; + break; + case OBJT_DEVICE: + kve->kve_type = KVME_TYPE_DEVICE; + break; + case OBJT_PHYS: + kve->kve_type = KVME_TYPE_PHYS; + break; + case OBJT_DEAD: + kve->kve_type = KVME_TYPE_DEAD; + break; + default: + kve->kve_type = KVME_TYPE_UNKNOWN; + break; + } + if (lobj != obj) + VM_OBJECT_UNLOCK(lobj); + + kve->kve_ref_count = obj->ref_count; + kve->kve_shadow_count = obj->shadow_count; + VM_OBJECT_UNLOCK(obj); + if (vp != NULL) { + vfslocked = VFS_LOCK_GIANT(vp->v_mount); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, + curthread); + vn_fullpath(curthread, vp, &fullpath, + &freepath); + vput(vp); + VFS_UNLOCK_GIANT(vfslocked); + } + } else { + kve->kve_type = KVME_TYPE_NONE; + kve->kve_ref_count = 0; + kve->kve_shadow_count = 0; + } + + kve->kve_start = (void*)entry->start; + kve->kve_end = (void*)entry->end; + + if (entry->protection & VM_PROT_READ) 
+ kve->kve_protection |= KVME_PROT_READ; + if (entry->protection & VM_PROT_WRITE) + kve->kve_protection |= KVME_PROT_WRITE; + if (entry->protection & VM_PROT_EXECUTE) + kve->kve_protection |= KVME_PROT_EXEC; + + if (entry->eflags & MAP_ENTRY_COW) + kve->kve_flags |= KVME_FLAG_COW; + if (entry->eflags & MAP_ENTRY_NEEDS_COPY) + kve->kve_flags |= KVME_FLAG_NEEDS_COPY; + + strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path)); + if (freepath != NULL) + free(freepath, M_TEMP); + + /* + * The map read lock is dropped around SYSCTL_OUT(). If the + * map timestamp afterwards is not exactly one past the value + * saved here, the entry is looked up again by address, using + * addr - 1 (addr was advanced to the end of this entry by + * the residency loop), before the loop moves on to its next. + * NOTE(review): presumably the expected increment of one is + * caused by our own unlock/relock -- confirm in vm_map. + */ + last_timestamp = map->timestamp; + vm_map_unlock_read(map); + error = SYSCTL_OUT(req, kve, sizeof(*kve)); + vm_map_lock_read(map); + if (error) + break; + if (last_timestamp + 1 != map->timestamp) { + vm_map_lookup_entry(map, addr - 1, &tmp_entry); + entry = tmp_entry; + } + } + vm_map_unlock_read(map); + PRELE(p); + free(kve, M_TEMP); + return (error); +} + +SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD, 0, "Process table"); SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT, 0, 0, sysctl_kern_proc, "S,proc", "Return entire process table"); @@ -1353,3 +1508,6 @@ static SYSCTL_NODE(_kern_proc, (KERN_PROC_PID | KERN_PROC_INC_THREAD), pid_td, static SYSCTL_NODE(_kern_proc, (KERN_PROC_PROC | KERN_PROC_INC_THREAD), proc_td, CTLFLAG_RD, sysctl_kern_proc, "Return process table, no threads"); + +static SYSCTL_NODE(_kern_proc, KERN_PROC_VMMAP, vmmap, CTLFLAG_RD, + sysctl_kern_proc_vmmap, "Process vm map entries"); diff --git a/sys/sys/sysctl.h b/sys/sys/sysctl.h index ebf7ecd483bf..20622a560b5d 100644 --- a/sys/sys/sysctl.h +++ b/sys/sys/sysctl.h @@ -456,6 +456,8 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry); #define KERN_PROC_RGID 10 /* by real group id */ #define KERN_PROC_GID 11 /* by effective group id */ #define KERN_PROC_PATHNAME 12 /* path to executable */ +#define KERN_PROC_VMMAP 13 /* VM map entries for process */ +#define KERN_PROC_FILEDESC 14 /* File descriptors for process */ #define KERN_PROC_INC_THREAD 0x10 /* * modifier for pid, pgrp, tty, * uid, ruid, gid, rgid 
and proc @@ -619,6 +621,7 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry); extern struct sysctl_oid_list sysctl__children; SYSCTL_DECL(_kern); SYSCTL_DECL(_kern_ipc); +SYSCTL_DECL(_kern_proc); SYSCTL_DECL(_sysctl); SYSCTL_DECL(_vm); SYSCTL_DECL(_vm_stats); diff --git a/sys/sys/user.h b/sys/sys/user.h index 348b3e0edab0..9e4f14f163cf 100644 --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -1,6 +1,8 @@ /*- * Copyright (c) 1982, 1986, 1989, 1991, 1993 - * The Regents of the University of California. All rights reserved. + * The Regents of the University of California. + * Copyright (c) 2007 Robert N. M. Watson + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -56,6 +58,9 @@ #ifndef _SYS_SIGNALVAR_H_ #include #endif +#ifndef _SYS_SOCKET_VAR_H_ +#include +#endif /* * KERN_PROC subtype ops return arrays of selected proc structure entries: @@ -228,4 +233,90 @@ struct user { struct kinfo_proc u_kproc; /* eproc */ }; +/* + * The KERN_PROC_FILEDESC sysctl allows a process to dump the file descriptor + * array of another process as a series of struct kinfo_file entries. + */ +#define KF_TYPE_NONE 0 +#define KF_TYPE_VNODE 1 +#define KF_TYPE_SOCKET 2 +#define KF_TYPE_PIPE 3 +#define KF_TYPE_FIFO 4 +#define KF_TYPE_KQUEUE 5 +#define KF_TYPE_CRYPTO 6 +#define KF_TYPE_MQUEUE 7 +#define KF_TYPE_UNKNOWN 255 + +#define KF_VTYPE_VNON 0 +#define KF_VTYPE_VREG 1 +#define KF_VTYPE_VDIR 2 +#define KF_VTYPE_VBLK 3 +#define KF_VTYPE_VCHR 4 +#define KF_VTYPE_VLNK 5 +#define KF_VTYPE_VSOCK 6 +#define KF_VTYPE_VFIFO 7 +#define KF_VTYPE_VBAD 8 +#define KF_VTYPE_UNKNOWN 255 + +#define KF_FLAG_READ 0x00000001 +#define KF_FLAG_WRITE 0x00000002 +#define KF_FLAG_APPEND 0x00000004 +#define KF_FLAG_ASYNC 0x00000008 +#define KF_FLAG_FSYNC 0x00000010 +#define KF_FLAG_NONBLOCK 0x00000020 +#define KF_FLAG_DIRECT 0x00000040 +#define KF_FLAG_HASLOCK 0x00000080 + +struct kinfo_file { + int kf_structsize; /* Size of kinfo_file. 
*/ + int kf_type; /* Descriptor type (KF_TYPE_*). */ + int kf_fd; /* Index in descriptor array. */ + int kf_ref_count; /* Reference count. */ + int kf_flags; /* Flags (KF_FLAG_*). */ + off_t kf_offset; /* Seek location. */ + int kf_vnode_type; /* Vnode type (KF_VTYPE_*). */ + int kf_sock_domain; /* Socket domain. */ + int kf_sock_type; /* Socket type. */ + int kf_sock_protocol; /* Socket protocol. */ + char kf_path[PATH_MAX]; /* Path to file, if any. */ + struct sockaddr_storage kf_sa_local; /* Local socket address. */ + struct sockaddr_storage kf_sa_peer; /* Peer address. */ +}; + +/* + * The KERN_PROC_VMMAP sysctl allows a process to dump the VM layout of + * another process as a series of struct kinfo_vmentry entries. + */ +#define KVME_TYPE_NONE 0 +#define KVME_TYPE_DEFAULT 1 +#define KVME_TYPE_VNODE 2 +#define KVME_TYPE_SWAP 3 +#define KVME_TYPE_DEVICE 4 +#define KVME_TYPE_PHYS 5 +#define KVME_TYPE_DEAD 6 +#define KVME_TYPE_UNKNOWN 255 + +#define KVME_PROT_READ 0x00000001 +#define KVME_PROT_WRITE 0x00000002 +#define KVME_PROT_EXEC 0x00000004 + +#define KVME_FLAG_COW 0x00000001 +#define KVME_FLAG_NEEDS_COPY 0x00000002 + +struct kinfo_vmentry { + int kve_structsize; /* Size of kinfo_vmentry. */ + int kve_type; /* Type of map entry (KVME_TYPE_*). */ + void *kve_start; /* Starting pointer. */ + void *kve_end; /* Finishing pointer. */ + int kve_flags; /* Flags on map entry (KVME_FLAG_*). */ + int kve_resident; /* Number of resident pages. */ + int kve_private_resident; /* Number of private pages. */ + int kve_protection; /* Protection bitmask (KVME_PROT_*). */ + int kve_ref_count; /* VM obj ref count. */ + int kve_shadow_count; /* VM obj shadow count. */ + char kve_path[PATH_MAX]; /* Path to VM obj, if any. */ + void *_kve_pspare[8]; /* Space for more stuff. */ + int _kve_ispare[8]; /* Space for more stuff. */ +}; + #endif