From 5e27d793148c9f71a3017981e2362c49559553b9 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Mon, 23 Nov 2015 07:09:35 +0000 Subject: [PATCH] Split kernel timekeep ABI structure vdso_sv_tk out of the struct sysentvec. This allows the timekeep data to be shared between similar ABIs which cannot share sysentvec. Make the timekeep_push_vdso() tick callback operate on the timekeep structures instead of sysentvecs. Without this change, if several sysentvecs shared the vdso_sv_tk structure, we would update the userspace data several times on each tick. Only allocate vdso_sv_tk in the exec_sysvec_init() sysinit when sysentvec is marked with the new SV_TIMEKEEP flag. This saves allocation and update of unneeded vdso_sv_tk for ABIs which do not provide userspace gettimeofday yet, which are the PowerPC arches right now. Make vdso_sv_tk allocator public, namely split out and export alloc_sv_tk() and alloc_sv_tk_compat32(). ABIs which share timekeep data can now allocate it manually and share as appropriate. 
Requested by: nwhitehorn Tested by: nwhitehorn, pho Sponsored by: The FreeBSD Foundation MFC after: 2 weeks --- sys/amd64/amd64/elf_machdep.c | 2 +- sys/compat/ia32/ia32_sysvec.c | 2 +- sys/i386/i386/elf_machdep.c | 3 +- sys/kern/kern_sharedpage.c | 125 +++++++++++++++++++++------------- sys/sys/sysent.h | 7 +- sys/sys/vdso.h | 9 +++ 6 files changed, 95 insertions(+), 53 deletions(-) diff --git a/sys/amd64/amd64/elf_machdep.c b/sys/amd64/amd64/elf_machdep.c index d961f092cdf3..86efdd9ba4a7 100644 --- a/sys/amd64/amd64/elf_machdep.c +++ b/sys/amd64/amd64/elf_machdep.c @@ -75,7 +75,7 @@ struct sysentvec elf64_freebsd_sysvec = { .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, - .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP, + .sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP | SV_TIMEKEEP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, diff --git a/sys/compat/ia32/ia32_sysvec.c b/sys/compat/ia32/ia32_sysvec.c index 51a9e9ff244a..c580e5d74345 100644 --- a/sys/compat/ia32/ia32_sysvec.c +++ b/sys/compat/ia32/ia32_sysvec.c @@ -125,7 +125,7 @@ struct sysentvec ia32_freebsd_sysvec = { .sv_maxssiz = &ia32_maxssiz, .sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 | #ifdef __amd64__ - SV_SHP + SV_SHP | SV_TIMEKEEP #else 0 #endif diff --git a/sys/i386/i386/elf_machdep.c b/sys/i386/i386/elf_machdep.c index 81d6e35b3a45..376dd0bc3440 100644 --- a/sys/i386/i386/elf_machdep.c +++ b/sys/i386/i386/elf_machdep.c @@ -81,7 +81,8 @@ struct sysentvec elf32_freebsd_sysvec = { .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, - .sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 | SV_SHP, + .sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 | SV_SHP | + SV_TIMEKEEP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, diff --git a/sys/kern/kern_sharedpage.c b/sys/kern/kern_sharedpage.c index 
6ad2ed8bde4b..750147242bd2 100644 --- a/sys/kern/kern_sharedpage.c +++ b/sys/kern/kern_sharedpage.c @@ -1,7 +1,11 @@ /*- * Copyright (c) 2010, 2012 Konstantin Belousov + * Copyright (c) 2015 The FreeBSD Foundation * All rights reserved. * + * Portions of this software were developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -34,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -127,7 +132,7 @@ SYSINIT(shp, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)shared_page_init, * calls us after the timehands are updated). */ static void -timehands_update(struct sysentvec *sv) +timehands_update(struct vdso_sv_tk *svtk) { struct vdso_timehands th; struct vdso_timekeep *tk; @@ -135,20 +140,20 @@ timehands_update(struct sysentvec *sv) enabled = tc_fill_vdso_timehands(&th); th.th_gen = 0; - idx = sv->sv_timekeep_curr; + idx = svtk->sv_timekeep_curr; if (++idx >= VDSO_TH_NUM) idx = 0; - sv->sv_timekeep_curr = idx; - if (++sv->sv_timekeep_gen == 0) - sv->sv_timekeep_gen = 1; + svtk->sv_timekeep_curr = idx; + if (++svtk->sv_timekeep_gen == 0) + svtk->sv_timekeep_gen = 1; tk = (struct vdso_timekeep *)(shared_page_mapping + - sv->sv_timekeep_off); + svtk->sv_timekeep_off); tk->tk_th[idx].th_gen = 0; atomic_thread_fence_rel(); if (enabled) tk->tk_th[idx] = th; - atomic_store_rel_32(&tk->tk_th[idx].th_gen, sv->sv_timekeep_gen); + atomic_store_rel_32(&tk->tk_th[idx].th_gen, svtk->sv_timekeep_gen); atomic_store_rel_32(&tk->tk_current, idx); /* @@ -160,7 +165,7 @@ timehands_update(struct sysentvec *sv) #ifdef COMPAT_FREEBSD32 static void -timehands_update32(struct sysentvec *sv) +timehands_update32(struct vdso_sv_tk *svtk) { struct vdso_timehands32 th; struct vdso_timekeep32 *tk; @@ -168,20 +173,20 @@ timehands_update32(struct sysentvec *sv) enabled = 
tc_fill_vdso_timehands32(&th); th.th_gen = 0; - idx = sv->sv_timekeep_curr; + idx = svtk->sv_timekeep_curr; if (++idx >= VDSO_TH_NUM) idx = 0; - sv->sv_timekeep_curr = idx; - if (++sv->sv_timekeep_gen == 0) - sv->sv_timekeep_gen = 1; + svtk->sv_timekeep_curr = idx; + if (++svtk->sv_timekeep_gen == 0) + svtk->sv_timekeep_gen = 1; tk = (struct vdso_timekeep32 *)(shared_page_mapping + - sv->sv_timekeep_off); + svtk->sv_timekeep_off); tk->tk_th[idx].th_gen = 0; atomic_thread_fence_rel(); if (enabled) tk->tk_th[idx] = th; - atomic_store_rel_32(&tk->tk_th[idx].th_gen, sv->sv_timekeep_gen); + atomic_store_rel_32(&tk->tk_th[idx].th_gen, svtk->sv_timekeep_gen); atomic_store_rel_32(&tk->tk_current, idx); tk->tk_enabled = enabled; } @@ -192,33 +197,69 @@ timehands_update32(struct sysentvec *sv) * that needs to be iterated over from the hardclock interrupt * context. */ -static struct sysentvec *host_sysentvec; +static struct vdso_sv_tk *host_svtk; #ifdef COMPAT_FREEBSD32 -static struct sysentvec *compat32_sysentvec; +static struct vdso_sv_tk *compat32_svtk; #endif void timekeep_push_vdso(void) { - if (host_sysentvec != NULL && host_sysentvec->sv_timekeep_base != 0) - timehands_update(host_sysentvec); + if (host_svtk != NULL) + timehands_update(host_svtk); #ifdef COMPAT_FREEBSD32 - if (compat32_sysentvec != NULL && - compat32_sysentvec->sv_timekeep_base != 0) - timehands_update32(compat32_sysentvec); + if (compat32_svtk != NULL) + timehands_update32(compat32_svtk); #endif } +struct vdso_sv_tk * +alloc_sv_tk(void) +{ + struct vdso_sv_tk *svtk; + int tk_base; + uint32_t tk_ver; + + tk_ver = VDSO_TK_VER_CURR; + svtk = malloc(sizeof(struct vdso_sv_tk), M_TEMP, M_WAITOK | M_ZERO); + tk_base = shared_page_alloc(sizeof(struct vdso_timekeep) + + sizeof(struct vdso_timehands) * VDSO_TH_NUM, 16); + KASSERT(tk_base != -1, ("tk_base -1 for native")); + shared_page_write(tk_base + offsetof(struct vdso_timekeep, tk_ver), + sizeof(uint32_t), &tk_ver); + svtk->sv_timekeep_off = tk_base; + 
timekeep_push_vdso(); + return (svtk); +} + +#ifdef COMPAT_FREEBSD32 +struct vdso_sv_tk * +alloc_sv_tk_compat32(void) +{ + struct vdso_sv_tk *svtk; + int tk_base; + uint32_t tk_ver; + + svtk = malloc(sizeof(struct vdso_sv_tk), M_TEMP, M_WAITOK | M_ZERO); + tk_ver = VDSO_TK_VER_CURR; + tk_base = shared_page_alloc(sizeof(struct vdso_timekeep32) + + sizeof(struct vdso_timehands32) * VDSO_TH_NUM, 16); + KASSERT(tk_base != -1, ("tk_base -1 for 32bit")); + shared_page_write(tk_base + offsetof(struct vdso_timekeep32, + tk_ver), sizeof(uint32_t), &tk_ver); + svtk->sv_timekeep_off = tk_base; + timekeep_push_vdso(); + return (svtk); +} +#endif + void exec_sysvec_init(void *param) { struct sysentvec *sv; - int tk_base; - uint32_t tk_ver; sv = (struct sysentvec *)param; - if ((sv->sv_flags & SV_SHP) == 0) return; sv->sv_shared_page_obj = shared_page_obj; @@ -226,30 +267,22 @@ exec_sysvec_init(void *param) shared_page_fill(*(sv->sv_szsigcode), 16, sv->sv_sigcode); if ((sv->sv_flags & SV_ABI_MASK) != SV_ABI_FREEBSD) return; - tk_ver = VDSO_TK_VER_CURR; + if ((sv->sv_flags & SV_TIMEKEEP) != 0) { #ifdef COMPAT_FREEBSD32 - if ((sv->sv_flags & SV_ILP32) != 0) { - tk_base = shared_page_alloc(sizeof(struct vdso_timekeep32) + - sizeof(struct vdso_timehands32) * VDSO_TH_NUM, 16); - KASSERT(tk_base != -1, ("tk_base -1 for 32bit")); - shared_page_write(tk_base + offsetof(struct vdso_timekeep32, - tk_ver), sizeof(uint32_t), &tk_ver); - KASSERT(compat32_sysentvec == 0, - ("Native compat32 already registered")); - compat32_sysentvec = sv; - } else { + if ((sv->sv_flags & SV_ILP32) != 0) { + KASSERT(compat32_svtk == NULL, + ("Compat32 already registered")); + compat32_svtk = alloc_sv_tk_compat32(); + sv->sv_timekeep_base = sv->sv_shared_page_base + + compat32_svtk->sv_timekeep_off; + } else { #endif - tk_base = shared_page_alloc(sizeof(struct vdso_timekeep) + - sizeof(struct vdso_timehands) * VDSO_TH_NUM, 16); - KASSERT(tk_base != -1, ("tk_base -1 for native")); - shared_page_write(tk_base + 
offsetof(struct vdso_timekeep, - tk_ver), sizeof(uint32_t), &tk_ver); - KASSERT(host_sysentvec == 0, ("Native already registered")); - host_sysentvec = sv; + KASSERT(host_svtk == NULL, ("Host already registered")); + host_svtk = alloc_sv_tk(); + sv->sv_timekeep_base = sv->sv_shared_page_base + + host_svtk->sv_timekeep_off; #ifdef COMPAT_FREEBSD32 + } +#endif } -#endif - sv->sv_timekeep_base = sv->sv_shared_page_base + tk_base; - sv->sv_timekeep_off = tk_base; - timekeep_push_vdso(); } diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h index db929815ca49..7e25ee49bc36 100644 --- a/sys/sys/sysent.h +++ b/sys/sys/sysent.h @@ -127,13 +127,10 @@ struct sysentvec { int (*sv_fetch_syscall_args)(struct thread *, struct syscall_args *); const char **sv_syscallnames; + vm_offset_t sv_timekeep_base; vm_offset_t sv_shared_page_base; vm_offset_t sv_shared_page_len; vm_offset_t sv_sigcode_base; - vm_offset_t sv_timekeep_base; - int sv_timekeep_off; - int sv_timekeep_curr; - uint32_t sv_timekeep_gen; void *sv_shared_page_obj; void (*sv_schedtail)(struct thread *); void (*sv_thread_detach)(struct thread *); @@ -145,6 +142,7 @@ struct sysentvec { #define SV_AOUT 0x008000 /* a.out executable. */ #define SV_SHP 0x010000 /* Shared page. */ #define SV_CAPSICUM 0x020000 /* Force cap_enter() on startup. 
*/ +#define SV_TIMEKEEP 0x040000 #define SV_ABI_MASK 0xff #define SV_PROC_FLAG(p, x) ((p)->p_sysent->sv_flags & (x)) @@ -274,6 +272,7 @@ int shared_page_alloc(int size, int align); int shared_page_fill(int size, int align, const void *data); void shared_page_write(int base, int size, const void *data); void exec_sysvec_init(void *param); +void exec_inittk(void); #define INIT_SYSENTVEC(name, sv) \ SYSINIT(name, SI_SUB_EXEC, SI_ORDER_ANY, \ diff --git a/sys/sys/vdso.h b/sys/sys/vdso.h index d905304ac68a..907f4db00cc0 100644 --- a/sys/sys/vdso.h +++ b/sys/sys/vdso.h @@ -71,6 +71,12 @@ int __vdso_gettimekeep(struct vdso_timekeep **tk); struct timecounter; +struct vdso_sv_tk { + int sv_timekeep_off; + int sv_timekeep_curr; + uint32_t sv_timekeep_gen; +}; + void timekeep_push_vdso(void); uint32_t tc_fill_vdso_timehands(struct vdso_timehands *vdso_th); @@ -86,6 +92,8 @@ uint32_t tc_fill_vdso_timehands(struct vdso_timehands *vdso_th); uint32_t cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc); +struct vdso_sv_tk *alloc_sv_tk(void); + #define VDSO_TH_NUM 4 #ifdef COMPAT_FREEBSD32 @@ -115,6 +123,7 @@ struct vdso_timekeep32 { uint32_t tc_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32); uint32_t cpu_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32, struct timecounter *tc); +struct vdso_sv_tk *alloc_sv_tk_compat32(void); #endif #endif