1
0
mirror of https://git.FreeBSD.org/src.git synced 2024-12-19 10:53:58 +00:00
freebsd/lib/libkse/thread/thr_create.c

346 lines
11 KiB
C
Raw Normal View History

/*
* Copyright (c) 2003 Daniel M. Eischen <deischen@gdeb.com>
* Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by John Birrell.
* 4. Neither the name of the author nor the names of any co-contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
1999-08-28 00:22:10 +00:00
* $FreeBSD$
*/
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <stddef.h>
#include <sys/time.h>
#include <machine/reg.h>
#include <pthread.h>
#include "thr_private.h"
#include "libc_private.h"
static void free_thread(struct pthread *curthread, struct pthread *thread);
static int create_stack(struct pthread_attr *pattr);
static void free_stack(struct pthread_attr *pattr);
static void thread_start(struct pthread *curthread,
void *(*start_routine) (void *), void *arg);
__weak_reference(_pthread_create, pthread_create);
/*
* Some notes on new thread creation and first time initializion
* to enable multi-threading.
*
* There are basically two things that need to be done.
*
* 1) The internal library variables must be initialized.
* 2) Upcalls need to be enabled to allow multiple threads
* to be run.
*
* The first may be done as a result of other pthread functions
* being called. When _thr_initial is null, _libpthread_init is
* called to initialize the internal variables; this also creates
* or sets the initial thread. It'd be nice to automatically
* have _libpthread_init called on program execution so we don't
* have to have checks throughout the library.
*
* The second part is only triggered by the creation of the first
* thread (other than the initial/main thread). If the thread
* being created is a scope system thread, then a new KSE/KSEG
* pair needs to be allocated. Also, if upcalls haven't been
* enabled on the initial thread's KSE, they must be now that
* there is more than one thread; this could be delayed until
* the initial KSEG has more than one thread.
*/
int
_pthread_create(pthread_t * thread, const pthread_attr_t * attr,
void *(*start_routine) (void *), void *arg)
{
struct pthread *curthread, *new_thread;
struct kse *kse = NULL;
struct kse_group *kseg = NULL;
kse_critical_t crit;
int ret = 0;
if (_thr_initial == NULL)
_libpthread_init(NULL);
Add an i386-specifc hack to always set %gs. There still seems to be instances where the kernel doesn't properly save and/or restore it. Use noupcall and nocompleted flags in the KSE mailbox. These require kernel changes to work which will be committed sometime later. Things still work without the changes. Remove the general kse entry function and use two different functions -- one for scope system threads and one for scope process threads. The scope system function is not yet enabled and we use the same function for all threads at the moment. Keep a copy of the KSE stack for the case that a KSE runs a scope system thread and uses the same stack as the thread (no upcalls are generated, so a separate stack isn't needed). This isn't enabled yet. Use a separate field for the KSE waiting flag. It isn't correct to use the mailbox flags field. The following fixes were provided by David Xu: o Initialize condition variable locks with thread versions of the low-level locking functions instead of the kse versions. o Enable threading before creating the first thread instead of after. o Don't enter critical regions when trying to malloc/free or call functions that malloc/free. o Take the scheduling lock when inheriting thread attributes. o Check the attribute's stack pointer instead of the attributes stack size for null when allocating a thread's stack. o Add a kseg reinit function so we don't have to destroy and then recreate the same lock. o Check the return value of kse_create() and return an appropriate error if it fails. o Don't forget to destroy a thread's locks when freeing it. o Examine the correct flags word for checking to see if a thread is in a synchronization queue. Things should now work on an SMP kernel.
2003-04-21 04:02:56 +00:00
/*
* Turn on threaded mode, if failed, it is unnecessary to
* do further work.
*/
if (_kse_isthreaded() == 0 && _kse_setthreaded(1)) {
return (EAGAIN);
}
curthread = _get_curthread();
Add an i386-specifc hack to always set %gs. There still seems to be instances where the kernel doesn't properly save and/or restore it. Use noupcall and nocompleted flags in the KSE mailbox. These require kernel changes to work which will be committed sometime later. Things still work without the changes. Remove the general kse entry function and use two different functions -- one for scope system threads and one for scope process threads. The scope system function is not yet enabled and we use the same function for all threads at the moment. Keep a copy of the KSE stack for the case that a KSE runs a scope system thread and uses the same stack as the thread (no upcalls are generated, so a separate stack isn't needed). This isn't enabled yet. Use a separate field for the KSE waiting flag. It isn't correct to use the mailbox flags field. The following fixes were provided by David Xu: o Initialize condition variable locks with thread versions of the low-level locking functions instead of the kse versions. o Enable threading before creating the first thread instead of after. o Don't enter critical regions when trying to malloc/free or call functions that malloc/free. o Take the scheduling lock when inheriting thread attributes. o Check the attribute's stack pointer instead of the attributes stack size for null when allocating a thread's stack. o Add a kseg reinit function so we don't have to destroy and then recreate the same lock. o Check the return value of kse_create() and return an appropriate error if it fails. o Don't forget to destroy a thread's locks when freeing it. o Examine the correct flags word for checking to see if a thread is in a synchronization queue. Things should now work on an SMP kernel.
2003-04-21 04:02:56 +00:00
/*
* Allocate memory for the thread structure.
* Some functions use malloc, so don't put it
* in a critical region.
*/
if ((new_thread = _thr_alloc(curthread)) == NULL) {
/* Insufficient memory to create a thread: */
ret = EAGAIN;
} else {
/* Check if default thread attributes are required: */
if (attr == NULL || *attr == NULL)
/* Use the default thread attributes: */
new_thread->attr = _pthread_attr_default;
else {
new_thread->attr = *(*attr);
if ((*attr)->sched_inherit == PTHREAD_INHERIT_SCHED) {
/* inherit scheduling contention scop */
if (curthread->attr.flags & PTHREAD_SCOPE_SYSTEM)
new_thread->attr.flags |= PTHREAD_SCOPE_SYSTEM;
else
new_thread->attr.flags &= ~PTHREAD_SCOPE_SYSTEM;
/*
* scheduling policy and scheduling parameters will be
* inherited in following code.
*/
}
}
if (_thread_scope_system > 0)
new_thread->attr.flags |= PTHREAD_SCOPE_SYSTEM;
else if ((_thread_scope_system < 0)
&& (thread != &_thr_sig_daemon))
new_thread->attr.flags &= ~PTHREAD_SCOPE_SYSTEM;
if (create_stack(&new_thread->attr) != 0) {
/* Insufficient memory to create a stack: */
ret = EAGAIN;
_thr_free(curthread, new_thread);
}
else if (((new_thread->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) &&
(((kse = _kse_alloc(curthread, 1)) == NULL)
|| ((kseg = _kseg_alloc(curthread)) == NULL))) {
/* Insufficient memory to create a new KSE/KSEG: */
ret = EAGAIN;
if (kse != NULL) {
kse->k_kcb->kcb_kmbx.km_flags |= KMF_DONE;
_kse_free(curthread, kse);
}
free_stack(&new_thread->attr);
_thr_free(curthread, new_thread);
}
else {
if (kseg != NULL) {
/* Add the KSE to the KSEG's list of KSEs. */
TAILQ_INSERT_HEAD(&kseg->kg_kseq, kse, k_kgqe);
kseg->kg_ksecount = 1;
kse->k_kseg = kseg;
kse->k_schedq = &kseg->kg_schedq;
}
/*
* Write a magic value to the thread structure
* to help identify valid ones:
*/
new_thread->magic = THR_MAGIC;
new_thread->slice_usec = -1;
new_thread->start_routine = start_routine;
new_thread->arg = arg;
new_thread->cancelflags = PTHREAD_CANCEL_ENABLE |
PTHREAD_CANCEL_DEFERRED;
/* No thread is wanting to join to this one: */
new_thread->joiner = NULL;
Add an i386-specifc hack to always set %gs. There still seems to be instances where the kernel doesn't properly save and/or restore it. Use noupcall and nocompleted flags in the KSE mailbox. These require kernel changes to work which will be committed sometime later. Things still work without the changes. Remove the general kse entry function and use two different functions -- one for scope system threads and one for scope process threads. The scope system function is not yet enabled and we use the same function for all threads at the moment. Keep a copy of the KSE stack for the case that a KSE runs a scope system thread and uses the same stack as the thread (no upcalls are generated, so a separate stack isn't needed). This isn't enabled yet. Use a separate field for the KSE waiting flag. It isn't correct to use the mailbox flags field. The following fixes were provided by David Xu: o Initialize condition variable locks with thread versions of the low-level locking functions instead of the kse versions. o Enable threading before creating the first thread instead of after. o Don't enter critical regions when trying to malloc/free or call functions that malloc/free. o Take the scheduling lock when inheriting thread attributes. o Check the attribute's stack pointer instead of the attributes stack size for null when allocating a thread's stack. o Add a kseg reinit function so we don't have to destroy and then recreate the same lock. o Check the return value of kse_create() and return an appropriate error if it fails. o Don't forget to destroy a thread's locks when freeing it. o Examine the correct flags word for checking to see if a thread is in a synchronization queue. Things should now work on an SMP kernel.
2003-04-21 04:02:56 +00:00
/*
* Initialize the machine context.
* Enter a critical region to get consistent context.
*/
crit = _kse_critical_enter();
THR_GETCONTEXT(&new_thread->tcb->tcb_tmbx.tm_context);
/* Initialize the thread for signals: */
new_thread->sigmask = curthread->sigmask;
Add an i386-specifc hack to always set %gs. There still seems to be instances where the kernel doesn't properly save and/or restore it. Use noupcall and nocompleted flags in the KSE mailbox. These require kernel changes to work which will be committed sometime later. Things still work without the changes. Remove the general kse entry function and use two different functions -- one for scope system threads and one for scope process threads. The scope system function is not yet enabled and we use the same function for all threads at the moment. Keep a copy of the KSE stack for the case that a KSE runs a scope system thread and uses the same stack as the thread (no upcalls are generated, so a separate stack isn't needed). This isn't enabled yet. Use a separate field for the KSE waiting flag. It isn't correct to use the mailbox flags field. The following fixes were provided by David Xu: o Initialize condition variable locks with thread versions of the low-level locking functions instead of the kse versions. o Enable threading before creating the first thread instead of after. o Don't enter critical regions when trying to malloc/free or call functions that malloc/free. o Take the scheduling lock when inheriting thread attributes. o Check the attribute's stack pointer instead of the attributes stack size for null when allocating a thread's stack. o Add a kseg reinit function so we don't have to destroy and then recreate the same lock. o Check the return value of kse_create() and return an appropriate error if it fails. o Don't forget to destroy a thread's locks when freeing it. o Examine the correct flags word for checking to see if a thread is in a synchronization queue. Things should now work on an SMP kernel.
2003-04-21 04:02:56 +00:00
_kse_critical_leave(crit);
new_thread->tcb->tcb_tmbx.tm_udata = new_thread;
new_thread->tcb->tcb_tmbx.tm_context.uc_sigmask =
new_thread->sigmask;
new_thread->tcb->tcb_tmbx.tm_context.uc_stack.ss_size =
new_thread->attr.stacksize_attr;
new_thread->tcb->tcb_tmbx.tm_context.uc_stack.ss_sp =
new_thread->attr.stackaddr_attr;
makecontext(&new_thread->tcb->tcb_tmbx.tm_context,
(void (*)(void))thread_start, 3, new_thread,
start_routine, arg);
/*
* Check if this thread is to inherit the scheduling
* attributes from its parent:
*/
if (new_thread->attr.sched_inherit == PTHREAD_INHERIT_SCHED) {
Add an i386-specifc hack to always set %gs. There still seems to be instances where the kernel doesn't properly save and/or restore it. Use noupcall and nocompleted flags in the KSE mailbox. These require kernel changes to work which will be committed sometime later. Things still work without the changes. Remove the general kse entry function and use two different functions -- one for scope system threads and one for scope process threads. The scope system function is not yet enabled and we use the same function for all threads at the moment. Keep a copy of the KSE stack for the case that a KSE runs a scope system thread and uses the same stack as the thread (no upcalls are generated, so a separate stack isn't needed). This isn't enabled yet. Use a separate field for the KSE waiting flag. It isn't correct to use the mailbox flags field. The following fixes were provided by David Xu: o Initialize condition variable locks with thread versions of the low-level locking functions instead of the kse versions. o Enable threading before creating the first thread instead of after. o Don't enter critical regions when trying to malloc/free or call functions that malloc/free. o Take the scheduling lock when inheriting thread attributes. o Check the attribute's stack pointer instead of the attributes stack size for null when allocating a thread's stack. o Add a kseg reinit function so we don't have to destroy and then recreate the same lock. o Check the return value of kse_create() and return an appropriate error if it fails. o Don't forget to destroy a thread's locks when freeing it. o Examine the correct flags word for checking to see if a thread is in a synchronization queue. Things should now work on an SMP kernel.
2003-04-21 04:02:56 +00:00
/*
* Copy the scheduling attributes.
* Lock the scheduling lock to get consistent
* scheduling parameters.
*/
THR_SCHED_LOCK(curthread, curthread);
Implement zero system call thread switching. Performance of thread switches should be on par with that under scheduler activations. o Timing is achieved through the use of a fixed interval timer (ITIMER_PROF) to count scheduling ticks instead of retrieving the time-of-day upon every thread switch and calculating elapsed real time. o Polling for I/O readiness is performed once for each scheduling tick instead of every thread switch. o The non-signal saving/restoring versions of setjmp/longjmp are used to save and restore thread contexts. This may allow the removal of _THREAD_SAFE macros from setjmp() and longjmp() - needs more investigation. Change signal handling so that signals are handled in the context of the thread that is receiving the signal. When signals are dispatched to a thread, a special signal handling frame is created on top of the target threads stack. The frame contains the threads saved state information and a new context in which the thread can run. The applications signal handler is invoked through a wrapper routine that knows how to restore the threads saved state and unwind to previous frames. Fix interruption of threads due to signals. Some states were being improperly interrupted while other states were not being interrupted. This should fix several PRs. Signal handlers, which are invoked as a result of a process signal (not by pthread_kill()), are now called with the code (or siginfo_t if SA_SIGINFO was set in sa_flags) and sigcontext_t as received from the process signal handler. Modify the search for a thread to which a signal is delivered. The search algorithm is now: o First thread found in sigwait() with signal in wait mask. o First thread found sigsuspend()'d on the signal. o Current thread if signal is unmasked. o First thread found with signal unmasked. Collapse machine dependent support into macros defined in pthread_private.h. These should probably eventually be moved into separate MD files. Change the range of settable priorities to be compliant with POSIX (0-31). The threads library uses higher priorities internally for real-time threads (not yet implemented) and threads executing signal handlers. Real-time threads and threads running signal handlers add 64 and 32, respectively, to a threads base priority. Some other small changes and cleanups. PR: 17757 18559 21943 Reviewed by: jasone
2000-10-13 22:12:32 +00:00
new_thread->base_priority =
curthread->base_priority &
~THR_SIGNAL_PRIORITY;
Implement zero system call thread switching. Performance of thread switches should be on par with that under scheduler activations. o Timing is achieved through the use of a fixed interval timer (ITIMER_PROF) to count scheduling ticks instead of retrieving the time-of-day upon every thread switch and calculating elapsed real time. o Polling for I/O readiness is performed once for each scheduling tick instead of every thread switch. o The non-signal saving/restoring versions of setjmp/longjmp are used to save and restore thread contexts. This may allow the removal of _THREAD_SAFE macros from setjmp() and longjmp() - needs more investigation. Change signal handling so that signals are handled in the context of the thread that is receiving the signal. When signals are dispatched to a thread, a special signal handling frame is created on top of the target threads stack. The frame contains the threads saved state information and a new context in which the thread can run. The applications signal handler is invoked through a wrapper routine that knows how to restore the threads saved state and unwind to previous frames. Fix interruption of threads due to signals. Some states were being improperly interrupted while other states were not being interrupted. This should fix several PRs. Signal handlers, which are invoked as a result of a process signal (not by pthread_kill()), are now called with the code (or siginfo_t if SA_SIGINFO was set in sa_flags) and sigcontext_t as received from the process signal handler. Modify the search for a thread to which a signal is delivered. The search algorithm is now: o First thread found in sigwait() with signal in wait mask. o First thread found sigsuspend()'d on the signal. o Current thread if signal is unmasked. o First thread found with signal unmasked. Collapse machine dependent support into macros defined in pthread_private.h. These should probably eventually be moved into separate MD files. Change the range of settable priorities to be compliant with POSIX (0-31). The threads library uses higher priorities internally for real-time threads (not yet implemented) and threads executing signal handlers. Real-time threads and threads running signal handlers add 64 and 32, respectively, to a threads base priority. Some other small changes and cleanups. PR: 17757 18559 21943 Reviewed by: jasone
2000-10-13 22:12:32 +00:00
new_thread->attr.prio =
curthread->base_priority &
~THR_SIGNAL_PRIORITY;
Implement zero system call thread switching. Performance of thread switches should be on par with that under scheduler activations. o Timing is achieved through the use of a fixed interval timer (ITIMER_PROF) to count scheduling ticks instead of retrieving the time-of-day upon every thread switch and calculating elapsed real time. o Polling for I/O readiness is performed once for each scheduling tick instead of every thread switch. o The non-signal saving/restoring versions of setjmp/longjmp are used to save and restore thread contexts. This may allow the removal of _THREAD_SAFE macros from setjmp() and longjmp() - needs more investigation. Change signal handling so that signals are handled in the context of the thread that is receiving the signal. When signals are dispatched to a thread, a special signal handling frame is created on top of the target threads stack. The frame contains the threads saved state information and a new context in which the thread can run. The applications signal handler is invoked through a wrapper routine that knows how to restore the threads saved state and unwind to previous frames. Fix interruption of threads due to signals. Some states were being improperly interrupted while other states were not being interrupted. This should fix several PRs. Signal handlers, which are invoked as a result of a process signal (not by pthread_kill()), are now called with the code (or siginfo_t if SA_SIGINFO was set in sa_flags) and sigcontext_t as received from the process signal handler. Modify the search for a thread to which a signal is delivered. The search algorithm is now: o First thread found in sigwait() with signal in wait mask. o First thread found sigsuspend()'d on the signal. o Current thread if signal is unmasked. o First thread found with signal unmasked. Collapse machine dependent support into macros defined in pthread_private.h. These should probably eventually be moved into separate MD files. Change the range of settable priorities to be compliant with POSIX (0-31). The threads library uses higher priorities internally for real-time threads (not yet implemented) and threads executing signal handlers. Real-time threads and threads running signal handlers add 64 and 32, respectively, to a threads base priority. Some other small changes and cleanups. PR: 17757 18559 21943 Reviewed by: jasone
2000-10-13 22:12:32 +00:00
new_thread->attr.sched_policy =
curthread->attr.sched_policy;
Add an i386-specifc hack to always set %gs. There still seems to be instances where the kernel doesn't properly save and/or restore it. Use noupcall and nocompleted flags in the KSE mailbox. These require kernel changes to work which will be committed sometime later. Things still work without the changes. Remove the general kse entry function and use two different functions -- one for scope system threads and one for scope process threads. The scope system function is not yet enabled and we use the same function for all threads at the moment. Keep a copy of the KSE stack for the case that a KSE runs a scope system thread and uses the same stack as the thread (no upcalls are generated, so a separate stack isn't needed). This isn't enabled yet. Use a separate field for the KSE waiting flag. It isn't correct to use the mailbox flags field. The following fixes were provided by David Xu: o Initialize condition variable locks with thread versions of the low-level locking functions instead of the kse versions. o Enable threading before creating the first thread instead of after. o Don't enter critical regions when trying to malloc/free or call functions that malloc/free. o Take the scheduling lock when inheriting thread attributes. o Check the attribute's stack pointer instead of the attributes stack size for null when allocating a thread's stack. o Add a kseg reinit function so we don't have to destroy and then recreate the same lock. o Check the return value of kse_create() and return an appropriate error if it fails. o Don't forget to destroy a thread's locks when freeing it. o Examine the correct flags word for checking to see if a thread is in a synchronization queue. Things should now work on an SMP kernel.
2003-04-21 04:02:56 +00:00
THR_SCHED_UNLOCK(curthread, curthread);
} else {
/*
* Use just the thread priority, leaving the
* other scheduling attributes as their
* default values:
*/
Implement zero system call thread switching. Performance of thread switches should be on par with that under scheduler activations. o Timing is achieved through the use of a fixed interval timer (ITIMER_PROF) to count scheduling ticks instead of retrieving the time-of-day upon every thread switch and calculating elapsed real time. o Polling for I/O readiness is performed once for each scheduling tick instead of every thread switch. o The non-signal saving/restoring versions of setjmp/longjmp are used to save and restore thread contexts. This may allow the removal of _THREAD_SAFE macros from setjmp() and longjmp() - needs more investigation. Change signal handling so that signals are handled in the context of the thread that is receiving the signal. When signals are dispatched to a thread, a special signal handling frame is created on top of the target threads stack. The frame contains the threads saved state information and a new context in which the thread can run. The applications signal handler is invoked through a wrapper routine that knows how to restore the threads saved state and unwind to previous frames. Fix interruption of threads due to signals. Some states were being improperly interrupted while other states were not being interrupted. This should fix several PRs. Signal handlers, which are invoked as a result of a process signal (not by pthread_kill()), are now called with the code (or siginfo_t if SA_SIGINFO was set in sa_flags) and sigcontext_t as received from the process signal handler. Modify the search for a thread to which a signal is delivered. The search algorithm is now: o First thread found in sigwait() with signal in wait mask. o First thread found sigsuspend()'d on the signal. o Current thread if signal is unmasked. o First thread found with signal unmasked. Collapse machine dependent support into macros defined in pthread_private.h. These should probably eventually be moved into separate MD files. Change the range of settable priorities to be compliant with POSIX (0-31). The threads library uses higher priorities internally for real-time threads (not yet implemented) and threads executing signal handlers. Real-time threads and threads running signal handlers add 64 and 32, respectively, to a threads base priority. Some other small changes and cleanups. PR: 17757 18559 21943 Reviewed by: jasone
2000-10-13 22:12:32 +00:00
new_thread->base_priority =
new_thread->attr.prio;
}
[ The author's description... ] o Runnable threads are now maintained in priority queues. The implementation requires two things: 1.) The priority queues must be protected during insertion and removal of threads. Since the kernel scheduler must modify the priority queues, a spinlock for protection cannot be used. The functions _thread_kern_sched_defer() and _thread_kern_sched_undefer() were added to {un}defer kernel scheduler activation. 2.) A thread (active) priority change can be performed only when the thread is removed from the priority queue. The implementation uses a threads active priority when inserting it into the queue. A by-product is that thread switches are much faster. A separate queue is used for waiting and/or blocked threads, and it is searched at most 2 times in the kernel scheduler when there are active threads. It should be possible to reduce this to once by combining polling of threads waiting on I/O with the loop that looks for timed out threads and the minimum timeout value. o Functions to defer kernel scheduler activation were added. These are _thread_kern_sched_defer() and _thread_kern_sched_undefer() and may be called recursively. These routines do not block the scheduling signal, but latch its occurrence. The signal handler will not call the kernel scheduler when the running thread has deferred scheduling, but it will be called when running thread undefers scheduling. o Added support for _POSIX_THREAD_PRIORITY_SCHEDULING. All the POSIX routines required by this should now be implemented. One note, SCHED_OTHER, SCHED_FIFO, and SCHED_RR are required to be defined by including pthread.h. These defines are currently in sched.h. I modified pthread.h to include sched.h but don't know if this is the proper thing to do. o Added support for priority protection and inheritence mutexes. This allows definition of _POSIX_THREAD_PRIO_PROTECT and _POSIX_THREAD_PRIO_INHERIT. o Added additional error checks required by POSIX for mutexes and condition variables. o Provided a wrapper for sigpending which is marked as a hidden syscall. o Added a non-portable function as a debugging aid to allow an application to monitor thread context switches. An application can install a routine that gets called everytime a thread (explicitly created by the application) gets context switched. The routine gets passed the pthread IDs of the threads that are being switched in and out. Submitted by: Dan Eischen <eischen@vigrid.com> Changes by me: o Added a PS_SPINBLOCK state to deal with the priority inversion problem most often (I think) seen by threads calling malloc/free/realloc. o Dispatch signals to the running thread directly rather than at a context switch to avoid the situation where the switch never occurs.
1999-03-23 05:07:56 +00:00
new_thread->active_priority = new_thread->base_priority;
new_thread->inherited_priority = 0;
[ The author's description... ] o Runnable threads are now maintained in priority queues. The implementation requires two things: 1.) The priority queues must be protected during insertion and removal of threads. Since the kernel scheduler must modify the priority queues, a spinlock for protection cannot be used. The functions _thread_kern_sched_defer() and _thread_kern_sched_undefer() were added to {un}defer kernel scheduler activation. 2.) A thread (active) priority change can be performed only when the thread is removed from the priority queue. The implementation uses a threads active priority when inserting it into the queue. A by-product is that thread switches are much faster. A separate queue is used for waiting and/or blocked threads, and it is searched at most 2 times in the kernel scheduler when there are active threads. It should be possible to reduce this to once by combining polling of threads waiting on I/O with the loop that looks for timed out threads and the minimum timeout value. o Functions to defer kernel scheduler activation were added. These are _thread_kern_sched_defer() and _thread_kern_sched_undefer() and may be called recursively. These routines do not block the scheduling signal, but latch its occurrence. The signal handler will not call the kernel scheduler when the running thread has deferred scheduling, but it will be called when running thread undefers scheduling. o Added support for _POSIX_THREAD_PRIORITY_SCHEDULING. All the POSIX routines required by this should now be implemented. One note, SCHED_OTHER, SCHED_FIFO, and SCHED_RR are required to be defined by including pthread.h. These defines are currently in sched.h. I modified pthread.h to include sched.h but don't know if this is the proper thing to do. o Added support for priority protection and inheritence mutexes. This allows definition of _POSIX_THREAD_PRIO_PROTECT and _POSIX_THREAD_PRIO_INHERIT. o Added additional error checks required by POSIX for mutexes and condition variables. o Provided a wrapper for sigpending which is marked as a hidden syscall. o Added a non-portable function as a debugging aid to allow an application to monitor thread context switches. An application can install a routine that gets called everytime a thread (explicitly created by the application) gets context switched. The routine gets passed the pthread IDs of the threads that are being switched in and out. Submitted by: Dan Eischen <eischen@vigrid.com> Changes by me: o Added a PS_SPINBLOCK state to deal with the priority inversion problem most often (I think) seen by threads calling malloc/free/realloc. o Dispatch signals to the running thread directly rather than at a context switch to avoid the situation where the switch never occurs.
1999-03-23 05:07:56 +00:00
/* Initialize the mutex queue: */
TAILQ_INIT(&new_thread->mutexq);
/* Initialise hooks in the thread structure: */
new_thread->specific = NULL;
new_thread->specific_data_count = 0;
new_thread->cleanup = NULL;
new_thread->flags = 0;
new_thread->tlflags = 0;
new_thread->sigbackout = NULL;
new_thread->continuation = NULL;
new_thread->wakeup_time.tv_sec = -1;
new_thread->lock_switch = 0;
sigemptyset(&new_thread->sigpend);
new_thread->check_pending = 0;
new_thread->locklevel = 0;
new_thread->rdlock_count = 0;
new_thread->sigstk.ss_sp = 0;
new_thread->sigstk.ss_size = 0;
new_thread->sigstk.ss_flags = SS_DISABLE;
new_thread->oldsigmask = NULL;
if (new_thread->attr.suspend == THR_CREATE_SUSPENDED) {
new_thread->state = PS_SUSPENDED;
new_thread->flags = THR_FLAGS_SUSPENDED;
}
else
new_thread->state = PS_RUNNING;
In the words of the author: o The polling mechanism for I/O readiness was changed from select() to poll(). In additon, a wrapped version of poll() is now provided. o The wrapped select routine now converts each fd_set to a poll array so that the thread scheduler doesn't have to perform a bitwise search for selected fds each time file descriptors are polled for I/O readiness. o The thread scheduler was modified to use a new queue (_workq) for threads that need work. Threads waiting for I/O readiness and spinblocks are added to the work queue in addition to the waiting queue. This reduces the time spent forming/searching the array of file descriptors being polled. o The waiting queue (_waitingq) is now maintained in order of thread wakeup time. This allows the thread scheduler to find the nearest wakeup time by looking at the first thread in the queue instead of searching the entire queue. o Removed file descriptor locking for select/poll routines. An application should not rely on the threads library for providing this locking; if necessary, the application should use mutexes to protect selecting/polling of file descriptors. o Retrieve and use the kernel clock rate/resolution at startup instead of hardcoding the clock resolution to 10 msec (tested with kernel running at 1000 HZ). o All queues have been changed to use queue.h macros. These include the queues of all threads, dead threads, and threads waiting for file descriptor locks. o Added reinitialization of the GC mutex and condition variable after a fork. Also prevented reallocation of the ready queue after a fork. o Prevented the wrapped close routine from closing the thread kernel pipes. o Initialized file descriptor table for stdio entries at thread init. o Provided additional flags to indicate to what queues threads belong. o Moved TAILQ initialization for statically allocated mutex and condition variables to after the spinlock. o Added dispatching of signals to pthread_kill. Removing the dispatching of signals from thread activation broke sigsuspend when pthread_kill was used to send a signal to a thread. o Temporarily set the state of a thread to PS_SUSPENDED when it is first created and placed in the list of threads so that it will not be accidentally scheduled before becoming a member of one of the scheduling queues. o Change the signal handler to queue signals to the thread kernel pipe if the scheduling queues are protected. When scheduling queues are unprotected, signals are then dequeued and handled. o Ensured that all installed signal handlers block the scheduling signal and that the scheduling signal handler blocks all other signals. This ensures that the signal handler is only interruptible for and by non-scheduling signals. An atomic lock is used to decide which instance of the signal handler will handle pending signals. o Removed _lock_thread_list and _unlock_thread_list as they are no longer used to protect the thread list. o Added missing RCS IDs to modified files. o Added checks for appropriate queue membership and activity when adding, removing, and searching the scheduling queues. These checks add very little overhead and are enabled when compiled with _PTHREADS_INVARIANTS defined. Suggested and implemented by Tor Egge with some modification by me. o Close a race condition in uthread_close. (Tor Egge) o Protect the scheduling queues while modifying them in pthread_cond_signal and _thread_fd_unlock. (Tor Egge) o Ensure that when a thread gets a mutex, the mutex is on that threads list of owned mutexes. (Tor Egge) o Set the kernel-in-scheduler flag in _thread_kern_sched_state and _thread_kern_sched_state_unlock to prevent a scheduling signal from calling the scheduler again. (Tor Egge) o Don't use TAILQ_FOREACH macro while searching the waiting queue for threads in a sigwait state, because a change of state destroys the TAILQ link. It is actually safe to do so, though, because once a sigwaiting thread is found, the loop ends and the function returns. (Tor Egge) o When dispatching signals to threads, make the thread inherit the signal deferral flag of the currently running thread. (Tor Egge) Submitted by: Daniel Eischen <eischen@vigrid.com> and Tor Egge <Tor.Egge@fast.no>
1999-06-20 08:28:48 +00:00
/*
* System scope threads have their own kse and
* kseg. Process scope threads are all hung
* off the main process kseg.
In the words of the author: o The polling mechanism for I/O readiness was changed from select() to poll(). In additon, a wrapped version of poll() is now provided. o The wrapped select routine now converts each fd_set to a poll array so that the thread scheduler doesn't have to perform a bitwise search for selected fds each time file descriptors are polled for I/O readiness. o The thread scheduler was modified to use a new queue (_workq) for threads that need work. Threads waiting for I/O readiness and spinblocks are added to the work queue in addition to the waiting queue. This reduces the time spent forming/searching the array of file descriptors being polled. o The waiting queue (_waitingq) is now maintained in order of thread wakeup time. This allows the thread scheduler to find the nearest wakeup time by looking at the first thread in the queue instead of searching the entire queue. o Removed file descriptor locking for select/poll routines. An application should not rely on the threads library for providing this locking; if necessary, the application should use mutexes to protect selecting/polling of file descriptors. o Retrieve and use the kernel clock rate/resolution at startup instead of hardcoding the clock resolution to 10 msec (tested with kernel running at 1000 HZ). o All queues have been changed to use queue.h macros. These include the queues of all threads, dead threads, and threads waiting for file descriptor locks. o Added reinitialization of the GC mutex and condition variable after a fork. Also prevented reallocation of the ready queue after a fork. o Prevented the wrapped close routine from closing the thread kernel pipes. o Initialized file descriptor table for stdio entries at thread init. o Provided additional flags to indicate to what queues threads belong. o Moved TAILQ initialization for statically allocated mutex and condition variables to after the spinlock. o Added dispatching of signals to pthread_kill. Removing the dispatching of signals from thread activation broke sigsuspend when pthread_kill was used to send a signal to a thread. o Temporarily set the state of a thread to PS_SUSPENDED when it is first created and placed in the list of threads so that it will not be accidentally scheduled before becoming a member of one of the scheduling queues. o Change the signal handler to queue signals to the thread kernel pipe if the scheduling queues are protected. When scheduling queues are unprotected, signals are then dequeued and handled. o Ensured that all installed signal handlers block the scheduling signal and that the scheduling signal handler blocks all other signals. This ensures that the signal handler is only interruptible for and by non-scheduling signals. An atomic lock is used to decide which instance of the signal handler will handle pending signals. o Removed _lock_thread_list and _unlock_thread_list as they are no longer used to protect the thread list. o Added missing RCS IDs to modified files. o Added checks for appropriate queue membership and activity when adding, removing, and searching the scheduling queues. These checks add very little overhead and are enabled when compiled with _PTHREADS_INVARIANTS defined. Suggested and implemented by Tor Egge with some modification by me. o Close a race condition in uthread_close. (Tor Egge) o Protect the scheduling queues while modifying them in pthread_cond_signal and _thread_fd_unlock. (Tor Egge) o Ensure that when a thread gets a mutex, the mutex is on that threads list of owned mutexes. (Tor Egge) o Set the kernel-in-scheduler flag in _thread_kern_sched_state and _thread_kern_sched_state_unlock to prevent a scheduling signal from calling the scheduler again. (Tor Egge) o Don't use TAILQ_FOREACH macro while searching the waiting queue for threads in a sigwait state, because a change of state destroys the TAILQ link. It is actually safe to do so, though, because once a sigwaiting thread is found, the loop ends and the function returns. (Tor Egge) o When dispatching signals to threads, make the thread inherit the signal deferral flag of the currently running thread. (Tor Egge) Submitted by: Daniel Eischen <eischen@vigrid.com> and Tor Egge <Tor.Egge@fast.no>
1999-06-20 08:28:48 +00:00
*/
if ((new_thread->attr.flags & PTHREAD_SCOPE_SYSTEM) == 0) {
new_thread->kseg = _kse_initial->k_kseg;
new_thread->kse = _kse_initial;
}
else {
kse->k_curthread = NULL;
kse->k_kseg->kg_flags |= KGF_SINGLE_THREAD;
new_thread->kse = kse;
new_thread->kseg = kse->k_kseg;
kse->k_kcb->kcb_kmbx.km_udata = kse;
kse->k_kcb->kcb_kmbx.km_curthread = NULL;
}
[ The author's description... ] o Runnable threads are now maintained in priority queues. The implementation requires two things: 1.) The priority queues must be protected during insertion and removal of threads. Since the kernel scheduler must modify the priority queues, a spinlock for protection cannot be used. The functions _thread_kern_sched_defer() and _thread_kern_sched_undefer() were added to {un}defer kernel scheduler activation. 2.) A thread (active) priority change can be performed only when the thread is removed from the priority queue. The implementation uses a threads active priority when inserting it into the queue. A by-product is that thread switches are much faster. A separate queue is used for waiting and/or blocked threads, and it is searched at most 2 times in the kernel scheduler when there are active threads. It should be possible to reduce this to once by combining polling of threads waiting on I/O with the loop that looks for timed out threads and the minimum timeout value. o Functions to defer kernel scheduler activation were added. These are _thread_kern_sched_defer() and _thread_kern_sched_undefer() and may be called recursively. These routines do not block the scheduling signal, but latch its occurrence. The signal handler will not call the kernel scheduler when the running thread has deferred scheduling, but it will be called when running thread undefers scheduling. o Added support for _POSIX_THREAD_PRIORITY_SCHEDULING. All the POSIX routines required by this should now be implemented. One note, SCHED_OTHER, SCHED_FIFO, and SCHED_RR are required to be defined by including pthread.h. These defines are currently in sched.h. I modified pthread.h to include sched.h but don't know if this is the proper thing to do. o Added support for priority protection and inheritence mutexes. This allows definition of _POSIX_THREAD_PRIO_PROTECT and _POSIX_THREAD_PRIO_INHERIT. o Added additional error checks required by POSIX for mutexes and condition variables. o Provided a wrapper for sigpending which is marked as a hidden syscall. o Added a non-portable function as a debugging aid to allow an application to monitor thread context switches. An application can install a routine that gets called everytime a thread (explicitly created by the application) gets context switched. The routine gets passed the pthread IDs of the threads that are being switched in and out. Submitted by: Dan Eischen <eischen@vigrid.com> Changes by me: o Added a PS_SPINBLOCK state to deal with the priority inversion problem most often (I think) seen by threads calling malloc/free/realloc. o Dispatch signals to the running thread directly rather than at a context switch to avoid the situation where the switch never occurs.
1999-03-23 05:07:56 +00:00
/*
* Schedule the new thread starting a new KSEG/KSE
* pair if necessary.
[ The author's description... ] o Runnable threads are now maintained in priority queues. The implementation requires two things: 1.) The priority queues must be protected during insertion and removal of threads. Since the kernel scheduler must modify the priority queues, a spinlock for protection cannot be used. The functions _thread_kern_sched_defer() and _thread_kern_sched_undefer() were added to {un}defer kernel scheduler activation. 2.) A thread (active) priority change can be performed only when the thread is removed from the priority queue. The implementation uses a threads active priority when inserting it into the queue. A by-product is that thread switches are much faster. A separate queue is used for waiting and/or blocked threads, and it is searched at most 2 times in the kernel scheduler when there are active threads. It should be possible to reduce this to once by combining polling of threads waiting on I/O with the loop that looks for timed out threads and the minimum timeout value. o Functions to defer kernel scheduler activation were added. These are _thread_kern_sched_defer() and _thread_kern_sched_undefer() and may be called recursively. These routines do not block the scheduling signal, but latch its occurrence. The signal handler will not call the kernel scheduler when the running thread has deferred scheduling, but it will be called when running thread undefers scheduling. o Added support for _POSIX_THREAD_PRIORITY_SCHEDULING. All the POSIX routines required by this should now be implemented. One note, SCHED_OTHER, SCHED_FIFO, and SCHED_RR are required to be defined by including pthread.h. These defines are currently in sched.h. I modified pthread.h to include sched.h but don't know if this is the proper thing to do. o Added support for priority protection and inheritence mutexes. This allows definition of _POSIX_THREAD_PRIO_PROTECT and _POSIX_THREAD_PRIO_INHERIT. o Added additional error checks required by POSIX for mutexes and condition variables. o Provided a wrapper for sigpending which is marked as a hidden syscall. o Added a non-portable function as a debugging aid to allow an application to monitor thread context switches. An application can install a routine that gets called everytime a thread (explicitly created by the application) gets context switched. The routine gets passed the pthread IDs of the threads that are being switched in and out. Submitted by: Dan Eischen <eischen@vigrid.com> Changes by me: o Added a PS_SPINBLOCK state to deal with the priority inversion problem most often (I think) seen by threads calling malloc/free/realloc. o Dispatch signals to the running thread directly rather than at a context switch to avoid the situation where the switch never occurs.
1999-03-23 05:07:56 +00:00
*/
ret = _thr_schedule_add(curthread, new_thread);
if (ret != 0)
free_thread(curthread, new_thread);
else {
/* Return a pointer to the thread structure: */
(*thread) = new_thread;
}
}
}
/* Return the status: */
return (ret);
}
static void
free_thread(struct pthread *curthread, struct pthread *thread)
{
free_stack(&thread->attr);
if ((thread->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) {
/* Free the KSE and KSEG. */
_kseg_free(thread->kseg);
_kse_free(curthread, thread->kse);
}
_thr_free(curthread, thread);
}
static int
create_stack(struct pthread_attr *pattr)
{
int ret;
/* Check if a stack was specified in the thread attributes: */
if ((pattr->stackaddr_attr) != NULL) {
pattr->guardsize_attr = 0;
pattr->flags |= THR_STACK_USER;
ret = 0;
}
else
ret = _thr_stack_alloc(pattr);
return (ret);
}
static void
free_stack(struct pthread_attr *pattr)
{
struct kse *curkse;
kse_critical_t crit;
if ((pattr->flags & THR_STACK_USER) == 0) {
crit = _kse_critical_enter();
curkse = _get_curkse();
KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock);
/* Stack routines don't use malloc/free. */
_thr_stack_free(pattr);
KSE_LOCK_RELEASE(curkse, &_thread_list_lock);
_kse_critical_leave(crit);
}
}
static void
thread_start(struct pthread *curthread, void *(*start_routine) (void *),
void *arg)
{
/* Run the current thread's start routine with argument: */
pthread_exit(start_routine(arg));
/* This point should never be reached. */
PANIC("Thread has resumed after exit");
}