mirror of https://git.FreeBSD.org/src.git

Merge compiler-rt trunk r338150, and resolve conflicts.

Dimitry Andric 2018-08-02 17:06:03 +00:00
commit 68dc77c284
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/projects/clang700-import/; revision=337136
326 changed files with 32031 additions and 4454 deletions

View File

@ -14,7 +14,7 @@ Full text of the relevant licenses is included below.
University of Illinois/NCSA
Open Source License
Copyright (c) 2009-2016 by the contributors listed in CREDITS.TXT
Copyright (c) 2009-2018 by the contributors listed in CREDITS.TXT
All rights reserved.

View File

@ -65,6 +65,11 @@ extern "C" {
void __sanitizer_unaligned_store32(void *p, uint32_t x);
void __sanitizer_unaligned_store64(void *p, uint64_t x);
// Returns 1 on the first call, then returns 0 thereafter. Called by the tool
// to ensure only one report is printed when multiple errors occur
// simultaneously.
int __sanitizer_acquire_crash_state();
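
As a rough illustration of the intended use (not part of this patch; it only assumes the declaration above is reachable through the public sanitizer header), a tool-side reporter can use the crash state to make sure only the first failing thread prints:

#include <sanitizer/common_interface_defs.h>
#include <cstdio>
#include <cstdlib>

static void ReportFatalError(const char *msg) {
  // Only the first caller acquires the crash state; concurrent or repeated
  // errors bail out instead of producing interleaved reports.
  if (!__sanitizer_acquire_crash_state())
    return;
  std::fprintf(stderr, "fatal: %s\n", msg);
  std::abort();
}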
// Annotate the current state of a contiguous container, such as
// std::vector, std::string or similar.
// A contiguous container is a container that keeps all of its elements

View File

@ -104,6 +104,14 @@ extern "C" {
copy. Source and destination regions can overlap. */
void __msan_copy_shadow(const volatile void *dst, const volatile void *src,
size_t size);
/* Disables uninitialized memory checks in interceptors. */
void __msan_scoped_disable_interceptor_checks(void);
/* Re-enables uninitialized memory checks in interceptors after a previous
call to __msan_scoped_disable_interceptor_checks. */
void __msan_scoped_enable_interceptor_checks(void);
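
A small usage sketch (assumes MSan is active and the declarations above are reached via <sanitizer/msan_interface.h>): silence interceptor checks only around a call that intentionally touches uninitialized bytes.

#include <sanitizer/msan_interface.h>
#include <cstddef>
#include <cstring>

void CopyPossiblyUninitialized(char *dst, const char *src, size_t len) {
  // src may legitimately contain uninitialized padding that we want to copy
  // verbatim; memcpy is intercepted by MSan, so suppress checks briefly.
  __msan_scoped_disable_interceptor_checks();
  std::memcpy(dst, src, len);
  __msan_scoped_enable_interceptor_checks();
}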
#ifdef __cplusplus
} // extern "C"
#endif

File diff suppressed because it is too large

View File

@ -26,7 +26,12 @@ extern "C" {
// the hard limit (HardLimit=1) or the soft limit (HardLimit=0). The limit
// can be removed by setting LimitMb to 0. This function's parameters should
// be fully trusted to avoid security mishaps.
void __scudo_set_rss_limit(unsigned long LimitMb, int HardLimit);
void __scudo_set_rss_limit(size_t LimitMb, int HardLimit);
// This function outputs various allocator statistics for both the Primary
// and Secondary allocators, including memory usage, number of allocations
// and deallocations.
void __scudo_print_stats(void);
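
For illustration (assuming the binary links against Scudo and includes <sanitizer/scudo_interface.h>), the two calls might be used like this:

#include <sanitizer/scudo_interface.h>

int main() {
  // Impose a 2 GB soft RSS limit; passing LimitMb = 0 later removes it.
  __scudo_set_rss_limit(2048, /*HardLimit=*/0);
  // ... allocate and free as usual ...
  __scudo_print_stats();  // dump Primary/Secondary allocator statistics
  return 0;
}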
#ifdef __cplusplus
} // extern "C"
#endif

View File

@ -27,6 +27,7 @@ enum XRayEntryType {
TAIL = 2,
LOG_ARGS_ENTRY = 3,
CUSTOM_EVENT = 4,
TYPED_EVENT = 5,
};
/// Provide a function to invoke for when instrumentation points are hit. This
@ -68,12 +69,23 @@ extern int __xray_set_handler_arg1(void (*entry)(int32_t, XRayEntryType,
extern int __xray_remove_handler_arg1();
/// Provide a function to invoke when XRay encounters a custom event.
extern int __xray_set_customevent_handler(void (*entry)(void*, std::size_t));
extern int __xray_set_customevent_handler(void (*entry)(void *, std::size_t));
/// This removes whatever the currently provided custom event handler is.
/// Returns 1 on success, 0 on error.
extern int __xray_remove_customevent_handler();
/// Set a handler for xray typed event logging. The first parameter is a type
/// identifier, the second is a payload, and the third is the payload size.
extern int __xray_set_typedevent_handler(void (*entry)(uint16_t, const void *,
std::size_t));
/// Removes the currently set typed event handler.
/// Returns 1 on success, 0 on error.
extern int __xray_remove_typedevent_handler();
extern uint16_t __xray_register_event_type(const char *event_type);
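
A hedged sketch of wiring up the new typed-event API (the handler and event-type names are made up; the declarations come from <xray/xray_interface.h>):

#include <xray/xray_interface.h>
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Matches the handler signature above: type id, payload pointer, payload size.
static void MyTypedHandler(uint16_t type, const void *payload,
                           std::size_t size) {
  std::fprintf(stderr, "typed event %u, %zu bytes at %p\n",
               static_cast<unsigned>(type), size, payload);
}

void InstallTypedEventHandler() {
  // Obtain an id for our event type, then install the handler.
  uint16_t my_type = __xray_register_event_type("my-subsystem-event");
  (void)my_type;  // would be passed along when emitting events
  if (!__xray_set_typedevent_handler(MyTypedHandler))
    std::fprintf(stderr, "failed to install typed event handler\n");
}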
enum XRayPatchingStatus {
NOT_INITIALIZED = 0,
SUCCESS = 1,

View File

@ -21,27 +21,29 @@
///
/// The high-level usage pattern for these APIs looks like the following:
///
/// // Before we try initializing the log implementation, we must set it as
/// // the log implementation. We provide the function pointers that define
/// // the various initialization, finalization, and other pluggable hooks
/// // that we need.
/// __xray_set_log_impl({...});
/// // We choose the mode which we'd like to install, and check whether this
/// // has succeeded. Each mode will have their own set of flags they will
/// // support, outside of the global XRay configuration options that are
/// // defined in the XRAY_OPTIONS environment variable.
/// auto select_status = __xray_log_select_mode("xray-fdr");
/// if (select_status != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
/// // This failed, we should not proceed with attempting to initialise
/// // the currently selected mode.
/// return;
/// }
///
/// // Once that's done, we can now initialize the implementation. Each
/// // implementation has a chance to let users customize the implementation
/// // with a struct that their implementation supports. Roughly this might
/// // look like:
/// MyImplementationOptions opts;
/// opts.enable_feature = true;
/// ...
/// auto init_status = __xray_log_init(
/// BufferSize, MaxBuffers, &opts, sizeof opts);
/// if (init_status != XRayLogInitStatus::XRAY_LOG_INITIALIZED) {
/// // Once that's done, we can now attempt to configure the implementation.
/// // To do this, we provide the string flags configuration for the mode.
/// auto config_status = __xray_log_init_mode(
/// "xray-fdr", "verbosity=1 some_flag=1 another_flag=2");
/// if (config_status != XRayLogInitStatus::XRAY_LOG_INITIALIZED) {
/// // deal with the error here, if there is one.
/// }
///
/// // When the log implementation has had the chance to initialize, we can
/// // now patch the sleds.
/// // now patch the instrumentation points. Note that we could have patched
/// // the instrumentation points first, but there's no strict ordering to
/// // these operations.
/// auto patch_status = __xray_patch();
/// if (patch_status != XRayPatchingStatus::SUCCESS) {
/// // deal with the error here, if it is an error.
@ -56,12 +58,12 @@
///
/// // We can optionally wait before flushing the log to give other threads a
/// // chance to see that the implementation is already finalized. Also, at
/// // this point we can optionally unpatch the sleds to reduce overheads at
/// // runtime.
/// // this point we can optionally unpatch the instrumentation points to
/// // reduce overheads at runtime.
/// auto unpatch_status = __xray_unpatch();
/// if (unpatch_status != XRayPatchingStatus::SUCCESS) {
// // deal with the error here, if it is an error.
// }
/// // deal with the error here, if it is an error.
/// }
///
/// // If there are logs or data to be flushed somewhere, we can do so only
/// // after we've finalized the log. Some implementations may not actually
@ -72,6 +74,17 @@
/// // deal with the error here, if it is an error.
/// }
///
/// // Alternatively, we can go through the buffers ourselves without
/// // relying on the implementations' flushing semantics (if the
/// // implementation supports exporting this data directly).
/// auto MyBufferProcessor = +[](const char* mode, XRayBuffer buffer) {
/// // Check the "mode" to see if it's something we know how to handle...
/// // and/or do something with an XRayBuffer instance.
/// };
/// auto process_status = __xray_log_process_buffers(MyBufferProcessor);
/// if (process_status != XRayLogFlushStatus::XRAY_LOG_FLUSHED) {
/// // deal with the error here, if it is an error.
/// }
///
/// NOTE: Before calling __xray_patch() again, consider re-initializing the
/// implementation first. Some implementations might stay in an "off" state when
@ -182,9 +195,13 @@ struct XRayLogImpl {
XRayLogFlushStatus (*flush_log)();
};
/// DEPRECATED: Use the mode registration workflow instead with
/// __xray_log_register_mode(...) and __xray_log_select_mode(...). See the
/// documentation for those functions.
///
/// This function installs a new logging implementation that XRay will use. In
/// case there are any nullptr members in Impl, XRay will *uninstall any
/// existing implementations*. It does NOT patch the instrumentation sleds.
/// existing implementations*. It does NOT patch the instrumentation points.
///
/// NOTE: This function does NOT attempt to finalize the currently installed
/// implementation. Use with caution.
@ -227,9 +244,14 @@ XRayLogRegisterStatus __xray_log_register_mode(const char *Mode,
/// does not update the currently installed implementation.
XRayLogRegisterStatus __xray_log_select_mode(const char *Mode);
/// Returns an identifier for the currently selected XRay mode chosen through
/// the __xray_log_select_mode(...) function call. Returns nullptr if there is
/// no currently installed mode.
const char *__xray_log_get_current_mode();
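
A brief sketch of the select/query pair (assuming the built-in "xray-basic" mode is available in the runtime):

#include <xray/xray_log_interface.h>
#include <cstdio>

void SelectBasicMode() {
  if (__xray_log_select_mode("xray-basic") ==
      XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
    // Returns the selected mode's name, or nullptr if none is installed.
    std::printf("current mode: %s\n", __xray_log_get_current_mode());
  }
}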
/// This function removes the currently installed implementation. It will also
/// uninstall any handlers that have been previously installed. It does NOT
/// unpatch the instrumentation sleds.
/// unpatch the instrumentation points.
///
/// NOTE: This function does NOT attempt to finalize the currently installed
/// implementation. Use with caution.
@ -244,11 +266,37 @@ XRayLogRegisterStatus __xray_log_select_mode(const char *Mode);
/// called while in any other states.
void __xray_remove_log_impl();
/// DEPRECATED: Use __xray_log_init_mode() instead, and provide all the options
/// in string form.
/// Invokes the installed implementation initialization routine. See
/// XRayLogInitStatus for what the return values mean.
XRayLogInitStatus __xray_log_init(size_t BufferSize, size_t MaxBuffers,
void *Args, size_t ArgsSize);
/// Invokes the installed initialization routine, which *must* support the
/// string based form.
///
/// NOTE: When this API is used, we still invoke the installed initialization
/// routine, but we will call it with the following convention to signal that we
/// are using the string form:
///
/// - BufferSize = 0
/// - MaxBuffers = 0
/// - ArgsSize = 0
/// - Args will be the pointer to the character buffer representing the
/// configuration.
///
/// FIXME: Updating the XRayLogImpl struct is an ABI breaking change. When we
/// are ready to make a breaking change, we should clean this up appropriately.
XRayLogInitStatus __xray_log_init_mode(const char *Mode, const char *Config);
/// Like __xray_log_init_mode(...) this version allows for providing
/// configurations that might have non-null-terminated strings. This will
/// operate similarly to __xray_log_init_mode, with the exception that
/// |ArgsSize| will be what |ConfigSize| is.
XRayLogInitStatus __xray_log_init_mode_bin(const char *Mode, const char *Config,
size_t ConfigSize);
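
The calling convention described above matters mostly to mode implementers; here is a sketch of how an installed init routine might branch on it (the function name is hypothetical, and it would be registered through the XRayLogImpl / __xray_log_register_mode machinery):

#include <xray/xray_log_interface.h>
#include <cstddef>

static XRayLogInitStatus MyModeInit(std::size_t BufferSize,
                                    std::size_t MaxBuffers, void *Args,
                                    std::size_t ArgsSize) {
  if (BufferSize == 0 && MaxBuffers == 0) {
    // String form: Args is the flag string. ArgsSize is 0 when coming from
    // __xray_log_init_mode(), or the config length from the *_bin variant.
    const char *Config = static_cast<const char *>(Args);
    // ... parse "verbosity=1 some_flag=1 ..." style options here ...
    (void)Config;
    (void)ArgsSize;
  } else {
    // Legacy __xray_log_init(BufferSize, MaxBuffers, Args, ArgsSize) path.
  }
  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
}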
/// Invokes the installed implementation finalization routine. See
/// XRayLogInitStatus for what the return values mean.
XRayLogInitStatus __xray_log_finalize();
@ -257,16 +305,68 @@ XRayLogInitStatus __xray_log_finalize();
/// XRayLogFlushStatus for what the return values mean.
XRayLogFlushStatus __xray_log_flushLog();
/// An XRayBuffer represents a section of memory which can be treated by log
/// processing functions as bytes stored in the logging implementation's
/// buffers.
struct XRayBuffer {
const void *Data;
size_t Size;
};
/// Registers an iterator function which takes an XRayBuffer argument, then
/// returns another XRayBuffer representing the next buffer. When the
/// Iterator function returns an empty XRayBuffer (Data = nullptr, Size = 0),
/// this signifies the end of the buffers.
///
/// The first invocation of this Iterator function will always take an empty
/// XRayBuffer (Data = nullptr, Size = 0).
void __xray_log_set_buffer_iterator(XRayBuffer (*Iterator)(XRayBuffer));
/// Removes the currently registered buffer iterator function.
void __xray_log_remove_buffer_iterator();
/// Invokes the provided handler to process data maintained by the logging
/// implementation. The handler is given raw access to the data available in
/// memory from the logging implementation. The callback function must:
///
/// 1) Not modify the data, to avoid running into undefined behaviour.
///
/// 2) Either know the data layout, or treat the data as raw bytes for later
/// interpretation.
///
/// This API is best used in place of the `__xray_log_flushLog()` implementation
/// above to enable the caller to provide an alternative means of extracting the
/// data from the XRay implementation.
///
/// Implementations MUST then provide:
///
/// 1) A function that will return an XRayBuffer. Functions that return an
/// "empty" XRayBuffer signify that there are no more buffers to be
/// processed. This function should be registered through the
/// `__xray_log_set_buffer_iterator(...)` function.
///
/// 2) Its own means of converting data it holds in memory into an XRayBuffer
/// structure.
///
/// See XRayLogFlushStatus for what the return values mean.
///
XRayLogFlushStatus __xray_log_process_buffers(void (*Processor)(const char *,
XRayBuffer));
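
On the consuming side, the processor callback described above can be as simple as the following (hedged sketch; what the bytes mean depends entirely on the installed mode):

#include <xray/xray_log_interface.h>
#include <cstdio>

static void DumpBuffer(const char *Mode, XRayBuffer Buf) {
  // Treat the contents as opaque bytes; a real consumer would check Mode and
  // decode the buffer according to that mode's layout.
  std::fprintf(stderr, "mode=%s: buffer of %zu bytes at %p\n", Mode, Buf.Size,
               Buf.Data);
}

void DrainXRayBuffers() {
  if (__xray_log_process_buffers(DumpBuffer) !=
      XRayLogFlushStatus::XRAY_LOG_FLUSHED)
    std::fprintf(stderr, "no buffers were processed\n");
}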
} // extern "C"
namespace __xray {
/// DEPRECATED: Use __xray_log_init_mode(...) instead, and provide flag
/// configuration strings to set the options instead.
/// Options used by the LLVM XRay FDR logging implementation.
struct FDRLoggingOptions {
bool ReportErrors = false;
int Fd = -1;
};
/// DEPRECATED: Use __xray_log_init_mode(...) instead, and provide flag
/// configuration strings to set the options instead.
/// Options used by the LLVM XRay Basic (Naive) logging implementation.
struct BasicLoggingOptions {
int DurationFilterMicros = 0;

View File

@ -54,7 +54,7 @@ struct alignas(32) XRayFileHeader {
union {
char FreeForm[16];
// The current civiltime timestamp, as retrived from 'clock_gettime'. This
// The current civiltime timestamp, as retrieved from 'clock_gettime'. This
// allows readers of the file to determine when the file was created or
// written down.
struct timespec TS;
@ -95,8 +95,11 @@ struct alignas(32) XRayRecord {
// The thread ID for the currently running thread.
uint32_t TId = 0;
// The ID of the process that is currently running.
uint32_t PId = 0;
// Use some bytes in the end of the record for buffers.
char Buffer[4] = {};
char Buffer[8] = {};
} __attribute__((packed));
static_assert(sizeof(XRayRecord) == 32, "XRayRecord != 32 bytes");
@ -115,8 +118,8 @@ struct alignas(32) XRayArgPayload {
// The thread ID for the currently running thread.
uint32_t TId = 0;
// Add more padding.
uint8_t Padding2[4] = {};
// The ID of the process that is currently running.
uint32_t PId = 0;
// The argument payload.
uint64_t Arg = 0;

View File

@ -134,8 +134,9 @@ struct AsanChunk: ChunkBase {
};
struct QuarantineCallback {
explicit QuarantineCallback(AllocatorCache *cache)
: cache_(cache) {
QuarantineCallback(AllocatorCache *cache, BufferedStackTrace *stack)
: cache_(cache),
stack_(stack) {
}
void Recycle(AsanChunk *m) {
@ -168,7 +169,7 @@ struct QuarantineCallback {
void *res = get_allocator().Allocate(cache_, size, 1);
// TODO(alekseys): Consider making quarantine OOM-friendly.
if (UNLIKELY(!res))
return DieOnFailure::OnOOM();
ReportOutOfMemory(size, stack_);
return res;
}
@ -176,7 +177,9 @@ struct QuarantineCallback {
get_allocator().Deallocate(cache_, p);
}
AllocatorCache *cache_;
private:
AllocatorCache* const cache_;
BufferedStackTrace* const stack_;
};
typedef Quarantine<QuarantineCallback, AsanChunk> AsanQuarantine;
@ -397,8 +400,11 @@ struct Allocator {
AllocType alloc_type, bool can_fill) {
if (UNLIKELY(!asan_inited))
AsanInitFromRtl();
if (RssLimitExceeded())
return AsanAllocator::FailureHandler::OnOOM();
if (RssLimitExceeded()) {
if (AllocatorMayReturnNull())
return nullptr;
ReportRssLimitExceeded(stack);
}
Flags &fl = *flags();
CHECK(stack);
const uptr min_alignment = SHADOW_GRANULARITY;
@ -431,9 +437,13 @@ struct Allocator {
}
CHECK(IsAligned(needed_size, min_alignment));
if (size > kMaxAllowedMallocSize || needed_size > kMaxAllowedMallocSize) {
Report("WARNING: AddressSanitizer failed to allocate 0x%zx bytes\n",
(void*)size);
return AsanAllocator::FailureHandler::OnBadRequest();
if (AllocatorMayReturnNull()) {
Report("WARNING: AddressSanitizer failed to allocate 0x%zx bytes\n",
(void*)size);
return nullptr;
}
ReportAllocationSizeTooBig(size, needed_size, kMaxAllowedMallocSize,
stack);
}
AsanThread *t = GetCurrentThread();
@ -446,8 +456,12 @@ struct Allocator {
AllocatorCache *cache = &fallback_allocator_cache;
allocated = allocator.Allocate(cache, needed_size, 8);
}
if (!allocated)
return nullptr;
if (UNLIKELY(!allocated)) {
SetAllocatorOutOfMemory();
if (AllocatorMayReturnNull())
return nullptr;
ReportOutOfMemory(size, stack);
}
if (*(u8 *)MEM_TO_SHADOW((uptr)allocated) == 0 && CanPoisonMemory()) {
// Heap poisoning is enabled, but the allocator provides an unpoisoned
@ -583,13 +597,13 @@ struct Allocator {
if (t) {
AsanThreadLocalMallocStorage *ms = &t->malloc_storage();
AllocatorCache *ac = GetAllocatorCache(ms);
quarantine.Put(GetQuarantineCache(ms), QuarantineCallback(ac), m,
m->UsedSize());
quarantine.Put(GetQuarantineCache(ms), QuarantineCallback(ac, stack), m,
m->UsedSize());
} else {
SpinMutexLock l(&fallback_mutex);
AllocatorCache *ac = &fallback_allocator_cache;
quarantine.Put(&fallback_quarantine_cache, QuarantineCallback(ac), m,
m->UsedSize());
quarantine.Put(&fallback_quarantine_cache, QuarantineCallback(ac, stack),
m, m->UsedSize());
}
}
@ -660,8 +674,11 @@ struct Allocator {
}
void *Calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
if (CheckForCallocOverflow(size, nmemb))
return AsanAllocator::FailureHandler::OnBadRequest();
if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) {
if (AllocatorMayReturnNull())
return nullptr;
ReportCallocOverflow(nmemb, size, stack);
}
void *ptr = Allocate(nmemb * size, 8, stack, FROM_MALLOC, false);
// If the memory comes from the secondary allocator no need to clear it
// as it comes directly from mmap.
@ -677,9 +694,9 @@ struct Allocator {
ReportFreeNotMalloced((uptr)ptr, stack);
}
void CommitBack(AsanThreadLocalMallocStorage *ms) {
void CommitBack(AsanThreadLocalMallocStorage *ms, BufferedStackTrace *stack) {
AllocatorCache *ac = GetAllocatorCache(ms);
quarantine.Drain(GetQuarantineCache(ms), QuarantineCallback(ac));
quarantine.Drain(GetQuarantineCache(ms), QuarantineCallback(ac, stack));
allocator.SwallowCache(ac);
}
@ -739,17 +756,19 @@ struct Allocator {
return AsanChunkView(m1);
}
void Purge() {
void Purge(BufferedStackTrace *stack) {
AsanThread *t = GetCurrentThread();
if (t) {
AsanThreadLocalMallocStorage *ms = &t->malloc_storage();
quarantine.DrainAndRecycle(GetQuarantineCache(ms),
QuarantineCallback(GetAllocatorCache(ms)));
QuarantineCallback(GetAllocatorCache(ms),
stack));
}
{
SpinMutexLock l(&fallback_mutex);
quarantine.DrainAndRecycle(&fallback_quarantine_cache,
QuarantineCallback(&fallback_allocator_cache));
QuarantineCallback(&fallback_allocator_cache,
stack));
}
allocator.ForceReleaseToOS();
@ -836,7 +855,8 @@ AsanChunkView FindHeapChunkByAllocBeg(uptr addr) {
}
void AsanThreadLocalMallocStorage::CommitBack() {
instance.CommitBack(this);
GET_STACK_TRACE_MALLOC;
instance.CommitBack(this, &stack);
}
void PrintInternalAllocatorStats() {
@ -883,7 +903,9 @@ void *asan_pvalloc(uptr size, BufferedStackTrace *stack) {
uptr PageSize = GetPageSizeCached();
if (UNLIKELY(CheckForPvallocOverflow(size, PageSize))) {
errno = errno_ENOMEM;
return AsanAllocator::FailureHandler::OnBadRequest();
if (AllocatorMayReturnNull())
return nullptr;
ReportPvallocOverflow(size, stack);
}
// pvalloc(0) should allocate one page.
size = size ? RoundUpTo(size, PageSize) : PageSize;
@ -895,20 +917,35 @@ void *asan_memalign(uptr alignment, uptr size, BufferedStackTrace *stack,
AllocType alloc_type) {
if (UNLIKELY(!IsPowerOfTwo(alignment))) {
errno = errno_EINVAL;
return AsanAllocator::FailureHandler::OnBadRequest();
if (AllocatorMayReturnNull())
return nullptr;
ReportInvalidAllocationAlignment(alignment, stack);
}
return SetErrnoOnNull(
instance.Allocate(size, alignment, stack, alloc_type, true));
}
void *asan_aligned_alloc(uptr alignment, uptr size, BufferedStackTrace *stack) {
if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(alignment, size))) {
errno = errno_EINVAL;
if (AllocatorMayReturnNull())
return nullptr;
ReportInvalidAlignedAllocAlignment(size, alignment, stack);
}
return SetErrnoOnNull(
instance.Allocate(size, alignment, stack, FROM_MALLOC, true));
}
int asan_posix_memalign(void **memptr, uptr alignment, uptr size,
BufferedStackTrace *stack) {
if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) {
AsanAllocator::FailureHandler::OnBadRequest();
return errno_EINVAL;
if (AllocatorMayReturnNull())
return errno_EINVAL;
ReportInvalidPosixMemalignAlignment(alignment, stack);
}
void *ptr = instance.Allocate(size, alignment, stack, FROM_MALLOC, true);
if (UNLIKELY(!ptr))
// OOM error is already taken care of by Allocate.
return errno_ENOMEM;
CHECK(IsAligned((uptr)ptr, alignment));
*memptr = ptr;
@ -1054,7 +1091,8 @@ uptr __sanitizer_get_allocated_size(const void *p) {
}
void __sanitizer_purge_allocator() {
instance.Purge();
GET_STACK_TRACE_MALLOC;
instance.Purge(&stack);
}
#if !SANITIZER_SUPPORTS_WEAK_HOOKS

View File

@ -125,11 +125,12 @@ const uptr kAllocatorSpace = ~(uptr)0;
const uptr kAllocatorSize = 0x40000000000ULL; // 4T.
typedef DefaultSizeClassMap SizeClassMap;
# elif defined(__powerpc64__)
const uptr kAllocatorSpace = 0xa0000000000ULL;
const uptr kAllocatorSpace = ~(uptr)0;
const uptr kAllocatorSize = 0x20000000000ULL; // 2T.
typedef DefaultSizeClassMap SizeClassMap;
# elif defined(__aarch64__) && SANITIZER_ANDROID
const uptr kAllocatorSpace = 0x3000000000ULL;
// Android needs to support 39, 42 and 48 bit VMA.
const uptr kAllocatorSpace = ~(uptr)0;
const uptr kAllocatorSize = 0x2000000000ULL; // 128G.
typedef VeryCompactSizeClassMap SizeClassMap;
# elif defined(__aarch64__)
@ -207,6 +208,7 @@ void *asan_realloc(void *p, uptr size, BufferedStackTrace *stack);
void *asan_valloc(uptr size, BufferedStackTrace *stack);
void *asan_pvalloc(uptr size, BufferedStackTrace *stack);
void *asan_aligned_alloc(uptr alignment, uptr size, BufferedStackTrace *stack);
int asan_posix_memalign(void **memptr, uptr alignment, uptr size,
BufferedStackTrace *stack);
uptr asan_malloc_usable_size(const void *ptr, uptr pc, uptr bp);

View File

@ -27,7 +27,8 @@ using namespace __asan;
static void FindInfoForStackVar(uptr addr, const char *frame_descr, uptr offset,
char *name, uptr name_size,
uptr &region_address, uptr &region_size) {
InternalMmapVector<StackVarDescr> vars(16);
InternalMmapVector<StackVarDescr> vars;
vars.reserve(16);
if (!ParseFrameDescription(frame_descr, &vars)) {
return;
}

View File

@ -20,23 +20,25 @@
namespace __asan {
// Return " (thread_name) " or an empty string if the name is empty.
const char *ThreadNameWithParenthesis(AsanThreadContext *t, char buff[],
uptr buff_len) {
const char *name = t->name;
if (name[0] == '\0') return "";
buff[0] = 0;
internal_strncat(buff, " (", 3);
internal_strncat(buff, name, buff_len - 4);
internal_strncat(buff, ")", 2);
return buff;
AsanThreadIdAndName::AsanThreadIdAndName(AsanThreadContext *t) {
Init(t->tid, t->name);
}
const char *ThreadNameWithParenthesis(u32 tid, char buff[], uptr buff_len) {
if (tid == kInvalidTid) return "";
asanThreadRegistry().CheckLocked();
AsanThreadContext *t = GetThreadContextByTidLocked(tid);
return ThreadNameWithParenthesis(t, buff, buff_len);
AsanThreadIdAndName::AsanThreadIdAndName(u32 tid) {
if (tid == kInvalidTid) {
Init(tid, "");
} else {
asanThreadRegistry().CheckLocked();
AsanThreadContext *t = GetThreadContextByTidLocked(tid);
Init(tid, t->name);
}
}
void AsanThreadIdAndName::Init(u32 tid, const char *tname) {
int len = internal_snprintf(name, sizeof(name), "T%d", tid);
CHECK(((unsigned int)len) < sizeof(name));
if (tname[0] != '\0')
internal_snprintf(&name[len], sizeof(name) - len, " (%s)", tname);
}
void DescribeThread(AsanThreadContext *context) {
@ -47,18 +49,15 @@ void DescribeThread(AsanThreadContext *context) {
return;
}
context->announced = true;
char tname[128];
InternalScopedString str(1024);
str.append("Thread T%d%s", context->tid,
ThreadNameWithParenthesis(context->tid, tname, sizeof(tname)));
str.append("Thread %s", AsanThreadIdAndName(context).c_str());
if (context->parent_tid == kInvalidTid) {
str.append(" created by unknown thread\n");
Printf("%s", str.data());
return;
}
str.append(
" created by T%d%s here:\n", context->parent_tid,
ThreadNameWithParenthesis(context->parent_tid, tname, sizeof(tname)));
str.append(" created by %s here:\n",
AsanThreadIdAndName(context->parent_tid).c_str());
Printf("%s", str.data());
StackDepotGet(context->stack_id).Print();
// Recursively describe parent thread if needed.
@ -358,10 +357,9 @@ bool GlobalAddressDescription::PointsInsideTheSameVariable(
void StackAddressDescription::Print() const {
Decorator d;
char tname[128];
Printf("%s", d.Location());
Printf("Address %p is located in stack of thread T%d%s", addr, tid,
ThreadNameWithParenthesis(tid, tname, sizeof(tname)));
Printf("Address %p is located in stack of thread %s", addr,
AsanThreadIdAndName(tid).c_str());
if (!frame_descr) {
Printf("%s\n", d.Default());
@ -380,7 +378,8 @@ void StackAddressDescription::Print() const {
StackTrace alloca_stack(&frame_pc, 1);
alloca_stack.Print();
InternalMmapVector<StackVarDescr> vars(16);
InternalMmapVector<StackVarDescr> vars;
vars.reserve(16);
if (!ParseFrameDescription(frame_descr, &vars)) {
Printf(
"AddressSanitizer can't parse the stack frame "
@ -402,7 +401,7 @@ void StackAddressDescription::Print() const {
}
Printf(
"HINT: this may be a false positive if your program uses "
"some custom stack unwind mechanism or swapcontext\n");
"some custom stack unwind mechanism, swapcontext or vfork\n");
if (SANITIZER_WINDOWS)
Printf(" (longjmp, SEH and C++ exceptions *are* supported)\n");
else
@ -418,26 +417,19 @@ void HeapAddressDescription::Print() const {
AsanThreadContext *alloc_thread = GetThreadContextByTidLocked(alloc_tid);
StackTrace alloc_stack = GetStackTraceFromId(alloc_stack_id);
char tname[128];
Decorator d;
AsanThreadContext *free_thread = nullptr;
if (free_tid != kInvalidTid) {
free_thread = GetThreadContextByTidLocked(free_tid);
Printf("%sfreed by thread T%d%s here:%s\n", d.Allocation(),
free_thread->tid,
ThreadNameWithParenthesis(free_thread, tname, sizeof(tname)),
d.Default());
Printf("%sfreed by thread %s here:%s\n", d.Allocation(),
AsanThreadIdAndName(free_thread).c_str(), d.Default());
StackTrace free_stack = GetStackTraceFromId(free_stack_id);
free_stack.Print();
Printf("%spreviously allocated by thread T%d%s here:%s\n", d.Allocation(),
alloc_thread->tid,
ThreadNameWithParenthesis(alloc_thread, tname, sizeof(tname)),
d.Default());
Printf("%spreviously allocated by thread %s here:%s\n", d.Allocation(),
AsanThreadIdAndName(alloc_thread).c_str(), d.Default());
} else {
Printf("%sallocated by thread T%d%s here:%s\n", d.Allocation(),
alloc_thread->tid,
ThreadNameWithParenthesis(alloc_thread, tname, sizeof(tname)),
d.Default());
Printf("%sallocated by thread %s here:%s\n", d.Allocation(),
AsanThreadIdAndName(alloc_thread).c_str(), d.Default());
}
alloc_stack.Print();
DescribeThread(GetCurrentThread());

View File

@ -26,9 +26,20 @@ void DescribeThread(AsanThreadContext *context);
static inline void DescribeThread(AsanThread *t) {
if (t) DescribeThread(t->context());
}
const char *ThreadNameWithParenthesis(AsanThreadContext *t, char buff[],
uptr buff_len);
const char *ThreadNameWithParenthesis(u32 tid, char buff[], uptr buff_len);
class AsanThreadIdAndName {
public:
explicit AsanThreadIdAndName(AsanThreadContext *t);
explicit AsanThreadIdAndName(u32 tid);
// Contains "T%tid (%name)" or "T%tid" if the name is empty.
const char *c_str() const { return &name[0]; }
private:
void Init(u32 tid, const char *tname);
char name[128];
};
class Decorator : public __sanitizer::SanitizerCommonDecorator {
public:

View File

@ -45,13 +45,11 @@ void ErrorDeadlySignal::Print() {
void ErrorDoubleFree::Print() {
Decorator d;
Printf("%s", d.Warning());
char tname[128];
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: attempting %s on %p in "
"thread T%d%s:\n",
scariness.GetDescription(), addr_description.addr, tid,
ThreadNameWithParenthesis(tid, tname, sizeof(tname)));
"ERROR: AddressSanitizer: attempting %s on %p in thread %s:\n",
scariness.GetDescription(), addr_description.addr,
AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
scariness.Print();
GET_STACK_TRACE_FATAL(second_free_stack->trace[0],
@ -63,13 +61,11 @@ void ErrorDoubleFree::Print() {
void ErrorNewDeleteTypeMismatch::Print() {
Decorator d;
Printf("%s", d.Warning());
char tname[128];
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: %s on %p in thread "
"T%d%s:\n",
scariness.GetDescription(), addr_description.addr, tid,
ThreadNameWithParenthesis(tid, tname, sizeof(tname)));
"ERROR: AddressSanitizer: %s on %p in thread %s:\n",
scariness.GetDescription(), addr_description.addr,
AsanThreadIdAndName(tid).c_str());
Printf("%s object passed to delete has wrong type:\n", d.Default());
if (delete_size != 0) {
Printf(
@ -106,13 +102,11 @@ void ErrorNewDeleteTypeMismatch::Print() {
void ErrorFreeNotMalloced::Print() {
Decorator d;
Printf("%s", d.Warning());
char tname[128];
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: attempting free on address "
"which was not malloc()-ed: %p in thread T%d%s\n",
addr_description.Address(), tid,
ThreadNameWithParenthesis(tid, tname, sizeof(tname)));
"which was not malloc()-ed: %p in thread %s\n",
addr_description.Address(), AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
CHECK_GT(free_stack->size, 0);
scariness.Print();
@ -129,7 +123,7 @@ void ErrorAllocTypeMismatch::Print() {
"operator delete []"};
CHECK_NE(alloc_type, dealloc_type);
Decorator d;
Printf("%s", d.Warning());
Printf("%s", d.Error());
Report("ERROR: AddressSanitizer: %s (%s vs %s) on %p\n",
scariness.GetDescription(),
alloc_names[alloc_type], dealloc_names[dealloc_type],
@ -148,7 +142,7 @@ void ErrorAllocTypeMismatch::Print() {
void ErrorMallocUsableSizeNotOwned::Print() {
Decorator d;
Printf("%s", d.Warning());
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: attempting to call malloc_usable_size() for "
"pointer which is not owned: %p\n",
@ -161,7 +155,7 @@ void ErrorMallocUsableSizeNotOwned::Print() {
void ErrorSanitizerGetAllocatedSizeNotOwned::Print() {
Decorator d;
Printf("%s", d.Warning());
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: attempting to call "
"__sanitizer_get_allocated_size() for pointer which is not owned: %p\n",
@ -172,11 +166,123 @@ void ErrorSanitizerGetAllocatedSizeNotOwned::Print() {
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorCallocOverflow::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: calloc parameters overflow: count * size "
"(%zd * %zd) cannot be represented in type size_t (thread %s)\n",
count, size, AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorPvallocOverflow::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: pvalloc parameters overflow: size 0x%zx "
"rounded up to system page size 0x%zx cannot be represented in type "
"size_t (thread %s)\n",
size, GetPageSizeCached(), AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorInvalidAllocationAlignment::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: invalid allocation alignment: %zd, "
"alignment must be a power of two (thread %s)\n",
alignment, AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorInvalidAlignedAllocAlignment::Print() {
Decorator d;
Printf("%s", d.Error());
#if SANITIZER_POSIX
Report("ERROR: AddressSanitizer: invalid alignment requested in "
"aligned_alloc: %zd, alignment must be a power of two and the "
"requested size 0x%zx must be a multiple of alignment "
"(thread %s)\n", alignment, size, AsanThreadIdAndName(tid).c_str());
#else
Report("ERROR: AddressSanitizer: invalid alignment requested in "
"aligned_alloc: %zd, the requested size 0x%zx must be a multiple of "
"alignment (thread %s)\n", alignment, size,
AsanThreadIdAndName(tid).c_str());
#endif
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorInvalidPosixMemalignAlignment::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: invalid alignment requested in posix_memalign: "
"%zd, alignment must be a power of two and a multiple of sizeof(void*) "
"== %zd (thread %s)\n",
alignment, sizeof(void*), AsanThreadIdAndName(tid).c_str()); // NOLINT
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorAllocationSizeTooBig::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: requested allocation size 0x%zx (0x%zx after "
"adjustments for alignment, red zones etc.) exceeds maximum supported "
"size of 0x%zx (thread %s)\n",
user_size, total_size, max_size, AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorRssLimitExceeded::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: specified RSS limit exceeded, currently set to "
"soft_rss_limit_mb=%zd\n", common_flags()->soft_rss_limit_mb);
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorOutOfMemory::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: allocator is out of memory trying to allocate "
"0x%zx bytes\n", requested_size);
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorStringFunctionMemoryRangesOverlap::Print() {
Decorator d;
char bug_type[100];
internal_snprintf(bug_type, sizeof(bug_type), "%s-param-overlap", function);
Printf("%s", d.Warning());
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: %s: memory ranges [%p,%p) and [%p, %p) "
"overlap\n",
@ -193,7 +299,7 @@ void ErrorStringFunctionMemoryRangesOverlap::Print() {
void ErrorStringFunctionSizeOverflow::Print() {
Decorator d;
Printf("%s", d.Warning());
Printf("%s", d.Error());
Report("ERROR: AddressSanitizer: %s: (size=%zd)\n",
scariness.GetDescription(), size);
Printf("%s", d.Default());
@ -221,7 +327,7 @@ void ErrorBadParamsToAnnotateContiguousContainer::Print() {
void ErrorODRViolation::Print() {
Decorator d;
Printf("%s", d.Warning());
Printf("%s", d.Error());
Report("ERROR: AddressSanitizer: %s (%p):\n", scariness.GetDescription(),
global1.beg);
Printf("%s", d.Default());
@ -250,7 +356,7 @@ void ErrorODRViolation::Print() {
void ErrorInvalidPointerPair::Print() {
Decorator d;
Printf("%s", d.Warning());
Printf("%s", d.Error());
Report("ERROR: AddressSanitizer: %s: %p %p\n", scariness.GetDescription(),
addr1_description.Address(), addr2_description.Address());
Printf("%s", d.Default());
@ -414,6 +520,7 @@ static void PrintLegend(InternalScopedString *str) {
PrintShadowByte(str, " ASan internal: ", kAsanInternalHeapMagic);
PrintShadowByte(str, " Left alloca redzone: ", kAsanAllocaLeftMagic);
PrintShadowByte(str, " Right alloca redzone: ", kAsanAllocaRightMagic);
PrintShadowByte(str, " Shadow gap: ", kAsanShadowGap);
}
static void PrintShadowBytes(InternalScopedString *str, const char *before,
@ -453,17 +560,15 @@ static void PrintShadowMemoryForAddress(uptr addr) {
void ErrorGeneric::Print() {
Decorator d;
Printf("%s", d.Warning());
Printf("%s", d.Error());
uptr addr = addr_description.Address();
Report("ERROR: AddressSanitizer: %s on address %p at pc %p bp %p sp %p\n",
bug_descr, (void *)addr, pc, bp, sp);
Printf("%s", d.Default());
char tname[128];
Printf("%s%s of size %zu at %p thread T%d%s%s\n", d.Access(),
Printf("%s%s of size %zu at %p thread %s%s\n", d.Access(),
access_size ? (is_write ? "WRITE" : "READ") : "ACCESS", access_size,
(void *)addr, tid,
ThreadNameWithParenthesis(tid, tname, sizeof(tname)), d.Default());
(void *)addr, AsanThreadIdAndName(tid).c_str(), d.Default());
scariness.Print();
GET_STACK_TRACE_FATAL(pc, bp);

View File

@ -20,20 +20,30 @@
namespace __asan {
// (*) VS2013 does not implement unrestricted unions, so we need a trivial
// default constructor explicitly defined for each particular error.
// None of the error classes own the stack traces mentioned in them.
struct ErrorBase {
ErrorBase() = default;
explicit ErrorBase(u32 tid_) : tid(tid_) {}
ScarinessScoreBase scariness;
u32 tid;
ErrorBase() = default; // (*)
explicit ErrorBase(u32 tid_) : tid(tid_) {}
ErrorBase(u32 tid_, int initial_score, const char *reason) : tid(tid_) {
scariness.Clear();
scariness.Scare(initial_score, reason);
}
};
struct ErrorDeadlySignal : ErrorBase {
SignalContext signal;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorDeadlySignal() = default;
ErrorDeadlySignal() = default; // (*)
ErrorDeadlySignal(u32 tid, const SignalContext &sig)
: ErrorBase(tid), signal(sig) {
: ErrorBase(tid),
signal(sig) {
scariness.Clear();
if (signal.IsStackOverflow()) {
scariness.Scare(10, "stack-overflow");
@ -55,125 +65,206 @@ struct ErrorDeadlySignal : ErrorBase {
};
struct ErrorDoubleFree : ErrorBase {
// ErrorDoubleFree doesn't own the stack trace.
const BufferedStackTrace *second_free_stack;
HeapAddressDescription addr_description;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorDoubleFree() = default;
ErrorDoubleFree() = default; // (*)
ErrorDoubleFree(u32 tid, BufferedStackTrace *stack, uptr addr)
: ErrorBase(tid), second_free_stack(stack) {
: ErrorBase(tid, 42, "double-free"),
second_free_stack(stack) {
CHECK_GT(second_free_stack->size, 0);
GetHeapAddressInformation(addr, 1, &addr_description);
scariness.Clear();
scariness.Scare(42, "double-free");
}
void Print();
};
struct ErrorNewDeleteTypeMismatch : ErrorBase {
// ErrorNewDeleteTypeMismatch doesn't own the stack trace.
const BufferedStackTrace *free_stack;
HeapAddressDescription addr_description;
uptr delete_size;
uptr delete_alignment;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorNewDeleteTypeMismatch() = default;
ErrorNewDeleteTypeMismatch() = default; // (*)
ErrorNewDeleteTypeMismatch(u32 tid, BufferedStackTrace *stack, uptr addr,
uptr delete_size_, uptr delete_alignment_)
: ErrorBase(tid), free_stack(stack), delete_size(delete_size_),
: ErrorBase(tid, 10, "new-delete-type-mismatch"),
free_stack(stack),
delete_size(delete_size_),
delete_alignment(delete_alignment_) {
GetHeapAddressInformation(addr, 1, &addr_description);
scariness.Clear();
scariness.Scare(10, "new-delete-type-mismatch");
}
void Print();
};
struct ErrorFreeNotMalloced : ErrorBase {
// ErrorFreeNotMalloced doesn't own the stack trace.
const BufferedStackTrace *free_stack;
AddressDescription addr_description;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorFreeNotMalloced() = default;
ErrorFreeNotMalloced() = default; // (*)
ErrorFreeNotMalloced(u32 tid, BufferedStackTrace *stack, uptr addr)
: ErrorBase(tid),
: ErrorBase(tid, 40, "bad-free"),
free_stack(stack),
addr_description(addr, /*shouldLockThreadRegistry=*/false) {
scariness.Clear();
scariness.Scare(40, "bad-free");
}
addr_description(addr, /*shouldLockThreadRegistry=*/false) {}
void Print();
};
struct ErrorAllocTypeMismatch : ErrorBase {
// ErrorAllocTypeMismatch doesn't own the stack trace.
const BufferedStackTrace *dealloc_stack;
HeapAddressDescription addr_description;
AllocType alloc_type, dealloc_type;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorAllocTypeMismatch() = default;
ErrorAllocTypeMismatch() = default; // (*)
ErrorAllocTypeMismatch(u32 tid, BufferedStackTrace *stack, uptr addr,
AllocType alloc_type_, AllocType dealloc_type_)
: ErrorBase(tid),
: ErrorBase(tid, 10, "alloc-dealloc-mismatch"),
dealloc_stack(stack),
alloc_type(alloc_type_),
dealloc_type(dealloc_type_) {
GetHeapAddressInformation(addr, 1, &addr_description);
scariness.Clear();
scariness.Scare(10, "alloc-dealloc-mismatch");
};
void Print();
};
struct ErrorMallocUsableSizeNotOwned : ErrorBase {
// ErrorMallocUsableSizeNotOwned doesn't own the stack trace.
const BufferedStackTrace *stack;
AddressDescription addr_description;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorMallocUsableSizeNotOwned() = default;
ErrorMallocUsableSizeNotOwned() = default; // (*)
ErrorMallocUsableSizeNotOwned(u32 tid, BufferedStackTrace *stack_, uptr addr)
: ErrorBase(tid),
: ErrorBase(tid, 10, "bad-malloc_usable_size"),
stack(stack_),
addr_description(addr, /*shouldLockThreadRegistry=*/false) {
scariness.Clear();
scariness.Scare(10, "bad-malloc_usable_size");
}
addr_description(addr, /*shouldLockThreadRegistry=*/false) {}
void Print();
};
struct ErrorSanitizerGetAllocatedSizeNotOwned : ErrorBase {
// ErrorSanitizerGetAllocatedSizeNotOwned doesn't own the stack trace.
const BufferedStackTrace *stack;
AddressDescription addr_description;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorSanitizerGetAllocatedSizeNotOwned() = default;
ErrorSanitizerGetAllocatedSizeNotOwned() = default; // (*)
ErrorSanitizerGetAllocatedSizeNotOwned(u32 tid, BufferedStackTrace *stack_,
uptr addr)
: ErrorBase(tid),
: ErrorBase(tid, 10, "bad-__sanitizer_get_allocated_size"),
stack(stack_),
addr_description(addr, /*shouldLockThreadRegistry=*/false) {
scariness.Clear();
scariness.Scare(10, "bad-__sanitizer_get_allocated_size");
}
addr_description(addr, /*shouldLockThreadRegistry=*/false) {}
void Print();
};
struct ErrorCallocOverflow : ErrorBase {
const BufferedStackTrace *stack;
uptr count;
uptr size;
ErrorCallocOverflow() = default; // (*)
ErrorCallocOverflow(u32 tid, BufferedStackTrace *stack_, uptr count_,
uptr size_)
: ErrorBase(tid, 10, "calloc-overflow"),
stack(stack_),
count(count_),
size(size_) {}
void Print();
};
struct ErrorPvallocOverflow : ErrorBase {
const BufferedStackTrace *stack;
uptr size;
ErrorPvallocOverflow() = default; // (*)
ErrorPvallocOverflow(u32 tid, BufferedStackTrace *stack_, uptr size_)
: ErrorBase(tid, 10, "pvalloc-overflow"),
stack(stack_),
size(size_) {}
void Print();
};
struct ErrorInvalidAllocationAlignment : ErrorBase {
const BufferedStackTrace *stack;
uptr alignment;
ErrorInvalidAllocationAlignment() = default; // (*)
ErrorInvalidAllocationAlignment(u32 tid, BufferedStackTrace *stack_,
uptr alignment_)
: ErrorBase(tid, 10, "invalid-allocation-alignment"),
stack(stack_),
alignment(alignment_) {}
void Print();
};
struct ErrorInvalidAlignedAllocAlignment : ErrorBase {
const BufferedStackTrace *stack;
uptr size;
uptr alignment;
ErrorInvalidAlignedAllocAlignment() = default; // (*)
ErrorInvalidAlignedAllocAlignment(u32 tid, BufferedStackTrace *stack_,
uptr size_, uptr alignment_)
: ErrorBase(tid, 10, "invalid-aligned-alloc-alignment"),
stack(stack_),
size(size_),
alignment(alignment_) {}
void Print();
};
struct ErrorInvalidPosixMemalignAlignment : ErrorBase {
const BufferedStackTrace *stack;
uptr alignment;
ErrorInvalidPosixMemalignAlignment() = default; // (*)
ErrorInvalidPosixMemalignAlignment(u32 tid, BufferedStackTrace *stack_,
uptr alignment_)
: ErrorBase(tid, 10, "invalid-posix-memalign-alignment"),
stack(stack_),
alignment(alignment_) {}
void Print();
};
struct ErrorAllocationSizeTooBig : ErrorBase {
const BufferedStackTrace *stack;
uptr user_size;
uptr total_size;
uptr max_size;
ErrorAllocationSizeTooBig() = default; // (*)
ErrorAllocationSizeTooBig(u32 tid, BufferedStackTrace *stack_,
uptr user_size_, uptr total_size_, uptr max_size_)
: ErrorBase(tid, 10, "allocation-size-too-big"),
stack(stack_),
user_size(user_size_),
total_size(total_size_),
max_size(max_size_) {}
void Print();
};
struct ErrorRssLimitExceeded : ErrorBase {
const BufferedStackTrace *stack;
ErrorRssLimitExceeded() = default; // (*)
ErrorRssLimitExceeded(u32 tid, BufferedStackTrace *stack_)
: ErrorBase(tid, 10, "rss-limit-exceeded"),
stack(stack_) {}
void Print();
};
struct ErrorOutOfMemory : ErrorBase {
const BufferedStackTrace *stack;
uptr requested_size;
ErrorOutOfMemory() = default; // (*)
ErrorOutOfMemory(u32 tid, BufferedStackTrace *stack_, uptr requested_size_)
: ErrorBase(tid, 10, "out-of-memory"),
stack(stack_),
requested_size(requested_size_) {}
void Print();
};
struct ErrorStringFunctionMemoryRangesOverlap : ErrorBase {
// ErrorStringFunctionMemoryRangesOverlap doesn't own the stack trace.
const BufferedStackTrace *stack;
uptr length1, length2;
AddressDescription addr1_description;
AddressDescription addr2_description;
const char *function;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorStringFunctionMemoryRangesOverlap() = default;
ErrorStringFunctionMemoryRangesOverlap() = default; // (*)
ErrorStringFunctionMemoryRangesOverlap(u32 tid, BufferedStackTrace *stack_,
uptr addr1, uptr length1_, uptr addr2,
uptr length2_, const char *function_)
@ -193,65 +284,51 @@ struct ErrorStringFunctionMemoryRangesOverlap : ErrorBase {
};
struct ErrorStringFunctionSizeOverflow : ErrorBase {
// ErrorStringFunctionSizeOverflow doesn't own the stack trace.
const BufferedStackTrace *stack;
AddressDescription addr_description;
uptr size;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorStringFunctionSizeOverflow() = default;
ErrorStringFunctionSizeOverflow() = default; // (*)
ErrorStringFunctionSizeOverflow(u32 tid, BufferedStackTrace *stack_,
uptr addr, uptr size_)
: ErrorBase(tid),
: ErrorBase(tid, 10, "negative-size-param"),
stack(stack_),
addr_description(addr, /*shouldLockThreadRegistry=*/false),
size(size_) {
scariness.Clear();
scariness.Scare(10, "negative-size-param");
}
size(size_) {}
void Print();
};
struct ErrorBadParamsToAnnotateContiguousContainer : ErrorBase {
// ErrorBadParamsToAnnotateContiguousContainer doesn't own the stack trace.
const BufferedStackTrace *stack;
uptr beg, end, old_mid, new_mid;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorBadParamsToAnnotateContiguousContainer() = default;
ErrorBadParamsToAnnotateContiguousContainer() = default; // (*)
// PS4: Do we want an AddressDescription for beg?
ErrorBadParamsToAnnotateContiguousContainer(u32 tid,
BufferedStackTrace *stack_,
uptr beg_, uptr end_,
uptr old_mid_, uptr new_mid_)
: ErrorBase(tid),
: ErrorBase(tid, 10, "bad-__sanitizer_annotate_contiguous_container"),
stack(stack_),
beg(beg_),
end(end_),
old_mid(old_mid_),
new_mid(new_mid_) {
scariness.Clear();
scariness.Scare(10, "bad-__sanitizer_annotate_contiguous_container");
}
new_mid(new_mid_) {}
void Print();
};
struct ErrorODRViolation : ErrorBase {
__asan_global global1, global2;
u32 stack_id1, stack_id2;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorODRViolation() = default;
ErrorODRViolation() = default; // (*)
ErrorODRViolation(u32 tid, const __asan_global *g1, u32 stack_id1_,
const __asan_global *g2, u32 stack_id2_)
: ErrorBase(tid),
: ErrorBase(tid, 10, "odr-violation"),
global1(*g1),
global2(*g2),
stack_id1(stack_id1_),
stack_id2(stack_id2_) {
scariness.Clear();
scariness.Scare(10, "odr-violation");
}
stack_id2(stack_id2_) {}
void Print();
};
@ -259,20 +336,16 @@ struct ErrorInvalidPointerPair : ErrorBase {
uptr pc, bp, sp;
AddressDescription addr1_description;
AddressDescription addr2_description;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorInvalidPointerPair() = default;
ErrorInvalidPointerPair() = default; // (*)
ErrorInvalidPointerPair(u32 tid, uptr pc_, uptr bp_, uptr sp_, uptr p1,
uptr p2)
: ErrorBase(tid),
: ErrorBase(tid, 10, "invalid-pointer-pair"),
pc(pc_),
bp(bp_),
sp(sp_),
addr1_description(p1, 1, /*shouldLockThreadRegistry=*/false),
addr2_description(p2, 1, /*shouldLockThreadRegistry=*/false) {
scariness.Clear();
scariness.Scare(10, "invalid-pointer-pair");
}
addr2_description(p2, 1, /*shouldLockThreadRegistry=*/false) {}
void Print();
};
@ -283,9 +356,8 @@ struct ErrorGeneric : ErrorBase {
const char *bug_descr;
bool is_write;
u8 shadow_val;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorGeneric() = default;
ErrorGeneric() = default; // (*)
ErrorGeneric(u32 tid, uptr addr, uptr pc_, uptr bp_, uptr sp_, bool is_write_,
uptr access_size_);
void Print();
@ -300,6 +372,14 @@ struct ErrorGeneric : ErrorBase {
macro(AllocTypeMismatch) \
macro(MallocUsableSizeNotOwned) \
macro(SanitizerGetAllocatedSizeNotOwned) \
macro(CallocOverflow) \
macro(PvallocOverflow) \
macro(InvalidAllocationAlignment) \
macro(InvalidAlignedAllocAlignment) \
macro(InvalidPosixMemalignAlignment) \
macro(AllocationSizeTooBig) \
macro(RssLimitExceeded) \
macro(OutOfMemory) \
macro(StringFunctionMemoryRangesOverlap) \
macro(StringFunctionSizeOverflow) \
macro(BadParamsToAnnotateContiguousContainer) \
@ -334,6 +414,7 @@ struct ErrorDescription {
};
ErrorDescription() { internal_memset(this, 0, sizeof(*this)); }
explicit ErrorDescription(LinkerInitialized) {}
ASAN_FOR_EACH_ERROR_KIND(ASAN_ERROR_DESCRIPTION_CONSTRUCTOR)
bool IsValid() { return kind != kErrorKindInvalid; }

View File

@ -33,10 +33,7 @@ static const char *MaybeCallAsanDefaultOptions() {
static const char *MaybeUseAsanDefaultOptionsCompileDefinition() {
#ifdef ASAN_DEFAULT_OPTIONS
// Stringize the macro value.
# define ASAN_STRINGIZE(x) #x
# define ASAN_STRINGIZE_OPTIONS(options) ASAN_STRINGIZE(options)
return ASAN_STRINGIZE_OPTIONS(ASAN_DEFAULT_OPTIONS);
return SANITIZER_STRINGIFY(ASAN_DEFAULT_OPTIONS);
#else
return "";
#endif
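
The replacement relies on the usual two-level stringification trick; a standalone sketch of the idiom (macro names below are illustrative, not the runtime's):

#include <cstdio>

#define STRINGIFY_(x) #x
#define STRINGIFY(x) STRINGIFY_(x)  // expand the argument first, then stringize

#define MY_DEFAULT_OPTIONS verbosity=1:halt_on_error=0

int main() {
  // Prints: verbosity=1:halt_on_error=0
  std::puts(STRINGIFY(MY_DEFAULT_OPTIONS));
  return 0;
}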
@ -163,6 +160,10 @@ void InitializeFlags() {
CHECK_LE(f->max_redzone, 2048);
CHECK(IsPowerOfTwo(f->redzone));
CHECK(IsPowerOfTwo(f->max_redzone));
if (SANITIZER_RTEMS) {
CHECK(!f->unmap_shadow_on_exit);
CHECK(!f->protect_shadow_gap);
}
// quarantine_size is deprecated but we still honor it.
// quarantine_size can not be used together with quarantine_size_mb.

View File

@ -88,7 +88,8 @@ ASAN_FLAG(bool, check_malloc_usable_size, true,
"295.*.")
ASAN_FLAG(bool, unmap_shadow_on_exit, false,
"If set, explicitly unmaps the (huge) shadow at exit.")
ASAN_FLAG(bool, protect_shadow_gap, true, "If set, mprotect the shadow gap")
ASAN_FLAG(bool, protect_shadow_gap, !SANITIZER_RTEMS,
"If set, mprotect the shadow gap")
ASAN_FLAG(bool, print_stats, false,
"Print various statistics after printing an error message or if "
"atexit=1.")
@ -136,9 +137,9 @@ ASAN_FLAG(
"Android. ")
ASAN_FLAG(
int, detect_invalid_pointer_pairs, 0,
"If non-zero, try to detect operations like <, <=, >, >= and - on "
"invalid pointer pairs (e.g. when pointers belong to different objects). "
"The bigger the value the harder we try.")
"If >= 2, detect operations like <, <=, >, >= and - on invalid pointer "
"pairs (e.g. when pointers belong to different objects); "
"If == 1, detect invalid operations only when both pointers are non-null.")
ASAN_FLAG(
bool, detect_container_overflow, true,
"If true, honor the container overflow annotations. See "

View File

@ -224,8 +224,9 @@ static void RegisterGlobal(const Global *g) {
list_of_all_globals = l;
if (g->has_dynamic_init) {
if (!dynamic_init_globals) {
dynamic_init_globals = new(allocator_for_globals)
VectorOfGlobals(kDynamicInitGlobalsInitialCapacity);
dynamic_init_globals =
new (allocator_for_globals) VectorOfGlobals; // NOLINT
dynamic_init_globals->reserve(kDynamicInitGlobalsInitialCapacity);
}
DynInitGlobal dyn_global = { *g, false };
dynamic_init_globals->push_back(dyn_global);
@ -358,9 +359,11 @@ void __asan_register_globals(__asan_global *globals, uptr n) {
GET_STACK_TRACE_MALLOC;
u32 stack_id = StackDepotPut(stack);
BlockingMutexLock lock(&mu_for_globals);
if (!global_registration_site_vector)
if (!global_registration_site_vector) {
global_registration_site_vector =
new(allocator_for_globals) GlobalRegistrationSiteVector(128);
new (allocator_for_globals) GlobalRegistrationSiteVector; // NOLINT
global_registration_site_vector->reserve(128);
}
GlobalRegistrationSite site = {stack_id, &globals[0], &globals[n - 1]};
global_registration_site_vector->push_back(site);
if (flags()->report_globals >= 2) {

View File

@ -19,9 +19,9 @@ namespace __asan {
#pragma section(".ASAN$GA", read, write) // NOLINT
#pragma section(".ASAN$GZ", read, write) // NOLINT
extern "C" __declspec(allocate(".ASAN$GA"))
__asan_global __asan_globals_start = {};
ALIGNED(sizeof(__asan_global)) __asan_global __asan_globals_start = {};
extern "C" __declspec(allocate(".ASAN$GZ"))
__asan_global __asan_globals_end = {};
ALIGNED(sizeof(__asan_global)) __asan_global __asan_globals_end = {};
#pragma comment(linker, "/merge:.ASAN=.data")
static void call_on_globals(void (*hook)(__asan_global *, uptr)) {

View File

@ -24,15 +24,20 @@
#include "lsan/lsan_common.h"
#include "sanitizer_common/sanitizer_libc.h"
// There is no general interception at all on Fuchsia.
// There is no general interception at all on Fuchsia and RTEMS.
// Only the functions in asan_interceptors_memintrinsics.cc are
// really defined to replace libc functions.
#if !SANITIZER_FUCHSIA
#if !SANITIZER_FUCHSIA && !SANITIZER_RTEMS
#if SANITIZER_POSIX
#include "sanitizer_common/sanitizer_posix.h"
#endif
#if ASAN_INTERCEPT__UNWIND_RAISEEXCEPTION || \
ASAN_INTERCEPT__SJLJ_UNWIND_RAISEEXCEPTION
#include <unwind.h>
#endif
#if defined(__i386) && SANITIZER_LINUX
#define ASAN_PTHREAD_CREATE_VERSION "GLIBC_2.1"
#elif defined(__mips__) && SANITIZER_LINUX
@ -178,6 +183,7 @@ DECLARE_REAL_AND_INTERCEPTOR(void, free, void *)
(void)(s); \
} while (false)
#include "sanitizer_common/sanitizer_common_syscalls.inc"
#include "sanitizer_common/sanitizer_syscalls_netbsd.inc"
struct ThreadStartParam {
atomic_uintptr_t t;
@ -269,7 +275,15 @@ INTERCEPTOR(int, swapcontext, struct ucontext_t *oucp,
uptr stack, ssize;
ReadContextStack(ucp, &stack, &ssize);
ClearShadowMemoryForContextStack(stack, ssize);
#if __has_attribute(__indirect_return__) && \
(defined(__x86_64__) || defined(__i386__))
int (*real_swapcontext)(struct ucontext_t *, struct ucontext_t *)
__attribute__((__indirect_return__))
= REAL(swapcontext);
int res = real_swapcontext(oucp, ucp);
#else
int res = REAL(swapcontext)(oucp, ucp);
#endif
// swapcontext technically does not return, but program may swap context to
// "oucp" later, that would look as if swapcontext() returned 0.
// We need to clear shadow for ucp once again, as it may be in arbitrary
@ -318,6 +332,32 @@ INTERCEPTOR(void, __cxa_throw, void *a, void *b, void *c) {
}
#endif
#if ASAN_INTERCEPT___CXA_RETHROW_PRIMARY_EXCEPTION
INTERCEPTOR(void, __cxa_rethrow_primary_exception, void *a) {
CHECK(REAL(__cxa_rethrow_primary_exception));
__asan_handle_no_return();
REAL(__cxa_rethrow_primary_exception)(a);
}
#endif
#if ASAN_INTERCEPT__UNWIND_RAISEEXCEPTION
INTERCEPTOR(_Unwind_Reason_Code, _Unwind_RaiseException,
_Unwind_Exception *object) {
CHECK(REAL(_Unwind_RaiseException));
__asan_handle_no_return();
return REAL(_Unwind_RaiseException)(object);
}
#endif
#if ASAN_INTERCEPT__SJLJ_UNWIND_RAISEEXCEPTION
INTERCEPTOR(_Unwind_Reason_Code, _Unwind_SjLj_RaiseException,
_Unwind_Exception *object) {
CHECK(REAL(_Unwind_SjLj_RaiseException));
__asan_handle_no_return();
return REAL(_Unwind_SjLj_RaiseException)(object);
}
#endif
#if ASAN_INTERCEPT_INDEX
# if ASAN_USE_ALIAS_ATTRIBUTE_FOR_INDEX
INTERCEPTOR(char*, index, const char *string, int c)
@ -540,14 +580,6 @@ INTERCEPTOR(int, __cxa_atexit, void (*func)(void *), void *arg,
}
#endif // ASAN_INTERCEPT___CXA_ATEXIT
#if ASAN_INTERCEPT_FORK
INTERCEPTOR(int, fork, void) {
ENSURE_ASAN_INITED();
int pid = REAL(fork)();
return pid;
}
#endif // ASAN_INTERCEPT_FORK
// ---------------------- InitializeAsanInterceptors ---------------- {{{1
namespace __asan {
void InitializeAsanInterceptors() {
@ -598,6 +630,17 @@ void InitializeAsanInterceptors() {
#if ASAN_INTERCEPT___CXA_THROW
ASAN_INTERCEPT_FUNC(__cxa_throw);
#endif
#if ASAN_INTERCEPT___CXA_RETHROW_PRIMARY_EXCEPTION
ASAN_INTERCEPT_FUNC(__cxa_rethrow_primary_exception);
#endif
// Indirectly intercept std::rethrow_exception.
#if ASAN_INTERCEPT__UNWIND_RAISEEXCEPTION
INTERCEPT_FUNCTION(_Unwind_RaiseException);
#endif
// Indirectly intercept std::rethrow_exception.
#if ASAN_INTERCEPT__UNWIND_SJLJ_RAISEEXCEPTION
INTERCEPT_FUNCTION(_Unwind_SjLj_RaiseException);
#endif
// Intercept threading-related functions
#if ASAN_INTERCEPT_PTHREAD_CREATE
@ -614,10 +657,6 @@ void InitializeAsanInterceptors() {
ASAN_INTERCEPT_FUNC(__cxa_atexit);
#endif
#if ASAN_INTERCEPT_FORK
ASAN_INTERCEPT_FUNC(fork);
#endif
InitializePlatformInterceptors();
VReport(1, "AddressSanitizer: libc interceptors initialized\n");


@ -34,10 +34,10 @@ void InitializePlatformInterceptors();
} // namespace __asan
// There is no general interception at all on Fuchsia.
// There is no general interception at all on Fuchsia and RTEMS.
// Only the functions in asan_interceptors_memintrinsics.h are
// really defined to replace libc functions.
#if !SANITIZER_FUCHSIA
#if !SANITIZER_FUCHSIA && !SANITIZER_RTEMS
// Use macro to describe if specific function should be
// intercepted on a given platform.
@ -46,13 +46,11 @@ void InitializePlatformInterceptors();
# define ASAN_INTERCEPT__LONGJMP 1
# define ASAN_INTERCEPT_INDEX 1
# define ASAN_INTERCEPT_PTHREAD_CREATE 1
# define ASAN_INTERCEPT_FORK 1
#else
# define ASAN_INTERCEPT_ATOLL_AND_STRTOLL 0
# define ASAN_INTERCEPT__LONGJMP 0
# define ASAN_INTERCEPT_INDEX 0
# define ASAN_INTERCEPT_PTHREAD_CREATE 0
# define ASAN_INTERCEPT_FORK 0
#endif
#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD || \
@ -80,13 +78,20 @@ void InitializePlatformInterceptors();
# define ASAN_INTERCEPT___LONGJMP_CHK 0
#endif
// Android bug: https://code.google.com/p/android/issues/detail?id=61799
#if ASAN_HAS_EXCEPTIONS && !SANITIZER_WINDOWS && \
!(SANITIZER_ANDROID && defined(__i386)) && \
!SANITIZER_SOLARIS
#if ASAN_HAS_EXCEPTIONS && !SANITIZER_WINDOWS && !SANITIZER_SOLARIS && \
!SANITIZER_NETBSD
# define ASAN_INTERCEPT___CXA_THROW 1
# define ASAN_INTERCEPT___CXA_RETHROW_PRIMARY_EXCEPTION 1
# if defined(_GLIBCXX_SJLJ_EXCEPTIONS) || (SANITIZER_IOS && defined(__arm__))
# define ASAN_INTERCEPT__UNWIND_SJLJ_RAISEEXCEPTION 1
# else
# define ASAN_INTERCEPT__UNWIND_RAISEEXCEPTION 1
# endif
#else
# define ASAN_INTERCEPT___CXA_THROW 0
# define ASAN_INTERCEPT___CXA_RETHROW_PRIMARY_EXCEPTION 0
# define ASAN_INTERCEPT__UNWIND_RAISEEXCEPTION 0
# define ASAN_INTERCEPT__UNWIND_SJLJ_RAISEEXCEPTION 0
#endif
#if !SANITIZER_WINDOWS


@ -31,14 +31,14 @@ void *__asan_memmove(void *to, const void *from, uptr size) {
ASAN_MEMMOVE_IMPL(nullptr, to, from, size);
}
#if SANITIZER_FUCHSIA
#if SANITIZER_FUCHSIA || SANITIZER_RTEMS
// Fuchsia doesn't use sanitizer_common_interceptors.inc, but the only
// things there it wants are these three. Just define them as aliases
// here rather than repeating the contents.
// Fuchsia and RTEMS don't use sanitizer_common_interceptors.inc, but
// the only things there it wants are these three. Just define them
// as aliases here rather than repeating the contents.
decltype(memcpy) memcpy[[gnu::alias("__asan_memcpy")]];
decltype(memmove) memmove[[gnu::alias("__asan_memmove")]];
decltype(memset) memset[[gnu::alias("__asan_memset")]];
extern "C" decltype(__asan_memcpy) memcpy[[gnu::alias("__asan_memcpy")]];
extern "C" decltype(__asan_memmove) memmove[[gnu::alias("__asan_memmove")]];
extern "C" decltype(__asan_memset) memset[[gnu::alias("__asan_memset")]];
#endif // SANITIZER_FUCHSIA
#endif // SANITIZER_FUCHSIA || SANITIZER_RTEMS


@ -133,15 +133,22 @@ static inline bool RangesOverlap(const char *offset1, uptr length1,
const char *offset2, uptr length2) {
return !((offset1 + length1 <= offset2) || (offset2 + length2 <= offset1));
}
#define CHECK_RANGES_OVERLAP(name, _offset1, length1, _offset2, length2) do { \
const char *offset1 = (const char*)_offset1; \
const char *offset2 = (const char*)_offset2; \
if (RangesOverlap(offset1, length1, offset2, length2)) { \
GET_STACK_TRACE_FATAL_HERE; \
ReportStringFunctionMemoryRangesOverlap(name, offset1, length1, \
offset2, length2, &stack); \
} \
} while (0)
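// Overlap reports can be silenced via ASan suppressions: first check a
// name-based interceptor suppression, then, if any stack-trace-based
// suppressions exist, match the current stack trace as well.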
#define CHECK_RANGES_OVERLAP(name, _offset1, length1, _offset2, length2) \
do { \
const char *offset1 = (const char *)_offset1; \
const char *offset2 = (const char *)_offset2; \
if (RangesOverlap(offset1, length1, offset2, length2)) { \
GET_STACK_TRACE_FATAL_HERE; \
bool suppressed = IsInterceptorSuppressed(name); \
if (!suppressed && HaveStackTraceBasedSuppressions()) { \
suppressed = IsStackTraceSuppressed(&stack); \
} \
if (!suppressed) { \
ReportStringFunctionMemoryRangesOverlap(name, offset1, length1, \
offset2, length2, &stack); \
} \
} \
} while (0)
} // namespace __asan


@ -36,7 +36,7 @@
// If set, values like allocator chunk size, as well as defaults for some flags
// will be changed towards less memory overhead.
#ifndef ASAN_LOW_MEMORY
# if SANITIZER_IOS || SANITIZER_ANDROID
# if SANITIZER_IOS || SANITIZER_ANDROID || SANITIZER_RTEMS
# define ASAN_LOW_MEMORY 1
# else
# define ASAN_LOW_MEMORY 0
@ -78,7 +78,7 @@ void InitializeShadowMemory();
// asan_malloc_linux.cc / asan_malloc_mac.cc
void ReplaceSystemMalloc();
// asan_linux.cc / asan_mac.cc / asan_win.cc
// asan_linux.cc / asan_mac.cc / asan_rtems.cc / asan_win.cc
uptr FindDynamicShadowStart();
void *AsanDoesNotSupportStaticLinkage();
void AsanCheckDynamicRTPrereqs();
@ -147,6 +147,9 @@ const int kAsanArrayCookieMagic = 0xac;
const int kAsanIntraObjectRedzone = 0xbb;
const int kAsanAllocaLeftMagic = 0xca;
const int kAsanAllocaRightMagic = 0xcb;
// Used to populate the shadow gap for systems without memory
// protection there (i.e. Myriad).
const int kAsanShadowGap = 0xcc;
static const uptr kCurrentStackFrameMagic = 0x41B58AB3;
static const uptr kRetiredStackFrameMagic = 0x45E0360E;


@ -62,16 +62,36 @@ uptr FindDynamicShadowStart() {
uptr space_size = kHighShadowEnd + left_padding;
uptr largest_gap_found = 0;
uptr shadow_start = FindAvailableMemoryRange(space_size, alignment,
granularity, &largest_gap_found);
uptr max_occupied_addr = 0;
VReport(2, "FindDynamicShadowStart, space_size = %p\n", space_size);
uptr shadow_start =
FindAvailableMemoryRange(space_size, alignment, granularity,
&largest_gap_found, &max_occupied_addr);
// If the shadow doesn't fit, restrict the address space to make it fit.
if (shadow_start == 0) {
VReport(
2,
"Shadow doesn't fit, largest_gap_found = %p, max_occupied_addr = %p\n",
largest_gap_found, max_occupied_addr);
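// The shadow occupies 1/2^SHADOW_SCALE of the address space, so the largest
// free gap found can back a VM of at most (gap << SHADOW_SCALE) bytes;
// restrict the address space to that size and retry.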
uptr new_max_vm = RoundDownTo(largest_gap_found << SHADOW_SCALE, alignment);
if (new_max_vm < max_occupied_addr) {
Report("Unable to find a memory range for dynamic shadow.\n");
Report(
"space_size = %p, largest_gap_found = %p, max_occupied_addr = %p, "
"new_max_vm = %p\n",
space_size, largest_gap_found, max_occupied_addr, new_max_vm);
CHECK(0 && "cannot place shadow");
}
RestrictMemoryToMaxAddress(new_max_vm);
kHighMemEnd = new_max_vm - 1;
space_size = kHighShadowEnd + left_padding;
shadow_start =
FindAvailableMemoryRange(space_size, alignment, granularity, nullptr);
VReport(2, "FindDynamicShadowStart, space_size = %p\n", space_size);
shadow_start = FindAvailableMemoryRange(space_size, alignment, granularity,
nullptr, nullptr);
if (shadow_start == 0) {
Report("Unable to find a memory range after restricting VM.\n");
CHECK(0 && "cannot place shadow after restricting vm");
}
}
CHECK_NE((uptr)0, shadow_start);
CHECK(IsAligned(shadow_start, alignment));


@ -16,19 +16,23 @@
#include "sanitizer_common/sanitizer_platform.h"
#if SANITIZER_FREEBSD || SANITIZER_FUCHSIA || SANITIZER_LINUX || \
SANITIZER_NETBSD || SANITIZER_SOLARIS
SANITIZER_NETBSD || SANITIZER_RTEMS || SANITIZER_SOLARIS
#include "sanitizer_common/sanitizer_allocator_checks.h"
#include "sanitizer_common/sanitizer_errno.h"
#include "sanitizer_common/sanitizer_tls_get_addr.h"
#include "asan_allocator.h"
#include "asan_interceptors.h"
#include "asan_internal.h"
#include "asan_malloc_local.h"
#include "asan_stack.h"
// ---------------------- Replacement functions ---------------- {{{1
using namespace __asan; // NOLINT
static uptr allocated_for_dlsym;
static const uptr kDlsymAllocPoolSize = 1024;
static uptr last_dlsym_alloc_size_in_words;
static const uptr kDlsymAllocPoolSize = SANITIZER_RTEMS ? 4096 : 1024;
static uptr alloc_memory_for_dlsym[kDlsymAllocPoolSize];
static INLINE bool IsInDlsymAllocPool(const void *ptr) {
@ -39,21 +43,73 @@ static INLINE bool IsInDlsymAllocPool(const void *ptr) {
static void *AllocateFromLocalPool(uptr size_in_bytes) {
uptr size_in_words = RoundUpTo(size_in_bytes, kWordSize) / kWordSize;
void *mem = (void*)&alloc_memory_for_dlsym[allocated_for_dlsym];
last_dlsym_alloc_size_in_words = size_in_words;
allocated_for_dlsym += size_in_words;
CHECK_LT(allocated_for_dlsym, kDlsymAllocPoolSize);
return mem;
}
static void DeallocateFromLocalPool(const void *ptr) {
// Hack: since glibc 2.27 dlsym no longer uses stack-allocated memory to store
// error messages and instead uses malloc followed by free. To avoid pool
// exhaustion due to long object filenames, handle that special case here.
uptr prev_offset = allocated_for_dlsym - last_dlsym_alloc_size_in_words;
void *prev_mem = (void*)&alloc_memory_for_dlsym[prev_offset];
if (prev_mem == ptr) {
REAL(memset)(prev_mem, 0, last_dlsym_alloc_size_in_words * kWordSize);
allocated_for_dlsym = prev_offset;
last_dlsym_alloc_size_in_words = 0;
}
}
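// posix_memalign for the early/dlsym pool: bump the pool cursor up to the
// requested alignment, hand out the rounded-up size, and report ENOMEM once
// the static pool is exhausted.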
static int PosixMemalignFromLocalPool(void **memptr, uptr alignment,
uptr size_in_bytes) {
if (UNLIKELY(!CheckPosixMemalignAlignment(alignment)))
return errno_EINVAL;
CHECK(alignment >= kWordSize);
uptr addr = (uptr)&alloc_memory_for_dlsym[allocated_for_dlsym];
uptr aligned_addr = RoundUpTo(addr, alignment);
uptr aligned_size = RoundUpTo(size_in_bytes, kWordSize);
uptr *end_mem = (uptr*)(aligned_addr + aligned_size);
uptr allocated = end_mem - alloc_memory_for_dlsym;
if (allocated >= kDlsymAllocPoolSize)
return errno_ENOMEM;
allocated_for_dlsym = allocated;
*memptr = (void*)aligned_addr;
return 0;
}
#if SANITIZER_RTEMS
void* MemalignFromLocalPool(uptr alignment, uptr size) {
void *ptr = nullptr;
alignment = Max(alignment, kWordSize);
PosixMemalignFromLocalPool(&ptr, alignment, size);
return ptr;
}
bool IsFromLocalPool(const void *ptr) {
return IsInDlsymAllocPool(ptr);
}
#endif
static INLINE bool MaybeInDlsym() {
// Fuchsia doesn't use dlsym-based interceptors.
return !SANITIZER_FUCHSIA && asan_init_is_running;
}
static INLINE bool UseLocalPool() {
return EarlyMalloc() || MaybeInDlsym();
}
static void *ReallocFromLocalPool(void *ptr, uptr size) {
const uptr offset = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
const uptr copy_size = Min(size, kDlsymAllocPoolSize - offset);
void *new_ptr;
if (UNLIKELY(MaybeInDlsym())) {
if (UNLIKELY(UseLocalPool())) {
new_ptr = AllocateFromLocalPool(size);
} else {
ENSURE_ASAN_INITED();
@ -66,8 +122,10 @@ static void *ReallocFromLocalPool(void *ptr, uptr size) {
INTERCEPTOR(void, free, void *ptr) {
GET_STACK_TRACE_FREE;
if (UNLIKELY(IsInDlsymAllocPool(ptr)))
if (UNLIKELY(IsInDlsymAllocPool(ptr))) {
DeallocateFromLocalPool(ptr);
return;
}
asan_free(ptr, &stack, FROM_MALLOC);
}
@ -81,7 +139,7 @@ INTERCEPTOR(void, cfree, void *ptr) {
#endif // SANITIZER_INTERCEPT_CFREE
INTERCEPTOR(void*, malloc, uptr size) {
if (UNLIKELY(MaybeInDlsym()))
if (UNLIKELY(UseLocalPool()))
// Hack: dlsym calls malloc before REAL(malloc) is retrieved from dlsym.
return AllocateFromLocalPool(size);
ENSURE_ASAN_INITED();
@ -90,7 +148,7 @@ INTERCEPTOR(void*, malloc, uptr size) {
}
INTERCEPTOR(void*, calloc, uptr nmemb, uptr size) {
if (UNLIKELY(MaybeInDlsym()))
if (UNLIKELY(UseLocalPool()))
// Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym.
return AllocateFromLocalPool(nmemb * size);
ENSURE_ASAN_INITED();
@ -101,7 +159,7 @@ INTERCEPTOR(void*, calloc, uptr nmemb, uptr size) {
INTERCEPTOR(void*, realloc, void *ptr, uptr size) {
if (UNLIKELY(IsInDlsymAllocPool(ptr)))
return ReallocFromLocalPool(ptr, size);
if (UNLIKELY(MaybeInDlsym()))
if (UNLIKELY(UseLocalPool()))
return AllocateFromLocalPool(size);
ENSURE_ASAN_INITED();
GET_STACK_TRACE_MALLOC;
@ -122,10 +180,12 @@ INTERCEPTOR(void*, __libc_memalign, uptr boundary, uptr size) {
}
#endif // SANITIZER_INTERCEPT_MEMALIGN
#if SANITIZER_INTERCEPT_ALIGNED_ALLOC
INTERCEPTOR(void*, aligned_alloc, uptr boundary, uptr size) {
GET_STACK_TRACE_MALLOC;
return asan_memalign(boundary, size, &stack, FROM_MALLOC);
return asan_aligned_alloc(boundary, size, &stack);
}
#endif // SANITIZER_INTERCEPT_ALIGNED_ALLOC
INTERCEPTOR(uptr, malloc_usable_size, void *ptr) {
GET_CURRENT_PC_BP_SP;
@ -154,8 +214,9 @@ INTERCEPTOR(int, mallopt, int cmd, int value) {
#endif // SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO
INTERCEPTOR(int, posix_memalign, void **memptr, uptr alignment, uptr size) {
if (UNLIKELY(UseLocalPool()))
return PosixMemalignFromLocalPool(memptr, alignment, size);
GET_STACK_TRACE_MALLOC;
// Printf("posix_memalign: %zx %zu\n", alignment, size);
return asan_posix_memalign(memptr, alignment, size, &stack);
}


@ -0,0 +1,44 @@
//===-- asan_malloc_local.h -------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of AddressSanitizer, an address sanity checker.
//
// Provide interfaces to check for and handle local pool memory allocation.
//===----------------------------------------------------------------------===//
#ifndef ASAN_MALLOC_LOCAL_H
#define ASAN_MALLOC_LOCAL_H
#include "sanitizer_common/sanitizer_platform.h"
#include "asan_internal.h"
// On RTEMS, we use the local pool to handle memory allocation when the ASan
// run-time is not up.
static INLINE bool EarlyMalloc() {
return SANITIZER_RTEMS && (!__asan::asan_inited ||
__asan::asan_init_is_running);
}
void* MemalignFromLocalPool(uptr alignment, uptr size);
#if SANITIZER_RTEMS
bool IsFromLocalPool(const void *ptr);
#define ALLOCATE_FROM_LOCAL_POOL UNLIKELY(EarlyMalloc())
#define IS_FROM_LOCAL_POOL(ptr) UNLIKELY(IsFromLocalPool(ptr))
#else // SANITIZER_RTEMS
#define ALLOCATE_FROM_LOCAL_POOL 0
#define IS_FROM_LOCAL_POOL(ptr) 0
#endif // SANITIZER_RTEMS
#endif // ASAN_MALLOC_LOCAL_H


@ -38,6 +38,9 @@ using namespace __asan;
#define COMMON_MALLOC_CALLOC(count, size) \
GET_STACK_TRACE_MALLOC; \
void *p = asan_calloc(count, size, &stack);
#define COMMON_MALLOC_POSIX_MEMALIGN(memptr, alignment, size) \
GET_STACK_TRACE_MALLOC; \
int res = asan_posix_memalign(memptr, alignment, size, &stack);
#define COMMON_MALLOC_VALLOC(size) \
GET_STACK_TRACE_MALLOC; \
void *p = asan_memalign(GetPageSizeCached(), size, &stack, FROM_MALLOC);


@ -122,6 +122,13 @@
// || `[0x400000000000, 0x47ffffffffff]` || LowShadow ||
// || `[0x000000000000, 0x3fffffffffff]` || LowMem ||
//
// Shadow mapping on NetBSD/i386 with SHADOW_OFFSET == 0x40000000:
// || `[0x60000000, 0xfffff000]` || HighMem ||
// || `[0x4c000000, 0x5fffffff]` || HighShadow ||
// || `[0x48000000, 0x4bffffff]` || ShadowGap ||
// || `[0x40000000, 0x47ffffff]` || LowShadow ||
// || `[0x00000000, 0x3fffffff]` || LowMem ||
//
// Default Windows/i386 mapping:
// (the exact location of HighShadow/HighMem may vary depending
// on WoW64, /LARGEADDRESSAWARE, etc).
@ -130,11 +137,17 @@
// || `[0x36000000, 0x39ffffff]` || ShadowGap ||
// || `[0x30000000, 0x35ffffff]` || LowShadow ||
// || `[0x00000000, 0x2fffffff]` || LowMem ||
//
// Shadow mapping on Myriad2 (for shadow scale 5):
// || `[0x9ff80000, 0x9fffffff]` || ShadowGap ||
// || `[0x9f000000, 0x9ff7ffff]` || LowShadow ||
// || `[0x80000000, 0x9effffff]` || LowMem ||
// || `[0x00000000, 0x7fffffff]` || Ignored ||
#if defined(ASAN_SHADOW_SCALE)
static const u64 kDefaultShadowScale = ASAN_SHADOW_SCALE;
#else
static const u64 kDefaultShadowScale = 3;
static const u64 kDefaultShadowScale = SANITIZER_MYRIAD2 ? 5 : 3;
#endif
static const u64 kDefaultShadowSentinel = ~(uptr)0;
static const u64 kDefaultShadowOffset32 = 1ULL << 29; // 0x20000000
@ -152,9 +165,19 @@ static const u64 kPPC64_ShadowOffset64 = 1ULL << 44;
static const u64 kSystemZ_ShadowOffset64 = 1ULL << 52;
static const u64 kFreeBSD_ShadowOffset32 = 1ULL << 30; // 0x40000000
static const u64 kFreeBSD_ShadowOffset64 = 1ULL << 46; // 0x400000000000
static const u64 kNetBSD_ShadowOffset32 = 1ULL << 30; // 0x40000000
static const u64 kNetBSD_ShadowOffset64 = 1ULL << 46; // 0x400000000000
static const u64 kWindowsShadowOffset32 = 3ULL << 28; // 0x30000000
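// Myriad2: DRAM is a 512 MB window at 0x80000000; the top 1/32 of the window
// (16 MB at shadow scale 5) is reserved for the shadow and the shadow gap,
// which places the shadow offset at 0x9f000000.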
static const u64 kMyriadMemoryOffset32 = 0x80000000ULL;
static const u64 kMyriadMemorySize32 = 0x20000000ULL;
static const u64 kMyriadMemoryEnd32 =
kMyriadMemoryOffset32 + kMyriadMemorySize32 - 1;
static const u64 kMyriadShadowOffset32 =
(kMyriadMemoryOffset32 + kMyriadMemorySize32 -
(kMyriadMemorySize32 >> kDefaultShadowScale));
static const u64 kMyriadCacheBitMask32 = 0x40000000ULL;
#define SHADOW_SCALE kDefaultShadowScale
#if SANITIZER_FUCHSIA
@ -166,6 +189,8 @@ static const u64 kWindowsShadowOffset32 = 3ULL << 28; // 0x30000000
# define SHADOW_OFFSET kMIPS32_ShadowOffset32
# elif SANITIZER_FREEBSD
# define SHADOW_OFFSET kFreeBSD_ShadowOffset32
# elif SANITIZER_NETBSD
# define SHADOW_OFFSET kNetBSD_ShadowOffset32
# elif SANITIZER_WINDOWS
# define SHADOW_OFFSET kWindowsShadowOffset32
# elif SANITIZER_IOS
@ -174,6 +199,8 @@ static const u64 kWindowsShadowOffset32 = 3ULL << 28; // 0x30000000
# else
# define SHADOW_OFFSET kIosShadowOffset32
# endif
# elif SANITIZER_MYRIAD2
# define SHADOW_OFFSET kMyriadShadowOffset32
# else
# define SHADOW_OFFSET kDefaultShadowOffset32
# endif
@ -212,6 +239,39 @@ static const u64 kWindowsShadowOffset32 = 3ULL << 28; // 0x30000000
#endif
#define SHADOW_GRANULARITY (1ULL << SHADOW_SCALE)
#define DO_ASAN_MAPPING_PROFILE 0 // Set to 1 to profile the functions below.
#if DO_ASAN_MAPPING_PROFILE
# define PROFILE_ASAN_MAPPING() AsanMappingProfile[__LINE__]++;
#else
# define PROFILE_ASAN_MAPPING()
#endif
// If 1, all shadow boundaries are constants.
// Don't set to 1 other than for testing.
#define ASAN_FIXED_MAPPING 0
namespace __asan {
extern uptr AsanMappingProfile[];
#if ASAN_FIXED_MAPPING
// Fixed mapping for 64-bit Linux. Mostly used for performance comparison
// with non-fixed mapping. As of r175253 (Feb 2013) the performance
// difference between fixed and non-fixed mapping is below the noise level.
static uptr kHighMemEnd = 0x7fffffffffffULL;
static uptr kMidMemBeg = 0x3000000000ULL;
static uptr kMidMemEnd = 0x4fffffffffULL;
#else
extern uptr kHighMemEnd, kMidMemBeg, kMidMemEnd; // Initialized in __asan_init.
#endif
} // namespace __asan
#if SANITIZER_MYRIAD2
#include "asan_mapping_myriad.h"
#else
#define MEM_TO_SHADOW(mem) (((mem) >> SHADOW_SCALE) + (SHADOW_OFFSET))
#define kLowMemBeg 0
@ -243,36 +303,11 @@ static const u64 kWindowsShadowOffset32 = 3ULL << 28; // 0x30000000
#define kShadowGap3Beg (kMidMemBeg ? kMidMemEnd + 1 : 0)
#define kShadowGap3End (kMidMemBeg ? kHighShadowBeg - 1 : 0)
#define DO_ASAN_MAPPING_PROFILE 0 // Set to 1 to profile the functions below.
#if DO_ASAN_MAPPING_PROFILE
# define PROFILE_ASAN_MAPPING() AsanMappingProfile[__LINE__]++;
#else
# define PROFILE_ASAN_MAPPING()
#endif
// If 1, all shadow boundaries are constants.
// Don't set to 1 other than for testing.
#define ASAN_FIXED_MAPPING 0
namespace __asan {
extern uptr AsanMappingProfile[];
#if ASAN_FIXED_MAPPING
// Fixed mapping for 64-bit Linux. Mostly used for performance comparison
// with non-fixed mapping. As of r175253 (Feb 2013) the performance
// difference between fixed and non-fixed mapping is below the noise level.
static uptr kHighMemEnd = 0x7fffffffffffULL;
static uptr kMidMemBeg = 0x3000000000ULL;
static uptr kMidMemEnd = 0x4fffffffffULL;
#else
extern uptr kHighMemEnd, kMidMemBeg, kMidMemEnd; // Initialized in __asan_init.
#endif
static inline bool AddrIsInLowMem(uptr a) {
PROFILE_ASAN_MAPPING();
return a < kLowMemEnd;
return a <= kLowMemEnd;
}
static inline bool AddrIsInLowShadow(uptr a) {
@ -280,16 +315,26 @@ static inline bool AddrIsInLowShadow(uptr a) {
return a >= kLowShadowBeg && a <= kLowShadowEnd;
}
static inline bool AddrIsInHighMem(uptr a) {
PROFILE_ASAN_MAPPING();
return a >= kHighMemBeg && a <= kHighMemEnd;
}
static inline bool AddrIsInMidMem(uptr a) {
PROFILE_ASAN_MAPPING();
return kMidMemBeg && a >= kMidMemBeg && a <= kMidMemEnd;
}
static inline bool AddrIsInMidShadow(uptr a) {
PROFILE_ASAN_MAPPING();
return kMidMemBeg && a >= kMidShadowBeg && a <= kMidShadowEnd;
}
static inline bool AddrIsInHighMem(uptr a) {
PROFILE_ASAN_MAPPING();
return kHighMemBeg && a >= kHighMemBeg && a <= kHighMemEnd;
}
static inline bool AddrIsInHighShadow(uptr a) {
PROFILE_ASAN_MAPPING();
return kHighMemBeg && a >= kHighShadowBeg && a <= kHighShadowEnd;
}
static inline bool AddrIsInShadowGap(uptr a) {
PROFILE_ASAN_MAPPING();
if (kMidMemBeg) {
@ -305,6 +350,12 @@ static inline bool AddrIsInShadowGap(uptr a) {
return a >= kShadowGapBeg && a <= kShadowGapEnd;
}
} // namespace __asan
#endif // SANITIZER_MYRIAD2
namespace __asan {
static inline bool AddrIsInMem(uptr a) {
PROFILE_ASAN_MAPPING();
return AddrIsInLowMem(a) || AddrIsInMidMem(a) || AddrIsInHighMem(a) ||
@ -317,16 +368,6 @@ static inline uptr MemToShadow(uptr p) {
return MEM_TO_SHADOW(p);
}
static inline bool AddrIsInHighShadow(uptr a) {
PROFILE_ASAN_MAPPING();
return a >= kHighShadowBeg && a <= kHighMemEnd;
}
static inline bool AddrIsInMidShadow(uptr a) {
PROFILE_ASAN_MAPPING();
return kMidMemBeg && a >= kMidShadowBeg && a <= kMidMemEnd;
}
static inline bool AddrIsInShadow(uptr a) {
PROFILE_ASAN_MAPPING();
return AddrIsInLowShadow(a) || AddrIsInMidShadow(a) || AddrIsInHighShadow(a);
@ -339,6 +380,8 @@ static inline bool AddrIsAlignedByGranularity(uptr a) {
static inline bool AddressIsPoisoned(uptr a) {
PROFILE_ASAN_MAPPING();
if (SANITIZER_MYRIAD2 && !AddrIsInMem(a) && !AddrIsInShadow(a))
return false;
const uptr kAccessSize = 1;
u8 *shadow_address = (u8*)MEM_TO_SHADOW(a);
s8 shadow_value = *shadow_address;


@ -0,0 +1,86 @@
//===-- asan_mapping_myriad.h -----------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of AddressSanitizer, an address sanity checker.
//
// Myriad-specific definitions for ASan memory mapping.
//===----------------------------------------------------------------------===//
#ifndef ASAN_MAPPING_MYRIAD_H
#define ASAN_MAPPING_MYRIAD_H
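// RAW_ADDR masks off kMyriadCacheBitMask32 before any shadow computation, so
// an address and its cache-bit alias map to the same shadow byte.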
#define RAW_ADDR(mem) ((mem) & ~kMyriadCacheBitMask32)
#define MEM_TO_SHADOW(mem) \
(((RAW_ADDR(mem) - kLowMemBeg) >> SHADOW_SCALE) + (SHADOW_OFFSET))
#define kLowMemBeg kMyriadMemoryOffset32
#define kLowMemEnd (SHADOW_OFFSET - 1)
#define kLowShadowBeg SHADOW_OFFSET
#define kLowShadowEnd MEM_TO_SHADOW(kLowMemEnd)
#define kHighMemBeg 0
#define kHighShadowBeg 0
#define kHighShadowEnd 0
#define kMidShadowBeg 0
#define kMidShadowEnd 0
#define kShadowGapBeg (kLowShadowEnd + 1)
#define kShadowGapEnd kMyriadMemoryEnd32
#define kShadowGap2Beg 0
#define kShadowGap2End 0
#define kShadowGap3Beg 0
#define kShadowGap3End 0
namespace __asan {
static inline bool AddrIsInLowMem(uptr a) {
PROFILE_ASAN_MAPPING();
a = RAW_ADDR(a);
return a >= kLowMemBeg && a <= kLowMemEnd;
}
static inline bool AddrIsInLowShadow(uptr a) {
PROFILE_ASAN_MAPPING();
a = RAW_ADDR(a);
return a >= kLowShadowBeg && a <= kLowShadowEnd;
}
static inline bool AddrIsInMidMem(uptr a) {
PROFILE_ASAN_MAPPING();
return false;
}
static inline bool AddrIsInMidShadow(uptr a) {
PROFILE_ASAN_MAPPING();
return false;
}
static inline bool AddrIsInHighMem(uptr a) {
PROFILE_ASAN_MAPPING();
return false;
}
static inline bool AddrIsInHighShadow(uptr a) {
PROFILE_ASAN_MAPPING();
return false;
}
static inline bool AddrIsInShadowGap(uptr a) {
PROFILE_ASAN_MAPPING();
a = RAW_ADDR(a);
return a >= kShadowGapBeg && a <= kShadowGapEnd;
}
} // namespace __asan
#endif // ASAN_MAPPING_MYRIAD_H


@ -31,9 +31,9 @@ struct AllocationSite {
class HeapProfile {
public:
HeapProfile() : allocations_(1024) {}
HeapProfile() { allocations_.reserve(1024); }
void ProcessChunk(const AsanChunkView& cv) {
void ProcessChunk(const AsanChunkView &cv) {
if (cv.IsAllocated()) {
total_allocated_user_size_ += cv.UsedSize();
total_allocated_count_++;
@ -49,10 +49,10 @@ class HeapProfile {
}
void Print(uptr top_percent, uptr max_number_of_contexts) {
InternalSort(&allocations_, allocations_.size(),
[](const AllocationSite &a, const AllocationSite &b) {
return a.total_size > b.total_size;
});
Sort(allocations_.data(), allocations_.size(),
[](const AllocationSite &a, const AllocationSite &b) {
return a.total_size > b.total_size;
});
CHECK(total_allocated_user_size_);
uptr total_shown = 0;
Printf("Live Heap Allocations: %zd bytes in %zd chunks; quarantined: "


@ -14,6 +14,8 @@
#include "asan_allocator.h"
#include "asan_internal.h"
#include "asan_malloc_local.h"
#include "asan_report.h"
#include "asan_stack.h"
#include "interception/interception.h"
@ -67,16 +69,28 @@ struct nothrow_t {};
enum class align_val_t: size_t {};
} // namespace std
// TODO(alekseys): throw std::bad_alloc instead of dying on OOM.
// TODO(alekseyshl): throw std::bad_alloc instead of dying on OOM.
// For local pool allocation, align to SHADOW_GRANULARITY to match asan
// allocator behavior.
#define OPERATOR_NEW_BODY(type, nothrow) \
if (ALLOCATE_FROM_LOCAL_POOL) {\
void *res = MemalignFromLocalPool(SHADOW_GRANULARITY, size);\
if (!nothrow) CHECK(res);\
return res;\
}\
GET_STACK_TRACE_MALLOC;\
void *res = asan_memalign(0, size, &stack, type);\
if (!nothrow && UNLIKELY(!res)) DieOnFailure::OnOOM();\
if (!nothrow && UNLIKELY(!res)) ReportOutOfMemory(size, &stack);\
return res;
#define OPERATOR_NEW_BODY_ALIGN(type, nothrow) \
if (ALLOCATE_FROM_LOCAL_POOL) {\
void *res = MemalignFromLocalPool((uptr)align, size);\
if (!nothrow) CHECK(res);\
return res;\
}\
GET_STACK_TRACE_MALLOC;\
void *res = asan_memalign((uptr)align, size, &stack, type);\
if (!nothrow && UNLIKELY(!res)) DieOnFailure::OnOOM();\
if (!nothrow && UNLIKELY(!res)) ReportOutOfMemory(size, &stack);\
return res;
// On OS X it's not enough to just provide our own 'operator new' and
@ -128,18 +142,22 @@ INTERCEPTOR(void *, _ZnamRKSt9nothrow_t, size_t size, std::nothrow_t const&) {
#endif // !SANITIZER_MAC
#define OPERATOR_DELETE_BODY(type) \
if (IS_FROM_LOCAL_POOL(ptr)) return;\
GET_STACK_TRACE_FREE;\
asan_delete(ptr, 0, 0, &stack, type);
#define OPERATOR_DELETE_BODY_SIZE(type) \
if (IS_FROM_LOCAL_POOL(ptr)) return;\
GET_STACK_TRACE_FREE;\
asan_delete(ptr, size, 0, &stack, type);
#define OPERATOR_DELETE_BODY_ALIGN(type) \
if (IS_FROM_LOCAL_POOL(ptr)) return;\
GET_STACK_TRACE_FREE;\
asan_delete(ptr, 0, static_cast<uptr>(align), &stack, type);
#define OPERATOR_DELETE_BODY_SIZE_ALIGN(type) \
if (IS_FROM_LOCAL_POOL(ptr)) return;\
GET_STACK_TRACE_FREE;\
asan_delete(ptr, size, static_cast<uptr>(align), &stack, type);


@ -32,7 +32,7 @@ bool CanPoisonMemory() {
}
void PoisonShadow(uptr addr, uptr size, u8 value) {
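// Unpoisoning (value == 0) must proceed even when poisoning is currently
// disabled, so that shadow poisoned earlier does not linger.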
if (!CanPoisonMemory()) return;
if (value && !CanPoisonMemory()) return;
CHECK(AddrIsAlignedByGranularity(addr));
CHECK(AddrIsInMem(addr));
CHECK(AddrIsAlignedByGranularity(addr + size));
@ -182,8 +182,15 @@ int __asan_address_is_poisoned(void const volatile *addr) {
uptr __asan_region_is_poisoned(uptr beg, uptr size) {
if (!size) return 0;
uptr end = beg + size;
if (!AddrIsInMem(beg)) return beg;
if (!AddrIsInMem(end)) return end;
if (SANITIZER_MYRIAD2) {
// On Myriad, addresses not in the DRAM range need to be treated as
// unpoisoned.
if (!AddrIsInMem(beg) && !AddrIsInShadow(beg)) return 0;
if (!AddrIsInMem(end) && !AddrIsInShadow(end)) return 0;
} else {
if (!AddrIsInMem(beg)) return beg;
if (!AddrIsInMem(end)) return end;
}
CHECK_LT(beg, end);
uptr aligned_b = RoundUpTo(beg, SHADOW_GRANULARITY);
uptr aligned_e = RoundDownTo(end, SHADOW_GRANULARITY);
@ -452,4 +459,3 @@ bool WordIsPoisoned(uptr addr) {
return (__asan_region_is_poisoned(addr, sizeof(uptr)) != 0);
}
}


@ -38,7 +38,7 @@ void PoisonShadowPartialRightRedzone(uptr addr,
// performance-critical code with care.
ALWAYS_INLINE void FastPoisonShadow(uptr aligned_beg, uptr aligned_size,
u8 value) {
DCHECK(CanPoisonMemory());
DCHECK(!value || CanPoisonMemory());
uptr shadow_beg = MEM_TO_SHADOW(aligned_beg);
uptr shadow_end = MEM_TO_SHADOW(
aligned_beg + aligned_size - SHADOW_GRANULARITY) + 1;
@ -51,6 +51,9 @@ ALWAYS_INLINE void FastPoisonShadow(uptr aligned_beg, uptr aligned_size,
// changed at all. It doesn't currently have an efficient means
// to zero a bunch of pages, but maybe we should add one.
SANITIZER_FUCHSIA == 1 ||
// RTEMS doesn't have pages, let alone a fast way to zero
// them, so default to memset.
SANITIZER_RTEMS == 1 ||
shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) {
REAL(memset)((void*)shadow_beg, value, shadow_end - shadow_beg);
} else {


@ -84,7 +84,7 @@ static void PrintZoneForPointer(uptr ptr, uptr zone_ptr,
bool ParseFrameDescription(const char *frame_descr,
InternalMmapVector<StackVarDescr> *vars) {
CHECK(frame_descr);
char *p;
const char *p;
// This string is created by the compiler and has the following form:
// "n alloc_1 alloc_2 ... alloc_n"
// where alloc_i looks like "offset size len ObjectName"
@ -134,6 +134,10 @@ class ScopedInErrorReport {
}
~ScopedInErrorReport() {
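// When halting on error, only the first thread to acquire the crash state
// gets to print a report; any other racing thread just releases the thread
// registry lock and returns.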
if (halt_on_error_ && !__sanitizer_acquire_crash_state()) {
asanThreadRegistry().Unlock();
return;
}
ASAN_ON_ERROR();
if (current_error_.IsValid()) current_error_.Print();
@ -152,7 +156,7 @@ class ScopedInErrorReport {
// Copy the message buffer so that we can start logging without holding a
// lock that gets acquired during printing.
InternalScopedBuffer<char> buffer_copy(kErrorMessageBufferSize);
InternalMmapVector<char> buffer_copy(kErrorMessageBufferSize);
{
BlockingMutexLock l(&error_message_buf_mutex);
internal_memcpy(buffer_copy.data(),
@ -202,7 +206,7 @@ class ScopedInErrorReport {
bool halt_on_error_;
};
ErrorDescription ScopedInErrorReport::current_error_;
ErrorDescription ScopedInErrorReport::current_error_(LINKER_INITIALIZED);
void ReportDeadlySignal(const SignalContext &sig) {
ScopedInErrorReport in_report(/*fatal*/ true);
@ -254,6 +258,62 @@ void ReportSanitizerGetAllocatedSizeNotOwned(uptr addr,
in_report.ReportError(error);
}
void ReportCallocOverflow(uptr count, uptr size, BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorCallocOverflow error(GetCurrentTidOrInvalid(), stack, count, size);
in_report.ReportError(error);
}
void ReportPvallocOverflow(uptr size, BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorPvallocOverflow error(GetCurrentTidOrInvalid(), stack, size);
in_report.ReportError(error);
}
void ReportInvalidAllocationAlignment(uptr alignment,
BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorInvalidAllocationAlignment error(GetCurrentTidOrInvalid(), stack,
alignment);
in_report.ReportError(error);
}
void ReportInvalidAlignedAllocAlignment(uptr size, uptr alignment,
BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorInvalidAlignedAllocAlignment error(GetCurrentTidOrInvalid(), stack,
size, alignment);
in_report.ReportError(error);
}
void ReportInvalidPosixMemalignAlignment(uptr alignment,
BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorInvalidPosixMemalignAlignment error(GetCurrentTidOrInvalid(), stack,
alignment);
in_report.ReportError(error);
}
void ReportAllocationSizeTooBig(uptr user_size, uptr total_size, uptr max_size,
BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorAllocationSizeTooBig error(GetCurrentTidOrInvalid(), stack, user_size,
total_size, max_size);
in_report.ReportError(error);
}
void ReportRssLimitExceeded(BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorRssLimitExceeded error(GetCurrentTidOrInvalid(), stack);
in_report.ReportError(error);
}
void ReportOutOfMemory(uptr requested_size, BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorOutOfMemory error(GetCurrentTidOrInvalid(), stack, requested_size);
in_report.ReportError(error);
}
void ReportStringFunctionMemoryRangesOverlap(const char *function,
const char *offset1, uptr length1,
const char *offset2, uptr length2,
@ -343,7 +403,11 @@ static bool IsInvalidPointerPair(uptr a1, uptr a2) {
}
static INLINE void CheckForInvalidPointerPair(void *p1, void *p2) {
if (!flags()->detect_invalid_pointer_pairs) return;
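// detect_invalid_pointer_pairs: 0 disables the check, 1 skips comparisons
// where either pointer is null, and any other value checks every pair.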
switch (flags()->detect_invalid_pointer_pairs) {
case 0 : return;
case 1 : if (p1 == nullptr || p2 == nullptr) return; break;
}
uptr a1 = reinterpret_cast<uptr>(p1);
uptr a2 = reinterpret_cast<uptr>(p2);

View File

@ -58,6 +58,18 @@ void ReportAllocTypeMismatch(uptr addr, BufferedStackTrace *free_stack,
void ReportMallocUsableSizeNotOwned(uptr addr, BufferedStackTrace *stack);
void ReportSanitizerGetAllocatedSizeNotOwned(uptr addr,
BufferedStackTrace *stack);
void ReportCallocOverflow(uptr count, uptr size, BufferedStackTrace *stack);
void ReportPvallocOverflow(uptr size, BufferedStackTrace *stack);
void ReportInvalidAllocationAlignment(uptr alignment,
BufferedStackTrace *stack);
void ReportInvalidAlignedAllocAlignment(uptr size, uptr alignment,
BufferedStackTrace *stack);
void ReportInvalidPosixMemalignAlignment(uptr alignment,
BufferedStackTrace *stack);
void ReportAllocationSizeTooBig(uptr user_size, uptr total_size, uptr max_size,
BufferedStackTrace *stack);
void ReportRssLimitExceeded(BufferedStackTrace *stack);
void ReportOutOfMemory(uptr requested_size, BufferedStackTrace *stack);
void ReportStringFunctionMemoryRangesOverlap(const char *function,
const char *offset1, uptr length1,
const char *offset2, uptr length2,


@ -0,0 +1,253 @@
//===-- asan_rtems.cc -----------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of AddressSanitizer, an address sanity checker.
//
// RTEMS-specific details.
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_rtems.h"
#if SANITIZER_RTEMS
#include "asan_internal.h"
#include "asan_interceptors.h"
#include "asan_mapping.h"
#include "asan_poisoning.h"
#include "asan_report.h"
#include "asan_stack.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_libc.h"
#include <pthread.h>
#include <stdlib.h>
namespace __asan {
static void ResetShadowMemory() {
uptr shadow_start = SHADOW_OFFSET;
uptr shadow_end = MEM_TO_SHADOW(kMyriadMemoryEnd32);
uptr gap_start = MEM_TO_SHADOW(shadow_start);
uptr gap_end = MEM_TO_SHADOW(shadow_end);
REAL(memset)((void *)shadow_start, 0, shadow_end - shadow_start);
REAL(memset)((void *)gap_start, kAsanShadowGap, gap_end - gap_start);
}
void InitializeShadowMemory() {
kHighMemEnd = 0;
kMidMemBeg = 0;
kMidMemEnd = 0;
ResetShadowMemory();
}
void AsanApplyToGlobals(globals_op_fptr op, const void *needle) {
UNIMPLEMENTED();
}
void AsanCheckDynamicRTPrereqs() {}
void AsanCheckIncompatibleRT() {}
void InitializeAsanInterceptors() {}
void InitializePlatformInterceptors() {}
void InitializePlatformExceptionHandlers() {}
// RTEMS only supports static linking; it suffices to return with no
// error.
void *AsanDoesNotSupportStaticLinkage() { return nullptr; }
void AsanOnDeadlySignal(int signo, void *siginfo, void *context) {
UNIMPLEMENTED();
}
void EarlyInit() {
// Provide early initialization of shadow memory so that
// instrumented code running before full initialization will not
// report spurious errors.
ResetShadowMemory();
}
// We can use a plain thread_local variable for TSD.
static thread_local void *per_thread;
void *AsanTSDGet() { return per_thread; }
void AsanTSDSet(void *tsd) { per_thread = tsd; }
// There's no initialization needed, and the passed-in destructor
// will never be called. Instead, our own thread destruction hook
// (below) will call AsanThread::TSDDtor directly.
void AsanTSDInit(void (*destructor)(void *tsd)) {
DCHECK(destructor == &PlatformTSDDtor);
}
void PlatformTSDDtor(void *tsd) { UNREACHABLE(__func__); }
//
// Thread registration. We provide an API similar to the Fuchsia port.
//
struct AsanThread::InitOptions {
uptr stack_bottom, stack_size, tls_bottom, tls_size;
};
// Shared setup between thread creation and startup for the initial thread.
static AsanThread *CreateAsanThread(StackTrace *stack, u32 parent_tid,
uptr user_id, bool detached,
uptr stack_bottom, uptr stack_size,
uptr tls_bottom, uptr tls_size) {
// In lieu of AsanThread::Create.
AsanThread *thread = (AsanThread *)MmapOrDie(sizeof(AsanThread), __func__);
AsanThreadContext::CreateThreadContextArgs args = {thread, stack};
asanThreadRegistry().CreateThread(user_id, detached, parent_tid, &args);
// On other systems, AsanThread::Init() is called from the new
// thread itself. But on RTEMS we already know the stack address
// range beforehand, so we can do most of the setup right now.
const AsanThread::InitOptions options = {stack_bottom, stack_size,
tls_bottom, tls_size};
thread->Init(&options);
return thread;
}
// This gets the same arguments passed to Init by CreateAsanThread, above.
// We're in the creator thread before the new thread is actually started, but
// its stack and tls address range are already known.
void AsanThread::SetThreadStackAndTls(const AsanThread::InitOptions *options) {
DCHECK_NE(GetCurrentThread(), this);
DCHECK_NE(GetCurrentThread(), nullptr);
CHECK_NE(options->stack_bottom, 0);
CHECK_NE(options->stack_size, 0);
stack_bottom_ = options->stack_bottom;
stack_top_ = options->stack_bottom + options->stack_size;
tls_begin_ = options->tls_bottom;
tls_end_ = options->tls_bottom + options->tls_size;
}
// Called by __asan::AsanInitInternal (asan_rtl.c). Unlike other ports, the
// main thread on RTEMS does not require special treatment; its AsanThread is
// already created by the provided hooks. This function simply looks up and
// returns the created thread.
AsanThread *CreateMainThread() {
return GetThreadContextByTidLocked(0)->thread;
}
// This is called before each thread creation is attempted. So, in
// its first call, the calling thread is the initial and sole thread.
static void *BeforeThreadCreateHook(uptr user_id, bool detached,
uptr stack_bottom, uptr stack_size,
uptr tls_bottom, uptr tls_size) {
EnsureMainThreadIDIsCorrect();
// Strict init-order checking is thread-hostile.
if (flags()->strict_init_order) StopInitOrderChecking();
GET_STACK_TRACE_THREAD;
u32 parent_tid = GetCurrentTidOrInvalid();
return CreateAsanThread(&stack, parent_tid, user_id, detached,
stack_bottom, stack_size, tls_bottom, tls_size);
}
// This is called after creating a new thread (in the creating thread),
// with the pointer returned by BeforeThreadCreateHook (above).
static void ThreadCreateHook(void *hook, bool aborted) {
AsanThread *thread = static_cast<AsanThread *>(hook);
if (!aborted) {
// The thread was created successfully.
// ThreadStartHook is already running in the new thread.
} else {
// The thread wasn't created after all.
// Clean up everything we set up in BeforeThreadCreateHook.
asanThreadRegistry().FinishThread(thread->tid());
UnmapOrDie(thread, sizeof(AsanThread));
}
}
// This is called (1) in the newly-created thread before it runs anything
// else, with the pointer returned by BeforeThreadCreateHook (above), and
// (2) before a thread restart.
static void ThreadStartHook(void *hook, uptr os_id) {
if (!hook)
return;
AsanThread *thread = static_cast<AsanThread *>(hook);
SetCurrentThread(thread);
ThreadStatus status =
asanThreadRegistry().GetThreadLocked(thread->tid())->status;
DCHECK(status == ThreadStatusCreated || status == ThreadStatusRunning);
// Determine whether we are starting or restarting the thread.
if (status == ThreadStatusCreated)
// In lieu of AsanThread::ThreadStart.
asanThreadRegistry().StartThread(thread->tid(), os_id,
/*workerthread*/ false, nullptr);
else {
// In a thread restart, a thread may resume execution at an
// arbitrary function entry point, with its stack and TLS state
// reset. We unpoison the stack in that case.
PoisonShadow(thread->stack_bottom(), thread->stack_size(), 0);
}
}
// Each thread runs this just before it exits,
// with the pointer returned by BeforeThreadCreateHook (above).
// All per-thread destructors have already been called.
static void ThreadExitHook(void *hook, uptr os_id) {
AsanThread *thread = static_cast<AsanThread *>(hook);
if (thread)
AsanThread::TSDDtor(thread->context());
}
static void HandleExit() {
// Disable ASan by setting it to uninitialized. Also reset the
// shadow memory to avoid reporting errors after the run-time has
// been destroyed.
if (asan_inited) {
asan_inited = false;
ResetShadowMemory();
}
}
} // namespace __asan
// These are declared (in extern "C") by <some_path/sanitizer.h>.
// The system runtime will call our definitions directly.
extern "C" {
void __sanitizer_early_init() {
__asan::EarlyInit();
}
void *__sanitizer_before_thread_create_hook(uptr thread, bool detached,
const char *name,
void *stack_base, size_t stack_size,
void *tls_base, size_t tls_size) {
return __asan::BeforeThreadCreateHook(
thread, detached,
reinterpret_cast<uptr>(stack_base), stack_size,
reinterpret_cast<uptr>(tls_base), tls_size);
}
void __sanitizer_thread_create_hook(void *handle, uptr thread, int status) {
__asan::ThreadCreateHook(handle, status != 0);
}
void __sanitizer_thread_start_hook(void *handle, uptr self) {
__asan::ThreadStartHook(handle, self);
}
void __sanitizer_thread_exit_hook(void *handle, uptr self) {
__asan::ThreadExitHook(handle, self);
}
void __sanitizer_exit() {
__asan::HandleExit();
}
} // "C"
#endif // SANITIZER_RTEMS


@ -56,7 +56,8 @@ static void AsanDie() {
UnmapOrDie((void*)kLowShadowBeg, kMidMemBeg - kLowShadowBeg);
UnmapOrDie((void*)kMidMemEnd, kHighShadowEnd - kMidMemEnd);
} else {
UnmapOrDie((void*)kLowShadowBeg, kHighShadowEnd - kLowShadowBeg);
if (kHighShadowEnd)
UnmapOrDie((void*)kLowShadowBeg, kHighShadowEnd - kLowShadowBeg);
}
}
}
@ -65,8 +66,14 @@ static void AsanCheckFailed(const char *file, int line, const char *cond,
u64 v1, u64 v2) {
Report("AddressSanitizer CHECK failed: %s:%d \"%s\" (0x%zx, 0x%zx)\n", file,
line, cond, (uptr)v1, (uptr)v2);
// FIXME: check for infinite recursion without a thread-local counter here.
PRINT_CURRENT_STACK_CHECK();
// Print a stack trace the first time we come here. Otherwise, we probably
// failed a CHECK during symbolization.
static atomic_uint32_t num_calls;
if (atomic_fetch_add(&num_calls, 1, memory_order_relaxed) == 0) {
PRINT_CURRENT_STACK_CHECK();
}
Die();
}
@ -140,6 +147,8 @@ ASAN_REPORT_ERROR_N(load, false)
ASAN_REPORT_ERROR_N(store, true)
#define ASAN_MEMORY_ACCESS_CALLBACK_BODY(type, is_write, size, exp_arg, fatal) \
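/* Myriad: accesses outside DRAM and its shadow have no shadow to check. */ \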
if (SANITIZER_MYRIAD2 && !AddrIsInMem(addr) && !AddrIsInShadow(addr)) \
return; \
uptr sp = MEM_TO_SHADOW(addr); \
uptr s = size <= SHADOW_GRANULARITY ? *reinterpret_cast<u8 *>(sp) \
: *reinterpret_cast<u16 *>(sp); \
@ -306,6 +315,7 @@ static void asan_atexit() {
}
static void InitializeHighMemEnd() {
#if !SANITIZER_MYRIAD2
#if !ASAN_FIXED_MAPPING
kHighMemEnd = GetMaxUserVirtualAddress();
// Increase kHighMemEnd to make sure it's properly
@ -313,13 +323,16 @@ static void InitializeHighMemEnd() {
kHighMemEnd |= SHADOW_GRANULARITY * GetMmapGranularity() - 1;
#endif // !ASAN_FIXED_MAPPING
CHECK_EQ((kHighMemBeg % GetMmapGranularity()), 0);
#endif // !SANITIZER_MYRIAD2
}
void PrintAddressSpaceLayout() {
Printf("|| `[%p, %p]` || HighMem ||\n",
(void*)kHighMemBeg, (void*)kHighMemEnd);
Printf("|| `[%p, %p]` || HighShadow ||\n",
(void*)kHighShadowBeg, (void*)kHighShadowEnd);
if (kHighMemBeg) {
Printf("|| `[%p, %p]` || HighMem ||\n",
(void*)kHighMemBeg, (void*)kHighMemEnd);
Printf("|| `[%p, %p]` || HighShadow ||\n",
(void*)kHighShadowBeg, (void*)kHighShadowEnd);
}
if (kMidMemBeg) {
Printf("|| `[%p, %p]` || ShadowGap3 ||\n",
(void*)kShadowGap3Beg, (void*)kShadowGap3End);
@ -338,11 +351,14 @@ void PrintAddressSpaceLayout() {
Printf("|| `[%p, %p]` || LowMem ||\n",
(void*)kLowMemBeg, (void*)kLowMemEnd);
}
Printf("MemToShadow(shadow): %p %p %p %p",
Printf("MemToShadow(shadow): %p %p",
(void*)MEM_TO_SHADOW(kLowShadowBeg),
(void*)MEM_TO_SHADOW(kLowShadowEnd),
(void*)MEM_TO_SHADOW(kHighShadowBeg),
(void*)MEM_TO_SHADOW(kHighShadowEnd));
(void*)MEM_TO_SHADOW(kLowShadowEnd));
if (kHighMemBeg) {
Printf(" %p %p",
(void*)MEM_TO_SHADOW(kHighShadowBeg),
(void*)MEM_TO_SHADOW(kHighShadowEnd));
}
if (kMidMemBeg) {
Printf(" %p %p",
(void*)MEM_TO_SHADOW(kMidShadowBeg),
@ -374,6 +390,7 @@ static void AsanInitInternal() {
asan_init_is_running = true;
CacheBinaryName();
CheckASLR();
// Initialize flags. This must be done early, because most of the
// initialization steps look at flags().
@ -526,6 +543,9 @@ void NOINLINE __asan_handle_no_return() {
if (curr_thread) {
top = curr_thread->stack_top();
bottom = ((uptr)&local_stack - PageSize) & ~(PageSize - 1);
} else if (SANITIZER_RTEMS) {
// Give up on RTEMS.
return;
} else {
CHECK(!SANITIZER_FUCHSIA);
// If we haven't seen this thread, try asking the OS for stack bounds.


@ -14,8 +14,9 @@
#include "sanitizer_common/sanitizer_platform.h"
// asan_fuchsia.cc has its own InitializeShadowMemory implementation.
#if !SANITIZER_FUCHSIA
// asan_fuchsia.cc and asan_rtems.cc have their own
// InitializeShadowMemory implementation.
#if !SANITIZER_FUCHSIA && !SANITIZER_RTEMS
#include "asan_internal.h"
#include "asan_mapping.h"
@ -30,8 +31,7 @@ void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name) {
CHECK_EQ(((end + 1) % GetMmapGranularity()), 0);
uptr size = end - beg + 1;
DecreaseTotalMmap(size); // Don't count the shadow against mmap_limit_mb.
void *res = MmapFixedNoReserve(beg, size, name);
if (res != (void *)beg) {
if (!MmapFixedNoReserve(beg, size, name)) {
Report(
"ReserveShadowMemoryRange failed while trying to map 0x%zx bytes. "
"Perhaps you're using ulimit -v\n",
@ -162,4 +162,4 @@ void InitializeShadowMemory() {
} // namespace __asan
#endif // !SANITIZER_FUCHSIA
#endif // !SANITIZER_FUCHSIA && !SANITIZER_RTEMS


@ -221,22 +221,25 @@ FakeStack *AsanThread::AsyncSignalSafeLazyInitFakeStack() {
void AsanThread::Init(const InitOptions *options) {
next_stack_top_ = next_stack_bottom_ = 0;
atomic_store(&stack_switching_, false, memory_order_release);
fake_stack_ = nullptr; // Will be initialized lazily if needed.
CHECK_EQ(this->stack_size(), 0U);
SetThreadStackAndTls(options);
CHECK_GT(this->stack_size(), 0U);
CHECK(AddrIsInMem(stack_bottom_));
CHECK(AddrIsInMem(stack_top_ - 1));
ClearShadowForThreadStackAndTLS();
fake_stack_ = nullptr;
if (__asan_option_detect_stack_use_after_return)
AsyncSignalSafeLazyInitFakeStack();
int local = 0;
VReport(1, "T%d: stack [%p,%p) size 0x%zx; local=%p\n", tid(),
(void *)stack_bottom_, (void *)stack_top_, stack_top_ - stack_bottom_,
&local);
}
// Fuchsia doesn't use ThreadStart.
// asan_fuchsia.c defines CreateMainThread and SetThreadStackAndTls.
#if !SANITIZER_FUCHSIA
// Fuchsia and RTEMS don't use ThreadStart.
// asan_fuchsia.c/asan_rtems.c define CreateMainThread and
// SetThreadStackAndTls.
#if !SANITIZER_FUCHSIA && !SANITIZER_RTEMS
thread_return_t AsanThread::ThreadStart(
tid_t os_id, atomic_uintptr_t *signal_thread_is_registered) {
@ -296,12 +299,17 @@ void AsanThread::SetThreadStackAndTls(const InitOptions *options) {
CHECK(AddrIsInStack((uptr)&local));
}
#endif // !SANITIZER_FUCHSIA
#endif // !SANITIZER_FUCHSIA && !SANITIZER_RTEMS
void AsanThread::ClearShadowForThreadStackAndTLS() {
PoisonShadow(stack_bottom_, stack_top_ - stack_bottom_, 0);
if (tls_begin_ != tls_end_)
PoisonShadow(tls_begin_, tls_end_ - tls_begin_, 0);
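// tls_begin_ and tls_end_ need not be aligned to SHADOW_GRANULARITY, so
// round the range out and clear the trailing partial granule as a partial
// right redzone with value 0.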
if (tls_begin_ != tls_end_) {
uptr tls_begin_aligned = RoundDownTo(tls_begin_, SHADOW_GRANULARITY);
uptr tls_end_aligned = RoundUpTo(tls_end_, SHADOW_GRANULARITY);
FastPoisonShadowPartialRightRedzone(tls_begin_aligned,
tls_end_ - tls_begin_aligned,
tls_end_aligned - tls_end_, 0);
}
}
bool AsanThread::GetStackFrameAccessByAddr(uptr addr,
@ -386,6 +394,9 @@ static bool ThreadStackContainsAddress(ThreadContextBase *tctx_base,
}
AsanThread *GetCurrentThread() {
if (SANITIZER_RTEMS && !asan_inited)
return nullptr;
AsanThreadContext *context =
reinterpret_cast<AsanThreadContext *>(AsanTSDGet());
if (!context) {
@ -477,6 +488,11 @@ void UnlockThreadRegistry() {
__asan::asanThreadRegistry().Unlock();
}
ThreadRegistry *GetThreadRegistryLocked() {
__asan::asanThreadRegistry().CheckLocked();
return &__asan::asanThreadRegistry();
}
void EnsureMainThreadIDIsCorrect() {
__asan::EnsureMainThreadIDIsCorrect();
}


@ -222,8 +222,8 @@ uptr FindDynamicShadowStart() {
uptr alignment = 8 * granularity;
uptr left_padding = granularity;
uptr space_size = kHighShadowEnd + left_padding;
uptr shadow_start =
FindAvailableMemoryRange(space_size, alignment, granularity, nullptr);
uptr shadow_start = FindAvailableMemoryRange(space_size, alignment,
granularity, nullptr, nullptr);
CHECK_NE((uptr)0, shadow_start);
CHECK(IsAligned(shadow_start, alignment));
return shadow_start;
@ -265,11 +265,6 @@ ShadowExceptionHandler(PEXCEPTION_POINTERS exception_pointers) {
// Determine the address of the page that is being accessed.
uptr page = RoundDownTo(addr, page_size);
// Query the existing page.
MEMORY_BASIC_INFORMATION mem_info = {};
if (::VirtualQuery((LPVOID)page, &mem_info, sizeof(mem_info)) == 0)
return EXCEPTION_CONTINUE_SEARCH;
// Commit the page.
uptr result =
(uptr)::VirtualAlloc((LPVOID)page, page_size, MEM_COMMIT, PAGE_READWRITE);


@ -99,7 +99,7 @@ INTERCEPTOR(int, _except_handler4, void *a, void *b, void *c, void *d) {
}
#endif
// Window specific functions not included in asan_interface.inc.
// Windows specific functions not included in asan_interface.inc.
INTERCEPT_WRAP_W_V(__asan_should_detect_stack_use_after_return)
INTERCEPT_WRAP_W_V(__asan_get_shadow_memory_dynamic_address)
INTERCEPT_WRAP_W_W(__asan_unhandled_exception_filter)


@ -0,0 +1,34 @@
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
#include "../assembly.h"
// __chkstk routine
// This routine is Windows-specific.
// http://msdn.microsoft.com/en-us/library/ms648426.aspx
// This clobbers the register r12, and the condition codes, and uses r5 and r6
// as temporaries by backing them up and restoring them afterwards.
// Does not modify any memory or the stack pointer.
// movw r4, #256 // Number of bytes of stack, in units of 4 bytes
// bl __chkstk
// sub.w sp, sp, r4
#define PAGE_SIZE 4096
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__chkstk)
lsl r4, r4, #2
mov r12, sp
push {r5, r6}
mov r5, r4
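// Probe the stack one page at a time: r12 walks down from sp while r5 counts
// the remaining bytes, and each load touches a page so guard pages are
// committed in order.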
1:
sub r12, r12, #PAGE_SIZE
subs r5, r5, #PAGE_SIZE
ldr r6, [r12]
bgt 1b
pop {r5, r6}
bx lr
END_COMPILERRT_FUNCTION(__chkstk)


@ -101,6 +101,8 @@ void __clear_cache(void *start, void *end) {
* Intel processors have a unified instruction and data cache
* so there is nothing to do
*/
#elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__))
FlushInstructionCache(GetCurrentProcess(), start, end - start);
#elif defined(__arm__) && !defined(__APPLE__)
#if defined(__FreeBSD__) || defined(__NetBSD__)
struct arm_sync_icache_args arg;
@ -128,8 +130,6 @@ void __clear_cache(void *start, void *end) {
: "r"(syscall_nr), "r"(start_reg), "r"(end_reg),
"r"(flags));
assert(start_reg == 0 && "Cache flush syscall failed.");
#elif defined(_WIN32)
FlushInstructionCache(GetCurrentProcess(), start, end - start);
#else
compilerrt_abort();
#endif


@ -16,6 +16,12 @@
/* Returns: the number of leading 0-bits */
#if !defined(__clang__) && (defined(__sparc64__) || defined(__mips64) || defined(__riscv__))
/* gcc resolves __builtin_clz -> __clzdi2 leading to infinite recursion */
#define __builtin_clz(a) __clzsi2(a)
extern si_int __clzsi2(si_int);
#endif
/* Precondition: a != 0 */
COMPILER_RT_ABI si_int


@ -416,9 +416,9 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
*Subtype = AMDFAM15H_BDVER3;
break; // "bdver3"; 30h-3Fh: Steamroller
}
if (Model >= 0x10 && Model <= 0x1f) {
if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
*Subtype = AMDFAM15H_BDVER2;
break; // "bdver2"; 10h-1Fh: Piledriver
break; // "bdver2"; 02h, 10h-1Fh: Piledriver
}
if (Model <= 0x0f) {
*Subtype = AMDFAM15H_BDVER1;


@ -16,6 +16,12 @@
/* Returns: the number of trailing 0-bits */
#if !defined(__clang__) && (defined(__sparc64__) || defined(__mips64) || defined(__riscv__))
/* gcc resolves __builtin_ctz -> __ctzdi2 leading to infinite recursion */
#define __builtin_ctz(a) __ctzsi2(a)
extern si_int __ctzsi2(si_int);
#endif
/* Precondition: a != 0 */
COMPILER_RT_ABI si_int


@ -14,7 +14,22 @@
#include "int_lib.h"
#include "int_util.h"
#ifdef __BIONIC__
/* There are 4 pthread key cleanup rounds on Bionic. Delay emutls deallocation
to round 2. We need to delay deallocation because:
- Android versions older than M lack __cxa_thread_atexit_impl, so apps
use a pthread key destructor to call C++ destructors.
- Apps might use __thread/thread_local variables in pthread destructors.
We can't wait until the final two rounds, because jemalloc needs two rounds
after the final malloc/free call to free its thread-specific data (see
https://reviews.llvm.org/D46978#1107507). */
#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 1
#else
#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 0
#endif
typedef struct emutls_address_array {
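/* number of remaining pthread key destructor rounds in which teardown is
   deferred (see EMUTLS_SKIP_DESTRUCTOR_ROUNDS above) */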
uintptr_t skip_destructor_rounds;
uintptr_t size; /* number of elements in the 'data' array */
void* data[];
} emutls_address_array;
@ -65,9 +80,30 @@ static __inline void emutls_memalign_free(void *base) {
#endif
}
static __inline void emutls_setspecific(emutls_address_array *value) {
pthread_setspecific(emutls_pthread_key, (void*) value);
}
static __inline emutls_address_array* emutls_getspecific() {
return (emutls_address_array*) pthread_getspecific(emutls_pthread_key);
}
static void emutls_key_destructor(void* ptr) {
emutls_shutdown((emutls_address_array*)ptr);
free(ptr);
emutls_address_array *array = (emutls_address_array*)ptr;
if (array->skip_destructor_rounds > 0) {
/* emutls is deallocated using a pthread key destructor. These
* destructors are called in several rounds to accommodate destructor
* functions that (re)initialize key values with pthread_setspecific.
* Delay the emutls deallocation to accommodate other end-of-thread
* cleanup tasks like calling thread_local destructors (e.g. the
* __cxa_thread_atexit fallback in libc++abi).
*/
array->skip_destructor_rounds--;
emutls_setspecific(array);
} else {
emutls_shutdown(array);
free(ptr);
}
}
static __inline void emutls_init(void) {
@ -88,15 +124,7 @@ static __inline void emutls_unlock() {
pthread_mutex_unlock(&emutls_mutex);
}
static __inline void emutls_setspecific(emutls_address_array *value) {
pthread_setspecific(emutls_pthread_key, (void*) value);
}
static __inline emutls_address_array* emutls_getspecific() {
return (emutls_address_array*) pthread_getspecific(emutls_pthread_key);
}
#else
#else /* _WIN32 */
#include <windows.h>
#include <malloc.h>
@ -222,11 +250,11 @@ static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) {
InterlockedExchangePointer((void *volatile *)ptr, (void *)val);
}
#endif
#endif /* __ATOMIC_RELEASE */
#pragma warning (pop)
#endif
#endif /* _WIN32 */
static size_t emutls_num_object = 0; /* number of allocated TLS objects */
@ -314,11 +342,12 @@ static __inline void emutls_check_array_set_size(emutls_address_array *array,
* which must be no smaller than the given index.
*/
static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) {
/* Need to allocate emutls_address_array with one extra slot
* to store the data array size.
/* Need to allocate emutls_address_array with extra slots
* to store the header.
* Round up the emutls_address_array size to multiple of 16.
*/
return ((index + 1 + 15) & ~((uintptr_t)15)) - 1;
uintptr_t header_words = sizeof(emutls_address_array) / sizeof(void *);
return ((index + header_words + 15) & ~((uintptr_t)15)) - header_words;
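/* e.g. in the usual case where uintptr_t is pointer-sized the header is two
   words, so for index == 1: (1 + 2 + 15) & ~15 == 16, minus the 2 header
   words leaves 14 data slots, and header + data fill exactly 16 pointers. */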
}
/* Returns the size in bytes required for an emutls_address_array with
@ -337,8 +366,10 @@ emutls_get_address_array(uintptr_t index) {
if (array == NULL) {
uintptr_t new_size = emutls_new_data_array_size(index);
array = (emutls_address_array*) malloc(emutls_asize(new_size));
if (array)
if (array) {
memset(array->data, 0, new_size * sizeof(void*));
array->skip_destructor_rounds = EMUTLS_SKIP_DESTRUCTOR_ROUNDS;
}
emutls_check_array_set_size(array, new_size);
} else if (index > array->size) {
uintptr_t orig_size = array->size;


@ -0,0 +1,103 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Functions that implement common sequences in function prologues and epilogues,
   used to save code size. */
.macro FUNCTION_BEGIN name
.text
.globl \name
.type \name, @function
.falign
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
.macro FALLTHROUGH_TAIL_CALL name0 name1
.size \name0, . - \name0
.globl \name1
.type \name1, @function
.falign
\name1:
.endm
/* Save r25:24 at fp+#-8 and r27:26 at fp+#-16. */
/* The compiler knows that the __save_* functions clobber LR. No other
registers should be used without informing the compiler. */
/* Since we can only issue one store per packet, we don't hurt performance by
simply jumping to the right point in this sequence of stores. */
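/* For example, a function that only needs r25:24 preserved can call
   __save_r24_through_r25 directly and enter the sequence at its final store
   packet, leaving r27:26 untouched. */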
FUNCTION_BEGIN __save_r24_through_r27
memd(fp+#-16) = r27:26
FALLTHROUGH_TAIL_CALL __save_r24_through_r27 __save_r24_through_r25
{
memd(fp+#-8) = r25:24
jumpr lr
}
FUNCTION_END __save_r24_through_r25
/* For each of the *_before_tailcall functions, jumpr lr is executed in parallel
with deallocframe. That way, the return gets the old value of lr, which is
where these functions need to return, and at the same time, lr gets the value
it needs going into the tail call. */
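/* Illustrative (hypothetical) caller: a tail-calling epilogue can do
   `call __restore_r24_through_r25_and_deallocframe_before_tailcall` followed by
   `jump callee`; the call returns to the jump through the pre-deallocframe lr,
   and by then lr already holds the original caller's return address, so the
   jump behaves as a proper tail call. */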
FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe_before_tailcall
r27:26 = memd(fp+#-16)
FALLTHROUGH_TAIL_CALL __restore_r24_through_r27_and_deallocframe_before_tailcall __restore_r24_through_r25_and_deallocframe_before_tailcall
{
r25:24 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r24_through_r25_and_deallocframe_before_tailcall
/* Here we use the extra load bandwidth to restore LR early, allowing the return
to occur in parallel with the deallocframe. */
FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe
{
lr = memw(fp+#4)
r27:26 = memd(fp+#-16)
}
{
r25:24 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r24_through_r27_and_deallocframe
/* Here the load bandwidth is maximized. */
FUNCTION_BEGIN __restore_r24_through_r25_and_deallocframe
{
r25:24 = memd(fp+#-8)
deallocframe
}
jumpr lr
FUNCTION_END __restore_r24_through_r25_and_deallocframe

View File

@@ -0,0 +1,268 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Functions that implement common sequences in function prologues and epilogues,
   used to save code size. */
.macro FUNCTION_BEGIN name
.p2align 2
.section .text.\name,"ax",@progbits
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
.macro FALLTHROUGH_TAIL_CALL name0 name1
.p2align 2
.size \name0, . - \name0
.globl \name1
.type \name1, @function
\name1:
.endm
/* Save r17:16 at fp+#-8, r19:18 at fp+#-16, r21:20 at fp+#-24, r23:22 at
fp+#-32, r25:24 at fp+#-40, and r27:26 at fp+#-48.
The compiler knows that the __save_* functions clobber LR. No other
registers should be used without informing the compiler. */
FUNCTION_BEGIN __save_r16_through_r27
{
memd(fp+#-48) = r27:26
memd(fp+#-40) = r25:24
}
{
memd(fp+#-32) = r23:22
memd(fp+#-24) = r21:20
}
{
memd(fp+#-16) = r19:18
memd(fp+#-8) = r17:16
jumpr lr
}
FUNCTION_END __save_r16_through_r27
FUNCTION_BEGIN __save_r16_through_r25
{
memd(fp+#-40) = r25:24
memd(fp+#-32) = r23:22
}
{
memd(fp+#-24) = r21:20
memd(fp+#-16) = r19:18
}
{
memd(fp+#-8) = r17:16
jumpr lr
}
FUNCTION_END __save_r16_through_r25
FUNCTION_BEGIN __save_r16_through_r23
{
memd(fp+#-32) = r23:22
memd(fp+#-24) = r21:20
}
{
memd(fp+#-16) = r19:18
memd(fp+#-8) = r17:16
jumpr lr
}
FUNCTION_END __save_r16_through_r23
FUNCTION_BEGIN __save_r16_through_r21
{
memd(fp+#-24) = r21:20
memd(fp+#-16) = r19:18
}
{
memd(fp+#-8) = r17:16
jumpr lr
}
FUNCTION_END __save_r16_through_r21
FUNCTION_BEGIN __save_r16_through_r19
{
memd(fp+#-16) = r19:18
memd(fp+#-8) = r17:16
jumpr lr
}
FUNCTION_END __save_r16_through_r19
FUNCTION_BEGIN __save_r16_through_r17
{
memd(fp+#-8) = r17:16
jumpr lr
}
FUNCTION_END __save_r16_through_r17
/* For each of the *_before_tailcall functions, jumpr lr is executed in parallel
with deallocframe. That way, the return gets the old value of lr, which is
where these functions need to return, and at the same time, lr gets the value
it needs going into the tail call. */
FUNCTION_BEGIN __restore_r16_through_r27_and_deallocframe_before_tailcall
r27:26 = memd(fp+#-48)
{
r25:24 = memd(fp+#-40)
r23:22 = memd(fp+#-32)
}
{
r21:20 = memd(fp+#-24)
r19:18 = memd(fp+#-16)
}
{
r17:16 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r16_through_r27_and_deallocframe_before_tailcall
FUNCTION_BEGIN __restore_r16_through_r25_and_deallocframe_before_tailcall
{
r25:24 = memd(fp+#-40)
r23:22 = memd(fp+#-32)
}
{
r21:20 = memd(fp+#-24)
r19:18 = memd(fp+#-16)
}
{
r17:16 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r16_through_r25_and_deallocframe_before_tailcall
FUNCTION_BEGIN __restore_r16_through_r23_and_deallocframe_before_tailcall
{
r23:22 = memd(fp+#-32)
r21:20 = memd(fp+#-24)
}
r19:18 = memd(fp+#-16)
{
r17:16 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r16_through_r23_and_deallocframe_before_tailcall
FUNCTION_BEGIN __restore_r16_through_r21_and_deallocframe_before_tailcall
{
r21:20 = memd(fp+#-24)
r19:18 = memd(fp+#-16)
}
{
r17:16 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r16_through_r21_and_deallocframe_before_tailcall
FUNCTION_BEGIN __restore_r16_through_r19_and_deallocframe_before_tailcall
r19:18 = memd(fp+#-16)
{
r17:16 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r16_through_r19_and_deallocframe_before_tailcall
FUNCTION_BEGIN __restore_r16_through_r17_and_deallocframe_before_tailcall
{
r17:16 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r16_through_r17_and_deallocframe_before_tailcall
FUNCTION_BEGIN __restore_r16_through_r27_and_deallocframe
r27:26 = memd(fp+#-48)
{
r25:24 = memd(fp+#-40)
r23:22 = memd(fp+#-32)
}
{
r21:20 = memd(fp+#-24)
r19:18 = memd(fp+#-16)
}
{
r17:16 = memd(fp+#-8)
dealloc_return
}
FUNCTION_END __restore_r16_through_r27_and_deallocframe
FUNCTION_BEGIN __restore_r16_through_r25_and_deallocframe
{
r25:24 = memd(fp+#-40)
r23:22 = memd(fp+#-32)
}
{
r21:20 = memd(fp+#-24)
r19:18 = memd(fp+#-16)
}
{
r17:16 = memd(fp+#-8)
dealloc_return
}
FUNCTION_END __restore_r16_through_r25_and_deallocframe
FUNCTION_BEGIN __restore_r16_through_r23_and_deallocframe
{
r23:22 = memd(fp+#-32)
}
{
r21:20 = memd(fp+#-24)
r19:18 = memd(fp+#-16)
}
{
r17:16 = memd(fp+#-8)
dealloc_return
}
FUNCTION_END __restore_r16_through_r23_and_deallocframe
FUNCTION_BEGIN __restore_r16_through_r21_and_deallocframe
{
r21:20 = memd(fp+#-24)
r19:18 = memd(fp+#-16)
}
{
r17:16 = memd(fp+#-8)
dealloc_return
}
FUNCTION_END __restore_r16_through_r21_and_deallocframe
FUNCTION_BEGIN __restore_r16_through_r19_and_deallocframe
{
r19:18 = memd(fp+#-16)
r17:16 = memd(fp+#-8)
}
{
dealloc_return
}
FUNCTION_END __restore_r16_through_r19_and_deallocframe
FUNCTION_BEGIN __restore_r16_through_r17_and_deallocframe
{
r17:16 = memd(fp+#-8)
dealloc_return
}
FUNCTION_END __restore_r16_through_r17_and_deallocframe
FUNCTION_BEGIN __deallocframe
dealloc_return
FUNCTION_END __deallocframe

View File

@@ -0,0 +1,157 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Functions that implement common sequences in function prologues and epilogues,
   used to save code size. */
.macro FUNCTION_BEGIN name
.text
.globl \name
.type \name, @function
.falign
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
.macro FALLTHROUGH_TAIL_CALL name0 name1
.size \name0, . - \name0
.globl \name1
.type \name1, @function
.falign
\name1:
.endm
/* Save r27:26 at fp+#-8, r25:24 at fp+#-16, r23:22 at fp+#-24, r21:20 at
fp+#-32, r19:18 at fp+#-40, and r17:16 at fp+#-48. */
/* The compiler knows that the __save_* functions clobber LR. No other
registers should be used without informing the compiler. */
/* Since we can only issue one store per packet, we don't hurt performance by
simply jumping to the right point in this sequence of stores. */
FUNCTION_BEGIN __save_r27_through_r16
memd(fp+#-48) = r17:16
FALLTHROUGH_TAIL_CALL __save_r27_through_r16 __save_r27_through_r18
memd(fp+#-40) = r19:18
FALLTHROUGH_TAIL_CALL __save_r27_through_r18 __save_r27_through_r20
memd(fp+#-32) = r21:20
FALLTHROUGH_TAIL_CALL __save_r27_through_r20 __save_r27_through_r22
memd(fp+#-24) = r23:22
FALLTHROUGH_TAIL_CALL __save_r27_through_r22 __save_r27_through_r24
memd(fp+#-16) = r25:24
{
memd(fp+#-8) = r27:26
jumpr lr
}
FUNCTION_END __save_r27_through_r24
/* For each of the *_before_sibcall functions, jumpr lr is executed in parallel
with deallocframe. That way, the return gets the old value of lr, which is
where these functions need to return, and at the same time, lr gets the value
it needs going into the sibcall. */
FUNCTION_BEGIN __restore_r27_through_r20_and_deallocframe_before_sibcall
{
r21:20 = memd(fp+#-32)
r23:22 = memd(fp+#-24)
}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe_before_sibcall __restore_r27_through_r24_and_deallocframe_before_sibcall
{
r25:24 = memd(fp+#-16)
jump __restore_r27_through_r26_and_deallocframe_before_sibcall
}
FUNCTION_END __restore_r27_through_r24_and_deallocframe_before_sibcall
FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe_before_sibcall
r17:16 = memd(fp+#-48)
FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe_before_sibcall __restore_r27_through_r18_and_deallocframe_before_sibcall
{
r19:18 = memd(fp+#-40)
r21:20 = memd(fp+#-32)
}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe_before_sibcall __restore_r27_through_r22_and_deallocframe_before_sibcall
{
r23:22 = memd(fp+#-24)
r25:24 = memd(fp+#-16)
}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe_before_sibcall __restore_r27_through_r26_and_deallocframe_before_sibcall
{
r27:26 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r27_through_r26_and_deallocframe_before_sibcall
/* Here we use the extra load bandwidth to restore LR early, allowing the return
to occur in parallel with the deallocframe. */
FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe
{
r17:16 = memd(fp+#-48)
r19:18 = memd(fp+#-40)
}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe __restore_r27_through_r20_and_deallocframe
{
r21:20 = memd(fp+#-32)
r23:22 = memd(fp+#-24)
}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe __restore_r27_through_r24_and_deallocframe
{
lr = memw(fp+#4)
r25:24 = memd(fp+#-16)
}
{
r27:26 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r27_through_r24_and_deallocframe
/* Here the load bandwidth is maximized for all three functions. */
FUNCTION_BEGIN __restore_r27_through_r18_and_deallocframe
{
r19:18 = memd(fp+#-40)
r21:20 = memd(fp+#-32)
}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe __restore_r27_through_r22_and_deallocframe
{
r23:22 = memd(fp+#-24)
r25:24 = memd(fp+#-16)
}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe __restore_r27_through_r26_and_deallocframe
{
r27:26 = memd(fp+#-8)
deallocframe
}
jumpr lr
FUNCTION_END __restore_r27_through_r26_and_deallocframe

View File

@@ -0,0 +1,398 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Double Precision Addition and Subtraction */
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
#define EXPA r4
#define EXPB r5
#define EXPB_A r5:4
#define ZTMP r7:6
#define ZTMPH r7
#define ZTMPL r6
#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12
#define BTMP r9:8
#define BTMPH r9
#define BTMPL r8
#define ATMP2 r11:10
#define ATMP2H r11
#define ATMP2L r10
#define EXPDIFF r15
#define EXTRACTOFF r14
#define EXTRACTAMT r15:14
#define TMP r28
#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1024
#define MANTISSA_TO_INT_BIAS 52
#define SR_BIT_INEXACT 5
#ifndef SR_ROUND_OFF
#define SR_ROUND_OFF 22
#endif
#define NORMAL p3
#define BIGB p2
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG
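/* For example, Q6_ALIAS(adddf3) below makes __qdsp_adddf3 an alias for
   __hexagon_adddf3; FAST_ALIAS and FAST2_ALIAS do the same for the
   __hexagon_fast_ and __hexagon_fast2_ prefixes. */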
.text
.global __hexagon_adddf3
.global __hexagon_subdf3
.type __hexagon_adddf3, @function
.type __hexagon_subdf3, @function
Q6_ALIAS(adddf3)
FAST_ALIAS(adddf3)
FAST2_ALIAS(adddf3)
Q6_ALIAS(subdf3)
FAST_ALIAS(subdf3)
FAST2_ALIAS(subdf3)
.p2align 5
__hexagon_adddf3:
{
EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
ATMP = combine(##0x20000000,#0)
}
{
NORMAL = dfclass(A,#2)
NORMAL = dfclass(B,#2)
BTMP = ATMP
BIGB = cmp.gtu(EXPB,EXPA) // Is B substantially greater than A?
}
{
if (!NORMAL) jump .Ladd_abnormal // If abnormal, go to special code
if (BIGB) A = B // if B >> A, swap A and B
if (BIGB) B = A // If B >> A, swap A and B
if (BIGB) EXPB_A = combine(EXPA,EXPB) // swap exponents
}
{
ATMP = insert(A,#MANTBITS,#EXPBITS-2) // Q1.62
BTMP = insert(B,#MANTBITS,#EXPBITS-2) // Q1.62
EXPDIFF = sub(EXPA,EXPB)
ZTMP = combine(#62,#1)
}
#undef BIGB
#undef NORMAL
#define B_POS p3
#define A_POS p2
#define NO_STICKIES p1
.Ladd_continue:
{
EXPDIFF = min(EXPDIFF,ZTMPH) // If exponent difference >= ~60,
// will collapse to sticky bit
ATMP2 = neg(ATMP)
A_POS = cmp.gt(AH,#-1)
EXTRACTOFF = #0
}
{
if (!A_POS) ATMP = ATMP2
ATMP2 = extractu(BTMP,EXTRACTAMT)
BTMP = ASR(BTMP,EXPDIFF)
#undef EXTRACTAMT
#undef EXPDIFF
#undef EXTRACTOFF
#define ZERO r15:14
ZERO = #0
}
{
NO_STICKIES = cmp.eq(ATMP2,ZERO)
if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL)
EXPB = add(EXPA,#-BIAS-60)
B_POS = cmp.gt(BH,#-1)
}
{
ATMP = add(ATMP,BTMP) // ADD!!!
ATMP2 = sub(ATMP,BTMP) // Negate and ADD --> SUB!!!
ZTMP = combine(#54,##2045)
}
{
p0 = cmp.gtu(EXPA,ZTMPH) // must be pretty high in case of large cancellation
p0 = !cmp.gtu(EXPA,ZTMPL)
if (!p0.new) jump:nt .Ladd_ovf_unf
if (!B_POS) ATMP = ATMP2 // if B neg, pick difference
}
{
A = convert_d2df(ATMP) // Convert to Double Precision, taking care of flags, etc. So nice!
p0 = cmp.eq(ATMPH,#0)
p0 = cmp.eq(ATMPL,#0)
if (p0.new) jump:nt .Ladd_zero // or maybe conversion handles zero case correctly?
}
{
AH += asl(EXPB,#HI_MANTBITS)
jumpr r31
}
.falign
__hexagon_subdf3:
{
BH = togglebit(BH,#31)
jump __qdsp_adddf3
}
.falign
.Ladd_zero:
// True zero, full cancellation
// +0 unless round towards negative infinity
{
TMP = USR
A = #0
BH = #1
}
{
TMP = extractu(TMP,#2,#22)
BH = asl(BH,#31)
}
{
p0 = cmp.eq(TMP,#2)
if (p0.new) AH = xor(AH,BH)
jumpr r31
}
.falign
.Ladd_ovf_unf:
// Overflow or Denormal is possible
// Good news: Underflow flag is not possible!
/*
* ATMP has 2's complement value
*
* EXPA has A's exponent, EXPB has EXPA-BIAS-60
*
* Convert, extract exponent, add adjustment.
* If > 2046, overflow
* If <= 0, denormal
*
* Note that we've not done our zero check yet, so do that too
*
*/
{
A = convert_d2df(ATMP)
p0 = cmp.eq(ATMPH,#0)
p0 = cmp.eq(ATMPL,#0)
if (p0.new) jump:nt .Ladd_zero
}
{
TMP = extractu(AH,#EXPBITS,#HI_MANTBITS)
AH += asl(EXPB,#HI_MANTBITS)
}
{
EXPB = add(EXPB,TMP)
B = combine(##0x00100000,#0)
}
{
p0 = cmp.gt(EXPB,##BIAS+BIAS-2)
if (p0.new) jump:nt .Ladd_ovf
}
{
p0 = cmp.gt(EXPB,#0)
if (p0.new) jumpr:t r31
TMP = sub(#1,EXPB)
}
{
B = insert(A,#MANTBITS,#0)
A = ATMP
}
{
B = lsr(B,TMP)
}
{
A = insert(B,#63,#0)
jumpr r31
}
.falign
.Ladd_ovf:
// We get either max finite value or infinity. Either way, overflow+inexact
{
A = ATMP // 2's complement value
TMP = USR
ATMP = combine(##0x7fefffff,#-1) // positive max finite
}
{
EXPB = extractu(TMP,#2,#SR_ROUND_OFF) // rounding bits
TMP = or(TMP,#0x28) // inexact + overflow
BTMP = combine(##0x7ff00000,#0) // positive infinity
}
{
USR = TMP
EXPB ^= lsr(AH,#31) // Does sign match rounding?
TMP = EXPB // unmodified rounding mode
}
{
p0 = !cmp.eq(TMP,#1) // If not round-to-zero and
p0 = !cmp.eq(EXPB,#2) // Not rounding the other way,
if (p0.new) ATMP = BTMP // we should get infinity
}
{
A = insert(ATMP,#63,#0) // insert inf/maxfinite, leave sign
}
{
p0 = dfcmp.eq(A,A)
jumpr r31
}
.Ladd_abnormal:
{
ATMP = extractu(A,#63,#0) // strip off sign
BTMP = extractu(B,#63,#0) // strip off sign
}
{
p3 = cmp.gtu(ATMP,BTMP)
if (!p3.new) A = B // sort values
if (!p3.new) B = A // sort values
}
{
// Any NaN --> NaN, possibly raise invalid if sNaN
p0 = dfclass(A,#0x0f) // A not NaN?
if (!p0.new) jump:nt .Linvalid_nan_add
if (!p3) ATMP = BTMP
if (!p3) BTMP = ATMP
}
{
// Infinity + non-infinity number is infinity
// Infinity + infinity --> inf or nan
p1 = dfclass(A,#0x08) // A is infinity
if (p1.new) jump:nt .Linf_add
}
{
p2 = dfclass(B,#0x01) // B is zero
if (p2.new) jump:nt .LB_zero // so return A or special 0+0
ATMP = #0
}
// We are left with adding one or more subnormals
{
p0 = dfclass(A,#4)
if (p0.new) jump:nt .Ladd_two_subnormal
ATMP = combine(##0x20000000,#0)
}
{
EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
EXPB = #1
// BTMP already ABS(B)
BTMP = asl(BTMP,#EXPBITS-2)
}
#undef ZERO
#define EXTRACTOFF r14
#define EXPDIFF r15
{
ATMP = insert(A,#MANTBITS,#EXPBITS-2)
EXPDIFF = sub(EXPA,EXPB)
ZTMP = combine(#62,#1)
jump .Ladd_continue
}
.Ladd_two_subnormal:
{
ATMP = extractu(A,#63,#0)
BTMP = extractu(B,#63,#0)
}
{
ATMP = neg(ATMP)
BTMP = neg(BTMP)
p0 = cmp.gt(AH,#-1)
p1 = cmp.gt(BH,#-1)
}
{
if (p0) ATMP = A
if (p1) BTMP = B
}
{
ATMP = add(ATMP,BTMP)
}
{
BTMP = neg(ATMP)
p0 = cmp.gt(ATMPH,#-1)
B = #0
}
{
if (!p0) A = BTMP
if (p0) A = ATMP
BH = ##0x80000000
}
{
if (!p0) AH = or(AH,BH)
p0 = dfcmp.eq(A,B)
if (p0.new) jump:nt .Lzero_plus_zero
}
{
jumpr r31
}
.Linvalid_nan_add:
{
TMP = convert_df2sf(A) // will generate invalid if sNaN
p0 = dfclass(B,#0x0f) // if B is not NaN
if (p0.new) B = A // make it whatever A is
}
{
BL = convert_df2sf(B) // will generate invalid if sNaN
A = #-1
jumpr r31
}
.falign
.LB_zero:
{
p0 = dfcmp.eq(ATMP,A) // is A also zero?
if (!p0.new) jumpr:t r31 // If not, just return A
}
// 0 + 0 is special
// if equal integral values, they have the same sign, which is fine for all rounding
// modes.
// If unequal in sign, we get +0 for all rounding modes except round down
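// Example: (+0.0) + (-0.0) yields +0.0 under round-to-nearest, round-up, and
// round-toward-zero, and -0.0 only under round-toward-negative-infinity.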
.Lzero_plus_zero:
{
p0 = cmp.eq(A,B)
if (p0.new) jumpr:t r31
}
{
TMP = USR
}
{
TMP = extractu(TMP,#2,#SR_ROUND_OFF)
A = #0
}
{
p0 = cmp.eq(TMP,#2)
if (p0.new) AH = ##0x80000000
jumpr r31
}
.Linf_add:
// adding infinities is only OK if they are equal
{
p0 = !cmp.eq(AH,BH) // Do they have different signs
p0 = dfclass(B,#8) // And is B also infinite?
if (!p0.new) jumpr:t r31 // If not, just a normal inf
}
{
BL = ##0x7f800001 // sNAN
}
{
A = convert_sf2df(BL) // trigger invalid, set NaN
jumpr r31
}
END(__hexagon_adddf3)

View File

@@ -0,0 +1,492 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Double Precision Divide */
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
#define Q r5:4
#define QH r5
#define QL r4
#define PROD r7:6
#define PRODHI r7
#define PRODLO r6
#define SFONE r8
#define SFDEN r9
#define SFERROR r10
#define SFRECIP r11
#define EXPBA r13:12
#define EXPB r13
#define EXPA r12
#define REMSUB2 r15:14
#define SIGN r28
#define Q_POSITIVE p3
#define NORMAL p2
#define NO_OVF_UNF p1
#define P_TMP p0
#define RECIPEST_SHIFT 3
#define QADJ 61
#define DFCLASS_NORMAL 0x02
#define DFCLASS_NUMBER 0x0F
#define DFCLASS_INFINITE 0x08
#define DFCLASS_ZERO 0x01
#define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO)
#define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE)
#define DF_MANTBITS 52
#define DF_EXPBITS 11
#define SF_MANTBITS 23
#define SF_EXPBITS 8
#define DF_BIAS 0x3ff
#define SR_ROUND_OFF 22
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG
.text
.global __hexagon_divdf3
.type __hexagon_divdf3,@function
Q6_ALIAS(divdf3)
FAST_ALIAS(divdf3)
FAST2_ALIAS(divdf3)
.p2align 5
__hexagon_divdf3:
{
NORMAL = dfclass(A,#DFCLASS_NORMAL)
NORMAL = dfclass(B,#DFCLASS_NORMAL)
EXPBA = combine(BH,AH)
SIGN = xor(AH,BH)
}
#undef A
#undef AH
#undef AL
#undef B
#undef BH
#undef BL
#define REM r1:0
#define REMHI r1
#define REMLO r0
#define DENOM r3:2
#define DENOMHI r3
#define DENOMLO r2
{
if (!NORMAL) jump .Ldiv_abnormal
PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
SFONE = ##0x3f800001
}
{
SFDEN = or(SFONE,PRODLO)
EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
Q_POSITIVE = cmp.gt(SIGN,#-1)
}
#undef SIGN
#define ONE r28
.Ldenorm_continue:
{
SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)
SFERROR = and(SFONE,#-2)
ONE = #1
EXPA = sub(EXPA,EXPB)
}
#undef EXPB
#define RECIPEST r13
{
SFERROR -= sfmpy(SFRECIP,SFDEN):lib
REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
RECIPEST = ##0x00800000 << RECIPEST_SHIFT
}
{
SFRECIP += sfmpy(SFRECIP,SFERROR):lib
DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
SFERROR = and(SFONE,#-2)
}
{
SFERROR -= sfmpy(SFRECIP,SFDEN):lib
QH = #-DF_BIAS+1
QL = #DF_BIAS-1
}
{
SFRECIP += sfmpy(SFRECIP,SFERROR):lib
NO_OVF_UNF = cmp.gt(EXPA,QH)
NO_OVF_UNF = !cmp.gt(EXPA,QL)
}
{
RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT)
Q = #0
EXPA = add(EXPA,#-QADJ)
}
#undef SFERROR
#undef SFRECIP
#define TMP r10
#define TMP1 r11
{
RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT))
}
#define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
{ \
PROD = mpyu(RECIPEST,REMHI); \
REM = asl(REM,# ## ( REMSHIFT )); \
}; \
{ \
PRODLO = # ## 0; \
REM -= mpyu(PRODHI,DENOMLO); \
REMSUB2 = mpyu(PRODHI,DENOMHI); \
}; \
{ \
Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
REM -= asl(REMSUB2, # ## 32); \
EXTRA \
}
DIV_ITER1B(ASL,14,15,)
DIV_ITER1B(ASR,1,15,)
DIV_ITER1B(ASR,16,15,)
DIV_ITER1B(ASR,31,15,PROD=# ( 0 );)
#undef REMSUB2
#define TMPPAIR r15:14
#define TMPPAIRHI r15
#define TMPPAIRLO r14
#undef RECIPEST
#define EXPB r13
{
// compare or sub with carry
TMPPAIR = sub(REM,DENOM)
P_TMP = cmp.gtu(DENOM,REM)
// set up amt to add to q
if (!P_TMP.new) PRODLO = #2
}
{
Q = add(Q,PROD)
if (!P_TMP) REM = TMPPAIR
TMPPAIR = #0
}
{
P_TMP = cmp.eq(REM,TMPPAIR)
if (!P_TMP.new) QL = or(QL,ONE)
}
{
PROD = neg(Q)
}
{
if (!Q_POSITIVE) Q = PROD
}
#undef REM
#undef REMHI
#undef REMLO
#undef DENOM
#undef DENOMLO
#undef DENOMHI
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
{
A = convert_d2df(Q)
if (!NO_OVF_UNF) jump .Ldiv_ovf_unf
}
{
AH += asl(EXPA,#DF_MANTBITS-32)
jumpr r31
}
.Ldiv_ovf_unf:
{
AH += asl(EXPA,#DF_MANTBITS-32)
EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)
}
{
PROD = abs(Q)
EXPA = add(EXPA,EXPB)
}
{
P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS) // overflow
if (P_TMP.new) jump:nt .Ldiv_ovf
}
{
P_TMP = cmp.gt(EXPA,#0)
if (P_TMP.new) jump:nt .Lpossible_unf // round up to normal possible...
}
/* Underflow */
/* We know what the infinite range exponent should be (EXPA) */
/* Q is 2's complement, PROD is abs(Q) */
/* Normalize Q, shift right, add a high bit, convert, change exponent */
#define FUDGE1 7 // how much to shift right
#define FUDGE2 4 // how many guard/round to keep at lsbs
{
EXPB = add(clb(PROD),#-1) // doesn't need to be added in since
EXPA = sub(#FUDGE1,EXPA) // we extract post-converted exponent
TMP = USR
TMP1 = #63
}
{
EXPB = min(EXPA,TMP1)
TMP1 = or(TMP,#0x030)
PROD = asl(PROD,EXPB)
EXPA = #0
}
{
TMPPAIR = extractu(PROD,EXPBA) // bits that will get shifted out
PROD = lsr(PROD,EXPB) // shift out bits
B = #1
}
{
P_TMP = cmp.gtu(B,TMPPAIR)
if (!P_TMP.new) PRODLO = or(BL,PRODLO)
PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2)
}
{
Q = neg(PROD)
P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1)
if (!P_TMP.new) TMP = TMP1
}
{
USR = TMP
if (Q_POSITIVE) Q = PROD
TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2)
}
{
A = convert_d2df(Q)
}
{
AH += asl(TMP,#DF_MANTBITS-32)
jumpr r31
}
.Lpossible_unf:
/* If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal */
/* The answer is correct, but we need to raise Underflow */
{
B = extractu(A,#63,#0)
TMPPAIR = combine(##0x00100000,#0) // min normal
TMP = #0x7FFF
}
{
P_TMP = dfcmp.eq(TMPPAIR,B) // Is everything zero in the rounded value...
P_TMP = bitsset(PRODHI,TMP) // but a bunch of bits set in the unrounded abs(quotient)?
}
#if (__HEXAGON_ARCH__ == 60)
TMP = USR // If not, just return
if (!P_TMP) jumpr r31 // Else, we want to set Unf+Inexact
// Note that inexact is already set...
#else
{
if (!P_TMP) jumpr r31 // If not, just return
TMP = USR // Else, we want to set Unf+Inexact
} // Note that inexact is already set...
#endif
{
TMP = or(TMP,#0x30)
}
{
USR = TMP
}
{
p0 = dfcmp.eq(A,A)
jumpr r31
}
.Ldiv_ovf:
/*
* Raise Overflow, and choose the correct overflow value (saturated normal or infinity)
*/
{
TMP = USR
B = combine(##0x7fefffff,#-1)
AH = mux(Q_POSITIVE,#0,#-1)
}
{
PROD = combine(##0x7ff00000,#0)
QH = extractu(TMP,#2,#SR_ROUND_OFF)
TMP = or(TMP,#0x28)
}
{
USR = TMP
QH ^= lsr(AH,#31)
QL = QH
}
{
p0 = !cmp.eq(QL,#1) // if not round-to-zero
p0 = !cmp.eq(QH,#2) // and not rounding the other way
if (p0.new) B = PROD // go to inf
p0 = dfcmp.eq(B,B) // get exceptions
}
{
A = insert(B,#63,#0)
jumpr r31
}
#undef ONE
#define SIGN r28
#undef NORMAL
#undef NO_OVF_UNF
#define P_INF p1
#define P_ZERO p2
.Ldiv_abnormal:
{
P_TMP = dfclass(A,#DFCLASS_NUMBER)
P_TMP = dfclass(B,#DFCLASS_NUMBER)
Q_POSITIVE = cmp.gt(SIGN,#-1)
}
{
P_INF = dfclass(A,#DFCLASS_INFINITE)
P_INF = dfclass(B,#DFCLASS_INFINITE)
}
{
P_ZERO = dfclass(A,#DFCLASS_ZERO)
P_ZERO = dfclass(B,#DFCLASS_ZERO)
}
{
if (!P_TMP) jump .Ldiv_nan
if (P_INF) jump .Ldiv_invalid
}
{
if (P_ZERO) jump .Ldiv_invalid
}
{
P_ZERO = dfclass(A,#DFCLASS_NONZERO) // nonzero
P_ZERO = dfclass(B,#DFCLASS_NONINFINITE) // non-infinite
}
{
P_INF = dfclass(A,#DFCLASS_NONINFINITE) // non-infinite
P_INF = dfclass(B,#DFCLASS_NONZERO) // nonzero
}
{
if (!P_ZERO) jump .Ldiv_zero_result
if (!P_INF) jump .Ldiv_inf_result
}
/* Now we've narrowed it down to (de)normal / (de)normal */
/* Set up A/EXPA B/EXPB and go back */
#undef P_ZERO
#undef P_INF
#define P_TMP2 p1
{
P_TMP = dfclass(A,#DFCLASS_NORMAL)
P_TMP2 = dfclass(B,#DFCLASS_NORMAL)
TMP = ##0x00100000
}
{
EXPBA = combine(BH,AH)
AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit
BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit
}
{
if (P_TMP) AH = or(AH,TMP) // if normal, add back in hidden bit
if (P_TMP2) BH = or(BH,TMP) // if normal, add back in hidden bit
}
{
QH = add(clb(A),#-DF_EXPBITS)
QL = add(clb(B),#-DF_EXPBITS)
TMP = #1
}
{
EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
}
{
A = asl(A,QH)
B = asl(B,QL)
if (!P_TMP) EXPA = sub(TMP,QH)
if (!P_TMP2) EXPB = sub(TMP,QL)
} // recreate values needed by resume code
{
PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
}
{
SFDEN = or(SFONE,PRODLO)
jump .Ldenorm_continue
}
.Ldiv_zero_result:
{
AH = xor(AH,BH)
B = #0
}
{
A = insert(B,#63,#0)
jumpr r31
}
.Ldiv_inf_result:
{
p2 = dfclass(B,#DFCLASS_ZERO)
p2 = dfclass(A,#DFCLASS_NONINFINITE)
}
{
TMP = USR
if (!p2) jump 1f
AH = xor(AH,BH)
}
{
TMP = or(TMP,#0x04) // DBZ
}
{
USR = TMP
}
1:
{
B = combine(##0x7ff00000,#0)
p0 = dfcmp.uo(B,B) // take possible exception
}
{
A = insert(B,#63,#0)
jumpr r31
}
.Ldiv_nan:
{
p0 = dfclass(A,#0x10)
p1 = dfclass(B,#0x10)
if (!p0.new) A = B
if (!p1.new) B = A
}
{
QH = convert_df2sf(A) // get possible invalid exceptions
QL = convert_df2sf(B)
}
{
A = #-1
jumpr r31
}
.Ldiv_invalid:
{
TMP = ##0x7f800001
}
{
A = convert_sf2df(TMP) // get invalid, get DF qNaN
jumpr r31
}
END(__hexagon_divdf3)

View File

@@ -0,0 +1,705 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG
/* Double Precision Fused Multiply-Add */
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
#define C r5:4
#define CH r5
#define CL r4
#define BTMP r15:14
#define BTMPH r15
#define BTMPL r14
#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12
#define CTMP r11:10
#define CTMPH r11
#define CTMPL r10
#define PP_LL r9:8
#define PP_LL_H r9
#define PP_LL_L r8
#define PP_ODD r7:6
#define PP_ODD_H r7
#define PP_ODD_L r6
#define PP_HH r17:16
#define PP_HH_H r17
#define PP_HH_L r16
#define EXPA r18
#define EXPB r19
#define EXPBA r19:18
#define TMP r28
#define P_TMP p0
#define PROD_NEG p3
#define EXACT p2
#define SWAP p1
#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1023
#define STACKSPACE 32
#define ADJUST 4
#define FUDGE 7
#define FUDGE2 3
#ifndef SR_ROUND_OFF
#define SR_ROUND_OFF 22
#endif
/*
* First, classify for normal values, and abort if abnormal
*
* Next, unpack mantissa into 0x1000_0000_0000_0000 + mant<<8
*
* Since we know that the 2 MSBs of the H registers are zero, we should never carry
* the partial products that involve the H registers
*
* Try to buy X slots, at the expense of latency if needed
*
* We will have PP_HH with the upper bits of the product, PP_LL with the lower
* PP_HH can have a maximum of 0x03FF_FFFF_FFFF_FFFF or thereabouts
* PP_HH can have a minimum of 0x0100_0000_0000_0000
*
* 0x0100_0000_0000_0000 has EXP of EXPA+EXPB-BIAS
*
* We need to align CTMP.
* If CTMP >> PP, convert PP to 64 bit with sticky, align CTMP, and follow normal add
* If CTMP << PP align CTMP and add 128 bits. Then compute sticky
* If CTMP ~= PP, align CTMP and add 128 bits. May have massive cancellation.
*
* Convert partial product and CTMP to 2's complement prior to addition
*
* After we add, we need to normalize into upper 64 bits, then compute sticky.
*
*
*/
.text
.global __hexagon_fmadf4
.type __hexagon_fmadf4,@function
.global __hexagon_fmadf5
.type __hexagon_fmadf5,@function
.global fma
.type fma,@function
Q6_ALIAS(fmadf5)
.p2align 5
__hexagon_fmadf4:
__hexagon_fmadf5:
fma:
{
P_TMP = dfclass(A,#2)
P_TMP = dfclass(B,#2)
ATMP = #0
BTMP = #0
}
{
ATMP = insert(A,#MANTBITS,#EXPBITS-3)
BTMP = insert(B,#MANTBITS,#EXPBITS-3)
PP_ODD_H = ##0x10000000
allocframe(#STACKSPACE)
}
{
PP_LL = mpyu(ATMPL,BTMPL)
if (!P_TMP) jump .Lfma_abnormal_ab
ATMPH = or(ATMPH,PP_ODD_H)
BTMPH = or(BTMPH,PP_ODD_H)
}
{
P_TMP = dfclass(C,#2)
if (!P_TMP.new) jump:nt .Lfma_abnormal_c
CTMP = combine(PP_ODD_H,#0)
PP_ODD = combine(#0,PP_LL_H)
}
.Lfma_abnormal_c_restart:
{
PP_ODD += mpyu(BTMPL,ATMPH)
CTMP = insert(C,#MANTBITS,#EXPBITS-3)
memd(r29+#0) = PP_HH
memd(r29+#8) = EXPBA
}
{
PP_ODD += mpyu(ATMPL,BTMPH)
EXPBA = neg(CTMP)
P_TMP = cmp.gt(CH,#-1)
TMP = xor(AH,BH)
}
{
EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
PP_HH = combine(#0,PP_ODD_H)
if (!P_TMP) CTMP = EXPBA
}
{
PP_HH += mpyu(ATMPH,BTMPH)
PP_LL = combine(PP_ODD_L,PP_LL_L)
#undef PP_ODD
#undef PP_ODD_H
#undef PP_ODD_L
#undef ATMP
#undef ATMPL
#undef ATMPH
#undef BTMP
#undef BTMPL
#undef BTMPH
#define RIGHTLEFTSHIFT r13:12
#define RIGHTSHIFT r13
#define LEFTSHIFT r12
EXPA = add(EXPA,EXPB)
#undef EXPB
#undef EXPBA
#define EXPC r19
#define EXPCA r19:18
EXPC = extractu(CH,#EXPBITS,#HI_MANTBITS)
}
/* PP_HH:PP_LL now has product */
/* CTMP is negated */
/* EXPA,B,C are extracted */
/*
* We need to negate PP
* Since we will be adding with carry later, if we need to negate,
* just invert all bits now, which we can do conditionally and in parallel
*/
#define PP_HH_TMP r15:14
#define PP_LL_TMP r7:6
{
EXPA = add(EXPA,#-BIAS+(ADJUST))
PROD_NEG = !cmp.gt(TMP,#-1)
PP_LL_TMP = #0
PP_HH_TMP = #0
}
{
PP_LL_TMP = sub(PP_LL_TMP,PP_LL,PROD_NEG):carry
P_TMP = !cmp.gt(TMP,#-1)
SWAP = cmp.gt(EXPC,EXPA) // If C >> PP
if (SWAP.new) EXPCA = combine(EXPA,EXPC)
}
{
PP_HH_TMP = sub(PP_HH_TMP,PP_HH,PROD_NEG):carry
if (P_TMP) PP_LL = PP_LL_TMP
#undef PP_LL_TMP
#define CTMP2 r7:6
#define CTMP2H r7
#define CTMP2L r6
CTMP2 = #0
EXPC = sub(EXPA,EXPC)
}
{
if (P_TMP) PP_HH = PP_HH_TMP
P_TMP = cmp.gt(EXPC,#63)
if (SWAP) PP_LL = CTMP2
if (SWAP) CTMP2 = PP_LL
}
#undef PP_HH_TMP
//#define ONE r15:14
//#define S_ONE r14
#define ZERO r15:14
#define S_ZERO r15
#undef PROD_NEG
#define P_CARRY p3
{
if (SWAP) PP_HH = CTMP // Swap C and PP
if (SWAP) CTMP = PP_HH
if (P_TMP) EXPC = add(EXPC,#-64)
TMP = #63
}
{
// If diff > 63, pre-shift-right by 64...
if (P_TMP) CTMP2 = CTMP
TMP = asr(CTMPH,#31)
RIGHTSHIFT = min(EXPC,TMP)
LEFTSHIFT = #0
}
#undef C
#undef CH
#undef CL
#define STICKIES r5:4
#define STICKIESH r5
#define STICKIESL r4
{
if (P_TMP) CTMP = combine(TMP,TMP) // sign extension of pre-shift-right-64
STICKIES = extract(CTMP2,RIGHTLEFTSHIFT)
CTMP2 = lsr(CTMP2,RIGHTSHIFT)
LEFTSHIFT = sub(#64,RIGHTSHIFT)
}
{
ZERO = #0
TMP = #-2
CTMP2 |= lsl(CTMP,LEFTSHIFT)
CTMP = asr(CTMP,RIGHTSHIFT)
}
{
P_CARRY = cmp.gtu(STICKIES,ZERO) // If we have sticky bits from C shift
if (P_CARRY.new) CTMP2L = and(CTMP2L,TMP) // make sure adding 1 == OR
#undef ZERO
#define ONE r15:14
#define S_ONE r14
ONE = #1
STICKIES = #0
}
{
PP_LL = add(CTMP2,PP_LL,P_CARRY):carry // use the carry to add the sticky
}
{
PP_HH = add(CTMP,PP_HH,P_CARRY):carry
TMP = #62
}
/*
* PP_HH:PP_LL now holds the sum
* We may need to normalize left, up to ??? bits.
*
* I think that if we have massive cancellation, the range we normalize by
* is still limited
*/
{
LEFTSHIFT = add(clb(PP_HH),#-2)
if (!cmp.eq(LEFTSHIFT.new,TMP)) jump:t 1f // all sign bits?
}
/* We had all sign bits, shift left by 62. */
{
CTMP = extractu(PP_LL,#62,#2)
PP_LL = asl(PP_LL,#62)
EXPA = add(EXPA,#-62) // And adjust exponent of result
}
{
PP_HH = insert(CTMP,#62,#0) // Then shift 63
}
{
LEFTSHIFT = add(clb(PP_HH),#-2)
}
.falign
1:
{
CTMP = asl(PP_HH,LEFTSHIFT)
STICKIES |= asl(PP_LL,LEFTSHIFT)
RIGHTSHIFT = sub(#64,LEFTSHIFT)
EXPA = sub(EXPA,LEFTSHIFT)
}
{
CTMP |= lsr(PP_LL,RIGHTSHIFT)
EXACT = cmp.gtu(ONE,STICKIES)
TMP = #BIAS+BIAS-2
}
{
if (!EXACT) CTMPL = or(CTMPL,S_ONE)
// If EXPA is overflow/underflow, jump to ovf_unf
P_TMP = !cmp.gt(EXPA,TMP)
P_TMP = cmp.gt(EXPA,#1)
if (!P_TMP.new) jump:nt .Lfma_ovf_unf
}
{
// XXX: FIXME: should PP_HH for check of zero be CTMP?
P_TMP = cmp.gtu(ONE,CTMP) // is result true zero?
A = convert_d2df(CTMP)
EXPA = add(EXPA,#-BIAS-60)
PP_HH = memd(r29+#0)
}
{
AH += asl(EXPA,#HI_MANTBITS)
EXPCA = memd(r29+#8)
if (!P_TMP) dealloc_return // not zero, return
}
.Ladd_yields_zero:
/* We had full cancellation. Return +/- zero (-0 when round-down) */
{
TMP = USR
A = #0
}
{
TMP = extractu(TMP,#2,#SR_ROUND_OFF)
PP_HH = memd(r29+#0)
EXPCA = memd(r29+#8)
}
{
p0 = cmp.eq(TMP,#2)
if (p0.new) AH = ##0x80000000
dealloc_return
}
#undef RIGHTLEFTSHIFT
#undef RIGHTSHIFT
#undef LEFTSHIFT
#undef CTMP2
#undef CTMP2H
#undef CTMP2L
.Lfma_ovf_unf:
{
p0 = cmp.gtu(ONE,CTMP)
if (p0.new) jump:nt .Ladd_yields_zero
}
{
A = convert_d2df(CTMP)
EXPA = add(EXPA,#-BIAS-60)
TMP = EXPA
}
#define NEW_EXPB r7
#define NEW_EXPA r6
{
AH += asl(EXPA,#HI_MANTBITS)
NEW_EXPB = extractu(AH,#EXPBITS,#HI_MANTBITS)
}
{
NEW_EXPA = add(EXPA,NEW_EXPB)
PP_HH = memd(r29+#0)
EXPCA = memd(r29+#8)
#undef PP_HH
#undef PP_HH_H
#undef PP_HH_L
#undef EXPCA
#undef EXPC
#undef EXPA
#undef PP_LL
#undef PP_LL_H
#undef PP_LL_L
#define EXPA r6
#define EXPB r7
#define EXPBA r7:6
#define ATMP r9:8
#define ATMPH r9
#define ATMPL r8
#undef NEW_EXPB
#undef NEW_EXPA
ATMP = abs(CTMP)
}
{
p0 = cmp.gt(EXPA,##BIAS+BIAS)
if (p0.new) jump:nt .Lfma_ovf
}
{
p0 = cmp.gt(EXPA,#0)
if (p0.new) jump:nt .Lpossible_unf
}
{
// TMP has original EXPA.
// ATMP is corresponding value
// Normalize ATMP and shift right to correct location
EXPB = add(clb(ATMP),#-2) // Amount to left shift to normalize
EXPA = sub(#1+5,TMP) // Amount to right shift to denormalize
p3 = cmp.gt(CTMPH,#-1)
}
/* Underflow */
/* We know that the infinite range exponent should be EXPA */
/* CTMP is 2's complement, ATMP is abs(CTMP) */
{
EXPA = add(EXPA,EXPB) // how much to shift back right
ATMP = asl(ATMP,EXPB) // shift left
AH = USR
TMP = #63
}
{
EXPB = min(EXPA,TMP)
EXPA = #0
AL = #0x0030
}
{
B = extractu(ATMP,EXPBA)
ATMP = asr(ATMP,EXPB)
}
{
p0 = cmp.gtu(ONE,B)
if (!p0.new) ATMPL = or(ATMPL,S_ONE)
ATMPH = setbit(ATMPH,#HI_MANTBITS+FUDGE2)
}
{
CTMP = neg(ATMP)
p1 = bitsclr(ATMPL,#(1<<FUDGE2)-1)
if (!p1.new) AH = or(AH,AL)
B = #0
}
{
if (p3) CTMP = ATMP
USR = AH
TMP = #-BIAS-(MANTBITS+FUDGE2)
}
{
A = convert_d2df(CTMP)
}
{
AH += asl(TMP,#HI_MANTBITS)
dealloc_return
}
.Lpossible_unf:
{
TMP = ##0x7fefffff
ATMP = abs(CTMP)
}
{
p0 = cmp.eq(AL,#0)
p0 = bitsclr(AH,TMP)
if (!p0.new) dealloc_return:t
TMP = #0x7fff
}
{
p0 = bitsset(ATMPH,TMP)
BH = USR
BL = #0x0030
}
{
if (p0) BH = or(BH,BL)
}
{
USR = BH
}
{
p0 = dfcmp.eq(A,A)
dealloc_return
}
.Lfma_ovf:
{
TMP = USR
CTMP = combine(##0x7fefffff,#-1)
A = CTMP
}
{
ATMP = combine(##0x7ff00000,#0)
BH = extractu(TMP,#2,#SR_ROUND_OFF)
TMP = or(TMP,#0x28)
}
{
USR = TMP
BH ^= lsr(AH,#31)
BL = BH
}
{
p0 = !cmp.eq(BL,#1)
p0 = !cmp.eq(BH,#2)
}
{
p0 = dfcmp.eq(ATMP,ATMP)
if (p0.new) CTMP = ATMP
}
{
A = insert(CTMP,#63,#0)
dealloc_return
}
#undef CTMP
#undef CTMPH
#undef CTMPL
#define BTMP r11:10
#define BTMPH r11
#define BTMPL r10
#undef STICKIES
#undef STICKIESH
#undef STICKIESL
#define C r5:4
#define CH r5
#define CL r4
.Lfma_abnormal_ab:
{
ATMP = extractu(A,#63,#0)
BTMP = extractu(B,#63,#0)
deallocframe
}
{
p3 = cmp.gtu(ATMP,BTMP)
if (!p3.new) A = B // sort values
if (!p3.new) B = A
}
{
p0 = dfclass(A,#0x0f) // A NaN?
if (!p0.new) jump:nt .Lnan
if (!p3) ATMP = BTMP
if (!p3) BTMP = ATMP
}
{
p1 = dfclass(A,#0x08) // A is infinity
p1 = dfclass(B,#0x0e) // B is nonzero
}
{
p0 = dfclass(A,#0x08) // a is inf
p0 = dfclass(B,#0x01) // b is zero
}
{
if (p1) jump .Lab_inf
p2 = dfclass(B,#0x01)
}
{
if (p0) jump .Linvalid
if (p2) jump .Lab_true_zero
TMP = ##0x7c000000
}
// We are left with a normal or subnormal times a subnormal, A > B
// If A and B are both very small, we will go to a single sticky bit; replace
// A and B lower 63 bits with 0x0010_0000_0000_0000, which yields equivalent results
// if A and B might multiply to something bigger, decrease A exp and increase B exp
// and start over
{
p0 = bitsclr(AH,TMP)
if (p0.new) jump:nt .Lfma_ab_tiny
}
{
TMP = add(clb(BTMP),#-EXPBITS)
}
{
BTMP = asl(BTMP,TMP)
}
{
B = insert(BTMP,#63,#0)
AH -= asl(TMP,#HI_MANTBITS)
}
jump fma
.Lfma_ab_tiny:
ATMP = combine(##0x00100000,#0)
{
A = insert(ATMP,#63,#0)
B = insert(ATMP,#63,#0)
}
jump fma
.Lab_inf:
{
B = lsr(B,#63)
p0 = dfclass(C,#0x10)
}
{
A ^= asl(B,#63)
if (p0) jump .Lnan
}
{
p1 = dfclass(C,#0x08)
if (p1.new) jump:nt .Lfma_inf_plus_inf
}
/* A*B is +/- inf, C is finite. Return A */
{
jumpr r31
}
.falign
.Lfma_inf_plus_inf:
{ // adding infinities of different signs is invalid
p0 = dfcmp.eq(A,C)
if (!p0.new) jump:nt .Linvalid
}
{
jumpr r31
}
.Lnan:
{
p0 = dfclass(B,#0x10)
p1 = dfclass(C,#0x10)
if (!p0.new) B = A
if (!p1.new) C = A
}
{ // find sNaNs
BH = convert_df2sf(B)
BL = convert_df2sf(C)
}
{
BH = convert_df2sf(A)
A = #-1
jumpr r31
}
.Linvalid:
{
TMP = ##0x7f800001 // sp snan
}
{
A = convert_sf2df(TMP)
jumpr r31
}
.Lab_true_zero:
// B is zero, A is finite number
{
p0 = dfclass(C,#0x10)
if (p0.new) jump:nt .Lnan
if (p0.new) A = C
}
{
p0 = dfcmp.eq(B,C) // is C also zero?
AH = lsr(AH,#31) // get sign
}
{
BH ^= asl(AH,#31) // form correctly signed zero in B
if (!p0) A = C // If C is not zero, return C
if (!p0) jumpr r31
}
/* B has correctly signed zero, C is also zero */
.Lzero_plus_zero:
{
p0 = cmp.eq(B,C) // yes, scalar equals. +0++0 or -0+-0
if (p0.new) jumpr:t r31
A = B
}
{
TMP = USR
}
{
TMP = extractu(TMP,#2,#SR_ROUND_OFF)
A = #0
}
{
p0 = cmp.eq(TMP,#2)
if (p0.new) AH = ##0x80000000
jumpr r31
}
#undef BTMP
#undef BTMPH
#undef BTMPL
#define CTMP r11:10
.falign
.Lfma_abnormal_c:
/* We know that AB is normal * normal */
/* C is not normal: zero, subnormal, inf, or NaN. */
{
p0 = dfclass(C,#0x10) // is C NaN?
if (p0.new) jump:nt .Lnan
if (p0.new) A = C // move NaN to A
deallocframe
}
{
p0 = dfclass(C,#0x08) // is C inf?
if (p0.new) A = C // return C
if (p0.new) jumpr:nt r31
}
// zero or subnormal
// If we have a zero, and we know AB is normal*normal, we can just call normal multiply
{
p0 = dfclass(C,#0x01) // is C zero?
if (p0.new) jump:nt __hexagon_muldf3
TMP = #1
}
// Left with: subnormal
// Adjust C and jump back to restart
{
allocframe(#STACKSPACE) // oops, deallocated above, re-allocate frame
CTMP = #0
CH = insert(TMP,#EXPBITS,#HI_MANTBITS)
jump .Lfma_abnormal_c_restart
}
END(fma)

View File

@@ -0,0 +1,79 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#define A r1:0
#define B r3:2
#define ATMP r5:4
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG
/*
* Min and Max return A if B is NaN, or B if A is NaN
* Otherwise, they return the smaller or bigger value
*
* If values are equal, we want to favor -0.0 for min and +0.0 for max.
*/
/*
* Compares always return false for NaN
* if (isnan(A)) A = B; if (A > B) A = B will only trigger at most one of those options.
*/
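A small usage sketch of those rules, written with the standard C fmin/fmax (assumption: the host libm follows the same NaN and signed-zero conventions described above; C's Annex F recommends, but does not strictly require, the signed-zero preference):

#include <math.h>
#include <stdio.h>

int main(void) {
    printf("fmin(NaN, 2.0) = %g\n", fmin(NAN, 2.0));                         /* 2: the non-NaN operand wins */
    printf("fmin(-0.0, 0.0) negative? %d\n", signbit(fmin(-0.0, 0.0)) != 0); /* 1: min favors -0.0 */
    printf("fmax(-0.0, 0.0) negative? %d\n", signbit(fmax(-0.0, 0.0)) != 0); /* 0: max favors +0.0 */
    return 0;
}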
.text
.global __hexagon_mindf3
.global __hexagon_maxdf3
.global fmin
.type fmin,@function
.global fmax
.type fmax,@function
.type __hexagon_mindf3,@function
.type __hexagon_maxdf3,@function
Q6_ALIAS(mindf3)
Q6_ALIAS(maxdf3)
.p2align 5
__hexagon_mindf3:
fmin:
{
p0 = dfclass(A,#0x10) // Is A a NaN?
p1 = dfcmp.gt(A,B) // Is A > B? (always false if either is NaN)
ATMP = A
}
{
if (p0) A = B // if A is NaN use B
if (p1) A = B // gt is always false if either is NaN
p2 = dfcmp.eq(A,B) // if A == B
if (!p2.new) jumpr:t r31
}
/* A == B, return A|B to select -0.0 over 0.0 */
{
A = or(ATMP,B)
jumpr r31
}
END(__hexagon_mindf3)
.falign
__hexagon_maxdf3:
fmax:
{
p0 = dfclass(A,#0x10)
p1 = dfcmp.gt(B,A)
ATMP = A
}
{
if (p0) A = B
if (p1) A = B
p2 = dfcmp.eq(A,B)
if (!p2.new) jumpr:t r31
}
/* A == B, return A&B to select 0.0 over -0.0 */
{
A = and(ATMP,B)
jumpr r31
}
END(__hexagon_maxdf3)

View File

@@ -0,0 +1,418 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Double Precision Multiply */
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
#define BTMP r5:4
#define BTMPH r5
#define BTMPL r4
#define PP_ODD r7:6
#define PP_ODD_H r7
#define PP_ODD_L r6
#define ONE r9:8
#define S_ONE r8
#define S_ZERO r9
#define PP_HH r11:10
#define PP_HH_H r11
#define PP_HH_L r10
#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12
#define PP_LL r15:14
#define PP_LL_H r15
#define PP_LL_L r14
#define TMP r28
#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1024
#define MANTISSA_TO_INT_BIAS 52
/* Some constant to adjust normalization amount in error code */
/* Amount to right shift the partial product to get to a denorm */
#define FUDGE 5
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG
#define SR_ROUND_OFF 22
.text
.global __hexagon_muldf3
.type __hexagon_muldf3,@function
Q6_ALIAS(muldf3)
FAST_ALIAS(muldf3)
FAST2_ALIAS(muldf3)
.p2align 5
__hexagon_muldf3:
{
p0 = dfclass(A,#2)
p0 = dfclass(B,#2)
ATMP = combine(##0x40000000,#0)
}
{
ATMP = insert(A,#MANTBITS,#EXPBITS-1)
BTMP = asl(B,#EXPBITS-1)
TMP = #-BIAS
ONE = #1
}
{
PP_ODD = mpyu(BTMPL,ATMPH)
BTMP = insert(ONE,#2,#62)
}
/* since we know that the MSB of the H registers is zero, we should never carry */
/* H <= 2^31-1. L <= 2^32-1. Therefore, HL <= 2^63-2^32-2^31+1 */
/* Adding 2 HLs, we get 2^64-3*2^32+2 maximum. */
/* Therefore, we can add 3 2^32-1 values safely without carry. We only need one. */
{
PP_LL = mpyu(ATMPL,BTMPL)
PP_ODD += mpyu(ATMPL,BTMPH)
}
{
PP_ODD += lsr(PP_LL,#32)
PP_HH = mpyu(ATMPH,BTMPH)
BTMP = combine(##BIAS+BIAS-4,#0)
}
{
PP_HH += lsr(PP_ODD,#32)
if (!p0) jump .Lmul_abnormal
p1 = cmp.eq(PP_LL_L,#0) // 64 lsb's 0?
p1 = cmp.eq(PP_ODD_L,#0) // 64 lsb's 0?
}
/*
* PP_HH can have a maximum of 0x3FFF_FFFF_FFFF_FFFF or thereabouts
* PP_HH can have a minimum of 0x1000_0000_0000_0000 or so
*/
#undef PP_ODD
#undef PP_ODD_H
#undef PP_ODD_L
#define EXP10 r7:6
#define EXP1 r7
#define EXP0 r6
{
if (!p1) PP_HH_L = or(PP_HH_L,S_ONE)
EXP0 = extractu(AH,#EXPBITS,#HI_MANTBITS)
EXP1 = extractu(BH,#EXPBITS,#HI_MANTBITS)
}
{
PP_LL = neg(PP_HH)
EXP0 += add(TMP,EXP1)
TMP = xor(AH,BH)
}
{
if (!p2.new) PP_HH = PP_LL
p2 = cmp.gt(TMP,#-1)
p0 = !cmp.gt(EXP0,BTMPH)
p0 = cmp.gt(EXP0,BTMPL)
if (!p0.new) jump:nt .Lmul_ovf_unf
}
{
A = convert_d2df(PP_HH)
EXP0 = add(EXP0,#-BIAS-58)
}
{
AH += asl(EXP0,#HI_MANTBITS)
jumpr r31
}
.falign
.Lpossible_unf:
/* We end up with a positive exponent */
/* But we may have rounded up to an exponent of 1. */
/* If the exponent is 1, if we rounded up to it
* we need to also raise underflow
* Fortunately, this is pretty easy to detect, we must have +/- 0x0010_0000_0000_0000
* And the PP should also have more than one bit set
*/
/* Note: ATMP should have abs(PP_HH) */
/* Note: BTMPL should have 0x7FEFFFFF */
{
p0 = cmp.eq(AL,#0)
p0 = bitsclr(AH,BTMPL)
if (!p0.new) jumpr:t r31
BTMPH = #0x7fff
}
{
p0 = bitsset(ATMPH,BTMPH)
BTMPL = USR
BTMPH = #0x030
}
{
if (p0) BTMPL = or(BTMPL,BTMPH)
}
{
USR = BTMPL
}
{
p0 = dfcmp.eq(A,A)
jumpr r31
}
.falign
.Lmul_ovf_unf:
{
A = convert_d2df(PP_HH)
ATMP = abs(PP_HH) // take absolute value
EXP1 = add(EXP0,#-BIAS-58)
}
{
AH += asl(EXP1,#HI_MANTBITS)
EXP1 = extractu(AH,#EXPBITS,#HI_MANTBITS)
BTMPL = ##0x7FEFFFFF
}
{
EXP1 += add(EXP0,##-BIAS-58)
//BTMPH = add(clb(ATMP),#-2)
BTMPH = #0
}
{
p0 = cmp.gt(EXP1,##BIAS+BIAS-2) // overflow
if (p0.new) jump:nt .Lmul_ovf
}
{
p0 = cmp.gt(EXP1,#0)
if (p0.new) jump:nt .Lpossible_unf
BTMPH = sub(EXP0,BTMPH)
TMP = #63 // max amount to shift
}
/* Underflow */
/*
* PP_HH has the partial product with sticky LSB.
* PP_HH can have a maximum of 0x3FFF_FFFF_FFFF_FFFF or thereabouts
* PP_HH can have a minimum of 0x1000_0000_0000_0000 or so
* The exponent of PP_HH is in EXP1, which is non-positive (0 or negative)
* That's the exponent that happens after the normalization
*
* EXP0 has the exponent that, when added to the normalized value, is out of range.
*
* Strategy:
*
* * Shift down bits, with sticky bit, such that the bits are aligned according
* to the LZ count and appropriate exponent, but not all the way to mantissa
* field, keep around the last few bits.
* * Put a 1 near the MSB
* * Check the LSBs for inexact; if inexact also set underflow
* * Convert [u]d2df -- will correctly round according to rounding mode
* * Replace exponent field with zero
*
*
*/
{
BTMPL = #0 // offset for extract
BTMPH = sub(#FUDGE,BTMPH) // amount to right shift
}
{
p3 = cmp.gt(PP_HH_H,#-1) // is it positive?
BTMPH = min(BTMPH,TMP) // Don't shift more than 63
PP_HH = ATMP
}
{
TMP = USR
PP_LL = extractu(PP_HH,BTMP)
}
{
PP_HH = asr(PP_HH,BTMPH)
BTMPL = #0x0030 // underflow flag
AH = insert(S_ZERO,#EXPBITS,#HI_MANTBITS)
}
{
p0 = cmp.gtu(ONE,PP_LL) // Did we extract all zeros?
if (!p0.new) PP_HH_L = or(PP_HH_L,S_ONE) // add sticky bit
PP_HH_H = setbit(PP_HH_H,#HI_MANTBITS+3) // Add back in a bit so we can use convert instruction
}
{
PP_LL = neg(PP_HH)
p1 = bitsclr(PP_HH_L,#0x7) // Are the LSB's clear?
if (!p1.new) TMP = or(BTMPL,TMP) // If not, Inexact+Underflow
}
{
if (!p3) PP_HH = PP_LL
USR = TMP
}
{
A = convert_d2df(PP_HH) // Do rounding
p0 = dfcmp.eq(A,A) // realize exception
}
{
AH = insert(S_ZERO,#EXPBITS-1,#HI_MANTBITS+1) // Insert correct exponent
jumpr r31
}
.falign
.Lmul_ovf:
// We get either max finite value or infinity. Either way, overflow+inexact
{
TMP = USR
ATMP = combine(##0x7fefffff,#-1) // positive max finite
A = PP_HH
}
{
PP_LL_L = extractu(TMP,#2,#SR_ROUND_OFF) // rounding bits
TMP = or(TMP,#0x28) // inexact + overflow
BTMP = combine(##0x7ff00000,#0) // positive infinity
}
{
USR = TMP
PP_LL_L ^= lsr(AH,#31) // Does sign match rounding?
TMP = PP_LL_L // unmodified rounding mode
}
{
p0 = !cmp.eq(TMP,#1) // If not round-to-zero and
p0 = !cmp.eq(PP_LL_L,#2) // Not rounding the other way,
if (p0.new) ATMP = BTMP // we should get infinity
p0 = dfcmp.eq(A,A) // Realize FP exception if enabled
}
{
A = insert(ATMP,#63,#0) // insert inf/maxfinite, leave sign
jumpr r31
}
.Lmul_abnormal:
{
ATMP = extractu(A,#63,#0) // strip off sign
BTMP = extractu(B,#63,#0) // strip off sign
}
{
p3 = cmp.gtu(ATMP,BTMP)
if (!p3.new) A = B // sort values
if (!p3.new) B = A // sort values
}
{
// Any NaN --> NaN, possibly raise invalid if sNaN
p0 = dfclass(A,#0x0f) // A not NaN?
if (!p0.new) jump:nt .Linvalid_nan
if (!p3) ATMP = BTMP
if (!p3) BTMP = ATMP
}
{
// Infinity * nonzero number is infinity
p1 = dfclass(A,#0x08) // A is infinity
p1 = dfclass(B,#0x0e) // B is nonzero
}
{
// Infinity * zero --> NaN, raise invalid
// Other zeros return zero
p0 = dfclass(A,#0x08) // A is infinity
p0 = dfclass(B,#0x01) // B is zero
}
{
if (p1) jump .Ltrue_inf
p2 = dfclass(B,#0x01)
}
{
if (p0) jump .Linvalid_zeroinf
if (p2) jump .Ltrue_zero // so return zero
TMP = ##0x7c000000
}
// We are left with a normal or subnormal times a subnormal. A > B
// If A and B are both very small (exp(a) < BIAS-MANTBITS),
// we go to a single sticky bit, which we can round easily.
// If A and B might multiply to something bigger, decrease A exponent and increase
// B exponent and try again
{
p0 = bitsclr(AH,TMP)
if (p0.new) jump:nt .Lmul_tiny
}
{
TMP = cl0(BTMP)
}
{
TMP = add(TMP,#-EXPBITS)
}
{
BTMP = asl(BTMP,TMP)
}
{
B = insert(BTMP,#63,#0)
AH -= asl(TMP,#HI_MANTBITS)
}
jump __hexagon_muldf3
.Lmul_tiny:
{
TMP = USR
A = xor(A,B) // get sign bit
}
{
TMP = or(TMP,#0x30) // Inexact + Underflow
A = insert(ONE,#63,#0) // put in rounded up value
BTMPH = extractu(TMP,#2,#SR_ROUND_OFF) // get rounding mode
}
{
USR = TMP
p0 = cmp.gt(BTMPH,#1) // Round towards pos/neg inf?
if (!p0.new) AL = #0 // If not, zero
BTMPH ^= lsr(AH,#31) // rounding my way --> set LSB
}
{
p0 = cmp.eq(BTMPH,#3) // if rounding towards right inf
if (!p0.new) AL = #0 // don't go to zero
jumpr r31
}
.Linvalid_zeroinf:
{
TMP = USR
}
{
A = #-1
TMP = or(TMP,#2)
}
{
USR = TMP
}
{
p0 = dfcmp.uo(A,A) // force exception if enabled
jumpr r31
}
.Linvalid_nan:
{
p0 = dfclass(B,#0x0f) // if B is not NaN
TMP = convert_df2sf(A) // will generate invalid if sNaN
if (p0.new) B = A // make it whatever A is
}
{
BL = convert_df2sf(B) // will generate invalid if sNaN
A = #-1
jumpr r31
}
.falign
.Ltrue_zero:
{
A = B
B = A
}
.Ltrue_inf:
{
BH = extract(BH,#1,#31)
}
{
AH ^= asl(BH,#31)
jumpr r31
}
END(__hexagon_muldf3)
#undef ATMP
#undef ATMPL
#undef ATMPH
#undef BTMP
#undef BTMPL
#undef BTMPH

View File

@@ -0,0 +1,406 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Double Precision square root */
#define EXP r28
#define A r1:0
#define AH r1
#define AL r0
#define SFSH r3:2
#define SF_S r3
#define SF_H r2
#define SFHALF_SONE r5:4
#define S_ONE r4
#define SFHALF r5
#define SF_D r6
#define SF_E r7
#define RECIPEST r8
#define SFRAD r9
#define FRACRAD r11:10
#define FRACRADH r11
#define FRACRADL r10
#define ROOT r13:12
#define ROOTHI r13
#define ROOTLO r12
#define PROD r15:14
#define PRODHI r15
#define PRODLO r14
#define P_TMP p0
#define P_EXP1 p1
#define NORMAL p2
#define SF_EXPBITS 8
#define SF_MANTBITS 23
#define DF_EXPBITS 11
#define DF_MANTBITS 52
#define DF_BIAS 0x3ff
#define DFCLASS_ZERO 0x01
#define DFCLASS_NORMAL 0x02
#define DFCLASS_DENORMAL 0x04
#define DFCLASS_INFINITE 0x08
#define DFCLASS_NAN 0x10
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG; .type __qdsp_##TAG,@function
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG; .type __hexagon_fast_##TAG,@function
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG; .type __hexagon_fast2_##TAG,@function
#define END(TAG) .size TAG,.-TAG
.text
.global __hexagon_sqrtdf2
.type __hexagon_sqrtdf2,@function
.global __hexagon_sqrt
.type __hexagon_sqrt,@function
Q6_ALIAS(sqrtdf2)
Q6_ALIAS(sqrt)
FAST_ALIAS(sqrtdf2)
FAST_ALIAS(sqrt)
FAST2_ALIAS(sqrtdf2)
FAST2_ALIAS(sqrt)
.type sqrt,@function
.p2align 5
__hexagon_sqrtdf2:
__hexagon_sqrt:
{
PROD = extractu(A,#SF_MANTBITS+1,#DF_MANTBITS-SF_MANTBITS)
EXP = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)
SFHALF_SONE = combine(##0x3f000004,#1)
}
{
NORMAL = dfclass(A,#DFCLASS_NORMAL) // Is it normal
NORMAL = cmp.gt(AH,#-1) // and positive?
if (!NORMAL.new) jump:nt .Lsqrt_abnormal
SFRAD = or(SFHALF,PRODLO)
}
#undef NORMAL
.Ldenormal_restart:
{
FRACRAD = A
SF_E,P_TMP = sfinvsqrta(SFRAD)
SFHALF = and(SFHALF,#-16)
SFSH = #0
}
#undef A
#undef AH
#undef AL
#define ERROR r1:0
#define ERRORHI r1
#define ERRORLO r0
// SF_E : reciprocal square root
// SF_H : half rsqrt
// SF_S : square root
// SF_D : error term
// SFHALF: 0.5
{
SF_S += sfmpy(SF_E,SFRAD):lib // s0: root
SF_H += sfmpy(SF_E,SFHALF):lib // h0: 0.5*y0. Could also decrement exponent...
SF_D = SFHALF
#undef SFRAD
#define SHIFTAMT r9
SHIFTAMT = and(EXP,#1)
}
{
SF_D -= sfmpy(SF_S,SF_H):lib // d0: 0.5-H*S = 0.5-0.5*~1
FRACRADH = insert(S_ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) // replace upper bits with hidden
P_EXP1 = cmp.gtu(SHIFTAMT,#0)
}
{
SF_S += sfmpy(SF_S,SF_D):lib // s1: refine sqrt
SF_H += sfmpy(SF_H,SF_D):lib // h1: refine half-recip
SF_D = SFHALF
SHIFTAMT = mux(P_EXP1,#8,#9)
}
{
SF_D -= sfmpy(SF_S,SF_H):lib // d1: error term
FRACRAD = asl(FRACRAD,SHIFTAMT) // Move fracrad bits to right place
SHIFTAMT = mux(P_EXP1,#3,#2)
}
{
SF_H += sfmpy(SF_H,SF_D):lib // d2: rsqrt
// cool trick: half of 1/sqrt(x) has same mantissa as 1/sqrt(x).
PROD = asl(FRACRAD,SHIFTAMT) // fracrad<<(2+exp1)
}
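// (Halving a normal IEEE value only decrements the exponent, so SF_H, which
// holds 0.5/sqrt(x), carries the same mantissa bits as 1/sqrt(x); the mantissa
// extraction below exploits exactly that.)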
{
SF_H = and(SF_H,##0x007fffff)
}
{
SF_H = add(SF_H,##0x00800000 - 3)
SHIFTAMT = mux(P_EXP1,#7,#8)
}
{
RECIPEST = asl(SF_H,SHIFTAMT)
SHIFTAMT = mux(P_EXP1,#15-(1+1),#15-(1+0))
}
{
ROOT = mpyu(RECIPEST,PRODHI) // root = mpyu_full(recipest,hi(fracrad<<(2+exp1)))
}
#undef SFSH // r3:2
#undef SF_H // r2
#undef SF_S // r3
#undef S_ONE // r4
#undef SFHALF // r5
#undef SFHALF_SONE // r5:4
#undef SF_D // r6
#undef SF_E // r7
#define HL r3:2
#define LL r5:4
#define HH r7:6
#undef P_EXP1
#define P_CARRY0 p1
#define P_CARRY1 p2
#define P_CARRY2 p3
/* Iteration 0 */
/* Maybe we can save a cycle by starting with ERROR=asl(fracrad), then as we multiply */
/* We can shift and subtract instead of shift and add? */
{
ERROR = asl(FRACRAD,#15)
PROD = mpyu(ROOTHI,ROOTHI)
P_CARRY0 = cmp.eq(r0,r0)
}
{
ERROR -= asl(PROD,#15)
PROD = mpyu(ROOTHI,ROOTLO)
P_CARRY1 = cmp.eq(r0,r0)
}
{
ERROR -= lsr(PROD,#16)
P_CARRY2 = cmp.eq(r0,r0)
}
{
ERROR = mpyu(ERRORHI,RECIPEST)
}
{
ROOT += lsr(ERROR,SHIFTAMT)
SHIFTAMT = add(SHIFTAMT,#16)
ERROR = asl(FRACRAD,#31) // for next iter
}
/* Iteration 1 */
{
PROD = mpyu(ROOTHI,ROOTHI)
ERROR -= mpyu(ROOTHI,ROOTLO) // amount is 31, no shift needed
}
{
ERROR -= asl(PROD,#31)
PROD = mpyu(ROOTLO,ROOTLO)
}
{
ERROR -= lsr(PROD,#33)
}
{
ERROR = mpyu(ERRORHI,RECIPEST)
}
{
ROOT += lsr(ERROR,SHIFTAMT)
SHIFTAMT = add(SHIFTAMT,#16)
ERROR = asl(FRACRAD,#47) // for next iter
}
/* Iteration 2 */
{
PROD = mpyu(ROOTHI,ROOTHI)
}
{
ERROR -= asl(PROD,#47)
PROD = mpyu(ROOTHI,ROOTLO)
}
{
ERROR -= asl(PROD,#16) // bidir shr 31-47
PROD = mpyu(ROOTLO,ROOTLO)
}
{
ERROR -= lsr(PROD,#17) // 64-47
}
{
ERROR = mpyu(ERRORHI,RECIPEST)
}
{
ROOT += lsr(ERROR,SHIFTAMT)
}
#undef ERROR
#undef PROD
#undef PRODHI
#undef PRODLO
#define REM_HI r15:14
#define REM_HI_HI r15
#define REM_LO r1:0
#undef RECIPEST
#undef SHIFTAMT
#define TWOROOT_LO r9:8
/* Adjust Root */
{
HL = mpyu(ROOTHI,ROOTLO)
LL = mpyu(ROOTLO,ROOTLO)
REM_HI = #0
REM_LO = #0
}
{
HL += lsr(LL,#33)
LL += asl(HL,#33)
P_CARRY0 = cmp.eq(r0,r0)
}
{
HH = mpyu(ROOTHI,ROOTHI)
REM_LO = sub(REM_LO,LL,P_CARRY0):carry
TWOROOT_LO = #1
}
{
HH += lsr(HL,#31)
TWOROOT_LO += asl(ROOT,#1)
}
#undef HL
#undef LL
#define REM_HI_TMP r3:2
#define REM_HI_TMP_HI r3
#define REM_LO_TMP r5:4
{
REM_HI = sub(FRACRAD,HH,P_CARRY0):carry
REM_LO_TMP = sub(REM_LO,TWOROOT_LO,P_CARRY1):carry
#undef FRACRAD
#undef HH
#define ZERO r11:10
#define ONE r7:6
ONE = #1
ZERO = #0
}
{
REM_HI_TMP = sub(REM_HI,ZERO,P_CARRY1):carry
ONE = add(ROOT,ONE)
EXP = add(EXP,#-DF_BIAS) // subtract bias --> signed exp
}
{
// If carry set, no borrow: result was still positive
if (P_CARRY1) ROOT = ONE
if (P_CARRY1) REM_LO = REM_LO_TMP
if (P_CARRY1) REM_HI = REM_HI_TMP
}
{
REM_LO_TMP = sub(REM_LO,TWOROOT_LO,P_CARRY2):carry
ONE = #1
EXP = asr(EXP,#1) // divide signed exp by 2
}
{
REM_HI_TMP = sub(REM_HI,ZERO,P_CARRY2):carry
ONE = add(ROOT,ONE)
}
{
if (P_CARRY2) ROOT = ONE
if (P_CARRY2) REM_LO = REM_LO_TMP
// since tworoot <= 2^32, remhi must be zero
#undef REM_HI_TMP
#undef REM_HI_TMP_HI
#define S_ONE r2
#define ADJ r3
S_ONE = #1
}
{
P_TMP = cmp.eq(REM_LO,ZERO) // is the low part zero
if (!P_TMP.new) ROOTLO = or(ROOTLO,S_ONE) // if so, it's exact... hopefully
ADJ = cl0(ROOT)
EXP = add(EXP,#-63)
}
#undef REM_LO
#define RET r1:0
#define RETHI r1
{
RET = convert_ud2df(ROOT) // set up mantissa, maybe set inexact flag
EXP = add(EXP,ADJ) // add back bias
}
{
RETHI += asl(EXP,#DF_MANTBITS-32) // add exponent adjust
jumpr r31
}
#undef REM_LO_TMP
#undef REM_HI_TMP
#undef REM_HI_TMP_HI
#undef REM_LO
#undef REM_HI
#undef TWOROOT_LO
#undef RET
#define A r1:0
#define AH r1
#define AL r0
#undef S_ONE
#define TMP r3:2
#define TMPHI r3
#define TMPLO r2
#undef P_CARRY0
#define P_NEG p1
#define SFHALF r5
#define SFRAD r9
.Lsqrt_abnormal:
{
P_TMP = dfclass(A,#DFCLASS_ZERO) // zero?
if (P_TMP.new) jumpr:t r31
}
{
P_TMP = dfclass(A,#DFCLASS_NAN)
if (P_TMP.new) jump:nt .Lsqrt_nan
}
{
P_TMP = cmp.gt(AH,#-1)
if (!P_TMP.new) jump:nt .Lsqrt_invalid_neg
if (!P_TMP.new) EXP = ##0x7F800001 // sNaN
}
{
P_TMP = dfclass(A,#DFCLASS_INFINITE)
if (P_TMP.new) jumpr:nt r31
}
// If we got here, we're denormal
// prepare to restart
{
A = extractu(A,#DF_MANTBITS,#0) // Extract mantissa
}
{
EXP = add(clb(A),#-DF_EXPBITS) // how much to normalize?
}
{
A = asl(A,EXP) // Shift mantissa
EXP = sub(#1,EXP) // Form exponent
}
{
AH = insert(EXP,#1,#DF_MANTBITS-32) // insert lsb of exponent
}
{
TMP = extractu(A,#SF_MANTBITS+1,#DF_MANTBITS-SF_MANTBITS) // get sf value (mant+exp1)
SFHALF = ##0x3f000004 // form half constant
}
{
SFRAD = or(SFHALF,TMPLO) // form sf value
SFHALF = and(SFHALF,#-16)
jump .Ldenormal_restart // restart
}
.Lsqrt_nan:
{
EXP = convert_df2sf(A) // if sNaN, get invalid
A = #-1 // qNaN
jumpr r31
}
.Lsqrt_invalid_neg:
{
A = convert_sf2df(EXP) // Invalid,NaNval
jumpr r31
}
END(__hexagon_sqrt)
END(__hexagon_sqrtdf2)

View File

@ -0,0 +1,85 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
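// Signed 64-bit divide (r1:0 / r3:2): take the absolute values, run a
// shift-and-subtract loop on the magnitudes, then negate the quotient
// when the operand signs differ.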
FUNCTION_BEGIN __hexagon_divdi3
{
p2 = tstbit(r1,#31)
p3 = tstbit(r3,#31)
}
{
r1:0 = abs(r1:0)
r3:2 = abs(r3:2)
}
{
r6 = cl0(r1:0) // count leading 0's of dividend (numerator)
r7 = cl0(r3:2) // count leading 0's of divisor (denominator)
r5:4 = r3:2 // divisor moved into working registers
r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder
}
{
p3 = xor(p2,p3)
r10 = sub(r7,r6) // left shift count for bit & divisor
r1:0 = #0 // initialize quotient to 0
r15:14 = #1 // initialize bit to 1
}
{
r11 = add(r10,#1) // loop count is 1 more than shift count
r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb
r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor
}
{
p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend
loop0(1f,r11) // register loop
}
{
if (p0) jump .hexagon_divdi3_return // if divisor > dividend, we're done, so return
}
.falign
1:
{
p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder
}
{
r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder
r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8)
}
{
r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8)
r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6)
}
{
r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration
r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration
}:endloop0
.hexagon_divdi3_return:
{
r3:2 = neg(r1:0)
}
{
r1:0 = vmux(p3,r3:2,r1:0)
jumpr r31
}
FUNCTION_END __hexagon_divdi3
.globl __qdsp_divdi3
.set __qdsp_divdi3, __hexagon_divdi3

View File

@ -0,0 +1,84 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
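// Signed 32-bit divide (r0 / r1): shift-and-subtract division on the
// absolute values, with the quotient negated when the operand signs differ.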
FUNCTION_BEGIN __hexagon_divsi3
{
p0 = cmp.ge(r0,#0)
p1 = cmp.ge(r1,#0)
r1 = abs(r0)
r2 = abs(r1)
}
{
r3 = cl0(r1)
r4 = cl0(r2)
r5 = sub(r1,r2)
p2 = cmp.gtu(r2,r1)
}
#if (__HEXAGON_ARCH__ == 60)
{
r0 = #0
p1 = xor(p0,p1)
p0 = cmp.gtu(r2,r5)
}
if (p2) jumpr r31
#else
{
r0 = #0
p1 = xor(p0,p1)
p0 = cmp.gtu(r2,r5)
if (p2) jumpr r31
}
#endif
{
r0 = mux(p1,#-1,#1)
if (p0) jumpr r31
r4 = sub(r4,r3)
r3 = #1
}
{
r0 = #0
r3:2 = vlslw(r3:2,r4)
loop0(1f,r4)
}
.falign
1:
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r1 = sub(r1,r2)
if (!p0.new) r0 = add(r0,r3)
r3:2 = vlsrw(r3:2,#1)
}:endloop0
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r0 = add(r0,r3)
if (!p1) jumpr r31
}
{
r0 = neg(r0)
jumpr r31
}
FUNCTION_END __hexagon_divsi3
.globl __qdsp_divsi3
.set __qdsp_divsi3, __hexagon_divsi3

View File

@ -0,0 +1,37 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
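// fabs/fabsf: clear the IEEE sign bit of the double/single operand;
// fabsl is aliased to fabs below.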
FUNCTION_BEGIN fabs
{
r1 = clrbit(r1, #31)
jumpr r31
}
FUNCTION_END fabs
FUNCTION_BEGIN fabsf
{
r0 = clrbit(r0, #31)
jumpr r31
}
FUNCTION_END fabsf
.globl fabsl
.set fabsl, fabs

View File

@ -0,0 +1,491 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* ==================================================================== */
/* FUNCTIONS Optimized double floating point operators */
/* ==================================================================== */
/* c = dadd_asm(a, b) */
/* ==================================================================== *
fast2_QDOUBLE fast2_dadd(fast2_QDOUBLE a,fast2_QDOUBLE b) {
fast2_QDOUBLE c;
lint manta = a & MANTMASK;
int expa = Q6_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = Q6_R_sxth_R(b) ;
int exp, expdiff, j, k, hi, lo, cn;
lint mant;
expdiff = (int) Q6_P_vabsdiffh_PP(a, b);
expdiff = Q6_R_sxth_R(expdiff) ;
if (expdiff > 63) { expdiff = 62;}
if (expa > expb) {
exp = expa + 1;
expa = 1;
expb = expdiff + 1;
} else {
exp = expb + 1;
expb = 1;
expa = expdiff + 1;
}
mant = (manta>>expa) + (mantb>>expb);
hi = (int) (mant>>32);
lo = (int) (mant);
k = Q6_R_normamt_R(hi);
if(hi == 0 || hi == -1) k = 31+Q6_R_normamt_R(lo);
mant = (mant << k);
cn = (mant == 0x8000000000000000LL);
exp = exp - k + cn;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global fast2_dadd_asm
.type fast2_dadd_asm, @function
fast2_dadd_asm:
#define manta R0
#define mantexpa R1:0
#define lmanta R1:0
#define mantb R2
#define mantexpb R3:2
#define lmantb R3:2
#define expa R4
#define expb R5
#define mantexpd R7:6
#define expd R6
#define exp R8
#define c63 R9
#define lmant R1:0
#define manth R1
#define mantl R0
#define minmin R11:10 // exactly 0x0000000000008001LL
#define minminl R10
#define k R4
#define ce P0
.falign
{
mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL
c63 = #62
expa = SXTH(manta)
expb = SXTH(mantb)
} {
expd = SXTH(expd)
ce = CMP.GT(expa, expb);
if ( ce.new) exp = add(expa, #1)
if (!ce.new) exp = add(expb, #1)
} {
if ( ce) expa = #1
if (!ce) expb = #1
manta.L = #0
expd = MIN(expd, c63)
} {
if (!ce) expa = add(expd, #1)
if ( ce) expb = add(expd, #1)
mantb.L = #0
minmin = #0
} {
lmanta = ASR(lmanta, expa)
lmantb = ASR(lmantb, expb)
} {
lmant = add(lmanta, lmantb)
minminl.L = #0x8001
} {
k = clb(lmant)
c63 = #58
} {
k = add(k, #-1)
p0 = cmp.gt(k, c63)
} {
mantexpa = ASL(lmant, k)
exp = SUB(exp, k)
if(p0) jump .Ldenorma
} {
manta = insert(exp, #16, #0)
jumpr r31
}
.Ldenorma:
{
mantexpa = minmin
jumpr r31
}
/* =================================================================== *
fast2_QDOUBLE fast2_dsub(fast2_QDOUBLE a,fast2_QDOUBLE b) {
fast2_QDOUBLE c;
lint manta = a & MANTMASK;
int expa = Q6_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = Q6_R_sxth_R(b) ;
int exp, expdiff, j, k;
lint mant;
expdiff = (int) Q6_P_vabsdiffh_PP(a, b);
expdiff = Q6_R_sxth_R(expdiff) ;
if (expdiff > 63) { expdiff = 62;}
if (expa > expb) {
exp = expa + 1;
expa = 1;
expb = expdiff + 1;
} else {
exp = expb + 1;
expb = 1;
expa = expdiff + 1;
}
mant = (manta>>expa) - (mantb>>expb);
k = Q6_R_clb_P(mant)-1;
mant = (mant << k);
exp = exp - k;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global fast2_dsub_asm
.type fast2_dsub_asm, @function
fast2_dsub_asm:
#define manta R0
#define mantexpa R1:0
#define lmanta R1:0
#define mantb R2
#define mantexpb R3:2
#define lmantb R3:2
#define expa R4
#define expb R5
#define mantexpd R7:6
#define expd R6
#define exp R8
#define c63 R9
#define lmant R1:0
#define manth R1
#define mantl R0
#define minmin R11:10 // exactly 0x0000000000008001LL
#define minminl R10
#define k R4
#define ce P0
.falign
{
mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL
c63 = #62
expa = SXTH(manta)
expb = SXTH(mantb)
} {
expd = SXTH(expd)
ce = CMP.GT(expa, expb);
if ( ce.new) exp = add(expa, #1)
if (!ce.new) exp = add(expb, #1)
} {
if ( ce) expa = #1
if (!ce) expb = #1
manta.L = #0
expd = MIN(expd, c63)
} {
if (!ce) expa = add(expd, #1)
if ( ce) expb = add(expd, #1)
mantb.L = #0
minmin = #0
} {
lmanta = ASR(lmanta, expa)
lmantb = ASR(lmantb, expb)
} {
lmant = sub(lmanta, lmantb)
minminl.L = #0x8001
} {
k = clb(lmant)
c63 = #58
} {
k = add(k, #-1)
p0 = cmp.gt(k, c63)
} {
mantexpa = ASL(lmant, k)
exp = SUB(exp, k)
if(p0) jump .Ldenorm
} {
manta = insert(exp, #16, #0)
jumpr r31
}
.Ldenorm:
{
mantexpa = minmin
jumpr r31
}
/* ==================================================================== *
fast2_QDOUBLE fast2_dmpy(fast2_QDOUBLE a,fast2_QDOUBLE b) {
fast2_QDOUBLE c;
lint manta = a & MANTMASK;
int expa = Q6_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = Q6_R_sxth_R(b) ;
int exp, k;
lint mant;
int hia, hib, hi, lo;
unsigned int loa, lob;
hia = (int)(a >> 32);
loa = Q6_R_extractu_RII((int)manta, 31, 1);
hib = (int)(b >> 32);
lob = Q6_R_extractu_RII((int)mantb, 31, 1);
mant = Q6_P_mpy_RR(hia, lob);
mant = Q6_P_mpyacc_RR(mant,hib, loa);
mant = (mant >> 30) + (Q6_P_mpy_RR(hia, hib)<<1);
hi = (int) (mant>>32);
k = Q6_R_normamt_R(hi);
mant = mant << k;
exp = expa + expb - k;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global fast2_dmpy_asm
.type fast2_dmpy_asm, @function
fast2_dmpy_asm:
#define mantal R0
#define mantah R1
#define mantexpa R1:0
#define mantbl R2
#define mantbh R3
#define mantexpb R3:2
#define expa R4
#define expb R5
#define c8001 R12
#define mantexpd R7:6
#define mantdh R7
#define exp R8
#define lmantc R11:10
#define kb R9
#define guard R11
#define mantal_ R12
#define mantbl_ R13
#define min R15:14
#define minh R15
.falign
{
mantbl_= lsr(mantbl, #16)
expb = sxth(mantbl)
expa = sxth(mantal)
mantal_= lsr(mantal, #16)
}
{
lmantc = mpy(mantah, mantbh)
mantexpd = mpy(mantah, mantbl_)
mantal.L = #0x0
min = #0
}
{
lmantc = add(lmantc, lmantc)
mantexpd+= mpy(mantbh, mantal_)
mantbl.L = #0x0
minh.H = #0x8000
}
{
mantexpd = asr(mantexpd, #15)
c8001.L = #0x8001
p1 = cmp.eq(mantexpa, mantexpb)
}
{
mantexpd = add(mantexpd, lmantc)
exp = add(expa, expb)
p2 = cmp.eq(mantexpa, min)
}
{
kb = clb(mantexpd)
mantexpb = abs(mantexpd)
guard = #58
}
{
p1 = and(p1, p2)
exp = sub(exp, kb)
kb = add(kb, #-1)
p0 = cmp.gt(kb, guard)
}
{
exp = add(exp, #1)
mantexpa = asl(mantexpd, kb)
if(p1) jump .Lsat //rarely happens
}
{
mantal = insert(exp,#16, #0)
if(!p0) jumpr r31
}
{
mantal = insert(c8001,#16, #0)
jumpr r31
}
.Lsat:
{
mantexpa = #-1
}
{
mantexpa = lsr(mantexpa, #1)
}
{
mantal = insert(exp,#16, #0)
jumpr r31
}
/* ==================================================================== *
int fast2_qd2f(fast2_QDOUBLE a) {
int exp;
long long int manta;
int ic, rnd, mantb;
manta = a>>32;
exp = Q6_R_sxth_R(a) ;
ic = 0x80000000 & manta;
manta = Q6_R_abs_R_sat(manta);
rnd = 0x40;
mantb = (manta + rnd)>>7;
exp = (exp + 126);
if((manta & 0xff) == rnd) rnd = 0x00;
if((manta & 0x7fffffc0) == 0x7fffffc0) {
manta = 0x0; exp++;
} else {
manta= mantb & 0x007fffff;
}
exp = (exp << 23) & 0x7fffffc0;
ic = Q6_R_addacc_RR(ic, exp, manta);
return (ic);
}
* ==================================================================== */
.text
.global fast2_qd2f_asm
.type fast2_qd2f_asm, @function
fast2_qd2f_asm:
#define mantah R1
#define mantal R0
#define cff R0
#define mant R3
#define expo R4
#define rnd R5
#define mask R6
#define c07f R7
#define c80 R0
#define mantb R2
#define ic R0
.falign
{
mant = abs(mantah):sat
expo = sxth(mantal)
rnd = #0x40
mask.L = #0xffc0
}
{
cff = extractu(mant, #8, #0)
p2 = cmp.gt(expo, #126)
p3 = cmp.ge(expo, #-126)
mask.H = #0x7fff
}
{
p1 = cmp.eq(cff,#0x40)
if(p1.new) rnd = #0
expo = add(expo, #126)
if(!p3) jump .Lmin
}
{
p0 = bitsset(mant, mask)
c80.L = #0x0000
mantb = add(mant, rnd)
c07f = lsr(mask, #8)
}
{
if(p0) expo = add(expo, #1)
if(p0) mant = #0
mantb = lsr(mantb, #7)
c80.H = #0x8000
}
{
ic = and(c80, mantah)
mask &= asl(expo, #23)
if(!p0) mant = and(mantb, c07f)
if(p2) jump .Lmax
}
{
ic += add(mask, mant)
jumpr r31
}
.Lmax:
{
ic.L = #0xffff;
}
{
ic.H = #0x7f7f;
jumpr r31
}
.Lmin:
{
ic = #0x0
jumpr r31
}
/* ==================================================================== *
fast2_QDOUBLE fast2_f2qd(int ia) {
lint exp;
lint mant;
fast2_QDOUBLE c;
mant = ((ia << 7) | 0x40000000)&0x7fffff80 ;
if (ia & 0x80000000) mant = -mant;
exp = ((ia >> 23) & 0xFFLL) - 126;
c = (mant<<32) | Q6_R_zxth_R(exp);
return(c);
}
* ==================================================================== */
.text
.global fast2_f2qd_asm
.type fast2_f2qd_asm, @function
fast2_f2qd_asm:
#define ia R0
#define mag R3
#define mantr R1
#define expr R0
#define zero R2
#define maxneg R5:4
#define maxnegl R4
.falign
{
mantr = asl(ia, #7)
p0 = tstbit(ia, #31)
maxneg = #0
mag = add(ia,ia)
}
{
mantr = setbit(mantr, #30)
expr= extractu(ia,#8,#23)
maxnegl.L = #0x8001
p1 = cmp.eq(mag, #0)
}
{
mantr= extractu(mantr, #31, #0)
expr= add(expr, #-126)
zero = #0
if(p1) jump .Lminqd
}
{
expr = zxth(expr)
if(p0) mantr= sub(zero, mantr)
jumpr r31
}
.Lminqd:
{
R1:0 = maxneg
jumpr r31
}

View File

@ -0,0 +1,345 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* ==================================================================== *
fast2_QLDOUBLE fast2_ldadd(fast2_QLDOUBLE a,fast2_QLDOUBLE b) {
fast2_QLDOUBLE c;
lint manta = a & MANTMASK;
int expa = Q6_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = Q6_R_sxth_R(b) ;
int exp, expdiff, j, k, hi, lo, cn;
lint mant;
expdiff = (int) Q6_P_vabsdiffh_PP(a, b);
expdiff = Q6_R_sxth_R(expdiff) ;
if (expdiff > 63) { expdiff = 62;}
if (expa > expb) {
exp = expa + 1;
expa = 1;
expb = expdiff + 1;
} else {
exp = expb + 1;
expb = 1;
expa = expdiff + 1;
}
mant = (manta>>expa) + (mantb>>expb);
hi = (int) (mant>>32);
lo = (int) (mant);
k = Q6_R_normamt_R(hi);
if(hi == 0 || hi == -1) k = 31+Q6_R_normamt_R(lo);
mant = (mant << k);
cn = (mant == 0x8000000000000000LL);
exp = exp - k + cn;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global fast2_ldadd_asm
.type fast2_ldadd_asm, @function
fast2_ldadd_asm:
#define manta R1:0
#define lmanta R1:0
#define mantb R3:2
#define lmantb R3:2
#define expa R4
#define expb R5
#define expd R6
#define exp R8
#define c63 R9
#define lmant R1:0
#define k R4
#define ce P0
#define zero R3:2
.falign
{
expa = memw(r29+#8)
expb = memw(r29+#24)
r7 = r0
}
{
expd = sub(expa, expb):sat
ce = CMP.GT(expa, expb);
if ( ce.new) exp = add(expa, #1)
if (!ce.new) exp = add(expb, #1)
} {
expd = abs(expd):sat
if ( ce) expa = #1
if (!ce) expb = #1
c63 = #62
} {
expd = MIN(expd, c63)
manta = memd(r29+#0)
mantb = memd(r29+#16)
} {
if (!ce) expa = add(expd, #1)
if ( ce) expb = add(expd, #1)
} {
lmanta = ASR(lmanta, expa)
lmantb = ASR(lmantb, expb)
} {
lmant = add(lmanta, lmantb)
zero = #0
} {
k = clb(lmant)
c63.L =#0x0001
} {
exp -= add(k, #-1) //exp = exp - (k-1)
k = add(k, #-1)
p0 = cmp.gt(k, #58)
c63.H =#0x8000
} {
if(!p0)memw(r7+#8) = exp
lmant = ASL(lmant, k)
if(p0) jump .Ldenorma
} {
memd(r7+#0) = lmant
jumpr r31
}
.Ldenorma:
memd(r7+#0) = zero
{
memw(r7+#8) = c63
jumpr r31
}
/* =================================================================== *
fast2_QLDOUBLE fast2_ldsub(fast2_QLDOUBLE a,fast2_QLDOUBLE b) {
fast2_QLDOUBLE c;
lint manta = a & MANTMASK;
int expa = Q6_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = Q6_R_sxth_R(b) ;
int exp, expdiff, j, k;
lint mant;
expdiff = (int) Q6_P_vabsdiffh_PP(a, b);
expdiff = Q6_R_sxth_R(expdiff) ;
if (expdiff > 63) { expdiff = 62;}
if (expa > expb) {
exp = expa + 1;
expa = 1;
expb = expdiff + 1;
} else {
exp = expb + 1;
expb = 1;
expa = expdiff + 1;
}
mant = (manta>>expa) - (mantb>>expb);
k = Q6_R_clb_P(mant)-1;
mant = (mant << k);
exp = exp - k;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global fast2_ldsub_asm
.type fast2_ldsub_asm, @function
fast2_ldsub_asm:
#define manta R1:0
#define lmanta R1:0
#define mantb R3:2
#define lmantb R3:2
#define expa R4
#define expb R5
#define expd R6
#define exp R8
#define c63 R9
#define lmant R1:0
#define k R4
#define ce P0
#define zero R3:2
.falign
{
expa = memw(r29+#8)
expb = memw(r29+#24)
r7 = r0
}
{
expd = sub(expa, expb):sat
ce = CMP.GT(expa, expb);
if ( ce.new) exp = add(expa, #1)
if (!ce.new) exp = add(expb, #1)
} {
expd = abs(expd):sat
if ( ce) expa = #1
if (!ce) expb = #1
c63 = #62
} {
expd = min(expd, c63)
manta = memd(r29+#0)
mantb = memd(r29+#16)
} {
if (!ce) expa = add(expd, #1)
if ( ce) expb = add(expd, #1)
} {
lmanta = ASR(lmanta, expa)
lmantb = ASR(lmantb, expb)
} {
lmant = sub(lmanta, lmantb)
zero = #0
} {
k = clb(lmant)
c63.L =#0x0001
} {
exp -= add(k, #-1) //exp = exp - (k-1)
k = add(k, #-1)
p0 = cmp.gt(k, #58)
c63.H =#0x8000
} {
if(!p0)memw(r7+#8) = exp
lmant = asl(lmant, k)
if(p0) jump .Ldenorma_s
} {
memd(r7+#0) = lmant
jumpr r31
}
.Ldenorma_s:
memd(r7+#0) = zero
{
memw(r7+#8) = c63
jumpr r31
}
/* ==================================================================== *
fast2_QLDOUBLE fast2_ldmpy(fast2_QLDOUBLE a,fast2_QLDOUBLE b) {
fast2_QLDOUBLE c;
lint manta = a & MANTMASK;
int expa = Q6_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = Q6_R_sxth_R(b) ;
int exp, k;
lint mant;
int hia, hib, hi, lo;
unsigned int loa, lob;
hia = (int)(a >> 32);
loa = Q6_R_extractu_RII((int)manta, 31, 1);
hib = (int)(b >> 32);
lob = Q6_R_extractu_RII((int)mantb, 31, 1);
mant = Q6_P_mpy_RR(hia, lob);
mant = Q6_P_mpyacc_RR(mant,hib, loa);
mant = (mant >> 30) + (Q6_P_mpy_RR(hia, hib)<<1);
hi = (int) (mant>>32);
k = Q6_R_normamt_R(hi);
mant = mant << k;
exp = expa + expb - k;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global fast2_ldmpy_asm
.type fast2_ldmpy_asm, @function
fast2_ldmpy_asm:
#define mantxl_ R9
#define mantxl R14
#define mantxh R15
#define mantx R15:14
#define mantbl R2
#define mantbl_ R8
#define mantbh R3
#define mantb R3:2
#define expa R4
#define expb R5
#define c8001 R8
#define mantd R7:6
#define lmantc R11:10
#define kp R9
#define min R13:12
#define minh R13
#define max R13:12
#define maxh R13
#define ret R0
.falign
{
mantx = memd(r29+#0)
mantb = memd(r29+#16)
min = #0
}
{
mantbl_= extractu(mantbl, #31, #1)
mantxl_= extractu(mantxl, #31, #1)
minh.H = #0x8000
}
{
lmantc = mpy(mantxh, mantbh)
mantd = mpy(mantxh, mantbl_)
expa = memw(r29+#8)
expb = memw(r29+#24)
}
{
lmantc = add(lmantc, lmantc)
mantd += mpy(mantbh, mantxl_)
}
{
mantd = asr(mantd, #30)
c8001.L = #0x0001
p1 = cmp.eq(mantx, mantb)
}
{
mantd = add(mantd, lmantc)
expa= add(expa, expb)
p2 = cmp.eq(mantb, min)
}
{
kp = clb(mantd)
c8001.H = #0x8000
p1 = and(p1, p2)
}
{
expa-= add(kp, #-1)
kp = add(kp, #-1)
if(p1) jump .Lsat
}
{
mantd = asl(mantd, kp)
memw(ret+#8) = expa
p0 = cmp.gt(kp, #58)
if(p0.new) jump:NT .Ldenorm //rarely happens
}
{
memd(ret+#0) = mantd
jumpr r31
}
.Lsat:
{
max = #0
expa+= add(kp, #1)
}
{
maxh.H = #0x4000
memw(ret+#8) = expa
}
{
memd(ret+#0) = max
jumpr r31
}
.Ldenorm:
{
memw(ret+#8) = c8001
mantx = #0
}
{
memd(ret+#0) = mantx
jumpr r31
}

View File

@ -0,0 +1,400 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* ==================================================================== */
/* FUNCTIONS Optimized double floating point operators */
/* ==================================================================== */
/* c = dadd_asm(a, b) */
/* ====================================================================
QDOUBLE dadd(QDOUBLE a,QDOUBLE b) {
QDOUBLE c;
lint manta = a & MANTMASK;
int expa = HEXAGON_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = HEXAGON_R_sxth_R(b) ;
int exp, expdiff, j, k, hi, lo, cn;
lint mant;
expdiff = (int) HEXAGON_P_vabsdiffh_PP(a, b);
expdiff = HEXAGON_R_sxth_R(expdiff) ;
if (expdiff > 63) { expdiff = 62;}
if (expa > expb) {
exp = expa + 1;
expa = 1;
expb = expdiff + 1;
} else {
exp = expb + 1;
expb = 1;
expa = expdiff + 1;
}
mant = (manta>>expa) + (mantb>>expb);
hi = (int) (mant>>32);
lo = (int) (mant);
k = HEXAGON_R_normamt_R(hi);
if(hi == 0 || hi == -1) k = 31+HEXAGON_R_normamt_R(lo);
mant = (mant << k);
cn = (mant == 0x8000000000000000LL);
exp = exp - k + cn;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global dadd_asm
.type dadd_asm, @function
dadd_asm:
#define manta R0
#define mantexpa R1:0
#define lmanta R1:0
#define mantb R2
#define mantexpb R3:2
#define lmantb R3:2
#define expa R4
#define expb R5
#define mantexpd R7:6
#define expd R6
#define exp R8
#define c63 R9
#define lmant R1:0
#define manth R1
#define mantl R0
#define zero R7:6
#define zerol R6
#define minus R3:2
#define minusl R2
#define maxneg R9
#define minmin R11:10 // exactly 0x8000000000000000LL
#define minminh R11
#define k R4
#define kl R5
#define ce P0
.falign
{
mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL
c63 = #62
expa = SXTH(manta)
expb = SXTH(mantb)
} {
expd = SXTH(expd)
ce = CMP.GT(expa, expb);
if ( ce.new) exp = add(expa, #1)
if (!ce.new) exp = add(expb, #1)
} {
if ( ce) expa = #1
if (!ce) expb = #1
manta.L = #0
expd = MIN(expd, c63)
} {
if (!ce) expa = add(expd, #1)
if ( ce) expb = add(expd, #1)
mantb.L = #0
zero = #0
} {
lmanta = ASR(lmanta, expa)
lmantb = ASR(lmantb, expb)
minmin = #0
} {
lmant = add(lmanta, lmantb)
minus = #-1
minminh.H = #0x8000
} {
k = NORMAMT(manth)
kl = NORMAMT(mantl)
p0 = cmp.eq(manth, zerol)
p1 = cmp.eq(manth, minusl)
} {
p0 = OR(p0, p1)
if(p0.new) k = add(kl, #31)
maxneg.H = #0
} {
mantexpa = ASL(lmant, k)
exp = SUB(exp, k)
maxneg.L = #0x8001
} {
p0 = cmp.eq(mantexpa, zero)
p1 = cmp.eq(mantexpa, minus)
manta.L = #0
exp = ZXTH(exp)
} {
p2 = cmp.eq(mantexpa, minmin) //is result 0x80....0
if(p2.new) exp = add(exp, #1)
}
#if (__HEXAGON_ARCH__ == 60)
{
p0 = OR(p0, p1)
if( p0.new) manta = OR(manta,maxneg)
if(!p0.new) manta = OR(manta,exp)
}
jumpr r31
#else
{
p0 = OR(p0, p1)
if( p0.new) manta = OR(manta,maxneg)
if(!p0.new) manta = OR(manta,exp)
jumpr r31
}
#endif
/* =================================================================== *
QDOUBLE dsub(QDOUBLE a,QDOUBLE b) {
QDOUBLE c;
lint manta = a & MANTMASK;
int expa = HEXAGON_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = HEXAGON_R_sxth_R(b) ;
int exp, expdiff, j, k, hi, lo, cn;
lint mant;
expdiff = (int) HEXAGON_P_vabsdiffh_PP(a, b);
expdiff = HEXAGON_R_sxth_R(expdiff) ;
if (expdiff > 63) { expdiff = 62;}
if (expa > expb) {
exp = expa + 1;
expa = 1;
expb = expdiff + 1;
} else {
exp = expb + 1;
expb = 1;
expa = expdiff + 1;
}
mant = (manta>>expa) - (mantb>>expb);
hi = (int) (mant>>32);
lo = (int) (mant);
k = HEXAGON_R_normamt_R(hi);
if(hi == 0 || hi == -1) k = 31+HEXAGON_R_normamt_R(lo);
mant = (mant << k);
cn = (mant == 0x8000000000000000LL);
exp = exp - k + cn;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global dsub_asm
.type dsub_asm, @function
dsub_asm:
#define manta R0
#define mantexpa R1:0
#define lmanta R1:0
#define mantb R2
#define mantexpb R3:2
#define lmantb R3:2
#define expa R4
#define expb R5
#define mantexpd R7:6
#define expd R6
#define exp R8
#define c63 R9
#define lmant R1:0
#define manth R1
#define mantl R0
#define zero R7:6
#define zerol R6
#define minus R3:2
#define minusl R2
#define maxneg R9
#define minmin R11:10 // exactly 0x8000000000000000LL
#define minminh R11
#define k R4
#define kl R5
#define ce P0
.falign
{
mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL
c63 = #62
expa = SXTH(manta)
expb = SXTH(mantb)
} {
expd = SXTH(expd)
ce = CMP.GT(expa, expb);
if ( ce.new) exp = add(expa, #1)
if (!ce.new) exp = add(expb, #1)
} {
if ( ce) expa = #1
if (!ce) expb = #1
manta.L = #0
expd = MIN(expd, c63)
} {
if (!ce) expa = add(expd, #1)
if ( ce) expb = add(expd, #1)
mantb.L = #0
zero = #0
} {
lmanta = ASR(lmanta, expa)
lmantb = ASR(lmantb, expb)
minmin = #0
} {
lmant = sub(lmanta, lmantb)
minus = #-1
minminh.H = #0x8000
} {
k = NORMAMT(manth)
kl = NORMAMT(mantl)
p0 = cmp.eq(manth, zerol)
p1 = cmp.eq(manth, minusl)
} {
p0 = OR(p0, p1)
if(p0.new) k = add(kl, #31)
maxneg.H = #0
} {
mantexpa = ASL(lmant, k)
exp = SUB(exp, k)
maxneg.L = #0x8001
} {
p0 = cmp.eq(mantexpa, zero)
p1 = cmp.eq(mantexpa, minus)
manta.L = #0
exp = ZXTH(exp)
} {
p2 = cmp.eq(mantexpa, minmin) //is result 0x80....0
if(p2.new) exp = add(exp, #1)
}
#if (__HEXAGON_ARCH__ == 60)
{
p0 = OR(p0, p1)
if( p0.new) manta = OR(manta,maxneg)
if(!p0.new) manta = OR(manta,exp)
}
jumpr r31
#else
{
p0 = OR(p0, p1)
if( p0.new) manta = OR(manta,maxneg)
if(!p0.new) manta = OR(manta,exp)
jumpr r31
}
#endif
/* ==================================================================== *
QDOUBLE dmpy(QDOUBLE a,QDOUBLE b) {
QDOUBLE c;
lint manta = a & MANTMASK;
int expa = HEXAGON_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = HEXAGON_R_sxth_R(b) ;
int exp, k;
lint mant;
int hia, hib, hi, lo;
unsigned int loa, lob;
hia = (int)(a >> 32);
loa = HEXAGON_R_extractu_RII((int)manta, 31, 1);
hib = (int)(b >> 32);
lob = HEXAGON_R_extractu_RII((int)mantb, 31, 1);
mant = HEXAGON_P_mpy_RR(hia, lob);
mant = HEXAGON_P_mpyacc_RR(mant,hib, loa);
mant = (mant >> 30) + (HEXAGON_P_mpy_RR(hia, hib)<<1);
hi = (int) (mant>>32);
lo = (int) (mant);
k = HEXAGON_R_normamt_R(hi);
if(hi == 0 || hi == -1) k = 31+HEXAGON_R_normamt_R(lo);
mant = mant << k;
exp = expa + expb - k;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global dmpy_asm
.type dmpy_asm, @function
dmpy_asm:
#define mantal R0
#define mantah R1
#define mantexpa R1:0
#define mantbl R2
#define mantbh R3
#define mantexpb R3:2
#define expa R4
#define expb R5
#define mantexpd R7:6
#define exp R8
#define lmantc R11:10
#define mantch R11
#define mantcl R10
#define zero0 R7:6
#define zero0l R6
#define minus1 R3:2
#define minus1l R2
#define maxneg R9
#define k R4
#define kl R5
.falign
{
mantbl = lsr(mantbl, #16)
mantal = lsr(mantal, #16)
expa = sxth(mantal)
expb = sxth(mantbl)
}
{
lmantc = mpy(mantah, mantbh)
mantexpd = mpy(mantah, mantbl)
}
{
lmantc = add(lmantc, lmantc) //<<1
mantexpd+= mpy(mantbh, mantal)
}
{
lmantc += asr(mantexpd, #15)
exp = add(expa, expb)
zero0 = #0
minus1 = #-1
}
{
k = normamt(mantch)
kl = normamt(mantcl)
p0 = cmp.eq(mantch, zero0l)
p1 = cmp.eq(mantch, minus1l)
}
{
p0 = or(p0, p1)
if(p0.new) k = add(kl, #31)
maxneg.H = #0
}
{
mantexpa = asl(lmantc, k)
exp = sub(exp, k)
maxneg.L = #0x8001
}
{
p0 = cmp.eq(mantexpa, zero0)
p1 = cmp.eq(mantexpa, minus1)
mantal.L = #0
exp = zxth(exp)
}
#if (__HEXAGON_ARCH__ == 60)
{
p0 = or(p0, p1)
if( p0.new) mantal = or(mantal,maxneg)
if(!p0.new) mantal = or(mantal,exp)
}
jumpr r31
#else
{
p0 = or(p0, p1)
if( p0.new) mantal = or(mantal,maxneg)
if(!p0.new) mantal = or(mantal,exp)
jumpr r31
}
#endif

View File

@ -0,0 +1,31 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
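// fmaf(x, y, z) = x*y + z, computed with the single-precision
// multiply-accumulate instruction (r2 += r0 * r1).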
FUNCTION_BEGIN fmaf
r2 += sfmpy(r0, r1)
{
r0 = r2
jumpr r31
}
FUNCTION_END fmaf
.globl fmal
.set fmal, fma

View File

@ -0,0 +1,30 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
FUNCTION_BEGIN fmaxf
{
r0 = sfmax(r0, r1)
jumpr r31
}
FUNCTION_END fmaxf
.globl fmaxl
.set fmaxl, fmax

View File

@ -0,0 +1,30 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
FUNCTION_BEGIN fminf
{
r0 = sfmin(r0, r1)
jumpr r31
}
FUNCTION_END fminf
.globl fminl
.set fminl, fmin

View File

@ -0,0 +1,125 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// An optimized version of a memcpy which is equivalent to the following loop:
//
// volatile unsigned *dest;
// unsigned *src;
//
// for (i = 0; i < num_words; ++i)
// *dest++ = *src++;
//
// The corresponding C prototype for this function would be
// void hexagon_memcpy_forward_vp4cp4n2(volatile unsigned *dest,
// const unsigned *src,
// unsigned num_words);
//
// *** Both dest and src must be aligned to 32-bit boundaries. ***
// The code does not perform any runtime checks for this, and will fail
// in bad ways if this requirement is not met.
//
// The "forward" in the name refers to the fact that the function copies
// the words going forward in memory. It is incorrect to use this function
// for cases where the original code copied words in any other order.
//
// *** This function is only for use by the compiler. ***
// The only intended use is for the LLVM compiler to generate calls to
// this function, when a mem-copy loop, like the one above, is detected.
.text
// Inputs:
// r0: dest
// r1: src
// r2: num_words
.globl hexagon_memcpy_forward_vp4cp4n2
.balign 32
.type hexagon_memcpy_forward_vp4cp4n2,@function
hexagon_memcpy_forward_vp4cp4n2:
// Compute r3 to be the number of words remaining in the current page.
// At the same time, compute r4 to be the number of 32-byte blocks
// remaining in the page (for prefetch).
{
r3 = sub(##4096, r1)
r5 = lsr(r2, #3)
}
{
// The word count before end-of-page is in the 12 lowest bits of r3.
// (If the address in r1 was already page-aligned, the bits are 0.)
r3 = extractu(r3, #10, #2)
r4 = extractu(r3, #7, #5)
}
{
r3 = minu(r2, r3)
r4 = minu(r5, r4)
}
{
r4 = or(r4, ##2105344) // 2105344 = 0x202000
p0 = cmp.eq(r3, #0)
if (p0.new) jump:nt .Lskipprolog
}
l2fetch(r1, r4)
{
loop0(.Lprolog, r3)
r2 = sub(r2, r3) // r2 = number of words left after the prolog.
}
.falign
.Lprolog:
{
r4 = memw(r1++#4)
memw(r0++#4) = r4.new
} :endloop0
.Lskipprolog:
{
// Let r3 = number of whole pages left (page = 1024 words).
r3 = lsr(r2, #10)
if (cmp.eq(r3.new, #0)) jump:nt .Lskipmain
}
{
loop1(.Lout, r3)
r2 = extractu(r2, #10, #0) // r2 = r2 & 1023
r3 = ##2105472 // r3 = 0x202080 (prefetch info)
}
// Iterate over pages.
.falign
.Lout:
// Prefetch each individual page.
l2fetch(r1, r3)
loop0(.Lpage, #512)
.falign
.Lpage:
r5:4 = memd(r1++#8)
{
memw(r0++#8) = r4
memw(r0+#4) = r5
} :endloop0:endloop1
.Lskipmain:
{
r3 = ##2105344 // r3 = 0x202000 (prefetch info)
r4 = lsr(r2, #3) // r4 = number of 32-byte blocks remaining.
p0 = cmp.eq(r2, #0)
if (p0.new) jumpr:nt r31
}
{
r3 = or(r3, r4)
loop0(.Lepilog, r2)
}
l2fetch(r1, r3)
.falign
.Lepilog:
{
r4 = memw(r1++#4)
memw(r0++#4) = r4.new
} :endloop0
jumpr r31
.size hexagon_memcpy_forward_vp4cp4n2, . - hexagon_memcpy_forward_vp4cp4n2

View File

@ -0,0 +1,64 @@
//===------------------------- memcopy routines ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
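// Specialized memcpy for copies of at least 32 bytes whose length is a
// multiple of 8: if both pointers are 8-byte aligned, copy one doubleword
// per iteration; otherwise tail-call the ordinary memcpy.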
FUNCTION_BEGIN __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
{
p0 = bitsclr(r1,#7)
p0 = bitsclr(r0,#7)
if (p0.new) r5:4 = memd(r1)
r3 = #-3
}
{
if (!p0) jump .Lmemcpy_call
if (p0) memd(r0++#8) = r5:4
if (p0) r5:4 = memd(r1+#8)
r3 += lsr(r2,#3)
}
{
memd(r0++#8) = r5:4
r5:4 = memd(r1+#16)
r1 = add(r1,#24)
loop0(1f,r3)
}
.falign
1:
{
memd(r0++#8) = r5:4
r5:4 = memd(r1++#8)
}:endloop0
{
memd(r0) = r5:4
r0 -= add(r2,#-8)
jumpr r31
}
FUNCTION_END __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
.Lmemcpy_call:
#ifdef __PIC__
jump memcpy@PLT
#else
jump memcpy
#endif
.globl __qdsp_memcpy_likely_aligned_min32bytes_mult8bytes
.set __qdsp_memcpy_likely_aligned_min32bytes_mult8bytes, \
__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes

View File

@ -0,0 +1,83 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
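// Signed 64-bit remainder (r1:0 % r3:2): divide the magnitudes, then give
// the remainder the sign of the dividend.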
FUNCTION_BEGIN __hexagon_moddi3
{
p3 = tstbit(r1,#31)
}
{
r1:0 = abs(r1:0)
r3:2 = abs(r3:2)
}
{
r6 = cl0(r1:0) // count leading 0's of dividend (numerator)
r7 = cl0(r3:2) // count leading 0's of divisor (denominator)
r5:4 = r3:2 // divisor moved into working registers
r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder
}
{
r10 = sub(r7,r6) // left shift count for bit & divisor
r1:0 = #0 // initialize quotient to 0
r15:14 = #1 // initialize bit to 1
}
{
r11 = add(r10,#1) // loop count is 1 more than shift count
r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb
r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor
}
{
p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend
loop0(1f,r11) // register loop
}
{
if (p0) jump .hexagon_moddi3_return // if divisor > dividend, we're done, so return
}
.falign
1:
{
p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder
}
{
r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder
r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8)
}
{
r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8)
r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6)
}
{
r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration
r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration
}:endloop0
.hexagon_moddi3_return:
{
r1:0 = neg(r3:2)
}
{
r1:0 = vmux(p3,r1:0,r3:2)
jumpr r31
}
FUNCTION_END __hexagon_moddi3
.globl __qdsp_moddi3
.set __qdsp_moddi3, __hexagon_moddi3

View File

@ -0,0 +1,66 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
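// Signed 32-bit remainder (r0 % r1): the result takes the sign of the
// dividend.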
FUNCTION_BEGIN __hexagon_modsi3
{
p2 = cmp.ge(r0,#0)
r2 = abs(r0)
r1 = abs(r1)
}
{
r3 = cl0(r2)
r4 = cl0(r1)
p0 = cmp.gtu(r1,r2)
}
{
r3 = sub(r4,r3)
if (p0) jumpr r31
}
{
p1 = cmp.eq(r3,#0)
loop0(1f,r3)
r0 = r2
r2 = lsl(r1,r3)
}
.falign
1:
{
p0 = cmp.gtu(r2,r0)
if (!p0.new) r0 = sub(r0,r2)
r2 = lsr(r2,#1)
if (p1) r1 = #0
}:endloop0
{
p0 = cmp.gtu(r2,r0)
if (!p0.new) r0 = sub(r0,r1)
if (p2) jumpr r31
}
{
r0 = neg(r0)
jumpr r31
}
FUNCTION_END __hexagon_modsi3
.globl __qdsp_modsi3
.set __qdsp_modsi3, __hexagon_modsi3

View File

@ -0,0 +1,66 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
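// Single-precision divide: start from the hardware reciprocal estimate
// (sfrecipa) and refine it with Newton-Raphson steps before the final
// scaling step.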
FUNCTION_BEGIN __hexagon_divsf3
{
r2,p0 = sfrecipa(r0,r1)
r4 = sffixupd(r0,r1)
r3 = ##0x3f800000 // 1.0
}
{
r5 = sffixupn(r0,r1)
r3 -= sfmpy(r4,r2):lib // error term: 1 - den*recip
r6 = ##0x80000000
r7 = r3
}
{
r2 += sfmpy(r3,r2):lib
r3 = r7
r6 = r5
r0 = and(r6,r5)
}
{
r3 -= sfmpy(r4,r2):lib
r0 += sfmpy(r5,r2):lib
}
{
r2 += sfmpy(r3,r2):lib
r6 -= sfmpy(r0,r4):lib
}
{
r0 += sfmpy(r6,r2):lib
}
{
r5 -= sfmpy(r0,r4):lib
}
{
r0 += sfmpy(r5,r2,p0):scale
jumpr r31
}
FUNCTION_END __hexagon_divsf3
Q6_ALIAS(divsf3)
FAST_ALIAS(divsf3)
FAST2_ALIAS(divsf3)

View File

@ -0,0 +1,82 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
#define RIN r0
#define S r0
#define H r1
#define D r2
#define E r3
#define HALF r4
#define R r5
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
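// Single-precision square root: start from the hardware inverse square
// root estimate (sfinvsqrta) and refine it with Newton-Raphson steps;
// +/-0.0 is returned unchanged.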
FUNCTION_BEGIN __hexagon_sqrtf
{
E,p0 = sfinvsqrta(RIN)
R = sffixupr(RIN)
HALF = ##0x3f000000 // 0.5
r1:0 = combine(#0,#0) // clear S/H
}
{
S += sfmpy(E,R):lib // S0
H += sfmpy(E,HALF):lib // H0
D = HALF
E = R
}
{
D -= sfmpy(S,H):lib // d0
p1 = sfclass(R,#1) // is zero?
//E -= sfmpy(S,S):lib // e0
}
{
S += sfmpy(S,D):lib // S1
H += sfmpy(H,D):lib // H1
D = HALF
E = R
}
{
D -= sfmpy(S,H):lib // d0
E -= sfmpy(S,S):lib // e0
}
{
S += sfmpy(H,E):lib // S2
H += sfmpy(H,D):lib // H2
D = HALF
E = R
}
{
//D -= sfmpy(S,H):lib // d2
E -= sfmpy(S,S):lib // e2
if (p1) r0 = or(r0,R) // sqrt(-0.0) = -0.0
}
{
S += sfmpy(H,E,p0):scale // S3
jumpr r31
}
FUNCTION_END __hexagon_sqrtf
Q6_ALIAS(sqrtf)
FAST_ALIAS(sqrtf)
FAST2_ALIAS(sqrtf)

View File

@ -0,0 +1,71 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
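// Unsigned 64-bit divide (r1:0 / r3:2) using the same shift-and-subtract
// loop; the quotient is returned in r1:0.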
FUNCTION_BEGIN __hexagon_udivdi3
{
r6 = cl0(r1:0) // count leading 0's of dividend (numerator)
r7 = cl0(r3:2) // count leading 0's of divisor (denominator)
r5:4 = r3:2 // divisor moved into working registers
r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder
}
{
r10 = sub(r7,r6) // left shift count for bit & divisor
r1:0 = #0 // initialize quotient to 0
r15:14 = #1 // initialize bit to 1
}
{
r11 = add(r10,#1) // loop count is 1 more than shift count
r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb
r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor
}
{
p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend
loop0(1f,r11) // register loop
}
{
if (p0) jumpr r31 // if divisor > dividend, we're done, so return
}
.falign
1:
{
p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder
}
{
r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder
r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8)
}
{
r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8)
r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6)
}
{
r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration
r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration
}:endloop0
{
jumpr r31 // return
}
FUNCTION_END __hexagon_udivdi3
.globl __qdsp_udivdi3
.set __qdsp_udivdi3, __hexagon_udivdi3

View File

@ -0,0 +1,71 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
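// Unsigned 64-bit divide with remainder: on return the quotient is in
// r1:0 and the remainder is left in r3:2.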
FUNCTION_BEGIN __hexagon_udivmoddi4
{
r6 = cl0(r1:0) // count leading 0's of dividend (numerator)
r7 = cl0(r3:2) // count leading 0's of divisor (denominator)
r5:4 = r3:2 // divisor moved into working registers
r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder
}
{
r10 = sub(r7,r6) // left shift count for bit & divisor
r1:0 = #0 // initialize quotient to 0
r15:14 = #1 // initialize bit to 1
}
{
r11 = add(r10,#1) // loop count is 1 more than shift count
r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb
r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor
}
{
p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend
loop0(1f,r11) // register loop
}
{
if (p0) jumpr r31 // if divisor > dividend, we're done, so return
}
.falign
1:
{
p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder
}
{
r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder
r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8)
}
{
r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8)
r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6)
}
{
r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration
r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration
}:endloop0
{
jumpr r31 // return
}
FUNCTION_END __hexagon_udivmoddi4
.globl __qdsp_udivmoddi4
.set __qdsp_udivmoddi4, __hexagon_udivmoddi4

View File

@ -0,0 +1,60 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
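// Unsigned 32-bit divide with remainder: on return the quotient is in r0
// and the remainder is in r1.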
FUNCTION_BEGIN __hexagon_udivmodsi4
{
r2 = cl0(r0)
r3 = cl0(r1)
r5:4 = combine(#1,#0)
p0 = cmp.gtu(r1,r0)
}
{
r6 = sub(r3,r2)
r4 = r1
r1:0 = combine(r0,r4)
if (p0) jumpr r31
}
{
r3:2 = vlslw(r5:4,r6)
loop0(1f,r6)
p0 = cmp.eq(r6,#0)
if (p0.new) r4 = #0
}
.falign
1:
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r1 = sub(r1,r2)
if (!p0.new) r0 = add(r0,r3)
r3:2 = vlsrw(r3:2,#1)
}:endloop0
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r1 = sub(r1,r4)
if (!p0.new) r0 = add(r0,r3)
jumpr r31
}
FUNCTION_END __hexagon_udivmodsi4
.globl __qdsp_udivmodsi4
.set __qdsp_udivmodsi4, __hexagon_udivmodsi4

View File

@ -0,0 +1,56 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
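// Unsigned 32-bit divide (r0 / r1): shift-and-subtract, quotient in r0.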
FUNCTION_BEGIN __hexagon_udivsi3
{
r2 = cl0(r0)
r3 = cl0(r1)
r5:4 = combine(#1,#0)
p0 = cmp.gtu(r1,r0)
}
{
r6 = sub(r3,r2)
r4 = r1
r1:0 = combine(r0,r4)
if (p0) jumpr r31
}
{
r3:2 = vlslw(r5:4,r6)
loop0(1f,r6)
}
.falign
1:
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r1 = sub(r1,r2)
if (!p0.new) r0 = add(r0,r3)
r3:2 = vlsrw(r3:2,#1)
}:endloop0
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r0 = add(r0,r3)
jumpr r31
}
FUNCTION_END __hexagon_udivsi3
.globl __qdsp_udivsi3
.set __qdsp_udivsi3, __hexagon_udivsi3

View File

@ -0,0 +1,74 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
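// Unsigned 64-bit remainder (r1:0 % r3:2), returned in r1:0.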
FUNCTION_BEGIN __hexagon_umoddi3
{
r6 = cl0(r1:0) // count leading 0's of dividend (numerator)
r7 = cl0(r3:2) // count leading 0's of divisor (denominator)
r5:4 = r3:2 // divisor moved into working registers
r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder
}
{
r10 = sub(r7,r6) // left shift count for bit & divisor
r1:0 = #0 // initialize quotient to 0
r15:14 = #1 // initialize bit to 1
}
{
r11 = add(r10,#1) // loop count is 1 more than shift count
r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb
r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor
}
{
p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend
loop0(1f,r11) // register loop
}
{
if (p0) jump .hexagon_umoddi3_return // if divisor > dividend, we're done, so return
}
.falign
1:
{
p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder
}
{
r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder
r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8)
}
{
r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8)
r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6)
}
{
r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration
r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration
}:endloop0
.hexagon_umoddi3_return:
{
r1:0 = r3:2
jumpr r31
}
FUNCTION_END __hexagon_umoddi3
.globl __qdsp_umoddi3
.set __qdsp_umoddi3, __hexagon_umoddi3

View File

@ -0,0 +1,55 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
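// Unsigned 32-bit remainder (r0 % r1), returned in r0.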
FUNCTION_BEGIN __hexagon_umodsi3
{
r2 = cl0(r0)
r3 = cl0(r1)
p0 = cmp.gtu(r1,r0)
}
{
r2 = sub(r3,r2)
if (p0) jumpr r31
}
{
loop0(1f,r2)
p1 = cmp.eq(r2,#0)
r2 = lsl(r1,r2)
}
.falign
1:
{
p0 = cmp.gtu(r2,r0)
if (!p0.new) r0 = sub(r0,r2)
r2 = lsr(r2,#1)
if (p1) r1 = #0
}:endloop0
{
p0 = cmp.gtu(r2,r0)
if (!p0.new) r0 = sub(r0,r1)
jumpr r31
}
FUNCTION_END __hexagon_umodsi3
.globl __qdsp_umodsi3
.set __qdsp_umodsi3, __hexagon_umodsi3

View File

@ -60,7 +60,7 @@ typedef union
}s;
} udwords;
#if (defined(__LP64__) || defined(__wasm__) || defined(__mips64))
#if (defined(__LP64__) || defined(__wasm__) || defined(__mips64)) || defined(__riscv)
#define CRT_HAS_128BIT
#endif

View File

@ -16,8 +16,8 @@
#ifdef __APPLE__
#include <CoreFoundation/CoreFoundation.h>
#include <dispatch/dispatch.h>
#include <TargetConditionals.h>
#include <dispatch/dispatch.h>
#include <dlfcn.h>
#include <stdint.h>
#include <stdio.h>
@ -28,6 +28,26 @@
static int32_t GlobalMajor, GlobalMinor, GlobalSubminor;
static dispatch_once_t DispatchOnceCounter;
typedef CFDataRef (*CFDataCreateWithBytesNoCopyFuncTy)(CFAllocatorRef,
const UInt8 *, CFIndex,
CFAllocatorRef);
typedef CFPropertyListRef (*CFPropertyListCreateWithDataFuncTy)(
CFAllocatorRef, CFDataRef, CFOptionFlags, CFPropertyListFormat *,
CFErrorRef *);
typedef CFPropertyListRef (*CFPropertyListCreateFromXMLDataFuncTy)(
CFAllocatorRef, CFDataRef, CFOptionFlags, CFStringRef *);
typedef CFStringRef (*CFStringCreateWithCStringNoCopyFuncTy)(CFAllocatorRef,
const char *,
CFStringEncoding,
CFAllocatorRef);
typedef const void *(*CFDictionaryGetValueFuncTy)(CFDictionaryRef,
const void *);
typedef CFTypeID (*CFGetTypeIDFuncTy)(CFTypeRef);
typedef CFTypeID (*CFStringGetTypeIDFuncTy)(void);
typedef Boolean (*CFStringGetCStringFuncTy)(CFStringRef, char *, CFIndex,
CFStringEncoding);
typedef void (*CFReleaseFuncTy)(CFTypeRef);
/* Find and parse the SystemVersion.plist file. */
static void parseSystemVersionPList(void *Unused) {
(void)Unused;
@ -37,50 +57,49 @@ static void parseSystemVersionPList(void *Unused) {
return;
const CFAllocatorRef kCFAllocatorNull =
*(const CFAllocatorRef *)NullAllocator;
typeof(CFDataCreateWithBytesNoCopy) *CFDataCreateWithBytesNoCopyFunc =
(typeof(CFDataCreateWithBytesNoCopy) *)dlsym(
RTLD_DEFAULT, "CFDataCreateWithBytesNoCopy");
CFDataCreateWithBytesNoCopyFuncTy CFDataCreateWithBytesNoCopyFunc =
(CFDataCreateWithBytesNoCopyFuncTy)dlsym(RTLD_DEFAULT,
"CFDataCreateWithBytesNoCopy");
if (!CFDataCreateWithBytesNoCopyFunc)
return;
typeof(CFPropertyListCreateWithData) *CFPropertyListCreateWithDataFunc =
(typeof(CFPropertyListCreateWithData) *)dlsym(
CFPropertyListCreateWithDataFuncTy CFPropertyListCreateWithDataFunc =
(CFPropertyListCreateWithDataFuncTy)dlsym(
RTLD_DEFAULT, "CFPropertyListCreateWithData");
/* CFPropertyListCreateWithData was introduced only in macOS 10.6+, so it
* will be NULL on earlier OS versions. */
/* CFPropertyListCreateWithData was introduced only in macOS 10.6+, so it
* will be NULL on earlier OS versions. */
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
typeof(CFPropertyListCreateFromXMLData) *CFPropertyListCreateFromXMLDataFunc =
(typeof(CFPropertyListCreateFromXMLData) *)dlsym(
CFPropertyListCreateFromXMLDataFuncTy CFPropertyListCreateFromXMLDataFunc =
(CFPropertyListCreateFromXMLDataFuncTy)dlsym(
RTLD_DEFAULT, "CFPropertyListCreateFromXMLData");
#pragma clang diagnostic pop
/* CFPropertyListCreateFromXMLDataFunc is deprecated in macOS 10.10, so it
* might be NULL in future OS versions. */
if (!CFPropertyListCreateWithDataFunc && !CFPropertyListCreateFromXMLDataFunc)
return;
typeof(CFStringCreateWithCStringNoCopy) *CFStringCreateWithCStringNoCopyFunc =
(typeof(CFStringCreateWithCStringNoCopy) *)dlsym(
CFStringCreateWithCStringNoCopyFuncTy CFStringCreateWithCStringNoCopyFunc =
(CFStringCreateWithCStringNoCopyFuncTy)dlsym(
RTLD_DEFAULT, "CFStringCreateWithCStringNoCopy");
if (!CFStringCreateWithCStringNoCopyFunc)
return;
typeof(CFDictionaryGetValue) *CFDictionaryGetValueFunc =
(typeof(CFDictionaryGetValue) *)dlsym(RTLD_DEFAULT,
"CFDictionaryGetValue");
CFDictionaryGetValueFuncTy CFDictionaryGetValueFunc =
(CFDictionaryGetValueFuncTy)dlsym(RTLD_DEFAULT, "CFDictionaryGetValue");
if (!CFDictionaryGetValueFunc)
return;
typeof(CFGetTypeID) *CFGetTypeIDFunc =
(typeof(CFGetTypeID) *)dlsym(RTLD_DEFAULT, "CFGetTypeID");
CFGetTypeIDFuncTy CFGetTypeIDFunc =
(CFGetTypeIDFuncTy)dlsym(RTLD_DEFAULT, "CFGetTypeID");
if (!CFGetTypeIDFunc)
return;
typeof(CFStringGetTypeID) *CFStringGetTypeIDFunc =
(typeof(CFStringGetTypeID) *)dlsym(RTLD_DEFAULT, "CFStringGetTypeID");
CFStringGetTypeIDFuncTy CFStringGetTypeIDFunc =
(CFStringGetTypeIDFuncTy)dlsym(RTLD_DEFAULT, "CFStringGetTypeID");
if (!CFStringGetTypeIDFunc)
return;
typeof(CFStringGetCString) *CFStringGetCStringFunc =
(typeof(CFStringGetCString) *)dlsym(RTLD_DEFAULT, "CFStringGetCString");
CFStringGetCStringFuncTy CFStringGetCStringFunc =
(CFStringGetCStringFuncTy)dlsym(RTLD_DEFAULT, "CFStringGetCString");
if (!CFStringGetCStringFunc)
return;
typeof(CFRelease) *CFReleaseFunc =
(typeof(CFRelease) *)dlsym(RTLD_DEFAULT, "CFRelease");
CFReleaseFuncTy CFReleaseFunc =
(CFReleaseFuncTy)dlsym(RTLD_DEFAULT, "CFRelease");
if (!CFReleaseFunc)
return;
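The block above replaces GNU typeof() with explicit function-pointer typedefs for every CoreFoundation symbol it looks up at run time. As a stand-alone illustration of that dlsym-plus-typedef pattern (not part of this diff; puts is used purely as an example symbol, RTLD_DEFAULT is assumed to be available, and older glibc setups may need -ldl when linking):

#include <dlfcn.h>
#include <cstdio>

typedef int (*PutsFuncTy)(const char *);

int main() {
  // RTLD_DEFAULT searches the images already loaded into the process, just as
  // the availability runtime does for the CoreFoundation symbols above.
  PutsFuncTy PutsFunc = (PutsFuncTy)dlsym(RTLD_DEFAULT, "puts");
  if (!PutsFunc)
    return 1;  // Symbol unavailable; bail out, as parseSystemVersionPList does.
  return PutsFunc("hello from dlsym") >= 0 ? 0 : 1;
}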
@ -163,10 +182,14 @@ int32_t __isOSVersionAtLeast(int32_t Major, int32_t Minor, int32_t Subminor) {
/* Populate the global version variables, if they haven't already. */
dispatch_once_f(&DispatchOnceCounter, NULL, parseSystemVersionPList);
if (Major < GlobalMajor) return 1;
if (Major > GlobalMajor) return 0;
if (Minor < GlobalMinor) return 1;
if (Minor > GlobalMinor) return 0;
if (Major < GlobalMajor)
return 1;
if (Major > GlobalMajor)
return 0;
if (Minor < GlobalMinor)
return 1;
if (Minor > GlobalMinor)
return 0;
return Subminor <= GlobalSubminor;
}

View File

@ -0,0 +1,28 @@
//===--- mulsi3.S - Integer multiplication routines ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#if !defined(__riscv_mul) && __riscv_xlen == 32
.text
.align 2
.globl __mulsi3
.type __mulsi3, @function
__mulsi3:
mv a2, a0
mv a0, zero
.L1:
andi a3, a1, 1
beqz a3, .L2
add a0, a0, a2
.L2:
srli a1, a1, 1
slli a2, a2, 1
bnez a1, .L1
ret
#endif
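For readers following the assembly, here is a minimal C++ sketch (an illustration only, not part of the builtins library) of the same shift-and-add loop; acc, addend and the shrinking multiplier mirror the roles of a0, a2 and a1 above:

#include <cassert>
#include <cstdint>

// Add the (progressively left-shifted) multiplicand once for every set bit of
// the multiplier; the product wraps modulo 2^32, as the hardware would.
static uint32_t mulsi3_sketch(uint32_t a, uint32_t b) {
  uint32_t acc = 0;     // a0: running sum
  uint32_t addend = a;  // a2: multiplicand, shifted left each iteration
  while (b != 0) {      // a1: multiplier, shifted right each iteration
    if (b & 1)
      acc += addend;
    b >>= 1;
    addend <<= 1;
  }
  return acc;
}

int main() {
  assert(mulsi3_sketch(6, 7) == 42);
  assert(mulsi3_sketch(0xffffffffu, 2) == 0xfffffffeu);  // wraps like __mulsi3
  return 0;
}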

View File

@ -132,7 +132,11 @@ void ShadowBuilder::Start() {
void ShadowBuilder::AddUnchecked(uptr begin, uptr end) {
uint16_t *shadow_begin = MemToShadow(begin, shadow_);
uint16_t *shadow_end = MemToShadow(end - 1, shadow_) + 1;
memset(shadow_begin, kUncheckedShadow,
  // memset takes a byte, so our unchecked shadow value requires both bytes to
  // be the same. The static_assert below verifies this at compile time.
static_assert((kUncheckedShadow & 0xff) == ((kUncheckedShadow >> 8) & 0xff),
"Both bytes of the 16-bit value must be the same!");
memset(shadow_begin, kUncheckedShadow & 0xff,
(shadow_end - shadow_begin) * sizeof(*shadow_begin));
}
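A self-contained illustration (assumption: the unchecked value is a pattern such as 0xAAAA whose two bytes match, which is exactly what the static_assert above enforces) of why a byte-wise memset can fill 16-bit shadow cells:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const uint16_t kPattern = 0xAAAA;  // both bytes equal, so memset can produce it
  uint16_t cells[8];
  std::memset(cells, kPattern & 0xff, sizeof(cells));
  for (uint16_t c : cells)
    assert(c == kPattern);  // every 16-bit cell now holds the full pattern
  // A value like 0xAB00 could never be written this way: memset repeats a
  // single byte, so the two halves of each cell would come out identical.
  return 0;
}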
@ -379,6 +383,8 @@ __cfi_slowpath_diag(u64 CallSiteTypeId, void *Ptr, void *DiagData) {
}
#endif
static void EnsureInterceptorsInitialized();
// Setup shadow for dlopen()ed libraries.
// The actual shadow setup happens after dlopen() returns, which means that
// a library can not be a target of any CFI checks while its constructors are
@ -388,6 +394,7 @@ __cfi_slowpath_diag(u64 CallSiteTypeId, void *Ptr, void *DiagData) {
// We could insert a high-priority constructor into the library, but that would
// not help with the uninstrumented libraries.
INTERCEPTOR(void*, dlopen, const char *filename, int flag) {
EnsureInterceptorsInitialized();
EnterLoader();
void *handle = REAL(dlopen)(filename, flag);
ExitLoader();
@ -395,12 +402,27 @@ INTERCEPTOR(void*, dlopen, const char *filename, int flag) {
}
INTERCEPTOR(int, dlclose, void *handle) {
EnsureInterceptorsInitialized();
EnterLoader();
int res = REAL(dlclose)(handle);
ExitLoader();
return res;
}
static BlockingMutex interceptor_init_lock(LINKER_INITIALIZED);
static bool interceptors_inited = false;
static void EnsureInterceptorsInitialized() {
BlockingMutexLock lock(&interceptor_init_lock);
if (interceptors_inited)
return;
INTERCEPT_FUNCTION(dlopen);
INTERCEPT_FUNCTION(dlclose);
interceptors_inited = true;
}
extern "C" SANITIZER_INTERFACE_ATTRIBUTE
#if !SANITIZER_CAN_USE_PREINIT_ARRAY
// On ELF platforms, the constructor is invoked using .preinit_array (see below)
@ -411,9 +433,6 @@ void __cfi_init() {
InitializeFlags();
InitShadow();
INTERCEPT_FUNCTION(dlopen);
INTERCEPT_FUNCTION(dlclose);
#ifdef CFI_ENABLE_DIAG
__ubsan::InitAsPlugin();
#endif

View File

@ -1,7 +1,9 @@
[cfi-unrelated-cast]
# std::get_temporary_buffer, likewise (libstdc++, libc++).
# The specification of std::get_temporary_buffer mandates a cast to
# uninitialized T* (libstdc++, libc++, MSVC stdlib).
fun:_ZSt20get_temporary_buffer*
fun:_ZNSt3__120get_temporary_buffer*
fun:*get_temporary_buffer@.*@std@@*
# STL address-of magic (libstdc++, libc++).
fun:*__addressof*

View File

@ -425,7 +425,8 @@ static void dfsan_init(int argc, char **argv, char **envp) {
InitializePlatformEarly();
MmapFixedNoReserve(ShadowAddr(), UnusedAddr() - ShadowAddr());
if (!MmapFixedNoReserve(ShadowAddr(), UnusedAddr() - ShadowAddr()))
Die();
// Protect the region of memory we don't use, to preserve the one-to-one
// mapping from application to shadow memory. But if ASLR is disabled, Linux

View File

@ -1132,4 +1132,26 @@ int __dfsw_snprintf(char *str, size_t size, const char *format,
va_end(ap);
return ret;
}
} // extern "C"
// Default empty implementations (weak). Users should redefine them.
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_guard, u32 *) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_guard_init, u32 *,
u32 *) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_pcs_init, void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_indir, void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_cmp, void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_cmp1, void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_cmp2, void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_cmp4, void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_cmp8, void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_const_cmp1,
void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_const_cmp2,
void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_const_cmp4,
void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_const_cmp8,
void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_switch, void) {}
} // extern "C"
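The SANITIZER_INTERFACE_WEAK_DEF definitions above are defaults that a user can replace with a strong definition. A minimal sketch of that weak-default pattern using plain GCC/Clang attributes (my_trace_hook is a made-up name; the real sanitizer macros also add visibility and naming details not shown here):

#include <cstdio>

// Weak default: the linker keeps it only if no strong definition exists.
extern "C" __attribute__((weak)) void my_trace_hook(unsigned *guard) {
  (void)guard;  // default behaviour: do nothing
}

// A user could supply, in another translation unit:
//   extern "C" void my_trace_hook(unsigned *guard) {
//     std::printf("guard at %p hit\n", (void *)guard);
//   }

int main() {
  unsigned guard = 0;
  my_trace_hook(&guard);  // calls whichever definition the linker selected
  return 0;
}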

View File

@ -264,8 +264,6 @@ fun:reflect.makeFuncStub=discard
# lib/Fuzzer
###############################################################################
# Replaces __sanitizer_cov_trace_cmp with __dfsw___sanitizer_cov_trace_cmp
fun:__sanitizer_cov_trace_cmp=custom
fun:__sanitizer_cov_trace_cmp=uninstrumented
fun:__sanitizer_cov_trace_cmp1=custom
fun:__sanitizer_cov_trace_cmp1=uninstrumented
fun:__sanitizer_cov_trace_cmp2=custom
@ -274,6 +272,14 @@ fun:__sanitizer_cov_trace_cmp4=custom
fun:__sanitizer_cov_trace_cmp4=uninstrumented
fun:__sanitizer_cov_trace_cmp8=custom
fun:__sanitizer_cov_trace_cmp8=uninstrumented
fun:__sanitizer_cov_trace_const_cmp1=custom
fun:__sanitizer_cov_trace_const_cmp1=uninstrumented
fun:__sanitizer_cov_trace_const_cmp2=custom
fun:__sanitizer_cov_trace_const_cmp2=uninstrumented
fun:__sanitizer_cov_trace_const_cmp4=custom
fun:__sanitizer_cov_trace_const_cmp4=uninstrumented
fun:__sanitizer_cov_trace_const_cmp8=custom
fun:__sanitizer_cov_trace_const_cmp8=uninstrumented
# Similar for __sanitizer_cov_trace_switch
fun:__sanitizer_cov_trace_switch=custom
fun:__sanitizer_cov_trace_switch=uninstrumented
@ -289,10 +295,11 @@ fun:__sanitizer_set_death_callback=uninstrumented
fun:__sanitizer_set_death_callback=discard
fun:__sanitizer_update_counter_bitset_and_clear_counters=uninstrumented
fun:__sanitizer_update_counter_bitset_and_clear_counters=discard
fun:__sanitizer_cov_trace_pc*=uninstrumented
fun:__sanitizer_cov_trace_pc*=discard
fun:__sanitizer_cov_pcs_init=uninstrumented
fun:__sanitizer_cov_pcs_init=discard
# Ignores the dfsan wrappers.
fun:__dfsw_*=uninstrumented
fun:__dfsw_*=discard
# Don't add extra parameters to the Fuzzer callback.
fun:LLVMFuzzerTestOneInput=uninstrumented

View File

@ -163,15 +163,15 @@ static void initializeShadow() {
VPrintf(1, "Shadow #%d: [%zx-%zx) (%zuGB)\n", i, ShadowStart, ShadowEnd,
(ShadowEnd - ShadowStart) >> 30);
uptr Map;
uptr Map = 0;
if (__esan_which_tool == ESAN_WorkingSet) {
// We want to identify all shadow pages that are touched so we start
// out inaccessible.
Map = (uptr)MmapFixedNoAccess(ShadowStart, ShadowEnd- ShadowStart,
"shadow");
} else {
Map = (uptr)MmapFixedNoReserve(ShadowStart, ShadowEnd - ShadowStart,
"shadow");
if (MmapFixedNoReserve(ShadowStart, ShadowEnd - ShadowStart, "shadow"))
Map = ShadowStart;
}
if (Map != ShadowStart) {
Printf("FATAL: EfficiencySanitizer failed to map its shadow memory.\n");

View File

@ -175,6 +175,15 @@ DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, uptr)
do { \
} while (false)
#define COMMON_INTERCEPTOR_MMAP_IMPL(ctx, mmap, addr, sz, prot, flags, fd, \
off) \
do { \
if (!fixMmapAddr(&addr, sz, flags)) \
return (void *)-1; \
void *result = REAL(mmap)(addr, sz, prot, flags, fd, off); \
return (void *)checkMmapResult((uptr)result, sz); \
} while (false)
#include "sanitizer_common/sanitizer_common_interceptors.inc"
//===----------------------------------------------------------------------===//
@ -232,6 +241,7 @@ DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, uptr)
} while (false)
#include "sanitizer_common/sanitizer_common_syscalls.inc"
#include "sanitizer_common/sanitizer_syscalls_netbsd.inc"
//===----------------------------------------------------------------------===//
// Custom interceptors
@ -306,13 +316,6 @@ INTERCEPTOR(int, unlink, char *path) {
return REAL(unlink)(path);
}
INTERCEPTOR(int, puts, const char *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, puts, s);
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, internal_strlen(s));
return REAL(puts)(s);
}
INTERCEPTOR(int, rmdir, char *path) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, rmdir, path);
@ -320,44 +323,6 @@ INTERCEPTOR(int, rmdir, char *path) {
return REAL(rmdir)(path);
}
//===----------------------------------------------------------------------===//
// Shadow-related interceptors
//===----------------------------------------------------------------------===//
// These are candidates for sharing with all sanitizers if shadow memory
// support is also standardized.
INTERCEPTOR(void *, mmap, void *addr, SIZE_T sz, int prot, int flags,
int fd, OFF_T off) {
if (UNLIKELY(REAL(mmap) == nullptr)) {
// With esan init during interceptor init and a static libc preventing
// our early-calloc from triggering, we can end up here before our
// REAL pointer is set up.
return (void *)internal_mmap(addr, sz, prot, flags, fd, off);
}
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, mmap, addr, sz, prot, flags, fd, off);
if (!fixMmapAddr(&addr, sz, flags))
return (void *)-1;
void *result = REAL(mmap)(addr, sz, prot, flags, fd, off);
return (void *)checkMmapResult((uptr)result, sz);
}
#if SANITIZER_LINUX
INTERCEPTOR(void *, mmap64, void *addr, SIZE_T sz, int prot, int flags,
int fd, OFF64_T off) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, mmap64, addr, sz, prot, flags, fd, off);
if (!fixMmapAddr(&addr, sz, flags))
return (void *)-1;
void *result = REAL(mmap64)(addr, sz, prot, flags, fd, off);
return (void *)checkMmapResult((uptr)result, sz);
}
#define ESAN_MAYBE_INTERCEPT_MMAP64 INTERCEPT_FUNCTION(mmap64)
#else
#define ESAN_MAYBE_INTERCEPT_MMAP64
#endif
//===----------------------------------------------------------------------===//
// Signal-related interceptors
//===----------------------------------------------------------------------===//
@ -521,14 +486,8 @@ void initializeInterceptors() {
INTERCEPT_FUNCTION(creat);
ESAN_MAYBE_INTERCEPT_CREAT64;
INTERCEPT_FUNCTION(unlink);
INTERCEPT_FUNCTION(fread);
INTERCEPT_FUNCTION(fwrite);
INTERCEPT_FUNCTION(puts);
INTERCEPT_FUNCTION(rmdir);
INTERCEPT_FUNCTION(mmap);
ESAN_MAYBE_INTERCEPT_MMAP64;
ESAN_MAYBE_INTERCEPT_SIGNAL;
ESAN_MAYBE_INTERCEPT_SIGACTION;
ESAN_MAYBE_INTERCEPT_SIGPROCMASK;

View File

@ -70,7 +70,7 @@ int SidelineThread::runSideline(void *Arg) {
internal_prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
// Set up a signal handler on an alternate stack for safety.
InternalScopedBuffer<char> StackMap(SigAltStackSize);
InternalMmapVector<char> StackMap(SigAltStackSize);
stack_t SigAltStack;
SigAltStack.ss_sp = StackMap.data();
SigAltStack.ss_size = SigAltStackSize;

View File

@ -1,4 +1,4 @@
//===-- hwasan.cc -----------------------------------------------------------===//
//===-- hwasan.cc ---------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@ -13,8 +13,10 @@
//===----------------------------------------------------------------------===//
#include "hwasan.h"
#include "hwasan_thread.h"
#include "hwasan_mapping.h"
#include "hwasan_poisoning.h"
#include "hwasan_report.h"
#include "hwasan_thread.h"
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_flags.h"
@ -84,7 +86,7 @@ static void InitializeFlags() {
cf.check_printf = false;
cf.intercept_tls_get_addr = true;
cf.exitcode = 99;
cf.handle_sigill = kHandleSignalExclusive;
cf.handle_sigtrap = kHandleSignalExclusive;
OverrideCommonFlags(cf);
}
@ -143,12 +145,22 @@ void PrintWarning(uptr pc, uptr bp) {
ReportInvalidAccess(&stack, 0);
}
static void HWAsanCheckFailed(const char *file, int line, const char *cond,
u64 v1, u64 v2) {
Report("HWAddressSanitizer CHECK failed: %s:%d \"%s\" (0x%zx, 0x%zx)\n", file,
line, cond, (uptr)v1, (uptr)v2);
PRINT_CURRENT_STACK_CHECK();
Die();
}
} // namespace __hwasan
// Interface.
using namespace __hwasan;
uptr __hwasan_shadow_memory_dynamic_address; // Global interface symbol.
void __hwasan_init() {
CHECK(!hwasan_init_is_running);
if (hwasan_inited) return;
@ -160,23 +172,28 @@ void __hwasan_init() {
CacheBinaryName();
InitializeFlags();
// Install tool-specific callbacks in sanitizer_common.
SetCheckFailedCallback(HWAsanCheckFailed);
__sanitizer_set_report_path(common_flags()->log_path);
DisableCoreDumperIfNecessary();
if (!InitShadow()) {
Printf("FATAL: HWAddressSanitizer cannot mmap the shadow memory.\n");
if (HWASAN_FIXED_MAPPING) {
Printf("FATAL: Make sure to compile with -fPIE and to link with -pie.\n");
Printf("FATAL: Disabling ASLR is known to cause this error.\n");
Printf("FATAL: If running under GDB, try "
"'set disable-randomization off'.\n");
}
DumpProcessMap();
Die();
}
InitializeInterceptors();
InstallDeadlySignalHandlers(HwasanOnDeadlySignal);
InstallAtExitHandler(); // Needs __cxa_atexit interceptor.
DisableCoreDumperIfNecessary();
if (!InitShadow()) {
Printf("FATAL: HWAddressSanitizer can not mmap the shadow memory.\n");
Printf("FATAL: Make sure to compile with -fPIE and to link with -pie.\n");
Printf("FATAL: Disabling ASLR is known to cause this error.\n");
Printf("FATAL: If running under GDB, try "
"'set disable-randomization off'.\n");
DumpProcessMap();
Die();
}
Symbolizer::GetOrInit()->AddHooks(EnterSymbolizer, ExitSymbolizer);
InitializeCoverage(common_flags()->coverage, common_flags()->coverage_dir);
@ -240,11 +257,23 @@ void __sanitizer_unaligned_store64(uu64 *p, u64 x) {
template<unsigned X>
__attribute__((always_inline))
static void SigIll() {
static void SigTrap(uptr p) {
#if defined(__aarch64__)
asm("hlt %0\n\t" ::"n"(X));
#elif defined(__x86_64__) || defined(__i386__)
asm("ud2\n\t");
(void)p;
  // 0x900 is added so as not to interfere with the kernel's use of lower
  // brk immediate values.
// FIXME: Add a constraint to put the pointer into x0, the same as x86 branch.
asm("brk %0\n\t" ::"n"(0x900 + X));
#elif defined(__x86_64__)
  // INT3 + NOP DWORD ptr [EAX + X] to pass X to our signal handler, 5 bytes
  // total. The pointer is passed via rdi.
  // 0x40 is added as a safeguard, to help distinguish our trap from others and
  // to avoid a zero offset in the instruction (otherwise it would be encoded
  // as a different, three-byte NOP).
asm volatile(
"int3\n"
"nopl %c0(%%rax)\n"
:: "n"(0x40 + X), "D"(p));
#else
// FIXME: not always sigill.
__builtin_trap();
@ -261,8 +290,8 @@ __attribute__((always_inline, nodebug)) static void CheckAddress(uptr p) {
uptr ptr_raw = p & ~kAddressTagMask;
tag_t mem_tag = *(tag_t *)MEM_TO_SHADOW(ptr_raw);
if (UNLIKELY(ptr_tag != mem_tag)) {
SigIll<0x100 + 0x20 * (EA == ErrorAction::Recover) +
0x10 * (AT == AccessType::Store) + LogSize>();
SigTrap<0x20 * (EA == ErrorAction::Recover) +
0x10 * (AT == AccessType::Store) + LogSize>(p);
if (EA == ErrorAction::Abort) __builtin_unreachable();
}
}
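To make the encoding above concrete, a tiny sketch (illustration only) of the constant the SigTrap/CheckAddress code produces for one case, a recoverable 4-byte store, and where that constant ends up on each architecture:

#include <cassert>

int main() {
  const unsigned kRecover = 0x20, kStore = 0x10, kLogSize = 2;  // 4-byte access
  const unsigned X = kRecover + kStore + kLogSize;  // SigTrap template argument
  assert(X == 0x32);
  assert(0x900 + X == 0x932);  // aarch64: immediate of the brk instruction
  assert(0x40 + X == 0x72);    // x86_64: displacement byte inside the nopl
  return 0;
}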
@ -277,13 +306,13 @@ __attribute__((always_inline, nodebug)) static void CheckAddressSized(uptr p,
tag_t *shadow_last = (tag_t *)MEM_TO_SHADOW(ptr_raw + sz - 1);
for (tag_t *t = shadow_first; t <= shadow_last; ++t)
if (UNLIKELY(ptr_tag != *t)) {
SigIll<0x100 + 0x20 * (EA == ErrorAction::Recover) +
0x10 * (AT == AccessType::Store) + 0xf>();
SigTrap<0x20 * (EA == ErrorAction::Recover) +
0x10 * (AT == AccessType::Store) + 0xf>(p);
if (EA == ErrorAction::Abort) __builtin_unreachable();
}
}
void __hwasan_load(uptr p, uptr sz) {
void __hwasan_loadN(uptr p, uptr sz) {
CheckAddressSized<ErrorAction::Abort, AccessType::Load>(p, sz);
}
void __hwasan_load1(uptr p) {
@ -302,7 +331,7 @@ void __hwasan_load16(uptr p) {
CheckAddress<ErrorAction::Abort, AccessType::Load, 4>(p);
}
void __hwasan_load_noabort(uptr p, uptr sz) {
void __hwasan_loadN_noabort(uptr p, uptr sz) {
CheckAddressSized<ErrorAction::Recover, AccessType::Load>(p, sz);
}
void __hwasan_load1_noabort(uptr p) {
@ -321,7 +350,7 @@ void __hwasan_load16_noabort(uptr p) {
CheckAddress<ErrorAction::Recover, AccessType::Load, 4>(p);
}
void __hwasan_store(uptr p, uptr sz) {
void __hwasan_storeN(uptr p, uptr sz) {
CheckAddressSized<ErrorAction::Abort, AccessType::Store>(p, sz);
}
void __hwasan_store1(uptr p) {
@ -340,7 +369,7 @@ void __hwasan_store16(uptr p) {
CheckAddress<ErrorAction::Abort, AccessType::Store, 4>(p);
}
void __hwasan_store_noabort(uptr p, uptr sz) {
void __hwasan_storeN_noabort(uptr p, uptr sz) {
CheckAddressSized<ErrorAction::Recover, AccessType::Store>(p, sz);
}
void __hwasan_store1_noabort(uptr p) {
@ -359,6 +388,18 @@ void __hwasan_store16_noabort(uptr p) {
CheckAddress<ErrorAction::Recover, AccessType::Store, 4>(p);
}
void __hwasan_tag_memory(uptr p, u8 tag, uptr sz) {
TagMemoryAligned(p, sz, tag);
}
static const u8 kFallbackTag = 0xBB;
u8 __hwasan_generate_tag() {
HwasanThread *t = GetCurrentThread();
if (!t) return kFallbackTag;
return t->GenerateRandomTag();
}
#if !SANITIZER_SUPPORTS_WEAK_HOOKS
extern "C" {
SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE

View File

@ -1,4 +1,4 @@
//===-- hwasan.h --------------------------------------------------*- C++ -*-===//
//===-- hwasan.h ------------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -32,16 +32,6 @@
typedef u8 tag_t;
// Reasonable values are 4 (for 1/16th shadow) and 6 (for 1/64th).
const uptr kShadowScale = 4;
const uptr kShadowAlignment = 1UL << kShadowScale;
#define MEM_TO_SHADOW_OFFSET(mem) ((uptr)(mem) >> kShadowScale)
#define MEM_TO_SHADOW(mem) ((uptr)(mem) >> kShadowScale)
#define SHADOW_TO_MEM(shadow) ((uptr)(shadow) << kShadowScale)
#define MEM_IS_APP(mem) true
// TBI (Top Byte Ignore) feature of AArch64: bits [63:56] are ignored in address
// translation and can be used to store a tag.
const unsigned kAddressTagShift = 56;
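A short, self-contained sketch of placing a tag in the TBI byte and reading it back (illustration only; kAddressTagMask is derived here from the shift, matching how the runtime masks pointers elsewhere in this diff):

#include <cassert>
#include <cstdint>

int main() {
  const unsigned kAddressTagShift = 56;
  const uint64_t kAddressTagMask = 0xffULL << kAddressTagShift;
  const uint64_t addr = 0x00007fffdeadbeefULL;  // untagged application address
  const uint8_t tag = 0xAB;
  const uint64_t tagged = (addr & ~kAddressTagMask) |
                          ((uint64_t)tag << kAddressTagShift);
  assert((uint8_t)(tagged >> kAddressTagShift) == tag);  // recover the tag
  assert((tagged & ~kAddressTagMask) == addr);           // address bits intact
  return 0;
}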
@ -107,15 +97,6 @@ void PrintWarning(uptr pc, uptr bp);
void GetStackTrace(BufferedStackTrace *stack, uptr max_s, uptr pc, uptr bp,
void *context, bool request_fast_unwind);
void ReportInvalidAccess(StackTrace *stack, u32 origin);
void ReportTagMismatch(StackTrace *stack, uptr addr, uptr access_size,
bool is_store);
void ReportStats();
void ReportAtExitStatistics();
void DescribeMemoryRange(const void *x, uptr size);
void ReportInvalidAccessInsideAddressRange(const char *what, const void *start, uptr size,
uptr offset);
// Returns a "chained" origin id, pointing to the given stack trace followed by
// the previous origin id.
u32 ChainOrigin(u32 id, StackTrace *stack);
@ -135,6 +116,15 @@ const int STACK_TRACE_TAG_POISON = StackTrace::TAG_CUSTOM + 1;
GetStackTrace(&stack, kStackTraceMax, pc, bp, nullptr, \
common_flags()->fast_unwind_on_fatal)
#define GET_FATAL_STACK_TRACE_HERE \
GET_FATAL_STACK_TRACE_PC_BP(StackTrace::GetCurrentPc(), GET_CURRENT_FRAME())
#define PRINT_CURRENT_STACK_CHECK() \
{ \
GET_FATAL_STACK_TRACE_HERE; \
stack.Print(); \
}
class ScopedThreadLocalStateBackup {
public:
ScopedThreadLocalStateBackup() { Backup(); }

View File

@ -1,4 +1,4 @@
//===-- hwasan_allocator.cc --------------------------- ---------------------===//
//===-- hwasan_allocator.cc ------------------------- ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@ -15,11 +15,13 @@
#include "sanitizer_common/sanitizer_allocator.h"
#include "sanitizer_common/sanitizer_allocator_checks.h"
#include "sanitizer_common/sanitizer_allocator_interface.h"
#include "sanitizer_common/sanitizer_allocator_report.h"
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_errno.h"
#include "sanitizer_common/sanitizer_stackdepot.h"
#include "hwasan.h"
#include "hwasan_allocator.h"
#include "hwasan_mapping.h"
#include "hwasan_thread.h"
#include "hwasan_poisoning.h"
@ -70,8 +72,8 @@ struct HwasanMapUnmapCallback {
}
};
#if !defined(__aarch64__)
#error unsupported platform
#if !defined(__aarch64__) && !defined(__x86_64__)
#error Unsupported platform
#endif
static const uptr kMaxAllowedMallocSize = 2UL << 30; // 2G
@ -100,6 +102,9 @@ static AllocatorCache fallback_allocator_cache;
static SpinMutex fallback_mutex;
static atomic_uint8_t hwasan_allocator_tagging_enabled;
static const tag_t kFallbackAllocTag = 0xBB;
static const tag_t kFallbackFreeTag = 0xBC;
void HwasanAllocatorInit() {
atomic_store_relaxed(&hwasan_allocator_tagging_enabled,
!flags()->disable_allocator_tagging);
@ -123,9 +128,12 @@ static void *HwasanAllocate(StackTrace *stack, uptr size, uptr alignment,
size = RoundUpTo(size, kShadowAlignment);
if (size > kMaxAllowedMallocSize) {
Report("WARNING: HWAddressSanitizer failed to allocate %p bytes\n",
(void *)size);
return Allocator::FailureHandler::OnBadRequest();
if (AllocatorMayReturnNull()) {
Report("WARNING: HWAddressSanitizer failed to allocate 0x%zx bytes\n",
size);
return nullptr;
}
ReportAllocationSizeTooBig(size, kMaxAllowedMallocSize, stack);
}
HwasanThread *t = GetCurrentThread();
void *allocated;
@ -137,6 +145,12 @@ static void *HwasanAllocate(StackTrace *stack, uptr size, uptr alignment,
AllocatorCache *cache = &fallback_allocator_cache;
allocated = allocator.Allocate(cache, size, alignment);
}
if (UNLIKELY(!allocated)) {
SetAllocatorOutOfMemory();
if (AllocatorMayReturnNull())
return nullptr;
ReportOutOfMemory(size, stack);
}
Metadata *meta =
reinterpret_cast<Metadata *>(allocator.GetMetaData(allocated));
meta->state = CHUNK_ALLOCATED;
@ -145,10 +159,11 @@ static void *HwasanAllocate(StackTrace *stack, uptr size, uptr alignment,
if (zeroise)
internal_memset(allocated, 0, size);
void *user_ptr = (flags()->tag_in_malloc &&
atomic_load_relaxed(&hwasan_allocator_tagging_enabled))
? (void *)TagMemoryAligned((uptr)allocated, size, 0xBB)
: allocated;
void *user_ptr = allocated;
if (flags()->tag_in_malloc &&
atomic_load_relaxed(&hwasan_allocator_tagging_enabled))
user_ptr = (void *)TagMemoryAligned(
(uptr)user_ptr, size, t ? t->GenerateRandomTag() : kFallbackAllocTag);
HWASAN_MALLOC_HOOK(user_ptr, size);
return user_ptr;
@ -166,10 +181,11 @@ void HwasanDeallocate(StackTrace *stack, void *user_ptr) {
meta->free_context_id = StackDepotPut(*stack);
// This memory will not be reused by anyone else, so we are free to keep it
// poisoned.
HwasanThread *t = GetCurrentThread();
if (flags()->tag_in_free &&
atomic_load_relaxed(&hwasan_allocator_tagging_enabled))
TagMemoryAligned((uptr)p, size, 0xBC);
HwasanThread *t = GetCurrentThread();
TagMemoryAligned((uptr)p, size,
t ? t->GenerateRandomTag() : kFallbackFreeTag);
if (t) {
AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
allocator.Deallocate(cache, p);
@ -195,8 +211,12 @@ void *HwasanReallocate(StackTrace *stack, void *user_old_p, uptr new_size,
meta->requested_size = new_size;
if (!atomic_load_relaxed(&hwasan_allocator_tagging_enabled))
return user_old_p;
if (flags()->retag_in_realloc)
return (void *)TagMemoryAligned((uptr)old_p, new_size, 0xCC);
if (flags()->retag_in_realloc) {
HwasanThread *t = GetCurrentThread();
return (void *)TagMemoryAligned(
(uptr)old_p, new_size,
t ? t->GenerateRandomTag() : kFallbackAllocTag);
}
if (new_size > old_size) {
tag_t tag = GetTagFromPointer((uptr)user_old_p);
TagMemoryAligned((uptr)old_p + old_size, new_size - old_size, tag);
@ -212,6 +232,15 @@ void *HwasanReallocate(StackTrace *stack, void *user_old_p, uptr new_size,
return new_p;
}
void *HwasanCalloc(StackTrace *stack, uptr nmemb, uptr size) {
if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) {
if (AllocatorMayReturnNull())
return nullptr;
ReportCallocOverflow(nmemb, size, stack);
}
return HwasanAllocate(stack, nmemb * size, sizeof(u64), true);
}
HwasanChunkView FindHeapChunkByAddress(uptr address) {
void *block = allocator.GetBlockBegin(reinterpret_cast<void*>(address));
if (!block)
@ -235,9 +264,7 @@ void *hwasan_malloc(uptr size, StackTrace *stack) {
}
void *hwasan_calloc(uptr nmemb, uptr size, StackTrace *stack) {
if (UNLIKELY(CheckForCallocOverflow(size, nmemb)))
return SetErrnoOnNull(Allocator::FailureHandler::OnBadRequest());
return SetErrnoOnNull(HwasanAllocate(stack, nmemb * size, sizeof(u64), true));
return SetErrnoOnNull(HwasanCalloc(stack, nmemb, size));
}
void *hwasan_realloc(void *ptr, uptr size, StackTrace *stack) {
@ -251,14 +278,17 @@ void *hwasan_realloc(void *ptr, uptr size, StackTrace *stack) {
}
void *hwasan_valloc(uptr size, StackTrace *stack) {
return SetErrnoOnNull(HwasanAllocate(stack, size, GetPageSizeCached(), false));
return SetErrnoOnNull(
HwasanAllocate(stack, size, GetPageSizeCached(), false));
}
void *hwasan_pvalloc(uptr size, StackTrace *stack) {
uptr PageSize = GetPageSizeCached();
if (UNLIKELY(CheckForPvallocOverflow(size, PageSize))) {
errno = errno_ENOMEM;
return Allocator::FailureHandler::OnBadRequest();
if (AllocatorMayReturnNull())
return nullptr;
ReportPvallocOverflow(size, stack);
}
// pvalloc(0) should allocate one page.
size = size ? RoundUpTo(size, PageSize) : PageSize;
@ -268,7 +298,9 @@ void *hwasan_pvalloc(uptr size, StackTrace *stack) {
void *hwasan_aligned_alloc(uptr alignment, uptr size, StackTrace *stack) {
if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(alignment, size))) {
errno = errno_EINVAL;
return Allocator::FailureHandler::OnBadRequest();
if (AllocatorMayReturnNull())
return nullptr;
ReportInvalidAlignedAllocAlignment(size, alignment, stack);
}
return SetErrnoOnNull(HwasanAllocate(stack, size, alignment, false));
}
@ -276,7 +308,9 @@ void *hwasan_aligned_alloc(uptr alignment, uptr size, StackTrace *stack) {
void *hwasan_memalign(uptr alignment, uptr size, StackTrace *stack) {
if (UNLIKELY(!IsPowerOfTwo(alignment))) {
errno = errno_EINVAL;
return Allocator::FailureHandler::OnBadRequest();
if (AllocatorMayReturnNull())
return nullptr;
ReportInvalidAllocationAlignment(alignment, stack);
}
return SetErrnoOnNull(HwasanAllocate(stack, size, alignment, false));
}
@ -284,18 +318,20 @@ void *hwasan_memalign(uptr alignment, uptr size, StackTrace *stack) {
int hwasan_posix_memalign(void **memptr, uptr alignment, uptr size,
StackTrace *stack) {
if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) {
Allocator::FailureHandler::OnBadRequest();
return errno_EINVAL;
if (AllocatorMayReturnNull())
return errno_EINVAL;
ReportInvalidPosixMemalignAlignment(alignment, stack);
}
void *ptr = HwasanAllocate(stack, size, alignment, false);
if (UNLIKELY(!ptr))
// OOM error is already taken care of by HwasanAllocate.
return errno_ENOMEM;
CHECK(IsAligned((uptr)ptr, alignment));
*memptr = ptr;
return 0;
}
} // namespace __hwasan
} // namespace __hwasan
using namespace __hwasan;

View File

@ -0,0 +1,132 @@
//===-- hwasan_dynamic_shadow.cc --------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file is a part of HWAddressSanitizer. It reserves the dynamic shadow
/// memory region and handles the ifunc resolver case when necessary.
///
//===----------------------------------------------------------------------===//
#include "hwasan_dynamic_shadow.h"
#include "hwasan_mapping.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_posix.h"
// The code in this file needs to run in an unrelocated binary. It should not
// access any external symbol, including its own non-hidden globals.
namespace __hwasan {
static void UnmapFromTo(uptr from, uptr to) {
if (to == from)
return;
CHECK(to >= from);
uptr res = internal_munmap(reinterpret_cast<void *>(from), to - from);
if (UNLIKELY(internal_iserror(res))) {
Report("ERROR: %s failed to unmap 0x%zx (%zd) bytes at address %p\n",
SanitizerToolName, to - from, to - from, from);
CHECK("unable to unmap" && 0);
}
}
// Returns an address aligned to SHADOW_GRANULARITY pages, such that one page
// on the left and shadow_size_bytes bytes on the right of it remain reserved
// (PROT_NONE).
static uptr MapDynamicShadow(uptr shadow_size_bytes) {
const uptr granularity = GetMmapGranularity();
const uptr alignment = granularity * SHADOW_GRANULARITY;
const uptr left_padding = granularity;
const uptr shadow_size =
RoundUpTo(shadow_size_bytes, granularity);
const uptr map_size = shadow_size + left_padding + alignment;
const uptr map_start = (uptr)MmapNoAccess(map_size);
CHECK_NE(map_start, ~(uptr)0);
const uptr shadow_start = RoundUpTo(map_start + left_padding, alignment);
UnmapFromTo(map_start, shadow_start - left_padding);
UnmapFromTo(shadow_start + shadow_size, map_start + map_size);
return shadow_start;
}
} // namespace __hwasan
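A rough numeric walk-through (illustration only; it assumes a 4 KiB mmap granularity, SHADOW_GRANULARITY of 16, and a made-up mmap result) of the over-map-then-trim arithmetic in MapDynamicShadow above:

#include <cassert>
#include <cstdint>

static uint64_t RoundUpTo(uint64_t x, uint64_t a) { return (x + a - 1) / a * a; }

int main() {
  const uint64_t granularity = 0x1000;          // assumed 4 KiB pages
  const uint64_t alignment = granularity * 16;  // granularity * SHADOW_GRANULARITY
  const uint64_t left_padding = granularity;
  const uint64_t shadow_size = RoundUpTo(0x12345, granularity);
  const uint64_t map_size = shadow_size + left_padding + alignment;
  const uint64_t map_start = 0x7f0000001000;    // pretend MmapNoAccess result
  const uint64_t shadow_start = RoundUpTo(map_start + left_padding, alignment);
  // The two UnmapFromTo calls release everything outside
  // [shadow_start - left_padding, shadow_start + shadow_size).
  assert(shadow_start % alignment == 0);
  assert(shadow_start - left_padding >= map_start);
  assert(shadow_start + shadow_size <= map_start + map_size);
  return 0;
}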
#if HWASAN_PREMAP_SHADOW
extern "C" {
INTERFACE_ATTRIBUTE void __hwasan_shadow();
decltype(__hwasan_shadow)* __hwasan_premap_shadow();
} // extern "C"
namespace __hwasan {
// Conservative upper limit.
static uptr PremapShadowSize() {
return RoundUpTo(GetMaxVirtualAddress() >> kShadowScale,
GetMmapGranularity());
}
static uptr PremapShadow() {
return MapDynamicShadow(PremapShadowSize());
}
static bool IsPremapShadowAvailable() {
const uptr shadow = reinterpret_cast<uptr>(&__hwasan_shadow);
const uptr resolver = reinterpret_cast<uptr>(&__hwasan_premap_shadow);
// shadow == resolver is how Android KitKat and older handles ifunc.
// shadow == 0 just in case.
return shadow != 0 && shadow != resolver;
}
static uptr FindPremappedShadowStart(uptr shadow_size_bytes) {
const uptr granularity = GetMmapGranularity();
const uptr shadow_start = reinterpret_cast<uptr>(&__hwasan_shadow);
const uptr premap_shadow_size = PremapShadowSize();
const uptr shadow_size = RoundUpTo(shadow_size_bytes, granularity);
// We may have mapped too much. Release extra memory.
UnmapFromTo(shadow_start + shadow_size, shadow_start + premap_shadow_size);
return shadow_start;
}
} // namespace __hwasan
extern "C" {
decltype(__hwasan_shadow)* __hwasan_premap_shadow() {
// The resolver might be called multiple times. Map the shadow just once.
static __sanitizer::uptr shadow = 0;
if (!shadow)
shadow = __hwasan::PremapShadow();
return reinterpret_cast<decltype(__hwasan_shadow)*>(shadow);
}
// __hwasan_shadow is a "function" that has the same address as the first byte
// of the shadow mapping.
INTERFACE_ATTRIBUTE __attribute__((ifunc("__hwasan_premap_shadow")))
void __hwasan_shadow();
} // extern "C"
#endif // HWASAN_PREMAP_SHADOW
namespace __hwasan {
uptr FindDynamicShadowStart(uptr shadow_size_bytes) {
#if HWASAN_PREMAP_SHADOW
if (IsPremapShadowAvailable())
return FindPremappedShadowStart(shadow_size_bytes);
#endif
return MapDynamicShadow(shadow_size_bytes);
}
} // namespace __hwasan

View File

@ -0,0 +1,27 @@
//===-- hwasan_dynamic_shadow.h ---------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file is a part of HWAddressSanitizer. It reserves the dynamic shadow
/// memory region.
///
//===----------------------------------------------------------------------===//
#ifndef HWASAN_PREMAP_SHADOW_H
#define HWASAN_PREMAP_SHADOW_H
#include "sanitizer_common/sanitizer_internal_defs.h"
namespace __hwasan {
uptr FindDynamicShadowStart(uptr shadow_size_bytes);
} // namespace __hwasan
#endif // HWASAN_PREMAP_SHADOW_H

View File

@ -27,3 +27,7 @@ HWASAN_FLAG(bool, atexit, false, "")
// Test only flag to disable malloc/realloc/free memory tagging on startup.
// Tagging can be reenabled with __hwasan_enable_allocator_tagging().
HWASAN_FLAG(bool, disable_allocator_tagging, false, "")
// If false, use simple increment of a thread local counter to generate new
// tags.
HWASAN_FLAG(bool, random_tags, true, "")

View File

@ -17,8 +17,10 @@
#include "interception/interception.h"
#include "hwasan.h"
#include "hwasan_mapping.h"
#include "hwasan_thread.h"
#include "hwasan_poisoning.h"
#include "hwasan_report.h"
#include "sanitizer_common/sanitizer_platform_limits_posix.h"
#include "sanitizer_common/sanitizer_allocator.h"
#include "sanitizer_common/sanitizer_allocator_interface.h"
@ -258,18 +260,17 @@ INTERCEPTOR(void *, realloc, void *ptr, SIZE_T size) {
INTERCEPTOR(void *, malloc, SIZE_T size) {
GET_MALLOC_STACK_TRACE;
if (UNLIKELY(!hwasan_init_is_running))
ENSURE_HWASAN_INITED();
if (UNLIKELY(!hwasan_inited))
// Hack: dlsym calls malloc before REAL(malloc) is retrieved from dlsym.
return AllocateFromLocalPool(size);
return hwasan_malloc(size, &stack);
}
INTERCEPTOR(void *, mmap, void *addr, SIZE_T length, int prot, int flags,
int fd, OFF_T offset) {
if (hwasan_init_is_running)
return REAL(mmap)(addr, length, prot, flags, fd, offset);
ENSURE_HWASAN_INITED();
template <class Mmap>
static void *mmap_interceptor(Mmap real_mmap, void *addr, SIZE_T sz, int prot,
int flags, int fd, OFF64_T off) {
if (addr && !MEM_IS_APP(addr)) {
if (flags & map_fixed) {
errno = errno_EINVAL;
@ -278,30 +279,9 @@ INTERCEPTOR(void *, mmap, void *addr, SIZE_T length, int prot, int flags,
addr = nullptr;
}
}
void *res = REAL(mmap)(addr, length, prot, flags, fd, offset);
return res;
return real_mmap(addr, sz, prot, flags, fd, off);
}
#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
INTERCEPTOR(void *, mmap64, void *addr, SIZE_T length, int prot, int flags,
int fd, OFF64_T offset) {
ENSURE_HWASAN_INITED();
if (addr && !MEM_IS_APP(addr)) {
if (flags & map_fixed) {
errno = errno_EINVAL;
return (void *)-1;
} else {
addr = nullptr;
}
}
void *res = REAL(mmap64)(addr, length, prot, flags, fd, offset);
return res;
}
#define HWASAN_MAYBE_INTERCEPT_MMAP64 INTERCEPT_FUNCTION(mmap64)
#else
#define HWASAN_MAYBE_INTERCEPT_MMAP64
#endif
extern "C" int pthread_attr_init(void *attr);
extern "C" int pthread_attr_destroy(void *attr);
@ -427,6 +407,22 @@ int OnExit() {
*begin = *end = 0; \
}
#define COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, dst, v, size) \
{ \
COMMON_INTERCEPTOR_ENTER(ctx, memset, dst, v, size); \
if (common_flags()->intercept_intrin && \
MEM_IS_APP(GetAddressFromPointer(dst))) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size); \
return REAL(memset)(dst, v, size); \
}
#define COMMON_INTERCEPTOR_MMAP_IMPL(ctx, mmap, addr, length, prot, flags, fd, \
offset) \
do { \
return mmap_interceptor(REAL(mmap), addr, length, prot, flags, fd, \
offset); \
} while (false)
#include "sanitizer_common/sanitizer_platform_interceptors.h"
#include "sanitizer_common/sanitizer_common_interceptors.inc"
#include "sanitizer_common/sanitizer_signal_interceptors.inc"
@ -448,6 +444,7 @@ int OnExit() {
(void)(s); \
} while (false)
#include "sanitizer_common/sanitizer_common_syscalls.inc"
#include "sanitizer_common/sanitizer_syscalls_netbsd.inc"
@ -459,8 +456,6 @@ void InitializeInterceptors() {
InitializeCommonInterceptors();
InitializeSignalInterceptors();
INTERCEPT_FUNCTION(mmap);
HWASAN_MAYBE_INTERCEPT_MMAP64;
INTERCEPT_FUNCTION(posix_memalign);
HWASAN_MAYBE_INTERCEPT_MEMALIGN;
INTERCEPT_FUNCTION(__libc_memalign);

View File

@ -18,6 +18,7 @@
#include "sanitizer_common/sanitizer_internal_defs.h"
extern "C" {
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_init();
@ -32,7 +33,10 @@ using __sanitizer::u16;
using __sanitizer::u8;
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_load(uptr, uptr);
extern uptr __hwasan_shadow_memory_dynamic_address;
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_loadN(uptr, uptr);
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_load1(uptr);
SANITIZER_INTERFACE_ATTRIBUTE
@ -45,7 +49,7 @@ SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_load16(uptr);
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_load_noabort(uptr, uptr);
void __hwasan_loadN_noabort(uptr, uptr);
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_load1_noabort(uptr);
SANITIZER_INTERFACE_ATTRIBUTE
@ -58,7 +62,7 @@ SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_load16_noabort(uptr);
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_store(uptr, uptr);
void __hwasan_storeN(uptr, uptr);
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_store1(uptr);
SANITIZER_INTERFACE_ATTRIBUTE
@ -71,7 +75,7 @@ SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_store16(uptr);
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_store_noabort(uptr, uptr);
void __hwasan_storeN_noabort(uptr, uptr);
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_store1_noabort(uptr);
SANITIZER_INTERFACE_ATTRIBUTE
@ -83,6 +87,12 @@ void __hwasan_store8_noabort(uptr);
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_store16_noabort(uptr);
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_tag_memory(uptr p, u8 tag, uptr sz);
SANITIZER_INTERFACE_ATTRIBUTE
u8 __hwasan_generate_tag();
// Returns the offset of the first tag mismatch or -1 if the whole range is
// good.
SANITIZER_INTERFACE_ATTRIBUTE

View File

@ -1,4 +1,4 @@
//===-- hwasan_linux.cc -----------------------------------------------------===//
//===-- hwasan_linux.cc -----------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -6,41 +6,45 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of HWAddressSanitizer.
//
// Linux-, NetBSD- and FreeBSD-specific code.
///
/// \file
/// This file is a part of HWAddressSanitizer and contains Linux-, NetBSD- and
/// FreeBSD-specific code.
///
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_platform.h"
#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD
#include "hwasan.h"
#include "hwasan_dynamic_shadow.h"
#include "hwasan_interface_internal.h"
#include "hwasan_mapping.h"
#include "hwasan_report.h"
#include "hwasan_thread.h"
#include <elf.h>
#include <link.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <unistd.h>
#include <unwind.h>
#include <sys/time.h>
#include <sys/resource.h>
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_procmaps.h"
namespace __hwasan {
void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name) {
static void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name) {
CHECK_EQ((beg % GetMmapGranularity()), 0);
CHECK_EQ(((end + 1) % GetMmapGranularity()), 0);
uptr size = end - beg + 1;
DecreaseTotalMmap(size); // Don't count the shadow against mmap_limit_mb.
void *res = MmapFixedNoReserve(beg, size, name);
if (res != (void *)beg) {
if (!MmapFixedNoReserve(beg, size, name)) {
Report(
"ReserveShadowMemoryRange failed while trying to map 0x%zx bytes. "
"Perhaps you're using ulimit -v\n",
@ -52,8 +56,11 @@ void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name) {
}
static void ProtectGap(uptr addr, uptr size) {
if (!size)
return;
void *res = MmapFixedNoAccess(addr, size, "shadow gap");
if (addr == (uptr)res) return;
if (addr == (uptr)res)
return;
// A few pages at the start of the address space can not be protected.
// But we really want to protect as much as possible, to prevent this memory
// being returned as a result of a non-FIXED mmap().
@ -63,63 +70,160 @@ static void ProtectGap(uptr addr, uptr size) {
addr += step;
size -= step;
void *res = MmapFixedNoAccess(addr, size, "shadow gap");
if (addr == (uptr)res) return;
if (addr == (uptr)res)
return;
}
}
Report(
"ERROR: Failed to protect the shadow gap. "
"ASan cannot proceed correctly. ABORTING.\n");
"ERROR: Failed to protect shadow gap [%p, %p]. "
"HWASan cannot proceed correctly. ABORTING.\n", (void *)addr,
(void *)(addr + size));
DumpProcessMap();
Die();
}
bool InitShadow() {
const uptr maxVirtualAddress = GetMaxUserVirtualAddress();
static uptr kLowMemStart;
static uptr kLowMemEnd;
static uptr kLowShadowEnd;
static uptr kLowShadowStart;
static uptr kHighShadowStart;
static uptr kHighShadowEnd;
static uptr kHighMemStart;
static uptr kHighMemEnd;
// LowMem covers as much of the first 4GB as possible.
const uptr kLowMemEnd = 1UL<<32;
const uptr kLowShadowEnd = kLowMemEnd >> kShadowScale;
const uptr kLowShadowStart = kLowShadowEnd >> kShadowScale;
static void PrintRange(uptr start, uptr end, const char *name) {
Printf("|| [%p, %p] || %.*s ||\n", (void *)start, (void *)end, 10, name);
}
static void PrintAddressSpaceLayout() {
PrintRange(kHighMemStart, kHighMemEnd, "HighMem");
if (kHighShadowEnd + 1 < kHighMemStart)
PrintRange(kHighShadowEnd + 1, kHighMemStart - 1, "ShadowGap");
else
CHECK_EQ(kHighShadowEnd + 1, kHighMemStart);
PrintRange(kHighShadowStart, kHighShadowEnd, "HighShadow");
if (SHADOW_OFFSET) {
if (kLowShadowEnd + 1 < kHighShadowStart)
PrintRange(kLowShadowEnd + 1, kHighShadowStart - 1, "ShadowGap");
else
CHECK_EQ(kLowMemEnd + 1, kHighShadowStart);
PrintRange(kLowShadowStart, kLowShadowEnd, "LowShadow");
if (kLowMemEnd + 1 < kLowShadowStart)
PrintRange(kLowMemEnd + 1, kLowShadowStart - 1, "ShadowGap");
else
CHECK_EQ(kLowMemEnd + 1, kLowShadowStart);
PrintRange(kLowMemStart, kLowMemEnd, "LowMem");
CHECK_EQ(0, kLowMemStart);
} else {
if (kLowMemEnd + 1 < kHighShadowStart)
PrintRange(kLowMemEnd + 1, kHighShadowStart - 1, "ShadowGap");
else
CHECK_EQ(kLowMemEnd + 1, kHighShadowStart);
PrintRange(kLowMemStart, kLowMemEnd, "LowMem");
CHECK_EQ(kLowShadowEnd + 1, kLowMemStart);
PrintRange(kLowShadowStart, kLowShadowEnd, "LowShadow");
PrintRange(0, kLowShadowStart - 1, "ShadowGap");
}
}
static uptr GetHighMemEnd() {
// HighMem covers the upper part of the address space.
const uptr kHighShadowEnd = (maxVirtualAddress >> kShadowScale) + 1;
const uptr kHighShadowStart = Max(kLowMemEnd, kHighShadowEnd >> kShadowScale);
CHECK(kHighShadowStart < kHighShadowEnd);
uptr max_address = GetMaxUserVirtualAddress();
if (SHADOW_OFFSET)
// Adjust max address to make sure that kHighMemEnd and kHighMemStart are
// properly aligned:
max_address |= SHADOW_GRANULARITY * GetMmapGranularity() - 1;
return max_address;
}
const uptr kHighMemStart = kHighShadowStart << kShadowScale;
CHECK(kHighShadowEnd <= kHighMemStart);
static void InitializeShadowBaseAddress(uptr shadow_size_bytes) {
// Set the shadow memory address to uninitialized.
__hwasan_shadow_memory_dynamic_address = kDefaultShadowSentinel;
uptr shadow_start = SHADOW_OFFSET;
// Detect if a dynamic shadow address must be used and find the available
  // location when necessary. When a dynamic address is used, the macro
  // SHADOW_OFFSET expands to __hwasan_shadow_memory_dynamic_address, which
  // was just set to kDefaultShadowSentinel.
if (shadow_start == kDefaultShadowSentinel) {
__hwasan_shadow_memory_dynamic_address = 0;
CHECK_EQ(0, SHADOW_OFFSET);
shadow_start = FindDynamicShadowStart(shadow_size_bytes);
}
// Update the shadow memory address (potentially) used by instrumentation.
__hwasan_shadow_memory_dynamic_address = shadow_start;
}
if (Verbosity()) {
Printf("|| `[%p, %p]` || HighMem ||\n", (void *)kHighMemStart,
(void *)maxVirtualAddress);
if (kHighMemStart > kHighShadowEnd)
Printf("|| `[%p, %p]` || ShadowGap2 ||\n", (void *)kHighShadowEnd,
(void *)kHighMemStart);
Printf("|| `[%p, %p]` || HighShadow ||\n", (void *)kHighShadowStart,
(void *)kHighShadowEnd);
if (kHighShadowStart > kLowMemEnd)
Printf("|| `[%p, %p]` || ShadowGap2 ||\n", (void *)kHighShadowEnd,
(void *)kHighMemStart);
Printf("|| `[%p, %p]` || LowMem ||\n", (void *)kLowShadowEnd,
(void *)kLowMemEnd);
Printf("|| `[%p, %p]` || LowShadow ||\n", (void *)kLowShadowStart,
(void *)kLowShadowEnd);
Printf("|| `[%p, %p]` || ShadowGap1 ||\n", (void *)0,
(void *)kLowShadowStart);
bool InitShadow() {
// Define the entire memory range.
kHighMemEnd = GetHighMemEnd();
// Determine shadow memory base offset.
InitializeShadowBaseAddress(MEM_TO_SHADOW_SIZE(kHighMemEnd));
// Place the low memory first.
if (SHADOW_OFFSET) {
kLowMemEnd = SHADOW_OFFSET - 1;
kLowMemStart = 0;
} else {
// LowMem covers as much of the first 4GB as possible.
kLowMemEnd = (1UL << 32) - 1;
kLowMemStart = MEM_TO_SHADOW(kLowMemEnd) + 1;
}
ReserveShadowMemoryRange(kLowShadowStart, kLowShadowEnd - 1, "low shadow");
ReserveShadowMemoryRange(kHighShadowStart, kHighShadowEnd - 1, "high shadow");
ProtectGap(0, kLowShadowStart);
if (kHighShadowStart > kLowMemEnd)
ProtectGap(kLowMemEnd, kHighShadowStart - kLowMemEnd);
if (kHighMemStart > kHighShadowEnd)
ProtectGap(kHighShadowEnd, kHighMemStart - kHighShadowEnd);
// Define the low shadow based on the already placed low memory.
kLowShadowEnd = MEM_TO_SHADOW(kLowMemEnd);
kLowShadowStart = SHADOW_OFFSET ? SHADOW_OFFSET : MEM_TO_SHADOW(kLowMemStart);
// High shadow takes whatever memory is left up there (making sure it is not
// interfering with low memory in the fixed case).
kHighShadowEnd = MEM_TO_SHADOW(kHighMemEnd);
kHighShadowStart = Max(kLowMemEnd, MEM_TO_SHADOW(kHighShadowEnd)) + 1;
// High memory starts where allocated shadow allows.
kHighMemStart = SHADOW_TO_MEM(kHighShadowStart);
// Check the sanity of the defined memory ranges (there might be gaps).
CHECK_EQ(kHighMemStart % GetMmapGranularity(), 0);
CHECK_GT(kHighMemStart, kHighShadowEnd);
CHECK_GT(kHighShadowEnd, kHighShadowStart);
CHECK_GT(kHighShadowStart, kLowMemEnd);
CHECK_GT(kLowMemEnd, kLowMemStart);
CHECK_GT(kLowShadowEnd, kLowShadowStart);
if (SHADOW_OFFSET)
CHECK_GT(kLowShadowStart, kLowMemEnd);
else
CHECK_GT(kLowMemEnd, kLowShadowStart);
if (Verbosity())
PrintAddressSpaceLayout();
// Reserve shadow memory.
ReserveShadowMemoryRange(kLowShadowStart, kLowShadowEnd, "low shadow");
ReserveShadowMemoryRange(kHighShadowStart, kHighShadowEnd, "high shadow");
// Protect all the gaps.
ProtectGap(0, Min(kLowMemStart, kLowShadowStart));
if (SHADOW_OFFSET) {
if (kLowMemEnd + 1 < kLowShadowStart)
ProtectGap(kLowMemEnd + 1, kLowShadowStart - kLowMemEnd - 1);
if (kLowShadowEnd + 1 < kHighShadowStart)
ProtectGap(kLowShadowEnd + 1, kHighShadowStart - kLowShadowEnd - 1);
} else {
if (kLowMemEnd + 1 < kHighShadowStart)
ProtectGap(kLowMemEnd + 1, kHighShadowStart - kLowMemEnd - 1);
}
if (kHighShadowEnd + 1 < kHighMemStart)
ProtectGap(kHighShadowEnd + 1, kHighMemStart - kHighShadowEnd - 1);
return true;
}
bool MemIsApp(uptr p) {
CHECK(GetTagFromPointer(p) == 0);
return p >= kHighMemStart || (p >= kLowMemStart && p <= kLowMemEnd);
}
static void HwasanAtExit(void) {
if (flags()->print_stats && (flags()->atexit || hwasan_report_count > 0))
ReportStats();
@ -177,50 +281,65 @@ struct AccessInfo {
bool recover;
};
static AccessInfo GetAccessInfo(siginfo_t *info, ucontext_t *uc) {
// Access type is passed in a platform dependent way (see below) and encoded
// as 0xXY, where X&1 is 1 for store, 0 for load, and X&2 is 1 if the error is
// recoverable. Valid values of Y are 0 to 4, which are interpreted as
// log2(access_size), and 0xF, which means that access size is passed via
// platform dependent register (see below).
#if defined(__aarch64__)
static AccessInfo GetAccessInfo(siginfo_t *info, ucontext_t *uc) {
// Access type is encoded in HLT immediate as 0x1XY,
// where X&1 is 1 for store, 0 for load,
// and X&2 is 1 if the error is recoverable.
// Valid values of Y are 0 to 4, which are interpreted as log2(access_size),
// and 0xF, which means that access size is stored in X1 register.
// Access address is always in X0 register.
AccessInfo ai;
// Access type is encoded in BRK immediate as 0x900 + 0xXY. For Y == 0xF,
// access size is stored in X1 register. Access address is always in X0
// register.
uptr pc = (uptr)info->si_addr;
unsigned code = ((*(u32 *)pc) >> 5) & 0xffff;
if ((code & 0xff00) != 0x100)
return AccessInfo{0, 0, false, false}; // Not ours.
bool is_store = code & 0x10;
bool recover = code & 0x20;
unsigned size_log = code & 0xf;
if (size_log > 4 && size_log != 0xf)
return AccessInfo{0, 0, false, false}; // Not ours.
const unsigned code = ((*(u32 *)pc) >> 5) & 0xffff;
if ((code & 0xff00) != 0x900)
return AccessInfo{}; // Not ours.
const bool is_store = code & 0x10;
const bool recover = code & 0x20;
const uptr addr = uc->uc_mcontext.regs[0];
const unsigned size_log = code & 0xf;
if (size_log > 4 && size_log != 0xf)
return AccessInfo{}; // Not ours.
const uptr size = size_log == 0xf ? uc->uc_mcontext.regs[1] : 1U << size_log;
#elif defined(__x86_64__)
// Access type is encoded in the instruction following INT3 as
// NOP DWORD ptr [EAX + 0x40 + 0xXY]. For Y == 0xF, access size is stored in
// RSI register. Access address is always in RDI register.
uptr pc = (uptr)uc->uc_mcontext.gregs[REG_RIP];
uint8_t *nop = (uint8_t*)pc;
if (*nop != 0x0f || *(nop + 1) != 0x1f || *(nop + 2) != 0x40 ||
*(nop + 3) < 0x40)
return AccessInfo{}; // Not ours.
const unsigned code = *(nop + 3);
const bool is_store = code & 0x10;
const bool recover = code & 0x20;
const uptr addr = uc->uc_mcontext.gregs[REG_RDI];
const unsigned size_log = code & 0xf;
if (size_log > 4 && size_log != 0xf)
return AccessInfo{}; // Not ours.
const uptr size =
size_log == 0xf ? uc->uc_mcontext.gregs[REG_RSI] : 1U << size_log;
ai.is_store = is_store;
ai.is_load = !is_store;
ai.addr = uc->uc_mcontext.regs[0];
if (size_log == 0xf)
ai.size = uc->uc_mcontext.regs[1];
else
ai.size = 1U << size_log;
ai.recover = recover;
return ai;
}
#else
static AccessInfo GetAccessInfo(siginfo_t *info, ucontext_t *uc) {
return AccessInfo{0, 0, false, false};
}
# error Unsupported architecture
#endif
static bool HwasanOnSIGILL(int signo, siginfo_t *info, ucontext_t *uc) {
SignalContext sig{info, uc};
return AccessInfo{addr, size, is_store, !is_store, recover};
}
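A self-contained sketch of the 0xXY decoding convention described above (DecodeAccessCode is a made-up helper name; the real logic lives in GetAccessInfo):

#include <cassert>

struct DecodedAccess {
  bool is_store;
  bool recover;
  unsigned size_log;  // 0..4 => access size is 1 << size_log; 0xf => in a register
};

// X & 1 selects store vs. load, X & 2 marks the error recoverable, and Y is
// log2(access size) or 0xf.
static DecodedAccess DecodeAccessCode(unsigned code) {
  return DecodedAccess{(code & 0x10) != 0, (code & 0x20) != 0, code & 0xf};
}

int main() {
  const DecodedAccess a = DecodeAccessCode(0x13);  // non-recoverable 8-byte store
  assert(a.is_store && !a.recover && a.size_log == 3);
  const DecodedAccess b = DecodeAccessCode(0x2f);  // recoverable load, size in reg
  assert(!b.is_store && b.recover && b.size_log == 0xf);
  return 0;
}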
static bool HwasanOnSIGTRAP(int signo, siginfo_t *info, ucontext_t *uc) {
AccessInfo ai = GetAccessInfo(info, uc);
if (!ai.is_store && !ai.is_load)
return false;
InternalScopedBuffer<BufferedStackTrace> stack_buffer(1);
InternalMmapVector<BufferedStackTrace> stack_buffer(1);
BufferedStackTrace *stack = stack_buffer.data();
stack->Reset();
SignalContext sig{info, uc};
GetStackTrace(stack, kStackTraceMax, sig.pc, sig.bp, uc,
common_flags()->fast_unwind_on_fatal);
@ -230,7 +349,12 @@ static bool HwasanOnSIGILL(int signo, siginfo_t *info, ucontext_t *uc) {
if (flags()->halt_on_error || !ai.recover)
Die();
#if defined(__aarch64__)
uc->uc_mcontext.pc += 4;
#elif defined(__x86_64__)
#else
# error Unsupported architecture
#endif
return true;
}
@ -242,8 +366,8 @@ static void OnStackUnwind(const SignalContext &sig, const void *,
void HwasanOnDeadlySignal(int signo, void *info, void *context) {
// Probably a tag mismatch.
if (signo == SIGILL)
if (HwasanOnSIGILL(signo, (siginfo_t *)info, (ucontext_t*)context))
if (signo == SIGTRAP)
if (HwasanOnSIGTRAP(signo, (siginfo_t *)info, (ucontext_t*)context))
return;
HandleDeadlySignal(info, context, GetTid(), &OnStackUnwind, nullptr);

View File

@ -0,0 +1,85 @@
//===-- hwasan_mapping.h ----------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file is a part of HWAddressSanitizer and defines memory mapping.
///
//===----------------------------------------------------------------------===//
#ifndef HWASAN_MAPPING_H
#define HWASAN_MAPPING_H
#include "sanitizer_common/sanitizer_internal_defs.h"
// Typical mapping on Linux/x86_64 with fixed shadow mapping:
// || [0x080000000000, 0x7fffffffffff] || HighMem ||
// || [0x008000000000, 0x07ffffffffff] || HighShadow ||
// || [0x000100000000, 0x007fffffffff] || ShadowGap ||
// || [0x000010000000, 0x0000ffffffff] || LowMem ||
// || [0x000001000000, 0x00000fffffff] || LowShadow ||
// || [0x000000000000, 0x000000ffffff] || ShadowGap ||
//
// and with dynamic shadow mapped at [0x770d59f40000, 0x7f0d59f40000]:
// || [0x7f0d59f40000, 0x7fffffffffff] || HighMem ||
// || [0x7efe2f934000, 0x7f0d59f3ffff] || HighShadow ||
// || [0x7e7e2f934000, 0x7efe2f933fff] || ShadowGap ||
// || [0x770d59f40000, 0x7e7e2f933fff] || LowShadow ||
// || [0x000000000000, 0x770d59f3ffff] || LowMem ||
// Typical mapping on Android/AArch64 (39-bit VMA):
// || [0x001000000000, 0x007fffffffff] || HighMem ||
// || [0x000800000000, 0x000fffffffff] || ShadowGap ||
// || [0x000100000000, 0x0007ffffffff] || HighShadow ||
// || [0x000010000000, 0x0000ffffffff] || LowMem ||
// || [0x000001000000, 0x00000fffffff] || LowShadow ||
// || [0x000000000000, 0x000000ffffff] || ShadowGap ||
//
// and with dynamic shadow mapped: [0x007477480000, 0x007c77480000]:
// || [0x007c77480000, 0x007fffffffff] || HighMem ||
// || [0x007c3ebc8000, 0x007c7747ffff] || HighShadow ||
// || [0x007bbebc8000, 0x007c3ebc7fff] || ShadowGap ||
// || [0x007477480000, 0x007bbebc7fff] || LowShadow ||
// || [0x000000000000, 0x00747747ffff] || LowMem ||
static constexpr __sanitizer::u64 kDefaultShadowSentinel = ~(__sanitizer::u64)0;
// Reasonable values are 4 (for 1/16th shadow) and 6 (for 1/64th).
constexpr __sanitizer::uptr kShadowScale = 4;
constexpr __sanitizer::uptr kShadowAlignment = 1ULL << kShadowScale;
#if SANITIZER_ANDROID
# define HWASAN_FIXED_MAPPING 0
#else
# define HWASAN_FIXED_MAPPING 1
#endif
#if HWASAN_FIXED_MAPPING
# define SHADOW_OFFSET (0)
# define HWASAN_PREMAP_SHADOW 0
#else
# define SHADOW_OFFSET (__hwasan_shadow_memory_dynamic_address)
# define HWASAN_PREMAP_SHADOW 1
#endif
#define SHADOW_GRANULARITY (1ULL << kShadowScale)
#define MEM_TO_SHADOW(mem) (((uptr)(mem) >> kShadowScale) + SHADOW_OFFSET)
#define SHADOW_TO_MEM(shadow) (((uptr)(shadow) - SHADOW_OFFSET) << kShadowScale)
#define MEM_TO_SHADOW_SIZE(size) ((uptr)(size) >> kShadowScale)
#define MEM_IS_APP(mem) MemIsApp((uptr)(mem))
namespace __hwasan {
bool MemIsApp(uptr p);
} // namespace __hwasan
#endif // HWASAN_MAPPING_H
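A small numeric sketch of the address arithmetic defined above (illustration only; SHADOW_OFFSET is taken as 0, i.e. the fixed-mapping case):

#include <cassert>
#include <cstdint>

int main() {
  const uintptr_t kShadowScale = 4;   // one shadow byte covers 16 bytes of memory
  const uintptr_t shadow_offset = 0;  // fixed-mapping case
  auto mem_to_shadow = [&](uintptr_t mem) {
    return (mem >> kShadowScale) + shadow_offset;
  };
  auto shadow_to_mem = [&](uintptr_t sh) {
    return (sh - shadow_offset) << kShadowScale;
  };
  assert(mem_to_shadow(0x4010) == 0x401);  // 0x4010..0x401f share one shadow cell
  assert(mem_to_shadow(0x401f) == 0x401);
  assert(shadow_to_mem(0x401) == 0x4010);  // maps back to the start of the granule
  return 0;
}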

View File

@ -1,4 +1,4 @@
//===-- hwasan_new_delete.cc ------------------------------------------------===//
//===-- hwasan_new_delete.cc ----------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@ -15,6 +15,7 @@
#include "hwasan.h"
#include "interception/interception.h"
#include "sanitizer_common/sanitizer_allocator.h"
#include "sanitizer_common/sanitizer_allocator_report.h"
#if HWASAN_REPLACE_OPERATORS_NEW_AND_DELETE
@ -32,7 +33,7 @@ namespace std {
#define OPERATOR_NEW_BODY(nothrow) \
GET_MALLOC_STACK_TRACE; \
void *res = hwasan_malloc(size, &stack);\
if (!nothrow && UNLIKELY(!res)) DieOnFailure::OnOOM();\
if (!nothrow && UNLIKELY(!res)) ReportOutOfMemory(size, &stack);\
return res
INTERCEPTOR_ATTRIBUTE

Some files were not shown because too many files have changed in this diff.