mirror of https://git.FreeBSD.org/src.git

Merge compiler-rt trunk r338150, and resolve conflicts.

Dimitry Andric 2018-08-02 17:06:03 +00:00
commit 68dc77c284
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/projects/clang700-import/; revision=337136
326 changed files with 32031 additions and 4454 deletions

View File

@ -14,7 +14,7 @@ Full text of the relevant licenses is included below.
University of Illinois/NCSA
Open Source License
Copyright (c) 2009-2016 by the contributors listed in CREDITS.TXT
Copyright (c) 2009-2018 by the contributors listed in CREDITS.TXT
All rights reserved.

View File

@ -65,6 +65,11 @@ extern "C" {
void __sanitizer_unaligned_store32(void *p, uint32_t x);
void __sanitizer_unaligned_store64(void *p, uint64_t x);
// Returns 1 on the first call, then returns 0 thereafter. Called by the tool
// to ensure only one report is printed when multiple errors occur
// simultaneously.
int __sanitizer_acquire_crash_state();
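
As a rough illustration of the intended use (not part of this patch; it only assumes the declaration above is reachable through the public sanitizer header), a tool-side reporter can use the crash state to make sure only the first failing thread prints:

#include <sanitizer/common_interface_defs.h>
#include <cstdio>
#include <cstdlib>

static void ReportFatalError(const char *msg) {
  // Only the first caller acquires the crash state; concurrent or repeated
  // errors bail out instead of producing interleaved reports.
  if (!__sanitizer_acquire_crash_state())
    return;
  std::fprintf(stderr, "fatal: %s\n", msg);
  std::abort();
}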
// Annotate the current state of a contiguous container, such as
// std::vector, std::string or similar.
// A contiguous container is a container that keeps all of its elements

View File

@ -104,6 +104,14 @@ extern "C" {
copy. Source and destination regions can overlap. */
void __msan_copy_shadow(const volatile void *dst, const volatile void *src,
size_t size);
/* Disables uninitialized memory checks in interceptors. */
void __msan_scoped_disable_interceptor_checks(void);
/* Re-enables uninitialized memory checks in interceptors after a previous
call to __msan_scoped_disable_interceptor_checks. */
void __msan_scoped_enable_interceptor_checks(void);
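
A small usage sketch (assumes MSan is active and the declarations above are reached via <sanitizer/msan_interface.h>): silence interceptor checks only around a call that intentionally touches uninitialized bytes.

#include <sanitizer/msan_interface.h>
#include <cstddef>
#include <cstring>

void CopyPossiblyUninitialized(char *dst, const char *src, size_t len) {
  // src may legitimately contain uninitialized padding that we want to copy
  // verbatim; memcpy is intercepted by MSan, so suppress checks briefly.
  __msan_scoped_disable_interceptor_checks();
  std::memcpy(dst, src, len);
  __msan_scoped_enable_interceptor_checks();
}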
#ifdef __cplusplus
} // extern "C"
#endif

File diff suppressed because it is too large

View File

@ -26,7 +26,12 @@ extern "C" {
// the hard limit (HardLimit=1) or the soft limit (HardLimit=0). The limit
// can be removed by setting LimitMb to 0. This function's parameters should
// be fully trusted to avoid security mishaps.
void __scudo_set_rss_limit(unsigned long LimitMb, int HardLimit);
void __scudo_set_rss_limit(size_t LimitMb, int HardLimit);
// This function outputs various allocator statistics for both the Primary
// and Secondary allocators, including memory usage, number of allocations
// and deallocations.
void __scudo_print_stats(void);
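
For illustration (assuming the binary links against Scudo and includes <sanitizer/scudo_interface.h>), the two calls might be used like this:

#include <sanitizer/scudo_interface.h>

int main() {
  // Impose a 2 GB soft RSS limit; passing LimitMb = 0 later removes it.
  __scudo_set_rss_limit(2048, /*HardLimit=*/0);
  // ... allocate and free as usual ...
  __scudo_print_stats();  // dump Primary/Secondary allocator statistics
  return 0;
}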
#ifdef __cplusplus
} // extern "C"
#endif

View File

@ -27,6 +27,7 @@ enum XRayEntryType {
TAIL = 2,
LOG_ARGS_ENTRY = 3,
CUSTOM_EVENT = 4,
TYPED_EVENT = 5,
};
/// Provide a function to invoke for when instrumentation points are hit. This
@ -68,12 +69,23 @@ extern int __xray_set_handler_arg1(void (*entry)(int32_t, XRayEntryType,
extern int __xray_remove_handler_arg1();
/// Provide a function to invoke when XRay encounters a custom event.
extern int __xray_set_customevent_handler(void (*entry)(void*, std::size_t));
extern int __xray_set_customevent_handler(void (*entry)(void *, std::size_t));
/// This removes whatever the currently provided custom event handler is.
/// Returns 1 on success, 0 on error.
extern int __xray_remove_customevent_handler();
/// Set a handler for xray typed event logging. The first parameter is a type
/// identifier, the second is a payload, and the third is the payload size.
extern int __xray_set_typedevent_handler(void (*entry)(uint16_t, const void *,
std::size_t));
/// Removes the currently set typed event handler.
/// Returns 1 on success, 0 on error.
extern int __xray_remove_typedevent_handler();
extern uint16_t __xray_register_event_type(const char *event_type);
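
A hedged sketch of wiring up the new typed-event API (the handler and event-type names are made up; the declarations come from <xray/xray_interface.h>):

#include <xray/xray_interface.h>
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Matches the handler signature above: type id, payload pointer, payload size.
static void MyTypedHandler(uint16_t type, const void *payload,
                           std::size_t size) {
  std::fprintf(stderr, "typed event %u, %zu bytes at %p\n",
               static_cast<unsigned>(type), size, payload);
}

void InstallTypedEventHandler() {
  // Obtain an id for our event type, then install the handler.
  uint16_t my_type = __xray_register_event_type("my-subsystem-event");
  (void)my_type;  // would be passed along when emitting events
  if (!__xray_set_typedevent_handler(MyTypedHandler))
    std::fprintf(stderr, "failed to install typed event handler\n");
}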
enum XRayPatchingStatus {
NOT_INITIALIZED = 0,
SUCCESS = 1,

View File

@ -21,27 +21,29 @@
///
/// The high-level usage pattern for these APIs looks like the following:
///
/// // Before we try initializing the log implementation, we must set it as
/// // the log implementation. We provide the function pointers that define
/// // the various initialization, finalization, and other pluggable hooks
/// // that we need.
/// __xray_set_log_impl({...});
/// // We choose the mode which we'd like to install, and check whether this
/// // has succeeded. Each mode will have their own set of flags they will
/// // support, outside of the global XRay configuration options that are
/// // defined in the XRAY_OPTIONS environment variable.
/// auto select_status = __xray_log_select_mode("xray-fdr");
/// if (select_status != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
/// // This failed, we should not proceed with attempting to initialise
/// // the currently selected mode.
/// return;
/// }
///
/// // Once that's done, we can now initialize the implementation. Each
/// // implementation has a chance to let users customize the implementation
/// // with a struct that their implementation supports. Roughly this might
/// // look like:
/// MyImplementationOptions opts;
/// opts.enable_feature = true;
/// ...
/// auto init_status = __xray_log_init(
/// BufferSize, MaxBuffers, &opts, sizeof opts);
/// if (init_status != XRayLogInitStatus::XRAY_LOG_INITIALIZED) {
/// // Once that's done, we can now attempt to configure the implementation.
/// // To do this, we provide the string flags configuration for the mode.
/// auto config_status = __xray_log_init_mode(
/// "xray-fdr", "verbosity=1 some_flag=1 another_flag=2");
/// if (config_status != XRayLogInitStatus::XRAY_LOG_INITIALIZED) {
/// // deal with the error here, if there is one.
/// }
///
/// // When the log implementation has had the chance to initialize, we can
/// // now patch the sleds.
/// // now patch the instrumentation points. Note that we could have patched
/// // the instrumentation points first, but there's no strict ordering to
/// // these operations.
/// auto patch_status = __xray_patch();
/// if (patch_status != XRayPatchingStatus::SUCCESS) {
/// // deal with the error here, if it is an error.
@ -56,12 +58,12 @@
///
/// // We can optionally wait before flushing the log to give other threads a
/// // chance to see that the implementation is already finalized. Also, at
/// // this point we can optionally unpatch the sleds to reduce overheads at
/// // runtime.
/// // this point we can optionally unpatch the instrumentation points to
/// // reduce overheads at runtime.
/// auto unpatch_status = __xray_unpatch();
/// if (unpatch_status != XRayPatchingStatus::SUCCESS) {
// // deal with the error here, if it is an error.
// }
/// // deal with the error here, if it is an error.
/// }
///
/// // If there are logs or data to be flushed somewhere, we can do so only
/// // after we've finalized the log. Some implementations may not actually
@ -72,6 +74,17 @@
/// // deal with the error here, if it is an error.
/// }
///
/// // Alternatively, we can go through the buffers ourselves without
/// // relying on the implementations' flushing semantics (if the
/// // implementation supports exporting this data directly).
/// auto MyBufferProcessor = +[](const char* mode, XRayBuffer buffer) {
/// // Check the "mode" to see if it's something we know how to handle...
/// // and/or do something with an XRayBuffer instance.
/// };
/// auto process_status = __xray_log_process_buffers(MyBufferProcessor);
/// if (process_status != XRayLogFlushStatus::XRAY_LOG_FLUSHED) {
/// // deal with the error here, if it is an error.
/// }
///
/// NOTE: Before calling __xray_patch() again, consider re-initializing the
/// implementation first. Some implementations might stay in an "off" state when
@ -182,9 +195,13 @@ struct XRayLogImpl {
XRayLogFlushStatus (*flush_log)();
};
/// DEPRECATED: Use the mode registration workflow instead with
/// __xray_log_register_mode(...) and __xray_log_select_mode(...). See the
/// documentation for those functions.
///
/// This function installs a new logging implementation that XRay will use. In
/// case there are any nullptr members in Impl, XRay will *uninstall any
/// existing implementations*. It does NOT patch the instrumentation sleds.
/// existing implementations*. It does NOT patch the instrumentation points.
///
/// NOTE: This function does NOT attempt to finalize the currently installed
/// implementation. Use with caution.
@ -227,9 +244,14 @@ XRayLogRegisterStatus __xray_log_register_mode(const char *Mode,
/// does not update the currently installed implementation.
XRayLogRegisterStatus __xray_log_select_mode(const char *Mode);
/// Returns an identifier for the currently selected XRay mode chosen through
/// the __xray_log_select_mode(...) function call. Returns nullptr if there is
/// no currently installed mode.
const char *__xray_log_get_current_mode();
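
A brief sketch of the select/query pair (assuming the built-in "xray-basic" mode is available in the runtime):

#include <xray/xray_log_interface.h>
#include <cstdio>

void SelectBasicMode() {
  if (__xray_log_select_mode("xray-basic") ==
      XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
    // Returns the selected mode's name, or nullptr if none is installed.
    std::printf("current mode: %s\n", __xray_log_get_current_mode());
  }
}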
/// This function removes the currently installed implementation. It will also
/// uninstall any handlers that have been previously installed. It does NOT
/// unpatch the instrumentation sleds.
/// unpatch the instrumentation points.
///
/// NOTE: This function does NOT attempt to finalize the currently installed
/// implementation. Use with caution.
@ -244,11 +266,37 @@ XRayLogRegisterStatus __xray_log_select_mode(const char *Mode);
/// called while in any other states.
void __xray_remove_log_impl();
/// DEPRECATED: Use __xray_log_init_mode() instead, and provide all the options
/// in string form.
/// Invokes the installed implementation initialization routine. See
/// XRayLogInitStatus for what the return values mean.
XRayLogInitStatus __xray_log_init(size_t BufferSize, size_t MaxBuffers,
void *Args, size_t ArgsSize);
/// Invokes the installed initialization routine, which *must* support the
/// string based form.
///
/// NOTE: When this API is used, we still invoke the installed initialization
/// routine, but we will call it with the following convention to signal that we
/// are using the string form:
///
/// - BufferSize = 0
/// - MaxBuffers = 0
/// - ArgsSize = 0
/// - Args will be the pointer to the character buffer representing the
/// configuration.
///
/// FIXME: Updating the XRayLogImpl struct is an ABI breaking change. When we
/// are ready to make a breaking change, we should clean this up appropriately.
XRayLogInitStatus __xray_log_init_mode(const char *Mode, const char *Config);
/// Like __xray_log_init_mode(...) this version allows for providing
/// configurations that might have non-null-terminated strings. This will
/// operate similarly to __xray_log_init_mode, with the exception that
/// |ArgsSize| will be what |ConfigSize| is.
XRayLogInitStatus __xray_log_init_mode_bin(const char *Mode, const char *Config,
size_t ConfigSize);
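
The calling convention described above matters mostly to mode implementers; here is a sketch of how an installed init routine might branch on it (the function name is hypothetical, and it would be registered through the XRayLogImpl / __xray_log_register_mode machinery):

#include <xray/xray_log_interface.h>
#include <cstddef>

static XRayLogInitStatus MyModeInit(std::size_t BufferSize,
                                    std::size_t MaxBuffers, void *Args,
                                    std::size_t ArgsSize) {
  if (BufferSize == 0 && MaxBuffers == 0) {
    // String form: Args is the flag string. ArgsSize is 0 when coming from
    // __xray_log_init_mode(), or the config length from the *_bin variant.
    const char *Config = static_cast<const char *>(Args);
    // ... parse "verbosity=1 some_flag=1 ..." style options here ...
    (void)Config;
    (void)ArgsSize;
  } else {
    // Legacy __xray_log_init(BufferSize, MaxBuffers, Args, ArgsSize) path.
  }
  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
}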
/// Invokes the installed implementation finalization routine. See
/// XRayLogInitStatus for what the return values mean.
XRayLogInitStatus __xray_log_finalize();
@ -257,16 +305,68 @@ XRayLogInitStatus __xray_log_finalize();
/// XRayLogFlushStatus for what the return values mean.
XRayLogFlushStatus __xray_log_flushLog();
/// An XRayBuffer represents a section of memory which can be treated by log
/// processing functions as bytes stored in the logging implementation's
/// buffers.
struct XRayBuffer {
const void *Data;
size_t Size;
};
/// Registers an iterator function which takes an XRayBuffer argument, then
/// returns another XRayBuffer representing the next buffer. When the
/// Iterator function returns an empty XRayBuffer (Data = nullptr, Size = 0),
/// this signifies the end of the buffers.
///
/// The first invocation of this Iterator function will always take an empty
/// XRayBuffer (Data = nullptr, Size = 0).
void __xray_log_set_buffer_iterator(XRayBuffer (*Iterator)(XRayBuffer));
/// Removes the currently registered buffer iterator function.
void __xray_log_remove_buffer_iterator();
/// Invokes the provided handler to process data maintained by the logging
/// implementation. The handler is given raw access to the data available in
/// memory from the logging implementation. The callback function must:
///
/// 1) Not modify the data, to avoid running into undefined behaviour.
///
/// 2) Either know the data layout, or treat the data as raw bytes for later
/// interpretation.
///
/// This API is best used in place of the `__xray_log_flushLog()` implementation
/// above to enable the caller to provide an alternative means of extracting the
/// data from the XRay implementation.
///
/// Implementations MUST then provide:
///
/// 1) A function that will return an XRayBuffer. Functions that return an
/// "empty" XRayBuffer signify that there are no more buffers to be
/// processed. This function should be registered through the
/// `__xray_log_set_buffer_iterator(...)` function.
///
/// 2) Its own means of converting data it holds in memory into an XRayBuffer
/// structure.
///
/// See XRayLogFlushStatus for what the return values mean.
///
XRayLogFlushStatus __xray_log_process_buffers(void (*Processor)(const char *,
XRayBuffer));
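
On the consuming side, the processor callback described above can be as simple as the following (hedged sketch; what the bytes mean depends entirely on the installed mode):

#include <xray/xray_log_interface.h>
#include <cstdio>

static void DumpBuffer(const char *Mode, XRayBuffer Buf) {
  // Treat the contents as opaque bytes; a real consumer would check Mode and
  // decode the buffer according to that mode's layout.
  std::fprintf(stderr, "mode=%s: buffer of %zu bytes at %p\n", Mode, Buf.Size,
               Buf.Data);
}

void DrainXRayBuffers() {
  if (__xray_log_process_buffers(DumpBuffer) !=
      XRayLogFlushStatus::XRAY_LOG_FLUSHED)
    std::fprintf(stderr, "no buffers were processed\n");
}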
} // extern "C"
namespace __xray {
/// DEPRECATED: Use __xray_log_init_mode(...) instead, and provide flag
/// configuration strings to set the options instead.
/// Options used by the LLVM XRay FDR logging implementation.
struct FDRLoggingOptions {
bool ReportErrors = false;
int Fd = -1;
};
/// DEPRECATED: Use __xray_log_init_mode(...) instead, and provide flag
/// configuration strings to set the options instead.
/// Options used by the LLVM XRay Basic (Naive) logging implementation.
struct BasicLoggingOptions {
int DurationFilterMicros = 0;

View File

@ -54,7 +54,7 @@ struct alignas(32) XRayFileHeader {
union {
char FreeForm[16];
// The current civiltime timestamp, as retrived from 'clock_gettime'. This
// The current civiltime timestamp, as retrieved from 'clock_gettime'. This
// allows readers of the file to determine when the file was created or
// written down.
struct timespec TS;
@ -95,8 +95,11 @@ struct alignas(32) XRayRecord {
// The thread ID for the currently running thread.
uint32_t TId = 0;
// The ID of the process that is currently running.
uint32_t PId = 0;
// Use some bytes in the end of the record for buffers.
char Buffer[4] = {};
char Buffer[8] = {};
} __attribute__((packed));
static_assert(sizeof(XRayRecord) == 32, "XRayRecord != 32 bytes");
@ -115,8 +118,8 @@ struct alignas(32) XRayArgPayload {
// The thread ID for the currently running thread.
uint32_t TId = 0;
// Add more padding.
uint8_t Padding2[4] = {};
// The ID of the process that is currently running.
uint32_t PId = 0;
// The argument payload.
uint64_t Arg = 0;

View File

@ -134,8 +134,9 @@ struct AsanChunk: ChunkBase {
};
struct QuarantineCallback {
explicit QuarantineCallback(AllocatorCache *cache)
: cache_(cache) {
QuarantineCallback(AllocatorCache *cache, BufferedStackTrace *stack)
: cache_(cache),
stack_(stack) {
}
void Recycle(AsanChunk *m) {
@ -168,7 +169,7 @@ struct QuarantineCallback {
void *res = get_allocator().Allocate(cache_, size, 1);
// TODO(alekseys): Consider making quarantine OOM-friendly.
if (UNLIKELY(!res))
return DieOnFailure::OnOOM();
ReportOutOfMemory(size, stack_);
return res;
}
@ -176,7 +177,9 @@ struct QuarantineCallback {
get_allocator().Deallocate(cache_, p);
}
AllocatorCache *cache_;
private:
AllocatorCache* const cache_;
BufferedStackTrace* const stack_;
};
typedef Quarantine<QuarantineCallback, AsanChunk> AsanQuarantine;
@ -397,8 +400,11 @@ struct Allocator {
AllocType alloc_type, bool can_fill) {
if (UNLIKELY(!asan_inited))
AsanInitFromRtl();
if (RssLimitExceeded())
return AsanAllocator::FailureHandler::OnOOM();
if (RssLimitExceeded()) {
if (AllocatorMayReturnNull())
return nullptr;
ReportRssLimitExceeded(stack);
}
Flags &fl = *flags();
CHECK(stack);
const uptr min_alignment = SHADOW_GRANULARITY;
@ -431,9 +437,13 @@ struct Allocator {
}
CHECK(IsAligned(needed_size, min_alignment));
if (size > kMaxAllowedMallocSize || needed_size > kMaxAllowedMallocSize) {
Report("WARNING: AddressSanitizer failed to allocate 0x%zx bytes\n",
(void*)size);
return AsanAllocator::FailureHandler::OnBadRequest();
if (AllocatorMayReturnNull()) {
Report("WARNING: AddressSanitizer failed to allocate 0x%zx bytes\n",
(void*)size);
return nullptr;
}
ReportAllocationSizeTooBig(size, needed_size, kMaxAllowedMallocSize,
stack);
}
AsanThread *t = GetCurrentThread();
@ -446,8 +456,12 @@ struct Allocator {
AllocatorCache *cache = &fallback_allocator_cache;
allocated = allocator.Allocate(cache, needed_size, 8);
}
if (!allocated)
return nullptr;
if (UNLIKELY(!allocated)) {
SetAllocatorOutOfMemory();
if (AllocatorMayReturnNull())
return nullptr;
ReportOutOfMemory(size, stack);
}
if (*(u8 *)MEM_TO_SHADOW((uptr)allocated) == 0 && CanPoisonMemory()) {
// Heap poisoning is enabled, but the allocator provides an unpoisoned
@ -583,13 +597,13 @@ struct Allocator {
if (t) {
AsanThreadLocalMallocStorage *ms = &t->malloc_storage();
AllocatorCache *ac = GetAllocatorCache(ms);
quarantine.Put(GetQuarantineCache(ms), QuarantineCallback(ac), m,
m->UsedSize());
quarantine.Put(GetQuarantineCache(ms), QuarantineCallback(ac, stack), m,
m->UsedSize());
} else {
SpinMutexLock l(&fallback_mutex);
AllocatorCache *ac = &fallback_allocator_cache;
quarantine.Put(&fallback_quarantine_cache, QuarantineCallback(ac), m,
m->UsedSize());
quarantine.Put(&fallback_quarantine_cache, QuarantineCallback(ac, stack),
m, m->UsedSize());
}
}
@ -660,8 +674,11 @@ struct Allocator {
}
void *Calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
if (CheckForCallocOverflow(size, nmemb))
return AsanAllocator::FailureHandler::OnBadRequest();
if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) {
if (AllocatorMayReturnNull())
return nullptr;
ReportCallocOverflow(nmemb, size, stack);
}
void *ptr = Allocate(nmemb * size, 8, stack, FROM_MALLOC, false);
// If the memory comes from the secondary allocator no need to clear it
// as it comes directly from mmap.
@ -677,9 +694,9 @@ struct Allocator {
ReportFreeNotMalloced((uptr)ptr, stack);
}
void CommitBack(AsanThreadLocalMallocStorage *ms) {
void CommitBack(AsanThreadLocalMallocStorage *ms, BufferedStackTrace *stack) {
AllocatorCache *ac = GetAllocatorCache(ms);
quarantine.Drain(GetQuarantineCache(ms), QuarantineCallback(ac));
quarantine.Drain(GetQuarantineCache(ms), QuarantineCallback(ac, stack));
allocator.SwallowCache(ac);
}
@ -739,17 +756,19 @@ struct Allocator {
return AsanChunkView(m1);
}
void Purge() {
void Purge(BufferedStackTrace *stack) {
AsanThread *t = GetCurrentThread();
if (t) {
AsanThreadLocalMallocStorage *ms = &t->malloc_storage();
quarantine.DrainAndRecycle(GetQuarantineCache(ms),
QuarantineCallback(GetAllocatorCache(ms)));
QuarantineCallback(GetAllocatorCache(ms),
stack));
}
{
SpinMutexLock l(&fallback_mutex);
quarantine.DrainAndRecycle(&fallback_quarantine_cache,
QuarantineCallback(&fallback_allocator_cache));
QuarantineCallback(&fallback_allocator_cache,
stack));
}
allocator.ForceReleaseToOS();
@ -836,7 +855,8 @@ AsanChunkView FindHeapChunkByAllocBeg(uptr addr) {
}
void AsanThreadLocalMallocStorage::CommitBack() {
instance.CommitBack(this);
GET_STACK_TRACE_MALLOC;
instance.CommitBack(this, &stack);
}
void PrintInternalAllocatorStats() {
@ -883,7 +903,9 @@ void *asan_pvalloc(uptr size, BufferedStackTrace *stack) {
uptr PageSize = GetPageSizeCached();
if (UNLIKELY(CheckForPvallocOverflow(size, PageSize))) {
errno = errno_ENOMEM;
return AsanAllocator::FailureHandler::OnBadRequest();
if (AllocatorMayReturnNull())
return nullptr;
ReportPvallocOverflow(size, stack);
}
// pvalloc(0) should allocate one page.
size = size ? RoundUpTo(size, PageSize) : PageSize;
@ -895,20 +917,35 @@ void *asan_memalign(uptr alignment, uptr size, BufferedStackTrace *stack,
AllocType alloc_type) {
if (UNLIKELY(!IsPowerOfTwo(alignment))) {
errno = errno_EINVAL;
return AsanAllocator::FailureHandler::OnBadRequest();
if (AllocatorMayReturnNull())
return nullptr;
ReportInvalidAllocationAlignment(alignment, stack);
}
return SetErrnoOnNull(
instance.Allocate(size, alignment, stack, alloc_type, true));
}
void *asan_aligned_alloc(uptr alignment, uptr size, BufferedStackTrace *stack) {
if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(alignment, size))) {
errno = errno_EINVAL;
if (AllocatorMayReturnNull())
return nullptr;
ReportInvalidAlignedAllocAlignment(size, alignment, stack);
}
return SetErrnoOnNull(
instance.Allocate(size, alignment, stack, FROM_MALLOC, true));
}
int asan_posix_memalign(void **memptr, uptr alignment, uptr size,
BufferedStackTrace *stack) {
if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) {
AsanAllocator::FailureHandler::OnBadRequest();
return errno_EINVAL;
if (AllocatorMayReturnNull())
return errno_EINVAL;
ReportInvalidPosixMemalignAlignment(alignment, stack);
}
void *ptr = instance.Allocate(size, alignment, stack, FROM_MALLOC, true);
if (UNLIKELY(!ptr))
// OOM error is already taken care of by Allocate.
return errno_ENOMEM;
CHECK(IsAligned((uptr)ptr, alignment));
*memptr = ptr;
@ -1054,7 +1091,8 @@ uptr __sanitizer_get_allocated_size(const void *p) {
}
void __sanitizer_purge_allocator() {
instance.Purge();
GET_STACK_TRACE_MALLOC;
instance.Purge(&stack);
}
#if !SANITIZER_SUPPORTS_WEAK_HOOKS

View File

@ -125,11 +125,12 @@ const uptr kAllocatorSpace = ~(uptr)0;
const uptr kAllocatorSize = 0x40000000000ULL; // 4T.
typedef DefaultSizeClassMap SizeClassMap;
# elif defined(__powerpc64__)
const uptr kAllocatorSpace = 0xa0000000000ULL;
const uptr kAllocatorSpace = ~(uptr)0;
const uptr kAllocatorSize = 0x20000000000ULL; // 2T.
typedef DefaultSizeClassMap SizeClassMap;
# elif defined(__aarch64__) && SANITIZER_ANDROID
const uptr kAllocatorSpace = 0x3000000000ULL;
// Android needs to support 39, 42 and 48 bit VMA.
const uptr kAllocatorSpace = ~(uptr)0;
const uptr kAllocatorSize = 0x2000000000ULL; // 128G.
typedef VeryCompactSizeClassMap SizeClassMap;
# elif defined(__aarch64__)
@ -207,6 +208,7 @@ void *asan_realloc(void *p, uptr size, BufferedStackTrace *stack);
void *asan_valloc(uptr size, BufferedStackTrace *stack);
void *asan_pvalloc(uptr size, BufferedStackTrace *stack);
void *asan_aligned_alloc(uptr alignment, uptr size, BufferedStackTrace *stack);
int asan_posix_memalign(void **memptr, uptr alignment, uptr size,
BufferedStackTrace *stack);
uptr asan_malloc_usable_size(const void *ptr, uptr pc, uptr bp);

View File

@ -27,7 +27,8 @@ using namespace __asan;
static void FindInfoForStackVar(uptr addr, const char *frame_descr, uptr offset,
char *name, uptr name_size,
uptr &region_address, uptr &region_size) {
InternalMmapVector<StackVarDescr> vars(16);
InternalMmapVector<StackVarDescr> vars;
vars.reserve(16);
if (!ParseFrameDescription(frame_descr, &vars)) {
return;
}

View File

@ -20,23 +20,25 @@
namespace __asan {
// Return " (thread_name) " or an empty string if the name is empty.
const char *ThreadNameWithParenthesis(AsanThreadContext *t, char buff[],
uptr buff_len) {
const char *name = t->name;
if (name[0] == '\0') return "";
buff[0] = 0;
internal_strncat(buff, " (", 3);
internal_strncat(buff, name, buff_len - 4);
internal_strncat(buff, ")", 2);
return buff;
AsanThreadIdAndName::AsanThreadIdAndName(AsanThreadContext *t) {
Init(t->tid, t->name);
}
const char *ThreadNameWithParenthesis(u32 tid, char buff[], uptr buff_len) {
if (tid == kInvalidTid) return "";
asanThreadRegistry().CheckLocked();
AsanThreadContext *t = GetThreadContextByTidLocked(tid);
return ThreadNameWithParenthesis(t, buff, buff_len);
AsanThreadIdAndName::AsanThreadIdAndName(u32 tid) {
if (tid == kInvalidTid) {
Init(tid, "");
} else {
asanThreadRegistry().CheckLocked();
AsanThreadContext *t = GetThreadContextByTidLocked(tid);
Init(tid, t->name);
}
}
void AsanThreadIdAndName::Init(u32 tid, const char *tname) {
int len = internal_snprintf(name, sizeof(name), "T%d", tid);
CHECK(((unsigned int)len) < sizeof(name));
if (tname[0] != '\0')
internal_snprintf(&name[len], sizeof(name) - len, " (%s)", tname);
}
void DescribeThread(AsanThreadContext *context) {
@ -47,18 +49,15 @@ void DescribeThread(AsanThreadContext *context) {
return;
}
context->announced = true;
char tname[128];
InternalScopedString str(1024);
str.append("Thread T%d%s", context->tid,
ThreadNameWithParenthesis(context->tid, tname, sizeof(tname)));
str.append("Thread %s", AsanThreadIdAndName(context).c_str());
if (context->parent_tid == kInvalidTid) {
str.append(" created by unknown thread\n");
Printf("%s", str.data());
return;
}
str.append(
" created by T%d%s here:\n", context->parent_tid,
ThreadNameWithParenthesis(context->parent_tid, tname, sizeof(tname)));
str.append(" created by %s here:\n",
AsanThreadIdAndName(context->parent_tid).c_str());
Printf("%s", str.data());
StackDepotGet(context->stack_id).Print();
// Recursively describe parent thread if needed.
@ -358,10 +357,9 @@ bool GlobalAddressDescription::PointsInsideTheSameVariable(
void StackAddressDescription::Print() const {
Decorator d;
char tname[128];
Printf("%s", d.Location());
Printf("Address %p is located in stack of thread T%d%s", addr, tid,
ThreadNameWithParenthesis(tid, tname, sizeof(tname)));
Printf("Address %p is located in stack of thread %s", addr,
AsanThreadIdAndName(tid).c_str());
if (!frame_descr) {
Printf("%s\n", d.Default());
@ -380,7 +378,8 @@ void StackAddressDescription::Print() const {
StackTrace alloca_stack(&frame_pc, 1);
alloca_stack.Print();
InternalMmapVector<StackVarDescr> vars(16);
InternalMmapVector<StackVarDescr> vars;
vars.reserve(16);
if (!ParseFrameDescription(frame_descr, &vars)) {
Printf(
"AddressSanitizer can't parse the stack frame "
@ -402,7 +401,7 @@ void StackAddressDescription::Print() const {
}
Printf(
"HINT: this may be a false positive if your program uses "
"some custom stack unwind mechanism or swapcontext\n");
"some custom stack unwind mechanism, swapcontext or vfork\n");
if (SANITIZER_WINDOWS)
Printf(" (longjmp, SEH and C++ exceptions *are* supported)\n");
else
@ -418,26 +417,19 @@ void HeapAddressDescription::Print() const {
AsanThreadContext *alloc_thread = GetThreadContextByTidLocked(alloc_tid);
StackTrace alloc_stack = GetStackTraceFromId(alloc_stack_id);
char tname[128];
Decorator d;
AsanThreadContext *free_thread = nullptr;
if (free_tid != kInvalidTid) {
free_thread = GetThreadContextByTidLocked(free_tid);
Printf("%sfreed by thread T%d%s here:%s\n", d.Allocation(),
free_thread->tid,
ThreadNameWithParenthesis(free_thread, tname, sizeof(tname)),
d.Default());
Printf("%sfreed by thread %s here:%s\n", d.Allocation(),
AsanThreadIdAndName(free_thread).c_str(), d.Default());
StackTrace free_stack = GetStackTraceFromId(free_stack_id);
free_stack.Print();
Printf("%spreviously allocated by thread T%d%s here:%s\n", d.Allocation(),
alloc_thread->tid,
ThreadNameWithParenthesis(alloc_thread, tname, sizeof(tname)),
d.Default());
Printf("%spreviously allocated by thread %s here:%s\n", d.Allocation(),
AsanThreadIdAndName(alloc_thread).c_str(), d.Default());
} else {
Printf("%sallocated by thread T%d%s here:%s\n", d.Allocation(),
alloc_thread->tid,
ThreadNameWithParenthesis(alloc_thread, tname, sizeof(tname)),
d.Default());
Printf("%sallocated by thread %s here:%s\n", d.Allocation(),
AsanThreadIdAndName(alloc_thread).c_str(), d.Default());
}
alloc_stack.Print();
DescribeThread(GetCurrentThread());

View File

@ -26,9 +26,20 @@ void DescribeThread(AsanThreadContext *context);
static inline void DescribeThread(AsanThread *t) {
if (t) DescribeThread(t->context());
}
const char *ThreadNameWithParenthesis(AsanThreadContext *t, char buff[],
uptr buff_len);
const char *ThreadNameWithParenthesis(u32 tid, char buff[], uptr buff_len);
class AsanThreadIdAndName {
public:
explicit AsanThreadIdAndName(AsanThreadContext *t);
explicit AsanThreadIdAndName(u32 tid);
// Contains "T%tid (%name)" or "T%tid" if the name is empty.
const char *c_str() const { return &name[0]; }
private:
void Init(u32 tid, const char *tname);
char name[128];
};
class Decorator : public __sanitizer::SanitizerCommonDecorator {
public:

View File

@ -45,13 +45,11 @@ void ErrorDeadlySignal::Print() {
void ErrorDoubleFree::Print() {
Decorator d;
Printf("%s", d.Warning());
char tname[128];
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: attempting %s on %p in "
"thread T%d%s:\n",
scariness.GetDescription(), addr_description.addr, tid,
ThreadNameWithParenthesis(tid, tname, sizeof(tname)));
"ERROR: AddressSanitizer: attempting %s on %p in thread %s:\n",
scariness.GetDescription(), addr_description.addr,
AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
scariness.Print();
GET_STACK_TRACE_FATAL(second_free_stack->trace[0],
@ -63,13 +61,11 @@ void ErrorDoubleFree::Print() {
void ErrorNewDeleteTypeMismatch::Print() {
Decorator d;
Printf("%s", d.Warning());
char tname[128];
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: %s on %p in thread "
"T%d%s:\n",
scariness.GetDescription(), addr_description.addr, tid,
ThreadNameWithParenthesis(tid, tname, sizeof(tname)));
"ERROR: AddressSanitizer: %s on %p in thread %s:\n",
scariness.GetDescription(), addr_description.addr,
AsanThreadIdAndName(tid).c_str());
Printf("%s object passed to delete has wrong type:\n", d.Default());
if (delete_size != 0) {
Printf(
@ -106,13 +102,11 @@ void ErrorNewDeleteTypeMismatch::Print() {
void ErrorFreeNotMalloced::Print() {
Decorator d;
Printf("%s", d.Warning());
char tname[128];
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: attempting free on address "
"which was not malloc()-ed: %p in thread T%d%s\n",
addr_description.Address(), tid,
ThreadNameWithParenthesis(tid, tname, sizeof(tname)));
"which was not malloc()-ed: %p in thread %s\n",
addr_description.Address(), AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
CHECK_GT(free_stack->size, 0);
scariness.Print();
@ -129,7 +123,7 @@ void ErrorAllocTypeMismatch::Print() {
"operator delete []"};
CHECK_NE(alloc_type, dealloc_type);
Decorator d;
Printf("%s", d.Warning());
Printf("%s", d.Error());
Report("ERROR: AddressSanitizer: %s (%s vs %s) on %p\n",
scariness.GetDescription(),
alloc_names[alloc_type], dealloc_names[dealloc_type],
@ -148,7 +142,7 @@ void ErrorAllocTypeMismatch::Print() {
void ErrorMallocUsableSizeNotOwned::Print() {
Decorator d;
Printf("%s", d.Warning());
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: attempting to call malloc_usable_size() for "
"pointer which is not owned: %p\n",
@ -161,7 +155,7 @@ void ErrorMallocUsableSizeNotOwned::Print() {
void ErrorSanitizerGetAllocatedSizeNotOwned::Print() {
Decorator d;
Printf("%s", d.Warning());
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: attempting to call "
"__sanitizer_get_allocated_size() for pointer which is not owned: %p\n",
@ -172,11 +166,123 @@ void ErrorSanitizerGetAllocatedSizeNotOwned::Print() {
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorCallocOverflow::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: calloc parameters overflow: count * size "
"(%zd * %zd) cannot be represented in type size_t (thread %s)\n",
count, size, AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorPvallocOverflow::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: pvalloc parameters overflow: size 0x%zx "
"rounded up to system page size 0x%zx cannot be represented in type "
"size_t (thread %s)\n",
size, GetPageSizeCached(), AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorInvalidAllocationAlignment::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: invalid allocation alignment: %zd, "
"alignment must be a power of two (thread %s)\n",
alignment, AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorInvalidAlignedAllocAlignment::Print() {
Decorator d;
Printf("%s", d.Error());
#if SANITIZER_POSIX
Report("ERROR: AddressSanitizer: invalid alignment requested in "
"aligned_alloc: %zd, alignment must be a power of two and the "
"requested size 0x%zx must be a multiple of alignment "
"(thread %s)\n", alignment, size, AsanThreadIdAndName(tid).c_str());
#else
Report("ERROR: AddressSanitizer: invalid alignment requested in "
"aligned_alloc: %zd, the requested size 0x%zx must be a multiple of "
"alignment (thread %s)\n", alignment, size,
AsanThreadIdAndName(tid).c_str());
#endif
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorInvalidPosixMemalignAlignment::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: invalid alignment requested in posix_memalign: "
"%zd, alignment must be a power of two and a multiple of sizeof(void*) "
"== %zd (thread %s)\n",
alignment, sizeof(void*), AsanThreadIdAndName(tid).c_str()); // NOLINT
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorAllocationSizeTooBig::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: requested allocation size 0x%zx (0x%zx after "
"adjustments for alignment, red zones etc.) exceeds maximum supported "
"size of 0x%zx (thread %s)\n",
user_size, total_size, max_size, AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorRssLimitExceeded::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: specified RSS limit exceeded, currently set to "
"soft_rss_limit_mb=%zd\n", common_flags()->soft_rss_limit_mb);
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorOutOfMemory::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: allocator is out of memory trying to allocate "
"0x%zx bytes\n", requested_size);
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorStringFunctionMemoryRangesOverlap::Print() {
Decorator d;
char bug_type[100];
internal_snprintf(bug_type, sizeof(bug_type), "%s-param-overlap", function);
Printf("%s", d.Warning());
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: %s: memory ranges [%p,%p) and [%p, %p) "
"overlap\n",
@ -193,7 +299,7 @@ void ErrorStringFunctionMemoryRangesOverlap::Print() {
void ErrorStringFunctionSizeOverflow::Print() {
Decorator d;
Printf("%s", d.Warning());
Printf("%s", d.Error());
Report("ERROR: AddressSanitizer: %s: (size=%zd)\n",
scariness.GetDescription(), size);
Printf("%s", d.Default());
@ -221,7 +327,7 @@ void ErrorBadParamsToAnnotateContiguousContainer::Print() {
void ErrorODRViolation::Print() {
Decorator d;
Printf("%s", d.Warning());
Printf("%s", d.Error());
Report("ERROR: AddressSanitizer: %s (%p):\n", scariness.GetDescription(),
global1.beg);
Printf("%s", d.Default());
@ -250,7 +356,7 @@ void ErrorODRViolation::Print() {
void ErrorInvalidPointerPair::Print() {
Decorator d;
Printf("%s", d.Warning());
Printf("%s", d.Error());
Report("ERROR: AddressSanitizer: %s: %p %p\n", scariness.GetDescription(),
addr1_description.Address(), addr2_description.Address());
Printf("%s", d.Default());
@ -414,6 +520,7 @@ static void PrintLegend(InternalScopedString *str) {
PrintShadowByte(str, " ASan internal: ", kAsanInternalHeapMagic);
PrintShadowByte(str, " Left alloca redzone: ", kAsanAllocaLeftMagic);
PrintShadowByte(str, " Right alloca redzone: ", kAsanAllocaRightMagic);
PrintShadowByte(str, " Shadow gap: ", kAsanShadowGap);
}
static void PrintShadowBytes(InternalScopedString *str, const char *before,
@ -453,17 +560,15 @@ static void PrintShadowMemoryForAddress(uptr addr) {
void ErrorGeneric::Print() {
Decorator d;
Printf("%s", d.Warning());
Printf("%s", d.Error());
uptr addr = addr_description.Address();
Report("ERROR: AddressSanitizer: %s on address %p at pc %p bp %p sp %p\n",
bug_descr, (void *)addr, pc, bp, sp);
Printf("%s", d.Default());
char tname[128];
Printf("%s%s of size %zu at %p thread T%d%s%s\n", d.Access(),
Printf("%s%s of size %zu at %p thread %s%s\n", d.Access(),
access_size ? (is_write ? "WRITE" : "READ") : "ACCESS", access_size,
(void *)addr, tid,
ThreadNameWithParenthesis(tid, tname, sizeof(tname)), d.Default());
(void *)addr, AsanThreadIdAndName(tid).c_str(), d.Default());
scariness.Print();
GET_STACK_TRACE_FATAL(pc, bp);

View File

@ -20,20 +20,30 @@
namespace __asan {
// (*) VS2013 does not implement unrestricted unions, so we need a trivial
// default constructor explicitly defined for each particular error.
// None of the error classes own the stack traces mentioned in them.
struct ErrorBase {
ErrorBase() = default;
explicit ErrorBase(u32 tid_) : tid(tid_) {}
ScarinessScoreBase scariness;
u32 tid;
ErrorBase() = default; // (*)
explicit ErrorBase(u32 tid_) : tid(tid_) {}
ErrorBase(u32 tid_, int initial_score, const char *reason) : tid(tid_) {
scariness.Clear();
scariness.Scare(initial_score, reason);
}
};
struct ErrorDeadlySignal : ErrorBase {
SignalContext signal;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorDeadlySignal() = default;
ErrorDeadlySignal() = default; // (*)
ErrorDeadlySignal(u32 tid, const SignalContext &sig)
: ErrorBase(tid), signal(sig) {
: ErrorBase(tid),
signal(sig) {
scariness.Clear();
if (signal.IsStackOverflow()) {
scariness.Scare(10, "stack-overflow");
@ -55,125 +65,206 @@ struct ErrorDeadlySignal : ErrorBase {
};
struct ErrorDoubleFree : ErrorBase {
// ErrorDoubleFree doesn't own the stack trace.
const BufferedStackTrace *second_free_stack;
HeapAddressDescription addr_description;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorDoubleFree() = default;
ErrorDoubleFree() = default; // (*)
ErrorDoubleFree(u32 tid, BufferedStackTrace *stack, uptr addr)
: ErrorBase(tid), second_free_stack(stack) {
: ErrorBase(tid, 42, "double-free"),
second_free_stack(stack) {
CHECK_GT(second_free_stack->size, 0);
GetHeapAddressInformation(addr, 1, &addr_description);
scariness.Clear();
scariness.Scare(42, "double-free");
}
void Print();
};
struct ErrorNewDeleteTypeMismatch : ErrorBase {
// ErrorNewDeleteTypeMismatch doesn't own the stack trace.
const BufferedStackTrace *free_stack;
HeapAddressDescription addr_description;
uptr delete_size;
uptr delete_alignment;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorNewDeleteTypeMismatch() = default;
ErrorNewDeleteTypeMismatch() = default; // (*)
ErrorNewDeleteTypeMismatch(u32 tid, BufferedStackTrace *stack, uptr addr,
uptr delete_size_, uptr delete_alignment_)
: ErrorBase(tid), free_stack(stack), delete_size(delete_size_),
: ErrorBase(tid, 10, "new-delete-type-mismatch"),
free_stack(stack),
delete_size(delete_size_),
delete_alignment(delete_alignment_) {
GetHeapAddressInformation(addr, 1, &addr_description);
scariness.Clear();
scariness.Scare(10, "new-delete-type-mismatch");
}
void Print();
};
struct ErrorFreeNotMalloced : ErrorBase {
// ErrorFreeNotMalloced doesn't own the stack trace.
const BufferedStackTrace *free_stack;
AddressDescription addr_description;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorFreeNotMalloced() = default;
ErrorFreeNotMalloced() = default; // (*)
ErrorFreeNotMalloced(u32 tid, BufferedStackTrace *stack, uptr addr)
: ErrorBase(tid),
: ErrorBase(tid, 40, "bad-free"),
free_stack(stack),
addr_description(addr, /*shouldLockThreadRegistry=*/false) {
scariness.Clear();
scariness.Scare(40, "bad-free");
}
addr_description(addr, /*shouldLockThreadRegistry=*/false) {}
void Print();
};
struct ErrorAllocTypeMismatch : ErrorBase {
// ErrorAllocTypeMismatch doesn't own the stack trace.
const BufferedStackTrace *dealloc_stack;
HeapAddressDescription addr_description;
AllocType alloc_type, dealloc_type;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorAllocTypeMismatch() = default;
ErrorAllocTypeMismatch() = default; // (*)
ErrorAllocTypeMismatch(u32 tid, BufferedStackTrace *stack, uptr addr,
AllocType alloc_type_, AllocType dealloc_type_)
: ErrorBase(tid),
: ErrorBase(tid, 10, "alloc-dealloc-mismatch"),
dealloc_stack(stack),
alloc_type(alloc_type_),
dealloc_type(dealloc_type_) {
GetHeapAddressInformation(addr, 1, &addr_description);
scariness.Clear();
scariness.Scare(10, "alloc-dealloc-mismatch");
};
void Print();
};
struct ErrorMallocUsableSizeNotOwned : ErrorBase {
// ErrorMallocUsableSizeNotOwned doesn't own the stack trace.
const BufferedStackTrace *stack;
AddressDescription addr_description;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorMallocUsableSizeNotOwned() = default;
ErrorMallocUsableSizeNotOwned() = default; // (*)
ErrorMallocUsableSizeNotOwned(u32 tid, BufferedStackTrace *stack_, uptr addr)
: ErrorBase(tid),
: ErrorBase(tid, 10, "bad-malloc_usable_size"),
stack(stack_),
addr_description(addr, /*shouldLockThreadRegistry=*/false) {
scariness.Clear();
scariness.Scare(10, "bad-malloc_usable_size");
}
addr_description(addr, /*shouldLockThreadRegistry=*/false) {}
void Print();
};
struct ErrorSanitizerGetAllocatedSizeNotOwned : ErrorBase {
// ErrorSanitizerGetAllocatedSizeNotOwned doesn't own the stack trace.
const BufferedStackTrace *stack;
AddressDescription addr_description;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorSanitizerGetAllocatedSizeNotOwned() = default;
ErrorSanitizerGetAllocatedSizeNotOwned() = default; // (*)
ErrorSanitizerGetAllocatedSizeNotOwned(u32 tid, BufferedStackTrace *stack_,
uptr addr)
: ErrorBase(tid),
: ErrorBase(tid, 10, "bad-__sanitizer_get_allocated_size"),
stack(stack_),
addr_description(addr, /*shouldLockThreadRegistry=*/false) {
scariness.Clear();
scariness.Scare(10, "bad-__sanitizer_get_allocated_size");
}
addr_description(addr, /*shouldLockThreadRegistry=*/false) {}
void Print();
};
struct ErrorCallocOverflow : ErrorBase {
const BufferedStackTrace *stack;
uptr count;
uptr size;
ErrorCallocOverflow() = default; // (*)
ErrorCallocOverflow(u32 tid, BufferedStackTrace *stack_, uptr count_,
uptr size_)
: ErrorBase(tid, 10, "calloc-overflow"),
stack(stack_),
count(count_),
size(size_) {}
void Print();
};
struct ErrorPvallocOverflow : ErrorBase {
const BufferedStackTrace *stack;
uptr size;
ErrorPvallocOverflow() = default; // (*)
ErrorPvallocOverflow(u32 tid, BufferedStackTrace *stack_, uptr size_)
: ErrorBase(tid, 10, "pvalloc-overflow"),
stack(stack_),
size(size_) {}
void Print();
};
struct ErrorInvalidAllocationAlignment : ErrorBase {
const BufferedStackTrace *stack;
uptr alignment;
ErrorInvalidAllocationAlignment() = default; // (*)
ErrorInvalidAllocationAlignment(u32 tid, BufferedStackTrace *stack_,
uptr alignment_)
: ErrorBase(tid, 10, "invalid-allocation-alignment"),
stack(stack_),
alignment(alignment_) {}
void Print();
};
struct ErrorInvalidAlignedAllocAlignment : ErrorBase {
const BufferedStackTrace *stack;
uptr size;
uptr alignment;
ErrorInvalidAlignedAllocAlignment() = default; // (*)
ErrorInvalidAlignedAllocAlignment(u32 tid, BufferedStackTrace *stack_,
uptr size_, uptr alignment_)
: ErrorBase(tid, 10, "invalid-aligned-alloc-alignment"),
stack(stack_),
size(size_),
alignment(alignment_) {}
void Print();
};
struct ErrorInvalidPosixMemalignAlignment : ErrorBase {
const BufferedStackTrace *stack;
uptr alignment;
ErrorInvalidPosixMemalignAlignment() = default; // (*)
ErrorInvalidPosixMemalignAlignment(u32 tid, BufferedStackTrace *stack_,
uptr alignment_)
: ErrorBase(tid, 10, "invalid-posix-memalign-alignment"),
stack(stack_),
alignment(alignment_) {}
void Print();
};
struct ErrorAllocationSizeTooBig : ErrorBase {
const BufferedStackTrace *stack;
uptr user_size;
uptr total_size;
uptr max_size;
ErrorAllocationSizeTooBig() = default; // (*)
ErrorAllocationSizeTooBig(u32 tid, BufferedStackTrace *stack_,
uptr user_size_, uptr total_size_, uptr max_size_)
: ErrorBase(tid, 10, "allocation-size-too-big"),
stack(stack_),
user_size(user_size_),
total_size(total_size_),
max_size(max_size_) {}
void Print();
};
struct ErrorRssLimitExceeded : ErrorBase {
const BufferedStackTrace *stack;
ErrorRssLimitExceeded() = default; // (*)
ErrorRssLimitExceeded(u32 tid, BufferedStackTrace *stack_)
: ErrorBase(tid, 10, "rss-limit-exceeded"),
stack(stack_) {}
void Print();
};
struct ErrorOutOfMemory : ErrorBase {
const BufferedStackTrace *stack;
uptr requested_size;
ErrorOutOfMemory() = default; // (*)
ErrorOutOfMemory(u32 tid, BufferedStackTrace *stack_, uptr requested_size_)
: ErrorBase(tid, 10, "out-of-memory"),
stack(stack_),
requested_size(requested_size_) {}
void Print();
};
struct ErrorStringFunctionMemoryRangesOverlap : ErrorBase {
// ErrorStringFunctionMemoryRangesOverlap doesn't own the stack trace.
const BufferedStackTrace *stack;
uptr length1, length2;
AddressDescription addr1_description;
AddressDescription addr2_description;
const char *function;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorStringFunctionMemoryRangesOverlap() = default;
ErrorStringFunctionMemoryRangesOverlap() = default; // (*)
ErrorStringFunctionMemoryRangesOverlap(u32 tid, BufferedStackTrace *stack_,
uptr addr1, uptr length1_, uptr addr2,
uptr length2_, const char *function_)
@ -193,65 +284,51 @@ struct ErrorStringFunctionMemoryRangesOverlap : ErrorBase {
};
struct ErrorStringFunctionSizeOverflow : ErrorBase {
// ErrorStringFunctionSizeOverflow doesn't own the stack trace.
const BufferedStackTrace *stack;
AddressDescription addr_description;
uptr size;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorStringFunctionSizeOverflow() = default;
ErrorStringFunctionSizeOverflow() = default; // (*)
ErrorStringFunctionSizeOverflow(u32 tid, BufferedStackTrace *stack_,
uptr addr, uptr size_)
: ErrorBase(tid),
: ErrorBase(tid, 10, "negative-size-param"),
stack(stack_),
addr_description(addr, /*shouldLockThreadRegistry=*/false),
size(size_) {
scariness.Clear();
scariness.Scare(10, "negative-size-param");
}
size(size_) {}
void Print();
};
struct ErrorBadParamsToAnnotateContiguousContainer : ErrorBase {
// ErrorBadParamsToAnnotateContiguousContainer doesn't own the stack trace.
const BufferedStackTrace *stack;
uptr beg, end, old_mid, new_mid;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorBadParamsToAnnotateContiguousContainer() = default;
ErrorBadParamsToAnnotateContiguousContainer() = default; // (*)
// PS4: Do we want an AddressDescription for beg?
ErrorBadParamsToAnnotateContiguousContainer(u32 tid,
BufferedStackTrace *stack_,
uptr beg_, uptr end_,
uptr old_mid_, uptr new_mid_)
: ErrorBase(tid),
: ErrorBase(tid, 10, "bad-__sanitizer_annotate_contiguous_container"),
stack(stack_),
beg(beg_),
end(end_),
old_mid(old_mid_),
new_mid(new_mid_) {
scariness.Clear();
scariness.Scare(10, "bad-__sanitizer_annotate_contiguous_container");
}
new_mid(new_mid_) {}
void Print();
};
struct ErrorODRViolation : ErrorBase {
__asan_global global1, global2;
u32 stack_id1, stack_id2;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorODRViolation() = default;
ErrorODRViolation() = default; // (*)
ErrorODRViolation(u32 tid, const __asan_global *g1, u32 stack_id1_,
const __asan_global *g2, u32 stack_id2_)
: ErrorBase(tid),
: ErrorBase(tid, 10, "odr-violation"),
global1(*g1),
global2(*g2),
stack_id1(stack_id1_),
stack_id2(stack_id2_) {
scariness.Clear();
scariness.Scare(10, "odr-violation");
}
stack_id2(stack_id2_) {}
void Print();
};
@ -259,20 +336,16 @@ struct ErrorInvalidPointerPair : ErrorBase {
uptr pc, bp, sp;
AddressDescription addr1_description;
AddressDescription addr2_description;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorInvalidPointerPair() = default;
ErrorInvalidPointerPair() = default; // (*)
ErrorInvalidPointerPair(u32 tid, uptr pc_, uptr bp_, uptr sp_, uptr p1,
uptr p2)
: ErrorBase(tid),
: ErrorBase(tid, 10, "invalid-pointer-pair"),
pc(pc_),
bp(bp_),
sp(sp_),
addr1_description(p1, 1, /*shouldLockThreadRegistry=*/false),
addr2_description(p2, 1, /*shouldLockThreadRegistry=*/false) {
scariness.Clear();
scariness.Scare(10, "invalid-pointer-pair");
}
addr2_description(p2, 1, /*shouldLockThreadRegistry=*/false) {}
void Print();
};
@ -283,9 +356,8 @@ struct ErrorGeneric : ErrorBase {
const char *bug_descr;
bool is_write;
u8 shadow_val;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorGeneric() = default;
ErrorGeneric() = default; // (*)
ErrorGeneric(u32 tid, uptr addr, uptr pc_, uptr bp_, uptr sp_, bool is_write_,
uptr access_size_);
void Print();
@ -300,6 +372,14 @@ struct ErrorGeneric : ErrorBase {
macro(AllocTypeMismatch) \
macro(MallocUsableSizeNotOwned) \
macro(SanitizerGetAllocatedSizeNotOwned) \
macro(CallocOverflow) \
macro(PvallocOverflow) \
macro(InvalidAllocationAlignment) \
macro(InvalidAlignedAllocAlignment) \
macro(InvalidPosixMemalignAlignment) \
macro(AllocationSizeTooBig) \
macro(RssLimitExceeded) \
macro(OutOfMemory) \
macro(StringFunctionMemoryRangesOverlap) \
macro(StringFunctionSizeOverflow) \
macro(BadParamsToAnnotateContiguousContainer) \
@ -334,6 +414,7 @@ struct ErrorDescription {
};
ErrorDescription() { internal_memset(this, 0, sizeof(*this)); }
explicit ErrorDescription(LinkerInitialized) {}
ASAN_FOR_EACH_ERROR_KIND(ASAN_ERROR_DESCRIPTION_CONSTRUCTOR)
bool IsValid() { return kind != kErrorKindInvalid; }

View File

@ -33,10 +33,7 @@ static const char *MaybeCallAsanDefaultOptions() {
static const char *MaybeUseAsanDefaultOptionsCompileDefinition() {
#ifdef ASAN_DEFAULT_OPTIONS
// Stringize the macro value.
# define ASAN_STRINGIZE(x) #x
# define ASAN_STRINGIZE_OPTIONS(options) ASAN_STRINGIZE(options)
return ASAN_STRINGIZE_OPTIONS(ASAN_DEFAULT_OPTIONS);
return SANITIZER_STRINGIFY(ASAN_DEFAULT_OPTIONS);
#else
return "";
#endif
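
The replacement relies on the usual two-level stringification trick; a standalone sketch of the idiom (macro names below are illustrative, not the runtime's):

#include <cstdio>

#define STRINGIFY_(x) #x
#define STRINGIFY(x) STRINGIFY_(x)  // expand the argument first, then stringize

#define MY_DEFAULT_OPTIONS verbosity=1:halt_on_error=0

int main() {
  // Prints: verbosity=1:halt_on_error=0
  std::puts(STRINGIFY(MY_DEFAULT_OPTIONS));
  return 0;
}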
@ -163,6 +160,10 @@ void InitializeFlags() {
CHECK_LE(f->max_redzone, 2048);
CHECK(IsPowerOfTwo(f->redzone));
CHECK(IsPowerOfTwo(f->max_redzone));
if (SANITIZER_RTEMS) {
CHECK(!f->unmap_shadow_on_exit);
CHECK(!f->protect_shadow_gap);
}
// quarantine_size is deprecated but we still honor it.
// quarantine_size can not be used together with quarantine_size_mb.

View File

@ -88,7 +88,8 @@ ASAN_FLAG(bool, check_malloc_usable_size, true,
"295.*.")
ASAN_FLAG(bool, unmap_shadow_on_exit, false,
"If set, explicitly unmaps the (huge) shadow at exit.")
ASAN_FLAG(bool, protect_shadow_gap, true, "If set, mprotect the shadow gap")
ASAN_FLAG(bool, protect_shadow_gap, !SANITIZER_RTEMS,
"If set, mprotect the shadow gap")
ASAN_FLAG(bool, print_stats, false,
"Print various statistics after printing an error message or if "
"atexit=1.")
@ -136,9 +137,9 @@ ASAN_FLAG(
"Android. ")
ASAN_FLAG(
int, detect_invalid_pointer_pairs, 0,
"If non-zero, try to detect operations like <, <=, >, >= and - on "
"invalid pointer pairs (e.g. when pointers belong to different objects). "
"The bigger the value the harder we try.")
"If >= 2, detect operations like <, <=, >, >= and - on invalid pointer "
"pairs (e.g. when pointers belong to different objects); "
"If == 1, detect invalid operations only when both pointers are non-null.")
ASAN_FLAG(
bool, detect_container_overflow, true,
"If true, honor the container overflow annotations. See "

View File

@ -224,8 +224,9 @@ static void RegisterGlobal(const Global *g) {
list_of_all_globals = l;
if (g->has_dynamic_init) {
if (!dynamic_init_globals) {
dynamic_init_globals = new(allocator_for_globals)
VectorOfGlobals(kDynamicInitGlobalsInitialCapacity);
dynamic_init_globals =
new (allocator_for_globals) VectorOfGlobals; // NOLINT
dynamic_init_globals->reserve(kDynamicInitGlobalsInitialCapacity);
}
DynInitGlobal dyn_global = { *g, false };
dynamic_init_globals->push_back(dyn_global);
@ -358,9 +359,11 @@ void __asan_register_globals(__asan_global *globals, uptr n) {
GET_STACK_TRACE_MALLOC;
u32 stack_id = StackDepotPut(stack);
BlockingMutexLock lock(&mu_for_globals);
if (!global_registration_site_vector)
if (!global_registration_site_vector) {
global_registration_site_vector =
new(allocator_for_globals) GlobalRegistrationSiteVector(128);
new (allocator_for_globals) GlobalRegistrationSiteVector; // NOLINT
global_registration_site_vector->reserve(128);
}
GlobalRegistrationSite site = {stack_id, &globals[0], &globals[n - 1]};
global_registration_site_vector->push_back(site);
if (flags()->report_globals >= 2) {

View File

@ -19,9 +19,9 @@ namespace __asan {
#pragma section(".ASAN$GA", read, write) // NOLINT
#pragma section(".ASAN$GZ", read, write) // NOLINT
extern "C" __declspec(allocate(".ASAN$GA"))
__asan_global __asan_globals_start = {};
ALIGNED(sizeof(__asan_global)) __asan_global __asan_globals_start = {};
extern "C" __declspec(allocate(".ASAN$GZ"))
__asan_global __asan_globals_end = {};
ALIGNED(sizeof(__asan_global)) __asan_global __asan_globals_end = {};
#pragma comment(linker, "/merge:.ASAN=.data")
static void call_on_globals(void (*hook)(__asan_global *, uptr)) {

View File

@ -24,15 +24,20 @@
#include "lsan/lsan_common.h"
#include "sanitizer_common/sanitizer_libc.h"
// There is no general interception at all on Fuchsia.
// There is no general interception at all on Fuchsia and RTEMS.
// Only the functions in asan_interceptors_memintrinsics.cc are
// really defined to replace libc functions.
#if !SANITIZER_FUCHSIA
#if !SANITIZER_FUCHSIA && !SANITIZER_RTEMS
#if SANITIZER_POSIX
#include "sanitizer_common/sanitizer_posix.h"
#endif
#if ASAN_INTERCEPT__UNWIND_RAISEEXCEPTION || \
ASAN_INTERCEPT__SJLJ_UNWIND_RAISEEXCEPTION
#include <unwind.h>
#endif
#if defined(__i386) && SANITIZER_LINUX
#define ASAN_PTHREAD_CREATE_VERSION "GLIBC_2.1"
#elif defined(__mips__) && SANITIZER_LINUX
@ -178,6 +183,7 @@ DECLARE_REAL_AND_INTERCEPTOR(void, free, void *)
(void)(s); \
} while (false)
#include "sanitizer_common/sanitizer_common_syscalls.inc"
#include "sanitizer_common/sanitizer_syscalls_netbsd.inc"
struct ThreadStartParam {
atomic_uintptr_t t;
@ -269,7 +275,15 @@ INTERCEPTOR(int, swapcontext, struct ucontext_t *oucp,
uptr stack, ssize;
ReadContextStack(ucp, &stack, &ssize);
ClearShadowMemoryForContextStack(stack, ssize);
#if __has_attribute(__indirect_return__) && \
(defined(__x86_64__) || defined(__i386__))
int (*real_swapcontext)(struct ucontext_t *, struct ucontext_t *)
__attribute__((__indirect_return__))
= REAL(swapcontext);
int res = real_swapcontext(oucp, ucp);
#else
int res = REAL(swapcontext)(oucp, ucp);
#endif
// swapcontext technically does not return, but program may swap context to
// "oucp" later, that would look as if swapcontext() returned 0.
// We need to clear shadow for ucp once again, as it may be in arbitrary
@ -318,6 +332,32 @@ INTERCEPTOR(void, __cxa_throw, void *a, void *b, void *c) {
}
#endif
#if ASAN_INTERCEPT___CXA_RETHROW_PRIMARY_EXCEPTION
INTERCEPTOR(void, __cxa_rethrow_primary_exception, void *a) {
CHECK(REAL(__cxa_rethrow_primary_exception));
__asan_handle_no_return();
REAL(__cxa_rethrow_primary_exception)(a);
}
#endif
#if ASAN_INTERCEPT__UNWIND_RAISEEXCEPTION
INTERCEPTOR(_Unwind_Reason_Code, _Unwind_RaiseException,
_Unwind_Exception *object) {
CHECK(REAL(_Unwind_RaiseException));
__asan_handle_no_return();
return REAL(_Unwind_RaiseException)(object);
}
#endif
#if ASAN_INTERCEPT__SJLJ_UNWIND_RAISEEXCEPTION
INTERCEPTOR(_Unwind_Reason_Code, _Unwind_SjLj_RaiseException,
_Unwind_Exception *object) {
CHECK(REAL(_Unwind_SjLj_RaiseException));
__asan_handle_no_return();
return REAL(_Unwind_SjLj_RaiseException)(object);
}
#endif
#if ASAN_INTERCEPT_INDEX
# if ASAN_USE_ALIAS_ATTRIBUTE_FOR_INDEX
INTERCEPTOR(char*, index, const char *string, int c)
@ -540,14 +580,6 @@ INTERCEPTOR(int, __cxa_atexit, void (*func)(void *), void *arg,
}
#endif // ASAN_INTERCEPT___CXA_ATEXIT
#if ASAN_INTERCEPT_FORK
INTERCEPTOR(int, fork, void) {
ENSURE_ASAN_INITED();
int pid = REAL(fork)();
return pid;
}
#endif // ASAN_INTERCEPT_FORK
// ---------------------- InitializeAsanInterceptors ---------------- {{{1
namespace __asan {
void InitializeAsanInterceptors() {
@ -598,6 +630,17 @@ void InitializeAsanInterceptors() {
#if ASAN_INTERCEPT___CXA_THROW
ASAN_INTERCEPT_FUNC(__cxa_throw);
#endif
#if ASAN_INTERCEPT___CXA_RETHROW_PRIMARY_EXCEPTION
ASAN_INTERCEPT_FUNC(__cxa_rethrow_primary_exception);
#endif
// Indirectly intercept std::rethrow_exception.
#if ASAN_INTERCEPT__UNWIND_RAISEEXCEPTION
INTERCEPT_FUNCTION(_Unwind_RaiseException);
#endif
// Indirectly intercept std::rethrow_exception.
#if ASAN_INTERCEPT__UNWIND_SJLJ_RAISEEXCEPTION
INTERCEPT_FUNCTION(_Unwind_SjLj_RaiseException);
#endif
// Intercept threading-related functions
#if ASAN_INTERCEPT_PTHREAD_CREATE
@ -614,10 +657,6 @@ void InitializeAsanInterceptors() {
ASAN_INTERCEPT_FUNC(__cxa_atexit);
#endif
#if ASAN_INTERCEPT_FORK
ASAN_INTERCEPT_FUNC(fork);
#endif
InitializePlatformInterceptors();
VReport(1, "AddressSanitizer: libc interceptors initialized\n");


@ -34,10 +34,10 @@ void InitializePlatformInterceptors();
} // namespace __asan
// There is no general interception at all on Fuchsia.
// There is no general interception at all on Fuchsia and RTEMS.
// Only the functions in asan_interceptors_memintrinsics.h are
// really defined to replace libc functions.
#if !SANITIZER_FUCHSIA
#if !SANITIZER_FUCHSIA && !SANITIZER_RTEMS
// Use macro to describe if specific function should be
// intercepted on a given platform.
@ -46,13 +46,11 @@ void InitializePlatformInterceptors();
# define ASAN_INTERCEPT__LONGJMP 1
# define ASAN_INTERCEPT_INDEX 1
# define ASAN_INTERCEPT_PTHREAD_CREATE 1
# define ASAN_INTERCEPT_FORK 1
#else
# define ASAN_INTERCEPT_ATOLL_AND_STRTOLL 0
# define ASAN_INTERCEPT__LONGJMP 0
# define ASAN_INTERCEPT_INDEX 0
# define ASAN_INTERCEPT_PTHREAD_CREATE 0
# define ASAN_INTERCEPT_FORK 0
#endif
#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD || \
@ -80,13 +78,20 @@ void InitializePlatformInterceptors();
# define ASAN_INTERCEPT___LONGJMP_CHK 0
#endif
// Android bug: https://code.google.com/p/android/issues/detail?id=61799
#if ASAN_HAS_EXCEPTIONS && !SANITIZER_WINDOWS && \
!(SANITIZER_ANDROID && defined(__i386)) && \
!SANITIZER_SOLARIS
#if ASAN_HAS_EXCEPTIONS && !SANITIZER_WINDOWS && !SANITIZER_SOLARIS && \
!SANITIZER_NETBSD
# define ASAN_INTERCEPT___CXA_THROW 1
# define ASAN_INTERCEPT___CXA_RETHROW_PRIMARY_EXCEPTION 1
# if defined(_GLIBCXX_SJLJ_EXCEPTIONS) || (SANITIZER_IOS && defined(__arm__))
# define ASAN_INTERCEPT__UNWIND_SJLJ_RAISEEXCEPTION 1
# else
# define ASAN_INTERCEPT__UNWIND_RAISEEXCEPTION 1
# endif
#else
# define ASAN_INTERCEPT___CXA_THROW 0
# define ASAN_INTERCEPT___CXA_RETHROW_PRIMARY_EXCEPTION 0
# define ASAN_INTERCEPT__UNWIND_RAISEEXCEPTION 0
# define ASAN_INTERCEPT__UNWIND_SJLJ_RAISEEXCEPTION 0
#endif
#if !SANITIZER_WINDOWS


@ -31,14 +31,14 @@ void *__asan_memmove(void *to, const void *from, uptr size) {
ASAN_MEMMOVE_IMPL(nullptr, to, from, size);
}
#if SANITIZER_FUCHSIA
#if SANITIZER_FUCHSIA || SANITIZER_RTEMS
// Fuchsia doesn't use sanitizer_common_interceptors.inc, but the only
// things there it wants are these three. Just define them as aliases
// here rather than repeating the contents.
// Fuchsia and RTEMS don't use sanitizer_common_interceptors.inc, but
// the only things there it wants are these three. Just define them
// as aliases here rather than repeating the contents.
decltype(memcpy) memcpy[[gnu::alias("__asan_memcpy")]];
decltype(memmove) memmove[[gnu::alias("__asan_memmove")]];
decltype(memset) memset[[gnu::alias("__asan_memset")]];
extern "C" decltype(__asan_memcpy) memcpy[[gnu::alias("__asan_memcpy")]];
extern "C" decltype(__asan_memmove) memmove[[gnu::alias("__asan_memmove")]];
extern "C" decltype(__asan_memset) memset[[gnu::alias("__asan_memset")]];
#endif // SANITIZER_FUCHSIA
#endif // SANITIZER_FUCHSIA || SANITIZER_RTEMS


@ -133,15 +133,22 @@ static inline bool RangesOverlap(const char *offset1, uptr length1,
const char *offset2, uptr length2) {
return !((offset1 + length1 <= offset2) || (offset2 + length2 <= offset1));
}
#define CHECK_RANGES_OVERLAP(name, _offset1, length1, _offset2, length2) do { \
const char *offset1 = (const char*)_offset1; \
const char *offset2 = (const char*)_offset2; \
if (RangesOverlap(offset1, length1, offset2, length2)) { \
GET_STACK_TRACE_FATAL_HERE; \
ReportStringFunctionMemoryRangesOverlap(name, offset1, length1, \
offset2, length2, &stack); \
} \
} while (0)
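// Overlap reports can be silenced via ASan suppressions: first check a
// name-based interceptor suppression, then, if any stack-trace-based
// suppressions exist, match the current stack trace as well.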
#define CHECK_RANGES_OVERLAP(name, _offset1, length1, _offset2, length2) \
do { \
const char *offset1 = (const char *)_offset1; \
const char *offset2 = (const char *)_offset2; \
if (RangesOverlap(offset1, length1, offset2, length2)) { \
GET_STACK_TRACE_FATAL_HERE; \
bool suppressed = IsInterceptorSuppressed(name); \
if (!suppressed && HaveStackTraceBasedSuppressions()) { \
suppressed = IsStackTraceSuppressed(&stack); \
} \
if (!suppressed) { \
ReportStringFunctionMemoryRangesOverlap(name, offset1, length1, \
offset2, length2, &stack); \
} \
} \
} while (0)
} // namespace __asan


@ -36,7 +36,7 @@
// If set, values like allocator chunk size, as well as defaults for some flags
// will be changed towards less memory overhead.
#ifndef ASAN_LOW_MEMORY
# if SANITIZER_IOS || SANITIZER_ANDROID
# if SANITIZER_IOS || SANITIZER_ANDROID || SANITIZER_RTEMS
# define ASAN_LOW_MEMORY 1
# else
# define ASAN_LOW_MEMORY 0
@ -78,7 +78,7 @@ void InitializeShadowMemory();
// asan_malloc_linux.cc / asan_malloc_mac.cc
void ReplaceSystemMalloc();
// asan_linux.cc / asan_mac.cc / asan_win.cc
// asan_linux.cc / asan_mac.cc / asan_rtems.cc / asan_win.cc
uptr FindDynamicShadowStart();
void *AsanDoesNotSupportStaticLinkage();
void AsanCheckDynamicRTPrereqs();
@ -147,6 +147,9 @@ const int kAsanArrayCookieMagic = 0xac;
const int kAsanIntraObjectRedzone = 0xbb;
const int kAsanAllocaLeftMagic = 0xca;
const int kAsanAllocaRightMagic = 0xcb;
// Used to populate the shadow gap for systems without memory
// protection there (i.e. Myriad).
const int kAsanShadowGap = 0xcc;
static const uptr kCurrentStackFrameMagic = 0x41B58AB3;
static const uptr kRetiredStackFrameMagic = 0x45E0360E;


@ -62,16 +62,36 @@ uptr FindDynamicShadowStart() {
uptr space_size = kHighShadowEnd + left_padding;
uptr largest_gap_found = 0;
uptr shadow_start = FindAvailableMemoryRange(space_size, alignment,
granularity, &largest_gap_found);
uptr max_occupied_addr = 0;
VReport(2, "FindDynamicShadowStart, space_size = %p\n", space_size);
uptr shadow_start =
FindAvailableMemoryRange(space_size, alignment, granularity,
&largest_gap_found, &max_occupied_addr);
// If the shadow doesn't fit, restrict the address space to make it fit.
if (shadow_start == 0) {
VReport(
2,
"Shadow doesn't fit, largest_gap_found = %p, max_occupied_addr = %p\n",
largest_gap_found, max_occupied_addr);
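// The shadow occupies 1/2^SHADOW_SCALE of the address space, so the largest
// free gap found can back a VM of at most (gap << SHADOW_SCALE) bytes;
// restrict the address space to that size and retry.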
uptr new_max_vm = RoundDownTo(largest_gap_found << SHADOW_SCALE, alignment);
if (new_max_vm < max_occupied_addr) {
Report("Unable to find a memory range for dynamic shadow.\n");
Report(
"space_size = %p, largest_gap_found = %p, max_occupied_addr = %p, "
"new_max_vm = %p\n",
space_size, largest_gap_found, max_occupied_addr, new_max_vm);
CHECK(0 && "cannot place shadow");
}
RestrictMemoryToMaxAddress(new_max_vm);
kHighMemEnd = new_max_vm - 1;
space_size = kHighShadowEnd + left_padding;
shadow_start =
FindAvailableMemoryRange(space_size, alignment, granularity, nullptr);
VReport(2, "FindDynamicShadowStart, space_size = %p\n", space_size);
shadow_start = FindAvailableMemoryRange(space_size, alignment, granularity,
nullptr, nullptr);
if (shadow_start == 0) {
Report("Unable to find a memory range after restricting VM.\n");
CHECK(0 && "cannot place shadow after restricting vm");
}
}
CHECK_NE((uptr)0, shadow_start);
CHECK(IsAligned(shadow_start, alignment));


@ -16,19 +16,23 @@
#include "sanitizer_common/sanitizer_platform.h"
#if SANITIZER_FREEBSD || SANITIZER_FUCHSIA || SANITIZER_LINUX || \
SANITIZER_NETBSD || SANITIZER_SOLARIS
SANITIZER_NETBSD || SANITIZER_RTEMS || SANITIZER_SOLARIS
#include "sanitizer_common/sanitizer_allocator_checks.h"
#include "sanitizer_common/sanitizer_errno.h"
#include "sanitizer_common/sanitizer_tls_get_addr.h"
#include "asan_allocator.h"
#include "asan_interceptors.h"
#include "asan_internal.h"
#include "asan_malloc_local.h"
#include "asan_stack.h"
// ---------------------- Replacement functions ---------------- {{{1
using namespace __asan; // NOLINT
static uptr allocated_for_dlsym;
static const uptr kDlsymAllocPoolSize = 1024;
static uptr last_dlsym_alloc_size_in_words;
static const uptr kDlsymAllocPoolSize = SANITIZER_RTEMS ? 4096 : 1024;
static uptr alloc_memory_for_dlsym[kDlsymAllocPoolSize];
static INLINE bool IsInDlsymAllocPool(const void *ptr) {
@ -39,21 +43,73 @@ static INLINE bool IsInDlsymAllocPool(const void *ptr) {
static void *AllocateFromLocalPool(uptr size_in_bytes) {
uptr size_in_words = RoundUpTo(size_in_bytes, kWordSize) / kWordSize;
void *mem = (void*)&alloc_memory_for_dlsym[allocated_for_dlsym];
last_dlsym_alloc_size_in_words = size_in_words;
allocated_for_dlsym += size_in_words;
CHECK_LT(allocated_for_dlsym, kDlsymAllocPoolSize);
return mem;
}
static void DeallocateFromLocalPool(const void *ptr) {
// Hack: since glibc 2.27 dlsym no longer uses stack-allocated memory to store
// error messages and instead uses malloc followed by free. To avoid pool
// exhaustion due to long object filenames, handle that special case here.
uptr prev_offset = allocated_for_dlsym - last_dlsym_alloc_size_in_words;
void *prev_mem = (void*)&alloc_memory_for_dlsym[prev_offset];
if (prev_mem == ptr) {
REAL(memset)(prev_mem, 0, last_dlsym_alloc_size_in_words * kWordSize);
allocated_for_dlsym = prev_offset;
last_dlsym_alloc_size_in_words = 0;
}
}
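// posix_memalign for the early/dlsym pool: bump the pool cursor up to the
// requested alignment, hand out the rounded-up size, and report ENOMEM once
// the static pool is exhausted.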
static int PosixMemalignFromLocalPool(void **memptr, uptr alignment,
uptr size_in_bytes) {
if (UNLIKELY(!CheckPosixMemalignAlignment(alignment)))
return errno_EINVAL;
CHECK(alignment >= kWordSize);
uptr addr = (uptr)&alloc_memory_for_dlsym[allocated_for_dlsym];
uptr aligned_addr = RoundUpTo(addr, alignment);
uptr aligned_size = RoundUpTo(size_in_bytes, kWordSize);
uptr *end_mem = (uptr*)(aligned_addr + aligned_size);
uptr allocated = end_mem - alloc_memory_for_dlsym;
if (allocated >= kDlsymAllocPoolSize)
return errno_ENOMEM;
allocated_for_dlsym = allocated;
*memptr = (void*)aligned_addr;
return 0;
}
#if SANITIZER_RTEMS
void* MemalignFromLocalPool(uptr alignment, uptr size) {
void *ptr = nullptr;
alignment = Max(alignment, kWordSize);
PosixMemalignFromLocalPool(&ptr, alignment, size);
return ptr;
}
bool IsFromLocalPool(const void *ptr) {
return IsInDlsymAllocPool(ptr);
}
#endif
static INLINE bool MaybeInDlsym() {
// Fuchsia doesn't use dlsym-based interceptors.
return !SANITIZER_FUCHSIA && asan_init_is_running;
}
static INLINE bool UseLocalPool() {
return EarlyMalloc() || MaybeInDlsym();
}
static void *ReallocFromLocalPool(void *ptr, uptr size) {
const uptr offset = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
const uptr copy_size = Min(size, kDlsymAllocPoolSize - offset);
void *new_ptr;
if (UNLIKELY(MaybeInDlsym())) {
if (UNLIKELY(UseLocalPool())) {
new_ptr = AllocateFromLocalPool(size);
} else {
ENSURE_ASAN_INITED();
@ -66,8 +122,10 @@ static void *ReallocFromLocalPool(void *ptr, uptr size) {
INTERCEPTOR(void, free, void *ptr) {
GET_STACK_TRACE_FREE;
if (UNLIKELY(IsInDlsymAllocPool(ptr)))
if (UNLIKELY(IsInDlsymAllocPool(ptr))) {
DeallocateFromLocalPool(ptr);
return;
}
asan_free(ptr, &stack, FROM_MALLOC);
}
@ -81,7 +139,7 @@ INTERCEPTOR(void, cfree, void *ptr) {
#endif // SANITIZER_INTERCEPT_CFREE
INTERCEPTOR(void*, malloc, uptr size) {
if (UNLIKELY(MaybeInDlsym()))
if (UNLIKELY(UseLocalPool()))
// Hack: dlsym calls malloc before REAL(malloc) is retrieved from dlsym.
return AllocateFromLocalPool(size);
ENSURE_ASAN_INITED();
@ -90,7 +148,7 @@ INTERCEPTOR(void*, malloc, uptr size) {
}
INTERCEPTOR(void*, calloc, uptr nmemb, uptr size) {
if (UNLIKELY(MaybeInDlsym()))
if (UNLIKELY(UseLocalPool()))
// Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym.
return AllocateFromLocalPool(nmemb * size);
ENSURE_ASAN_INITED();
@ -101,7 +159,7 @@ INTERCEPTOR(void*, calloc, uptr nmemb, uptr size) {
INTERCEPTOR(void*, realloc, void *ptr, uptr size) {
if (UNLIKELY(IsInDlsymAllocPool(ptr)))
return ReallocFromLocalPool(ptr, size);
if (UNLIKELY(MaybeInDlsym()))
if (UNLIKELY(UseLocalPool()))
return AllocateFromLocalPool(size);
ENSURE_ASAN_INITED();
GET_STACK_TRACE_MALLOC;
@ -122,10 +180,12 @@ INTERCEPTOR(void*, __libc_memalign, uptr boundary, uptr size) {
}
#endif // SANITIZER_INTERCEPT_MEMALIGN
#if SANITIZER_INTERCEPT_ALIGNED_ALLOC
INTERCEPTOR(void*, aligned_alloc, uptr boundary, uptr size) {
GET_STACK_TRACE_MALLOC;
return asan_memalign(boundary, size, &stack, FROM_MALLOC);
return asan_aligned_alloc(boundary, size, &stack);
}
#endif // SANITIZER_INTERCEPT_ALIGNED_ALLOC
INTERCEPTOR(uptr, malloc_usable_size, void *ptr) {
GET_CURRENT_PC_BP_SP;
@ -154,8 +214,9 @@ INTERCEPTOR(int, mallopt, int cmd, int value) {
#endif // SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO
INTERCEPTOR(int, posix_memalign, void **memptr, uptr alignment, uptr size) {
if (UNLIKELY(UseLocalPool()))
return PosixMemalignFromLocalPool(memptr, alignment, size);
GET_STACK_TRACE_MALLOC;
// Printf("posix_memalign: %zx %zu\n", alignment, size);
return asan_posix_memalign(memptr, alignment, size, &stack);
}


@ -0,0 +1,44 @@
//===-- asan_malloc_local.h -------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of AddressSanitizer, an address sanity checker.
//
// Provide interfaces to check for and handle local pool memory allocation.
//===----------------------------------------------------------------------===//
#ifndef ASAN_MALLOC_LOCAL_H
#define ASAN_MALLOC_LOCAL_H
#include "sanitizer_common/sanitizer_platform.h"
#include "asan_internal.h"
// On RTEMS, we use the local pool to handle memory allocation when the ASan
// run-time is not up.
static INLINE bool EarlyMalloc() {
return SANITIZER_RTEMS && (!__asan::asan_inited ||
__asan::asan_init_is_running);
}
void* MemalignFromLocalPool(uptr alignment, uptr size);
#if SANITIZER_RTEMS
bool IsFromLocalPool(const void *ptr);
#define ALLOCATE_FROM_LOCAL_POOL UNLIKELY(EarlyMalloc())
#define IS_FROM_LOCAL_POOL(ptr) UNLIKELY(IsFromLocalPool(ptr))
#else // SANITIZER_RTEMS
#define ALLOCATE_FROM_LOCAL_POOL 0
#define IS_FROM_LOCAL_POOL(ptr) 0
#endif // SANITIZER_RTEMS
#endif // ASAN_MALLOC_LOCAL_H


@ -38,6 +38,9 @@ using namespace __asan;
#define COMMON_MALLOC_CALLOC(count, size) \
GET_STACK_TRACE_MALLOC; \
void *p = asan_calloc(count, size, &stack);
#define COMMON_MALLOC_POSIX_MEMALIGN(memptr, alignment, size) \
GET_STACK_TRACE_MALLOC; \
int res = asan_posix_memalign(memptr, alignment, size, &stack);
#define COMMON_MALLOC_VALLOC(size) \
GET_STACK_TRACE_MALLOC; \
void *p = asan_memalign(GetPageSizeCached(), size, &stack, FROM_MALLOC);


@ -122,6 +122,13 @@
// || `[0x400000000000, 0x47ffffffffff]` || LowShadow ||
// || `[0x000000000000, 0x3fffffffffff]` || LowMem ||
//
// Shadow mapping on NetBSD/i386 with SHADOW_OFFSET == 0x40000000:
// || `[0x60000000, 0xfffff000]` || HighMem ||
// || `[0x4c000000, 0x5fffffff]` || HighShadow ||
// || `[0x48000000, 0x4bffffff]` || ShadowGap ||
// || `[0x40000000, 0x47ffffff]` || LowShadow ||
// || `[0x00000000, 0x3fffffff]` || LowMem ||
//
// Default Windows/i386 mapping:
// (the exact location of HighShadow/HighMem may vary depending
// on WoW64, /LARGEADDRESSAWARE, etc).
@ -130,11 +137,17 @@
// || `[0x36000000, 0x39ffffff]` || ShadowGap ||
// || `[0x30000000, 0x35ffffff]` || LowShadow ||
// || `[0x00000000, 0x2fffffff]` || LowMem ||
//
// Shadow mapping on Myriad2 (for shadow scale 5):
// || `[0x9ff80000, 0x9fffffff]` || ShadowGap ||
// || `[0x9f000000, 0x9ff7ffff]` || LowShadow ||
// || `[0x80000000, 0x9effffff]` || LowMem ||
// || `[0x00000000, 0x7fffffff]` || Ignored ||
#if defined(ASAN_SHADOW_SCALE)
static const u64 kDefaultShadowScale = ASAN_SHADOW_SCALE;
#else
static const u64 kDefaultShadowScale = 3;
static const u64 kDefaultShadowScale = SANITIZER_MYRIAD2 ? 5 : 3;
#endif
static const u64 kDefaultShadowSentinel = ~(uptr)0;
static const u64 kDefaultShadowOffset32 = 1ULL << 29; // 0x20000000
@ -152,9 +165,19 @@ static const u64 kPPC64_ShadowOffset64 = 1ULL << 44;
static const u64 kSystemZ_ShadowOffset64 = 1ULL << 52;
static const u64 kFreeBSD_ShadowOffset32 = 1ULL << 30; // 0x40000000
static const u64 kFreeBSD_ShadowOffset64 = 1ULL << 46; // 0x400000000000
static const u64 kNetBSD_ShadowOffset32 = 1ULL << 30; // 0x40000000
static const u64 kNetBSD_ShadowOffset64 = 1ULL << 46; // 0x400000000000
static const u64 kWindowsShadowOffset32 = 3ULL << 28; // 0x30000000
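// Myriad2: DRAM is a 512 MB window at 0x80000000; the top 1/32 of the window
// (16 MB at shadow scale 5) is reserved for the shadow and the shadow gap,
// which places the shadow offset at 0x9f000000.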
static const u64 kMyriadMemoryOffset32 = 0x80000000ULL;
static const u64 kMyriadMemorySize32 = 0x20000000ULL;
static const u64 kMyriadMemoryEnd32 =
kMyriadMemoryOffset32 + kMyriadMemorySize32 - 1;
static const u64 kMyriadShadowOffset32 =
(kMyriadMemoryOffset32 + kMyriadMemorySize32 -
(kMyriadMemorySize32 >> kDefaultShadowScale));
static const u64 kMyriadCacheBitMask32 = 0x40000000ULL;
#define SHADOW_SCALE kDefaultShadowScale
#if SANITIZER_FUCHSIA
@ -166,6 +189,8 @@ static const u64 kWindowsShadowOffset32 = 3ULL << 28; // 0x30000000
# define SHADOW_OFFSET kMIPS32_ShadowOffset32
# elif SANITIZER_FREEBSD
# define SHADOW_OFFSET kFreeBSD_ShadowOffset32
# elif SANITIZER_NETBSD
# define SHADOW_OFFSET kNetBSD_ShadowOffset32
# elif SANITIZER_WINDOWS
# define SHADOW_OFFSET kWindowsShadowOffset32
# elif SANITIZER_IOS
@ -174,6 +199,8 @@ static const u64 kWindowsShadowOffset32 = 3ULL << 28; // 0x30000000
# else
# define SHADOW_OFFSET kIosShadowOffset32
# endif
# elif SANITIZER_MYRIAD2
# define SHADOW_OFFSET kMyriadShadowOffset32
# else
# define SHADOW_OFFSET kDefaultShadowOffset32
# endif
@ -212,6 +239,39 @@ static const u64 kWindowsShadowOffset32 = 3ULL << 28; // 0x30000000
#endif
#define SHADOW_GRANULARITY (1ULL << SHADOW_SCALE)
#define DO_ASAN_MAPPING_PROFILE 0 // Set to 1 to profile the functions below.
#if DO_ASAN_MAPPING_PROFILE
# define PROFILE_ASAN_MAPPING() AsanMappingProfile[__LINE__]++;
#else
# define PROFILE_ASAN_MAPPING()
#endif
// If 1, all shadow boundaries are constants.
// Don't set to 1 other than for testing.
#define ASAN_FIXED_MAPPING 0
namespace __asan {
extern uptr AsanMappingProfile[];
#if ASAN_FIXED_MAPPING
// Fixed mapping for 64-bit Linux. Mostly used for performance comparison
// with non-fixed mapping. As of r175253 (Feb 2013) the performance
// difference between fixed and non-fixed mapping is below the noise level.
static uptr kHighMemEnd = 0x7fffffffffffULL;
static uptr kMidMemBeg = 0x3000000000ULL;
static uptr kMidMemEnd = 0x4fffffffffULL;
#else
extern uptr kHighMemEnd, kMidMemBeg, kMidMemEnd; // Initialized in __asan_init.
#endif
} // namespace __asan
#if SANITIZER_MYRIAD2
#include "asan_mapping_myriad.h"
#else
#define MEM_TO_SHADOW(mem) (((mem) >> SHADOW_SCALE) + (SHADOW_OFFSET))
#define kLowMemBeg 0
@ -243,36 +303,11 @@ static const u64 kWindowsShadowOffset32 = 3ULL << 28; // 0x30000000
#define kShadowGap3Beg (kMidMemBeg ? kMidMemEnd + 1 : 0)
#define kShadowGap3End (kMidMemBeg ? kHighShadowBeg - 1 : 0)
#define DO_ASAN_MAPPING_PROFILE 0 // Set to 1 to profile the functions below.
#if DO_ASAN_MAPPING_PROFILE
# define PROFILE_ASAN_MAPPING() AsanMappingProfile[__LINE__]++;
#else
# define PROFILE_ASAN_MAPPING()
#endif
// If 1, all shadow boundaries are constants.
// Don't set to 1 other than for testing.
#define ASAN_FIXED_MAPPING 0
namespace __asan {
extern uptr AsanMappingProfile[];
#if ASAN_FIXED_MAPPING
// Fixed mapping for 64-bit Linux. Mostly used for performance comparison
// with non-fixed mapping. As of r175253 (Feb 2013) the performance
// difference between fixed and non-fixed mapping is below the noise level.
static uptr kHighMemEnd = 0x7fffffffffffULL;
static uptr kMidMemBeg = 0x3000000000ULL;
static uptr kMidMemEnd = 0x4fffffffffULL;
#else
extern uptr kHighMemEnd, kMidMemBeg, kMidMemEnd; // Initialized in __asan_init.
#endif
static inline bool AddrIsInLowMem(uptr a) {
PROFILE_ASAN_MAPPING();
return a < kLowMemEnd;
return a <= kLowMemEnd;
}
static inline bool AddrIsInLowShadow(uptr a) {
@ -280,16 +315,26 @@ static inline bool AddrIsInLowShadow(uptr a) {
return a >= kLowShadowBeg && a <= kLowShadowEnd;
}
static inline bool AddrIsInHighMem(uptr a) {
PROFILE_ASAN_MAPPING();
return a >= kHighMemBeg && a <= kHighMemEnd;
}
static inline bool AddrIsInMidMem(uptr a) {
PROFILE_ASAN_MAPPING();
return kMidMemBeg && a >= kMidMemBeg && a <= kMidMemEnd;
}
static inline bool AddrIsInMidShadow(uptr a) {
PROFILE_ASAN_MAPPING();
return kMidMemBeg && a >= kMidShadowBeg && a <= kMidShadowEnd;
}
static inline bool AddrIsInHighMem(uptr a) {
PROFILE_ASAN_MAPPING();
return kHighMemBeg && a >= kHighMemBeg && a <= kHighMemEnd;
}
static inline bool AddrIsInHighShadow(uptr a) {
PROFILE_ASAN_MAPPING();
return kHighMemBeg && a >= kHighShadowBeg && a <= kHighShadowEnd;
}
static inline bool AddrIsInShadowGap(uptr a) {
PROFILE_ASAN_MAPPING();
if (kMidMemBeg) {
@ -305,6 +350,12 @@ static inline bool AddrIsInShadowGap(uptr a) {
return a >= kShadowGapBeg && a <= kShadowGapEnd;
}
} // namespace __asan
#endif // SANITIZER_MYRIAD2
namespace __asan {
static inline bool AddrIsInMem(uptr a) {
PROFILE_ASAN_MAPPING();
return AddrIsInLowMem(a) || AddrIsInMidMem(a) || AddrIsInHighMem(a) ||
@ -317,16 +368,6 @@ static inline uptr MemToShadow(uptr p) {
return MEM_TO_SHADOW(p);
}
static inline bool AddrIsInHighShadow(uptr a) {
PROFILE_ASAN_MAPPING();
return a >= kHighShadowBeg && a <= kHighMemEnd;
}
static inline bool AddrIsInMidShadow(uptr a) {
PROFILE_ASAN_MAPPING();
return kMidMemBeg && a >= kMidShadowBeg && a <= kMidMemEnd;
}
static inline bool AddrIsInShadow(uptr a) {
PROFILE_ASAN_MAPPING();
return AddrIsInLowShadow(a) || AddrIsInMidShadow(a) || AddrIsInHighShadow(a);
@ -339,6 +380,8 @@ static inline bool AddrIsAlignedByGranularity(uptr a) {
static inline bool AddressIsPoisoned(uptr a) {
PROFILE_ASAN_MAPPING();
if (SANITIZER_MYRIAD2 && !AddrIsInMem(a) && !AddrIsInShadow(a))
return false;
const uptr kAccessSize = 1;
u8 *shadow_address = (u8*)MEM_TO_SHADOW(a);
s8 shadow_value = *shadow_address;


@ -0,0 +1,86 @@
//===-- asan_mapping_myriad.h -----------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of AddressSanitizer, an address sanity checker.
//
// Myriad-specific definitions for ASan memory mapping.
//===----------------------------------------------------------------------===//
#ifndef ASAN_MAPPING_MYRIAD_H
#define ASAN_MAPPING_MYRIAD_H
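// RAW_ADDR masks off kMyriadCacheBitMask32 before any shadow computation, so
// an address and its cache-bit alias map to the same shadow byte.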
#define RAW_ADDR(mem) ((mem) & ~kMyriadCacheBitMask32)
#define MEM_TO_SHADOW(mem) \
(((RAW_ADDR(mem) - kLowMemBeg) >> SHADOW_SCALE) + (SHADOW_OFFSET))
#define kLowMemBeg kMyriadMemoryOffset32
#define kLowMemEnd (SHADOW_OFFSET - 1)
#define kLowShadowBeg SHADOW_OFFSET
#define kLowShadowEnd MEM_TO_SHADOW(kLowMemEnd)
#define kHighMemBeg 0
#define kHighShadowBeg 0
#define kHighShadowEnd 0
#define kMidShadowBeg 0
#define kMidShadowEnd 0
#define kShadowGapBeg (kLowShadowEnd + 1)
#define kShadowGapEnd kMyriadMemoryEnd32
#define kShadowGap2Beg 0
#define kShadowGap2End 0
#define kShadowGap3Beg 0
#define kShadowGap3End 0
namespace __asan {
static inline bool AddrIsInLowMem(uptr a) {
PROFILE_ASAN_MAPPING();
a = RAW_ADDR(a);
return a >= kLowMemBeg && a <= kLowMemEnd;
}
static inline bool AddrIsInLowShadow(uptr a) {
PROFILE_ASAN_MAPPING();
a = RAW_ADDR(a);
return a >= kLowShadowBeg && a <= kLowShadowEnd;
}
static inline bool AddrIsInMidMem(uptr a) {
PROFILE_ASAN_MAPPING();
return false;
}
static inline bool AddrIsInMidShadow(uptr a) {
PROFILE_ASAN_MAPPING();
return false;
}
static inline bool AddrIsInHighMem(uptr a) {
PROFILE_ASAN_MAPPING();
return false;
}
static inline bool AddrIsInHighShadow(uptr a) {
PROFILE_ASAN_MAPPING();
return false;
}
static inline bool AddrIsInShadowGap(uptr a) {
PROFILE_ASAN_MAPPING();
a = RAW_ADDR(a);
return a >= kShadowGapBeg && a <= kShadowGapEnd;
}
} // namespace __asan
#endif // ASAN_MAPPING_MYRIAD_H


@ -31,9 +31,9 @@ struct AllocationSite {
class HeapProfile {
public:
HeapProfile() : allocations_(1024) {}
HeapProfile() { allocations_.reserve(1024); }
void ProcessChunk(const AsanChunkView& cv) {
void ProcessChunk(const AsanChunkView &cv) {
if (cv.IsAllocated()) {
total_allocated_user_size_ += cv.UsedSize();
total_allocated_count_++;
@ -49,10 +49,10 @@ class HeapProfile {
}
void Print(uptr top_percent, uptr max_number_of_contexts) {
InternalSort(&allocations_, allocations_.size(),
[](const AllocationSite &a, const AllocationSite &b) {
return a.total_size > b.total_size;
});
Sort(allocations_.data(), allocations_.size(),
[](const AllocationSite &a, const AllocationSite &b) {
return a.total_size > b.total_size;
});
CHECK(total_allocated_user_size_);
uptr total_shown = 0;
Printf("Live Heap Allocations: %zd bytes in %zd chunks; quarantined: "


@ -14,6 +14,8 @@
#include "asan_allocator.h"
#include "asan_internal.h"
#include "asan_malloc_local.h"
#include "asan_report.h"
#include "asan_stack.h"
#include "interception/interception.h"
@ -67,16 +69,28 @@ struct nothrow_t {};
enum class align_val_t: size_t {};
} // namespace std
// TODO(alekseys): throw std::bad_alloc instead of dying on OOM.
// TODO(alekseyshl): throw std::bad_alloc instead of dying on OOM.
// For local pool allocation, align to SHADOW_GRANULARITY to match asan
// allocator behavior.
#define OPERATOR_NEW_BODY(type, nothrow) \
if (ALLOCATE_FROM_LOCAL_POOL) {\
void *res = MemalignFromLocalPool(SHADOW_GRANULARITY, size);\
if (!nothrow) CHECK(res);\
return res;\
}\
GET_STACK_TRACE_MALLOC;\
void *res = asan_memalign(0, size, &stack, type);\
if (!nothrow && UNLIKELY(!res)) DieOnFailure::OnOOM();\
if (!nothrow && UNLIKELY(!res)) ReportOutOfMemory(size, &stack);\
return res;
#define OPERATOR_NEW_BODY_ALIGN(type, nothrow) \
if (ALLOCATE_FROM_LOCAL_POOL) {\
void *res = MemalignFromLocalPool((uptr)align, size);\
if (!nothrow) CHECK(res);\
return res;\
}\
GET_STACK_TRACE_MALLOC;\
void *res = asan_memalign((uptr)align, size, &stack, type);\
if (!nothrow && UNLIKELY(!res)) DieOnFailure::OnOOM();\
if (!nothrow && UNLIKELY(!res)) ReportOutOfMemory(size, &stack);\
return res;
// On OS X it's not enough to just provide our own 'operator new' and
@ -128,18 +142,22 @@ INTERCEPTOR(void *, _ZnamRKSt9nothrow_t, size_t size, std::nothrow_t const&) {
#endif // !SANITIZER_MAC
#define OPERATOR_DELETE_BODY(type) \
if (IS_FROM_LOCAL_POOL(ptr)) return;\
GET_STACK_TRACE_FREE;\
asan_delete(ptr, 0, 0, &stack, type);
#define OPERATOR_DELETE_BODY_SIZE(type) \
if (IS_FROM_LOCAL_POOL(ptr)) return;\
GET_STACK_TRACE_FREE;\
asan_delete(ptr, size, 0, &stack, type);
#define OPERATOR_DELETE_BODY_ALIGN(type) \
if (IS_FROM_LOCAL_POOL(ptr)) return;\
GET_STACK_TRACE_FREE;\
asan_delete(ptr, 0, static_cast<uptr>(align), &stack, type);
#define OPERATOR_DELETE_BODY_SIZE_ALIGN(type) \
if (IS_FROM_LOCAL_POOL(ptr)) return;\
GET_STACK_TRACE_FREE;\
asan_delete(ptr, size, static_cast<uptr>(align), &stack, type);


@ -32,7 +32,7 @@ bool CanPoisonMemory() {
}
void PoisonShadow(uptr addr, uptr size, u8 value) {
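// Unpoisoning (value == 0) must proceed even when poisoning is currently
// disabled, so that shadow poisoned earlier does not linger.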
if (!CanPoisonMemory()) return;
if (value && !CanPoisonMemory()) return;
CHECK(AddrIsAlignedByGranularity(addr));
CHECK(AddrIsInMem(addr));
CHECK(AddrIsAlignedByGranularity(addr + size));
@ -182,8 +182,15 @@ int __asan_address_is_poisoned(void const volatile *addr) {
uptr __asan_region_is_poisoned(uptr beg, uptr size) {
if (!size) return 0;
uptr end = beg + size;
if (!AddrIsInMem(beg)) return beg;
if (!AddrIsInMem(end)) return end;
if (SANITIZER_MYRIAD2) {
// On Myriad, addresses not in the DRAM range need to be treated as
// unpoisoned.
if (!AddrIsInMem(beg) && !AddrIsInShadow(beg)) return 0;
if (!AddrIsInMem(end) && !AddrIsInShadow(end)) return 0;
} else {
if (!AddrIsInMem(beg)) return beg;
if (!AddrIsInMem(end)) return end;
}
CHECK_LT(beg, end);
uptr aligned_b = RoundUpTo(beg, SHADOW_GRANULARITY);
uptr aligned_e = RoundDownTo(end, SHADOW_GRANULARITY);
@ -452,4 +459,3 @@ bool WordIsPoisoned(uptr addr) {
return (__asan_region_is_poisoned(addr, sizeof(uptr)) != 0);
}
}


@ -38,7 +38,7 @@ void PoisonShadowPartialRightRedzone(uptr addr,
// performance-critical code with care.
ALWAYS_INLINE void FastPoisonShadow(uptr aligned_beg, uptr aligned_size,
u8 value) {
DCHECK(CanPoisonMemory());
DCHECK(!value || CanPoisonMemory());
uptr shadow_beg = MEM_TO_SHADOW(aligned_beg);
uptr shadow_end = MEM_TO_SHADOW(
aligned_beg + aligned_size - SHADOW_GRANULARITY) + 1;
@ -51,6 +51,9 @@ ALWAYS_INLINE void FastPoisonShadow(uptr aligned_beg, uptr aligned_size,
// changed at all. It doesn't currently have an efficient means
// to zero a bunch of pages, but maybe we should add one.
SANITIZER_FUCHSIA == 1 ||
// RTEMS doesn't have pages, let alone a fast way to zero
// them, so default to memset.
SANITIZER_RTEMS == 1 ||
shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) {
REAL(memset)((void*)shadow_beg, value, shadow_end - shadow_beg);
} else {


@ -84,7 +84,7 @@ static void PrintZoneForPointer(uptr ptr, uptr zone_ptr,
bool ParseFrameDescription(const char *frame_descr,
InternalMmapVector<StackVarDescr> *vars) {
CHECK(frame_descr);
char *p;
const char *p;
// This string is created by the compiler and has the following form:
// "n alloc_1 alloc_2 ... alloc_n"
// where alloc_i looks like "offset size len ObjectName"
@ -134,6 +134,10 @@ class ScopedInErrorReport {
}
~ScopedInErrorReport() {
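// When halting on error, only the first thread to acquire the crash state
// gets to print a report; any other racing thread just releases the thread
// registry lock and returns.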
if (halt_on_error_ && !__sanitizer_acquire_crash_state()) {
asanThreadRegistry().Unlock();
return;
}
ASAN_ON_ERROR();
if (current_error_.IsValid()) current_error_.Print();
@ -152,7 +156,7 @@ class ScopedInErrorReport {
// Copy the message buffer so that we can start logging without holding a
// lock that gets acquired during printing.
InternalScopedBuffer<char> buffer_copy(kErrorMessageBufferSize);
InternalMmapVector<char> buffer_copy(kErrorMessageBufferSize);
{
BlockingMutexLock l(&error_message_buf_mutex);
internal_memcpy(buffer_copy.data(),
@ -202,7 +206,7 @@ class ScopedInErrorReport {
bool halt_on_error_;
};
ErrorDescription ScopedInErrorReport::current_error_;
ErrorDescription ScopedInErrorReport::current_error_(LINKER_INITIALIZED);
void ReportDeadlySignal(const SignalContext &sig) {
ScopedInErrorReport in_report(/*fatal*/ true);
@ -254,6 +258,62 @@ void ReportSanitizerGetAllocatedSizeNotOwned(uptr addr,
in_report.ReportError(error);
}
void ReportCallocOverflow(uptr count, uptr size, BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorCallocOverflow error(GetCurrentTidOrInvalid(), stack, count, size);
in_report.ReportError(error);
}
void ReportPvallocOverflow(uptr size, BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorPvallocOverflow error(GetCurrentTidOrInvalid(), stack, size);
in_report.ReportError(error);
}
void ReportInvalidAllocationAlignment(uptr alignment,
BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorInvalidAllocationAlignment error(GetCurrentTidOrInvalid(), stack,
alignment);
in_report.ReportError(error);
}
void ReportInvalidAlignedAllocAlignment(uptr size, uptr alignment,
BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorInvalidAlignedAllocAlignment error(GetCurrentTidOrInvalid(), stack,
size, alignment);
in_report.ReportError(error);
}
void ReportInvalidPosixMemalignAlignment(uptr alignment,
BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorInvalidPosixMemalignAlignment error(GetCurrentTidOrInvalid(), stack,
alignment);
in_report.ReportError(error);
}
void ReportAllocationSizeTooBig(uptr user_size, uptr total_size, uptr max_size,
BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorAllocationSizeTooBig error(GetCurrentTidOrInvalid(), stack, user_size,
total_size, max_size);
in_report.ReportError(error);
}
void ReportRssLimitExceeded(BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorRssLimitExceeded error(GetCurrentTidOrInvalid(), stack);
in_report.ReportError(error);
}
void ReportOutOfMemory(uptr requested_size, BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorOutOfMemory error(GetCurrentTidOrInvalid(), stack, requested_size);
in_report.ReportError(error);
}
void ReportStringFunctionMemoryRangesOverlap(const char *function,
const char *offset1, uptr length1,
const char *offset2, uptr length2,
@ -343,7 +403,11 @@ static bool IsInvalidPointerPair(uptr a1, uptr a2) {
}
static INLINE void CheckForInvalidPointerPair(void *p1, void *p2) {
if (!flags()->detect_invalid_pointer_pairs) return;
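// detect_invalid_pointer_pairs: 0 disables the check, 1 skips comparisons
// where either pointer is null, and any other value checks every pair.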
switch (flags()->detect_invalid_pointer_pairs) {
case 0 : return;
case 1 : if (p1 == nullptr || p2 == nullptr) return; break;
}
uptr a1 = reinterpret_cast<uptr>(p1);
uptr a2 = reinterpret_cast<uptr>(p2);

View File

@ -58,6 +58,18 @@ void ReportAllocTypeMismatch(uptr addr, BufferedStackTrace *free_stack,
void ReportMallocUsableSizeNotOwned(uptr addr, BufferedStackTrace *stack);
void ReportSanitizerGetAllocatedSizeNotOwned(uptr addr,
BufferedStackTrace *stack);
void ReportCallocOverflow(uptr count, uptr size, BufferedStackTrace *stack);
void ReportPvallocOverflow(uptr size, BufferedStackTrace *stack);
void ReportInvalidAllocationAlignment(uptr alignment,
BufferedStackTrace *stack);
void ReportInvalidAlignedAllocAlignment(uptr size, uptr alignment,
BufferedStackTrace *stack);
void ReportInvalidPosixMemalignAlignment(uptr alignment,
BufferedStackTrace *stack);
void ReportAllocationSizeTooBig(uptr user_size, uptr total_size, uptr max_size,
BufferedStackTrace *stack);
void ReportRssLimitExceeded(BufferedStackTrace *stack);
void ReportOutOfMemory(uptr requested_size, BufferedStackTrace *stack);
void ReportStringFunctionMemoryRangesOverlap(const char *function,
const char *offset1, uptr length1,
const char *offset2, uptr length2,


@ -0,0 +1,253 @@
//===-- asan_rtems.cc -----------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of AddressSanitizer, an address sanity checker.
//
// RTEMS-specific details.
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_rtems.h"
#if SANITIZER_RTEMS
#include "asan_internal.h"
#include "asan_interceptors.h"
#include "asan_mapping.h"
#include "asan_poisoning.h"
#include "asan_report.h"
#include "asan_stack.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_libc.h"
#include <pthread.h>
#include <stdlib.h>
namespace __asan {
static void ResetShadowMemory() {
uptr shadow_start = SHADOW_OFFSET;
uptr shadow_end = MEM_TO_SHADOW(kMyriadMemoryEnd32);
uptr gap_start = MEM_TO_SHADOW(shadow_start);
uptr gap_end = MEM_TO_SHADOW(shadow_end);
REAL(memset)((void *)shadow_start, 0, shadow_end - shadow_start);
REAL(memset)((void *)gap_start, kAsanShadowGap, gap_end - gap_start);
}
void InitializeShadowMemory() {
kHighMemEnd = 0;
kMidMemBeg = 0;
kMidMemEnd = 0;
ResetShadowMemory();
}
void AsanApplyToGlobals(globals_op_fptr op, const void *needle) {
UNIMPLEMENTED();
}
void AsanCheckDynamicRTPrereqs() {}
void AsanCheckIncompatibleRT() {}
void InitializeAsanInterceptors() {}
void InitializePlatformInterceptors() {}
void InitializePlatformExceptionHandlers() {}
// RTEMS only supports static linking; it suffices to return with no
// error.
void *AsanDoesNotSupportStaticLinkage() { return nullptr; }
void AsanOnDeadlySignal(int signo, void *siginfo, void *context) {
UNIMPLEMENTED();
}
void EarlyInit() {
// Provide early initialization of shadow memory so that
// instrumented code running before full initialization will not
// report spurious errors.
ResetShadowMemory();
}
// We can use a plain thread_local variable for TSD.
static thread_local void *per_thread;
void *AsanTSDGet() { return per_thread; }
void AsanTSDSet(void *tsd) { per_thread = tsd; }
// There's no initialization needed, and the passed-in destructor
// will never be called. Instead, our own thread destruction hook
// (below) will call AsanThread::TSDDtor directly.
void AsanTSDInit(void (*destructor)(void *tsd)) {
DCHECK(destructor == &PlatformTSDDtor);
}
void PlatformTSDDtor(void *tsd) { UNREACHABLE(__func__); }
//
// Thread registration. We provide an API similar to the Fuchsia port.
//
struct AsanThread::InitOptions {
uptr stack_bottom, stack_size, tls_bottom, tls_size;
};
// Shared setup between thread creation and startup for the initial thread.
static AsanThread *CreateAsanThread(StackTrace *stack, u32 parent_tid,
uptr user_id, bool detached,
uptr stack_bottom, uptr stack_size,
uptr tls_bottom, uptr tls_size) {
// In lieu of AsanThread::Create.
AsanThread *thread = (AsanThread *)MmapOrDie(sizeof(AsanThread), __func__);
AsanThreadContext::CreateThreadContextArgs args = {thread, stack};
asanThreadRegistry().CreateThread(user_id, detached, parent_tid, &args);
// On other systems, AsanThread::Init() is called from the new
// thread itself. But on RTEMS we already know the stack address
// range beforehand, so we can do most of the setup right now.
const AsanThread::InitOptions options = {stack_bottom, stack_size,
tls_bottom, tls_size};
thread->Init(&options);
return thread;
}
// This gets the same arguments passed to Init by CreateAsanThread, above.
// We're in the creator thread before the new thread is actually started, but
// its stack and tls address range are already known.
void AsanThread::SetThreadStackAndTls(const AsanThread::InitOptions *options) {
DCHECK_NE(GetCurrentThread(), this);
DCHECK_NE(GetCurrentThread(), nullptr);
CHECK_NE(options->stack_bottom, 0);
CHECK_NE(options->stack_size, 0);
stack_bottom_ = options->stack_bottom;
stack_top_ = options->stack_bottom + options->stack_size;
tls_begin_ = options->tls_bottom;
tls_end_ = options->tls_bottom + options->tls_size;
}
// Called by __asan::AsanInitInternal (asan_rtl.c). Unlike other ports, the
// main thread on RTEMS does not require special treatment; its AsanThread is
// already created by the provided hooks. This function simply looks up and
// returns the created thread.
AsanThread *CreateMainThread() {
return GetThreadContextByTidLocked(0)->thread;
}
// This is called before each thread creation is attempted. So, in
// its first call, the calling thread is the initial and sole thread.
static void *BeforeThreadCreateHook(uptr user_id, bool detached,
uptr stack_bottom, uptr stack_size,
uptr tls_bottom, uptr tls_size) {
EnsureMainThreadIDIsCorrect();
// Strict init-order checking is thread-hostile.
if (flags()->strict_init_order) StopInitOrderChecking();
GET_STACK_TRACE_THREAD;
u32 parent_tid = GetCurrentTidOrInvalid();
return CreateAsanThread(&stack, parent_tid, user_id, detached,
stack_bottom, stack_size, tls_bottom, tls_size);
}
// This is called after creating a new thread (in the creating thread),
// with the pointer returned by BeforeThreadCreateHook (above).
static void ThreadCreateHook(void *hook, bool aborted) {
AsanThread *thread = static_cast<AsanThread *>(hook);
if (!aborted) {
// The thread was created successfully.
// ThreadStartHook is already running in the new thread.
} else {
// The thread wasn't created after all.
// Clean up everything we set up in BeforeThreadCreateHook.
asanThreadRegistry().FinishThread(thread->tid());
UnmapOrDie(thread, sizeof(AsanThread));
}
}
// This is called (1) in the newly-created thread before it runs anything
// else, with the pointer returned by BeforeThreadCreateHook (above), and
// (2) before a thread restart.
static void ThreadStartHook(void *hook, uptr os_id) {
if (!hook)
return;
AsanThread *thread = static_cast<AsanThread *>(hook);
SetCurrentThread(thread);
ThreadStatus status =
asanThreadRegistry().GetThreadLocked(thread->tid())->status;
DCHECK(status == ThreadStatusCreated || status == ThreadStatusRunning);
// Determine whether we are starting or restarting the thread.
if (status == ThreadStatusCreated)
// In lieu of AsanThread::ThreadStart.
asanThreadRegistry().StartThread(thread->tid(), os_id,
/*workerthread*/ false, nullptr);
else {
// In a thread restart, a thread may resume execution at an
// arbitrary function entry point, with its stack and TLS state
// reset. We unpoison the stack in that case.
PoisonShadow(thread->stack_bottom(), thread->stack_size(), 0);
}
}
// Each thread runs this just before it exits,
// with the pointer returned by BeforeThreadCreateHook (above).
// All per-thread destructors have already been called.
static void ThreadExitHook(void *hook, uptr os_id) {
AsanThread *thread = static_cast<AsanThread *>(hook);
if (thread)
AsanThread::TSDDtor(thread->context());
}
static void HandleExit() {
// Disable ASan by setting it to uninitialized. Also reset the
// shadow memory to avoid reporting errors after the run-time has
// been destroyed.
if (asan_inited) {
asan_inited = false;
ResetShadowMemory();
}
}
} // namespace __asan
// These are declared (in extern "C") by <some_path/sanitizer.h>.
// The system runtime will call our definitions directly.
extern "C" {
void __sanitizer_early_init() {
__asan::EarlyInit();
}
void *__sanitizer_before_thread_create_hook(uptr thread, bool detached,
const char *name,
void *stack_base, size_t stack_size,
void *tls_base, size_t tls_size) {
return __asan::BeforeThreadCreateHook(
thread, detached,
reinterpret_cast<uptr>(stack_base), stack_size,
reinterpret_cast<uptr>(tls_base), tls_size);
}
void __sanitizer_thread_create_hook(void *handle, uptr thread, int status) {
__asan::ThreadCreateHook(handle, status != 0);
}
void __sanitizer_thread_start_hook(void *handle, uptr self) {
__asan::ThreadStartHook(handle, self);
}
void __sanitizer_thread_exit_hook(void *handle, uptr self) {
__asan::ThreadExitHook(handle, self);
}
void __sanitizer_exit() {
__asan::HandleExit();
}
} // "C"
#endif // SANITIZER_RTEMS


@ -56,7 +56,8 @@ static void AsanDie() {
UnmapOrDie((void*)kLowShadowBeg, kMidMemBeg - kLowShadowBeg);
UnmapOrDie((void*)kMidMemEnd, kHighShadowEnd - kMidMemEnd);
} else {
UnmapOrDie((void*)kLowShadowBeg, kHighShadowEnd - kLowShadowBeg);
if (kHighShadowEnd)
UnmapOrDie((void*)kLowShadowBeg, kHighShadowEnd - kLowShadowBeg);
}
}
}
@ -65,8 +66,14 @@ static void AsanCheckFailed(const char *file, int line, const char *cond,
u64 v1, u64 v2) {
Report("AddressSanitizer CHECK failed: %s:%d \"%s\" (0x%zx, 0x%zx)\n", file,
line, cond, (uptr)v1, (uptr)v2);
// FIXME: check for infinite recursion without a thread-local counter here.
PRINT_CURRENT_STACK_CHECK();
// Print a stack trace the first time we come here. Otherwise, we probably
// failed a CHECK during symbolization.
static atomic_uint32_t num_calls;
if (atomic_fetch_add(&num_calls, 1, memory_order_relaxed) == 0) {
PRINT_CURRENT_STACK_CHECK();
}
Die();
}
@ -140,6 +147,8 @@ ASAN_REPORT_ERROR_N(load, false)
ASAN_REPORT_ERROR_N(store, true)
#define ASAN_MEMORY_ACCESS_CALLBACK_BODY(type, is_write, size, exp_arg, fatal) \
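/* Myriad: accesses outside DRAM and its shadow have no shadow to check. */ \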
if (SANITIZER_MYRIAD2 && !AddrIsInMem(addr) && !AddrIsInShadow(addr)) \
return; \
uptr sp = MEM_TO_SHADOW(addr); \
uptr s = size <= SHADOW_GRANULARITY ? *reinterpret_cast<u8 *>(sp) \
: *reinterpret_cast<u16 *>(sp); \
@ -306,6 +315,7 @@ static void asan_atexit() {
}
static void InitializeHighMemEnd() {
#if !SANITIZER_MYRIAD2
#if !ASAN_FIXED_MAPPING
kHighMemEnd = GetMaxUserVirtualAddress();
// Increase kHighMemEnd to make sure it's properly
@ -313,13 +323,16 @@ static void InitializeHighMemEnd() {
kHighMemEnd |= SHADOW_GRANULARITY * GetMmapGranularity() - 1;
#endif // !ASAN_FIXED_MAPPING
CHECK_EQ((kHighMemBeg % GetMmapGranularity()), 0);
#endif // !SANITIZER_MYRIAD2
}
void PrintAddressSpaceLayout() {
Printf("|| `[%p, %p]` || HighMem ||\n",
(void*)kHighMemBeg, (void*)kHighMemEnd);
Printf("|| `[%p, %p]` || HighShadow ||\n",
(void*)kHighShadowBeg, (void*)kHighShadowEnd);
if (kHighMemBeg) {
Printf("|| `[%p, %p]` || HighMem ||\n",
(void*)kHighMemBeg, (void*)kHighMemEnd);
Printf("|| `[%p, %p]` || HighShadow ||\n",
(void*)kHighShadowBeg, (void*)kHighShadowEnd);
}
if (kMidMemBeg) {
Printf("|| `[%p, %p]` || ShadowGap3 ||\n",
(void*)kShadowGap3Beg, (void*)kShadowGap3End);
@ -338,11 +351,14 @@ void PrintAddressSpaceLayout() {
Printf("|| `[%p, %p]` || LowMem ||\n",
(void*)kLowMemBeg, (void*)kLowMemEnd);
}
Printf("MemToShadow(shadow): %p %p %p %p",
Printf("MemToShadow(shadow): %p %p",
(void*)MEM_TO_SHADOW(kLowShadowBeg),
(void*)MEM_TO_SHADOW(kLowShadowEnd),
(void*)MEM_TO_SHADOW(kHighShadowBeg),
(void*)MEM_TO_SHADOW(kHighShadowEnd));
(void*)MEM_TO_SHADOW(kLowShadowEnd));
if (kHighMemBeg) {
Printf(" %p %p",
(void*)MEM_TO_SHADOW(kHighShadowBeg),
(void*)MEM_TO_SHADOW(kHighShadowEnd));
}
if (kMidMemBeg) {
Printf(" %p %p",
(void*)MEM_TO_SHADOW(kMidShadowBeg),
@ -374,6 +390,7 @@ static void AsanInitInternal() {
asan_init_is_running = true;
CacheBinaryName();
CheckASLR();
// Initialize flags. This must be done early, because most of the
// initialization steps look at flags().
@ -526,6 +543,9 @@ void NOINLINE __asan_handle_no_return() {
if (curr_thread) {
top = curr_thread->stack_top();
bottom = ((uptr)&local_stack - PageSize) & ~(PageSize - 1);
} else if (SANITIZER_RTEMS) {
// Give up on RTEMS.
return;
} else {
CHECK(!SANITIZER_FUCHSIA);
// If we haven't seen this thread, try asking the OS for stack bounds.


@ -14,8 +14,9 @@
#include "sanitizer_common/sanitizer_platform.h"
// asan_fuchsia.cc has its own InitializeShadowMemory implementation.
#if !SANITIZER_FUCHSIA
// asan_fuchsia.cc and asan_rtems.cc have their own
// InitializeShadowMemory implementation.
#if !SANITIZER_FUCHSIA && !SANITIZER_RTEMS
#include "asan_internal.h"
#include "asan_mapping.h"
@ -30,8 +31,7 @@ void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name) {
CHECK_EQ(((end + 1) % GetMmapGranularity()), 0);
uptr size = end - beg + 1;
DecreaseTotalMmap(size); // Don't count the shadow against mmap_limit_mb.
void *res = MmapFixedNoReserve(beg, size, name);
if (res != (void *)beg) {
if (!MmapFixedNoReserve(beg, size, name)) {
Report(
"ReserveShadowMemoryRange failed while trying to map 0x%zx bytes. "
"Perhaps you're using ulimit -v\n",
@ -162,4 +162,4 @@ void InitializeShadowMemory() {
} // namespace __asan
#endif // !SANITIZER_FUCHSIA
#endif // !SANITIZER_FUCHSIA && !SANITIZER_RTEMS


@ -221,22 +221,25 @@ FakeStack *AsanThread::AsyncSignalSafeLazyInitFakeStack() {
void AsanThread::Init(const InitOptions *options) {
next_stack_top_ = next_stack_bottom_ = 0;
atomic_store(&stack_switching_, false, memory_order_release);
fake_stack_ = nullptr; // Will be initialized lazily if needed.
CHECK_EQ(this->stack_size(), 0U);
SetThreadStackAndTls(options);
CHECK_GT(this->stack_size(), 0U);
CHECK(AddrIsInMem(stack_bottom_));
CHECK(AddrIsInMem(stack_top_ - 1));
ClearShadowForThreadStackAndTLS();
fake_stack_ = nullptr;
if (__asan_option_detect_stack_use_after_return)
AsyncSignalSafeLazyInitFakeStack();
int local = 0;
VReport(1, "T%d: stack [%p,%p) size 0x%zx; local=%p\n", tid(),
(void *)stack_bottom_, (void *)stack_top_, stack_top_ - stack_bottom_,
&local);
}
// Fuchsia doesn't use ThreadStart.
// asan_fuchsia.c defines CreateMainThread and SetThreadStackAndTls.
#if !SANITIZER_FUCHSIA
// Fuchsia and RTEMS don't use ThreadStart.
// asan_fuchsia.c/asan_rtems.c define CreateMainThread and
// SetThreadStackAndTls.
#if !SANITIZER_FUCHSIA && !SANITIZER_RTEMS
thread_return_t AsanThread::ThreadStart(
tid_t os_id, atomic_uintptr_t *signal_thread_is_registered) {
@ -296,12 +299,17 @@ void AsanThread::SetThreadStackAndTls(const InitOptions *options) {
CHECK(AddrIsInStack((uptr)&local));
}
#endif // !SANITIZER_FUCHSIA
#endif // !SANITIZER_FUCHSIA && !SANITIZER_RTEMS
void AsanThread::ClearShadowForThreadStackAndTLS() {
PoisonShadow(stack_bottom_, stack_top_ - stack_bottom_, 0);
if (tls_begin_ != tls_end_)
PoisonShadow(tls_begin_, tls_end_ - tls_begin_, 0);
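// tls_begin_ and tls_end_ need not be aligned to SHADOW_GRANULARITY, so
// round the range out and clear the trailing partial granule as a partial
// right redzone with value 0.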
if (tls_begin_ != tls_end_) {
uptr tls_begin_aligned = RoundDownTo(tls_begin_, SHADOW_GRANULARITY);
uptr tls_end_aligned = RoundUpTo(tls_end_, SHADOW_GRANULARITY);
FastPoisonShadowPartialRightRedzone(tls_begin_aligned,
tls_end_ - tls_begin_aligned,
tls_end_aligned - tls_end_, 0);
}
}
bool AsanThread::GetStackFrameAccessByAddr(uptr addr,
@ -386,6 +394,9 @@ static bool ThreadStackContainsAddress(ThreadContextBase *tctx_base,
}
AsanThread *GetCurrentThread() {
if (SANITIZER_RTEMS && !asan_inited)
return nullptr;
AsanThreadContext *context =
reinterpret_cast<AsanThreadContext *>(AsanTSDGet());
if (!context) {
@ -477,6 +488,11 @@ void UnlockThreadRegistry() {
__asan::asanThreadRegistry().Unlock();
}
ThreadRegistry *GetThreadRegistryLocked() {
__asan::asanThreadRegistry().CheckLocked();
return &__asan::asanThreadRegistry();
}
void EnsureMainThreadIDIsCorrect() {
__asan::EnsureMainThreadIDIsCorrect();
}


@ -222,8 +222,8 @@ uptr FindDynamicShadowStart() {
uptr alignment = 8 * granularity;
uptr left_padding = granularity;
uptr space_size = kHighShadowEnd + left_padding;
uptr shadow_start =
FindAvailableMemoryRange(space_size, alignment, granularity, nullptr);
uptr shadow_start = FindAvailableMemoryRange(space_size, alignment,
granularity, nullptr, nullptr);
CHECK_NE((uptr)0, shadow_start);
CHECK(IsAligned(shadow_start, alignment));
return shadow_start;
@ -265,11 +265,6 @@ ShadowExceptionHandler(PEXCEPTION_POINTERS exception_pointers) {
// Determine the address of the page that is being accessed.
uptr page = RoundDownTo(addr, page_size);
// Query the existing page.
MEMORY_BASIC_INFORMATION mem_info = {};
if (::VirtualQuery((LPVOID)page, &mem_info, sizeof(mem_info)) == 0)
return EXCEPTION_CONTINUE_SEARCH;
// Commit the page.
uptr result =
(uptr)::VirtualAlloc((LPVOID)page, page_size, MEM_COMMIT, PAGE_READWRITE);


@ -99,7 +99,7 @@ INTERCEPTOR(int, _except_handler4, void *a, void *b, void *c, void *d) {
}
#endif
// Window specific functions not included in asan_interface.inc.
// Windows specific functions not included in asan_interface.inc.
INTERCEPT_WRAP_W_V(__asan_should_detect_stack_use_after_return)
INTERCEPT_WRAP_W_V(__asan_get_shadow_memory_dynamic_address)
INTERCEPT_WRAP_W_W(__asan_unhandled_exception_filter)


@ -0,0 +1,34 @@
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
#include "../assembly.h"
// __chkstk routine
// This routine is Windows-specific.
// http://msdn.microsoft.com/en-us/library/ms648426.aspx
// This clobbers the register r12, and the condition codes, and uses r5 and r6
// as temporaries by backing them up and restoring them afterwards.
// Does not modify any memory or the stack pointer.
// movw r4, #256 // Number of bytes of stack, in units of 4 bytes
// bl __chkstk
// sub.w sp, sp, r4
#define PAGE_SIZE 4096
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__chkstk)
lsl r4, r4, #2
mov r12, sp
push {r5, r6}
mov r5, r4
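// Probe the stack one page at a time: r12 walks down from sp while r5 counts
// the remaining bytes, and each load touches a page so guard pages are
// committed in order.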
1:
sub r12, r12, #PAGE_SIZE
subs r5, r5, #PAGE_SIZE
ldr r6, [r12]
bgt 1b
pop {r5, r6}
bx lr
END_COMPILERRT_FUNCTION(__chkstk)


@ -101,6 +101,8 @@ void __clear_cache(void *start, void *end) {
* Intel processors have a unified instruction and data cache
* so there is nothing to do
*/
#elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__))
FlushInstructionCache(GetCurrentProcess(), start, end - start);
#elif defined(__arm__) && !defined(__APPLE__)
#if defined(__FreeBSD__) || defined(__NetBSD__)
struct arm_sync_icache_args arg;
@ -128,8 +130,6 @@ void __clear_cache(void *start, void *end) {
: "r"(syscall_nr), "r"(start_reg), "r"(end_reg),
"r"(flags));
assert(start_reg == 0 && "Cache flush syscall failed.");
#elif defined(_WIN32)
FlushInstructionCache(GetCurrentProcess(), start, end - start);
#else
compilerrt_abort();
#endif


@ -16,6 +16,12 @@
/* Returns: the number of leading 0-bits */
#if !defined(__clang__) && (defined(__sparc64__) || defined(__mips64) || defined(__riscv__))
/* gcc resolves __builtin_clz -> __clzdi2 leading to infinite recursion */
#define __builtin_clz(a) __clzsi2(a)
extern si_int __clzsi2(si_int);
#endif
/* Precondition: a != 0 */
COMPILER_RT_ABI si_int


@ -416,9 +416,9 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
*Subtype = AMDFAM15H_BDVER3;
break; // "bdver3"; 30h-3Fh: Steamroller
}
if (Model >= 0x10 && Model <= 0x1f) {
if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
*Subtype = AMDFAM15H_BDVER2;
break; // "bdver2"; 10h-1Fh: Piledriver
break; // "bdver2"; 02h, 10h-1Fh: Piledriver
}
if (Model <= 0x0f) {
*Subtype = AMDFAM15H_BDVER1;


@ -16,6 +16,12 @@
/* Returns: the number of trailing 0-bits */
#if !defined(__clang__) && (defined(__sparc64__) || defined(__mips64) || defined(__riscv__))
/* gcc resolves __builtin_ctz -> __ctzdi2 leading to infinite recursion */
#define __builtin_ctz(a) __ctzsi2(a)
extern si_int __ctzsi2(si_int);
#endif
/* Precondition: a != 0 */
COMPILER_RT_ABI si_int


@ -14,7 +14,22 @@
#include "int_lib.h"
#include "int_util.h"
#ifdef __BIONIC__
/* There are 4 pthread key cleanup rounds on Bionic. Delay emutls deallocation
to round 2. We need to delay deallocation because:
- Android versions older than M lack __cxa_thread_atexit_impl, so apps
use a pthread key destructor to call C++ destructors.
- Apps might use __thread/thread_local variables in pthread destructors.
We can't wait until the final two rounds, because jemalloc needs two rounds
after the final malloc/free call to free its thread-specific data (see
https://reviews.llvm.org/D46978#1107507). */
#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 1
#else
#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 0
#endif
typedef struct emutls_address_array {
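/* number of remaining pthread key destructor rounds in which teardown is
   deferred (see EMUTLS_SKIP_DESTRUCTOR_ROUNDS above) */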
uintptr_t skip_destructor_rounds;
uintptr_t size; /* number of elements in the 'data' array */
void* data[];
} emutls_address_array;
@ -65,9 +80,30 @@ static __inline void emutls_memalign_free(void *base) {
#endif
}
static __inline void emutls_setspecific(emutls_address_array *value) {
pthread_setspecific(emutls_pthread_key, (void*) value);
}
static __inline emutls_address_array* emutls_getspecific() {
return (emutls_address_array*) pthread_getspecific(emutls_pthread_key);
}
static void emutls_key_destructor(void* ptr) {
emutls_shutdown((emutls_address_array*)ptr);
free(ptr);
emutls_address_array *array = (emutls_address_array*)ptr;
if (array->skip_destructor_rounds > 0) {
/* emutls is deallocated using a pthread key destructor. These
* destructors are called in several rounds to accommodate destructor
* functions that (re)initialize key values with pthread_setspecific.
* Delay the emutls deallocation to accommodate other end-of-thread
* cleanup tasks like calling thread_local destructors (e.g. the
* __cxa_thread_atexit fallback in libc++abi).
*/
array->skip_destructor_rounds--;
emutls_setspecific(array);
} else {
emutls_shutdown(array);
free(ptr);
}
}
static __inline void emutls_init(void) {
@ -88,15 +124,7 @@ static __inline void emutls_unlock() {
pthread_mutex_unlock(&emutls_mutex);
}
static __inline void emutls_setspecific(emutls_address_array *value) {
pthread_setspecific(emutls_pthread_key, (void*) value);
}
static __inline emutls_address_array* emutls_getspecific() {
return (emutls_address_array*) pthread_getspecific(emutls_pthread_key);
}
#else
#else /* _WIN32 */
#include <windows.h>
#include <malloc.h>
@ -222,11 +250,11 @@ static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) {
InterlockedExchangePointer((void *volatile *)ptr, (void *)val);
}
#endif
#endif /* __ATOMIC_RELEASE */
#pragma warning (pop)
#endif
#endif /* _WIN32 */
static size_t emutls_num_object = 0; /* number of allocated TLS objects */
@ -314,11 +342,12 @@ static __inline void emutls_check_array_set_size(emutls_address_array *array,
* which must be no smaller than the given index.
*/
static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) {
/* Need to allocate emutls_address_array with one extra slot
* to store the data array size.
/* Need to allocate emutls_address_array with extra slots
* to store the header.
* Round up the emutls_address_array size to multiple of 16.
*/
return ((index + 1 + 15) & ~((uintptr_t)15)) - 1;
uintptr_t header_words = sizeof(emutls_address_array) / sizeof(void *);
return ((index + header_words + 15) & ~((uintptr_t)15)) - header_words;
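/* e.g. in the usual case where uintptr_t is pointer-sized the header is two
   words, so for index == 1: (1 + 2 + 15) & ~15 == 16, minus the 2 header
   words leaves 14 data slots, and header + data fill exactly 16 pointers. */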
}
/* Returns the size in bytes required for an emutls_address_array with
@ -337,8 +366,10 @@ emutls_get_address_array(uintptr_t index) {
if (array == NULL) {
uintptr_t new_size = emutls_new_data_array_size(index);
array = (emutls_address_array*) malloc(emutls_asize(new_size));
if (array)
if (array) {
memset(array->data, 0, new_size * sizeof(void*));
array->skip_destructor_rounds = EMUTLS_SKIP_DESTRUCTOR_ROUNDS;
}
emutls_check_array_set_size(array, new_size);
} else if (index > array->size) {
uintptr_t orig_size = array->size;


@ -0,0 +1,103 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Functions that implement common sequences in function prologues and epilogues,
   used to save code size. */
.macro FUNCTION_BEGIN name
.text
.globl \name
.type \name, @function
.falign
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
.macro FALLTHROUGH_TAIL_CALL name0 name1
.size \name0, . - \name0
.globl \name1
.type \name1, @function
.falign
\name1:
.endm
/* Save r25:24 at fp+#-8 and r27:26 at fp+#-16. */
/* The compiler knows that the __save_* functions clobber LR. No other
registers should be used without informing the compiler. */
/* Since we can only issue one store per packet, we don't hurt performance by
simply jumping to the right point in this sequence of stores. */
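/* For example, a function that only needs r25:24 preserved can call
   __save_r24_through_r25 directly and enter the sequence at its final store
   packet, leaving r27:26 untouched. */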
FUNCTION_BEGIN __save_r24_through_r27
memd(fp+#-16) = r27:26
FALLTHROUGH_TAIL_CALL __save_r24_through_r27 __save_r24_through_r25
{
memd(fp+#-8) = r25:24
jumpr lr
}
FUNCTION_END __save_r24_through_r25
/* For each of the *_before_tailcall functions, jumpr lr is executed in parallel
with deallocframe. That way, the return gets the old value of lr, which is
where these functions need to return, and at the same time, lr gets the value
it needs going into the tail call. */
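/* Illustrative (hypothetical) caller: a tail-calling epilogue can do
   `call __restore_r24_through_r25_and_deallocframe_before_tailcall` followed by
   `jump callee`; the call returns to the jump through the pre-deallocframe lr,
   and by then lr already holds the original caller's return address, so the
   jump behaves as a proper tail call. */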
FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe_before_tailcall
r27:26 = memd(fp+#-16)
FALLTHROUGH_TAIL_CALL __restore_r24_through_r27_and_deallocframe_before_tailcall __restore_r24_through_r25_and_deallocframe_before_tailcall
{
r25:24 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r24_through_r25_and_deallocframe_before_tailcall
/* Here we use the extra load bandwidth to restore LR early, allowing the return
to occur in parallel with the deallocframe. */
FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe
{
lr = memw(fp+#4)
r27:26 = memd(fp+#-16)
}
{
r25:24 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r24_through_r27_and_deallocframe
/* Here the load bandwidth is maximized. */
FUNCTION_BEGIN __restore_r24_through_r25_and_deallocframe
{
r25:24 = memd(fp+#-8)
deallocframe
}
jumpr lr
FUNCTION_END __restore_r24_through_r25_and_deallocframe

View File

@@ -0,0 +1,268 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Functions that implement common sequences in function prologues and epilogues,
   used to save code size. */
.macro FUNCTION_BEGIN name
.p2align 2
.section .text.\name,"ax",@progbits
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
.macro FALLTHROUGH_TAIL_CALL name0 name1
.p2align 2
.size \name0, . - \name0
.globl \name1
.type \name1, @function
\name1:
.endm
/* Save r17:16 at fp+#-8, r19:18 at fp+#-16, r21:20 at fp+#-24, r23:22 at
fp+#-32, r25:24 at fp+#-40, and r27:26 at fp+#-48.
The compiler knows that the __save_* functions clobber LR. No other
registers should be used without informing the compiler. */
FUNCTION_BEGIN __save_r16_through_r27
{
memd(fp+#-48) = r27:26
memd(fp+#-40) = r25:24
}
{
memd(fp+#-32) = r23:22
memd(fp+#-24) = r21:20
}
{
memd(fp+#-16) = r19:18
memd(fp+#-8) = r17:16
jumpr lr
}
FUNCTION_END __save_r16_through_r27
FUNCTION_BEGIN __save_r16_through_r25
{
memd(fp+#-40) = r25:24
memd(fp+#-32) = r23:22
}
{
memd(fp+#-24) = r21:20
memd(fp+#-16) = r19:18
}
{
memd(fp+#-8) = r17:16
jumpr lr
}
FUNCTION_END __save_r16_through_r25
FUNCTION_BEGIN __save_r16_through_r23
{
memd(fp+#-32) = r23:22
memd(fp+#-24) = r21:20
}
{
memd(fp+#-16) = r19:18
memd(fp+#-8) = r17:16
jumpr lr
}
FUNCTION_END __save_r16_through_r23
FUNCTION_BEGIN __save_r16_through_r21
{
memd(fp+#-24) = r21:20
memd(fp+#-16) = r19:18
}
{
memd(fp+#-8) = r17:16
jumpr lr
}
FUNCTION_END __save_r16_through_r21
FUNCTION_BEGIN __save_r16_through_r19
{
memd(fp+#-16) = r19:18
memd(fp+#-8) = r17:16
jumpr lr
}
FUNCTION_END __save_r16_through_r19
FUNCTION_BEGIN __save_r16_through_r17
{
memd(fp+#-8) = r17:16
jumpr lr
}
FUNCTION_END __save_r16_through_r17
/* For each of the *_before_tailcall functions, jumpr lr is executed in parallel
with deallocframe. That way, the return gets the old value of lr, which is
where these functions need to return, and at the same time, lr gets the value
it needs going into the tail call. */
FUNCTION_BEGIN __restore_r16_through_r27_and_deallocframe_before_tailcall
r27:26 = memd(fp+#-48)
{
r25:24 = memd(fp+#-40)
r23:22 = memd(fp+#-32)
}
{
r21:20 = memd(fp+#-24)
r19:18 = memd(fp+#-16)
}
{
r17:16 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r16_through_r27_and_deallocframe_before_tailcall
FUNCTION_BEGIN __restore_r16_through_r25_and_deallocframe_before_tailcall
{
r25:24 = memd(fp+#-40)
r23:22 = memd(fp+#-32)
}
{
r21:20 = memd(fp+#-24)
r19:18 = memd(fp+#-16)
}
{
r17:16 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r16_through_r25_and_deallocframe_before_tailcall
FUNCTION_BEGIN __restore_r16_through_r23_and_deallocframe_before_tailcall
{
r23:22 = memd(fp+#-32)
r21:20 = memd(fp+#-24)
}
r19:18 = memd(fp+#-16)
{
r17:16 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r16_through_r23_and_deallocframe_before_tailcall
FUNCTION_BEGIN __restore_r16_through_r21_and_deallocframe_before_tailcall
{
r21:20 = memd(fp+#-24)
r19:18 = memd(fp+#-16)
}
{
r17:16 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r16_through_r21_and_deallocframe_before_tailcall
FUNCTION_BEGIN __restore_r16_through_r19_and_deallocframe_before_tailcall
r19:18 = memd(fp+#-16)
{
r17:16 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r16_through_r19_and_deallocframe_before_tailcall
FUNCTION_BEGIN __restore_r16_through_r17_and_deallocframe_before_tailcall
{
r17:16 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r16_through_r17_and_deallocframe_before_tailcall
FUNCTION_BEGIN __restore_r16_through_r27_and_deallocframe
r27:26 = memd(fp+#-48)
{
r25:24 = memd(fp+#-40)
r23:22 = memd(fp+#-32)
}
{
r21:20 = memd(fp+#-24)
r19:18 = memd(fp+#-16)
}
{
r17:16 = memd(fp+#-8)
dealloc_return
}
FUNCTION_END __restore_r16_through_r27_and_deallocframe
FUNCTION_BEGIN __restore_r16_through_r25_and_deallocframe
{
r25:24 = memd(fp+#-40)
r23:22 = memd(fp+#-32)
}
{
r21:20 = memd(fp+#-24)
r19:18 = memd(fp+#-16)
}
{
r17:16 = memd(fp+#-8)
dealloc_return
}
FUNCTION_END __restore_r16_through_r25_and_deallocframe
FUNCTION_BEGIN __restore_r16_through_r23_and_deallocframe
{
r23:22 = memd(fp+#-32)
}
{
r21:20 = memd(fp+#-24)
r19:18 = memd(fp+#-16)
}
{
r17:16 = memd(fp+#-8)
dealloc_return
}
FUNCTION_END __restore_r16_through_r23_and_deallocframe
FUNCTION_BEGIN __restore_r16_through_r21_and_deallocframe
{
r21:20 = memd(fp+#-24)
r19:18 = memd(fp+#-16)
}
{
r17:16 = memd(fp+#-8)
dealloc_return
}
FUNCTION_END __restore_r16_through_r21_and_deallocframe
FUNCTION_BEGIN __restore_r16_through_r19_and_deallocframe
{
r19:18 = memd(fp+#-16)
r17:16 = memd(fp+#-8)
}
{
dealloc_return
}
FUNCTION_END __restore_r16_through_r19_and_deallocframe
FUNCTION_BEGIN __restore_r16_through_r17_and_deallocframe
{
r17:16 = memd(fp+#-8)
dealloc_return
}
FUNCTION_END __restore_r16_through_r17_and_deallocframe
FUNCTION_BEGIN __deallocframe
dealloc_return
FUNCTION_END __deallocframe

View File

@@ -0,0 +1,157 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Functions that implement common sequences in function prologues and epilogues,
   used to save code size. */
.macro FUNCTION_BEGIN name
.text
.globl \name
.type \name, @function
.falign
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
.macro FALLTHROUGH_TAIL_CALL name0 name1
.size \name0, . - \name0
.globl \name1
.type \name1, @function
.falign
\name1:
.endm
/* Save r27:26 at fp+#-8, r25:24 at fp+#-16, r23:22 at fp+#-24, r21:20 at
fp+#-32, r19:18 at fp+#-40, and r17:16 at fp+#-48. */
/* The compiler knows that the __save_* functions clobber LR. No other
registers should be used without informing the compiler. */
/* Since we can only issue one store per packet, we don't hurt performance by
simply jumping to the right point in this sequence of stores. */
FUNCTION_BEGIN __save_r27_through_r16
memd(fp+#-48) = r17:16
FALLTHROUGH_TAIL_CALL __save_r27_through_r16 __save_r27_through_r18
memd(fp+#-40) = r19:18
FALLTHROUGH_TAIL_CALL __save_r27_through_r18 __save_r27_through_r20
memd(fp+#-32) = r21:20
FALLTHROUGH_TAIL_CALL __save_r27_through_r20 __save_r27_through_r22
memd(fp+#-24) = r23:22
FALLTHROUGH_TAIL_CALL __save_r27_through_r22 __save_r27_through_r24
memd(fp+#-16) = r25:24
{
memd(fp+#-8) = r27:26
jumpr lr
}
FUNCTION_END __save_r27_through_r24
/* For each of the *_before_sibcall functions, jumpr lr is executed in parallel
with deallocframe. That way, the return gets the old value of lr, which is
where these functions need to return, and at the same time, lr gets the value
it needs going into the sibcall. */
FUNCTION_BEGIN __restore_r27_through_r20_and_deallocframe_before_sibcall
{
r21:20 = memd(fp+#-32)
r23:22 = memd(fp+#-24)
}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe_before_sibcall __restore_r27_through_r24_and_deallocframe_before_sibcall
{
r25:24 = memd(fp+#-16)
jump __restore_r27_through_r26_and_deallocframe_before_sibcall
}
FUNCTION_END __restore_r27_through_r24_and_deallocframe_before_sibcall
FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe_before_sibcall
r17:16 = memd(fp+#-48)
FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe_before_sibcall __restore_r27_through_r18_and_deallocframe_before_sibcall
{
r19:18 = memd(fp+#-40)
r21:20 = memd(fp+#-32)
}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe_before_sibcall __restore_r27_through_r22_and_deallocframe_before_sibcall
{
r23:22 = memd(fp+#-24)
r25:24 = memd(fp+#-16)
}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe_before_sibcall __restore_r27_through_r26_and_deallocframe_before_sibcall
{
r27:26 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r27_through_r26_and_deallocframe_before_sibcall
/* Here we use the extra load bandwidth to restore LR early, allowing the return
to occur in parallel with the deallocframe. */
FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe
{
r17:16 = memd(fp+#-48)
r19:18 = memd(fp+#-40)
}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe __restore_r27_through_r20_and_deallocframe
{
r21:20 = memd(fp+#-32)
r23:22 = memd(fp+#-24)
}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe __restore_r27_through_r24_and_deallocframe
{
lr = memw(fp+#4)
r25:24 = memd(fp+#-16)
}
{
r27:26 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r27_through_r24_and_deallocframe
/* Here the load bandwidth is maximized for all three functions. */
FUNCTION_BEGIN __restore_r27_through_r18_and_deallocframe
{
r19:18 = memd(fp+#-40)
r21:20 = memd(fp+#-32)
}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe __restore_r27_through_r22_and_deallocframe
{
r23:22 = memd(fp+#-24)
r25:24 = memd(fp+#-16)
}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe __restore_r27_through_r26_and_deallocframe
{
r27:26 = memd(fp+#-8)
deallocframe
}
jumpr lr
FUNCTION_END __restore_r27_through_r26_and_deallocframe

View File

@@ -0,0 +1,398 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Double Precision Addition and Subtraction */
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
#define EXPA r4
#define EXPB r5
#define EXPB_A r5:4
#define ZTMP r7:6
#define ZTMPH r7
#define ZTMPL r6
#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12
#define BTMP r9:8
#define BTMPH r9
#define BTMPL r8
#define ATMP2 r11:10
#define ATMP2H r11
#define ATMP2L r10
#define EXPDIFF r15
#define EXTRACTOFF r14
#define EXTRACTAMT r15:14
#define TMP r28
#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1024
#define MANTISSA_TO_INT_BIAS 52
#define SR_BIT_INEXACT 5
#ifndef SR_ROUND_OFF
#define SR_ROUND_OFF 22
#endif
#define NORMAL p3
#define BIGB p2
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG
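/* For example, Q6_ALIAS(adddf3) below makes __qdsp_adddf3 an alias for
   __hexagon_adddf3; FAST_ALIAS and FAST2_ALIAS do the same for the
   __hexagon_fast_ and __hexagon_fast2_ prefixes. */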
.text
.global __hexagon_adddf3
.global __hexagon_subdf3
.type __hexagon_adddf3, @function
.type __hexagon_subdf3, @function
Q6_ALIAS(adddf3)
FAST_ALIAS(adddf3)
FAST2_ALIAS(adddf3)
Q6_ALIAS(subdf3)
FAST_ALIAS(subdf3)
FAST2_ALIAS(subdf3)
.p2align 5
__hexagon_adddf3:
{
EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
ATMP = combine(##0x20000000,#0)
}
{
NORMAL = dfclass(A,#2)
NORMAL = dfclass(B,#2)
BTMP = ATMP
BIGB = cmp.gtu(EXPB,EXPA) // Is B substantially greater than A?
}
{
if (!NORMAL) jump .Ladd_abnormal // If abnormal, go to special code
if (BIGB) A = B // if B >> A, swap A and B
if (BIGB) B = A // If B >> A, swap A and B
if (BIGB) EXPB_A = combine(EXPA,EXPB) // swap exponents
}
{
ATMP = insert(A,#MANTBITS,#EXPBITS-2) // Q1.62
BTMP = insert(B,#MANTBITS,#EXPBITS-2) // Q1.62
EXPDIFF = sub(EXPA,EXPB)
ZTMP = combine(#62,#1)
}
#undef BIGB
#undef NORMAL
#define B_POS p3
#define A_POS p2
#define NO_STICKIES p1
.Ladd_continue:
{
EXPDIFF = min(EXPDIFF,ZTMPH) // If exponent difference >= ~60,
// will collapse to sticky bit
ATMP2 = neg(ATMP)
A_POS = cmp.gt(AH,#-1)
EXTRACTOFF = #0
}
{
if (!A_POS) ATMP = ATMP2
ATMP2 = extractu(BTMP,EXTRACTAMT)
BTMP = ASR(BTMP,EXPDIFF)
#undef EXTRACTAMT
#undef EXPDIFF
#undef EXTRACTOFF
#define ZERO r15:14
ZERO = #0
}
{
NO_STICKIES = cmp.eq(ATMP2,ZERO)
if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL)
EXPB = add(EXPA,#-BIAS-60)
B_POS = cmp.gt(BH,#-1)
}
{
ATMP = add(ATMP,BTMP) // ADD!!!
ATMP2 = sub(ATMP,BTMP) // Negate and ADD --> SUB!!!
ZTMP = combine(#54,##2045)
}
{
p0 = cmp.gtu(EXPA,ZTMPH) // must be pretty high in case of large cancellation
p0 = !cmp.gtu(EXPA,ZTMPL)
if (!p0.new) jump:nt .Ladd_ovf_unf
if (!B_POS) ATMP = ATMP2 // if B neg, pick difference
}
{
A = convert_d2df(ATMP) // Convert to Double Precision, taking care of flags, etc. So nice!
p0 = cmp.eq(ATMPH,#0)
p0 = cmp.eq(ATMPL,#0)
if (p0.new) jump:nt .Ladd_zero // or maybe conversion handles zero case correctly?
}
{
AH += asl(EXPB,#HI_MANTBITS)
jumpr r31
}
.falign
__hexagon_subdf3:
{
BH = togglebit(BH,#31)
jump __qdsp_adddf3
}
.falign
.Ladd_zero:
// True zero, full cancellation
// +0 unless round towards negative infinity
{
TMP = USR
A = #0
BH = #1
}
{
TMP = extractu(TMP,#2,#22)
BH = asl(BH,#31)
}
{
p0 = cmp.eq(TMP,#2)
if (p0.new) AH = xor(AH,BH)
jumpr r31
}
.falign
.Ladd_ovf_unf:
// Overflow or Denormal is possible
// Good news: Underflow flag is not possible!
/*
* ATMP has 2's complement value
*
* EXPA has A's exponent, EXPB has EXPA-BIAS-60
*
* Convert, extract exponent, add adjustment.
* If > 2046, overflow
* If <= 0, denormal
*
* Note that we've not done our zero check yet, so do that too
*
*/
{
A = convert_d2df(ATMP)
p0 = cmp.eq(ATMPH,#0)
p0 = cmp.eq(ATMPL,#0)
if (p0.new) jump:nt .Ladd_zero
}
{
TMP = extractu(AH,#EXPBITS,#HI_MANTBITS)
AH += asl(EXPB,#HI_MANTBITS)
}
{
EXPB = add(EXPB,TMP)
B = combine(##0x00100000,#0)
}
{
p0 = cmp.gt(EXPB,##BIAS+BIAS-2)
if (p0.new) jump:nt .Ladd_ovf
}
{
p0 = cmp.gt(EXPB,#0)
if (p0.new) jumpr:t r31
TMP = sub(#1,EXPB)
}
{
B = insert(A,#MANTBITS,#0)
A = ATMP
}
{
B = lsr(B,TMP)
}
{
A = insert(B,#63,#0)
jumpr r31
}
.falign
.Ladd_ovf:
// We get either max finite value or infinity. Either way, overflow+inexact
{
A = ATMP // 2's complement value
TMP = USR
ATMP = combine(##0x7fefffff,#-1) // positive max finite
}
{
EXPB = extractu(TMP,#2,#SR_ROUND_OFF) // rounding bits
TMP = or(TMP,#0x28) // inexact + overflow
BTMP = combine(##0x7ff00000,#0) // positive infinity
}
{
USR = TMP
EXPB ^= lsr(AH,#31) // Does sign match rounding?
TMP = EXPB // unmodified rounding mode
}
{
p0 = !cmp.eq(TMP,#1) // If not round-to-zero and
p0 = !cmp.eq(EXPB,#2) // Not rounding the other way,
if (p0.new) ATMP = BTMP // we should get infinity
}
{
A = insert(ATMP,#63,#0) // insert inf/maxfinite, leave sign
}
{
p0 = dfcmp.eq(A,A)
jumpr r31
}
.Ladd_abnormal:
{
ATMP = extractu(A,#63,#0) // strip off sign
BTMP = extractu(B,#63,#0) // strip off sign
}
{
p3 = cmp.gtu(ATMP,BTMP)
if (!p3.new) A = B // sort values
if (!p3.new) B = A // sort values
}
{
// Any NaN --> NaN, possibly raise invalid if sNaN
p0 = dfclass(A,#0x0f) // A not NaN?
if (!p0.new) jump:nt .Linvalid_nan_add
if (!p3) ATMP = BTMP
if (!p3) BTMP = ATMP
}
{
// Infinity + non-infinity number is infinity
// Infinity + infinity --> inf or nan
p1 = dfclass(A,#0x08) // A is infinity
if (p1.new) jump:nt .Linf_add
}
{
p2 = dfclass(B,#0x01) // B is zero
if (p2.new) jump:nt .LB_zero // so return A or special 0+0
ATMP = #0
}
// We are left with adding one or more subnormals
{
p0 = dfclass(A,#4)
if (p0.new) jump:nt .Ladd_two_subnormal
ATMP = combine(##0x20000000,#0)
}
{
EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
EXPB = #1
// BTMP already ABS(B)
BTMP = asl(BTMP,#EXPBITS-2)
}
#undef ZERO
#define EXTRACTOFF r14
#define EXPDIFF r15
{
ATMP = insert(A,#MANTBITS,#EXPBITS-2)
EXPDIFF = sub(EXPA,EXPB)
ZTMP = combine(#62,#1)
jump .Ladd_continue
}
.Ladd_two_subnormal:
{
ATMP = extractu(A,#63,#0)
BTMP = extractu(B,#63,#0)
}
{
ATMP = neg(ATMP)
BTMP = neg(BTMP)
p0 = cmp.gt(AH,#-1)
p1 = cmp.gt(BH,#-1)
}
{
if (p0) ATMP = A
if (p1) BTMP = B
}
{
ATMP = add(ATMP,BTMP)
}
{
BTMP = neg(ATMP)
p0 = cmp.gt(ATMPH,#-1)
B = #0
}
{
if (!p0) A = BTMP
if (p0) A = ATMP
BH = ##0x80000000
}
{
if (!p0) AH = or(AH,BH)
p0 = dfcmp.eq(A,B)
if (p0.new) jump:nt .Lzero_plus_zero
}
{
jumpr r31
}
.Linvalid_nan_add:
{
TMP = convert_df2sf(A) // will generate invalid if sNaN
p0 = dfclass(B,#0x0f) // if B is not NaN
if (p0.new) B = A // make it whatever A is
}
{
BL = convert_df2sf(B) // will generate invalid if sNaN
A = #-1
jumpr r31
}
.falign
.LB_zero:
{
p0 = dfcmp.eq(ATMP,A) // is A also zero?
if (!p0.new) jumpr:t r31 // If not, just return A
}
// 0 + 0 is special
// if equal integral values, they have the same sign, which is fine for all rounding
// modes.
// If unequal in sign, we get +0 for all rounding modes except round down
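// Example: (+0.0) + (-0.0) yields +0.0 under round-to-nearest, round-up, and
// round-toward-zero, and -0.0 only under round-toward-negative-infinity.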
.Lzero_plus_zero:
{
p0 = cmp.eq(A,B)
if (p0.new) jumpr:t r31
}
{
TMP = USR
}
{
TMP = extractu(TMP,#2,#SR_ROUND_OFF)
A = #0
}
{
p0 = cmp.eq(TMP,#2)
if (p0.new) AH = ##0x80000000
jumpr r31
}
.Linf_add:
// adding infinities is only OK if they are equal
{
p0 = !cmp.eq(AH,BH) // Do they have different signs
p0 = dfclass(B,#8) // And is B also infinite?
if (!p0.new) jumpr:t r31 // If not, just a normal inf
}
{
BL = ##0x7f800001 // sNAN
}
{
A = convert_sf2df(BL) // trigger invalid, set NaN
jumpr r31
}
END(__hexagon_adddf3)

View File

@@ -0,0 +1,492 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Double Precision Divide */
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
#define Q r5:4
#define QH r5
#define QL r4
#define PROD r7:6
#define PRODHI r7
#define PRODLO r6
#define SFONE r8
#define SFDEN r9
#define SFERROR r10
#define SFRECIP r11
#define EXPBA r13:12
#define EXPB r13
#define EXPA r12
#define REMSUB2 r15:14
#define SIGN r28
#define Q_POSITIVE p3
#define NORMAL p2
#define NO_OVF_UNF p1
#define P_TMP p0
#define RECIPEST_SHIFT 3
#define QADJ 61
#define DFCLASS_NORMAL 0x02
#define DFCLASS_NUMBER 0x0F
#define DFCLASS_INFINITE 0x08
#define DFCLASS_ZERO 0x01
#define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO)
#define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE)
#define DF_MANTBITS 52
#define DF_EXPBITS 11
#define SF_MANTBITS 23
#define SF_EXPBITS 8
#define DF_BIAS 0x3ff
#define SR_ROUND_OFF 22
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG
.text
.global __hexagon_divdf3
.type __hexagon_divdf3,@function
Q6_ALIAS(divdf3)
FAST_ALIAS(divdf3)
FAST2_ALIAS(divdf3)
.p2align 5
__hexagon_divdf3:
{
NORMAL = dfclass(A,#DFCLASS_NORMAL)
NORMAL = dfclass(B,#DFCLASS_NORMAL)
EXPBA = combine(BH,AH)
SIGN = xor(AH,BH)
}
#undef A
#undef AH
#undef AL
#undef B
#undef BH
#undef BL
#define REM r1:0
#define REMHI r1
#define REMLO r0
#define DENOM r3:2
#define DENOMHI r3
#define DENOMLO r2
{
if (!NORMAL) jump .Ldiv_abnormal
PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
SFONE = ##0x3f800001
}
{
SFDEN = or(SFONE,PRODLO)
EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
Q_POSITIVE = cmp.gt(SIGN,#-1)
}
#undef SIGN
#define ONE r28
.Ldenorm_continue:
{
SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)
SFERROR = and(SFONE,#-2)
ONE = #1
EXPA = sub(EXPA,EXPB)
}
#undef EXPB
#define RECIPEST r13
{
SFERROR -= sfmpy(SFRECIP,SFDEN):lib
REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
RECIPEST = ##0x00800000 << RECIPEST_SHIFT
}
{
SFRECIP += sfmpy(SFRECIP,SFERROR):lib
DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
SFERROR = and(SFONE,#-2)
}
{
SFERROR -= sfmpy(SFRECIP,SFDEN):lib
QH = #-DF_BIAS+1
QL = #DF_BIAS-1
}
{
SFRECIP += sfmpy(SFRECIP,SFERROR):lib
NO_OVF_UNF = cmp.gt(EXPA,QH)
NO_OVF_UNF = !cmp.gt(EXPA,QL)
}
{
RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT)
Q = #0
EXPA = add(EXPA,#-QADJ)
}
#undef SFERROR
#undef SFRECIP
#define TMP r10
#define TMP1 r11
{
RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT))
}
#define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
{ \
PROD = mpyu(RECIPEST,REMHI); \
REM = asl(REM,# ## ( REMSHIFT )); \
}; \
{ \
PRODLO = # ## 0; \
REM -= mpyu(PRODHI,DENOMLO); \
REMSUB2 = mpyu(PRODHI,DENOMHI); \
}; \
{ \
Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
REM -= asl(REMSUB2, # ## 32); \
EXTRA \
}
DIV_ITER1B(ASL,14,15,)
DIV_ITER1B(ASR,1,15,)
DIV_ITER1B(ASR,16,15,)
DIV_ITER1B(ASR,31,15,PROD=# ( 0 );)
#undef REMSUB2
#define TMPPAIR r15:14
#define TMPPAIRHI r15
#define TMPPAIRLO r14
#undef RECIPEST
#define EXPB r13
{
// compare or sub with carry
TMPPAIR = sub(REM,DENOM)
P_TMP = cmp.gtu(DENOM,REM)
// set up amt to add to q
if (!P_TMP.new) PRODLO = #2
}
{
Q = add(Q,PROD)
if (!P_TMP) REM = TMPPAIR
TMPPAIR = #0
}
{
P_TMP = cmp.eq(REM,TMPPAIR)
if (!P_TMP.new) QL = or(QL,ONE)
}
{
PROD = neg(Q)
}
{
if (!Q_POSITIVE) Q = PROD
}
#undef REM
#undef REMHI
#undef REMLO
#undef DENOM
#undef DENOMLO
#undef DENOMHI
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
{
A = convert_d2df(Q)
if (!NO_OVF_UNF) jump .Ldiv_ovf_unf
}
{
AH += asl(EXPA,#DF_MANTBITS-32)
jumpr r31
}
.Ldiv_ovf_unf:
{
AH += asl(EXPA,#DF_MANTBITS-32)
EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)
}
{
PROD = abs(Q)
EXPA = add(EXPA,EXPB)
}
{
P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS) // overflow
if (P_TMP.new) jump:nt .Ldiv_ovf
}
{
P_TMP = cmp.gt(EXPA,#0)
if (P_TMP.new) jump:nt .Lpossible_unf // round up to normal possible...
}
/* Underflow */
/* We know what the infinite range exponent should be (EXPA) */
/* Q is 2's complement, PROD is abs(Q) */
/* Normalize Q, shift right, add a high bit, convert, change exponent */
#define FUDGE1 7 // how much to shift right
#define FUDGE2 4 // how many guard/round to keep at lsbs
{
EXPB = add(clb(PROD),#-1) // doesn't need to be added in since
EXPA = sub(#FUDGE1,EXPA) // we extract post-converted exponent
TMP = USR
TMP1 = #63
}
{
EXPB = min(EXPA,TMP1)
TMP1 = or(TMP,#0x030)
PROD = asl(PROD,EXPB)
EXPA = #0
}
{
TMPPAIR = extractu(PROD,EXPBA) // bits that will get shifted out
PROD = lsr(PROD,EXPB) // shift out bits
B = #1
}
{
P_TMP = cmp.gtu(B,TMPPAIR)
if (!P_TMP.new) PRODLO = or(BL,PRODLO)
PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2)
}
{
Q = neg(PROD)
P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1)
if (!P_TMP.new) TMP = TMP1
}
{
USR = TMP
if (Q_POSITIVE) Q = PROD
TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2)
}
{
A = convert_d2df(Q)
}
{
AH += asl(TMP,#DF_MANTBITS-32)
jumpr r31
}
.Lpossible_unf:
/* If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal */
/* The answer is correct, but we need to raise Underflow */
{
B = extractu(A,#63,#0)
TMPPAIR = combine(##0x00100000,#0) // min normal
TMP = #0x7FFF
}
{
P_TMP = dfcmp.eq(TMPPAIR,B) // Is everything zero in the rounded value...
P_TMP = bitsset(PRODHI,TMP) // but a bunch of bits set in the unrounded abs(quotient)?
}
#if (__HEXAGON_ARCH__ == 60)
TMP = USR // If not, just return
if (!P_TMP) jumpr r31 // Else, we want to set Unf+Inexact
// Note that inexact is already set...
#else
{
if (!P_TMP) jumpr r31 // If not, just return
TMP = USR // Else, we want to set Unf+Inexact
} // Note that inexact is already set...
#endif
{
TMP = or(TMP,#0x30)
}
{
USR = TMP
}
{
p0 = dfcmp.eq(A,A)
jumpr r31
}
.Ldiv_ovf:
/*
* Raise Overflow, and choose the correct overflow value (saturated normal or infinity)
*/
{
TMP = USR
B = combine(##0x7fefffff,#-1)
AH = mux(Q_POSITIVE,#0,#-1)
}
{
PROD = combine(##0x7ff00000,#0)
QH = extractu(TMP,#2,#SR_ROUND_OFF)
TMP = or(TMP,#0x28)
}
{
USR = TMP
QH ^= lsr(AH,#31)
QL = QH
}
{
p0 = !cmp.eq(QL,#1) // if not round-to-zero
p0 = !cmp.eq(QH,#2) // and not rounding the other way
if (p0.new) B = PROD // go to inf
p0 = dfcmp.eq(B,B) // get exceptions
}
{
A = insert(B,#63,#0)
jumpr r31
}
#undef ONE
#define SIGN r28
#undef NORMAL
#undef NO_OVF_UNF
#define P_INF p1
#define P_ZERO p2
.Ldiv_abnormal:
{
P_TMP = dfclass(A,#DFCLASS_NUMBER)
P_TMP = dfclass(B,#DFCLASS_NUMBER)
Q_POSITIVE = cmp.gt(SIGN,#-1)
}
{
P_INF = dfclass(A,#DFCLASS_INFINITE)
P_INF = dfclass(B,#DFCLASS_INFINITE)
}
{
P_ZERO = dfclass(A,#DFCLASS_ZERO)
P_ZERO = dfclass(B,#DFCLASS_ZERO)
}
{
if (!P_TMP) jump .Ldiv_nan
if (P_INF) jump .Ldiv_invalid
}
{
if (P_ZERO) jump .Ldiv_invalid
}
{
P_ZERO = dfclass(A,#DFCLASS_NONZERO) // nonzero
P_ZERO = dfclass(B,#DFCLASS_NONINFINITE) // non-infinite
}
{
P_INF = dfclass(A,#DFCLASS_NONINFINITE) // non-infinite
P_INF = dfclass(B,#DFCLASS_NONZERO) // nonzero
}
{
if (!P_ZERO) jump .Ldiv_zero_result
if (!P_INF) jump .Ldiv_inf_result
}
/* Now we've narrowed it down to (de)normal / (de)normal */
/* Set up A/EXPA B/EXPB and go back */
#undef P_ZERO
#undef P_INF
#define P_TMP2 p1
{
P_TMP = dfclass(A,#DFCLASS_NORMAL)
P_TMP2 = dfclass(B,#DFCLASS_NORMAL)
TMP = ##0x00100000
}
{
EXPBA = combine(BH,AH)
AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit
BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit
}
{
if (P_TMP) AH = or(AH,TMP) // if normal, add back in hidden bit
if (P_TMP2) BH = or(BH,TMP) // if normal, add back in hidden bit
}
{
QH = add(clb(A),#-DF_EXPBITS)
QL = add(clb(B),#-DF_EXPBITS)
TMP = #1
}
{
EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
}
{
A = asl(A,QH)
B = asl(B,QL)
if (!P_TMP) EXPA = sub(TMP,QH)
if (!P_TMP2) EXPB = sub(TMP,QL)
} // recreate values needed by resume code
{
PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
}
{
SFDEN = or(SFONE,PRODLO)
jump .Ldenorm_continue
}
.Ldiv_zero_result:
{
AH = xor(AH,BH)
B = #0
}
{
A = insert(B,#63,#0)
jumpr r31
}
.Ldiv_inf_result:
{
p2 = dfclass(B,#DFCLASS_ZERO)
p2 = dfclass(A,#DFCLASS_NONINFINITE)
}
{
TMP = USR
if (!p2) jump 1f
AH = xor(AH,BH)
}
{
TMP = or(TMP,#0x04) // DBZ
}
{
USR = TMP
}
1:
{
B = combine(##0x7ff00000,#0)
p0 = dfcmp.uo(B,B) // take possible exception
}
{
A = insert(B,#63,#0)
jumpr r31
}
.Ldiv_nan:
{
p0 = dfclass(A,#0x10)
p1 = dfclass(B,#0x10)
if (!p0.new) A = B
if (!p1.new) B = A
}
{
QH = convert_df2sf(A) // get possible invalid exceptions
QL = convert_df2sf(B)
}
{
A = #-1
jumpr r31
}
.Ldiv_invalid:
{
TMP = ##0x7f800001
}
{
A = convert_sf2df(TMP) // get invalid, get DF qNaN
jumpr r31
}
END(__hexagon_divdf3)

View File

@@ -0,0 +1,705 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG
/* Double Precision Fused Multiply-Add */
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
#define C r5:4
#define CH r5
#define CL r4
#define BTMP r15:14
#define BTMPH r15
#define BTMPL r14
#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12
#define CTMP r11:10
#define CTMPH r11
#define CTMPL r10
#define PP_LL r9:8
#define PP_LL_H r9
#define PP_LL_L r8
#define PP_ODD r7:6
#define PP_ODD_H r7
#define PP_ODD_L r6
#define PP_HH r17:16
#define PP_HH_H r17
#define PP_HH_L r16
#define EXPA r18
#define EXPB r19
#define EXPBA r19:18
#define TMP r28
#define P_TMP p0
#define PROD_NEG p3
#define EXACT p2
#define SWAP p1
#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1023
#define STACKSPACE 32
#define ADJUST 4
#define FUDGE 7
#define FUDGE2 3
#ifndef SR_ROUND_OFF
#define SR_ROUND_OFF 22
#endif
/*
* First, classify for normal values, and abort if abnormal
*
* Next, unpack mantissa into 0x1000_0000_0000_0000 + mant<<8
*
* Since we know that the 2 MSBs of the H registers are zero, we should never carry
* the partial products that involve the H registers
*
* Try to buy X slots, at the expense of latency if needed
*
* We will have PP_HH with the upper bits of the product, PP_LL with the lower
* PP_HH can have a maximum of 0x03FF_FFFF_FFFF_FFFF or thereabouts
* PP_HH can have a minimum of 0x0100_0000_0000_0000
*
* 0x0100_0000_0000_0000 has EXP of EXPA+EXPB-BIAS
*
* We need to align CTMP.
* If CTMP >> PP, convert PP to 64 bit with sticky, align CTMP, and follow normal add
* If CTMP << PP align CTMP and add 128 bits. Then compute sticky
* If CTMP ~= PP, align CTMP and add 128 bits. May have massive cancellation.
*
* Convert partial product and CTMP to 2's complement prior to addition
*
* After we add, we need to normalize into upper 64 bits, then compute sticky.
*
*
*/
.text
.global __hexagon_fmadf4
.type __hexagon_fmadf4,@function
.global __hexagon_fmadf5
.type __hexagon_fmadf5,@function
.global fma
.type fma,@function
Q6_ALIAS(fmadf5)
.p2align 5
__hexagon_fmadf4:
__hexagon_fmadf5:
fma:
{
P_TMP = dfclass(A,#2)
P_TMP = dfclass(B,#2)
ATMP = #0
BTMP = #0
}
{
ATMP = insert(A,#MANTBITS,#EXPBITS-3)
BTMP = insert(B,#MANTBITS,#EXPBITS-3)
PP_ODD_H = ##0x10000000
allocframe(#STACKSPACE)
}
{
PP_LL = mpyu(ATMPL,BTMPL)
if (!P_TMP) jump .Lfma_abnormal_ab
ATMPH = or(ATMPH,PP_ODD_H)
BTMPH = or(BTMPH,PP_ODD_H)
}
{
P_TMP = dfclass(C,#2)
if (!P_TMP.new) jump:nt .Lfma_abnormal_c
CTMP = combine(PP_ODD_H,#0)
PP_ODD = combine(#0,PP_LL_H)
}
.Lfma_abnormal_c_restart:
{
PP_ODD += mpyu(BTMPL,ATMPH)
CTMP = insert(C,#MANTBITS,#EXPBITS-3)
memd(r29+#0) = PP_HH
memd(r29+#8) = EXPBA
}
{
PP_ODD += mpyu(ATMPL,BTMPH)
EXPBA = neg(CTMP)
P_TMP = cmp.gt(CH,#-1)
TMP = xor(AH,BH)
}
{
EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
PP_HH = combine(#0,PP_ODD_H)
if (!P_TMP) CTMP = EXPBA
}
{
PP_HH += mpyu(ATMPH,BTMPH)
PP_LL = combine(PP_ODD_L,PP_LL_L)
#undef PP_ODD
#undef PP_ODD_H
#undef PP_ODD_L
#undef ATMP
#undef ATMPL
#undef ATMPH
#undef BTMP
#undef BTMPL
#undef BTMPH
#define RIGHTLEFTSHIFT r13:12
#define RIGHTSHIFT r13
#define LEFTSHIFT r12
EXPA = add(EXPA,EXPB)
#undef EXPB
#undef EXPBA
#define EXPC r19
#define EXPCA r19:18
EXPC = extractu(CH,#EXPBITS,#HI_MANTBITS)
}
/* PP_HH:PP_LL now has product */
/* CTMP is negated */
/* EXPA,B,C are extracted */
/*
* We need to negate PP
* Since we will be adding with carry later, if we need to negate,
* just invert all bits now, which we can do conditionally and in parallel
*/
#define PP_HH_TMP r15:14
#define PP_LL_TMP r7:6
{
EXPA = add(EXPA,#-BIAS+(ADJUST))
PROD_NEG = !cmp.gt(TMP,#-1)
PP_LL_TMP = #0
PP_HH_TMP = #0
}
{
PP_LL_TMP = sub(PP_LL_TMP,PP_LL,PROD_NEG):carry
P_TMP = !cmp.gt(TMP,#-1)
SWAP = cmp.gt(EXPC,EXPA) // If C >> PP
if (SWAP.new) EXPCA = combine(EXPA,EXPC)
}
{
PP_HH_TMP = sub(PP_HH_TMP,PP_HH,PROD_NEG):carry
if (P_TMP) PP_LL = PP_LL_TMP
#undef PP_LL_TMP
#define CTMP2 r7:6
#define CTMP2H r7
#define CTMP2L r6
CTMP2 = #0
EXPC = sub(EXPA,EXPC)
}
{
if (P_TMP) PP_HH = PP_HH_TMP
P_TMP = cmp.gt(EXPC,#63)
if (SWAP) PP_LL = CTMP2
if (SWAP) CTMP2 = PP_LL
}
#undef PP_HH_TMP
//#define ONE r15:14
//#define S_ONE r14
#define ZERO r15:14
#define S_ZERO r15
#undef PROD_NEG
#define P_CARRY p3
{
if (SWAP) PP_HH = CTMP // Swap C and PP
if (SWAP) CTMP = PP_HH
if (P_TMP) EXPC = add(EXPC,#-64)
TMP = #63
}
{
// If diff > 63, pre-shift-right by 64...
if (P_TMP) CTMP2 = CTMP
TMP = asr(CTMPH,#31)
RIGHTSHIFT = min(EXPC,TMP)
LEFTSHIFT = #0
}
#undef C
#undef CH
#undef CL
#define STICKIES r5:4
#define STICKIESH r5
#define STICKIESL r4
{
if (P_TMP) CTMP = combine(TMP,TMP) // sign extension of pre-shift-right-64
STICKIES = extract(CTMP2,RIGHTLEFTSHIFT)
CTMP2 = lsr(CTMP2,RIGHTSHIFT)
LEFTSHIFT = sub(#64,RIGHTSHIFT)
}
{
ZERO = #0
TMP = #-2
CTMP2 |= lsl(CTMP,LEFTSHIFT)
CTMP = asr(CTMP,RIGHTSHIFT)
}
{
P_CARRY = cmp.gtu(STICKIES,ZERO) // If we have sticky bits from C shift
if (P_CARRY.new) CTMP2L = and(CTMP2L,TMP) // make sure adding 1 == OR
#undef ZERO
#define ONE r15:14
#define S_ONE r14
ONE = #1
STICKIES = #0
}
{
PP_LL = add(CTMP2,PP_LL,P_CARRY):carry // use the carry to add the sticky
}
{
PP_HH = add(CTMP,PP_HH,P_CARRY):carry
TMP = #62
}
/*
* PP_HH:PP_LL now holds the sum
* We may need to normalize left, up to ??? bits.
*
* I think that if we have massive cancellation, the range we normalize by
* is still limited
*/
{
LEFTSHIFT = add(clb(PP_HH),#-2)
if (!cmp.eq(LEFTSHIFT.new,TMP)) jump:t 1f // all sign bits?
}
/* We had all sign bits, shift left by 62. */
{
CTMP = extractu(PP_LL,#62,#2)
PP_LL = asl(PP_LL,#62)
EXPA = add(EXPA,#-62) // And adjust exponent of result
}
{
PP_HH = insert(CTMP,#62,#0) // Then shift 63
}
{
LEFTSHIFT = add(clb(PP_HH),#-2)
}
.falign
1:
{
CTMP = asl(PP_HH,LEFTSHIFT)
STICKIES |= asl(PP_LL,LEFTSHIFT)
RIGHTSHIFT = sub(#64,LEFTSHIFT)
EXPA = sub(EXPA,LEFTSHIFT)
}
{
CTMP |= lsr(PP_LL,RIGHTSHIFT)
EXACT = cmp.gtu(ONE,STICKIES)
TMP = #BIAS+BIAS-2
}
{
if (!EXACT) CTMPL = or(CTMPL,S_ONE)
// If EXPA is overflow/underflow, jump to ovf_unf
P_TMP = !cmp.gt(EXPA,TMP)
P_TMP = cmp.gt(EXPA,#1)
if (!P_TMP.new) jump:nt .Lfma_ovf_unf
}
{
// XXX: FIXME: should PP_HH for check of zero be CTMP?
P_TMP = cmp.gtu(ONE,CTMP) // is result true zero?
A = convert_d2df(CTMP)
EXPA = add(EXPA,#-BIAS-60)
PP_HH = memd(r29+#0)
}
{
AH += asl(EXPA,#HI_MANTBITS)
EXPCA = memd(r29+#8)
if (!P_TMP) dealloc_return // not zero, return
}
.Ladd_yields_zero:
/* We had full cancellation. Return +/- zero (-0 when round-down) */
{
TMP = USR
A = #0
}
{
TMP = extractu(TMP,#2,#SR_ROUND_OFF)
PP_HH = memd(r29+#0)
EXPCA = memd(r29+#8)
}
{
p0 = cmp.eq(TMP,#2)
if (p0.new) AH = ##0x80000000
dealloc_return
}
#undef RIGHTLEFTSHIFT
#undef RIGHTSHIFT
#undef LEFTSHIFT
#undef CTMP2
#undef CTMP2H
#undef CTMP2L
.Lfma_ovf_unf:
{
p0 = cmp.gtu(ONE,CTMP)
if (p0.new) jump:nt .Ladd_yields_zero
}
{
A = convert_d2df(CTMP)
EXPA = add(EXPA,#-BIAS-60)
TMP = EXPA
}
#define NEW_EXPB r7
#define NEW_EXPA r6
{
AH += asl(EXPA,#HI_MANTBITS)
NEW_EXPB = extractu(AH,#EXPBITS,#HI_MANTBITS)
}
{
NEW_EXPA = add(EXPA,NEW_EXPB)
PP_HH = memd(r29+#0)
EXPCA = memd(r29+#8)
#undef PP_HH
#undef PP_HH_H
#undef PP_HH_L
#undef EXPCA
#undef EXPC
#undef EXPA
#undef PP_LL
#undef PP_LL_H
#undef PP_LL_L
#define EXPA r6
#define EXPB r7
#define EXPBA r7:6
#define ATMP r9:8
#define ATMPH r9
#define ATMPL r8
#undef NEW_EXPB
#undef NEW_EXPA
ATMP = abs(CTMP)
}
{
p0 = cmp.gt(EXPA,##BIAS+BIAS)
if (p0.new) jump:nt .Lfma_ovf
}
{
p0 = cmp.gt(EXPA,#0)
if (p0.new) jump:nt .Lpossible_unf
}
{
// TMP has original EXPA.
// ATMP is corresponding value
// Normalize ATMP and shift right to correct location
EXPB = add(clb(ATMP),#-2) // Amount to left shift to normalize
EXPA = sub(#1+5,TMP) // Amount to right shift to denormalize
p3 = cmp.gt(CTMPH,#-1)
}
/* Underflow */
/* We know that the infinite range exponent should be EXPA */
/* CTMP is 2's complement, ATMP is abs(CTMP) */
{
EXPA = add(EXPA,EXPB) // how much to shift back right
ATMP = asl(ATMP,EXPB) // shift left
AH = USR
TMP = #63
}
{
EXPB = min(EXPA,TMP)
EXPA = #0
AL = #0x0030
}
{
B = extractu(ATMP,EXPBA)
ATMP = asr(ATMP,EXPB)
}
{
p0 = cmp.gtu(ONE,B)
if (!p0.new) ATMPL = or(ATMPL,S_ONE)
ATMPH = setbit(ATMPH,#HI_MANTBITS+FUDGE2)
}
{
CTMP = neg(ATMP)
p1 = bitsclr(ATMPL,#(1<<FUDGE2)-1)
if (!p1.new) AH = or(AH,AL)
B = #0
}
{
if (p3) CTMP = ATMP
USR = AH
TMP = #-BIAS-(MANTBITS+FUDGE2)
}
{
A = convert_d2df(CTMP)
}
{
AH += asl(TMP,#HI_MANTBITS)
dealloc_return
}
.Lpossible_unf:
{
TMP = ##0x7fefffff
ATMP = abs(CTMP)
}
{
p0 = cmp.eq(AL,#0)
p0 = bitsclr(AH,TMP)
if (!p0.new) dealloc_return:t
TMP = #0x7fff
}
{
p0 = bitsset(ATMPH,TMP)
BH = USR
BL = #0x0030
}
{
if (p0) BH = or(BH,BL)
}
{
USR = BH
}
{
p0 = dfcmp.eq(A,A)
dealloc_return
}
.Lfma_ovf:
{
TMP = USR
CTMP = combine(##0x7fefffff,#-1)
A = CTMP
}
{
ATMP = combine(##0x7ff00000,#0)
BH = extractu(TMP,#2,#SR_ROUND_OFF)
TMP = or(TMP,#0x28)
}
{
USR = TMP
BH ^= lsr(AH,#31)
BL = BH
}
{
p0 = !cmp.eq(BL,#1)
p0 = !cmp.eq(BH,#2)
}
{
p0 = dfcmp.eq(ATMP,ATMP)
if (p0.new) CTMP = ATMP
}
{
A = insert(CTMP,#63,#0)
dealloc_return
}
#undef CTMP
#undef CTMPH
#undef CTMPL
#define BTMP r11:10
#define BTMPH r11
#define BTMPL r10
#undef STICKIES
#undef STICKIESH
#undef STICKIESL
#define C r5:4
#define CH r5
#define CL r4
.Lfma_abnormal_ab:
{
ATMP = extractu(A,#63,#0)
BTMP = extractu(B,#63,#0)
deallocframe
}
{
p3 = cmp.gtu(ATMP,BTMP)
if (!p3.new) A = B // sort values
if (!p3.new) B = A
}
{
p0 = dfclass(A,#0x0f) // A NaN?
if (!p0.new) jump:nt .Lnan
if (!p3) ATMP = BTMP
if (!p3) BTMP = ATMP
}
{
p1 = dfclass(A,#0x08) // A is infinity
p1 = dfclass(B,#0x0e) // B is nonzero
}
{
p0 = dfclass(A,#0x08) // a is inf
p0 = dfclass(B,#0x01) // b is zero
}
{
if (p1) jump .Lab_inf
p2 = dfclass(B,#0x01)
}
{
if (p0) jump .Linvalid
if (p2) jump .Lab_true_zero
TMP = ##0x7c000000
}
// We are left with a normal or subnormal times a subnormal, A > B
// If A and B are both very small, we will go to a single sticky bit; replace
// A and B lower 63 bits with 0x0010_0000_0000_0000, which yields equivalent results
// if A and B might multiply to something bigger, decrease A exp and increase B exp
// and start over
{
p0 = bitsclr(AH,TMP)
if (p0.new) jump:nt .Lfma_ab_tiny
}
{
TMP = add(clb(BTMP),#-EXPBITS)
}
{
BTMP = asl(BTMP,TMP)
}
{
B = insert(BTMP,#63,#0)
AH -= asl(TMP,#HI_MANTBITS)
}
jump fma
.Lfma_ab_tiny:
ATMP = combine(##0x00100000,#0)
{
A = insert(ATMP,#63,#0)
B = insert(ATMP,#63,#0)
}
jump fma
.Lab_inf:
{
B = lsr(B,#63)
p0 = dfclass(C,#0x10)
}
{
A ^= asl(B,#63)
if (p0) jump .Lnan
}
{
p1 = dfclass(C,#0x08)
if (p1.new) jump:nt .Lfma_inf_plus_inf
}
/* A*B is +/- inf, C is finite. Return A */
{
jumpr r31
}
.falign
.Lfma_inf_plus_inf:
{ // adding infinities of different signs is invalid
p0 = dfcmp.eq(A,C)
if (!p0.new) jump:nt .Linvalid
}
{
jumpr r31
}
.Lnan:
{
p0 = dfclass(B,#0x10)
p1 = dfclass(C,#0x10)
if (!p0.new) B = A
if (!p1.new) C = A
}
{ // find sNaNs
BH = convert_df2sf(B)
BL = convert_df2sf(C)
}
{
BH = convert_df2sf(A)
A = #-1
jumpr r31
}
.Linvalid:
{
TMP = ##0x7f800001 // sp snan
}
{
A = convert_sf2df(TMP)
jumpr r31
}
.Lab_true_zero:
// B is zero, A is finite number
{
p0 = dfclass(C,#0x10)
if (p0.new) jump:nt .Lnan
if (p0.new) A = C
}
{
p0 = dfcmp.eq(B,C) // is C also zero?
AH = lsr(AH,#31) // get sign
}
{
BH ^= asl(AH,#31) // form correctly signed zero in B
if (!p0) A = C // If C is not zero, return C
if (!p0) jumpr r31
}
/* B has correctly signed zero, C is also zero */
.Lzero_plus_zero:
{
p0 = cmp.eq(B,C) // yes, scalar equals. +0++0 or -0+-0
if (p0.new) jumpr:t r31
A = B
}
{
TMP = USR
}
{
TMP = extractu(TMP,#2,#SR_ROUND_OFF)
A = #0
}
{
p0 = cmp.eq(TMP,#2)
if (p0.new) AH = ##0x80000000
jumpr r31
}
#undef BTMP
#undef BTMPH
#undef BTMPL
#define CTMP r11:10
.falign
.Lfma_abnormal_c:
/* We know that AB is normal * normal */
/* C is not normal: zero, subnormal, inf, or NaN. */
{
p0 = dfclass(C,#0x10) // is C NaN?
if (p0.new) jump:nt .Lnan
if (p0.new) A = C // move NaN to A
deallocframe
}
{
p0 = dfclass(C,#0x08) // is C inf?
if (p0.new) A = C // return C
if (p0.new) jumpr:nt r31
}
// zero or subnormal
// If we have a zero, and we know AB is normal*normal, we can just call normal multiply
{
p0 = dfclass(C,#0x01) // is C zero?
if (p0.new) jump:nt __hexagon_muldf3
TMP = #1
}
// Left with: subnormal
// Adjust C and jump back to restart
{
allocframe(#STACKSPACE) // oops, deallocated above, re-allocate frame
CTMP = #0
CH = insert(TMP,#EXPBITS,#HI_MANTBITS)
jump .Lfma_abnormal_c_restart
}
END(fma)

View File

@@ -0,0 +1,79 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#define A r1:0
#define B r3:2
#define ATMP r5:4
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG
/*
* Min and Max return A if B is NaN, or B if A is NaN
* Otherwise, they return the smaller or bigger value
*
* If values are equal, we want to favor -0.0 for min and +0.0 for max.
*/
/*
* Compares always return false for NaN
* if (isnan(A)) A = B; if (A > B) A = B will only trigger at most one of those options.
*/
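A small usage sketch of those rules, written with the standard C fmin/fmax (assumption: the host libm follows the same NaN and signed-zero conventions described above; C's Annex F recommends, but does not strictly require, the signed-zero preference):

#include <math.h>
#include <stdio.h>

int main(void) {
    printf("fmin(NaN, 2.0) = %g\n", fmin(NAN, 2.0));                         /* 2: the non-NaN operand wins */
    printf("fmin(-0.0, 0.0) negative? %d\n", signbit(fmin(-0.0, 0.0)) != 0); /* 1: min favors -0.0 */
    printf("fmax(-0.0, 0.0) negative? %d\n", signbit(fmax(-0.0, 0.0)) != 0); /* 0: max favors +0.0 */
    return 0;
}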
.text
.global __hexagon_mindf3
.global __hexagon_maxdf3
.global fmin
.type fmin,@function
.global fmax
.type fmax,@function
.type __hexagon_mindf3,@function
.type __hexagon_maxdf3,@function
Q6_ALIAS(mindf3)
Q6_ALIAS(maxdf3)
.p2align 5
__hexagon_mindf3:
fmin:
{
p0 = dfclass(A,#0x10) // Is A a NaN?
p1 = dfcmp.gt(A,B) // Is A > B? (always false if either is NaN)
ATMP = A
}
{
if (p0) A = B // if A is NaN use B
if (p1) A = B // gt is always false if either is NaN
p2 = dfcmp.eq(A,B) // if A == B
if (!p2.new) jumpr:t r31
}
/* A == B, return A|B to select -0.0 over 0.0 */
{
A = or(ATMP,B)
jumpr r31
}
END(__hexagon_mindf3)
.falign
__hexagon_maxdf3:
fmax:
{
p0 = dfclass(A,#0x10)
p1 = dfcmp.gt(B,A)
ATMP = A
}
{
if (p0) A = B
if (p1) A = B
p2 = dfcmp.eq(A,B)
if (!p2.new) jumpr:t r31
}
/* A == B, return A&B to select 0.0 over -0.0 */
{
A = and(ATMP,B)
jumpr r31
}
END(__hexagon_maxdf3)

View File

@@ -0,0 +1,418 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Double Precision Multiply */
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
#define BTMP r5:4
#define BTMPH r5
#define BTMPL r4
#define PP_ODD r7:6
#define PP_ODD_H r7
#define PP_ODD_L r6
#define ONE r9:8
#define S_ONE r8
#define S_ZERO r9
#define PP_HH r11:10
#define PP_HH_H r11
#define PP_HH_L r10
#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12
#define PP_LL r15:14
#define PP_LL_H r15
#define PP_LL_L r14
#define TMP r28
#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1024
#define MANTISSA_TO_INT_BIAS 52
/* Some constant to adjust normalization amount in error code */
/* Amount to right shift the partial product to get to a denorm */
#define FUDGE 5
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG
#define SR_ROUND_OFF 22
.text
.global __hexagon_muldf3
.type __hexagon_muldf3,@function
Q6_ALIAS(muldf3)
FAST_ALIAS(muldf3)
FAST2_ALIAS(muldf3)
.p2align 5
__hexagon_muldf3:
{
p0 = dfclass(A,#2)
p0 = dfclass(B,#2)
ATMP = combine(##0x40000000,#0)
}
{
ATMP = insert(A,#MANTBITS,#EXPBITS-1)
BTMP = asl(B,#EXPBITS-1)
TMP = #-BIAS
ONE = #1
}
{
PP_ODD = mpyu(BTMPL,ATMPH)
BTMP = insert(ONE,#2,#62)
}
/* since we know that the MSB of the H registers is zero, we should never carry */
/* H <= 2^31-1. L <= 2^32-1. Therefore, HL <= 2^63-2^32-2^31+1 */
/* Adding 2 HLs, we get 2^64-3*2^32+2 maximum. */
/* Therefore, we can add 3 2^32-1 values safely without carry. We only need one. */
{
PP_LL = mpyu(ATMPL,BTMPL)
PP_ODD += mpyu(ATMPL,BTMPH)
}
{
PP_ODD += lsr(PP_LL,#32)
PP_HH = mpyu(ATMPH,BTMPH)
BTMP = combine(##BIAS+BIAS-4,#0)
}
{
PP_HH += lsr(PP_ODD,#32)
if (!p0) jump .Lmul_abnormal
p1 = cmp.eq(PP_LL_L,#0) // 64 lsb's 0?
p1 = cmp.eq(PP_ODD_L,#0) // 64 lsb's 0?
}
/*
* PP_HH can have a maximum of 0x3FFF_FFFF_FFFF_FFFF or thereabouts
* PP_HH can have a minimum of 0x1000_0000_0000_0000 or so
*/
#undef PP_ODD
#undef PP_ODD_H
#undef PP_ODD_L
#define EXP10 r7:6
#define EXP1 r7
#define EXP0 r6
{
if (!p1) PP_HH_L = or(PP_HH_L,S_ONE)
EXP0 = extractu(AH,#EXPBITS,#HI_MANTBITS)
EXP1 = extractu(BH,#EXPBITS,#HI_MANTBITS)
}
{
PP_LL = neg(PP_HH)
EXP0 += add(TMP,EXP1)
TMP = xor(AH,BH)
}
{
if (!p2.new) PP_HH = PP_LL
p2 = cmp.gt(TMP,#-1)
p0 = !cmp.gt(EXP0,BTMPH)
p0 = cmp.gt(EXP0,BTMPL)
if (!p0.new) jump:nt .Lmul_ovf_unf
}
{
A = convert_d2df(PP_HH)
EXP0 = add(EXP0,#-BIAS-58)
}
{
AH += asl(EXP0,#HI_MANTBITS)
jumpr r31
}
.falign
.Lpossible_unf:
/* We end up with a positive exponent */
/* But we may have rounded up to an exponent of 1. */
/* If the exponent is 1, if we rounded up to it
* we need to also raise underflow
* Fortunately, this is pretty easy to detect, we must have +/- 0x0010_0000_0000_0000
* And the PP should also have more than one bit set
*/
/* Note: ATMP should have abs(PP_HH) */
/* Note: BTMPL should have 0x7FEFFFFF */
{
p0 = cmp.eq(AL,#0)
p0 = bitsclr(AH,BTMPL)
if (!p0.new) jumpr:t r31
BTMPH = #0x7fff
}
{
p0 = bitsset(ATMPH,BTMPH)
BTMPL = USR
BTMPH = #0x030
}
{
if (p0) BTMPL = or(BTMPL,BTMPH)
}
{
USR = BTMPL
}
{
p0 = dfcmp.eq(A,A)
jumpr r31
}
.falign
.Lmul_ovf_unf:
{
A = convert_d2df(PP_HH)
ATMP = abs(PP_HH) // take absolute value
EXP1 = add(EXP0,#-BIAS-58)
}
{
AH += asl(EXP1,#HI_MANTBITS)
EXP1 = extractu(AH,#EXPBITS,#HI_MANTBITS)
BTMPL = ##0x7FEFFFFF
}
{
EXP1 += add(EXP0,##-BIAS-58)
//BTMPH = add(clb(ATMP),#-2)
BTMPH = #0
}
{
p0 = cmp.gt(EXP1,##BIAS+BIAS-2) // overflow
if (p0.new) jump:nt .Lmul_ovf
}
{
p0 = cmp.gt(EXP1,#0)
if (p0.new) jump:nt .Lpossible_unf
BTMPH = sub(EXP0,BTMPH)
TMP = #63 // max amount to shift
}
/* Underflow */
/*
* PP_HH has the partial product with sticky LSB.
* PP_HH can have a maximum of 0x3FFF_FFFF_FFFF_FFFF or thereabouts
* PP_HH can have a minimum of 0x1000_0000_0000_0000 or so
* The exponent of PP_HH is in EXP1, which is non-positive (0 or negative)
* That's the exponent that happens after the normalization
*
* EXP0 has the exponent that, when added to the normalized value, is out of range.
*
* Strategy:
*
* * Shift down bits, with sticky bit, such that the bits are aligned according
* to the LZ count and appropriate exponent, but not all the way to mantissa
* field, keep around the last few bits.
* * Put a 1 near the MSB
* * Check the LSBs for inexact; if inexact also set underflow
* * Convert [u]d2df -- will correctly round according to rounding mode
* * Replace exponent field with zero
*
*
*/
{
BTMPL = #0 // offset for extract
BTMPH = sub(#FUDGE,BTMPH) // amount to right shift
}
{
p3 = cmp.gt(PP_HH_H,#-1) // is it positive?
BTMPH = min(BTMPH,TMP) // Don't shift more than 63
PP_HH = ATMP
}
{
TMP = USR
PP_LL = extractu(PP_HH,BTMP)
}
{
PP_HH = asr(PP_HH,BTMPH)
BTMPL = #0x0030 // underflow flag
AH = insert(S_ZERO,#EXPBITS,#HI_MANTBITS)
}
{
p0 = cmp.gtu(ONE,PP_LL) // Did we extract all zeros?
if (!p0.new) PP_HH_L = or(PP_HH_L,S_ONE) // add sticky bit
PP_HH_H = setbit(PP_HH_H,#HI_MANTBITS+3) // Add back in a bit so we can use convert instruction
}
{
PP_LL = neg(PP_HH)
p1 = bitsclr(PP_HH_L,#0x7) // Are the LSB's clear?
if (!p1.new) TMP = or(BTMPL,TMP) // If not, Inexact+Underflow
}
{
if (!p3) PP_HH = PP_LL
USR = TMP
}
{
A = convert_d2df(PP_HH) // Do rounding
p0 = dfcmp.eq(A,A) // realize exception
}
{
AH = insert(S_ZERO,#EXPBITS-1,#HI_MANTBITS+1) // Insert correct exponent
jumpr r31
}
.falign
.Lmul_ovf:
// We get either max finite value or infinity. Either way, overflow+inexact
{
TMP = USR
ATMP = combine(##0x7fefffff,#-1) // positive max finite
A = PP_HH
}
{
PP_LL_L = extractu(TMP,#2,#SR_ROUND_OFF) // rounding bits
TMP = or(TMP,#0x28) // inexact + overflow
BTMP = combine(##0x7ff00000,#0) // positive infinity
}
{
USR = TMP
PP_LL_L ^= lsr(AH,#31) // Does sign match rounding?
TMP = PP_LL_L // unmodified rounding mode
}
{
p0 = !cmp.eq(TMP,#1) // If not round-to-zero and
p0 = !cmp.eq(PP_LL_L,#2) // Not rounding the other way,
if (p0.new) ATMP = BTMP // we should get infinity
p0 = dfcmp.eq(A,A) // Realize FP exception if enabled
}
{
A = insert(ATMP,#63,#0) // insert inf/maxfinite, leave sign
jumpr r31
}
.Lmul_abnormal:
{
ATMP = extractu(A,#63,#0) // strip off sign
BTMP = extractu(B,#63,#0) // strip off sign
}
{
p3 = cmp.gtu(ATMP,BTMP)
if (!p3.new) A = B // sort values
if (!p3.new) B = A // sort values
}
{
// Any NaN --> NaN, possibly raise invalid if sNaN
p0 = dfclass(A,#0x0f) // A not NaN?
if (!p0.new) jump:nt .Linvalid_nan
if (!p3) ATMP = BTMP
if (!p3) BTMP = ATMP
}
{
// Infinity * nonzero number is infinity
p1 = dfclass(A,#0x08) // A is infinity
p1 = dfclass(B,#0x0e) // B is nonzero
}
{
// Infinity * zero --> NaN, raise invalid
// Other zeros return zero
p0 = dfclass(A,#0x08) // A is infinity
p0 = dfclass(B,#0x01) // B is zero
}
{
if (p1) jump .Ltrue_inf
p2 = dfclass(B,#0x01)
}
{
if (p0) jump .Linvalid_zeroinf
if (p2) jump .Ltrue_zero // so return zero
TMP = ##0x7c000000
}
// We are left with a normal or subnormal times a subnormal. A > B
// If A and B are both very small (exp(a) < BIAS-MANTBITS),
// we go to a single sticky bit, which we can round easily.
// If A and B might multiply to something bigger, decrease A exponent and increase
// B exponent and try again
{
p0 = bitsclr(AH,TMP)
if (p0.new) jump:nt .Lmul_tiny
}
{
TMP = cl0(BTMP)
}
{
TMP = add(TMP,#-EXPBITS)
}
{
BTMP = asl(BTMP,TMP)
}
{
B = insert(BTMP,#63,#0)
AH -= asl(TMP,#HI_MANTBITS)
}
jump __hexagon_muldf3
.Lmul_tiny:
{
TMP = USR
A = xor(A,B) // get sign bit
}
{
TMP = or(TMP,#0x30) // Inexact + Underflow
A = insert(ONE,#63,#0) // put in rounded up value
BTMPH = extractu(TMP,#2,#SR_ROUND_OFF) // get rounding mode
}
{
USR = TMP
p0 = cmp.gt(BTMPH,#1) // Round towards pos/neg inf?
if (!p0.new) AL = #0 // If not, zero
BTMPH ^= lsr(AH,#31) // rounding my way --> set LSB
}
{
p0 = cmp.eq(BTMPH,#3) // if rounding towards right inf
if (!p0.new) AL = #0 // don't go to zero
jumpr r31
}
.Linvalid_zeroinf:
{
TMP = USR
}
{
A = #-1
TMP = or(TMP,#2)
}
{
USR = TMP
}
{
p0 = dfcmp.uo(A,A) // force exception if enabled
jumpr r31
}
.Linvalid_nan:
{
p0 = dfclass(B,#0x0f) // if B is not NaN
TMP = convert_df2sf(A) // will generate invalid if sNaN
if (p0.new) B = A // make it whatever A is
}
{
BL = convert_df2sf(B) // will generate invalid if sNaN
A = #-1
jumpr r31
}
.falign
.Ltrue_zero:
{
A = B
B = A
}
.Ltrue_inf:
{
BH = extract(BH,#1,#31)
}
{
AH ^= asl(BH,#31)
jumpr r31
}
END(__hexagon_muldf3)
#undef ATMP
#undef ATMPL
#undef ATMPH
#undef BTMP
#undef BTMPL
#undef BTMPH

View File

@@ -0,0 +1,406 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Double Precision square root */
#define EXP r28
#define A r1:0
#define AH r1
#define AL r0
#define SFSH r3:2
#define SF_S r3
#define SF_H r2
#define SFHALF_SONE r5:4
#define S_ONE r4
#define SFHALF r5
#define SF_D r6
#define SF_E r7
#define RECIPEST r8
#define SFRAD r9
#define FRACRAD r11:10
#define FRACRADH r11
#define FRACRADL r10
#define ROOT r13:12
#define ROOTHI r13
#define ROOTLO r12
#define PROD r15:14
#define PRODHI r15
#define PRODLO r14
#define P_TMP p0
#define P_EXP1 p1
#define NORMAL p2
#define SF_EXPBITS 8
#define SF_MANTBITS 23
#define DF_EXPBITS 11
#define DF_MANTBITS 52
#define DF_BIAS 0x3ff
#define DFCLASS_ZERO 0x01
#define DFCLASS_NORMAL 0x02
#define DFCLASS_DENORMAL 0x04
#define DFCLASS_INFINITE 0x08
#define DFCLASS_NAN 0x10
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG; .type __qdsp_##TAG,@function
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG; .type __hexagon_fast_##TAG,@function
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG; .type __hexagon_fast2_##TAG,@function
#define END(TAG) .size TAG,.-TAG
.text
.global __hexagon_sqrtdf2
.type __hexagon_sqrtdf2,@function
.global __hexagon_sqrt
.type __hexagon_sqrt,@function
Q6_ALIAS(sqrtdf2)
Q6_ALIAS(sqrt)
FAST_ALIAS(sqrtdf2)
FAST_ALIAS(sqrt)
FAST2_ALIAS(sqrtdf2)
FAST2_ALIAS(sqrt)
.type sqrt,@function
.p2align 5
__hexagon_sqrtdf2:
__hexagon_sqrt:
{
PROD = extractu(A,#SF_MANTBITS+1,#DF_MANTBITS-SF_MANTBITS)
EXP = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)
SFHALF_SONE = combine(##0x3f000004,#1)
}
{
NORMAL = dfclass(A,#DFCLASS_NORMAL) // Is it normal
NORMAL = cmp.gt(AH,#-1) // and positive?
if (!NORMAL.new) jump:nt .Lsqrt_abnormal
SFRAD = or(SFHALF,PRODLO)
}
#undef NORMAL
.Ldenormal_restart:
{
FRACRAD = A
SF_E,P_TMP = sfinvsqrta(SFRAD)
SFHALF = and(SFHALF,#-16)
SFSH = #0
}
#undef A
#undef AH
#undef AL
#define ERROR r1:0
#define ERRORHI r1
#define ERRORLO r0
// SF_E : reciprocal square root
// SF_H : half rsqrt
// SF_S : square root
// SF_D : error term
// SFHALF: 0.5
{
SF_S += sfmpy(SF_E,SFRAD):lib // s0: root
SF_H += sfmpy(SF_E,SFHALF):lib // h0: 0.5*y0. Could also decrement exponent...
SF_D = SFHALF
#undef SFRAD
#define SHIFTAMT r9
SHIFTAMT = and(EXP,#1)
}
{
SF_D -= sfmpy(SF_S,SF_H):lib // d0: 0.5-H*S = 0.5-0.5*~1
FRACRADH = insert(S_ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) // replace upper bits with hidden
P_EXP1 = cmp.gtu(SHIFTAMT,#0)
}
{
SF_S += sfmpy(SF_S,SF_D):lib // s1: refine sqrt
SF_H += sfmpy(SF_H,SF_D):lib // h1: refine half-recip
SF_D = SFHALF
SHIFTAMT = mux(P_EXP1,#8,#9)
}
{
SF_D -= sfmpy(SF_S,SF_H):lib // d1: error term
FRACRAD = asl(FRACRAD,SHIFTAMT) // Move fracrad bits to right place
SHIFTAMT = mux(P_EXP1,#3,#2)
}
{
SF_H += sfmpy(SF_H,SF_D):lib // d2: rsqrt
// cool trick: half of 1/sqrt(x) has same mantissa as 1/sqrt(x).
PROD = asl(FRACRAD,SHIFTAMT) // fracrad<<(2+exp1)
}
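// (Halving a normal IEEE value only decrements the exponent, so SF_H, which
// holds 0.5/sqrt(x), carries the same mantissa bits as 1/sqrt(x); the mantissa
// extraction below exploits exactly that.)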
{
SF_H = and(SF_H,##0x007fffff)
}
{
SF_H = add(SF_H,##0x00800000 - 3)
SHIFTAMT = mux(P_EXP1,#7,#8)
}
{
RECIPEST = asl(SF_H,SHIFTAMT)
SHIFTAMT = mux(P_EXP1,#15-(1+1),#15-(1+0))
}
{
ROOT = mpyu(RECIPEST,PRODHI) // root = mpyu_full(recipest,hi(fracrad<<(2+exp1)))
}
#undef SFSH // r3:2
#undef SF_H // r2
#undef SF_S // r3
#undef S_ONE // r4
#undef SFHALF // r5
#undef SFHALF_SONE // r5:4
#undef SF_D // r6
#undef SF_E // r7
#define HL r3:2
#define LL r5:4
#define HH r7:6
#undef P_EXP1
#define P_CARRY0 p1
#define P_CARRY1 p2
#define P_CARRY2 p3
/* Iteration 0 */
/* Maybe we can save a cycle by starting with ERROR=asl(fracrad), then as we multiply */
/* We can shift and subtract instead of shift and add? */
{
ERROR = asl(FRACRAD,#15)
PROD = mpyu(ROOTHI,ROOTHI)
P_CARRY0 = cmp.eq(r0,r0)
}
{
ERROR -= asl(PROD,#15)
PROD = mpyu(ROOTHI,ROOTLO)
P_CARRY1 = cmp.eq(r0,r0)
}
{
ERROR -= lsr(PROD,#16)
P_CARRY2 = cmp.eq(r0,r0)
}
{
ERROR = mpyu(ERRORHI,RECIPEST)
}
{
ROOT += lsr(ERROR,SHIFTAMT)
SHIFTAMT = add(SHIFTAMT,#16)
ERROR = asl(FRACRAD,#31) // for next iter
}
/* Iteration 1 */
{
PROD = mpyu(ROOTHI,ROOTHI)
ERROR -= mpyu(ROOTHI,ROOTLO) // amount is 31, no shift needed
}
{
ERROR -= asl(PROD,#31)
PROD = mpyu(ROOTLO,ROOTLO)
}
{
ERROR -= lsr(PROD,#33)
}
{
ERROR = mpyu(ERRORHI,RECIPEST)
}
{
ROOT += lsr(ERROR,SHIFTAMT)
SHIFTAMT = add(SHIFTAMT,#16)
ERROR = asl(FRACRAD,#47) // for next iter
}
/* Iteration 2 */
{
PROD = mpyu(ROOTHI,ROOTHI)
}
{
ERROR -= asl(PROD,#47)
PROD = mpyu(ROOTHI,ROOTLO)
}
{
ERROR -= asl(PROD,#16) // bidir shr 31-47
PROD = mpyu(ROOTLO,ROOTLO)
}
{
ERROR -= lsr(PROD,#17) // 64-47
}
{
ERROR = mpyu(ERRORHI,RECIPEST)
}
{
ROOT += lsr(ERROR,SHIFTAMT)
}
#undef ERROR
#undef PROD
#undef PRODHI
#undef PRODLO
#define REM_HI r15:14
#define REM_HI_HI r15
#define REM_LO r1:0
#undef RECIPEST
#undef SHIFTAMT
#define TWOROOT_LO r9:8
/* Adjust Root */
{
HL = mpyu(ROOTHI,ROOTLO)
LL = mpyu(ROOTLO,ROOTLO)
REM_HI = #0
REM_LO = #0
}
{
HL += lsr(LL,#33)
LL += asl(HL,#33)
P_CARRY0 = cmp.eq(r0,r0)
}
{
HH = mpyu(ROOTHI,ROOTHI)
REM_LO = sub(REM_LO,LL,P_CARRY0):carry
TWOROOT_LO = #1
}
{
HH += lsr(HL,#31)
TWOROOT_LO += asl(ROOT,#1)
}
#undef HL
#undef LL
#define REM_HI_TMP r3:2
#define REM_HI_TMP_HI r3
#define REM_LO_TMP r5:4
{
REM_HI = sub(FRACRAD,HH,P_CARRY0):carry
REM_LO_TMP = sub(REM_LO,TWOROOT_LO,P_CARRY1):carry
#undef FRACRAD
#undef HH
#define ZERO r11:10
#define ONE r7:6
ONE = #1
ZERO = #0
}
{
REM_HI_TMP = sub(REM_HI,ZERO,P_CARRY1):carry
ONE = add(ROOT,ONE)
EXP = add(EXP,#-DF_BIAS) // subtract bias --> signed exp
}
{
// If carry set, no borrow: result was still positive
if (P_CARRY1) ROOT = ONE
if (P_CARRY1) REM_LO = REM_LO_TMP
if (P_CARRY1) REM_HI = REM_HI_TMP
}
{
REM_LO_TMP = sub(REM_LO,TWOROOT_LO,P_CARRY2):carry
ONE = #1
EXP = asr(EXP,#1) // divide signed exp by 2
}
{
REM_HI_TMP = sub(REM_HI,ZERO,P_CARRY2):carry
ONE = add(ROOT,ONE)
}
{
if (P_CARRY2) ROOT = ONE
if (P_CARRY2) REM_LO = REM_LO_TMP
// since tworoot <= 2^32, remhi must be zero
#undef REM_HI_TMP
#undef REM_HI_TMP_HI
#define S_ONE r2
#define ADJ r3
S_ONE = #1
}
{
P_TMP = cmp.eq(REM_LO,ZERO) // is the low part zero
if (!P_TMP.new) ROOTLO = or(ROOTLO,S_ONE) // if so, it's exact... hopefully
ADJ = cl0(ROOT)
EXP = add(EXP,#-63)
}
#undef REM_LO
#define RET r1:0
#define RETHI r1
{
RET = convert_ud2df(ROOT) // set up mantissa, maybe set inexact flag
EXP = add(EXP,ADJ) // add back bias
}
{
RETHI += asl(EXP,#DF_MANTBITS-32) // add exponent adjust
jumpr r31
}
#undef REM_LO_TMP
#undef REM_HI_TMP
#undef REM_HI_TMP_HI
#undef REM_LO
#undef REM_HI
#undef TWOROOT_LO
#undef RET
#define A r1:0
#define AH r1
#define AL r0
#undef S_ONE
#define TMP r3:2
#define TMPHI r3
#define TMPLO r2
#undef P_CARRY0
#define P_NEG p1
#define SFHALF r5
#define SFRAD r9
.Lsqrt_abnormal:
{
P_TMP = dfclass(A,#DFCLASS_ZERO) // zero?
if (P_TMP.new) jumpr:t r31
}
{
P_TMP = dfclass(A,#DFCLASS_NAN)
if (P_TMP.new) jump:nt .Lsqrt_nan
}
{
P_TMP = cmp.gt(AH,#-1)
if (!P_TMP.new) jump:nt .Lsqrt_invalid_neg
if (!P_TMP.new) EXP = ##0x7F800001 // sNaN
}
{
P_TMP = dfclass(A,#DFCLASS_INFINITE)
if (P_TMP.new) jumpr:nt r31
}
// If we got here, we're denormal
// prepare to restart
{
A = extractu(A,#DF_MANTBITS,#0) // Extract mantissa
}
{
EXP = add(clb(A),#-DF_EXPBITS) // how much to normalize?
}
{
A = asl(A,EXP) // Shift mantissa
EXP = sub(#1,EXP) // Form exponent
}
{
AH = insert(EXP,#1,#DF_MANTBITS-32) // insert lsb of exponent
}
{
TMP = extractu(A,#SF_MANTBITS+1,#DF_MANTBITS-SF_MANTBITS) // get sf value (mant+exp1)
SFHALF = ##0x3f000004 // form half constant
}
{
SFRAD = or(SFHALF,TMPLO) // form sf value
SFHALF = and(SFHALF,#-16)
jump .Ldenormal_restart // restart
}
.Lsqrt_nan:
{
EXP = convert_df2sf(A) // if sNaN, get invalid
A = #-1 // qNaN
jumpr r31
}
.Lsqrt_invalid_neg:
{
A = convert_sf2df(EXP) // Invalid,NaNval
jumpr r31
}
END(__hexagon_sqrt)
END(__hexagon_sqrtdf2)

View File

@ -0,0 +1,85 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
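// Signed 64-bit divide (r1:0 / r3:2): take the absolute values, run a
// shift-and-subtract loop on the magnitudes, then negate the quotient
// when the operand signs differ.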
FUNCTION_BEGIN __hexagon_divdi3
{
p2 = tstbit(r1,#31)
p3 = tstbit(r3,#31)
}
{
r1:0 = abs(r1:0)
r3:2 = abs(r3:2)
}
{
r6 = cl0(r1:0) // count leading 0's of dividend (numerator)
r7 = cl0(r3:2) // count leading 0's of divisor (denominator)
r5:4 = r3:2 // divisor moved into working registers
r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder
}
{
p3 = xor(p2,p3)
r10 = sub(r7,r6) // left shift count for bit & divisor
r1:0 = #0 // initialize quotient to 0
r15:14 = #1 // initialize bit to 1
}
{
r11 = add(r10,#1) // loop count is 1 more than shift count
r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb
r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor
}
{
p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend
loop0(1f,r11) // register loop
}
{
if (p0) jump .hexagon_divdi3_return // if divisor > dividend, we're done, so return
}
.falign
1:
{
p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder
}
{
r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder
r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8)
}
{
r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8)
r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6)
}
{
r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration
r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration
}:endloop0
.hexagon_divdi3_return:
{
r3:2 = neg(r1:0)
}
{
r1:0 = vmux(p3,r3:2,r1:0)
jumpr r31
}
FUNCTION_END __hexagon_divdi3
.globl __qdsp_divdi3
.set __qdsp_divdi3, __hexagon_divdi3

View File

@ -0,0 +1,84 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
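// Signed 32-bit divide (r0 / r1): shift-and-subtract division on the
// absolute values, with the quotient negated when the operand signs differ.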
FUNCTION_BEGIN __hexagon_divsi3
{
p0 = cmp.ge(r0,#0)
p1 = cmp.ge(r1,#0)
r1 = abs(r0)
r2 = abs(r1)
}
{
r3 = cl0(r1)
r4 = cl0(r2)
r5 = sub(r1,r2)
p2 = cmp.gtu(r2,r1)
}
#if (__HEXAGON_ARCH__ == 60)
{
r0 = #0
p1 = xor(p0,p1)
p0 = cmp.gtu(r2,r5)
}
if (p2) jumpr r31
#else
{
r0 = #0
p1 = xor(p0,p1)
p0 = cmp.gtu(r2,r5)
if (p2) jumpr r31
}
#endif
{
r0 = mux(p1,#-1,#1)
if (p0) jumpr r31
r4 = sub(r4,r3)
r3 = #1
}
{
r0 = #0
r3:2 = vlslw(r3:2,r4)
loop0(1f,r4)
}
.falign
1:
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r1 = sub(r1,r2)
if (!p0.new) r0 = add(r0,r3)
r3:2 = vlsrw(r3:2,#1)
}:endloop0
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r0 = add(r0,r3)
if (!p1) jumpr r31
}
{
r0 = neg(r0)
jumpr r31
}
FUNCTION_END __hexagon_divsi3
.globl __qdsp_divsi3
.set __qdsp_divsi3, __hexagon_divsi3

View File

@ -0,0 +1,37 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
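// fabs/fabsf: clear the IEEE sign bit of the double/single operand;
// fabsl is aliased to fabs below.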
FUNCTION_BEGIN fabs
{
r1 = clrbit(r1, #31)
jumpr r31
}
FUNCTION_END fabs
FUNCTION_BEGIN fabsf
{
r0 = clrbit(r0, #31)
jumpr r31
}
FUNCTION_END fabsf
.globl fabsl
.set fabsl, fabs

View File

@ -0,0 +1,491 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* ==================================================================== */
/* FUNCTIONS Optimized double floating point operators */
/* ==================================================================== */
/* c = dadd_asm(a, b) */
/* ==================================================================== *
fast2_QDOUBLE fast2_dadd(fast2_QDOUBLE a,fast2_QDOUBLE b) {
fast2_QDOUBLE c;
lint manta = a & MANTMASK;
int expa = Q6_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = Q6_R_sxth_R(b) ;
int exp, expdiff, j, k, hi, lo, cn;
lint mant;
expdiff = (int) Q6_P_vabsdiffh_PP(a, b);
expdiff = Q6_R_sxth_R(expdiff) ;
if (expdiff > 63) { expdiff = 62;}
if (expa > expb) {
exp = expa + 1;
expa = 1;
expb = expdiff + 1;
} else {
exp = expb + 1;
expb = 1;
expa = expdiff + 1;
}
mant = (manta>>expa) + (mantb>>expb);
hi = (int) (mant>>32);
lo = (int) (mant);
k = Q6_R_normamt_R(hi);
if(hi == 0 || hi == -1) k = 31+Q6_R_normamt_R(lo);
mant = (mant << k);
cn = (mant == 0x8000000000000000LL);
exp = exp - k + cn;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global fast2_dadd_asm
.type fast2_dadd_asm, @function
fast2_dadd_asm:
#define manta R0
#define mantexpa R1:0
#define lmanta R1:0
#define mantb R2
#define mantexpb R3:2
#define lmantb R3:2
#define expa R4
#define expb R5
#define mantexpd R7:6
#define expd R6
#define exp R8
#define c63 R9
#define lmant R1:0
#define manth R1
#define mantl R0
#define minmin R11:10 // exactly 0x0000000000008001LL
#define minminl R10
#define k R4
#define ce P0
.falign
{
mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL
c63 = #62
expa = SXTH(manta)
expb = SXTH(mantb)
} {
expd = SXTH(expd)
ce = CMP.GT(expa, expb);
if ( ce.new) exp = add(expa, #1)
if (!ce.new) exp = add(expb, #1)
} {
if ( ce) expa = #1
if (!ce) expb = #1
manta.L = #0
expd = MIN(expd, c63)
} {
if (!ce) expa = add(expd, #1)
if ( ce) expb = add(expd, #1)
mantb.L = #0
minmin = #0
} {
lmanta = ASR(lmanta, expa)
lmantb = ASR(lmantb, expb)
} {
lmant = add(lmanta, lmantb)
minminl.L = #0x8001
} {
k = clb(lmant)
c63 = #58
} {
k = add(k, #-1)
p0 = cmp.gt(k, c63)
} {
mantexpa = ASL(lmant, k)
exp = SUB(exp, k)
if(p0) jump .Ldenorma
} {
manta = insert(exp, #16, #0)
jumpr r31
}
.Ldenorma:
{
mantexpa = minmin
jumpr r31
}
/* =================================================================== *
fast2_QDOUBLE fast2_dsub(fast2_QDOUBLE a,fast2_QDOUBLE b) {
fast2_QDOUBLE c;
lint manta = a & MANTMASK;
int expa = Q6_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = Q6_R_sxth_R(b) ;
int exp, expdiff, j, k;
lint mant;
expdiff = (int) Q6_P_vabsdiffh_PP(a, b);
expdiff = Q6_R_sxth_R(expdiff) ;
if (expdiff > 63) { expdiff = 62;}
if (expa > expb) {
exp = expa + 1;
expa = 1;
expb = expdiff + 1;
} else {
exp = expb + 1;
expb = 1;
expa = expdiff + 1;
}
mant = (manta>>expa) - (mantb>>expb);
k = Q6_R_clb_P(mant)-1;
mant = (mant << k);
exp = exp - k;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global fast2_dsub_asm
.type fast2_dsub_asm, @function
fast2_dsub_asm:
#define manta R0
#define mantexpa R1:0
#define lmanta R1:0
#define mantb R2
#define mantexpb R3:2
#define lmantb R3:2
#define expa R4
#define expb R5
#define mantexpd R7:6
#define expd R6
#define exp R8
#define c63 R9
#define lmant R1:0
#define manth R1
#define mantl R0
#define minmin R11:10 // exactly 0x0000000000008001LL
#define minminl R10
#define k R4
#define ce P0
.falign
{
mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL
c63 = #62
expa = SXTH(manta)
expb = SXTH(mantb)
} {
expd = SXTH(expd)
ce = CMP.GT(expa, expb);
if ( ce.new) exp = add(expa, #1)
if (!ce.new) exp = add(expb, #1)
} {
if ( ce) expa = #1
if (!ce) expb = #1
manta.L = #0
expd = MIN(expd, c63)
} {
if (!ce) expa = add(expd, #1)
if ( ce) expb = add(expd, #1)
mantb.L = #0
minmin = #0
} {
lmanta = ASR(lmanta, expa)
lmantb = ASR(lmantb, expb)
} {
lmant = sub(lmanta, lmantb)
minminl.L = #0x8001
} {
k = clb(lmant)
c63 = #58
} {
k = add(k, #-1)
p0 = cmp.gt(k, c63)
} {
mantexpa = ASL(lmant, k)
exp = SUB(exp, k)
if(p0) jump .Ldenorm
} {
manta = insert(exp, #16, #0)
jumpr r31
}
.Ldenorm:
{
mantexpa = minmin
jumpr r31
}
/* ==================================================================== *
fast2_QDOUBLE fast2_dmpy(fast2_QDOUBLE a,fast2_QDOUBLE b) {
fast2_QDOUBLE c;
lint manta = a & MANTMASK;
int expa = Q6_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = Q6_R_sxth_R(b) ;
int exp, k;
lint mant;
int hia, hib, hi, lo;
unsigned int loa, lob;
hia = (int)(a >> 32);
loa = Q6_R_extractu_RII((int)manta, 31, 1);
hib = (int)(b >> 32);
lob = Q6_R_extractu_RII((int)mantb, 31, 1);
mant = Q6_P_mpy_RR(hia, lob);
mant = Q6_P_mpyacc_RR(mant,hib, loa);
mant = (mant >> 30) + (Q6_P_mpy_RR(hia, hib)<<1);
hi = (int) (mant>>32);
k = Q6_R_normamt_R(hi);
mant = mant << k;
exp = expa + expb - k;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global fast2_dmpy_asm
.type fast2_dmpy_asm, @function
fast2_dmpy_asm:
#define mantal R0
#define mantah R1
#define mantexpa R1:0
#define mantbl R2
#define mantbh R3
#define mantexpb R3:2
#define expa R4
#define expb R5
#define c8001 R12
#define mantexpd R7:6
#define mantdh R7
#define exp R8
#define lmantc R11:10
#define kb R9
#define guard R11
#define mantal_ R12
#define mantbl_ R13
#define min R15:14
#define minh R15
.falign
{
mantbl_= lsr(mantbl, #16)
expb = sxth(mantbl)
expa = sxth(mantal)
mantal_= lsr(mantal, #16)
}
{
lmantc = mpy(mantah, mantbh)
mantexpd = mpy(mantah, mantbl_)
mantal.L = #0x0
min = #0
}
{
lmantc = add(lmantc, lmantc)
mantexpd+= mpy(mantbh, mantal_)
mantbl.L = #0x0
minh.H = #0x8000
}
{
mantexpd = asr(mantexpd, #15)
c8001.L = #0x8001
p1 = cmp.eq(mantexpa, mantexpb)
}
{
mantexpd = add(mantexpd, lmantc)
exp = add(expa, expb)
p2 = cmp.eq(mantexpa, min)
}
{
kb = clb(mantexpd)
mantexpb = abs(mantexpd)
guard = #58
}
{
p1 = and(p1, p2)
exp = sub(exp, kb)
kb = add(kb, #-1)
p0 = cmp.gt(kb, guard)
}
{
exp = add(exp, #1)
mantexpa = asl(mantexpd, kb)
if(p1) jump .Lsat //rarely happens
}
{
mantal = insert(exp,#16, #0)
if(!p0) jumpr r31
}
{
mantal = insert(c8001,#16, #0)
jumpr r31
}
.Lsat:
{
mantexpa = #-1
}
{
mantexpa = lsr(mantexpa, #1)
}
{
mantal = insert(exp,#16, #0)
jumpr r31
}
/* ==================================================================== *
int fast2_qd2f(fast2_QDOUBLE a) {
int exp;
long long int manta;
int ic, rnd, mantb;
manta = a>>32;
exp = Q6_R_sxth_R(a) ;
ic = 0x80000000 & manta;
manta = Q6_R_abs_R_sat(manta);
rnd = 0x40;
mantb = (manta + rnd)>>7;
exp = (exp + 126);
if((manta & 0xff) == rnd) rnd = 0x00;
if((manta & 0x7fffffc0) == 0x7fffffc0) {
manta = 0x0; exp++;
} else {
manta= mantb & 0x007fffff;
}
exp = (exp << 23) & 0x7fffffc0;
ic = Q6_R_addacc_RR(ic, exp, manta);
return (ic);
}
* ==================================================================== */
.text
.global fast2_qd2f_asm
.type fast2_qd2f_asm, @function
fast2_qd2f_asm:
#define mantah R1
#define mantal R0
#define cff R0
#define mant R3
#define expo R4
#define rnd R5
#define mask R6
#define c07f R7
#define c80 R0
#define mantb R2
#define ic R0
.falign
{
mant = abs(mantah):sat
expo = sxth(mantal)
rnd = #0x40
mask.L = #0xffc0
}
{
cff = extractu(mant, #8, #0)
p2 = cmp.gt(expo, #126)
p3 = cmp.ge(expo, #-126)
mask.H = #0x7fff
}
{
p1 = cmp.eq(cff,#0x40)
if(p1.new) rnd = #0
expo = add(expo, #126)
if(!p3) jump .Lmin
}
{
p0 = bitsset(mant, mask)
c80.L = #0x0000
mantb = add(mant, rnd)
c07f = lsr(mask, #8)
}
{
if(p0) expo = add(expo, #1)
if(p0) mant = #0
mantb = lsr(mantb, #7)
c80.H = #0x8000
}
{
ic = and(c80, mantah)
mask &= asl(expo, #23)
if(!p0) mant = and(mantb, c07f)
if(p2) jump .Lmax
}
{
ic += add(mask, mant)
jumpr r31
}
.Lmax:
{
ic.L = #0xffff;
}
{
ic.H = #0x7f7f;
jumpr r31
}
.Lmin:
{
ic = #0x0
jumpr r31
}
/* ==================================================================== *
fast2_QDOUBLE fast2_f2qd(int ia) {
lint exp;
lint mant;
fast2_QDOUBLE c;
mant = ((ia << 7) | 0x40000000)&0x7fffff80 ;
if (ia & 0x80000000) mant = -mant;
exp = ((ia >> 23) & 0xFFLL) - 126;
c = (mant<<32) | Q6_R_zxth_R(exp);
return(c);
}
* ==================================================================== */
.text
.global fast2_f2qd_asm
.type fast2_f2qd_asm, @function
fast2_f2qd_asm:
#define ia R0
#define mag R3
#define mantr R1
#define expr R0
#define zero R2
#define maxneg R5:4
#define maxnegl R4
.falign
{
mantr = asl(ia, #7)
p0 = tstbit(ia, #31)
maxneg = #0
mag = add(ia,ia)
}
{
mantr = setbit(mantr, #30)
expr= extractu(ia,#8,#23)
maxnegl.L = #0x8001
p1 = cmp.eq(mag, #0)
}
{
mantr= extractu(mantr, #31, #0)
expr= add(expr, #-126)
zero = #0
if(p1) jump .Lminqd
}
{
expr = zxth(expr)
if(p0) mantr= sub(zero, mantr)
jumpr r31
}
.Lminqd:
{
R1:0 = maxneg
jumpr r31
}

View File

@ -0,0 +1,345 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* ==================================================================== *
fast2_QLDOUBLE fast2_ldadd(fast2_QLDOUBLE a,fast2_QLDOUBLE b) {
fast2_QLDOUBLE c;
lint manta = a & MANTMASK;
int expa = Q6_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = Q6_R_sxth_R(b) ;
int exp, expdiff, j, k, hi, lo, cn;
lint mant;
expdiff = (int) Q6_P_vabsdiffh_PP(a, b);
expdiff = Q6_R_sxth_R(expdiff) ;
if (expdiff > 63) { expdiff = 62;}
if (expa > expb) {
exp = expa + 1;
expa = 1;
expb = expdiff + 1;
} else {
exp = expb + 1;
expb = 1;
expa = expdiff + 1;
}
mant = (manta>>expa) + (mantb>>expb);
hi = (int) (mant>>32);
lo = (int) (mant);
k = Q6_R_normamt_R(hi);
if(hi == 0 || hi == -1) k = 31+Q6_R_normamt_R(lo);
mant = (mant << k);
cn = (mant == 0x8000000000000000LL);
exp = exp - k + cn;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global fast2_ldadd_asm
.type fast2_ldadd_asm, @function
fast2_ldadd_asm:
#define manta R1:0
#define lmanta R1:0
#define mantb R3:2
#define lmantb R3:2
#define expa R4
#define expb R5
#define expd R6
#define exp R8
#define c63 R9
#define lmant R1:0
#define k R4
#define ce P0
#define zero R3:2
.falign
{
expa = memw(r29+#8)
expb = memw(r29+#24)
r7 = r0
}
{
expd = sub(expa, expb):sat
ce = CMP.GT(expa, expb);
if ( ce.new) exp = add(expa, #1)
if (!ce.new) exp = add(expb, #1)
} {
expd = abs(expd):sat
if ( ce) expa = #1
if (!ce) expb = #1
c63 = #62
} {
expd = MIN(expd, c63)
manta = memd(r29+#0)
mantb = memd(r29+#16)
} {
if (!ce) expa = add(expd, #1)
if ( ce) expb = add(expd, #1)
} {
lmanta = ASR(lmanta, expa)
lmantb = ASR(lmantb, expb)
} {
lmant = add(lmanta, lmantb)
zero = #0
} {
k = clb(lmant)
c63.L =#0x0001
} {
exp -= add(k, #-1) //exp = exp - (k-1)
k = add(k, #-1)
p0 = cmp.gt(k, #58)
c63.H =#0x8000
} {
if(!p0)memw(r7+#8) = exp
lmant = ASL(lmant, k)
if(p0) jump .Ldenorma
} {
memd(r7+#0) = lmant
jumpr r31
}
.Ldenorma:
memd(r7+#0) = zero
{
memw(r7+#8) = c63
jumpr r31
}
/* =================================================================== *
fast2_QLDOUBLE fast2_ldsub(fast2_QLDOUBLE a,fast2_QLDOUBLE b) {
fast2_QLDOUBLE c;
lint manta = a & MANTMASK;
int expa = Q6_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = Q6_R_sxth_R(b) ;
int exp, expdiff, j, k;
lint mant;
expdiff = (int) Q6_P_vabsdiffh_PP(a, b);
expdiff = Q6_R_sxth_R(expdiff) ;
if (expdiff > 63) { expdiff = 62;}
if (expa > expb) {
exp = expa + 1;
expa = 1;
expb = expdiff + 1;
} else {
exp = expb + 1;
expb = 1;
expa = expdiff + 1;
}
mant = (manta>>expa) - (mantb>>expb);
k = Q6_R_clb_P(mant)-1;
mant = (mant << k);
exp = exp - k;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global fast2_ldsub_asm
.type fast2_ldsub_asm, @function
fast2_ldsub_asm:
#define manta R1:0
#define lmanta R1:0
#define mantb R3:2
#define lmantb R3:2
#define expa R4
#define expb R5
#define expd R6
#define exp R8
#define c63 R9
#define lmant R1:0
#define k R4
#define ce P0
#define zero R3:2
.falign
{
expa = memw(r29+#8)
expb = memw(r29+#24)
r7 = r0
}
{
expd = sub(expa, expb):sat
ce = CMP.GT(expa, expb);
if ( ce.new) exp = add(expa, #1)
if (!ce.new) exp = add(expb, #1)
} {
expd = abs(expd):sat
if ( ce) expa = #1
if (!ce) expb = #1
c63 = #62
} {
expd = min(expd, c63)
manta = memd(r29+#0)
mantb = memd(r29+#16)
} {
if (!ce) expa = add(expd, #1)
if ( ce) expb = add(expd, #1)
} {
lmanta = ASR(lmanta, expa)
lmantb = ASR(lmantb, expb)
} {
lmant = sub(lmanta, lmantb)
zero = #0
} {
k = clb(lmant)
c63.L =#0x0001
} {
exp -= add(k, #-1) //exp = exp - (k-1)
k = add(k, #-1)
p0 = cmp.gt(k, #58)
c63.H =#0x8000
} {
if(!p0)memw(r7+#8) = exp
lmant = asl(lmant, k)
if(p0) jump .Ldenorma_s
} {
memd(r7+#0) = lmant
jumpr r31
}
.Ldenorma_s:
memd(r7+#0) = zero
{
memw(r7+#8) = c63
jumpr r31
}
/* ==================================================================== *
fast2_QLDOUBLE fast2_ldmpy(fast2_QLDOUBLE a,fast2_QLDOUBLE b) {
fast2_QLDOUBLE c;
lint manta = a & MANTMASK;
int expa = Q6_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = Q6_R_sxth_R(b) ;
int exp, k;
lint mant;
int hia, hib, hi, lo;
unsigned int loa, lob;
hia = (int)(a >> 32);
loa = Q6_R_extractu_RII((int)manta, 31, 1);
hib = (int)(b >> 32);
lob = Q6_R_extractu_RII((int)mantb, 31, 1);
mant = Q6_P_mpy_RR(hia, lob);
mant = Q6_P_mpyacc_RR(mant,hib, loa);
mant = (mant >> 30) + (Q6_P_mpy_RR(hia, hib)<<1);
hi = (int) (mant>>32);
k = Q6_R_normamt_R(hi);
mant = mant << k;
exp = expa + expb - k;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global fast2_ldmpy_asm
.type fast2_ldmpy_asm, @function
fast2_ldmpy_asm:
#define mantxl_ R9
#define mantxl R14
#define mantxh R15
#define mantx R15:14
#define mantbl R2
#define mantbl_ R8
#define mantbh R3
#define mantb R3:2
#define expa R4
#define expb R5
#define c8001 R8
#define mantd R7:6
#define lmantc R11:10
#define kp R9
#define min R13:12
#define minh R13
#define max R13:12
#define maxh R13
#define ret R0
.falign
{
mantx = memd(r29+#0)
mantb = memd(r29+#16)
min = #0
}
{
mantbl_= extractu(mantbl, #31, #1)
mantxl_= extractu(mantxl, #31, #1)
minh.H = #0x8000
}
{
lmantc = mpy(mantxh, mantbh)
mantd = mpy(mantxh, mantbl_)
expa = memw(r29+#8)
expb = memw(r29+#24)
}
{
lmantc = add(lmantc, lmantc)
mantd += mpy(mantbh, mantxl_)
}
{
mantd = asr(mantd, #30)
c8001.L = #0x0001
p1 = cmp.eq(mantx, mantb)
}
{
mantd = add(mantd, lmantc)
expa= add(expa, expb)
p2 = cmp.eq(mantb, min)
}
{
kp = clb(mantd)
c8001.H = #0x8000
p1 = and(p1, p2)
}
{
expa-= add(kp, #-1)
kp = add(kp, #-1)
if(p1) jump .Lsat
}
{
mantd = asl(mantd, kp)
memw(ret+#8) = expa
p0 = cmp.gt(kp, #58)
if(p0.new) jump:NT .Ldenorm //rarely happens
}
{
memd(ret+#0) = mantd
jumpr r31
}
.Lsat:
{
max = #0
expa+= add(kp, #1)
}
{
maxh.H = #0x4000
memw(ret+#8) = expa
}
{
memd(ret+#0) = max
jumpr r31
}
.Ldenorm:
{
memw(ret+#8) = c8001
mantx = #0
}
{
memd(ret+#0) = mantx
jumpr r31
}

View File

@ -0,0 +1,400 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* ==================================================================== */
/* FUNCTIONS Optimized double floating point operators */
/* ==================================================================== */
/* c = dadd_asm(a, b) */
/* ====================================================================
QDOUBLE dadd(QDOUBLE a,QDOUBLE b) {
QDOUBLE c;
lint manta = a & MANTMASK;
int expa = HEXAGON_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = HEXAGON_R_sxth_R(b) ;
int exp, expdiff, j, k, hi, lo, cn;
lint mant;
expdiff = (int) HEXAGON_P_vabsdiffh_PP(a, b);
expdiff = HEXAGON_R_sxth_R(expdiff) ;
if (expdiff > 63) { expdiff = 62;}
if (expa > expb) {
exp = expa + 1;
expa = 1;
expb = expdiff + 1;
} else {
exp = expb + 1;
expb = 1;
expa = expdiff + 1;
}
mant = (manta>>expa) + (mantb>>expb);
hi = (int) (mant>>32);
lo = (int) (mant);
k = HEXAGON_R_normamt_R(hi);
if(hi == 0 || hi == -1) k = 31+HEXAGON_R_normamt_R(lo);
mant = (mant << k);
cn = (mant == 0x8000000000000000LL);
exp = exp - k + cn;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global dadd_asm
.type dadd_asm, @function
dadd_asm:
#define manta R0
#define mantexpa R1:0
#define lmanta R1:0
#define mantb R2
#define mantexpb R3:2
#define lmantb R3:2
#define expa R4
#define expb R5
#define mantexpd R7:6
#define expd R6
#define exp R8
#define c63 R9
#define lmant R1:0
#define manth R1
#define mantl R0
#define zero R7:6
#define zerol R6
#define minus R3:2
#define minusl R2
#define maxneg R9
#define minmin R11:10 // exactly 0x8000000000000000LL
#define minminh R11
#define k R4
#define kl R5
#define ce P0
.falign
{
mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL
c63 = #62
expa = SXTH(manta)
expb = SXTH(mantb)
} {
expd = SXTH(expd)
ce = CMP.GT(expa, expb);
if ( ce.new) exp = add(expa, #1)
if (!ce.new) exp = add(expb, #1)
} {
if ( ce) expa = #1
if (!ce) expb = #1
manta.L = #0
expd = MIN(expd, c63)
} {
if (!ce) expa = add(expd, #1)
if ( ce) expb = add(expd, #1)
mantb.L = #0
zero = #0
} {
lmanta = ASR(lmanta, expa)
lmantb = ASR(lmantb, expb)
minmin = #0
} {
lmant = add(lmanta, lmantb)
minus = #-1
minminh.H = #0x8000
} {
k = NORMAMT(manth)
kl = NORMAMT(mantl)
p0 = cmp.eq(manth, zerol)
p1 = cmp.eq(manth, minusl)
} {
p0 = OR(p0, p1)
if(p0.new) k = add(kl, #31)
maxneg.H = #0
} {
mantexpa = ASL(lmant, k)
exp = SUB(exp, k)
maxneg.L = #0x8001
} {
p0 = cmp.eq(mantexpa, zero)
p1 = cmp.eq(mantexpa, minus)
manta.L = #0
exp = ZXTH(exp)
} {
p2 = cmp.eq(mantexpa, minmin) //is result 0x80....0
if(p2.new) exp = add(exp, #1)
}
#if (__HEXAGON_ARCH__ == 60)
{
p0 = OR(p0, p1)
if( p0.new) manta = OR(manta,maxneg)
if(!p0.new) manta = OR(manta,exp)
}
jumpr r31
#else
{
p0 = OR(p0, p1)
if( p0.new) manta = OR(manta,maxneg)
if(!p0.new) manta = OR(manta,exp)
jumpr r31
}
#endif
/* =================================================================== *
QDOUBLE dsub(QDOUBLE a,QDOUBLE b) {
QDOUBLE c;
lint manta = a & MANTMASK;
int expa = HEXAGON_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = HEXAGON_R_sxth_R(b) ;
int exp, expdiff, j, k, hi, lo, cn;
lint mant;
expdiff = (int) HEXAGON_P_vabsdiffh_PP(a, b);
expdiff = HEXAGON_R_sxth_R(expdiff) ;
if (expdiff > 63) { expdiff = 62;}
if (expa > expb) {
exp = expa + 1;
expa = 1;
expb = expdiff + 1;
} else {
exp = expb + 1;
expb = 1;
expa = expdiff + 1;
}
mant = (manta>>expa) - (mantb>>expb);
hi = (int) (mant>>32);
lo = (int) (mant);
k = HEXAGON_R_normamt_R(hi);
if(hi == 0 || hi == -1) k = 31+HEXAGON_R_normamt_R(lo);
mant = (mant << k);
cn = (mant == 0x8000000000000000LL);
exp = exp - k + cn;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global dsub_asm
.type dsub_asm, @function
dsub_asm:
#define manta R0
#define mantexpa R1:0
#define lmanta R1:0
#define mantb R2
#define mantexpb R3:2
#define lmantb R3:2
#define expa R4
#define expb R5
#define mantexpd R7:6
#define expd R6
#define exp R8
#define c63 R9
#define lmant R1:0
#define manth R1
#define mantl R0
#define zero R7:6
#define zerol R6
#define minus R3:2
#define minusl R2
#define maxneg R9
#define minmin R11:10 // exactly 0x8000000000000000LL
#define minminh R11
#define k R4
#define kl R5
#define ce P0
.falign
{
mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL
c63 = #62
expa = SXTH(manta)
expb = SXTH(mantb)
} {
expd = SXTH(expd)
ce = CMP.GT(expa, expb);
if ( ce.new) exp = add(expa, #1)
if (!ce.new) exp = add(expb, #1)
} {
if ( ce) expa = #1
if (!ce) expb = #1
manta.L = #0
expd = MIN(expd, c63)
} {
if (!ce) expa = add(expd, #1)
if ( ce) expb = add(expd, #1)
mantb.L = #0
zero = #0
} {
lmanta = ASR(lmanta, expa)
lmantb = ASR(lmantb, expb)
minmin = #0
} {
lmant = sub(lmanta, lmantb)
minus = #-1
minminh.H = #0x8000
} {
k = NORMAMT(manth)
kl = NORMAMT(mantl)
p0 = cmp.eq(manth, zerol)
p1 = cmp.eq(manth, minusl)
} {
p0 = OR(p0, p1)
if(p0.new) k = add(kl, #31)
maxneg.H = #0
} {
mantexpa = ASL(lmant, k)
exp = SUB(exp, k)
maxneg.L = #0x8001
} {
p0 = cmp.eq(mantexpa, zero)
p1 = cmp.eq(mantexpa, minus)
manta.L = #0
exp = ZXTH(exp)
} {
p2 = cmp.eq(mantexpa, minmin) //is result 0x80....0
if(p2.new) exp = add(exp, #1)
}
#if (__HEXAGON_ARCH__ == 60)
{
p0 = OR(p0, p1)
if( p0.new) manta = OR(manta,maxneg)
if(!p0.new) manta = OR(manta,exp)
}
jumpr r31
#else
{
p0 = OR(p0, p1)
if( p0.new) manta = OR(manta,maxneg)
if(!p0.new) manta = OR(manta,exp)
jumpr r31
}
#endif
/* ==================================================================== *
QDOUBLE dmpy(QDOUBLE a,QDOUBLE b) {
QDOUBLE c;
lint manta = a & MANTMASK;
int expa = HEXAGON_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = HEXAGON_R_sxth_R(b) ;
int exp, k;
lint mant;
int hia, hib, hi, lo;
unsigned int loa, lob;
hia = (int)(a >> 32);
loa = HEXAGON_R_extractu_RII((int)manta, 31, 1);
hib = (int)(b >> 32);
lob = HEXAGON_R_extractu_RII((int)mantb, 31, 1);
mant = HEXAGON_P_mpy_RR(hia, lob);
mant = HEXAGON_P_mpyacc_RR(mant,hib, loa);
mant = (mant >> 30) + (HEXAGON_P_mpy_RR(hia, hib)<<1);
hi = (int) (mant>>32);
lo = (int) (mant);
k = HEXAGON_R_normamt_R(hi);
if(hi == 0 || hi == -1) k = 31+HEXAGON_R_normamt_R(lo);
mant = mant << k;
exp = expa + expb - k;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global dmpy_asm
.type dmpy_asm, @function
dmpy_asm:
#define mantal R0
#define mantah R1
#define mantexpa R1:0
#define mantbl R2
#define mantbh R3
#define mantexpb R3:2
#define expa R4
#define expb R5
#define mantexpd R7:6
#define exp R8
#define lmantc R11:10
#define mantch R11
#define mantcl R10
#define zero0 R7:6
#define zero0l R6
#define minus1 R3:2
#define minus1l R2
#define maxneg R9
#define k R4
#define kl R5
.falign
{
mantbl = lsr(mantbl, #16)
mantal = lsr(mantal, #16)
expa = sxth(mantal)
expb = sxth(mantbl)
}
{
lmantc = mpy(mantah, mantbh)
mantexpd = mpy(mantah, mantbl)
}
{
lmantc = add(lmantc, lmantc) //<<1
mantexpd+= mpy(mantbh, mantal)
}
{
lmantc += asr(mantexpd, #15)
exp = add(expa, expb)
zero0 = #0
minus1 = #-1
}
{
k = normamt(mantch)
kl = normamt(mantcl)
p0 = cmp.eq(mantch, zero0l)
p1 = cmp.eq(mantch, minus1l)
}
{
p0 = or(p0, p1)
if(p0.new) k = add(kl, #31)
maxneg.H = #0
}
{
mantexpa = asl(lmantc, k)
exp = sub(exp, k)
maxneg.L = #0x8001
}
{
p0 = cmp.eq(mantexpa, zero0)
p1 = cmp.eq(mantexpa, minus1)
mantal.L = #0
exp = zxth(exp)
}
#if (__HEXAGON_ARCH__ == 60)
{
p0 = or(p0, p1)
if( p0.new) mantal = or(mantal,maxneg)
if(!p0.new) mantal = or(mantal,exp)
}
jumpr r31
#else
{
p0 = or(p0, p1)
if( p0.new) mantal = or(mantal,maxneg)
if(!p0.new) mantal = or(mantal,exp)
jumpr r31
}
#endif

View File

@ -0,0 +1,31 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
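// fmaf(x, y, z) = x*y + z, computed with the single-precision
// multiply-accumulate instruction (r2 += r0 * r1).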
FUNCTION_BEGIN fmaf
r2 += sfmpy(r0, r1)
{
r0 = r2
jumpr r31
}
FUNCTION_END fmaf
.globl fmal
.set fmal, fma

View File

@ -0,0 +1,30 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
FUNCTION_BEGIN fmaxf
{
r0 = sfmax(r0, r1)
jumpr r31
}
FUNCTION_END fmaxf
.globl fmaxl
.set fmaxl, fmax

View File

@ -0,0 +1,30 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
FUNCTION_BEGIN fminf
{
r0 = sfmin(r0, r1)
jumpr r31
}
FUNCTION_END fminf
.globl fminl
.set fminl, fmin

View File

@ -0,0 +1,125 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// An optimized version of a memcpy which is equivalent to the following loop:
//
// volatile unsigned *dest;
// unsigned *src;
//
// for (i = 0; i < num_words; ++i)
// *dest++ = *src++;
//
// The corresponding C prototype for this function would be
// void hexagon_memcpy_forward_vp4cp4n2(volatile unsigned *dest,
// const unsigned *src,
// unsigned num_words);
//
// *** Both dest and src must be aligned to 32-bit boundaries. ***
// The code does not perform any runtime checks for this, and will fail
// in bad ways if this requirement is not met.
//
// The "forward" in the name refers to the fact that the function copies
// the words going forward in memory. It is incorrect to use this function
// for cases where the original code copied words in any other order.
//
// *** This function is only for use by the compiler. ***
// The only intended use is for the LLVM compiler to generate calls to
// this function, when a mem-copy loop, like the one above, is detected.
.text
// Inputs:
// r0: dest
// r1: src
// r2: num_words
.globl hexagon_memcpy_forward_vp4cp4n2
.balign 32
.type hexagon_memcpy_forward_vp4cp4n2,@function
hexagon_memcpy_forward_vp4cp4n2:
// Compute r3 to be the number of words remaining in the current page.
// At the same time, compute r4 to be the number of 32-byte blocks
// remaining in the page (for prefetch).
{
r3 = sub(##4096, r1)
r5 = lsr(r2, #3)
}
{
// The word count before end-of-page is in the 12 lowest bits of r3.
// (If the address in r1 was already page-aligned, the bits are 0.)
r3 = extractu(r3, #10, #2)
r4 = extractu(r3, #7, #5)
}
{
r3 = minu(r2, r3)
r4 = minu(r5, r4)
}
{
r4 = or(r4, ##2105344) // 2105344 = 0x202000
p0 = cmp.eq(r3, #0)
if (p0.new) jump:nt .Lskipprolog
}
l2fetch(r1, r4)
{
loop0(.Lprolog, r3)
r2 = sub(r2, r3) // r2 = number of words left after the prolog.
}
.falign
.Lprolog:
{
r4 = memw(r1++#4)
memw(r0++#4) = r4.new
} :endloop0
.Lskipprolog:
{
// Let r3 = number of whole pages left (page = 1024 words).
r3 = lsr(r2, #10)
if (cmp.eq(r3.new, #0)) jump:nt .Lskipmain
}
{
loop1(.Lout, r3)
r2 = extractu(r2, #10, #0) // r2 = r2 & 1023
r3 = ##2105472 // r3 = 0x202080 (prefetch info)
}
// Iterate over pages.
.falign
.Lout:
// Prefetch each individual page.
l2fetch(r1, r3)
loop0(.Lpage, #512)
.falign
.Lpage:
r5:4 = memd(r1++#8)
{
memw(r0++#8) = r4
memw(r0+#4) = r5
} :endloop0:endloop1
.Lskipmain:
{
r3 = ##2105344 // r3 = 0x202000 (prefetch info)
r4 = lsr(r2, #3) // r4 = number of 32-byte blocks remaining.
p0 = cmp.eq(r2, #0)
if (p0.new) jumpr:nt r31
}
{
r3 = or(r3, r4)
loop0(.Lepilog, r2)
}
l2fetch(r1, r3)
.falign
.Lepilog:
{
r4 = memw(r1++#4)
memw(r0++#4) = r4.new
} :endloop0
jumpr r31
.size hexagon_memcpy_forward_vp4cp4n2, . - hexagon_memcpy_forward_vp4cp4n2

View File

@ -0,0 +1,64 @@
//===------------------------- memcopy routines ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
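// Specialized memcpy for copies of at least 32 bytes whose length is a
// multiple of 8: if both pointers are 8-byte aligned, copy one doubleword
// per iteration; otherwise tail-call the ordinary memcpy.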
FUNCTION_BEGIN __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
{
p0 = bitsclr(r1,#7)
p0 = bitsclr(r0,#7)
if (p0.new) r5:4 = memd(r1)
r3 = #-3
}
{
if (!p0) jump .Lmemcpy_call
if (p0) memd(r0++#8) = r5:4
if (p0) r5:4 = memd(r1+#8)
r3 += lsr(r2,#3)
}
{
memd(r0++#8) = r5:4
r5:4 = memd(r1+#16)
r1 = add(r1,#24)
loop0(1f,r3)
}
.falign
1:
{
memd(r0++#8) = r5:4
r5:4 = memd(r1++#8)
}:endloop0
{
memd(r0) = r5:4
r0 -= add(r2,#-8)
jumpr r31
}
FUNCTION_END __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
.Lmemcpy_call:
#ifdef __PIC__
jump memcpy@PLT
#else
jump memcpy
#endif
.globl __qdsp_memcpy_likely_aligned_min32bytes_mult8bytes
.set __qdsp_memcpy_likely_aligned_min32bytes_mult8bytes, \
__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes

View File

@ -0,0 +1,83 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
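// Signed 64-bit remainder (r1:0 % r3:2): divide the magnitudes, then give
// the remainder the sign of the dividend.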
FUNCTION_BEGIN __hexagon_moddi3
{
p3 = tstbit(r1,#31)
}
{
r1:0 = abs(r1:0)
r3:2 = abs(r3:2)
}
{
r6 = cl0(r1:0) // count leading 0's of dividend (numerator)
r7 = cl0(r3:2) // count leading 0's of divisor (denominator)
r5:4 = r3:2 // divisor moved into working registers
r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder
}
{
r10 = sub(r7,r6) // left shift count for bit & divisor
r1:0 = #0 // initialize quotient to 0
r15:14 = #1 // initialize bit to 1
}
{
r11 = add(r10,#1) // loop count is 1 more than shift count
r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb
r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor
}
{
p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend
loop0(1f,r11) // register loop
}
{
if (p0) jump .hexagon_moddi3_return // if divisor > dividend, we're done, so return
}
.falign
1:
{
p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder
}
{
r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder
r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8)
}
{
r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8)
r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6)
}
{
r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration
r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration
}:endloop0
.hexagon_moddi3_return:
{
r1:0 = neg(r3:2)
}
{
r1:0 = vmux(p3,r1:0,r3:2)
jumpr r31
}
FUNCTION_END __hexagon_moddi3
.globl __qdsp_moddi3
.set __qdsp_moddi3, __hexagon_moddi3

View File

@ -0,0 +1,66 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
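// Signed 32-bit remainder (r0 % r1): the result takes the sign of the
// dividend.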
FUNCTION_BEGIN __hexagon_modsi3
{
p2 = cmp.ge(r0,#0)
r2 = abs(r0)
r1 = abs(r1)
}
{
r3 = cl0(r2)
r4 = cl0(r1)
p0 = cmp.gtu(r1,r2)
}
{
r3 = sub(r4,r3)
if (p0) jumpr r31
}
{
p1 = cmp.eq(r3,#0)
loop0(1f,r3)
r0 = r2
r2 = lsl(r1,r3)
}
.falign
1:
{
p0 = cmp.gtu(r2,r0)
if (!p0.new) r0 = sub(r0,r2)
r2 = lsr(r2,#1)
if (p1) r1 = #0
}:endloop0
{
p0 = cmp.gtu(r2,r0)
if (!p0.new) r0 = sub(r0,r1)
if (p2) jumpr r31
}
{
r0 = neg(r0)
jumpr r31
}
FUNCTION_END __hexagon_modsi3
.globl __qdsp_modsi3
.set __qdsp_modsi3, __hexagon_modsi3

View File

@ -0,0 +1,66 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
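// Single-precision divide: start from the hardware reciprocal estimate
// (sfrecipa) and refine it with Newton-Raphson steps before the final
// scaling step.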
FUNCTION_BEGIN __hexagon_divsf3
{
r2,p0 = sfrecipa(r0,r1)
r4 = sffixupd(r0,r1)
r3 = ##0x3f800000 // 1.0
}
{
r5 = sffixupn(r0,r1)
r3 -= sfmpy(r4,r2):lib // error term: 1 - den*recip
r6 = ##0x80000000
r7 = r3
}
{
r2 += sfmpy(r3,r2):lib
r3 = r7
r6 = r5
r0 = and(r6,r5)
}
{
r3 -= sfmpy(r4,r2):lib
r0 += sfmpy(r5,r2):lib
}
{
r2 += sfmpy(r3,r2):lib
r6 -= sfmpy(r0,r4):lib
}
{
r0 += sfmpy(r6,r2):lib
}
{
r5 -= sfmpy(r0,r4):lib
}
{
r0 += sfmpy(r5,r2,p0):scale
jumpr r31
}
FUNCTION_END __hexagon_divsf3
Q6_ALIAS(divsf3)
FAST_ALIAS(divsf3)
FAST2_ALIAS(divsf3)

View File

@ -0,0 +1,82 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
#define RIN r0
#define S r0
#define H r1
#define D r2
#define E r3
#define HALF r4
#define R r5
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
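// Single-precision square root: start from the hardware inverse square
// root estimate (sfinvsqrta) and refine it with Newton-Raphson steps;
// +/-0.0 is returned unchanged.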
FUNCTION_BEGIN __hexagon_sqrtf
{
E,p0 = sfinvsqrta(RIN)
R = sffixupr(RIN)
HALF = ##0x3f000000 // 0.5
r1:0 = combine(#0,#0) // clear S/H
}
{
S += sfmpy(E,R):lib // S0
H += sfmpy(E,HALF):lib // H0
D = HALF
E = R
}
{
D -= sfmpy(S,H):lib // d0
p1 = sfclass(R,#1) // is zero?
//E -= sfmpy(S,S):lib // e0
}
{
S += sfmpy(S,D):lib // S1
H += sfmpy(H,D):lib // H1
D = HALF
E = R
}
{
D -= sfmpy(S,H):lib // d0
E -= sfmpy(S,S):lib // e0
}
{
S += sfmpy(H,E):lib // S2
H += sfmpy(H,D):lib // H2
D = HALF
E = R
}
{
//D -= sfmpy(S,H):lib // d2
E -= sfmpy(S,S):lib // e2
if (p1) r0 = or(r0,R) // sqrt(-0.0) = -0.0
}
{
S += sfmpy(H,E,p0):scale // S3
jumpr r31
}
FUNCTION_END __hexagon_sqrtf
Q6_ALIAS(sqrtf)
FAST_ALIAS(sqrtf)
FAST2_ALIAS(sqrtf)

View File

@ -0,0 +1,71 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
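// Unsigned 64-bit divide (r1:0 / r3:2) using the same shift-and-subtract
// loop; the quotient is returned in r1:0.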
FUNCTION_BEGIN __hexagon_udivdi3
{
r6 = cl0(r1:0) // count leading 0's of dividend (numerator)
r7 = cl0(r3:2) // count leading 0's of divisor (denominator)
r5:4 = r3:2 // divisor moved into working registers
r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder
}
{
r10 = sub(r7,r6) // left shift count for bit & divisor
r1:0 = #0 // initialize quotient to 0
r15:14 = #1 // initialize bit to 1
}
{
r11 = add(r10,#1) // loop count is 1 more than shift count
r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb
r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor
}
{
p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend
loop0(1f,r11) // register loop
}
{
if (p0) jumpr r31 // if divisor > dividend, we're done, so return
}
.falign
1:
{
p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder
}
{
r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder
r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8)
}
{
r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8)
r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6)
}
{
r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration
r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration
}:endloop0
{
jumpr r31 // return
}
FUNCTION_END __hexagon_udivdi3
.globl __qdsp_udivdi3
.set __qdsp_udivdi3, __hexagon_udivdi3

View File

@ -0,0 +1,71 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
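// Unsigned 64-bit divide with remainder: on return the quotient is in
// r1:0 and the remainder is left in r3:2.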
FUNCTION_BEGIN __hexagon_udivmoddi4
{
r6 = cl0(r1:0) // count leading 0's of dividend (numerator)
r7 = cl0(r3:2) // count leading 0's of divisor (denominator)
r5:4 = r3:2 // divisor moved into working registers
r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder
}
{
r10 = sub(r7,r6) // left shift count for bit & divisor
r1:0 = #0 // initialize quotient to 0
r15:14 = #1 // initialize bit to 1
}
{
r11 = add(r10,#1) // loop count is 1 more than shift count
r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb
r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor
}
{
p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend
loop0(1f,r11) // register loop
}
{
if (p0) jumpr r31 // if divisor > dividend, we're done, so return
}
.falign
1:
{
p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder
}
{
r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder
r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8)
}
{
r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8)
r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6)
}
{
r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration
r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration
}:endloop0
{
jumpr r31 // return
}
FUNCTION_END __hexagon_udivmoddi4
.globl __qdsp_udivmoddi4
.set __qdsp_udivmoddi4, __hexagon_udivmoddi4

View File

@ -0,0 +1,60 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
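// Unsigned 32-bit divide with remainder: on return the quotient is in r0
// and the remainder is in r1.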
FUNCTION_BEGIN __hexagon_udivmodsi4
{
r2 = cl0(r0)
r3 = cl0(r1)
r5:4 = combine(#1,#0)
p0 = cmp.gtu(r1,r0)
}
{
r6 = sub(r3,r2)
r4 = r1
r1:0 = combine(r0,r4)
if (p0) jumpr r31
}
{
r3:2 = vlslw(r5:4,r6)
loop0(1f,r6)
p0 = cmp.eq(r6,#0)
if (p0.new) r4 = #0
}
.falign
1:
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r1 = sub(r1,r2)
if (!p0.new) r0 = add(r0,r3)
r3:2 = vlsrw(r3:2,#1)
}:endloop0
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r1 = sub(r1,r4)
if (!p0.new) r0 = add(r0,r3)
jumpr r31
}
FUNCTION_END __hexagon_udivmodsi4
.globl __qdsp_udivmodsi4
.set __qdsp_udivmodsi4, __hexagon_udivmodsi4

View File

@ -0,0 +1,56 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
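// Unsigned 32-bit divide (r0 / r1): shift-and-subtract, quotient in r0.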
FUNCTION_BEGIN __hexagon_udivsi3
{
r2 = cl0(r0)
r3 = cl0(r1)
r5:4 = combine(#1,#0)
p0 = cmp.gtu(r1,r0)
}
{
r6 = sub(r3,r2)
r4 = r1
r1:0 = combine(r0,r4)
if (p0) jumpr r31
}
{
r3:2 = vlslw(r5:4,r6)
loop0(1f,r6)
}
.falign
1:
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r1 = sub(r1,r2)
if (!p0.new) r0 = add(r0,r3)
r3:2 = vlsrw(r3:2,#1)
}:endloop0
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r0 = add(r0,r3)
jumpr r31
}
FUNCTION_END __hexagon_udivsi3
.globl __qdsp_udivsi3
.set __qdsp_udivsi3, __hexagon_udivsi3

View File

@ -0,0 +1,74 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
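// Unsigned 64-bit remainder (r1:0 % r3:2), returned in r1:0.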
FUNCTION_BEGIN __hexagon_umoddi3
{
r6 = cl0(r1:0) // count leading 0's of dividend (numerator)
r7 = cl0(r3:2) // count leading 0's of divisor (denominator)
r5:4 = r3:2 // divisor moved into working registers
r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder
}
{
r10 = sub(r7,r6) // left shift count for bit & divisor
r1:0 = #0 // initialize quotient to 0
r15:14 = #1 // initialize bit to 1
}
{
r11 = add(r10,#1) // loop count is 1 more than shift count
r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb
r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor
}
{
p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend
loop0(1f,r11) // register loop
}
{
if (p0) jump .hexagon_umoddi3_return // if divisor > dividend, we're done, so return
}
.falign
1:
{
p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder
}
{
r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder
r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8)
}
{
r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8)
r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6)
}
{
r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration
r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration
}:endloop0
.hexagon_umoddi3_return:
{
r1:0 = r3:2
jumpr r31
}
FUNCTION_END __hexagon_umoddi3
.globl __qdsp_umoddi3
.set __qdsp_umoddi3, __hexagon_umoddi3

View File

@ -0,0 +1,55 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
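// Unsigned 32-bit remainder (r0 % r1), returned in r0.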
FUNCTION_BEGIN __hexagon_umodsi3
{
r2 = cl0(r0)
r3 = cl0(r1)
p0 = cmp.gtu(r1,r0)
}
{
r2 = sub(r3,r2)
if (p0) jumpr r31
}
{
loop0(1f,r2)
p1 = cmp.eq(r2,#0)
r2 = lsl(r1,r2)
}
.falign
1:
{
p0 = cmp.gtu(r2,r0)
if (!p0.new) r0 = sub(r0,r2)
r2 = lsr(r2,#1)
if (p1) r1 = #0
}:endloop0
{
p0 = cmp.gtu(r2,r0)
if (!p0.new) r0 = sub(r0,r1)
jumpr r31
}
FUNCTION_END __hexagon_umodsi3
.globl __qdsp_umodsi3
.set __qdsp_umodsi3, __hexagon_umodsi3

View File

@ -60,7 +60,7 @@ typedef union
}s;
} udwords;
#if (defined(__LP64__) || defined(__wasm__) || defined(__mips64))
#if (defined(__LP64__) || defined(__wasm__) || defined(__mips64)) || defined(__riscv)
#define CRT_HAS_128BIT
#endif

View File

@ -16,8 +16,8 @@
#ifdef __APPLE__
#include <CoreFoundation/CoreFoundation.h>
#include <dispatch/dispatch.h>
#include <TargetConditionals.h>
#include <dispatch/dispatch.h>
#include <dlfcn.h>
#include <stdint.h>
#include <stdio.h>
@ -28,6 +28,26 @@
static int32_t GlobalMajor, GlobalMinor, GlobalSubminor;
static dispatch_once_t DispatchOnceCounter;
typedef CFDataRef (*CFDataCreateWithBytesNoCopyFuncTy)(CFAllocatorRef,
const UInt8 *, CFIndex,
CFAllocatorRef);
typedef CFPropertyListRef (*CFPropertyListCreateWithDataFuncTy)(
CFAllocatorRef, CFDataRef, CFOptionFlags, CFPropertyListFormat *,
CFErrorRef *);
typedef CFPropertyListRef (*CFPropertyListCreateFromXMLDataFuncTy)(
CFAllocatorRef, CFDataRef, CFOptionFlags, CFStringRef *);
typedef CFStringRef (*CFStringCreateWithCStringNoCopyFuncTy)(CFAllocatorRef,
const char *,
CFStringEncoding,
CFAllocatorRef);
typedef const void *(*CFDictionaryGetValueFuncTy)(CFDictionaryRef,
const void *);
typedef CFTypeID (*CFGetTypeIDFuncTy)(CFTypeRef);
typedef CFTypeID (*CFStringGetTypeIDFuncTy)(void);
typedef Boolean (*CFStringGetCStringFuncTy)(CFStringRef, char *, CFIndex,
CFStringEncoding);
typedef void (*CFReleaseFuncTy)(CFTypeRef);
/* Find and parse the SystemVersion.plist file. */
static void parseSystemVersionPList(void *Unused) {
(void)Unused;
@ -37,50 +57,49 @@ static void parseSystemVersionPList(void *Unused) {
return;
const CFAllocatorRef kCFAllocatorNull =
*(const CFAllocatorRef *)NullAllocator;
typeof(CFDataCreateWithBytesNoCopy) *CFDataCreateWithBytesNoCopyFunc =
(typeof(CFDataCreateWithBytesNoCopy) *)dlsym(
RTLD_DEFAULT, "CFDataCreateWithBytesNoCopy");
CFDataCreateWithBytesNoCopyFuncTy CFDataCreateWithBytesNoCopyFunc =
(CFDataCreateWithBytesNoCopyFuncTy)dlsym(RTLD_DEFAULT,
"CFDataCreateWithBytesNoCopy");
if (!CFDataCreateWithBytesNoCopyFunc)
return;
typeof(CFPropertyListCreateWithData) *CFPropertyListCreateWithDataFunc =
(typeof(CFPropertyListCreateWithData) *)dlsym(
CFPropertyListCreateWithDataFuncTy CFPropertyListCreateWithDataFunc =
(CFPropertyListCreateWithDataFuncTy)dlsym(
RTLD_DEFAULT, "CFPropertyListCreateWithData");
/* CFPropertyListCreateWithData was introduced only in macOS 10.6+, so it
* will be NULL on earlier OS versions. */
/* CFPropertyListCreateWithData was introduced only in macOS 10.6+, so it
* will be NULL on earlier OS versions. */
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
typeof(CFPropertyListCreateFromXMLData) *CFPropertyListCreateFromXMLDataFunc =
(typeof(CFPropertyListCreateFromXMLData) *)dlsym(
CFPropertyListCreateFromXMLDataFuncTy CFPropertyListCreateFromXMLDataFunc =
(CFPropertyListCreateFromXMLDataFuncTy)dlsym(
RTLD_DEFAULT, "CFPropertyListCreateFromXMLData");
#pragma clang diagnostic pop
/* CFPropertyListCreateFromXMLDataFunc is deprecated in macOS 10.10, so it
* might be NULL in future OS versions. */
if (!CFPropertyListCreateWithDataFunc && !CFPropertyListCreateFromXMLDataFunc)
return;
typeof(CFStringCreateWithCStringNoCopy) *CFStringCreateWithCStringNoCopyFunc =
(typeof(CFStringCreateWithCStringNoCopy) *)dlsym(
CFStringCreateWithCStringNoCopyFuncTy CFStringCreateWithCStringNoCopyFunc =
(CFStringCreateWithCStringNoCopyFuncTy)dlsym(
RTLD_DEFAULT, "CFStringCreateWithCStringNoCopy");
if (!CFStringCreateWithCStringNoCopyFunc)
return;
typeof(CFDictionaryGetValue) *CFDictionaryGetValueFunc =
(typeof(CFDictionaryGetValue) *)dlsym(RTLD_DEFAULT,
"CFDictionaryGetValue");
CFDictionaryGetValueFuncTy CFDictionaryGetValueFunc =
(CFDictionaryGetValueFuncTy)dlsym(RTLD_DEFAULT, "CFDictionaryGetValue");
if (!CFDictionaryGetValueFunc)
return;
typeof(CFGetTypeID) *CFGetTypeIDFunc =
(typeof(CFGetTypeID) *)dlsym(RTLD_DEFAULT, "CFGetTypeID");
CFGetTypeIDFuncTy CFGetTypeIDFunc =
(CFGetTypeIDFuncTy)dlsym(RTLD_DEFAULT, "CFGetTypeID");
if (!CFGetTypeIDFunc)
return;
typeof(CFStringGetTypeID) *CFStringGetTypeIDFunc =
(typeof(CFStringGetTypeID) *)dlsym(RTLD_DEFAULT, "CFStringGetTypeID");
CFStringGetTypeIDFuncTy CFStringGetTypeIDFunc =
(CFStringGetTypeIDFuncTy)dlsym(RTLD_DEFAULT, "CFStringGetTypeID");
if (!CFStringGetTypeIDFunc)
return;
typeof(CFStringGetCString) *CFStringGetCStringFunc =
(typeof(CFStringGetCString) *)dlsym(RTLD_DEFAULT, "CFStringGetCString");
CFStringGetCStringFuncTy CFStringGetCStringFunc =
(CFStringGetCStringFuncTy)dlsym(RTLD_DEFAULT, "CFStringGetCString");
if (!CFStringGetCStringFunc)
return;
typeof(CFRelease) *CFReleaseFunc =
(typeof(CFRelease) *)dlsym(RTLD_DEFAULT, "CFRelease");
CFReleaseFuncTy CFReleaseFunc =
(CFReleaseFuncTy)dlsym(RTLD_DEFAULT, "CFRelease");
if (!CFReleaseFunc)
return;
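The block above replaces GNU typeof() with explicit function-pointer typedefs for every CoreFoundation symbol it looks up at run time. As a stand-alone illustration of that dlsym-plus-typedef pattern (not part of this diff; puts is used purely as an example symbol, RTLD_DEFAULT is assumed to be available, and older glibc setups may need -ldl when linking):

#include <dlfcn.h>
#include <cstdio>

typedef int (*PutsFuncTy)(const char *);

int main() {
  // RTLD_DEFAULT searches the images already loaded into the process, just as
  // the availability runtime does for the CoreFoundation symbols above.
  PutsFuncTy PutsFunc = (PutsFuncTy)dlsym(RTLD_DEFAULT, "puts");
  if (!PutsFunc)
    return 1;  // Symbol unavailable; bail out, as parseSystemVersionPList does.
  return PutsFunc("hello from dlsym") >= 0 ? 0 : 1;
}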
@ -163,10 +182,14 @@ int32_t __isOSVersionAtLeast(int32_t Major, int32_t Minor, int32_t Subminor) {
/* Populate the global version variables, if they haven't already. */
dispatch_once_f(&DispatchOnceCounter, NULL, parseSystemVersionPList);
if (Major < GlobalMajor) return 1;
if (Major > GlobalMajor) return 0;
if (Minor < GlobalMinor) return 1;
if (Minor > GlobalMinor) return 0;
if (Major < GlobalMajor)
return 1;
if (Major > GlobalMajor)
return 0;
if (Minor < GlobalMinor)
return 1;
if (Minor > GlobalMinor)
return 0;
return Subminor <= GlobalSubminor;
}

View File

@ -0,0 +1,28 @@
//===--- mulsi3.S - Integer multiplication routines ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#if !defined(__riscv_mul) && __riscv_xlen == 32
.text
.align 2
.globl __mulsi3
.type __mulsi3, @function
__mulsi3:
mv a2, a0
mv a0, zero
.L1:
andi a3, a1, 1
beqz a3, .L2
add a0, a0, a2
.L2:
srli a1, a1, 1
slli a2, a2, 1
bnez a1, .L1
ret
#endif
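For readers following the assembly, here is a minimal C++ sketch (an illustration only, not part of the builtins library) of the same shift-and-add loop; acc, addend and the shrinking multiplier mirror the roles of a0, a2 and a1 above:

#include <cassert>
#include <cstdint>

// Add the (progressively left-shifted) multiplicand once for every set bit of
// the multiplier; the product wraps modulo 2^32, as the hardware would.
static uint32_t mulsi3_sketch(uint32_t a, uint32_t b) {
  uint32_t acc = 0;     // a0: running sum
  uint32_t addend = a;  // a2: multiplicand, shifted left each iteration
  while (b != 0) {      // a1: multiplier, shifted right each iteration
    if (b & 1)
      acc += addend;
    b >>= 1;
    addend <<= 1;
  }
  return acc;
}

int main() {
  assert(mulsi3_sketch(6, 7) == 42);
  assert(mulsi3_sketch(0xffffffffu, 2) == 0xfffffffeu);  // wraps like __mulsi3
  return 0;
}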

View File

@ -132,7 +132,11 @@ void ShadowBuilder::Start() {
void ShadowBuilder::AddUnchecked(uptr begin, uptr end) {
uint16_t *shadow_begin = MemToShadow(begin, shadow_);
uint16_t *shadow_end = MemToShadow(end - 1, shadow_) + 1;
memset(shadow_begin, kUncheckedShadow,
  // memset takes a byte, so our unchecked shadow value requires both bytes to
  // be the same. The static_assert below verifies this at compile time.
static_assert((kUncheckedShadow & 0xff) == ((kUncheckedShadow >> 8) & 0xff),
"Both bytes of the 16-bit value must be the same!");
memset(shadow_begin, kUncheckedShadow & 0xff,
(shadow_end - shadow_begin) * sizeof(*shadow_begin));
}
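A self-contained illustration (assumption: the unchecked value is a pattern such as 0xAAAA whose two bytes match, which is exactly what the static_assert above enforces) of why a byte-wise memset can fill 16-bit shadow cells:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const uint16_t kPattern = 0xAAAA;  // both bytes equal, so memset can produce it
  uint16_t cells[8];
  std::memset(cells, kPattern & 0xff, sizeof(cells));
  for (uint16_t c : cells)
    assert(c == kPattern);  // every 16-bit cell now holds the full pattern
  // A value like 0xAB00 could never be written this way: memset repeats a
  // single byte, so the two halves of each cell would come out identical.
  return 0;
}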
@ -379,6 +383,8 @@ __cfi_slowpath_diag(u64 CallSiteTypeId, void *Ptr, void *DiagData) {
}
#endif
static void EnsureInterceptorsInitialized();
// Setup shadow for dlopen()ed libraries.
// The actual shadow setup happens after dlopen() returns, which means that
// a library can not be a target of any CFI checks while its constructors are
@ -388,6 +394,7 @@ __cfi_slowpath_diag(u64 CallSiteTypeId, void *Ptr, void *DiagData) {
// We could insert a high-priority constructor into the library, but that would
// not help with the uninstrumented libraries.
INTERCEPTOR(void*, dlopen, const char *filename, int flag) {
EnsureInterceptorsInitialized();
EnterLoader();
void *handle = REAL(dlopen)(filename, flag);
ExitLoader();
@ -395,12 +402,27 @@ INTERCEPTOR(void*, dlopen, const char *filename, int flag) {
}
INTERCEPTOR(int, dlclose, void *handle) {
EnsureInterceptorsInitialized();
EnterLoader();
int res = REAL(dlclose)(handle);
ExitLoader();
return res;
}
static BlockingMutex interceptor_init_lock(LINKER_INITIALIZED);
static bool interceptors_inited = false;
static void EnsureInterceptorsInitialized() {
BlockingMutexLock lock(&interceptor_init_lock);
if (interceptors_inited)
return;
INTERCEPT_FUNCTION(dlopen);
INTERCEPT_FUNCTION(dlclose);
interceptors_inited = true;
}
extern "C" SANITIZER_INTERFACE_ATTRIBUTE
#if !SANITIZER_CAN_USE_PREINIT_ARRAY
// On ELF platforms, the constructor is invoked using .preinit_array (see below)
@ -411,9 +433,6 @@ void __cfi_init() {
InitializeFlags();
InitShadow();
INTERCEPT_FUNCTION(dlopen);
INTERCEPT_FUNCTION(dlclose);
#ifdef CFI_ENABLE_DIAG
__ubsan::InitAsPlugin();
#endif

View File

@ -1,7 +1,9 @@
[cfi-unrelated-cast]
# std::get_temporary_buffer, likewise (libstdc++, libc++).
# The specification of std::get_temporary_buffer mandates a cast to
# uninitialized T* (libstdc++, libc++, MSVC stdlib).
fun:_ZSt20get_temporary_buffer*
fun:_ZNSt3__120get_temporary_buffer*
fun:*get_temporary_buffer@.*@std@@*
# STL address-of magic (libstdc++, libc++).
fun:*__addressof*

View File

@ -425,7 +425,8 @@ static void dfsan_init(int argc, char **argv, char **envp) {
InitializePlatformEarly();
MmapFixedNoReserve(ShadowAddr(), UnusedAddr() - ShadowAddr());
if (!MmapFixedNoReserve(ShadowAddr(), UnusedAddr() - ShadowAddr()))
Die();
// Protect the region of memory we don't use, to preserve the one-to-one
// mapping from application to shadow memory. But if ASLR is disabled, Linux

View File

@ -1132,4 +1132,26 @@ int __dfsw_snprintf(char *str, size_t size, const char *format,
va_end(ap);
return ret;
}
} // extern "C"
// Default empty implementations (weak). Users should redefine them.
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_guard, u32 *) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_guard_init, u32 *,
u32 *) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_pcs_init, void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_indir, void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_cmp, void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_cmp1, void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_cmp2, void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_cmp4, void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_cmp8, void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_const_cmp1,
void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_const_cmp2,
void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_const_cmp4,
void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_const_cmp8,
void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_switch, void) {}
} // extern "C"
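The SANITIZER_INTERFACE_WEAK_DEF definitions above are defaults that a user can replace with a strong definition. A minimal sketch of that weak-default pattern using plain GCC/Clang attributes (my_trace_hook is a made-up name; the real sanitizer macros also add visibility and naming details not shown here):

#include <cstdio>

// Weak default: the linker keeps it only if no strong definition exists.
extern "C" __attribute__((weak)) void my_trace_hook(unsigned *guard) {
  (void)guard;  // default behaviour: do nothing
}

// A user could supply, in another translation unit:
//   extern "C" void my_trace_hook(unsigned *guard) {
//     std::printf("guard at %p hit\n", (void *)guard);
//   }

int main() {
  unsigned guard = 0;
  my_trace_hook(&guard);  // calls whichever definition the linker selected
  return 0;
}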

View File

@ -264,8 +264,6 @@ fun:reflect.makeFuncStub=discard
# lib/Fuzzer
###############################################################################
# Replaces __sanitizer_cov_trace_cmp with __dfsw___sanitizer_cov_trace_cmp
fun:__sanitizer_cov_trace_cmp=custom
fun:__sanitizer_cov_trace_cmp=uninstrumented
fun:__sanitizer_cov_trace_cmp1=custom
fun:__sanitizer_cov_trace_cmp1=uninstrumented
fun:__sanitizer_cov_trace_cmp2=custom
@ -274,6 +272,14 @@ fun:__sanitizer_cov_trace_cmp4=custom
fun:__sanitizer_cov_trace_cmp4=uninstrumented
fun:__sanitizer_cov_trace_cmp8=custom
fun:__sanitizer_cov_trace_cmp8=uninstrumented
fun:__sanitizer_cov_trace_const_cmp1=custom
fun:__sanitizer_cov_trace_const_cmp1=uninstrumented
fun:__sanitizer_cov_trace_const_cmp2=custom
fun:__sanitizer_cov_trace_const_cmp2=uninstrumented
fun:__sanitizer_cov_trace_const_cmp4=custom
fun:__sanitizer_cov_trace_const_cmp4=uninstrumented
fun:__sanitizer_cov_trace_const_cmp8=custom
fun:__sanitizer_cov_trace_const_cmp8=uninstrumented
# Similar for __sanitizer_cov_trace_switch
fun:__sanitizer_cov_trace_switch=custom
fun:__sanitizer_cov_trace_switch=uninstrumented
@ -289,10 +295,11 @@ fun:__sanitizer_set_death_callback=uninstrumented
fun:__sanitizer_set_death_callback=discard
fun:__sanitizer_update_counter_bitset_and_clear_counters=uninstrumented
fun:__sanitizer_update_counter_bitset_and_clear_counters=discard
fun:__sanitizer_cov_trace_pc*=uninstrumented
fun:__sanitizer_cov_trace_pc*=discard
fun:__sanitizer_cov_pcs_init=uninstrumented
fun:__sanitizer_cov_pcs_init=discard
# Ignores the dfsan wrappers.
fun:__dfsw_*=uninstrumented
fun:__dfsw_*=discard
# Don't add extra parameters to the Fuzzer callback.
fun:LLVMFuzzerTestOneInput=uninstrumented

View File

@ -163,15 +163,15 @@ static void initializeShadow() {
VPrintf(1, "Shadow #%d: [%zx-%zx) (%zuGB)\n", i, ShadowStart, ShadowEnd,
(ShadowEnd - ShadowStart) >> 30);
uptr Map;
uptr Map = 0;
if (__esan_which_tool == ESAN_WorkingSet) {
// We want to identify all shadow pages that are touched so we start
// out inaccessible.
Map = (uptr)MmapFixedNoAccess(ShadowStart, ShadowEnd- ShadowStart,
"shadow");
} else {
Map = (uptr)MmapFixedNoReserve(ShadowStart, ShadowEnd - ShadowStart,
"shadow");
if (MmapFixedNoReserve(ShadowStart, ShadowEnd - ShadowStart, "shadow"))
Map = ShadowStart;
}
if (Map != ShadowStart) {
Printf("FATAL: EfficiencySanitizer failed to map its shadow memory.\n");

View File

@ -175,6 +175,15 @@ DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, uptr)
do { \
} while (false)
#define COMMON_INTERCEPTOR_MMAP_IMPL(ctx, mmap, addr, sz, prot, flags, fd, \
off) \
do { \
if (!fixMmapAddr(&addr, sz, flags)) \
return (void *)-1; \
void *result = REAL(mmap)(addr, sz, prot, flags, fd, off); \
return (void *)checkMmapResult((uptr)result, sz); \
} while (false)
#include "sanitizer_common/sanitizer_common_interceptors.inc"
//===----------------------------------------------------------------------===//
@ -232,6 +241,7 @@ DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, uptr)
} while (false)
#include "sanitizer_common/sanitizer_common_syscalls.inc"
#include "sanitizer_common/sanitizer_syscalls_netbsd.inc"
//===----------------------------------------------------------------------===//
// Custom interceptors
@ -306,13 +316,6 @@ INTERCEPTOR(int, unlink, char *path) {
return REAL(unlink)(path);
}
INTERCEPTOR(int, puts, const char *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, puts, s);
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, internal_strlen(s));
return REAL(puts)(s);
}
INTERCEPTOR(int, rmdir, char *path) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, rmdir, path);
@ -320,44 +323,6 @@ INTERCEPTOR(int, rmdir, char *path) {
return REAL(rmdir)(path);
}
//===----------------------------------------------------------------------===//
// Shadow-related interceptors
//===----------------------------------------------------------------------===//
// These are candidates for sharing with all sanitizers if shadow memory
// support is also standardized.
INTERCEPTOR(void *, mmap, void *addr, SIZE_T sz, int prot, int flags,
int fd, OFF_T off) {
if (UNLIKELY(REAL(mmap) == nullptr)) {
// With esan init during interceptor init and a static libc preventing
// our early-calloc from triggering, we can end up here before our
// REAL pointer is set up.
return (void *)internal_mmap(addr, sz, prot, flags, fd, off);
}
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, mmap, addr, sz, prot, flags, fd, off);
if (!fixMmapAddr(&addr, sz, flags))
return (void *)-1;
void *result = REAL(mmap)(addr, sz, prot, flags, fd, off);
return (void *)checkMmapResult((uptr)result, sz);
}
#if SANITIZER_LINUX
INTERCEPTOR(void *, mmap64, void *addr, SIZE_T sz, int prot, int flags,
int fd, OFF64_T off) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, mmap64, addr, sz, prot, flags, fd, off);
if (!fixMmapAddr(&addr, sz, flags))
return (void *)-1;
void *result = REAL(mmap64)(addr, sz, prot, flags, fd, off);
return (void *)checkMmapResult((uptr)result, sz);
}
#define ESAN_MAYBE_INTERCEPT_MMAP64 INTERCEPT_FUNCTION(mmap64)
#else
#define ESAN_MAYBE_INTERCEPT_MMAP64
#endif
//===----------------------------------------------------------------------===//
// Signal-related interceptors
//===----------------------------------------------------------------------===//
@ -521,14 +486,8 @@ void initializeInterceptors() {
INTERCEPT_FUNCTION(creat);
ESAN_MAYBE_INTERCEPT_CREAT64;
INTERCEPT_FUNCTION(unlink);
INTERCEPT_FUNCTION(fread);
INTERCEPT_FUNCTION(fwrite);
INTERCEPT_FUNCTION(puts);
INTERCEPT_FUNCTION(rmdir);
INTERCEPT_FUNCTION(mmap);
ESAN_MAYBE_INTERCEPT_MMAP64;
ESAN_MAYBE_INTERCEPT_SIGNAL;
ESAN_MAYBE_INTERCEPT_SIGACTION;
ESAN_MAYBE_INTERCEPT_SIGPROCMASK;

View File

@ -70,7 +70,7 @@ int SidelineThread::runSideline(void *Arg) {
internal_prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
// Set up a signal handler on an alternate stack for safety.
InternalScopedBuffer<char> StackMap(SigAltStackSize);
InternalMmapVector<char> StackMap(SigAltStackSize);
stack_t SigAltStack;
SigAltStack.ss_sp = StackMap.data();
SigAltStack.ss_size = SigAltStackSize;

View File

@ -1,4 +1,4 @@
//===-- hwasan.cc -----------------------------------------------------------===//
//===-- hwasan.cc ---------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@ -13,8 +13,10 @@
//===----------------------------------------------------------------------===//
#include "hwasan.h"
#include "hwasan_thread.h"
#include "hwasan_mapping.h"
#include "hwasan_poisoning.h"
#include "hwasan_report.h"
#include "hwasan_thread.h"
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_flags.h"
@ -84,7 +86,7 @@ static void InitializeFlags() {
cf.check_printf = false;
cf.intercept_tls_get_addr = true;
cf.exitcode = 99;
cf.handle_sigill = kHandleSignalExclusive;
cf.handle_sigtrap = kHandleSignalExclusive;
OverrideCommonFlags(cf);
}
@ -143,12 +145,22 @@ void PrintWarning(uptr pc, uptr bp) {
ReportInvalidAccess(&stack, 0);
}
static void HWAsanCheckFailed(const char *file, int line, const char *cond,
u64 v1, u64 v2) {
Report("HWAddressSanitizer CHECK failed: %s:%d \"%s\" (0x%zx, 0x%zx)\n", file,
line, cond, (uptr)v1, (uptr)v2);
PRINT_CURRENT_STACK_CHECK();
Die();
}
} // namespace __hwasan
// Interface.
using namespace __hwasan;
uptr __hwasan_shadow_memory_dynamic_address; // Global interface symbol.
void __hwasan_init() {
CHECK(!hwasan_init_is_running);
if (hwasan_inited) return;
@ -160,23 +172,28 @@ void __hwasan_init() {
CacheBinaryName();
InitializeFlags();
// Install tool-specific callbacks in sanitizer_common.
SetCheckFailedCallback(HWAsanCheckFailed);
__sanitizer_set_report_path(common_flags()->log_path);
DisableCoreDumperIfNecessary();
if (!InitShadow()) {
Printf("FATAL: HWAddressSanitizer cannot mmap the shadow memory.\n");
if (HWASAN_FIXED_MAPPING) {
Printf("FATAL: Make sure to compile with -fPIE and to link with -pie.\n");
Printf("FATAL: Disabling ASLR is known to cause this error.\n");
Printf("FATAL: If running under GDB, try "
"'set disable-randomization off'.\n");
}
DumpProcessMap();
Die();
}
InitializeInterceptors();
InstallDeadlySignalHandlers(HwasanOnDeadlySignal);
InstallAtExitHandler(); // Needs __cxa_atexit interceptor.
DisableCoreDumperIfNecessary();
if (!InitShadow()) {
Printf("FATAL: HWAddressSanitizer can not mmap the shadow memory.\n");
Printf("FATAL: Make sure to compile with -fPIE and to link with -pie.\n");
Printf("FATAL: Disabling ASLR is known to cause this error.\n");
Printf("FATAL: If running under GDB, try "
"'set disable-randomization off'.\n");
DumpProcessMap();
Die();
}
Symbolizer::GetOrInit()->AddHooks(EnterSymbolizer, ExitSymbolizer);
InitializeCoverage(common_flags()->coverage, common_flags()->coverage_dir);
@ -240,11 +257,23 @@ void __sanitizer_unaligned_store64(uu64 *p, u64 x) {
template<unsigned X>
__attribute__((always_inline))
static void SigIll() {
static void SigTrap(uptr p) {
#if defined(__aarch64__)
asm("hlt %0\n\t" ::"n"(X));
#elif defined(__x86_64__) || defined(__i386__)
asm("ud2\n\t");
(void)p;
  // 0x900 is added so as not to interfere with the kernel's use of lower
  // brk immediate values.
// FIXME: Add a constraint to put the pointer into x0, the same as x86 branch.
asm("brk %0\n\t" ::"n"(0x900 + X));
#elif defined(__x86_64__)
  // INT3 + NOP DWORD ptr [EAX + X] to pass X to our signal handler, 5 bytes
  // total. The pointer is passed via rdi.
  // 0x40 is added as a safeguard, to help distinguish our trap from others and
  // to avoid a zero offset in the instruction (otherwise it would be encoded
  // as a different, three-byte NOP).
asm volatile(
"int3\n"
"nopl %c0(%%rax)\n"
:: "n"(0x40 + X), "D"(p));
#else
// FIXME: not always sigill.
__builtin_trap();
@ -261,8 +290,8 @@ __attribute__((always_inline, nodebug)) static void CheckAddress(uptr p) {
uptr ptr_raw = p & ~kAddressTagMask;
tag_t mem_tag = *(tag_t *)MEM_TO_SHADOW(ptr_raw);
if (UNLIKELY(ptr_tag != mem_tag)) {
SigIll<0x100 + 0x20 * (EA == ErrorAction::Recover) +
0x10 * (AT == AccessType::Store) + LogSize>();
SigTrap<0x20 * (EA == ErrorAction::Recover) +
0x10 * (AT == AccessType::Store) + LogSize>(p);
if (EA == ErrorAction::Abort) __builtin_unreachable();
}
}
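To make the encoding above concrete, a tiny sketch (illustration only) of the constant the SigTrap/CheckAddress code produces for one case, a recoverable 4-byte store, and where that constant ends up on each architecture:

#include <cassert>

int main() {
  const unsigned kRecover = 0x20, kStore = 0x10, kLogSize = 2;  // 4-byte access
  const unsigned X = kRecover + kStore + kLogSize;  // SigTrap template argument
  assert(X == 0x32);
  assert(0x900 + X == 0x932);  // aarch64: immediate of the brk instruction
  assert(0x40 + X == 0x72);    // x86_64: displacement byte inside the nopl
  return 0;
}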
@ -277,13 +306,13 @@ __attribute__((always_inline, nodebug)) static void CheckAddressSized(uptr p,
tag_t *shadow_last = (tag_t *)MEM_TO_SHADOW(ptr_raw + sz - 1);
for (tag_t *t = shadow_first; t <= shadow_last; ++t)
if (UNLIKELY(ptr_tag != *t)) {
SigIll<0x100 + 0x20 * (EA == ErrorAction::Recover) +
0x10 * (AT == AccessType::Store) + 0xf>();
SigTrap<0x20 * (EA == ErrorAction::Recover) +
0x10 * (AT == AccessType::Store) + 0xf>(p);
if (EA == ErrorAction::Abort) __builtin_unreachable();
}
}
void __hwasan_load(uptr p, uptr sz) {
void __hwasan_loadN(uptr p, uptr sz) {
CheckAddressSized<ErrorAction::Abort, AccessType::Load>(p, sz);
}
void __hwasan_load1(uptr p) {
@ -302,7 +331,7 @@ void __hwasan_load16(uptr p) {
CheckAddress<ErrorAction::Abort, AccessType::Load, 4>(p);
}
void __hwasan_load_noabort(uptr p, uptr sz) {
void __hwasan_loadN_noabort(uptr p, uptr sz) {
CheckAddressSized<ErrorAction::Recover, AccessType::Load>(p, sz);
}
void __hwasan_load1_noabort(uptr p) {
@ -321,7 +350,7 @@ void __hwasan_load16_noabort(uptr p) {
CheckAddress<ErrorAction::Recover, AccessType::Load, 4>(p);
}
void __hwasan_store(uptr p, uptr sz) {
void __hwasan_storeN(uptr p, uptr sz) {
CheckAddressSized<ErrorAction::Abort, AccessType::Store>(p, sz);
}
void __hwasan_store1(uptr p) {
@ -340,7 +369,7 @@ void __hwasan_store16(uptr p) {
CheckAddress<ErrorAction::Abort, AccessType::Store, 4>(p);
}
void __hwasan_store_noabort(uptr p, uptr sz) {
void __hwasan_storeN_noabort(uptr p, uptr sz) {
CheckAddressSized<ErrorAction::Recover, AccessType::Store>(p, sz);
}
void __hwasan_store1_noabort(uptr p) {
@ -359,6 +388,18 @@ void __hwasan_store16_noabort(uptr p) {
CheckAddress<ErrorAction::Recover, AccessType::Store, 4>(p);
}
void __hwasan_tag_memory(uptr p, u8 tag, uptr sz) {
TagMemoryAligned(p, sz, tag);
}
static const u8 kFallbackTag = 0xBB;
u8 __hwasan_generate_tag() {
HwasanThread *t = GetCurrentThread();
if (!t) return kFallbackTag;
return t->GenerateRandomTag();
}
#if !SANITIZER_SUPPORTS_WEAK_HOOKS
extern "C" {
SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE

View File

@ -1,4 +1,4 @@
//===-- hwasan.h --------------------------------------------------*- C++ -*-===//
//===-- hwasan.h ------------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -32,16 +32,6 @@
typedef u8 tag_t;
// Reasonable values are 4 (for 1/16th shadow) and 6 (for 1/64th).
const uptr kShadowScale = 4;
const uptr kShadowAlignment = 1UL << kShadowScale;
#define MEM_TO_SHADOW_OFFSET(mem) ((uptr)(mem) >> kShadowScale)
#define MEM_TO_SHADOW(mem) ((uptr)(mem) >> kShadowScale)
#define SHADOW_TO_MEM(shadow) ((uptr)(shadow) << kShadowScale)
#define MEM_IS_APP(mem) true
// TBI (Top Byte Ignore) feature of AArch64: bits [63:56] are ignored in address
// translation and can be used to store a tag.
const unsigned kAddressTagShift = 56;
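A short, self-contained sketch of placing a tag in the TBI byte and reading it back (illustration only; kAddressTagMask is derived here from the shift, matching how the runtime masks pointers elsewhere in this diff):

#include <cassert>
#include <cstdint>

int main() {
  const unsigned kAddressTagShift = 56;
  const uint64_t kAddressTagMask = 0xffULL << kAddressTagShift;
  const uint64_t addr = 0x00007fffdeadbeefULL;  // untagged application address
  const uint8_t tag = 0xAB;
  const uint64_t tagged = (addr & ~kAddressTagMask) |
                          ((uint64_t)tag << kAddressTagShift);
  assert((uint8_t)(tagged >> kAddressTagShift) == tag);  // recover the tag
  assert((tagged & ~kAddressTagMask) == addr);           // address bits intact
  return 0;
}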
@ -107,15 +97,6 @@ void PrintWarning(uptr pc, uptr bp);
void GetStackTrace(BufferedStackTrace *stack, uptr max_s, uptr pc, uptr bp,
void *context, bool request_fast_unwind);
void ReportInvalidAccess(StackTrace *stack, u32 origin);
void ReportTagMismatch(StackTrace *stack, uptr addr, uptr access_size,
bool is_store);
void ReportStats();
void ReportAtExitStatistics();
void DescribeMemoryRange(const void *x, uptr size);
void ReportInvalidAccessInsideAddressRange(const char *what, const void *start, uptr size,
uptr offset);
// Returns a "chained" origin id, pointing to the given stack trace followed by
// the previous origin id.
u32 ChainOrigin(u32 id, StackTrace *stack);
@ -135,6 +116,15 @@ const int STACK_TRACE_TAG_POISON = StackTrace::TAG_CUSTOM + 1;
GetStackTrace(&stack, kStackTraceMax, pc, bp, nullptr, \
common_flags()->fast_unwind_on_fatal)
#define GET_FATAL_STACK_TRACE_HERE \
GET_FATAL_STACK_TRACE_PC_BP(StackTrace::GetCurrentPc(), GET_CURRENT_FRAME())
#define PRINT_CURRENT_STACK_CHECK() \
{ \
GET_FATAL_STACK_TRACE_HERE; \
stack.Print(); \
}
class ScopedThreadLocalStateBackup {
public:
ScopedThreadLocalStateBackup() { Backup(); }

View File

@ -1,4 +1,4 @@
//===-- hwasan_allocator.cc --------------------------- ---------------------===//
//===-- hwasan_allocator.cc ------------------------- ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@ -15,11 +15,13 @@
#include "sanitizer_common/sanitizer_allocator.h"
#include "sanitizer_common/sanitizer_allocator_checks.h"
#include "sanitizer_common/sanitizer_allocator_interface.h"
#include "sanitizer_common/sanitizer_allocator_report.h"
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_errno.h"
#include "sanitizer_common/sanitizer_stackdepot.h"
#include "hwasan.h"
#include "hwasan_allocator.h"
#include "hwasan_mapping.h"
#include "hwasan_thread.h"
#include "hwasan_poisoning.h"
@ -70,8 +72,8 @@ struct HwasanMapUnmapCallback {
}
};
#if !defined(__aarch64__)
#error unsupported platform
#if !defined(__aarch64__) && !defined(__x86_64__)
#error Unsupported platform
#endif
static const uptr kMaxAllowedMallocSize = 2UL << 30; // 2G
@ -100,6 +102,9 @@ static AllocatorCache fallback_allocator_cache;
static SpinMutex fallback_mutex;
static atomic_uint8_t hwasan_allocator_tagging_enabled;
static const tag_t kFallbackAllocTag = 0xBB;
static const tag_t kFallbackFreeTag = 0xBC;
void HwasanAllocatorInit() {
atomic_store_relaxed(&hwasan_allocator_tagging_enabled,
!flags()->disable_allocator_tagging);
@ -123,9 +128,12 @@ static void *HwasanAllocate(StackTrace *stack, uptr size, uptr alignment,
size = RoundUpTo(size, kShadowAlignment);
if (size > kMaxAllowedMallocSize) {
Report("WARNING: HWAddressSanitizer failed to allocate %p bytes\n",
(void *)size);
return Allocator::FailureHandler::OnBadRequest();
if (AllocatorMayReturnNull()) {
Report("WARNING: HWAddressSanitizer failed to allocate 0x%zx bytes\n",
size);
return nullptr;
}
ReportAllocationSizeTooBig(size, kMaxAllowedMallocSize, stack);
}
HwasanThread *t = GetCurrentThread();
void *allocated;
@ -137,6 +145,12 @@ static void *HwasanAllocate(StackTrace *stack, uptr size, uptr alignment,
AllocatorCache *cache = &fallback_allocator_cache;
allocated = allocator.Allocate(cache, size, alignment);
}
if (UNLIKELY(!allocated)) {
SetAllocatorOutOfMemory();
if (AllocatorMayReturnNull())
return nullptr;
ReportOutOfMemory(size, stack);
}
Metadata *meta =
reinterpret_cast<Metadata *>(allocator.GetMetaData(allocated));
meta->state = CHUNK_ALLOCATED;
@ -145,10 +159,11 @@ static void *HwasanAllocate(StackTrace *stack, uptr size, uptr alignment,
if (zeroise)
internal_memset(allocated, 0, size);
void *user_ptr = (flags()->tag_in_malloc &&
atomic_load_relaxed(&hwasan_allocator_tagging_enabled))
? (void *)TagMemoryAligned((uptr)allocated, size, 0xBB)
: allocated;
void *user_ptr = allocated;
if (flags()->tag_in_malloc &&
atomic_load_relaxed(&hwasan_allocator_tagging_enabled))
user_ptr = (void *)TagMemoryAligned(
(uptr)user_ptr, size, t ? t->GenerateRandomTag() : kFallbackAllocTag);
HWASAN_MALLOC_HOOK(user_ptr, size);
return user_ptr;
@ -166,10 +181,11 @@ void HwasanDeallocate(StackTrace *stack, void *user_ptr) {
meta->free_context_id = StackDepotPut(*stack);
// This memory will not be reused by anyone else, so we are free to keep it
// poisoned.
HwasanThread *t = GetCurrentThread();
if (flags()->tag_in_free &&
atomic_load_relaxed(&hwasan_allocator_tagging_enabled))
TagMemoryAligned((uptr)p, size, 0xBC);
HwasanThread *t = GetCurrentThread();
TagMemoryAligned((uptr)p, size,
t ? t->GenerateRandomTag() : kFallbackFreeTag);
if (t) {
AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
allocator.Deallocate(cache, p);
@ -195,8 +211,12 @@ void *HwasanReallocate(StackTrace *stack, void *user_old_p, uptr new_size,
meta->requested_size = new_size;
if (!atomic_load_relaxed(&hwasan_allocator_tagging_enabled))
return user_old_p;
if (flags()->retag_in_realloc)
return (void *)TagMemoryAligned((uptr)old_p, new_size, 0xCC);
if (flags()->retag_in_realloc) {
HwasanThread *t = GetCurrentThread();
return (void *)TagMemoryAligned(
(uptr)old_p, new_size,
t ? t->GenerateRandomTag() : kFallbackAllocTag);
}
if (new_size > old_size) {
tag_t tag = GetTagFromPointer((uptr)user_old_p);
TagMemoryAligned((uptr)old_p + old_size, new_size - old_size, tag);
@ -212,6 +232,15 @@ void *HwasanReallocate(StackTrace *stack, void *user_old_p, uptr new_size,
return new_p;
}
void *HwasanCalloc(StackTrace *stack, uptr nmemb, uptr size) {
if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) {
if (AllocatorMayReturnNull())
return nullptr;
ReportCallocOverflow(nmemb, size, stack);
}
return HwasanAllocate(stack, nmemb * size, sizeof(u64), true);
}
HwasanChunkView FindHeapChunkByAddress(uptr address) {
void *block = allocator.GetBlockBegin(reinterpret_cast<void*>(address));
if (!block)
@ -235,9 +264,7 @@ void *hwasan_malloc(uptr size, StackTrace *stack) {
}
void *hwasan_calloc(uptr nmemb, uptr size, StackTrace *stack) {
if (UNLIKELY(CheckForCallocOverflow(size, nmemb)))
return SetErrnoOnNull(Allocator::FailureHandler::OnBadRequest());
return SetErrnoOnNull(HwasanAllocate(stack, nmemb * size, sizeof(u64), true));
return SetErrnoOnNull(HwasanCalloc(stack, nmemb, size));
}
void *hwasan_realloc(void *ptr, uptr size, StackTrace *stack) {
@ -251,14 +278,17 @@ void *hwasan_realloc(void *ptr, uptr size, StackTrace *stack) {
}
void *hwasan_valloc(uptr size, StackTrace *stack) {
return SetErrnoOnNull(HwasanAllocate(stack, size, GetPageSizeCached(), false));
return SetErrnoOnNull(
HwasanAllocate(stack, size, GetPageSizeCached(), false));
}
void *hwasan_pvalloc(uptr size, StackTrace *stack) {
uptr PageSize = GetPageSizeCached();
if (UNLIKELY(CheckForPvallocOverflow(size, PageSize))) {
errno = errno_ENOMEM;
return Allocator::FailureHandler::OnBadRequest();
if (AllocatorMayReturnNull())
return nullptr;
ReportPvallocOverflow(size, stack);
}
// pvalloc(0) should allocate one page.
size = size ? RoundUpTo(size, PageSize) : PageSize;
@ -268,7 +298,9 @@ void *hwasan_pvalloc(uptr size, StackTrace *stack) {
void *hwasan_aligned_alloc(uptr alignment, uptr size, StackTrace *stack) {
if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(alignment, size))) {
errno = errno_EINVAL;
return Allocator::FailureHandler::OnBadRequest();
if (AllocatorMayReturnNull())
return nullptr;
ReportInvalidAlignedAllocAlignment(size, alignment, stack);
}
return SetErrnoOnNull(HwasanAllocate(stack, size, alignment, false));
}
@ -276,7 +308,9 @@ void *hwasan_aligned_alloc(uptr alignment, uptr size, StackTrace *stack) {
void *hwasan_memalign(uptr alignment, uptr size, StackTrace *stack) {
if (UNLIKELY(!IsPowerOfTwo(alignment))) {
errno = errno_EINVAL;
return Allocator::FailureHandler::OnBadRequest();
if (AllocatorMayReturnNull())
return nullptr;
ReportInvalidAllocationAlignment(alignment, stack);
}
return SetErrnoOnNull(HwasanAllocate(stack, size, alignment, false));
}
@ -284,18 +318,20 @@ void *hwasan_memalign(uptr alignment, uptr size, StackTrace *stack) {
int hwasan_posix_memalign(void **memptr, uptr alignment, uptr size,
StackTrace *stack) {
if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) {
Allocator::FailureHandler::OnBadRequest();
return errno_EINVAL;
if (AllocatorMayReturnNull())
return errno_EINVAL;
ReportInvalidPosixMemalignAlignment(alignment, stack);
}
void *ptr = HwasanAllocate(stack, size, alignment, false);
if (UNLIKELY(!ptr))
// OOM error is already taken care of by HwasanAllocate.
return errno_ENOMEM;
CHECK(IsAligned((uptr)ptr, alignment));
*memptr = ptr;
return 0;
}
} // namespace __hwasan
} // namespace __hwasan
using namespace __hwasan;

View File

@ -0,0 +1,132 @@
//===-- hwasan_dynamic_shadow.cc --------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file is a part of HWAddressSanitizer. It reserves the dynamic shadow
/// memory region and handles the ifunc resolver case when necessary.
///
//===----------------------------------------------------------------------===//
#include "hwasan_dynamic_shadow.h"
#include "hwasan_mapping.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_posix.h"
// The code in this file needs to run in an unrelocated binary. It should not
// access any external symbol, including its own non-hidden globals.
namespace __hwasan {
static void UnmapFromTo(uptr from, uptr to) {
if (to == from)
return;
CHECK(to >= from);
uptr res = internal_munmap(reinterpret_cast<void *>(from), to - from);
if (UNLIKELY(internal_iserror(res))) {
Report("ERROR: %s failed to unmap 0x%zx (%zd) bytes at address %p\n",
SanitizerToolName, to - from, to - from, from);
CHECK("unable to unmap" && 0);
}
}
// Returns an address aligned to SHADOW_GRANULARITY pages, such that one page
// on the left and shadow_size_bytes bytes on the right of it remain reserved
// (PROT_NONE).
static uptr MapDynamicShadow(uptr shadow_size_bytes) {
const uptr granularity = GetMmapGranularity();
const uptr alignment = granularity * SHADOW_GRANULARITY;
const uptr left_padding = granularity;
const uptr shadow_size =
RoundUpTo(shadow_size_bytes, granularity);
const uptr map_size = shadow_size + left_padding + alignment;
const uptr map_start = (uptr)MmapNoAccess(map_size);
CHECK_NE(map_start, ~(uptr)0);
const uptr shadow_start = RoundUpTo(map_start + left_padding, alignment);
UnmapFromTo(map_start, shadow_start - left_padding);
UnmapFromTo(shadow_start + shadow_size, map_start + map_size);
return shadow_start;
}
} // namespace __hwasan
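A rough numeric walk-through (illustration only; it assumes a 4 KiB mmap granularity, SHADOW_GRANULARITY of 16, and a made-up mmap result) of the over-map-then-trim arithmetic in MapDynamicShadow above:

#include <cassert>
#include <cstdint>

static uint64_t RoundUpTo(uint64_t x, uint64_t a) { return (x + a - 1) / a * a; }

int main() {
  const uint64_t granularity = 0x1000;          // assumed 4 KiB pages
  const uint64_t alignment = granularity * 16;  // granularity * SHADOW_GRANULARITY
  const uint64_t left_padding = granularity;
  const uint64_t shadow_size = RoundUpTo(0x12345, granularity);
  const uint64_t map_size = shadow_size + left_padding + alignment;
  const uint64_t map_start = 0x7f0000001000;    // pretend MmapNoAccess result
  const uint64_t shadow_start = RoundUpTo(map_start + left_padding, alignment);
  // The two UnmapFromTo calls release everything outside
  // [shadow_start - left_padding, shadow_start + shadow_size).
  assert(shadow_start % alignment == 0);
  assert(shadow_start - left_padding >= map_start);
  assert(shadow_start + shadow_size <= map_start + map_size);
  return 0;
}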
#if HWASAN_PREMAP_SHADOW
extern "C" {
INTERFACE_ATTRIBUTE void __hwasan_shadow();
decltype(__hwasan_shadow)* __hwasan_premap_shadow();
} // extern "C"
namespace __hwasan {
// Conservative upper limit.
static uptr PremapShadowSize() {
return RoundUpTo(GetMaxVirtualAddress() >> kShadowScale,
GetMmapGranularity());
}
static uptr PremapShadow() {
return MapDynamicShadow(PremapShadowSize());
}
static bool IsPremapShadowAvailable() {
const uptr shadow = reinterpret_cast<uptr>(&__hwasan_shadow);
const uptr resolver = reinterpret_cast<uptr>(&__hwasan_premap_shadow);
// shadow == resolver is how Android KitKat and older handles ifunc.
// shadow == 0 just in case.
return shadow != 0 && shadow != resolver;
}
static uptr FindPremappedShadowStart(uptr shadow_size_bytes) {
const uptr granularity = GetMmapGranularity();
const uptr shadow_start = reinterpret_cast<uptr>(&__hwasan_shadow);
const uptr premap_shadow_size = PremapShadowSize();
const uptr shadow_size = RoundUpTo(shadow_size_bytes, granularity);
// We may have mapped too much. Release extra memory.
UnmapFromTo(shadow_start + shadow_size, shadow_start + premap_shadow_size);
return shadow_start;
}
} // namespace __hwasan
extern "C" {
decltype(__hwasan_shadow)* __hwasan_premap_shadow() {
// The resolver might be called multiple times. Map the shadow just once.
static __sanitizer::uptr shadow = 0;
if (!shadow)
shadow = __hwasan::PremapShadow();
return reinterpret_cast<decltype(__hwasan_shadow)*>(shadow);
}
// __hwasan_shadow is a "function" that has the same address as the first byte
// of the shadow mapping.
INTERFACE_ATTRIBUTE __attribute__((ifunc("__hwasan_premap_shadow")))
void __hwasan_shadow();
} // extern "C"
#endif // HWASAN_PREMAP_SHADOW
namespace __hwasan {
uptr FindDynamicShadowStart(uptr shadow_size_bytes) {
#if HWASAN_PREMAP_SHADOW
if (IsPremapShadowAvailable())
return FindPremappedShadowStart(shadow_size_bytes);
#endif
return MapDynamicShadow(shadow_size_bytes);
}
} // namespace __hwasan

View File

@ -0,0 +1,27 @@
//===-- hwasan_dynamic_shadow.h ---------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file is a part of HWAddressSanitizer. It reserves the dynamic shadow
/// memory region.
///
//===----------------------------------------------------------------------===//
#ifndef HWASAN_PREMAP_SHADOW_H
#define HWASAN_PREMAP_SHADOW_H
#include "sanitizer_common/sanitizer_internal_defs.h"
namespace __hwasan {
uptr FindDynamicShadowStart(uptr shadow_size_bytes);
} // namespace __hwasan
#endif // HWASAN_PREMAP_SHADOW_H

View File

@ -27,3 +27,7 @@ HWASAN_FLAG(bool, atexit, false, "")
// Test only flag to disable malloc/realloc/free memory tagging on startup.
// Tagging can be reenabled with __hwasan_enable_allocator_tagging().
HWASAN_FLAG(bool, disable_allocator_tagging, false, "")
// If false, use simple increment of a thread local counter to generate new
// tags.
HWASAN_FLAG(bool, random_tags, true, "")

View File

@ -17,8 +17,10 @@
#include "interception/interception.h"
#include "hwasan.h"
#include "hwasan_mapping.h"
#include "hwasan_thread.h"
#include "hwasan_poisoning.h"
#include "hwasan_report.h"
#include "sanitizer_common/sanitizer_platform_limits_posix.h"
#include "sanitizer_common/sanitizer_allocator.h"
#include "sanitizer_common/sanitizer_allocator_interface.h"
@ -258,18 +260,17 @@ INTERCEPTOR(void *, realloc, void *ptr, SIZE_T size) {
INTERCEPTOR(void *, malloc, SIZE_T size) {
GET_MALLOC_STACK_TRACE;
if (UNLIKELY(!hwasan_init_is_running))
ENSURE_HWASAN_INITED();
if (UNLIKELY(!hwasan_inited))
// Hack: dlsym calls malloc before REAL(malloc) is retrieved from dlsym.
return AllocateFromLocalPool(size);
return hwasan_malloc(size, &stack);
}
INTERCEPTOR(void *, mmap, void *addr, SIZE_T length, int prot, int flags,
int fd, OFF_T offset) {
if (hwasan_init_is_running)
return REAL(mmap)(addr, length, prot, flags, fd, offset);
ENSURE_HWASAN_INITED();
template <class Mmap>
static void *mmap_interceptor(Mmap real_mmap, void *addr, SIZE_T sz, int prot,
int flags, int fd, OFF64_T off) {
if (addr && !MEM_IS_APP(addr)) {
if (flags & map_fixed) {
errno = errno_EINVAL;
@ -278,30 +279,9 @@ INTERCEPTOR(void *, mmap, void *addr, SIZE_T length, int prot, int flags,
addr = nullptr;
}
}
void *res = REAL(mmap)(addr, length, prot, flags, fd, offset);
return res;
return real_mmap(addr, sz, prot, flags, fd, off);
}
#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
INTERCEPTOR(void *, mmap64, void *addr, SIZE_T length, int prot, int flags,
int fd, OFF64_T offset) {
ENSURE_HWASAN_INITED();
if (addr && !MEM_IS_APP(addr)) {
if (flags & map_fixed) {
errno = errno_EINVAL;
return (void *)-1;
} else {
addr = nullptr;
}
}
void *res = REAL(mmap64)(addr, length, prot, flags, fd, offset);
return res;
}
#define HWASAN_MAYBE_INTERCEPT_MMAP64 INTERCEPT_FUNCTION(mmap64)
#else
#define HWASAN_MAYBE_INTERCEPT_MMAP64
#endif
extern "C" int pthread_attr_init(void *attr);
extern "C" int pthread_attr_destroy(void *attr);
@ -427,6 +407,22 @@ int OnExit() {
*begin = *end = 0; \
}
#define COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, dst, v, size) \
{ \
COMMON_INTERCEPTOR_ENTER(ctx, memset, dst, v, size); \
if (common_flags()->intercept_intrin && \
MEM_IS_APP(GetAddressFromPointer(dst))) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size); \
return REAL(memset)(dst, v, size); \
}
#define COMMON_INTERCEPTOR_MMAP_IMPL(ctx, mmap, addr, length, prot, flags, fd, \
offset) \
do { \
return mmap_interceptor(REAL(mmap), addr, length, prot, flags, fd, \
offset); \
} while (false)
#include "sanitizer_common/sanitizer_platform_interceptors.h"
#include "sanitizer_common/sanitizer_common_interceptors.inc"
#include "sanitizer_common/sanitizer_signal_interceptors.inc"
@ -448,6 +444,7 @@ int OnExit() {
(void)(s); \
} while (false)
#include "sanitizer_common/sanitizer_common_syscalls.inc"
#include "sanitizer_common/sanitizer_syscalls_netbsd.inc"
@ -459,8 +456,6 @@ void InitializeInterceptors() {
InitializeCommonInterceptors();
InitializeSignalInterceptors();
INTERCEPT_FUNCTION(mmap);
HWASAN_MAYBE_INTERCEPT_MMAP64;
INTERCEPT_FUNCTION(posix_memalign);
HWASAN_MAYBE_INTERCEPT_MEMALIGN;
INTERCEPT_FUNCTION(__libc_memalign);

View File

@ -18,6 +18,7 @@
#include "sanitizer_common/sanitizer_internal_defs.h"
extern "C" {
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_init();
@ -32,7 +33,10 @@ using __sanitizer::u16;
using __sanitizer::u8;
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_load(uptr, uptr);
extern uptr __hwasan_shadow_memory_dynamic_address;
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_loadN(uptr, uptr);
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_load1(uptr);
SANITIZER_INTERFACE_ATTRIBUTE
@ -45,7 +49,7 @@ SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_load16(uptr);
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_load_noabort(uptr, uptr);
void __hwasan_loadN_noabort(uptr, uptr);
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_load1_noabort(uptr);
SANITIZER_INTERFACE_ATTRIBUTE
@ -58,7 +62,7 @@ SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_load16_noabort(uptr);
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_store(uptr, uptr);
void __hwasan_storeN(uptr, uptr);
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_store1(uptr);
SANITIZER_INTERFACE_ATTRIBUTE
@ -71,7 +75,7 @@ SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_store16(uptr);
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_store_noabort(uptr, uptr);
void __hwasan_storeN_noabort(uptr, uptr);
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_store1_noabort(uptr);
SANITIZER_INTERFACE_ATTRIBUTE
@ -83,6 +87,12 @@ void __hwasan_store8_noabort(uptr);
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_store16_noabort(uptr);
SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_tag_memory(uptr p, u8 tag, uptr sz);
SANITIZER_INTERFACE_ATTRIBUTE
u8 __hwasan_generate_tag();
// Returns the offset of the first tag mismatch or -1 if the whole range is
// good.
SANITIZER_INTERFACE_ATTRIBUTE

View File

@ -1,4 +1,4 @@
//===-- hwasan_linux.cc -----------------------------------------------------===//
//===-- hwasan_linux.cc -----------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -6,41 +6,45 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of HWAddressSanitizer.
//
// Linux-, NetBSD- and FreeBSD-specific code.
///
/// \file
/// This file is a part of HWAddressSanitizer and contains Linux-, NetBSD- and
/// FreeBSD-specific code.
///
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_platform.h"
#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD
#include "hwasan.h"
#include "hwasan_dynamic_shadow.h"
#include "hwasan_interface_internal.h"
#include "hwasan_mapping.h"
#include "hwasan_report.h"
#include "hwasan_thread.h"
#include <elf.h>
#include <link.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <unistd.h>
#include <unwind.h>
#include <sys/time.h>
#include <sys/resource.h>
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_procmaps.h"
namespace __hwasan {
void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name) {
static void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name) {
CHECK_EQ((beg % GetMmapGranularity()), 0);
CHECK_EQ(((end + 1) % GetMmapGranularity()), 0);
uptr size = end - beg + 1;
DecreaseTotalMmap(size); // Don't count the shadow against mmap_limit_mb.
void *res = MmapFixedNoReserve(beg, size, name);
if (res != (void *)beg) {
if (!MmapFixedNoReserve(beg, size, name)) {
Report(
"ReserveShadowMemoryRange failed while trying to map 0x%zx bytes. "
"Perhaps you're using ulimit -v\n",
@ -52,8 +56,11 @@ void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name) {
}
static void ProtectGap(uptr addr, uptr size) {
if (!size)
return;
void *res = MmapFixedNoAccess(addr, size, "shadow gap");
if (addr == (uptr)res) return;
if (addr == (uptr)res)
return;
// A few pages at the start of the address space can not be protected.
// But we really want to protect as much as possible, to prevent this memory
// being returned as a result of a non-FIXED mmap().
@ -63,63 +70,160 @@ static void ProtectGap(uptr addr, uptr size) {
addr += step;
size -= step;
void *res = MmapFixedNoAccess(addr, size, "shadow gap");
if (addr == (uptr)res) return;
if (addr == (uptr)res)
return;
}
}
Report(
"ERROR: Failed to protect the shadow gap. "
"ASan cannot proceed correctly. ABORTING.\n");
"ERROR: Failed to protect shadow gap [%p, %p]. "
"HWASan cannot proceed correctly. ABORTING.\n", (void *)addr,
(void *)(addr + size));
DumpProcessMap();
Die();
}
bool InitShadow() {
const uptr maxVirtualAddress = GetMaxUserVirtualAddress();
static uptr kLowMemStart;
static uptr kLowMemEnd;
static uptr kLowShadowEnd;
static uptr kLowShadowStart;
static uptr kHighShadowStart;
static uptr kHighShadowEnd;
static uptr kHighMemStart;
static uptr kHighMemEnd;
// LowMem covers as much of the first 4GB as possible.
const uptr kLowMemEnd = 1UL<<32;
const uptr kLowShadowEnd = kLowMemEnd >> kShadowScale;
const uptr kLowShadowStart = kLowShadowEnd >> kShadowScale;
static void PrintRange(uptr start, uptr end, const char *name) {
Printf("|| [%p, %p] || %.*s ||\n", (void *)start, (void *)end, 10, name);
}
static void PrintAddressSpaceLayout() {
PrintRange(kHighMemStart, kHighMemEnd, "HighMem");
if (kHighShadowEnd + 1 < kHighMemStart)
PrintRange(kHighShadowEnd + 1, kHighMemStart - 1, "ShadowGap");
else
CHECK_EQ(kHighShadowEnd + 1, kHighMemStart);
PrintRange(kHighShadowStart, kHighShadowEnd, "HighShadow");
if (SHADOW_OFFSET) {
if (kLowShadowEnd + 1 < kHighShadowStart)
PrintRange(kLowShadowEnd + 1, kHighShadowStart - 1, "ShadowGap");
else
CHECK_EQ(kLowMemEnd + 1, kHighShadowStart);
PrintRange(kLowShadowStart, kLowShadowEnd, "LowShadow");
if (kLowMemEnd + 1 < kLowShadowStart)
PrintRange(kLowMemEnd + 1, kLowShadowStart - 1, "ShadowGap");
else
CHECK_EQ(kLowMemEnd + 1, kLowShadowStart);
PrintRange(kLowMemStart, kLowMemEnd, "LowMem");
CHECK_EQ(0, kLowMemStart);
} else {
if (kLowMemEnd + 1 < kHighShadowStart)
PrintRange(kLowMemEnd + 1, kHighShadowStart - 1, "ShadowGap");
else
CHECK_EQ(kLowMemEnd + 1, kHighShadowStart);
PrintRange(kLowMemStart, kLowMemEnd, "LowMem");
CHECK_EQ(kLowShadowEnd + 1, kLowMemStart);
PrintRange(kLowShadowStart, kLowShadowEnd, "LowShadow");
PrintRange(0, kLowShadowStart - 1, "ShadowGap");
}
}
static uptr GetHighMemEnd() {
// HighMem covers the upper part of the address space.
const uptr kHighShadowEnd = (maxVirtualAddress >> kShadowScale) + 1;
const uptr kHighShadowStart = Max(kLowMemEnd, kHighShadowEnd >> kShadowScale);
CHECK(kHighShadowStart < kHighShadowEnd);
uptr max_address = GetMaxUserVirtualAddress();
if (SHADOW_OFFSET)
// Adjust max address to make sure that kHighMemEnd and kHighMemStart are
// properly aligned:
max_address |= SHADOW_GRANULARITY * GetMmapGranularity() - 1;
return max_address;
}
const uptr kHighMemStart = kHighShadowStart << kShadowScale;
CHECK(kHighShadowEnd <= kHighMemStart);
static void InitializeShadowBaseAddress(uptr shadow_size_bytes) {
// Set the shadow memory address to uninitialized.
__hwasan_shadow_memory_dynamic_address = kDefaultShadowSentinel;
uptr shadow_start = SHADOW_OFFSET;
// Detect if a dynamic shadow address must be used and find the available
  // location when necessary. When a dynamic address is used, the macro
  // SHADOW_OFFSET expands to __hwasan_shadow_memory_dynamic_address, which
  // was just set to kDefaultShadowSentinel.
if (shadow_start == kDefaultShadowSentinel) {
__hwasan_shadow_memory_dynamic_address = 0;
CHECK_EQ(0, SHADOW_OFFSET);
shadow_start = FindDynamicShadowStart(shadow_size_bytes);
}
// Update the shadow memory address (potentially) used by instrumentation.
__hwasan_shadow_memory_dynamic_address = shadow_start;
}
if (Verbosity()) {
Printf("|| `[%p, %p]` || HighMem ||\n", (void *)kHighMemStart,
(void *)maxVirtualAddress);
if (kHighMemStart > kHighShadowEnd)
Printf("|| `[%p, %p]` || ShadowGap2 ||\n", (void *)kHighShadowEnd,
(void *)kHighMemStart);
Printf("|| `[%p, %p]` || HighShadow ||\n", (void *)kHighShadowStart,
(void *)kHighShadowEnd);
if (kHighShadowStart > kLowMemEnd)
Printf("|| `[%p, %p]` || ShadowGap2 ||\n", (void *)kHighShadowEnd,
(void *)kHighMemStart);
Printf("|| `[%p, %p]` || LowMem ||\n", (void *)kLowShadowEnd,
(void *)kLowMemEnd);
Printf("|| `[%p, %p]` || LowShadow ||\n", (void *)kLowShadowStart,
(void *)kLowShadowEnd);
Printf("|| `[%p, %p]` || ShadowGap1 ||\n", (void *)0,
(void *)kLowShadowStart);
bool InitShadow() {
// Define the entire memory range.
kHighMemEnd = GetHighMemEnd();
// Determine shadow memory base offset.
InitializeShadowBaseAddress(MEM_TO_SHADOW_SIZE(kHighMemEnd));
// Place the low memory first.
if (SHADOW_OFFSET) {
kLowMemEnd = SHADOW_OFFSET - 1;
kLowMemStart = 0;
} else {
// LowMem covers as much of the first 4GB as possible.
kLowMemEnd = (1UL << 32) - 1;
kLowMemStart = MEM_TO_SHADOW(kLowMemEnd) + 1;
}
ReserveShadowMemoryRange(kLowShadowStart, kLowShadowEnd - 1, "low shadow");
ReserveShadowMemoryRange(kHighShadowStart, kHighShadowEnd - 1, "high shadow");
ProtectGap(0, kLowShadowStart);
if (kHighShadowStart > kLowMemEnd)
ProtectGap(kLowMemEnd, kHighShadowStart - kLowMemEnd);
if (kHighMemStart > kHighShadowEnd)
ProtectGap(kHighShadowEnd, kHighMemStart - kHighShadowEnd);
// Define the low shadow based on the already placed low memory.
kLowShadowEnd = MEM_TO_SHADOW(kLowMemEnd);
kLowShadowStart = SHADOW_OFFSET ? SHADOW_OFFSET : MEM_TO_SHADOW(kLowMemStart);
// High shadow takes whatever memory is left up there (making sure it is not
// interfering with low memory in the fixed case).
kHighShadowEnd = MEM_TO_SHADOW(kHighMemEnd);
kHighShadowStart = Max(kLowMemEnd, MEM_TO_SHADOW(kHighShadowEnd)) + 1;
// High memory starts where allocated shadow allows.
kHighMemStart = SHADOW_TO_MEM(kHighShadowStart);
// Check the sanity of the defined memory ranges (there might be gaps).
CHECK_EQ(kHighMemStart % GetMmapGranularity(), 0);
CHECK_GT(kHighMemStart, kHighShadowEnd);
CHECK_GT(kHighShadowEnd, kHighShadowStart);
CHECK_GT(kHighShadowStart, kLowMemEnd);
CHECK_GT(kLowMemEnd, kLowMemStart);
CHECK_GT(kLowShadowEnd, kLowShadowStart);
if (SHADOW_OFFSET)
CHECK_GT(kLowShadowStart, kLowMemEnd);
else
CHECK_GT(kLowMemEnd, kLowShadowStart);
if (Verbosity())
PrintAddressSpaceLayout();
// Reserve shadow memory.
ReserveShadowMemoryRange(kLowShadowStart, kLowShadowEnd, "low shadow");
ReserveShadowMemoryRange(kHighShadowStart, kHighShadowEnd, "high shadow");
// Protect all the gaps.
ProtectGap(0, Min(kLowMemStart, kLowShadowStart));
if (SHADOW_OFFSET) {
if (kLowMemEnd + 1 < kLowShadowStart)
ProtectGap(kLowMemEnd + 1, kLowShadowStart - kLowMemEnd - 1);
if (kLowShadowEnd + 1 < kHighShadowStart)
ProtectGap(kLowShadowEnd + 1, kHighShadowStart - kLowShadowEnd - 1);
} else {
if (kLowMemEnd + 1 < kHighShadowStart)
ProtectGap(kLowMemEnd + 1, kHighShadowStart - kLowMemEnd - 1);
}
if (kHighShadowEnd + 1 < kHighMemStart)
ProtectGap(kHighShadowEnd + 1, kHighMemStart - kHighShadowEnd - 1);
return true;
}
bool MemIsApp(uptr p) {
CHECK(GetTagFromPointer(p) == 0);
return p >= kHighMemStart || (p >= kLowMemStart && p <= kLowMemEnd);
}
static void HwasanAtExit(void) {
if (flags()->print_stats && (flags()->atexit || hwasan_report_count > 0))
ReportStats();
@ -177,50 +281,65 @@ struct AccessInfo {
bool recover;
};
static AccessInfo GetAccessInfo(siginfo_t *info, ucontext_t *uc) {
// Access type is passed in a platform dependent way (see below) and encoded
// as 0xXY, where X&1 is 1 for store, 0 for load, and X&2 is 1 if the error is
// recoverable. Valid values of Y are 0 to 4, which are interpreted as
// log2(access_size), and 0xF, which means that access size is passed via
// platform dependent register (see below).
#if defined(__aarch64__)
static AccessInfo GetAccessInfo(siginfo_t *info, ucontext_t *uc) {
// Access type is encoded in HLT immediate as 0x1XY,
// where X&1 is 1 for store, 0 for load,
// and X&2 is 1 if the error is recoverable.
// Valid values of Y are 0 to 4, which are interpreted as log2(access_size),
// and 0xF, which means that access size is stored in X1 register.
// Access address is always in X0 register.
AccessInfo ai;
// Access type is encoded in BRK immediate as 0x900 + 0xXY. For Y == 0xF,
// access size is stored in X1 register. Access address is always in X0
// register.
uptr pc = (uptr)info->si_addr;
unsigned code = ((*(u32 *)pc) >> 5) & 0xffff;
if ((code & 0xff00) != 0x100)
return AccessInfo{0, 0, false, false}; // Not ours.
bool is_store = code & 0x10;
bool recover = code & 0x20;
unsigned size_log = code & 0xf;
if (size_log > 4 && size_log != 0xf)
return AccessInfo{0, 0, false, false}; // Not ours.
const unsigned code = ((*(u32 *)pc) >> 5) & 0xffff;
if ((code & 0xff00) != 0x900)
return AccessInfo{}; // Not ours.
const bool is_store = code & 0x10;
const bool recover = code & 0x20;
const uptr addr = uc->uc_mcontext.regs[0];
const unsigned size_log = code & 0xf;
if (size_log > 4 && size_log != 0xf)
return AccessInfo{}; // Not ours.
const uptr size = size_log == 0xf ? uc->uc_mcontext.regs[1] : 1U << size_log;
#elif defined(__x86_64__)
// Access type is encoded in the instruction following INT3 as
// NOP DWORD ptr [EAX + 0x40 + 0xXY]. For Y == 0xF, access size is stored in
// RSI register. Access address is always in RDI register.
uptr pc = (uptr)uc->uc_mcontext.gregs[REG_RIP];
uint8_t *nop = (uint8_t*)pc;
if (*nop != 0x0f || *(nop + 1) != 0x1f || *(nop + 2) != 0x40 ||
*(nop + 3) < 0x40)
return AccessInfo{}; // Not ours.
const unsigned code = *(nop + 3);
const bool is_store = code & 0x10;
const bool recover = code & 0x20;
const uptr addr = uc->uc_mcontext.gregs[REG_RDI];
const unsigned size_log = code & 0xf;
if (size_log > 4 && size_log != 0xf)
return AccessInfo{}; // Not ours.
const uptr size =
size_log == 0xf ? uc->uc_mcontext.gregs[REG_RSI] : 1U << size_log;
ai.is_store = is_store;
ai.is_load = !is_store;
ai.addr = uc->uc_mcontext.regs[0];
if (size_log == 0xf)
ai.size = uc->uc_mcontext.regs[1];
else
ai.size = 1U << size_log;
ai.recover = recover;
return ai;
}
#else
static AccessInfo GetAccessInfo(siginfo_t *info, ucontext_t *uc) {
return AccessInfo{0, 0, false, false};
}
# error Unsupported architecture
#endif
static bool HwasanOnSIGILL(int signo, siginfo_t *info, ucontext_t *uc) {
SignalContext sig{info, uc};
return AccessInfo{addr, size, is_store, !is_store, recover};
}
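A self-contained sketch of the 0xXY decoding convention described above (DecodeAccessCode is a made-up helper name; the real logic lives in GetAccessInfo):

#include <cassert>

struct DecodedAccess {
  bool is_store;
  bool recover;
  unsigned size_log;  // 0..4 => access size is 1 << size_log; 0xf => in a register
};

// X & 1 selects store vs. load, X & 2 marks the error recoverable, and Y is
// log2(access size) or 0xf.
static DecodedAccess DecodeAccessCode(unsigned code) {
  return DecodedAccess{(code & 0x10) != 0, (code & 0x20) != 0, code & 0xf};
}

int main() {
  const DecodedAccess a = DecodeAccessCode(0x13);  // non-recoverable 8-byte store
  assert(a.is_store && !a.recover && a.size_log == 3);
  const DecodedAccess b = DecodeAccessCode(0x2f);  // recoverable load, size in reg
  assert(!b.is_store && b.recover && b.size_log == 0xf);
  return 0;
}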
static bool HwasanOnSIGTRAP(int signo, siginfo_t *info, ucontext_t *uc) {
AccessInfo ai = GetAccessInfo(info, uc);
if (!ai.is_store && !ai.is_load)
return false;
InternalScopedBuffer<BufferedStackTrace> stack_buffer(1);
InternalMmapVector<BufferedStackTrace> stack_buffer(1);
BufferedStackTrace *stack = stack_buffer.data();
stack->Reset();
SignalContext sig{info, uc};
GetStackTrace(stack, kStackTraceMax, sig.pc, sig.bp, uc,
common_flags()->fast_unwind_on_fatal);
@ -230,7 +349,12 @@ static bool HwasanOnSIGILL(int signo, siginfo_t *info, ucontext_t *uc) {
if (flags()->halt_on_error || !ai.recover)
Die();
#if defined(__aarch64__)
uc->uc_mcontext.pc += 4;
#elif defined(__x86_64__)
#else
# error Unsupported architecture
#endif
return true;
}
@ -242,8 +366,8 @@ static void OnStackUnwind(const SignalContext &sig, const void *,
void HwasanOnDeadlySignal(int signo, void *info, void *context) {
// Probably a tag mismatch.
if (signo == SIGILL)
if (HwasanOnSIGILL(signo, (siginfo_t *)info, (ucontext_t*)context))
if (signo == SIGTRAP)
if (HwasanOnSIGTRAP(signo, (siginfo_t *)info, (ucontext_t*)context))
return;
HandleDeadlySignal(info, context, GetTid(), &OnStackUnwind, nullptr);

View File

@ -0,0 +1,85 @@
//===-- hwasan_mapping.h ----------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file is a part of HWAddressSanitizer and defines memory mapping.
///
//===----------------------------------------------------------------------===//
#ifndef HWASAN_MAPPING_H
#define HWASAN_MAPPING_H
#include "sanitizer_common/sanitizer_internal_defs.h"
// Typical mapping on Linux/x86_64 with fixed shadow mapping:
// || [0x080000000000, 0x7fffffffffff] || HighMem ||
// || [0x008000000000, 0x07ffffffffff] || HighShadow ||
// || [0x000100000000, 0x007fffffffff] || ShadowGap ||
// || [0x000010000000, 0x0000ffffffff] || LowMem ||
// || [0x000001000000, 0x00000fffffff] || LowShadow ||
// || [0x000000000000, 0x000000ffffff] || ShadowGap ||
//
// and with dynamic shadow mapped at [0x770d59f40000, 0x7f0d59f40000]:
// || [0x7f0d59f40000, 0x7fffffffffff] || HighMem ||
// || [0x7efe2f934000, 0x7f0d59f3ffff] || HighShadow ||
// || [0x7e7e2f934000, 0x7efe2f933fff] || ShadowGap ||
// || [0x770d59f40000, 0x7e7e2f933fff] || LowShadow ||
// || [0x000000000000, 0x770d59f3ffff] || LowMem ||
// Typical mapping on Android/AArch64 (39-bit VMA):
// || [0x001000000000, 0x007fffffffff] || HighMem ||
// || [0x000800000000, 0x000fffffffff] || ShadowGap ||
// || [0x000100000000, 0x0007ffffffff] || HighShadow ||
// || [0x000010000000, 0x0000ffffffff] || LowMem ||
// || [0x000001000000, 0x00000fffffff] || LowShadow ||
// || [0x000000000000, 0x000000ffffff] || ShadowGap ||
//
// and with dynamic shadow mapped: [0x007477480000, 0x007c77480000]:
// || [0x007c77480000, 0x007fffffffff] || HighMem ||
// || [0x007c3ebc8000, 0x007c7747ffff] || HighShadow ||
// || [0x007bbebc8000, 0x007c3ebc7fff] || ShadowGap ||
// || [0x007477480000, 0x007bbebc7fff] || LowShadow ||
// || [0x000000000000, 0x00747747ffff] || LowMem ||
static constexpr __sanitizer::u64 kDefaultShadowSentinel = ~(__sanitizer::u64)0;
// Reasonable values are 4 (for 1/16th shadow) and 6 (for 1/64th).
constexpr __sanitizer::uptr kShadowScale = 4;
constexpr __sanitizer::uptr kShadowAlignment = 1ULL << kShadowScale;
#if SANITIZER_ANDROID
# define HWASAN_FIXED_MAPPING 0
#else
# define HWASAN_FIXED_MAPPING 1
#endif
#if HWASAN_FIXED_MAPPING
# define SHADOW_OFFSET (0)
# define HWASAN_PREMAP_SHADOW 0
#else
# define SHADOW_OFFSET (__hwasan_shadow_memory_dynamic_address)
# define HWASAN_PREMAP_SHADOW 1
#endif
#define SHADOW_GRANULARITY (1ULL << kShadowScale)
#define MEM_TO_SHADOW(mem) (((uptr)(mem) >> kShadowScale) + SHADOW_OFFSET)
#define SHADOW_TO_MEM(shadow) (((uptr)(shadow) - SHADOW_OFFSET) << kShadowScale)
#define MEM_TO_SHADOW_SIZE(size) ((uptr)(size) >> kShadowScale)
#define MEM_IS_APP(mem) MemIsApp((uptr)(mem))
namespace __hwasan {
bool MemIsApp(uptr p);
} // namespace __hwasan
#endif // HWASAN_MAPPING_H
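A small numeric sketch of the address arithmetic defined above (illustration only; SHADOW_OFFSET is taken as 0, i.e. the fixed-mapping case):

#include <cassert>
#include <cstdint>

int main() {
  const uintptr_t kShadowScale = 4;   // one shadow byte covers 16 bytes of memory
  const uintptr_t shadow_offset = 0;  // fixed-mapping case
  auto mem_to_shadow = [&](uintptr_t mem) {
    return (mem >> kShadowScale) + shadow_offset;
  };
  auto shadow_to_mem = [&](uintptr_t sh) {
    return (sh - shadow_offset) << kShadowScale;
  };
  assert(mem_to_shadow(0x4010) == 0x401);  // 0x4010..0x401f share one shadow cell
  assert(mem_to_shadow(0x401f) == 0x401);
  assert(shadow_to_mem(0x401) == 0x4010);  // maps back to the start of the granule
  return 0;
}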

View File

@ -1,4 +1,4 @@
//===-- hwasan_new_delete.cc ------------------------------------------------===//
//===-- hwasan_new_delete.cc ----------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@ -15,6 +15,7 @@
#include "hwasan.h"
#include "interception/interception.h"
#include "sanitizer_common/sanitizer_allocator.h"
#include "sanitizer_common/sanitizer_allocator_report.h"
#if HWASAN_REPLACE_OPERATORS_NEW_AND_DELETE
@ -32,7 +33,7 @@ namespace std {
#define OPERATOR_NEW_BODY(nothrow) \
GET_MALLOC_STACK_TRACE; \
void *res = hwasan_malloc(size, &stack);\
if (!nothrow && UNLIKELY(!res)) DieOnFailure::OnOOM();\
if (!nothrow && UNLIKELY(!res)) ReportOutOfMemory(size, &stack);\
return res
INTERCEPTOR_ATTRIBUTE

Some files were not shown because too many files have changed in this diff.