mirror of
https://git.FreeBSD.org/src.git
synced 2024-12-29 12:03:03 +00:00
Vendor import of llvm release_80 branch r354799:
https://llvm.org/svn/llvm-project/llvm/branches/release_80@354799
This commit is contained in:
parent
519e1985ae
commit
bd7f07563c
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/vendor/llvm/dist-release_80/; revision=344535 svn path=/vendor/llvm/llvm-release_80-r354799/; revision=344536; tag=vendor/llvm/llvm-release_80-r354799
@ -5,12 +5,6 @@ LLVM 8.0.0 Release Notes
|
||||
.. contents::
|
||||
:local:
|
||||
|
||||
.. warning::
|
||||
These are in-progress notes for the upcoming LLVM 8 release.
|
||||
Release notes for previous releases can be found on
|
||||
`the Download Page <https://releases.llvm.org/download.html>`_.
|
||||
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
@ -26,10 +20,24 @@ have questions or comments, the `LLVM Developer's Mailing List
|
||||
<https://lists.llvm.org/mailman/listinfo/llvm-dev>`_ is a good place to send
|
||||
them.
|
||||
|
||||
Note that if you are reading this file from a Subversion checkout or the main
|
||||
LLVM web page, this document applies to the *next* release, not the current
|
||||
one. To see the release notes for a specific release, please see the `releases
|
||||
page <https://llvm.org/releases/>`_.
|
||||
Minimum Required Compiler Version
|
||||
=================================
|
||||
As `discussed on the mailing list
|
||||
<https://lists.llvm.org/pipermail/llvm-dev/2019-January/129452.html>`_,
|
||||
building LLVM will soon require more recent toolchains as follows:
|
||||
|
||||
============= ====
|
||||
Clang 3.5
|
||||
Apple Clang 6.0
|
||||
GCC 5.1
|
||||
Visual Studio 2017
|
||||
============= ====
|
||||
|
||||
A new CMake check when configuring LLVM provides a soft-error if your
|
||||
toolchain will become unsupported soon. You can opt out of the soft-error by
|
||||
setting the ``LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN`` CMake variable to
|
||||
``ON``.
|
||||
|
||||
|
||||
Non-comprehensive list of changes in this release
|
||||
=================================================
|
||||
@ -40,27 +48,11 @@ Non-comprehensive list of changes in this release
|
||||
functionality, or simply have a lot to talk about), see the `NOTE` below
|
||||
for adding a new subsection.
|
||||
|
||||
* As `discussed on the mailing list
|
||||
<https://lists.llvm.org/pipermail/llvm-dev/2019-January/129452.html>`_,
|
||||
building LLVM will soon require more recent toolchains as follows:
|
||||
|
||||
============= ====
|
||||
Clang 3.5
|
||||
Apple Clang 6.0
|
||||
GCC 5.1
|
||||
Visual Studio 2017
|
||||
============= ====
|
||||
|
||||
A new CMake check when configuring LLVM provides a soft-error if your
|
||||
toolchain will become unsupported soon. You can opt out of the soft-error by
|
||||
setting the ``LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN`` CMake variable to
|
||||
``ON``.
|
||||
|
||||
* The **llvm-cov** tool can now export lcov trace files using the
|
||||
`-format=lcov` option of the `export` command.
|
||||
|
||||
* The add_llvm_loadable_module CMake macro has been removed. The
|
||||
add_llvm_library macro with the MODULE argument now provides the same
|
||||
* The ``add_llvm_loadable_module`` CMake macro has been removed. The
|
||||
``add_llvm_library`` macro with the ``MODULE`` argument now provides the same
|
||||
functionality. See `Writing an LLVM Pass
|
||||
<WritingAnLLVMPass.html#setting-up-the-build-environment>`_.
|
||||
|
||||
@ -70,6 +62,24 @@ Non-comprehensive list of changes in this release
|
||||
|
||||
* Added support for labels as offsets in ``.reloc`` directive.
|
||||
|
||||
* Support for precise identification of X86 instructions with memory operands,
|
||||
by using debug information. This supports profile-driven cache prefetching.
|
||||
It is enabled with the ``-x86-discriminate-memops`` LLVM Flag.
|
||||
|
||||
* Support for profile-driven software cache prefetching on X86. This is part of
|
||||
a larger system, consisting of: an offline cache prefetches recommender,
|
||||
AutoFDO tooling, and LLVM. In this system, a binary compiled with
|
||||
``-x86-discriminate-memops`` is run under the observation of the recommender.
|
||||
The recommender identifies certain memory access instructions by their binary
|
||||
file address, and recommends a prefetch of a specific type (NTA, T0, etc) be
|
||||
performed at a specified fixed offset from such an instruction's memory
|
||||
operand. Next, this information needs to be converted to the AutoFDO syntax
|
||||
and the resulting profile may be passed back to the compiler with the LLVM
|
||||
flag ``-prefetch-hints-file``, together with the exact same set of
|
||||
compilation parameters used for the original binary. More information is
|
||||
available in the `RFC
|
||||
<https://lists.llvm.org/pipermail/llvm-dev/2018-November/127461.html>`_.
|
||||
|
||||
.. NOTE
|
||||
If you would like to document a larger change, then you can add a
|
||||
subsection about it right here. You can copy the following boilerplate
|
||||
@ -83,10 +93,19 @@ Non-comprehensive list of changes in this release
|
||||
Changes to the LLVM IR
|
||||
----------------------
|
||||
|
||||
* Function attribute ``speculative_load_hardening`` has been introduced to
|
||||
allow indicating that `Speculative Load Hardening
|
||||
<SpeculativeLoadHardening.html>`_ must be enabled for the function body.
|
||||
|
||||
|
||||
Changes to the AArch64 Target
|
||||
-----------------------------
|
||||
|
||||
* Support for Speculative Load Hardening has been added.
|
||||
|
||||
* Initial support for the Tiny code model, where code and its statically
|
||||
defined symbols must live within 1MB of each other.
|
||||
|
||||
* Added support for the ``.arch_extension`` assembler directive, just like
|
||||
on ARM.
|
||||
|
||||
@ -126,7 +145,40 @@ Changes to the MIPS Target
|
||||
Changes to the PowerPC Target
|
||||
-----------------------------
|
||||
|
||||
During this release ...
|
||||
* Switched to non-PIC default
|
||||
|
||||
* Deprecated Darwin support
|
||||
|
||||
* Enabled Out-of-Order scheduling for P9
|
||||
|
||||
* Better overload rules for compatible vector type parameter
|
||||
|
||||
* Support constraint ‘wi’, modifier ‘x’ and VSX registers in inline asm
|
||||
|
||||
* More ``__float128`` support
|
||||
|
||||
* Added new builtins like vector int128 ``pack``/``unpack`` and
|
||||
``stxvw4x.be``/``stxvd2x.be``
|
||||
|
||||
* Provided significant improvements to the automatic vectorizer
|
||||
|
||||
* Code-gen improvements (especially for Power9)
|
||||
|
||||
* Fixed some long-standing bugs in the back end
|
||||
|
||||
* Added experimental prologue/epilogue improvements
|
||||
|
||||
* Enabled builtins tests in compiler-rt
|
||||
|
||||
* Add ``__fixunstfti``/``__floattitf`` in compiler-rt to support conversion
|
||||
between IBM double-double and unsigned int128
|
||||
|
||||
* Disable randomized address space when running the sanitizers on Linux ppc64le
|
||||
|
||||
* Completed support in LLD for ELFv2
|
||||
|
||||
* Enabled llvm-exegesis latency mode for PPC
|
||||
|
||||
|
||||
Changes to the X86 Target
|
||||
-------------------------
|
||||
@ -134,6 +186,18 @@ Changes to the X86 Target
|
||||
* Machine model for AMD bdver2 (Piledriver) CPU was added. It is used to support
|
||||
instruction scheduling and other instruction cost heuristics.
|
||||
|
||||
* New AVX512F gather and scatter intrinsics were added that take a <X x i1> mask
|
||||
instead of a scalar integer. This removes the need for a bitcast in IR. The
|
||||
new intrinsics are named like the old intrinsics with ``llvm.avx512.``
|
||||
replaced with ``llvm.avx512.mask.``. The old intrinsics will be removed in a
|
||||
future release.
|
||||
|
||||
* Added ``cascadelake`` as a CPU name for -march. This is ``skylake-avx512``
|
||||
with the addition of the ``avx512vnni`` instruction set.
|
||||
|
||||
* ADCX instruction will no longer be emitted. This instruction is rarely better
|
||||
than the legacy ADC instruction and just increased code size.
|
||||
|
||||
Changes to the AMDGPU Target
|
||||
-----------------------------
|
||||
|
||||
@ -156,6 +220,10 @@ use for it will be to add support for returning small structs as multiple
|
||||
return values, once the underlying WebAssembly platform itself supports it.
|
||||
Additionally, multithreading support is not yet included in the stable ABI.
|
||||
|
||||
Changes to the Nios2 Target
|
||||
---------------------------
|
||||
|
||||
* The Nios2 target was removed from this release.
|
||||
|
||||
Changes to the OCaml bindings
|
||||
-----------------------------
|
||||
@ -169,6 +237,14 @@ Changes to the C API
|
||||
Changes to the DAG infrastructure
|
||||
---------------------------------
|
||||
|
||||
Changes to LLDB
|
||||
===============
|
||||
* Printed source code is now syntax highlighted in the terminal (only for C
|
||||
languages).
|
||||
|
||||
* The expression command now supports tab completing expressions.
|
||||
|
||||
|
||||
External Open Source Projects Using LLVM 8
|
||||
==========================================
|
||||
|
||||
|
@ -1,11 +1,6 @@
|
||||
Overview
|
||||
========
|
||||
|
||||
.. warning::
|
||||
|
||||
If you are using a released version of LLVM, see `the download page
|
||||
<http://llvm.org/releases/>`_ to find your documentation.
|
||||
|
||||
The LLVM compiler infrastructure supports a wide range of projects, from
|
||||
industrial strength compilers to specialized JIT applications to small
|
||||
research projects.
|
||||
|
@ -1275,14 +1275,20 @@ void ELFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
|
||||
if (!Symbol.isUndefined() && !Rest.startswith("@@@"))
|
||||
continue;
|
||||
|
||||
// FIXME: produce a better error message.
|
||||
// FIXME: Get source locations for these errors or diagnose them earlier.
|
||||
if (Symbol.isUndefined() && Rest.startswith("@@") &&
|
||||
!Rest.startswith("@@@"))
|
||||
report_fatal_error("A @@ version cannot be undefined");
|
||||
!Rest.startswith("@@@")) {
|
||||
Asm.getContext().reportError(SMLoc(), "versioned symbol " + AliasName +
|
||||
" must be defined");
|
||||
continue;
|
||||
}
|
||||
|
||||
if (Renames.count(&Symbol) && Renames[&Symbol] != Alias)
|
||||
report_fatal_error(llvm::Twine("Multiple symbol versions defined for ") +
|
||||
Symbol.getName());
|
||||
if (Renames.count(&Symbol) && Renames[&Symbol] != Alias) {
|
||||
Asm.getContext().reportError(
|
||||
SMLoc(), llvm::Twine("multiple symbol versions defined for ") +
|
||||
Symbol.getName());
|
||||
continue;
|
||||
}
|
||||
|
||||
Renames.insert(std::make_pair(&Symbol, Alias));
|
||||
}
|
||||
|
@ -13884,7 +13884,6 @@ static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
|
||||
int NumEltsPerLane = NumElts / NumLanes;
|
||||
|
||||
SmallVector<int, 4> SrcLaneMask(NumLanes, SM_SentinelUndef);
|
||||
SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
|
||||
SmallVector<int, 16> PermMask(NumElts, SM_SentinelUndef);
|
||||
|
||||
for (int i = 0; i != NumElts; ++i) {
|
||||
@ -13899,10 +13898,20 @@ static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
|
||||
return SDValue();
|
||||
SrcLaneMask[DstLane] = SrcLane;
|
||||
|
||||
LaneMask[i] = (SrcLane * NumEltsPerLane) + (i % NumEltsPerLane);
|
||||
PermMask[i] = (DstLane * NumEltsPerLane) + (M % NumEltsPerLane);
|
||||
}
|
||||
|
||||
// Make sure we set all elements of the lane mask, to avoid undef propagation.
|
||||
SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
|
||||
for (int DstLane = 0; DstLane != NumLanes; ++DstLane) {
|
||||
int SrcLane = SrcLaneMask[DstLane];
|
||||
if (0 <= SrcLane)
|
||||
for (int j = 0; j != NumEltsPerLane; ++j) {
|
||||
LaneMask[(DstLane * NumEltsPerLane) + j] =
|
||||
(SrcLane * NumEltsPerLane) + j;
|
||||
}
|
||||
}
|
||||
|
||||
// If we're only shuffling a single lowest lane and the rest are identity
|
||||
// then don't bother.
|
||||
// TODO - isShuffleMaskInputInPlace could be extended to something like this.
|
||||
|
@ -1376,7 +1376,8 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
|
||||
if (match(LHS, m_ShuffleVector(m_Value(L0), m_Value(L1), m_Constant(Mask))) &&
|
||||
match(RHS, m_ShuffleVector(m_Value(R0), m_Value(R1), m_Specific(Mask))) &&
|
||||
LHS->hasOneUse() && RHS->hasOneUse() &&
|
||||
cast<ShuffleVectorInst>(LHS)->isConcat()) {
|
||||
cast<ShuffleVectorInst>(LHS)->isConcat() &&
|
||||
cast<ShuffleVectorInst>(RHS)->isConcat()) {
|
||||
// This transform does not have the speculative execution constraint as
|
||||
// below because the shuffle is a concatenation. The new binops are
|
||||
// operating on exactly the same elements as the existing binop.
|
||||
|
@ -11,21 +11,37 @@
|
||||
// later typically inlined as a chain of efficient hardware comparisons). This
|
||||
// typically benefits c++ member or nonmember operator==().
|
||||
//
|
||||
// The basic idea is to replace a larger chain of integer comparisons loaded
|
||||
// from contiguous memory locations into a smaller chain of such integer
|
||||
// The basic idea is to replace a longer chain of integer comparisons loaded
|
||||
// from contiguous memory locations into a shorter chain of larger integer
|
||||
// comparisons. The benefits are twofold:
|
||||
// - There are fewer jumps, and therefore fewer opportunities for mispredictions
|
||||
// and I-cache misses.
|
||||
// - Code size is smaller, both because jumps are removed and because the
|
||||
// encoding of a 2*n byte compare is smaller than that of two n-byte
|
||||
// compares.
|
||||
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// struct S {
|
||||
// int a;
|
||||
// char b;
|
||||
// char c;
|
||||
// uint16_t d;
|
||||
// bool operator==(const S& o) const {
|
||||
// return a == o.a && b == o.b && c == o.c && d == o.d;
|
||||
// }
|
||||
// };
|
||||
//
|
||||
// Is optimized as:
|
||||
//
|
||||
// bool S::operator==(const S& o) const {
|
||||
// return memcmp(this, &o, 8) == 0;
|
||||
// }
|
||||
//
|
||||
// Which will later be expanded (ExpandMemCmp) as a single 8-byte icmp.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "llvm/Analysis/Loads.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
@ -34,6 +50,10 @@
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Transforms/Utils/BuildLibCalls.h"
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
@ -50,76 +70,95 @@ static bool isSimpleLoadOrStore(const Instruction *I) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// A BCE atom.
|
||||
// A BCE atom "Binary Compare Expression Atom" represents an integer load
|
||||
// that is a constant offset from a base value, e.g. `a` or `o.c` in the example
|
||||
// at the top.
|
||||
struct BCEAtom {
|
||||
BCEAtom() : GEP(nullptr), LoadI(nullptr), Offset() {}
|
||||
|
||||
const Value *Base() const { return GEP ? GEP->getPointerOperand() : nullptr; }
|
||||
BCEAtom() = default;
|
||||
BCEAtom(GetElementPtrInst *GEP, LoadInst *LoadI, int BaseId, APInt Offset)
|
||||
: GEP(GEP), LoadI(LoadI), BaseId(BaseId), Offset(Offset) {}
|
||||
|
||||
// We want to order BCEAtoms by (Base, Offset). However we cannot use
|
||||
// the pointer values for Base because these are non-deterministic.
|
||||
// To make sure that the sort order is stable, we first assign to each atom
|
||||
// base value an index based on its order of appearance in the chain of
|
||||
// comparisons. We call this index `BaseOrdering`. For example, for:
|
||||
// b[3] == c[2] && a[1] == d[1] && b[4] == c[3]
|
||||
// | block 1 | | block 2 | | block 3 |
|
||||
// b gets assigned index 0 and a index 1, because b appears as LHS in block 1,
|
||||
// which is before block 2.
|
||||
// We then sort by (BaseOrdering[LHS.Base()], LHS.Offset), which is stable.
|
||||
bool operator<(const BCEAtom &O) const {
|
||||
assert(Base() && "invalid atom");
|
||||
assert(O.Base() && "invalid atom");
|
||||
// Just ordering by (Base(), Offset) is sufficient. However, this
|
||||
// means that the ordering will depend on the addresses of the base
|
||||
// values, which are not reproducible from run to run. To guarantee
|
||||
// stability, we use the names of the values if they exist; we sort by:
|
||||
// (Base.getName(), Base(), Offset).
|
||||
const int NameCmp = Base()->getName().compare(O.Base()->getName());
|
||||
if (NameCmp == 0) {
|
||||
if (Base() == O.Base()) {
|
||||
return Offset.slt(O.Offset);
|
||||
}
|
||||
return Base() < O.Base();
|
||||
}
|
||||
return NameCmp < 0;
|
||||
return BaseId != O.BaseId ? BaseId < O.BaseId : Offset.slt(O.Offset);
|
||||
}
|
||||
|
||||
GetElementPtrInst *GEP;
|
||||
LoadInst *LoadI;
|
||||
GetElementPtrInst *GEP = nullptr;
|
||||
LoadInst *LoadI = nullptr;
|
||||
unsigned BaseId = 0;
|
||||
APInt Offset;
|
||||
};
|
||||
|
||||
// A class that assigns increasing ids to values in the order in which they are
|
||||
// seen. See comment in `BCEAtom::operator<()`.
|
||||
class BaseIdentifier {
|
||||
public:
|
||||
// Returns the id for value `Base`, after assigning one if `Base` has not been
|
||||
// seen before.
|
||||
int getBaseId(const Value *Base) {
|
||||
assert(Base && "invalid base");
|
||||
const auto Insertion = BaseToIndex.try_emplace(Base, Order);
|
||||
if (Insertion.second)
|
||||
++Order;
|
||||
return Insertion.first->second;
|
||||
}
|
||||
|
||||
private:
|
||||
unsigned Order = 1;
|
||||
DenseMap<const Value*, int> BaseToIndex;
|
||||
};
|
||||
|
||||
// If this value is a load from a constant offset w.r.t. a base address, and
|
||||
// there are no other users of the load or address, returns the base address and
|
||||
// the offset.
|
||||
BCEAtom visitICmpLoadOperand(Value *const Val) {
|
||||
BCEAtom Result;
|
||||
if (auto *const LoadI = dyn_cast<LoadInst>(Val)) {
|
||||
LLVM_DEBUG(dbgs() << "load\n");
|
||||
if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
|
||||
LLVM_DEBUG(dbgs() << "used outside of block\n");
|
||||
return {};
|
||||
}
|
||||
// Do not optimize atomic loads to non-atomic memcmp
|
||||
if (!LoadI->isSimple()) {
|
||||
LLVM_DEBUG(dbgs() << "volatile or atomic\n");
|
||||
return {};
|
||||
}
|
||||
Value *const Addr = LoadI->getOperand(0);
|
||||
if (auto *const GEP = dyn_cast<GetElementPtrInst>(Addr)) {
|
||||
LLVM_DEBUG(dbgs() << "GEP\n");
|
||||
if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
|
||||
LLVM_DEBUG(dbgs() << "used outside of block\n");
|
||||
return {};
|
||||
}
|
||||
const auto &DL = GEP->getModule()->getDataLayout();
|
||||
if (!isDereferenceablePointer(GEP, DL)) {
|
||||
LLVM_DEBUG(dbgs() << "not dereferenceable\n");
|
||||
// We need to make sure that we can do comparison in any order, so we
|
||||
// require memory to be unconditionally dereferenceable.
|
||||
return {};
|
||||
}
|
||||
Result.Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0);
|
||||
if (GEP->accumulateConstantOffset(DL, Result.Offset)) {
|
||||
Result.GEP = GEP;
|
||||
Result.LoadI = LoadI;
|
||||
}
|
||||
}
|
||||
BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) {
|
||||
auto *const LoadI = dyn_cast<LoadInst>(Val);
|
||||
if (!LoadI)
|
||||
return {};
|
||||
LLVM_DEBUG(dbgs() << "load\n");
|
||||
if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
|
||||
LLVM_DEBUG(dbgs() << "used outside of block\n");
|
||||
return {};
|
||||
}
|
||||
return Result;
|
||||
// Do not optimize atomic loads to non-atomic memcmp
|
||||
if (!LoadI->isSimple()) {
|
||||
LLVM_DEBUG(dbgs() << "volatile or atomic\n");
|
||||
return {};
|
||||
}
|
||||
Value *const Addr = LoadI->getOperand(0);
|
||||
auto *const GEP = dyn_cast<GetElementPtrInst>(Addr);
|
||||
if (!GEP)
|
||||
return {};
|
||||
LLVM_DEBUG(dbgs() << "GEP\n");
|
||||
if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
|
||||
LLVM_DEBUG(dbgs() << "used outside of block\n");
|
||||
return {};
|
||||
}
|
||||
const auto &DL = GEP->getModule()->getDataLayout();
|
||||
if (!isDereferenceablePointer(GEP, DL)) {
|
||||
LLVM_DEBUG(dbgs() << "not dereferenceable\n");
|
||||
// We need to make sure that we can do comparison in any order, so we
|
||||
// require memory to be unconditionally dereferenceable.
|
||||
return {};
|
||||
}
|
||||
APInt Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0);
|
||||
if (!GEP->accumulateConstantOffset(DL, Offset))
|
||||
return {};
|
||||
return BCEAtom(GEP, LoadI, BaseId.getBaseId(GEP->getPointerOperand()),
|
||||
Offset);
|
||||
}
|
||||
|
||||
// A basic block with a comparison between two BCE atoms.
|
||||
// A basic block with a comparison between two BCE atoms, e.g. `a == o.a` in the
|
||||
// example at the top.
|
||||
// The block might do extra work besides the atom comparison, in which case
|
||||
// doesOtherWork() returns true. Under some conditions, the block can be
|
||||
// split into the atom comparison part and the "other work" part
|
||||
@ -137,9 +176,7 @@ class BCECmpBlock {
|
||||
if (Rhs_ < Lhs_) std::swap(Rhs_, Lhs_);
|
||||
}
|
||||
|
||||
bool IsValid() const {
|
||||
return Lhs_.Base() != nullptr && Rhs_.Base() != nullptr;
|
||||
}
|
||||
bool IsValid() const { return Lhs_.BaseId != 0 && Rhs_.BaseId != 0; }
|
||||
|
||||
// Assert the block is consistent: If valid, it should also have
|
||||
// non-null members besides Lhs_ and Rhs_.
|
||||
@ -265,7 +302,8 @@ bool BCECmpBlock::doesOtherWork() const {
|
||||
// Visit the given comparison. If this is a comparison between two valid
|
||||
// BCE atoms, returns the comparison.
|
||||
BCECmpBlock visitICmp(const ICmpInst *const CmpI,
|
||||
const ICmpInst::Predicate ExpectedPredicate) {
|
||||
const ICmpInst::Predicate ExpectedPredicate,
|
||||
BaseIdentifier &BaseId) {
|
||||
// The comparison can only be used once:
|
||||
// - For intermediate blocks, as a branch condition.
|
||||
// - For the final block, as an incoming value for the Phi.
|
||||
@ -275,25 +313,27 @@ BCECmpBlock visitICmp(const ICmpInst *const CmpI,
|
||||
LLVM_DEBUG(dbgs() << "cmp has several uses\n");
|
||||
return {};
|
||||
}
|
||||
if (CmpI->getPredicate() == ExpectedPredicate) {
|
||||
LLVM_DEBUG(dbgs() << "cmp "
|
||||
<< (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne")
|
||||
<< "\n");
|
||||
auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0));
|
||||
if (!Lhs.Base()) return {};
|
||||
auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1));
|
||||
if (!Rhs.Base()) return {};
|
||||
const auto &DL = CmpI->getModule()->getDataLayout();
|
||||
return BCECmpBlock(std::move(Lhs), std::move(Rhs),
|
||||
DL.getTypeSizeInBits(CmpI->getOperand(0)->getType()));
|
||||
}
|
||||
return {};
|
||||
if (CmpI->getPredicate() != ExpectedPredicate)
|
||||
return {};
|
||||
LLVM_DEBUG(dbgs() << "cmp "
|
||||
<< (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne")
|
||||
<< "\n");
|
||||
auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0), BaseId);
|
||||
if (!Lhs.BaseId)
|
||||
return {};
|
||||
auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1), BaseId);
|
||||
if (!Rhs.BaseId)
|
||||
return {};
|
||||
const auto &DL = CmpI->getModule()->getDataLayout();
|
||||
return BCECmpBlock(std::move(Lhs), std::move(Rhs),
|
||||
DL.getTypeSizeInBits(CmpI->getOperand(0)->getType()));
|
||||
}
|
||||
|
||||
// Visit the given comparison block. If this is a comparison between two valid
|
||||
// BCE atoms, returns the comparison.
|
||||
BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
|
||||
const BasicBlock *const PhiBlock) {
|
||||
const BasicBlock *const PhiBlock,
|
||||
BaseIdentifier &BaseId) {
|
||||
if (Block->empty()) return {};
|
||||
auto *const BranchI = dyn_cast<BranchInst>(Block->getTerminator());
|
||||
if (!BranchI) return {};
|
||||
@ -306,7 +346,7 @@ BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
|
||||
auto *const CmpI = dyn_cast<ICmpInst>(Val);
|
||||
if (!CmpI) return {};
|
||||
LLVM_DEBUG(dbgs() << "icmp\n");
|
||||
auto Result = visitICmp(CmpI, ICmpInst::ICMP_EQ);
|
||||
auto Result = visitICmp(CmpI, ICmpInst::ICMP_EQ, BaseId);
|
||||
Result.CmpI = CmpI;
|
||||
Result.BranchI = BranchI;
|
||||
return Result;
|
||||
@ -323,7 +363,8 @@ BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
|
||||
assert(BranchI->getNumSuccessors() == 2 && "expecting a cond branch");
|
||||
BasicBlock *const FalseBlock = BranchI->getSuccessor(1);
|
||||
auto Result = visitICmp(
|
||||
CmpI, FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE);
|
||||
CmpI, FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
|
||||
BaseId);
|
||||
Result.CmpI = CmpI;
|
||||
Result.BranchI = BranchI;
|
||||
return Result;
|
||||
@ -335,9 +376,9 @@ static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons,
|
||||
BCECmpBlock &Comparison) {
|
||||
LLVM_DEBUG(dbgs() << "Block '" << Comparison.BB->getName()
|
||||
<< "': Found cmp of " << Comparison.SizeBits()
|
||||
<< " bits between " << Comparison.Lhs().Base() << " + "
|
||||
<< " bits between " << Comparison.Lhs().BaseId << " + "
|
||||
<< Comparison.Lhs().Offset << " and "
|
||||
<< Comparison.Rhs().Base() << " + "
|
||||
<< Comparison.Rhs().BaseId << " + "
|
||||
<< Comparison.Rhs().Offset << "\n");
|
||||
LLVM_DEBUG(dbgs() << "\n");
|
||||
Comparisons.push_back(Comparison);
|
||||
@ -360,8 +401,8 @@ class BCECmpChain {
|
||||
private:
|
||||
static bool IsContiguous(const BCECmpBlock &First,
|
||||
const BCECmpBlock &Second) {
|
||||
return First.Lhs().Base() == Second.Lhs().Base() &&
|
||||
First.Rhs().Base() == Second.Rhs().Base() &&
|
||||
return First.Lhs().BaseId == Second.Lhs().BaseId &&
|
||||
First.Rhs().BaseId == Second.Rhs().BaseId &&
|
||||
First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset &&
|
||||
First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
|
||||
}
|
||||
@ -385,11 +426,12 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
|
||||
assert(!Blocks.empty() && "a chain should have at least one block");
|
||||
// Now look inside blocks to check for BCE comparisons.
|
||||
std::vector<BCECmpBlock> Comparisons;
|
||||
BaseIdentifier BaseId;
|
||||
for (size_t BlockIdx = 0; BlockIdx < Blocks.size(); ++BlockIdx) {
|
||||
BasicBlock *const Block = Blocks[BlockIdx];
|
||||
assert(Block && "invalid block");
|
||||
BCECmpBlock Comparison = visitCmpBlock(Phi.getIncomingValueForBlock(Block),
|
||||
Block, Phi.getParent());
|
||||
Block, Phi.getParent(), BaseId);
|
||||
Comparison.BB = Block;
|
||||
if (!Comparison.IsValid()) {
|
||||
LLVM_DEBUG(dbgs() << "chain with invalid BCECmpBlock, no merge.\n");
|
||||
@ -466,9 +508,10 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
|
||||
#endif // MERGEICMPS_DOT_ON
|
||||
// Reorder blocks by LHS. We can do that without changing the
|
||||
// semantics because we are only accessing dereferenceable memory.
|
||||
llvm::sort(Comparisons_, [](const BCECmpBlock &a, const BCECmpBlock &b) {
|
||||
return a.Lhs() < b.Lhs();
|
||||
});
|
||||
llvm::sort(Comparisons_,
|
||||
[](const BCECmpBlock &LhsBlock, const BCECmpBlock &RhsBlock) {
|
||||
return LhsBlock.Lhs() < RhsBlock.Lhs();
|
||||
});
|
||||
#ifdef MERGEICMPS_DOT_ON
|
||||
errs() << "AFTER REORDERING:\n\n";
|
||||
dump();
|
||||
|
@ -10,6 +10,7 @@
|
||||
...
|
||||
---
|
||||
name: foo
|
||||
# CHECK-LABEL: name: foo
|
||||
tracksRegLiveness: true
|
||||
constants:
|
||||
- id: 0
|
||||
|
36
test/CodeGen/X86/pr40730.ll
Normal file
36
test/CodeGen/X86/pr40730.ll
Normal file
@ -0,0 +1,36 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
|
||||
|
||||
define <8 x i32> @shuffle_v8i32_0dcd3f14(<8 x i32> %a, <8 x i32> %b) {
|
||||
; CHECK-LABEL: shuffle_v8i32_0dcd3f14:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; CHECK-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
|
||||
; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,1,0]
|
||||
; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,2,3]
|
||||
; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,3,2]
|
||||
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5],ymm0[6,7]
|
||||
; CHECK-NEXT: retq
|
||||
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 13, i32 12, i32 13, i32 3, i32 15, i32 1, i32 4>
|
||||
ret <8 x i32> %shuffle
|
||||
}
|
||||
|
||||
; CHECK: .LCPI1_0:
|
||||
; CHECK-NEXT: .quad 60129542157
|
||||
; CHECK-NEXT: .quad 60129542157
|
||||
; CHECK-NEXT: .quad 68719476736
|
||||
; CHECK-NEXT: .quad 60129542157
|
||||
|
||||
define <8 x i32> @shuffle_v8i32_0dcd3f14_constant(<8 x i32> %a0) {
|
||||
; CHECK-LABEL: shuffle_v8i32_0dcd3f14_constant:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3]
|
||||
; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,1,0]
|
||||
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],mem[1,2,3],ymm0[4],mem[5],ymm0[6,7]
|
||||
; CHECK-NEXT: retq
|
||||
%res = shufflevector <8 x i32> %a0, <8 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, <8 x i32> <i32 0, i32 13, i32 12, i32 13, i32 3, i32 15, i32 1, i32 4>
|
||||
ret <8 x i32> %res
|
||||
}
|
@ -1,7 +1,7 @@
|
||||
// RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t 2> %t.out
|
||||
// RUN: FileCheck --input-file=%t.out %s
|
||||
|
||||
// CHECK: A @@ version cannot be undefined
|
||||
// CHECK: error: versioned symbol foo@@bar must be defined
|
||||
|
||||
.symver undefined, foo@@bar
|
||||
.long undefined
|
||||
|
@ -1,6 +1,6 @@
|
||||
// RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t 2>&1 | FileCheck %s
|
||||
|
||||
// CHECK: Multiple symbol versions defined for foo
|
||||
// CHECK: error: multiple symbol versions defined for foo
|
||||
|
||||
.symver foo, foo@1
|
||||
.symver foo, foo@2
|
||||
|
@ -1114,3 +1114,18 @@ define <2 x float> @frem_splat_constant1(<2 x float> %x) {
|
||||
ret <2 x float> %r
|
||||
}
|
||||
|
||||
; Equivalent shuffle masks, but only one is a narrowing op.
|
||||
|
||||
define <2 x i1> @PR40734(<1 x i1> %x, <4 x i1> %y) {
|
||||
; CHECK-LABEL: @PR40734(
|
||||
; CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <1 x i1> zeroinitializer, <1 x i1> [[X:%.*]], <2 x i32> <i32 0, i32 1>
|
||||
; CHECK-NEXT: [[NARROW:%.*]] = shufflevector <4 x i1> [[Y:%.*]], <4 x i1> undef, <2 x i32> <i32 0, i32 1>
|
||||
; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[WIDEN]], [[NARROW]]
|
||||
; CHECK-NEXT: ret <2 x i1> [[R]]
|
||||
;
|
||||
%widen = shufflevector <1 x i1> zeroinitializer, <1 x i1> %x, <2 x i32> <i32 0, i32 1>
|
||||
%narrow = shufflevector <4 x i1> %y, <4 x i1> undef, <2 x i32> <i32 0, i32 1>
|
||||
%r = and <2 x i1> %widen, %narrow
|
||||
ret <2 x i1> %r
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user