mirror of
https://git.FreeBSD.org/src.git
synced 2025-01-01 12:19:28 +00:00
Vendor import of llvm release_40 branch r294123:
https://llvm.org/svn/llvm-project/llvm/branches/release_40@294123
This commit is contained in:
parent
823f87a1f8
commit
963c784e8c
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/vendor/llvm/dist/; revision=313289 svn path=/vendor/llvm/llvm-release_40-r294123/; revision=313290; tag=vendor/llvm/llvm-release_40-r294123
@ -8123,9 +8123,12 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
|
||||
}
|
||||
|
||||
// More folding opportunities when target permits.
|
||||
if ((AllowFusion || HasFMAD) && Aggressive) {
|
||||
if (Aggressive) {
|
||||
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
|
||||
if (N0.getOpcode() == PreferredFusedOpcode &&
|
||||
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
|
||||
// are currently only supported on binary nodes.
|
||||
if (Options.UnsafeFPMath &&
|
||||
N0.getOpcode() == PreferredFusedOpcode &&
|
||||
N0.getOperand(2).getOpcode() == ISD::FMUL &&
|
||||
N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
|
||||
return DAG.getNode(PreferredFusedOpcode, SL, VT,
|
||||
@ -8137,7 +8140,10 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
|
||||
}
|
||||
|
||||
// fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
|
||||
if (N1->getOpcode() == PreferredFusedOpcode &&
|
||||
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
|
||||
// are currently only supported on binary nodes.
|
||||
if (Options.UnsafeFPMath &&
|
||||
N1->getOpcode() == PreferredFusedOpcode &&
|
||||
N1.getOperand(2).getOpcode() == ISD::FMUL &&
|
||||
N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
|
||||
return DAG.getNode(PreferredFusedOpcode, SL, VT,
|
||||
@ -8367,10 +8373,13 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
|
||||
}
|
||||
|
||||
// More folding opportunities when target permits.
|
||||
if ((AllowFusion || HasFMAD) && Aggressive) {
|
||||
if (Aggressive) {
|
||||
// fold (fsub (fma x, y, (fmul u, v)), z)
|
||||
// -> (fma x, y (fma u, v, (fneg z)))
|
||||
if (N0.getOpcode() == PreferredFusedOpcode &&
|
||||
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
|
||||
// are currently only supported on binary nodes.
|
||||
if (Options.UnsafeFPMath &&
|
||||
N0.getOpcode() == PreferredFusedOpcode &&
|
||||
N0.getOperand(2).getOpcode() == ISD::FMUL &&
|
||||
N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
|
||||
return DAG.getNode(PreferredFusedOpcode, SL, VT,
|
||||
@ -8384,7 +8393,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
|
||||
|
||||
// fold (fsub x, (fma y, z, (fmul u, v)))
|
||||
// -> (fma (fneg y), z, (fma (fneg u), v, x))
|
||||
if (N1.getOpcode() == PreferredFusedOpcode &&
|
||||
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
|
||||
// are currently only supported on binary nodes.
|
||||
if (Options.UnsafeFPMath &&
|
||||
N1.getOpcode() == PreferredFusedOpcode &&
|
||||
N1.getOperand(2).getOpcode() == ISD::FMUL) {
|
||||
SDValue N20 = N1.getOperand(2).getOperand(0);
|
||||
SDValue N21 = N1.getOperand(2).getOperand(1);
|
||||
|
@ -4039,11 +4039,6 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
|
||||
Constant *CMinus1 = ConstantInt::get(Op0->getType(), *CmpC - 1);
|
||||
return new ICmpInst(ICmpInst::ICMP_EQ, Op0, CMinus1);
|
||||
}
|
||||
// (x <u 2147483648) -> (x >s -1) -> true if sign bit clear
|
||||
if (CmpC->isMinSignedValue()) {
|
||||
Constant *AllOnes = Constant::getAllOnesValue(Op0->getType());
|
||||
return new ICmpInst(ICmpInst::ICMP_SGT, Op0, AllOnes);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -4063,11 +4058,6 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
|
||||
if (*CmpC == Op0Max - 1)
|
||||
return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
|
||||
ConstantInt::get(Op1->getType(), *CmpC + 1));
|
||||
|
||||
// (x >u 2147483647) -> (x <s 0) -> true if sign bit set
|
||||
if (CmpC->isMaxSignedValue())
|
||||
return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
|
||||
Constant::getNullValue(Op0->getType()));
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -4299,6 +4289,27 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
|
||||
(SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
|
||||
return nullptr;
|
||||
|
||||
// FIXME: We only do this after checking for min/max to prevent infinite
|
||||
// looping caused by a reverse canonicalization of these patterns for min/max.
|
||||
// FIXME: The organization of folds is a mess. These would naturally go into
|
||||
// canonicalizeCmpWithConstant(), but we can't move all of the above folds
|
||||
// down here after the min/max restriction.
|
||||
ICmpInst::Predicate Pred = I.getPredicate();
|
||||
const APInt *C;
|
||||
if (match(Op1, m_APInt(C))) {
|
||||
// For i32: x >u 2147483647 -> x <s 0 -> true if sign bit set
|
||||
if (Pred == ICmpInst::ICMP_UGT && C->isMaxSignedValue()) {
|
||||
Constant *Zero = Constant::getNullValue(Op0->getType());
|
||||
return new ICmpInst(ICmpInst::ICMP_SLT, Op0, Zero);
|
||||
}
|
||||
|
||||
// For i32: x <u 2147483648 -> x >s -1 -> true if sign bit clear
|
||||
if (Pred == ICmpInst::ICMP_ULT && C->isMinSignedValue()) {
|
||||
Constant *AllOnes = Constant::getAllOnesValue(Op0->getType());
|
||||
return new ICmpInst(ICmpInst::ICMP_SGT, Op0, AllOnes);
|
||||
}
|
||||
}
|
||||
|
||||
if (Instruction *Res = foldICmpInstWithConstant(I))
|
||||
return Res;
|
||||
|
||||
|
@ -158,8 +158,9 @@ struct MemAccessTy {
|
||||
|
||||
bool operator!=(MemAccessTy Other) const { return !(*this == Other); }
|
||||
|
||||
static MemAccessTy getUnknown(LLVMContext &Ctx) {
|
||||
return MemAccessTy(Type::getVoidTy(Ctx), UnknownAddressSpace);
|
||||
static MemAccessTy getUnknown(LLVMContext &Ctx,
|
||||
unsigned AS = UnknownAddressSpace) {
|
||||
return MemAccessTy(Type::getVoidTy(Ctx), AS);
|
||||
}
|
||||
};
|
||||
|
||||
@ -2279,8 +2280,10 @@ bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
|
||||
// TODO: Be less conservative when the type is similar and can use the same
|
||||
// addressing modes.
|
||||
if (Kind == LSRUse::Address) {
|
||||
if (AccessTy != LU.AccessTy)
|
||||
NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext());
|
||||
if (AccessTy.MemTy != LU.AccessTy.MemTy) {
|
||||
NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
|
||||
AccessTy.AddrSpace);
|
||||
}
|
||||
}
|
||||
|
||||
// Conservatively assume HasBaseReg is true for now.
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -mattr=+fp32-denormals < %s | FileCheck -check-prefix=SI-FMA -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math -mattr=+fp32-denormals < %s | FileCheck -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
; Note: The SI-FMA conversions of type x * (y + 1) --> x * y + x would be
|
||||
; beneficial even without fp32 denormals, but they do require no-infs-fp-math
|
||||
@ -308,8 +308,14 @@ define void @combine_to_fma_fsub_2_f64_2uses_mul(double addrspace(1)* noalias %o
|
||||
; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
|
||||
; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
|
||||
; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:32{{$}}
|
||||
; SI: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]], -[[Z]]
|
||||
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[FMA0]]
|
||||
|
||||
; SI-SAFE: v_mul_f64 [[TMP0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]]
|
||||
; SI-SAFE: v_fma_f64 [[TMP1:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[TMP0]]
|
||||
; SI-SAFE: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[TMP1]], -[[Z]]
|
||||
|
||||
; SI-UNSAFE: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]], -[[Z]]
|
||||
; SI-UNSAFE: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[FMA0]]
|
||||
|
||||
; SI: buffer_store_dwordx2 [[RESULT]]
|
||||
define void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
@ -343,8 +349,14 @@ define void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %
|
||||
; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
|
||||
; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
|
||||
; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:32{{$}}
|
||||
; SI: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[U]], [[V]], [[X]]
|
||||
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[Y]], [[Z]], [[FMA0]]
|
||||
|
||||
; SI-SAFE: v_mul_f64 [[TMP0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]]
|
||||
; SI-SAFE: v_fma_f64 [[TMP1:v\[[0-9]+:[0-9]+\]]], [[Y]], [[Z]], [[TMP0]]
|
||||
; SI-SAFE: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], -[[TMP1]]
|
||||
|
||||
; SI-UNSAFE: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[U]], [[V]], [[X]]
|
||||
; SI-UNSAFE: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[Y]], [[Z]], [[FMA0]]
|
||||
|
||||
; SI: buffer_store_dwordx2 [[RESULT]]
|
||||
define void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
@ -1,12 +1,12 @@
|
||||
; Make sure we still form mad even when unsafe math or fp-contract is allowed instead of fma.
|
||||
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s
|
||||
|
||||
; Make sure we don't form mad with denormals
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
declare float @llvm.fabs.f32(float) #0
|
||||
@ -21,7 +21,7 @@ declare float @llvm.fmuladd.f32(float, float, float) #0
|
||||
|
||||
; SI-STD: v_mac_f32_e32 [[C]], [[B]], [[A]]
|
||||
|
||||
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
|
||||
; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
|
||||
|
||||
; SI-DENORM-SLOWFMAF-NOT: v_fma
|
||||
; SI-DENORM-SLOWFMAF-NOT: v_mad
|
||||
@ -58,8 +58,8 @@ define void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrsp
|
||||
; SI-STD-DAG: v_mac_f32_e32 [[C]], [[B]], [[A]]
|
||||
; SI-STD-DAG: v_mac_f32_e32 [[D]], [[B]], [[A]]
|
||||
|
||||
; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]]
|
||||
; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]]
|
||||
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]]
|
||||
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]]
|
||||
|
||||
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
|
||||
; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
|
||||
@ -100,7 +100,7 @@ define void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float a
|
||||
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
|
||||
; SI-STD: v_mac_f32_e32 [[C]], [[B]], [[A]]
|
||||
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
|
||||
; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
|
||||
|
||||
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
|
||||
; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
|
||||
@ -131,7 +131,7 @@ define void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrsp
|
||||
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
|
||||
; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]]
|
||||
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]]
|
||||
; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]]
|
||||
|
||||
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
|
||||
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
|
||||
@ -164,8 +164,8 @@ define void @combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float a
|
||||
; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]]
|
||||
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
|
||||
|
||||
; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]]
|
||||
; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
|
||||
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]]
|
||||
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
|
||||
|
||||
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
|
||||
; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
|
||||
@ -203,7 +203,7 @@ define void @combine_to_mad_fsub_0_f32_2use(float addrspace(1)* noalias %out, fl
|
||||
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
|
||||
; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]]
|
||||
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]]
|
||||
; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]]
|
||||
|
||||
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
|
||||
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
|
||||
@ -235,8 +235,8 @@ define void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float a
|
||||
; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]]
|
||||
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]]
|
||||
|
||||
; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]]
|
||||
; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]]
|
||||
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]]
|
||||
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]]
|
||||
|
||||
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
|
||||
; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[TMP]], [[C]]
|
||||
@ -275,7 +275,7 @@ define void @combine_to_mad_fsub_1_f32_2use(float addrspace(1)* noalias %out, fl
|
||||
|
||||
; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
|
||||
|
||||
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], -[[C]]
|
||||
; SI-DENORM-FASTFMAF: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], -[[C]]
|
||||
|
||||
; SI-DENORM-SLOWFMAF: v_mul_f32_e64 [[TMP:v[0-9]+]], [[A]], -[[B]]
|
||||
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
|
||||
@ -309,8 +309,8 @@ define void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float a
|
||||
; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], -[[B]], -[[C]]
|
||||
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], -[[B]], -[[D]]
|
||||
|
||||
; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
|
||||
; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], -[[D]]
|
||||
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
|
||||
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], -[[D]]
|
||||
|
||||
; SI-DENORM-SLOWFMAF: v_mul_f32_e64 [[TMP:v[0-9]+]], [[A]], -[[B]]
|
||||
; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
|
||||
@ -352,8 +352,8 @@ define void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1)* noalias %ou
|
||||
; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
|
||||
; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
|
||||
|
||||
; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
|
||||
; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
|
||||
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
|
||||
; SI-DENORM-FASTFMAF-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
|
||||
|
||||
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
|
||||
; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e64 [[RESULT0:v[0-9]+]], -[[TMP]], [[C]]
|
||||
@ -399,12 +399,9 @@ define void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1)* noalias %ou
|
||||
; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
|
||||
; SI-STD: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP1]]
|
||||
|
||||
; SI-DENORM: v_fma_f32 [[TMP0:v[0-9]+]], [[D]], [[E]], -[[C]]
|
||||
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP0]]
|
||||
|
||||
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
|
||||
; SI-DENORM-SLOWFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
|
||||
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[C]], [[TMP1]]
|
||||
; SI-DENORM: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
|
||||
; SI-DENORM: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
|
||||
; SI-DENORM: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[C]], [[TMP1]]
|
||||
|
||||
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
define void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
|
||||
@ -444,12 +441,9 @@ define void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %o
|
||||
; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
|
||||
; SI-STD: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]]
|
||||
|
||||
; SI-DENORM: v_fma_f32 [[TMP0:v[0-9]+]], -[[D]], [[E]], [[A]]
|
||||
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP0]]
|
||||
|
||||
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
|
||||
; SI-DENORM-SLOWFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
|
||||
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]]
|
||||
; SI-DENORM: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
|
||||
; SI-DENORM: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
|
||||
; SI-DENORM: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]]
|
||||
|
||||
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: s_endpgm
|
||||
@ -485,19 +479,23 @@ define void @aggressive_combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %o
|
||||
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
|
||||
; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
|
||||
|
||||
; SI-STD: v_mad_f32 [[TMP:v[0-9]+]], [[D]], [[E]], -[[C]]
|
||||
; SI-STD: v_mac_f32_e32 [[TMP]], [[B]], [[A]]
|
||||
; SI-STD-SAFE: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
|
||||
; SI-STD-SAFE: v_mac_f32_e32 [[TMP0]], [[B]], [[A]]
|
||||
; SI-STD-SAFE: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP0]]
|
||||
|
||||
; SI-DENORM: v_fma_f32 [[TMP:v[0-9]+]], [[D]], [[E]], -[[C]]
|
||||
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP]]
|
||||
; SI-STD-UNSAFE: v_mad_f32 [[RESULT:v[0-9]+]], [[D]], [[E]], -[[C]]
|
||||
; SI-STD-UNSAFE: v_mac_f32_e32 [[RESULT]], [[B]], [[A]]
|
||||
|
||||
; SI-DENORM-FASTFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
|
||||
; SI-DENORM-FASTFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
|
||||
; SI-DENORM-FASTFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP1]]
|
||||
|
||||
; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
|
||||
; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[B]], [[A]]
|
||||
; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP0]], [[TMP1]]
|
||||
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP2]]
|
||||
|
||||
; SI-DENORM: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI-STD: buffer_store_dword [[TMP]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: s_endpgm
|
||||
define void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
@ -532,11 +530,16 @@ define void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %o
|
||||
; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
|
||||
; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
|
||||
|
||||
; SI-STD: v_mad_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]]
|
||||
; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]]
|
||||
; SI-STD-SAFE: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
|
||||
; SI-STD-SAFE: v_mac_f32_e32 [[TMP0]], [[C]], [[B]]
|
||||
; SI-STD-SAFE: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP0]], [[A]]
|
||||
|
||||
; SI-DENORM: v_fma_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]]
|
||||
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]]
|
||||
; SI-STD-UNSAFE: v_mad_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]]
|
||||
; SI-STD-UNSAFE: v_mad_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]]
|
||||
|
||||
; SI-DENORM-FASTFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
|
||||
; SI-DENORM-FASTFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
|
||||
; SI-DENORM-FASTFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]]
|
||||
|
||||
; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
|
||||
; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[C]], [[B]]
|
||||
|
@ -1,9 +1,10 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s -check-prefix=CHECK
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast -enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-UNSAFE
|
||||
|
||||
define ptx_device float @t1_f32(float %x, float %y, float %z,
|
||||
float %u, float %v) {
|
||||
; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
|
||||
; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
|
||||
; CHECK-UNSAFE: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
|
||||
; CHECK-UNSAFE: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fmul float %x, %y
|
||||
%b = fmul float %u, %v
|
||||
@ -14,8 +15,8 @@ define ptx_device float @t1_f32(float %x, float %y, float %z,
|
||||
|
||||
define ptx_device double @t1_f64(double %x, double %y, double %z,
|
||||
double %u, double %v) {
|
||||
; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
|
||||
; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
|
||||
; CHECK-UNSAFE: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
|
||||
; CHECK-UNSAFE: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fmul double %x, %y
|
||||
%b = fmul double %u, %v
|
||||
|
@ -1,5 +1,7 @@
|
||||
; RUN: llc -verify-machineinstrs < %s -march=ppc32 -fp-contract=fast -mattr=-vsx -disable-ppc-vsx-fma-mutation=false | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mattr=+vsx -mcpu=pwr7 -disable-ppc-vsx-fma-mutation=false | FileCheck -check-prefix=CHECK-VSX %s
|
||||
; RUN: llc -verify-machineinstrs < %s -march=ppc32 -fp-contract=fast -mattr=-vsx -disable-ppc-vsx-fma-mutation=false | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SAFE %s
|
||||
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mattr=+vsx -mcpu=pwr7 -disable-ppc-vsx-fma-mutation=false | FileCheck -check-prefix=CHECK-VSX -check-prefix=CHECK-VSX-SAFE %s
|
||||
; RUN: llc -verify-machineinstrs < %s -march=ppc32 -fp-contract=fast -enable-unsafe-fp-math -mattr=-vsx -disable-ppc-vsx-fma-mutation=false | FileCheck -check-prefix=CHECK -check-prefix=CHECK-UNSAFE %s
|
||||
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -enable-unsafe-fp-math -mattr=+vsx -mcpu=pwr7 -disable-ppc-vsx-fma-mutation=false | FileCheck -check-prefix=CHECK-VSX -check-prefix=CHECK-UNSAFE-VSX %s
|
||||
|
||||
define double @test_FMADD_ASSOC1(double %A, double %B, double %C,
|
||||
double %D, double %E) {
|
||||
@ -8,16 +10,28 @@ define double @test_FMADD_ASSOC1(double %A, double %B, double %C,
|
||||
%H = fadd double %F, %G ; <double> [#uses=1]
|
||||
%I = fadd double %H, %E ; <double> [#uses=1]
|
||||
ret double %I
|
||||
; CHECK-LABEL: test_FMADD_ASSOC1:
|
||||
; CHECK: fmadd
|
||||
; CHECK-NEXT: fmadd
|
||||
; CHECK-NEXT: blr
|
||||
; CHECK-SAFE-LABEL: test_FMADD_ASSOC1:
|
||||
; CHECK-SAFE: fmul
|
||||
; CHECK-SAFE-NEXT: fmadd
|
||||
; CHECK-SAFE-NEXT: fadd
|
||||
; CHECK-SAFE-NEXT: blr
|
||||
|
||||
; CHECK-VSX-LABEL: test_FMADD_ASSOC1:
|
||||
; CHECK-VSX: xsmaddmdp
|
||||
; CHECK-VSX-NEXT: xsmaddadp
|
||||
; CHECK-VSX-NEXT: fmr
|
||||
; CHECK-VSX-NEXT: blr
|
||||
; CHECK-UNSAFE-LABEL: test_FMADD_ASSOC1:
|
||||
; CHECK-UNSAFE: fmadd
|
||||
; CHECK-UNSAFE-NEXT: fmadd
|
||||
; CHECK-UNSAFE-NEXT: blr
|
||||
|
||||
; CHECK-VSX-SAFE-LABEL: test_FMADD_ASSOC1:
|
||||
; CHECK-VSX-SAFE: xsmuldp
|
||||
; CHECK-VSX-SAFE-NEXT: xsmaddadp
|
||||
; CHECK-VSX-SAFE-NEXT: xsadddp
|
||||
; CHECK-VSX-SAFE-NEXT: blr
|
||||
|
||||
; CHECK-VSX-UNSAFE-LABEL: test_FMADD_ASSOC1:
|
||||
; CHECK-VSX-UNSAFE: xsmaddmdp
|
||||
; CHECK-VSX-UNSAFE-NEXT: xsmaddadp
|
||||
; CHECK-VSX-UNSAFE-NEXT: fmr
|
||||
; CHECK-VSX-UNSAFE-NEXT: blr
|
||||
}
|
||||
|
||||
define double @test_FMADD_ASSOC2(double %A, double %B, double %C,
|
||||
@ -27,16 +41,28 @@ define double @test_FMADD_ASSOC2(double %A, double %B, double %C,
|
||||
%H = fadd double %F, %G ; <double> [#uses=1]
|
||||
%I = fadd double %E, %H ; <double> [#uses=1]
|
||||
ret double %I
|
||||
; CHECK-LABEL: test_FMADD_ASSOC2:
|
||||
; CHECK: fmadd
|
||||
; CHECK-NEXT: fmadd
|
||||
; CHECK-NEXT: blr
|
||||
; CHECK-SAFE-LABEL: test_FMADD_ASSOC2:
|
||||
; CHECK-SAFE: fmul
|
||||
; CHECK-SAFE-NEXT: fmadd
|
||||
; CHECK-SAFE-NEXT: fadd
|
||||
; CHECK-SAFE-NEXT: blr
|
||||
|
||||
; CHECK-VSX-LABEL: test_FMADD_ASSOC2:
|
||||
; CHECK-VSX: xsmaddmdp
|
||||
; CHECK-VSX-NEXT: xsmaddadp
|
||||
; CHECK-VSX-NEXT: fmr
|
||||
; CHECK-VSX-NEXT: blr
|
||||
; CHECK-UNSAFE-LABEL: test_FMADD_ASSOC2:
|
||||
; CHECK-UNSAFE: fmadd
|
||||
; CHECK-UNSAFE-NEXT: fmadd
|
||||
; CHECK-UNSAFE-NEXT: blr
|
||||
|
||||
; CHECK-VSX-SAFE-LABEL: test_FMADD_ASSOC2:
|
||||
; CHECK-VSX-SAFE: xsmuldp
|
||||
; CHECK-VSX-SAFE-NEXT: xsmaddadp
|
||||
; CHECK-VSX-SAFE-NEXT: xsadddp
|
||||
; CHECK-VSX-SAFE-NEXT: blr
|
||||
|
||||
; CHECK-VSX-UNSAFE-LABEL: test_FMADD_ASSOC2:
|
||||
; CHECK-VSX-UNSAFE: xsmaddmdp
|
||||
; CHECK-VSX-UNSAFE-NEXT: xsmaddadp
|
||||
; CHECK-VSX-UNSAFE-NEXT: fmr
|
||||
; CHECK-VSX-UNSAFE-NEXT: blr
|
||||
}
|
||||
|
||||
define double @test_FMSUB_ASSOC1(double %A, double %B, double %C,
|
||||
@ -46,16 +72,28 @@ define double @test_FMSUB_ASSOC1(double %A, double %B, double %C,
|
||||
%H = fadd double %F, %G ; <double> [#uses=1]
|
||||
%I = fsub double %H, %E ; <double> [#uses=1]
|
||||
ret double %I
|
||||
; CHECK-LABEL: test_FMSUB_ASSOC1:
|
||||
; CHECK: fmsub
|
||||
; CHECK-NEXT: fmadd
|
||||
; CHECK-NEXT: blr
|
||||
; CHECK-SAFE-LABEL: test_FMSUB_ASSOC1:
|
||||
; CHECK-SAFE: fmul
|
||||
; CHECK-SAFE-NEXT: fmadd
|
||||
; CHECK-SAFE-NEXT: fsub
|
||||
; CHECK-SAFE-NEXT: blr
|
||||
|
||||
; CHECK-VSX-LABEL: test_FMSUB_ASSOC1:
|
||||
; CHECK-VSX: xsmsubmdp
|
||||
; CHECK-VSX-NEXT: xsmaddadp
|
||||
; CHECK-VSX-NEXT: fmr
|
||||
; CHECK-VSX-NEXT: blr
|
||||
; CHECK-UNSAFE-LABEL: test_FMSUB_ASSOC1:
|
||||
; CHECK-UNSAFE: fmsub
|
||||
; CHECK-UNSAFE-NEXT: fmadd
|
||||
; CHECK-UNSAFE-NEXT: blr
|
||||
|
||||
; CHECK-SAFE-VSX-LABEL: test_FMSUB_ASSOC1:
|
||||
; CHECK-SAFE-VSX: xsmuldp
|
||||
; CHECK-SAFE-VSX-NEXT: xsmaddadp
|
||||
; CHECK-SAFE-VSX-NEXT: xssubdp
|
||||
; CHECK-SAFE-VSX-NEXT: blr
|
||||
|
||||
; CHECK-UNSAFE-VSX-LABEL: test_FMSUB_ASSOC1:
|
||||
; CHECK-UNSAFE-VSX: xsmsubmdp
|
||||
; CHECK-UNSAFE-VSX-NEXT: xsmaddadp
|
||||
; CHECK-UNSAFE-VSX-NEXT: fmr
|
||||
; CHECK-UNSAFE-VSX-NEXT: blr
|
||||
}
|
||||
|
||||
define double @test_FMSUB_ASSOC2(double %A, double %B, double %C,
|
||||
@ -65,16 +103,28 @@ define double @test_FMSUB_ASSOC2(double %A, double %B, double %C,
|
||||
%H = fadd double %F, %G ; <double> [#uses=1]
|
||||
%I = fsub double %E, %H ; <double> [#uses=1]
|
||||
ret double %I
|
||||
; CHECK-LABEL: test_FMSUB_ASSOC2:
|
||||
; CHECK: fnmsub
|
||||
; CHECK-NEXT: fnmsub
|
||||
; CHECK-NEXT: blr
|
||||
; CHECK-SAFE-LABEL: test_FMSUB_ASSOC2:
|
||||
; CHECK-SAFE: fmul
|
||||
; CHECK-SAFE-NEXT: fmadd
|
||||
; CHECK-SAFE-NEXT: fsub
|
||||
; CHECK-SAFE-NEXT: blr
|
||||
|
||||
; CHECK-VSX-LABEL: test_FMSUB_ASSOC2:
|
||||
; CHECK-VSX: xsnmsubmdp
|
||||
; CHECK-VSX-NEXT: xsnmsubadp
|
||||
; CHECK-VSX-NEXT: fmr
|
||||
; CHECK-VSX-NEXT: blr
|
||||
; CHECK-UNSAFE-LABEL: test_FMSUB_ASSOC2:
|
||||
; CHECK-UNSAFE: fnmsub
|
||||
; CHECK-UNSAFE-NEXT: fnmsub
|
||||
; CHECK-UNSAFE-NEXT: blr
|
||||
|
||||
; CHECK-SAFE-VSX-LABEL: test_FMSUB_ASSOC2:
|
||||
; CHECK-SAFE-VSX: xsmuldp
|
||||
; CHECK-SAFE-VSX-NEXT: xsmaddadp
|
||||
; CHECK-SAFE-VSX-NEXT: xssubdp
|
||||
; CHECK-SAFE-VSX-NEXT: blr
|
||||
|
||||
; CHECK-UNSAFE-VSX-LABEL: test_FMSUB_ASSOC2:
|
||||
; CHECK-UNSAFE-VSX: xsnmsubmdp
|
||||
; CHECK-UNSAFE-VSX-NEXT: xsnmsubadp
|
||||
; CHECK-UNSAFE-VSX-NEXT: fmr
|
||||
; CHECK-UNSAFE-VSX-NEXT: blr
|
||||
}
|
||||
|
||||
define double @test_FMADD_ASSOC_EXT1(float %A, float %B, double %C,
|
||||
|
BIN
test/Object/Inputs/phdr-note.elf-x86-64
Executable file
BIN
test/Object/Inputs/phdr-note.elf-x86-64
Executable file
Binary file not shown.
BIN
test/Object/Inputs/phdrs.elf-x86-64
Executable file
BIN
test/Object/Inputs/phdrs.elf-x86-64
Executable file
Binary file not shown.
@ -339,3 +339,84 @@ define i32 @test75(i32 %x) {
|
||||
ret i32 %retval
|
||||
}
|
||||
|
||||
; The next 3 min tests should canonicalize to the same form...and not infinite loop.
|
||||
|
||||
define double @PR31751_umin1(i32 %x) {
|
||||
; CHECK-LABEL: @PR31751_umin1(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 %x, 2147483647
|
||||
; CHECK-NEXT: [[CONV1:%.*]] = select i1 [[TMP1]], i32 %x, i32 2147483647
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[CONV1]] to double
|
||||
; CHECK-NEXT: ret double [[TMP2]]
|
||||
;
|
||||
%cmp = icmp slt i32 %x, 0
|
||||
%sel = select i1 %cmp, i32 2147483647, i32 %x
|
||||
%conv = sitofp i32 %sel to double
|
||||
ret double %conv
|
||||
}
|
||||
|
||||
define double @PR31751_umin2(i32 %x) {
|
||||
; CHECK-LABEL: @PR31751_umin2(
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 %x, 2147483647
|
||||
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 %x, i32 2147483647
|
||||
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double
|
||||
; CHECK-NEXT: ret double [[CONV]]
|
||||
;
|
||||
%cmp = icmp ult i32 %x, 2147483647
|
||||
%sel = select i1 %cmp, i32 %x, i32 2147483647
|
||||
%conv = sitofp i32 %sel to double
|
||||
ret double %conv
|
||||
}
|
||||
|
||||
define double @PR31751_umin3(i32 %x) {
|
||||
; CHECK-LABEL: @PR31751_umin3(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 %x, 2147483647
|
||||
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], i32 %x, i32 2147483647
|
||||
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double
|
||||
; CHECK-NEXT: ret double [[CONV]]
|
||||
;
|
||||
%cmp = icmp ugt i32 %x, 2147483647
|
||||
%sel = select i1 %cmp, i32 2147483647, i32 %x
|
||||
%conv = sitofp i32 %sel to double
|
||||
ret double %conv
|
||||
}
|
||||
|
||||
; The next 3 max tests should canonicalize to the same form...and not infinite loop.
|
||||
|
||||
define double @PR31751_umax1(i32 %x) {
|
||||
; CHECK-LABEL: @PR31751_umax1(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 %x, -2147483648
|
||||
; CHECK-NEXT: [[CONV1:%.*]] = select i1 [[TMP1]], i32 %x, i32 -2147483648
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[CONV1]] to double
|
||||
; CHECK-NEXT: ret double [[TMP2]]
|
||||
;
|
||||
%cmp = icmp sgt i32 %x, -1
|
||||
%sel = select i1 %cmp, i32 2147483648, i32 %x
|
||||
%conv = sitofp i32 %sel to double
|
||||
ret double %conv
|
||||
}
|
||||
|
||||
define double @PR31751_umax2(i32 %x) {
|
||||
; CHECK-LABEL: @PR31751_umax2(
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 %x, -2147483648
|
||||
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 %x, i32 -2147483648
|
||||
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double
|
||||
; CHECK-NEXT: ret double [[CONV]]
|
||||
;
|
||||
%cmp = icmp ugt i32 %x, 2147483648
|
||||
%sel = select i1 %cmp, i32 %x, i32 2147483648
|
||||
%conv = sitofp i32 %sel to double
|
||||
ret double %conv
|
||||
}
|
||||
|
||||
define double @PR31751_umax3(i32 %x) {
|
||||
; CHECK-LABEL: @PR31751_umax3(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 %x, -2147483648
|
||||
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], i32 %x, i32 -2147483648
|
||||
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double
|
||||
; CHECK-NEXT: ret double [[CONV]]
|
||||
;
|
||||
%cmp = icmp ult i32 %x, 2147483648
|
||||
%sel = select i1 %cmp, i32 2147483648, i32 %x
|
||||
%conv = sitofp i32 %sel to double
|
||||
ret double %conv
|
||||
}
|
||||
|
@ -0,0 +1,54 @@
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -loop-reduce %s | FileCheck %s
|
||||
|
||||
; Test for assert resulting from inconsistent isLegalAddressingMode
|
||||
; answers when the address space was dropped from the query.
|
||||
|
||||
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
|
||||
|
||||
%0 = type { i32, double, i32, float }
|
||||
|
||||
; CHECK-LABEL: @lsr_crash_preserve_addrspace_unknown_type(
|
||||
; CHECK: %tmp4 = bitcast %0 addrspace(3)* %tmp to double addrspace(3)*
|
||||
; CHECK: %scevgep5 = getelementptr double, double addrspace(3)* %tmp4, i32 1
|
||||
; CHECK: load double, double addrspace(3)* %scevgep5
|
||||
|
||||
; CHECK: %scevgep = getelementptr i32, i32 addrspace(3)* %tmp1, i32 4
|
||||
; CHECK: %tmp14 = load i32, i32 addrspace(3)* %scevgep
|
||||
define void @lsr_crash_preserve_addrspace_unknown_type() #0 {
|
||||
bb:
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb17, %bb
|
||||
%tmp = phi %0 addrspace(3)* [ undef, %bb ], [ %tmp18, %bb17 ]
|
||||
%tmp2 = getelementptr inbounds %0, %0 addrspace(3)* %tmp, i64 0, i32 1
|
||||
%tmp3 = load double, double addrspace(3)* %tmp2, align 8
|
||||
br label %bb4
|
||||
|
||||
bb4: ; preds = %bb1
|
||||
br i1 undef, label %bb8, label %bb5
|
||||
|
||||
bb5: ; preds = %bb4
|
||||
unreachable
|
||||
|
||||
bb8: ; preds = %bb4
|
||||
%tmp9 = getelementptr inbounds %0, %0 addrspace(3)* %tmp, i64 0, i32 0
|
||||
%tmp10 = load i32, i32 addrspace(3)* %tmp9, align 4
|
||||
%tmp11 = icmp eq i32 0, %tmp10
|
||||
br i1 %tmp11, label %bb12, label %bb17
|
||||
|
||||
bb12: ; preds = %bb8
|
||||
%tmp13 = getelementptr inbounds %0, %0 addrspace(3)* %tmp, i64 0, i32 2
|
||||
%tmp14 = load i32, i32 addrspace(3)* %tmp13, align 4
|
||||
%tmp15 = icmp eq i32 0, %tmp14
|
||||
br i1 %tmp15, label %bb16, label %bb17
|
||||
|
||||
bb16: ; preds = %bb12
|
||||
unreachable
|
||||
|
||||
bb17: ; preds = %bb12, %bb8
|
||||
%tmp18 = getelementptr inbounds %0, %0 addrspace(3)* %tmp, i64 2
|
||||
br label %bb1
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
20
test/tools/llvm-objdump/X86/openbsd-headers.test
Normal file
20
test/tools/llvm-objdump/X86/openbsd-headers.test
Normal file
@ -0,0 +1,20 @@
|
||||
## openbsd-phdrs.elf-x86-64 was generated using GNU ld (GNU Binutils for Ubuntu) 2.26.1.
|
||||
## llvm-mc -filetype=obj -triple=x86_64-pc-linux test.s -o main.o
|
||||
## ld -script linker.script main.o -o openbsd-phdrs.elf-x86-64
|
||||
##
|
||||
## test.s is an empty file.
|
||||
## linker.script:
|
||||
## PHDRS { text PT_LOAD FILEHDR PHDRS; foo 0x65a3dbe6; bar 0x65a3dbe7; zed 0x65a41be6; }
|
||||
## Where 0x65a3dbe6 is the value of PT_OPENBSD_RANDOMIZE,
|
||||
## 0x65a3dbe7 is the value of PT_OPENBSD_WXNEEDED,
|
||||
## 0x65a41be6 is the value of PT_OPENBSD_BOOTDATA
|
||||
## SECTIONS { . = SIZEOF_HEADERS; .all : { *(.*) } : text }
|
||||
RUN: llvm-objdump -p %p/../../../Object/Inputs/openbsd-phdrs.elf-x86-64 \
|
||||
RUN: | FileCheck %s
|
||||
|
||||
CHECK: OPENBSD_RANDOMIZE off 0x0000000000000000 vaddr 0x0000000000000000 paddr 0x0000000000000000 align 2**3
|
||||
CHECK-NEXT: filesz 0x0000000000000000 memsz 0x0000000000000000 flags ---
|
||||
CHECK-NEXT: OPENBSD_WXNEEDED off 0x0000000000000000 vaddr 0x0000000000000000 paddr 0x0000000000000000 align 2**3
|
||||
CHECK-NEXT: filesz 0x0000000000000000 memsz 0x0000000000000000 flags ---
|
||||
CHECK-NEXT: OPENBSD_BOOTDATA off 0x0000000000000000 vaddr 0x0000000000000000 paddr 0x0000000000000000 align 2**3
|
||||
CHECK-NEXT: filesz 0x0000000000000000 memsz 0x0000000000000000 flags ---
|
32
test/tools/llvm-objdump/X86/phdrs.test
Normal file
32
test/tools/llvm-objdump/X86/phdrs.test
Normal file
@ -0,0 +1,32 @@
|
||||
## phdrs.elf-x86-64 was generated using lld (3.9).
|
||||
## llvm-mc -filetype=obj -triple=x86_64-unknown-linux test.s -o test.o
|
||||
## lld test.o -o phdrs.elf-x86-64
|
||||
##
|
||||
## test.s:
|
||||
## .global _start
|
||||
## _start:
|
||||
##
|
||||
## .global d
|
||||
## .section .foo,"awT",@progbits
|
||||
## d:
|
||||
## .long 2
|
||||
##
|
||||
RUN: llvm-objdump -p %p/../../../Object/Inputs/phdrs.elf-x86-64 \
|
||||
RUN: | FileCheck %s
|
||||
|
||||
CHECK: RELRO off 0x0000000000001000 vaddr 0x0000000000201000 paddr 0x0000000000201000 align 2**0
|
||||
CHECK-NEXT: filesz 0x0000000000000004 memsz 0x0000000000001000 flags r--
|
||||
|
||||
## phdr-note.elf-x86-64 was generated using lld (3.9).
|
||||
## llvm-mc -filetype=obj -triple=x86_64-pc-linux test.s -o test.o
|
||||
## lld test.o -o phdr-note.elf-x86-64 -shared
|
||||
##
|
||||
## test.s:
|
||||
## .section .note.test,"a",@note
|
||||
## .quad 42
|
||||
|
||||
RUN: llvm-objdump -p %p/../../../Object/Inputs/phdr-note.elf-x86-64 \
|
||||
RUN: | FileCheck %s --check-prefix=NOTE
|
||||
|
||||
NOTE: NOTE off 0x0000000000000200 vaddr 0x0000000000000200 paddr 0x0000000000000200 align 2**0
|
||||
NOTE-NEXT: filesz 0x0000000000000008 memsz 0x0000000000000008 flags r--
|
@ -36,6 +36,9 @@ template <class ELFT> void printProgramHeaders(const ELFFile<ELFT> *o) {
|
||||
case ELF::PT_GNU_EH_FRAME:
|
||||
outs() << "EH_FRAME ";
|
||||
break;
|
||||
case ELF::PT_GNU_RELRO:
|
||||
outs() << " RELRO ";
|
||||
break;
|
||||
case ELF::PT_GNU_STACK:
|
||||
outs() << " STACK ";
|
||||
break;
|
||||
@ -45,6 +48,18 @@ template <class ELFT> void printProgramHeaders(const ELFFile<ELFT> *o) {
|
||||
case ELF::PT_LOAD:
|
||||
outs() << " LOAD ";
|
||||
break;
|
||||
case ELF::PT_NOTE:
|
||||
outs() << " NOTE ";
|
||||
break;
|
||||
case ELF::PT_OPENBSD_BOOTDATA:
|
||||
outs() << " OPENBSD_BOOTDATA ";
|
||||
break;
|
||||
case ELF::PT_OPENBSD_RANDOMIZE:
|
||||
outs() << " OPENBSD_RANDOMIZE ";
|
||||
break;
|
||||
case ELF::PT_OPENBSD_WXNEEDED:
|
||||
outs() << " OPENBSD_WXNEEDED ";
|
||||
break;
|
||||
case ELF::PT_PHDR:
|
||||
outs() << " PHDR ";
|
||||
break;
|
||||
|
@ -36,6 +36,7 @@ do_libs="yes"
|
||||
do_libunwind="yes"
|
||||
do_test_suite="yes"
|
||||
do_openmp="yes"
|
||||
do_lld="yes"
|
||||
do_lldb="no"
|
||||
do_polly="no"
|
||||
BuildDir="`pwd`"
|
||||
@ -64,6 +65,7 @@ function usage() {
|
||||
echo " -no-libunwind Disable check-out & build libunwind"
|
||||
echo " -no-test-suite Disable check-out & build test-suite"
|
||||
echo " -no-openmp Disable check-out & build libomp"
|
||||
echo " -no-lld Disable check-out & build lld"
|
||||
echo " -lldb Enable check-out & build lldb"
|
||||
echo " -no-lldb Disable check-out & build lldb (default)"
|
||||
echo " -polly Enable check-out & build Polly"
|
||||
@ -143,6 +145,9 @@ while [ $# -gt 0 ]; do
|
||||
-no-openmp )
|
||||
do_openmp="no"
|
||||
;;
|
||||
-no-lld )
|
||||
do_lld="no"
|
||||
;;
|
||||
-lldb )
|
||||
do_lldb="yes"
|
||||
;;
|
||||
@ -225,6 +230,9 @@ esac
|
||||
if [ $do_openmp = "yes" ]; then
|
||||
projects="$projects openmp"
|
||||
fi
|
||||
if [ $do_lld = "yes" ]; then
|
||||
projects="$projects lld"
|
||||
fi
|
||||
if [ $do_lldb = "yes" ]; then
|
||||
projects="$projects lldb"
|
||||
fi
|
||||
@ -297,7 +305,7 @@ function export_sources() {
|
||||
cfe)
|
||||
projsrc=llvm.src/tools/clang
|
||||
;;
|
||||
lldb|polly)
|
||||
lld|lldb|polly)
|
||||
projsrc=llvm.src/tools/$proj
|
||||
;;
|
||||
clang-tools-extra)
|
||||
|
Loading…
Reference in New Issue
Block a user