-
Notifications
You must be signed in to change notification settings - Fork 15.4k
Reapply "[LV] Use ExtractLane(LastActiveLane, V) live outs when tail-folding. (#149042)" #168738
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
23f8762
87278c5
dd576eb
fc37d9d
485e5a2
6905dad
7a928d8
156348c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -560,7 +560,6 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) { | |
| case VPInstruction::ExtractLastElement: | ||
| case VPInstruction::ExtractLastLanePerPart: | ||
| case VPInstruction::ExtractPenultimateElement: | ||
| case VPInstruction::FirstActiveLane: | ||
| case VPInstruction::Not: | ||
| case VPInstruction::ResumeForEpilogue: | ||
| case VPInstruction::Unpack: | ||
|
|
@@ -591,6 +590,9 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) { | |
| case Instruction::Switch: | ||
| case VPInstruction::SLPLoad: | ||
| case VPInstruction::SLPStore: | ||
| case VPInstruction::FirstActiveLane: | ||
| case VPInstruction::FirstActiveLaneZeroNotPoison: | ||
| case VPInstruction::LastActiveLane: | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think that, with the test case added in #167897, this assertion started firing on FirstActiveLane/LastActiveLane because we're now hitting the unrolling path with first-order recurrences. These can have multiple operands, so I think we need to return an unknown number of operands here. |
||
| // Cannot determine the number of operands from the opcode. | ||
| return -1u; | ||
| } | ||
|
|
@@ -1001,11 +1003,16 @@ Value *VPInstruction::generate(VPTransformState &State) { | |
| } | ||
| return Res; | ||
| } | ||
| case VPInstruction::FirstActiveLane: { | ||
| case VPInstruction::FirstActiveLane: | ||
| case VPInstruction::FirstActiveLaneZeroNotPoison: { | ||
| const bool ZeroIsPoison = | ||
| getOpcode() != VPInstruction::FirstActiveLaneZeroNotPoison; | ||
| if (getNumOperands() == 1) { | ||
| Value *Mask = State.get(getOperand(0)); | ||
| // LastActiveLane might get expanded to a FirstActiveLane with an all-ones | ||
| // mask, so make sure zero returns VF and not poison. | ||
| return Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(), Mask, | ||
| true, Name); | ||
| ZeroIsPoison, Name); | ||
| } | ||
| // If there are multiple operands, create a chain of selects to pick the | ||
| // first operand with an active lane and add the number of lanes of the | ||
|
|
@@ -1023,7 +1030,7 @@ Value *VPInstruction::generate(VPTransformState &State) { | |
| Builder.getInt64Ty()) | ||
| : Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(), | ||
| State.get(getOperand(Idx)), | ||
| true, Name); | ||
| ZeroIsPoison, Name); | ||
| Value *Current = Builder.CreateAdd( | ||
| Builder.CreateMul(RuntimeVF, Builder.getInt64(Idx)), TrailingZeros); | ||
| if (Res) { | ||
|
|
@@ -1161,7 +1168,8 @@ InstructionCost VPInstruction::computeCost(ElementCount VF, | |
| return Ctx.TTI.getArithmeticReductionCost( | ||
| Instruction::Or, cast<VectorType>(VecTy), std::nullopt, Ctx.CostKind); | ||
| } | ||
| case VPInstruction::FirstActiveLane: { | ||
| case VPInstruction::FirstActiveLane: | ||
| case VPInstruction::FirstActiveLaneZeroNotPoison: { | ||
| Type *ScalarTy = Ctx.Types.inferScalarType(getOperand(0)); | ||
| if (VF.isScalar()) | ||
| return Ctx.TTI.getCmpSelInstrCost(Instruction::ICmp, ScalarTy, | ||
|
|
@@ -1174,6 +1182,29 @@ InstructionCost VPInstruction::computeCost(ElementCount VF, | |
| {PredTy, Type::getInt1Ty(Ctx.LLVMCtx)}); | ||
| return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind); | ||
| } | ||
| case VPInstruction::LastActiveLane: { | ||
| Type *ScalarTy = Ctx.Types.inferScalarType(getOperand(0)); | ||
| if (VF.isScalar()) | ||
| return Ctx.TTI.getCmpSelInstrCost(Instruction::ICmp, ScalarTy, | ||
| CmpInst::makeCmpResultType(ScalarTy), | ||
| CmpInst::ICMP_EQ, Ctx.CostKind); | ||
| // Calculate the cost of determining the lane index: NOT + cttz_elts + SUB. | ||
| auto *PredTy = toVectorTy(ScalarTy, VF); | ||
| IntrinsicCostAttributes Attrs(Intrinsic::experimental_cttz_elts, | ||
| Type::getInt64Ty(Ctx.LLVMCtx), | ||
| {PredTy, Type::getInt1Ty(Ctx.LLVMCtx)}); | ||
| InstructionCost Cost = Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind); | ||
| // Add cost of NOT operation on the predicate. | ||
| Cost += Ctx.TTI.getArithmeticInstrCost( | ||
| Instruction::Xor, PredTy, Ctx.CostKind, | ||
| {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None}, | ||
| {TargetTransformInfo::OK_UniformConstantValue, | ||
| TargetTransformInfo::OP_None}); | ||
| // Add cost of SUB operation on the index. | ||
| Cost += Ctx.TTI.getArithmeticInstrCost( | ||
| Instruction::Sub, Type::getInt64Ty(Ctx.LLVMCtx), Ctx.CostKind); | ||
| return Cost; | ||
| } | ||
| case VPInstruction::FirstOrderRecurrenceSplice: { | ||
| assert(VF.isVector() && "Scalar FirstOrderRecurrenceSplice?"); | ||
| SmallVector<int> Mask(VF.getKnownMinValue()); | ||
|
|
@@ -1228,6 +1259,8 @@ bool VPInstruction::isVectorToScalar() const { | |
| getOpcode() == Instruction::ExtractElement || | ||
| getOpcode() == VPInstruction::ExtractLane || | ||
| getOpcode() == VPInstruction::FirstActiveLane || | ||
| getOpcode() == VPInstruction::FirstActiveLaneZeroNotPoison || | ||
| getOpcode() == VPInstruction::LastActiveLane || | ||
| getOpcode() == VPInstruction::ComputeAnyOfResult || | ||
| getOpcode() == VPInstruction::ComputeFindIVResult || | ||
| getOpcode() == VPInstruction::ComputeReductionResult || | ||
|
|
@@ -1294,6 +1327,8 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { | |
| case VPInstruction::ActiveLaneMask: | ||
| case VPInstruction::ExplicitVectorLength: | ||
| case VPInstruction::FirstActiveLane: | ||
| case VPInstruction::FirstActiveLaneZeroNotPoison: | ||
| case VPInstruction::LastActiveLane: | ||
| case VPInstruction::FirstOrderRecurrenceSplice: | ||
| case VPInstruction::LogicalAnd: | ||
| case VPInstruction::Not: | ||
|
|
@@ -1470,6 +1505,12 @@ void VPInstruction::printRecipe(raw_ostream &O, const Twine &Indent, | |
| case VPInstruction::FirstActiveLane: | ||
| O << "first-active-lane"; | ||
| break; | ||
| case VPInstruction::FirstActiveLaneZeroNotPoison: | ||
| O << "first-active-lane-zero-not-poison"; | ||
| break; | ||
| case VPInstruction::LastActiveLane: | ||
| O << "last-active-lane"; | ||
| break; | ||
| case VPInstruction::ReductionStartVector: | ||
| O << "reduction-start-vector"; | ||
| break; | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.