Skip to content

Commit c647c58

Browse files
committed
[Matrix] Propagate shape information through fdiv insts
1 parent 9d33b92 commit c647c58

File tree

2 files changed

+29
-0
lines changed

2 files changed

+29
-0
lines changed

llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ static bool isUniformShape(Value *V) {
233233
case Instruction::FAdd:
234234
case Instruction::FSub:
235235
case Instruction::FMul: // Scalar multiply.
236+
case Instruction::FDiv:
236237
case Instruction::FNeg:
237238
case Instruction::Add:
238239
case Instruction::Mul:
@@ -2167,6 +2168,8 @@ class LowerMatrixIntrinsics {
21672168
return Builder.CreateFAdd(LHS, RHS);
21682169
case Instruction::FMul:
21692170
return Builder.CreateFMul(LHS, RHS);
2171+
case Instruction::FDiv:
2172+
return Builder.CreateFDiv(LHS, RHS);
21702173
case Instruction::FSub:
21712174
return Builder.CreateFSub(LHS, RHS);
21722175
default:
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s
3+
4+
; Test input: loads two <4 x double> values from %num and %denom, divides them
; element-wise, transposes the quotient twice with dimensions (i32 2, i32 2),
; and stores the final value to %out.
; The autogenerated assertions below expect the pass output to contain two
; <2 x double> column loads per operand, one fdiv per column, and two column
; stores, with no transpose calls remaining before the return.
define void @fdiv_2x2(ptr %num, ptr %denom, ptr %out) {
5+
; CHECK-LABEL: @fdiv_2x2(
6+
; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, ptr [[NUM:%.*]], align 32
7+
; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, ptr [[NUM]], i64 2
8+
; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 16
9+
; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[DENOM:%.*]], align 32
10+
; CHECK-NEXT: [[VEC_GEP3:%.*]] = getelementptr double, ptr [[DENOM]], i64 2
11+
; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 16
12+
; CHECK-NEXT: [[TMP1:%.*]] = fdiv <2 x double> [[COL_LOAD]], [[COL_LOAD2]]
13+
; CHECK-NEXT: [[TMP2:%.*]] = fdiv <2 x double> [[COL_LOAD1]], [[COL_LOAD4]]
14+
; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[OUT:%.*]], align 32
15+
; CHECK-NEXT: [[VEC_GEP5:%.*]] = getelementptr double, ptr [[OUT]], i64 2
16+
; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[VEC_GEP5]], align 16
17+
; CHECK-NEXT: ret void
18+
;
19+
%numv = load <4 x double>, ptr %num
20+
%denomv = load <4 x double>, ptr %denom
21+
; The fdiv itself operates on flat <4 x double> vectors and states no matrix
; dimensions; the transpose calls that consume it are the only instructions in
; this function that do.
%div = fdiv <4 x double> %numv, %denomv
22+
%divt = call <4 x double> @llvm.matrix.transpose(<4 x double> %div, i32 2, i32 2)
23+
%divtt = call <4 x double> @llvm.matrix.transpose(<4 x double> %divt, i32 2, i32 2)
24+
store <4 x double> %divtt, ptr %out
25+
ret void
26+
}

0 commit comments

Comments
 (0)