@@ -80,18 +80,16 @@ define void @v1x8_levels_6_7_8_9_10_11_12_13(i32 %arg0, ptr align 16 %arg1) {
80
80
ret void
81
81
}
82
82
83
- define void @v1_4_4_4_2_1_to_v8_8_levels_6_7 (i32 %arg0 , ptr addrspace (3 ) align 16 %arg1_ptr , i32 %arg2 , i32 %arg3 , i32 %arg4 , i32 %arg5 , half %arg6_half , half %arg7_half ) {
83
+ define void @v1_4_4_4_2_1_to_v8_8_levels_6_7 (i32 %arg0 , ptr addrspace (3 ) align 16 %arg1_ptr , i32 %arg2 , i32 %arg3 , i32 %arg4 , i32 %arg5 , half %arg6_half , half %arg7_half , < 2 x half > %arg8_2xhalf ) {
84
84
; CHECK-LABEL: define void @v1_4_4_4_2_1_to_v8_8_levels_6_7(
85
- ; CHECK-SAME: i32 [[ARG0:%.*]], ptr addrspace(3) align 16 [[ARG1_PTR:%.*]], i32 [[ARG2:%.*]], i32 [[ARG3:%.*]], i32 [[ARG4:%.*]], i32 [[ARG5:%.*]], half [[ARG6_HALF:%.*]], half [[ARG7_HALF:%.*]]) #[[ATTR0]] {
85
+ ; CHECK-SAME: i32 [[ARG0:%.*]], ptr addrspace(3) align 16 [[ARG1_PTR:%.*]], i32 [[ARG2:%.*]], i32 [[ARG3:%.*]], i32 [[ARG4:%.*]], i32 [[ARG5:%.*]], half [[ARG6_HALF:%.*]], half [[ARG7_HALF:%.*]], <2 x half> [[ARG8_2XHALF:%.*]] ) #[[ATTR0]] {
86
86
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[ARG1_PTR]], i32 458752
87
87
; CHECK-NEXT: br [[DOTPREHEADER11_PREHEADER:label %.*]]
88
88
; CHECK: [[_PREHEADER11_PREHEADER:.*:]]
89
89
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[ARG0]], 6
90
90
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP1]], i32 [[TMP2]]
91
91
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[ARG2]]
92
92
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[ARG3]]
93
- ; CHECK-NEXT: [[VEC2_INIT:%.*]] = insertelement <2 x half> undef, half [[ARG7_HALF]], i32 0
94
- ; CHECK-NEXT: [[VEC2:%.*]] = shufflevector <2 x half> [[VEC2_INIT]], <2 x half> undef, <2 x i32> zeroinitializer
95
93
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[ARG0]], 2
96
94
; CHECK-NEXT: br i1 [[CMP]], [[DOTLR_PH:label %.*]], [[DOTEXIT_POINT:label %.*]]
97
95
; CHECK: [[_LR_PH:.*:]]
@@ -102,44 +100,41 @@ define void @v1_4_4_4_2_1_to_v8_8_levels_6_7(i32 %arg0, ptr addrspace(3) align 1
102
100
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x half> [[TMP8]], half 0xH0000, i32 2
103
101
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x half> [[TMP9]], half 0xH0000, i32 3
104
102
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x half> [[TMP10]], half 0xH0000, i32 4
105
- ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x half> [[VEC2 ]], i32 0
103
+ ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x half> [[ARG8_2XHALF ]], i32 0
106
104
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x half> [[TMP11]], half [[TMP12]], i32 5
107
- ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x half> [[VEC2 ]], i32 1
105
+ ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x half> [[ARG8_2XHALF ]], i32 1
108
106
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x half> [[TMP13]], half [[TMP14]], i32 6
109
107
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x half> [[TMP15]], half [[ARG7_HALF]], i32 7
110
108
; CHECK-NEXT: store <8 x half> [[TMP16]], ptr addrspace(3) [[TMP6]], align 2
111
109
; CHECK-NEXT: br [[DOTEXIT_POINT]]
112
110
; CHECK: [[_EXIT_POINT:.*:]]
113
111
; CHECK-NEXT: ret void
114
112
;
115
- %37 = getelementptr inbounds i8 , ptr addrspace (3 ) %arg1_ptr , i32 458752
113
+ %base1 = getelementptr inbounds i8 , ptr addrspace (3 ) %arg1_ptr , i32 458752
116
114
br label %.preheader11.preheader
117
115
118
116
.preheader11.preheader:
119
- %258 = shl nuw nsw i32 %arg0 , 6
120
- %259 = getelementptr inbounds i8 , ptr addrspace (3 ) %37 , i32 %258
117
+ %base2 = shl nuw nsw i32 %arg0 , 6
118
+ %base3 = getelementptr inbounds i8 , ptr addrspace (3 ) %base1 , i32 %base2
121
119
122
- %268 = getelementptr inbounds i8 , ptr addrspace (3 ) %259 , i32 %arg2
123
- %269 = getelementptr inbounds i8 , ptr addrspace (3 ) %268 , i32 %arg3
124
-
125
- %vec2_init = insertelement <2 x half > undef , half %arg7_half , i32 0
126
- %vec2 = shufflevector <2 x half > %vec2_init , <2 x half > undef , <2 x i32 > zeroinitializer
120
+ %base4 = getelementptr inbounds i8 , ptr addrspace (3 ) %base3 , i32 %arg2
121
+ %base5 = getelementptr inbounds i8 , ptr addrspace (3 ) %base4 , i32 %arg3
127
122
128
123
%cmp = icmp sgt i32 %arg0 , 2
129
124
br i1 %cmp , label %.lr.ph , label %.exit_point
130
125
131
126
.lr.ph:
132
- %gep = getelementptr inbounds i8 , ptr addrspace (3 ) %269 , i32 %arg4
127
+ %gep = getelementptr inbounds i8 , ptr addrspace (3 ) %base5 , i32 %arg4
133
128
134
- %1000 = getelementptr inbounds i8 , ptr addrspace (3 ) %gep , i32 %arg5
135
- %1002 = getelementptr inbounds i8 , ptr addrspace (3 ) %1000 , i32 2
136
- %1010 = getelementptr inbounds i8 , ptr addrspace (3 ) %1000 , i32 10
137
- %1014 = getelementptr inbounds i8 , ptr addrspace (3 ) %1000 , i32 14
129
+ %dst = getelementptr inbounds i8 , ptr addrspace (3 ) %gep , i32 %arg5
130
+ %dst_off2 = getelementptr inbounds i8 , ptr addrspace (3 ) %dst , i32 2
131
+ %dst_off10 = getelementptr inbounds i8 , ptr addrspace (3 ) %dst , i32 10
132
+ %dst_off14 = getelementptr inbounds i8 , ptr addrspace (3 ) %dst , i32 14
138
133
139
- store half %arg6_half , ptr addrspace (3 ) %1000 , align 2
140
- store <4 x half > zeroinitializer , ptr addrspace (3 ) %1002 , align 2
141
- store <2 x half > %vec2 , ptr addrspace (3 ) %1010 , align 2
142
- store half %arg7_half , ptr addrspace (3 ) %1014 , align 2
134
+ store half %arg6_half , ptr addrspace (3 ) %dst , align 2
135
+ store <4 x half > zeroinitializer , ptr addrspace (3 ) %dst_off2 , align 2
136
+ store <2 x half > %arg8_2xhalf , ptr addrspace (3 ) %dst_off10 , align 2
137
+ store half %arg7_half , ptr addrspace (3 ) %dst_off14 , align 2
143
138
br label %.exit_point
144
139
145
140
.exit_point:
0 commit comments