@@ -2098,6 +2098,113 @@ bb11: ; preds = %bb10, %bb2
2098
2098
br label %bb1
2099
2099
}
2100
2100
2101
+ define void @crash_lshlrevb16_not_reg_op () { ; Expected output below has v_lshlrev_b16 with two immediate operands (8, 1); the name suggests a crash regression test - TODO confirm against the originating change.
2102
+ ; NOSDWA-LABEL: crash_lshlrevb16_not_reg_op:
2103
+ ; NOSDWA: ; %bb.0: ; %bb0
2104
+ ; NOSDWA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2105
+ ; NOSDWA-NEXT: s_mov_b64 s[4:5], 0
2106
+ ; NOSDWA-NEXT: s_and_b64 vcc, exec, -1
2107
+ ; NOSDWA-NEXT: v_lshlrev_b16_e64 v3, 8, 1
2108
+ ; NOSDWA-NEXT: .LBB22_1: ; %bb1
2109
+ ; NOSDWA-NEXT: ; =>This Inner Loop Header: Depth=1
2110
+ ; NOSDWA-NEXT: v_mov_b32_e32 v0, s4
2111
+ ; NOSDWA-NEXT: v_mov_b32_e32 v2, 0xff
2112
+ ; NOSDWA-NEXT: s_lshl_b32 s6, s4, 3
2113
+ ; NOSDWA-NEXT: v_mov_b32_e32 v1, s5
2114
+ ; NOSDWA-NEXT: s_mov_b64 s[4:5], 1
2115
+ ; NOSDWA-NEXT: v_and_b32_e32 v2, s4, v2
2116
+ ; NOSDWA-NEXT: v_or_b32_e32 v2, v2, v3
2117
+ ; NOSDWA-NEXT: v_lshrrev_b16_e32 v2, s6, v2
2118
+ ; NOSDWA-NEXT: flat_store_byte v[0:1], v2
2119
+ ; NOSDWA-NEXT: s_mov_b64 vcc, vcc
2120
+ ; NOSDWA-NEXT: s_cbranch_vccnz .LBB22_1
2121
+ ; NOSDWA-NEXT: ; %bb.2: ; %DummyReturnBlock
2122
+ ; NOSDWA-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2123
+ ; NOSDWA-NEXT: s_setpc_b64 s[30:31]
2124
+ ;
2125
+ ; GFX89-LABEL: crash_lshlrevb16_not_reg_op:
2126
+ ; GFX89: ; %bb.0: ; %bb0
2127
+ ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2128
+ ; GFX89-NEXT: s_mov_b64 s[4:5], 0
2129
+ ; GFX89-NEXT: s_and_b64 vcc, exec, -1
2130
+ ; GFX89-NEXT: v_lshlrev_b16_e64 v0, 8, 1
2131
+ ; GFX89-NEXT: .LBB22_1: ; %bb1
2132
+ ; GFX89-NEXT: ; =>This Inner Loop Header: Depth=1
2133
+ ; GFX89-NEXT: v_mov_b32_e32 v3, s4
2134
+ ; GFX89-NEXT: s_lshl_b32 s6, s4, 3
2135
+ ; GFX89-NEXT: v_mov_b32_e32 v1, s4
2136
+ ; GFX89-NEXT: v_or_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2137
+ ; GFX89-NEXT: v_mov_b32_e32 v2, s5
2138
+ ; GFX89-NEXT: s_mov_b64 s[4:5], 1
2139
+ ; GFX89-NEXT: v_lshrrev_b16_e32 v3, s6, v3
2140
+ ; GFX89-NEXT: flat_store_byte v[1:2], v3
2141
+ ; GFX89-NEXT: s_mov_b64 vcc, vcc
2142
+ ; GFX89-NEXT: s_cbranch_vccnz .LBB22_1
2143
+ ; GFX89-NEXT: ; %bb.2: ; %DummyReturnBlock
2144
+ ; GFX89-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2145
+ ; GFX89-NEXT: s_setpc_b64 s[30:31]
2146
+ ;
2147
+ ; GFX9-LABEL: crash_lshlrevb16_not_reg_op:
2148
+ ; GFX9: ; %bb.0: ; %bb0
2149
+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2150
+ ; GFX9-NEXT: s_mov_b64 s[4:5], 0
2151
+ ; GFX9-NEXT: v_lshlrev_b16_e64 v0, 8, 1
2152
+ ; GFX9-NEXT: s_and_b64 vcc, exec, -1
2153
+ ; GFX9-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2154
+ ; GFX9-NEXT: .LBB22_1: ; %bb1
2155
+ ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
2156
+ ; GFX9-NEXT: s_lshl_b32 s6, s4, 3
2157
+ ; GFX9-NEXT: v_mov_b32_e32 v1, s4
2158
+ ; GFX9-NEXT: v_mov_b32_e32 v2, s5
2159
+ ; GFX9-NEXT: s_mov_b64 s[4:5], 1
2160
+ ; GFX9-NEXT: v_lshrrev_b16_e32 v3, s6, v0
2161
+ ; GFX9-NEXT: flat_store_byte v[1:2], v3
2162
+ ; GFX9-NEXT: s_mov_b64 vcc, vcc
2163
+ ; GFX9-NEXT: s_cbranch_vccnz .LBB22_1
2164
+ ; GFX9-NEXT: ; %bb.2: ; %DummyReturnBlock
2165
+ ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2166
+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
2167
+ ;
2168
+ ; GFX10-LABEL: crash_lshlrevb16_not_reg_op:
2169
+ ; GFX10: ; %bb.0: ; %bb0
2170
+ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2171
+ ; GFX10-NEXT: v_lshlrev_b16 v0, 8, 1
2172
+ ; GFX10-NEXT: s_mov_b32 vcc_lo, exec_lo
2173
+ ; GFX10-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2174
+ ; GFX10-NEXT: s_mov_b64 s[4:5], 0
2175
+ ; GFX10-NEXT: .LBB22_1: ; %bb1
2176
+ ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
2177
+ ; GFX10-NEXT: s_lshl_b32 s6, s4, 3
2178
+ ; GFX10-NEXT: v_mov_b32_e32 v1, s4
2179
+ ; GFX10-NEXT: v_mov_b32_e32 v2, s5
2180
+ ; GFX10-NEXT: v_lshrrev_b16 v3, s6, v0
2181
+ ; GFX10-NEXT: s_mov_b64 s[4:5], 1
2182
+ ; GFX10-NEXT: flat_store_byte v[1:2], v3
2183
+ ; GFX10-NEXT: s_cbranch_vccnz .LBB22_1
2184
+ ; GFX10-NEXT: ; %bb.2: ; %DummyReturnBlock
2185
+ ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2186
+ ; GFX10-NEXT: s_setpc_b64 s[30:31]
2187
+ %1 = alloca [2 x i8 ], align 1 , addrspace (5 ) ; two-byte buffer allocated in address space 5
2188
+ %2 = getelementptr [2 x i8 ], ptr addrspace (5 ) %1 , i32 0 , i32 1 ; address of buffer element 1
2189
+ br label %bb0
2190
+
2191
+ bb0:
2192
+ store i8 1 , ptr addrspace (5 ) %2 , align 1 ; buffer[1] = 1; element 0 is never written in this function
2193
+ br label %bb1
2194
+
2195
+ bb1:
2196
+ %3 = phi i64 [ 1 , %bb1 ], [ 0 , %bb0 ] ; index is 0 when entered from bb0 and 1 on every back-edge from bb1
2197
+ %4 = trunc i64 %3 to i32 ; 32-bit copy of the index, used to address the buffer
2198
+ %5 = getelementptr i8 , ptr addrspace (5 ) %1 , i32 %4 ; address of buffer[index]
2199
+ %6 = load i8 , ptr addrspace (5 ) %5 , align 1 ; byte read back from the buffer
2200
+ %7 = getelementptr i8 , ptr null , i64 %3 ; destination address is null offset by the index
2201
+ store i8 %6 , ptr %7 , align 1 ; write the loaded byte to that destination
2202
+ br i1 false , label %bb2 , label %bb1 ; condition is the constant false, so this branch always takes the bb1 back-edge
2203
+
2204
+ bb2:
2205
+ br label %bb0 ; only reachable via the never-taken edge above; jumps back to bb0
2206
+ }
2207
+
2101
2208
declare i32 @llvm.amdgcn.workitem.id.x ()
2102
2209
2103
2210
attributes #0 = { "denormal-fp-math" ="preserve-sign,preserve-sign" }
0 commit comments