Skip to content

Commit b3a7ab6

Browse files
committed
[DAG] Don't allow implicit truncation in extract_element(bitcast(scalar_to_vector(X))) -> trunc(srl(X,C)) fold
Limits #117900 to fold only when the scalar_to_vector does not perform implicit truncation, because the scaled shift calculation does not currently account for implicit truncation; support for that case can be added in a future update. Fixes #121306.
1 parent 79af7bd commit b3a7ab6

File tree

2 files changed

+19
-5
lines changed

2 files changed

+19
-5
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23088,8 +23088,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
2308823088
if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
2308923089
return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
2309023090

23091+
// TODO: Add support for SCALAR_TO_VECTOR implicit truncation.
2309123092
if (LegalTypes && BCSrc.getValueType().isInteger() &&
23092-
BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
23093+
BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR &&
23094+
BCSrc.getScalarValueSizeInBits() ==
23095+
BCSrc.getOperand(0).getScalarValueSizeInBits()) {
2309323096
// ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
2309423097
// trunc i64 X to i32
2309523098
SDValue X = BCSrc.getOperand(0);

llvm/test/CodeGen/PowerPC/scalar_vector_test_5.ll

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -11,24 +11,35 @@
1111
define i8 @scalar_to_vector_half(ptr nocapture readonly %ad) {
1212
; P9LE-LABEL: scalar_to_vector_half:
1313
; P9LE: # %bb.0: # %entry
14-
; P9LE-NEXT: lhz r3, 0(r3)
14+
; P9LE-NEXT: lxsihzx v2, 0, r3
15+
; P9LE-NEXT: li r3, 0
16+
; P9LE-NEXT: vsplth v2, v2, 3
17+
; P9LE-NEXT: vextubrx r3, r3, v2
1518
; P9LE-NEXT: blr
1619
;
1720
; P9BE-LABEL: scalar_to_vector_half:
1821
; P9BE: # %bb.0: # %entry
19-
; P9BE-NEXT: lhz r3, 0(r3)
20-
; P9BE-NEXT: srwi r3, r3, 24
22+
; P9BE-NEXT: lxsihzx v2, 0, r3
23+
; P9BE-NEXT: li r3, 0
24+
; P9BE-NEXT: vsplth v2, v2, 3
25+
; P9BE-NEXT: vextublx r3, r3, v2
2126
; P9BE-NEXT: blr
2227
;
2328
; P8LE-LABEL: scalar_to_vector_half:
2429
; P8LE: # %bb.0: # %entry
2530
; P8LE-NEXT: lhz r3, 0(r3)
31+
; P8LE-NEXT: mtfprd f0, r3
32+
; P8LE-NEXT: mffprd r3, f0
33+
; P8LE-NEXT: clrldi r3, r3, 56
2634
; P8LE-NEXT: blr
2735
;
2836
; P8BE-LABEL: scalar_to_vector_half:
2937
; P8BE: # %bb.0: # %entry
3038
; P8BE-NEXT: lhz r3, 0(r3)
31-
; P8BE-NEXT: srwi r3, r3, 24
39+
; P8BE-NEXT: sldi r3, r3, 48
40+
; P8BE-NEXT: mtfprd f0, r3
41+
; P8BE-NEXT: mffprd r3, f0
42+
; P8BE-NEXT: rldicl r3, r3, 8, 56
3243
; P8BE-NEXT: blr
3344
entry:
3445
%0 = load <2 x i8>, ptr %ad, align 1

0 commit comments

Comments
 (0)