[flang] Canonicalize redundant pointer converts. (#121864)

vzakhari · web-flow · commit 2e637dbbb8bc · 2025-01-07T08:35:43.000-08:00
This patch adds a canonicalization pattern for optimizing redundant
"pointer" fir.converts. Such converts prevent the StackArrays pass
to recognize fir.freemem for the corresponding fir.allocmem, e.g.:
```
    %69 = fir.allocmem !fir.array&lt;2xi32&gt;
    %71:2 = hlfir.declare %69(%70) {uniq_name = ".tmp.arrayctor"} :
        (!fir.heap&lt;!fir.array&lt;2xi32&gt;&gt;, !fir.shape&lt;1&gt;) -&gt;
        (!fir.heap&lt;!fir.array&lt;2xi32&gt;&gt;, !fir.heap&lt;!fir.array&lt;2xi32&gt;&gt;)
    %95 = fir.convert %71#1 :
        (!fir.heap&lt;!fir.array&lt;2xi32&gt;&gt;) -&gt; !fir.ref&lt;!fir.array&lt;2xi32&gt;&gt;
    %100 = fir.convert %95 :
        (!fir.ref&lt;!fir.array&lt;2xi32&gt;&gt;) -&gt; !fir.heap&lt;!fir.array&lt;2xi32&gt;&gt;
    fir.freemem %100 : !fir.heap&lt;!fir.array&lt;2xi32&gt;&gt;
```
I found this in `tonto`, but the change does not affect performance at all.
Anyway, it looks like a reasonable thing to do, and it makes easier
to compare the performance profiles with other compilers'.
diff --git a/flang/include/flang/Optimizer/Dialect/CanonicalizationPatterns.td b/flang/include/flang/Optimizer/Dialect/CanonicalizationPatterns.td
@@ -57,6 +57,9 @@ def StrictSmallerWidthPred : Constraint<CPred<
                        "$0.getType().getIntOrFloatBitWidth() < "
                        "$1.getType().getIntOrFloatBitWidth()">>;
 
+def PointerCompatiblePred
+    : Constraint<CPred<"fir::ConvertOp::isPointerCompatible($0.getType())">>;
+
 // floats or ints that undergo successive extensions or successive truncations.
 def ConvertConvertOptPattern
     : Pat<(fir_ConvertOp:$res (fir_ConvertOp:$irm $arg)),
@@ -112,4 +115,18 @@ def ForwardConstantConvertPattern
           (createConstantOp $res, $attr),
           [(IndexTypePred $res), (IntegerTypePred $cnt)]>;
 
+// Optimize redundant pointer conversions, e.g.:
+// %1 = fir.convert %0 :
+//     (!fir.heap<!fir.array<2xf32>>) -> !fir.ref<!fir.array<2xf32>>
+// %2 = fir.convert %1 :
+//     (!fir.ref<!fir.array<2xf32>>) -> !fir.heap<!fir.array<2xf32>>
+// Will be optimized into:
+// %2 = fir.convert %0 :
+//     (!fir.heap<!fir.array<2xf32>>) -> !fir.heap<!fir.array<2xf32>>
+// which is redundant due to RedundantConvertOptPattern.
+def ChainedPointerConvertsPattern
+    : Pat<(fir_ConvertOp:$res(fir_ConvertOp:$irm $arg)), (fir_ConvertOp $arg),
+          [(PointerCompatiblePred $arg), (PointerCompatiblePred $irm),
+           (PointerCompatiblePred $res)]>;
+
 #endif // FORTRAN_FIR_REWRITE_PATTERNS
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -1313,7 +1313,8 @@ void fir::ConvertOp::getCanonicalizationPatterns(
   results.insert<ConvertConvertOptPattern, ConvertAscendingIndexOptPattern,
                  ConvertDescendingIndexOptPattern, RedundantConvertOptPattern,
                  CombineConvertOptPattern, CombineConvertTruncOptPattern,
-                 ForwardConstantConvertPattern>(context);
+                 ForwardConstantConvertPattern, ChainedPointerConvertsPattern>(
+      context);
 }
 
 mlir::OpFoldResult fir::ConvertOp::fold(FoldAdaptor adaptor) {
diff --git a/flang/test/Fir/convert-fold.fir b/flang/test/Fir/convert-fold.fir
@@ -35,3 +35,12 @@ func.func @ctest() -> index {
   // CHECK-NEXT: return %{{.*}} : index
   return %2 : index
 }
+
+// CHECK-LABEL:   func.func @ptrtest(
+// CHECK-SAME:                       %[[VAL_0:.*]]: !fir.heap<!fir.array<2xf32>>) -> !fir.heap<!fir.array<2xf32>> {
+func.func @ptrtest(%0 : !fir.heap<!fir.array<2xf32>>) -> !fir.heap<!fir.array<2xf32>> {
+  %1 = fir.convert %0 : (!fir.heap<!fir.array<2xf32>>) -> !fir.ref<!fir.array<2xf32>>
+  %2 = fir.convert %1 : (!fir.ref<!fir.array<2xf32>>) -> !fir.heap<!fir.array<2xf32>>
+// CHECK:           return %[[VAL_0]] : !fir.heap<!fir.array<2xf32>>
+  return %2 : !fir.heap<!fir.array<2xf32>>
+}
diff --git a/flang/test/Lower/array-substring.f90 b/flang/test/Lower/array-substring.f90
@@ -24,9 +24,8 @@
 ! CHECK:         %[[VAL_16:.*]] = fir.array_coor %[[VAL_7]](%[[VAL_9]]) {{\[}}%[[VAL_10]]] %[[VAL_15]] : (!fir.ref<!fir.array<1x!fir.char<1,12>>>, !fir.shape<1>, !fir.slice<1>, index) -> !fir.ref<!fir.char<1,12>>
 ! CHECK:         %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (!fir.ref<!fir.char<1,12>>) -> !fir.ref<!fir.array<12x!fir.char<1>>>
 ! CHECK:         %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_17]], %[[VAL_2]] : (!fir.ref<!fir.array<12x!fir.char<1>>>, index) -> !fir.ref<!fir.char<1>>
-! CHECK:         %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (!fir.ref<!fir.char<1>>) -> !fir.ref<!fir.char<1,?>>
 ! CHECK:         %[[VAL_20:.*]] = fir.array_coor %[[VAL_11]](%[[VAL_9]]) %[[VAL_15]] : (!fir.ref<!fir.array<1x!fir.char<1,8>>>, !fir.shape<1>, index) -> !fir.ref<!fir.char<1,8>>
-! CHECK:         %[[VAL_21:.*]] = fir.convert %[[VAL_19]] : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK:         %[[VAL_21:.*]] = fir.convert %[[VAL_18]] : (!fir.ref<!fir.char<1>>) -> !fir.ref<i8>
 ! CHECK:         %[[VAL_22:.*]] = fir.convert %[[VAL_20]] : (!fir.ref<!fir.char<1,8>>) -> !fir.ref<i8>
 ! CHECK:         %[[VAL_23:.*]] = fir.convert %[[VAL_4]] : (index) -> i64
 ! CHECK:         %[[VAL_24:.*]] = fir.call @_FortranACharacterCompareScalar1(%[[VAL_21]], %[[VAL_22]], %[[VAL_23]], %[[VAL_23]]) {{.*}}: (!fir.ref<i8>, !fir.ref<i8>, i64, i64) -> i32
diff --git a/flang/test/Lower/vector-subscript-io.f90 b/flang/test/Lower/vector-subscript-io.f90
@@ -325,12 +325,11 @@ subroutine substring(x, y, i, j)
 ! CHECK:   %[[VAL_230:.*]] = arith.subi %[[VAL_216]], %[[VAL_210]] : index
 ! CHECK:   %[[VAL_231:.*]] = fir.convert %[[VAL_228]] : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<?x!fir.char<1>>>
 ! CHECK:   %[[VAL_232:.*]] = fir.coordinate_of %[[VAL_231]], %[[VAL_230]] : (!fir.ref<!fir.array<?x!fir.char<1>>>, index) -> !fir.ref<!fir.char<1>>
-! CHECK:   %[[VAL_233:.*]] = fir.convert %[[VAL_232]] : (!fir.ref<!fir.char<1>>) -> !fir.ref<!fir.char<1,?>>
 ! CHECK:   %[[VAL_234:.*]] = arith.subi %[[VAL_219]], %[[VAL_216]] : index
 ! CHECK:   %[[VAL_235:.*]] = arith.addi %[[VAL_234]], %[[VAL_210]] : index
 ! CHECK:   %[[VAL_236:.*]] = arith.cmpi slt, %[[VAL_235]], %[[VAL_209]] : index
 ! CHECK:   %[[VAL_237:.*]] = arith.select %[[VAL_236]], %[[VAL_209]], %[[VAL_235]] : index
-! CHECK:   %[[VAL_238:.*]] = fir.convert %[[VAL_233]] : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK:   %[[VAL_238:.*]] = fir.convert %[[VAL_232]] : (!fir.ref<!fir.char<1>>) -> !fir.ref<i8>
 ! CHECK:   %[[VAL_239:.*]] = fir.convert %[[VAL_237]] : (index) -> i64
 ! CHECK:   %[[VAL_240:.*]] = fir.call @_FortranAioInputAscii(%[[VAL_213]], %[[VAL_238]], %[[VAL_239]]) {{.*}}: (!fir.ref<i8>, !fir.ref<i8>, i64) -> i1
 ! CHECK:   %[[VAL_241:.*]] = arith.addi %[[VAL_221]], %[[VAL_210]] overflow<nsw> : index

Original file line number	Diff line number	Diff line change
`@@ -1313,7 +1313,8 @@ void fir::ConvertOp::getCanonicalizationPatterns(`
`1313`	`1313`	`results.insert<ConvertConvertOptPattern, ConvertAscendingIndexOptPattern,`
`1314`	`1314`	`ConvertDescendingIndexOptPattern, RedundantConvertOptPattern,`
`1315`	`1315`	`CombineConvertOptPattern, CombineConvertTruncOptPattern,`
`1316`		`- ForwardConstantConvertPattern>(context);`
	`1316`	`+ ForwardConstantConvertPattern, ChainedPointerConvertsPattern>(`
	`1317`	`+ context);`
`1317`	`1318`	`}`
`1318`	`1319`
`1319`	`1320`	`mlir::OpFoldResult fir::ConvertOp::fold(FoldAdaptor adaptor) {`