Skip to content

Commit 00769d6

Browse files
committed
[flang] Add -fppc-native-vector-element-order option to control the element order in PowerPC vector types
This patch also adds a LIT test for the vec_cvf intrinsic that can be affected by the option. Co-authored-by: Mark Danial <[email protected]> Co-authored-by: Daniel Chen <[email protected]> Differential Revision: https://reviews.llvm.org/D155852
1 parent e21b1dd commit 00769d6

File tree

16 files changed

+118
-19
lines changed

16 files changed

+118
-19
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5416,6 +5416,9 @@ defm xor_operator : OptInFC1FFlag<"xor-operator", "Enable .XOR. as a synonym of
54165416
defm logical_abbreviations : OptInFC1FFlag<"logical-abbreviations", "Enable logical abbreviations">;
54175417
defm implicit_none : OptInFC1FFlag<"implicit-none", "No implicit typing allowed unless overridden by IMPLICIT statements">;
54185418
defm underscoring : OptInFC1FFlag<"underscoring", "Appends one trailing underscore to external names">;
5419+
defm ppc_native_vec_elem_order: BoolOptionWithoutMarshalling<"f", "ppc-native-vector-element-order",
5420+
PosFlag<SetTrue, [], "Specifies PowerPC native vector element order (default)">,
5421+
NegFlag<SetFalse, [], "Specifies PowerPC non-native vector element order">>;
54195422

54205423
def fno_automatic : Flag<["-"], "fno-automatic">, Group<f_Group>,
54215424
HelpText<"Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE">;

clang/lib/Driver/ToolChains/Flang.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,9 @@ void Flang::addCodegenOptions(const ArgList &Args,
144144
CmdArgs.push_back("-fversion-loops-for-stride");
145145

146146
Args.AddAllArgs(CmdArgs, {options::OPT_flang_experimental_hlfir,
147-
options::OPT_flang_experimental_polymorphism});
147+
options::OPT_flang_experimental_polymorphism,
148+
options::OPT_fno_ppc_native_vec_elem_order,
149+
options::OPT_fppc_native_vec_elem_order});
148150
}
149151

150152
void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const {

flang/include/flang/Lower/CustomIntrinsicCall.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -103,11 +103,12 @@ lowerCustomIntrinsic(fir::FirOpBuilder &builder, mlir::Location loc,
103103
/// Generate the FIR+MLIR operations for the generic intrinsic \p name
104104
/// with argument \p args and expected result type \p resultType.
105105
/// Returned fir::ExtendedValue is the returned Fortran intrinsic value.
106-
fir::ExtendedValue genIntrinsicCall(fir::FirOpBuilder &builder,
107-
mlir::Location loc, llvm::StringRef name,
108-
std::optional<mlir::Type> resultType,
109-
llvm::ArrayRef<fir::ExtendedValue> args,
110-
StatementContext &stmtCtx);
106+
fir::ExtendedValue
107+
genIntrinsicCall(fir::FirOpBuilder &builder, mlir::Location loc,
108+
llvm::StringRef name, std::optional<mlir::Type> resultType,
109+
llvm::ArrayRef<fir::ExtendedValue> args,
110+
StatementContext &stmtCtx,
111+
Fortran::lower::AbstractConverter *converter = nullptr);
111112

112113
} // namespace lower
113114
} // namespace Fortran

flang/include/flang/Lower/LoweringOptions.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,5 +31,8 @@ ENUM_LOWERINGOPT(PolymorphicTypeImpl, unsigned, 1, 0)
3131
/// Off by default until fully ready.
3232
ENUM_LOWERINGOPT(LowerToHighLevelFIR, unsigned, 1, 0)
3333

34+
/// If true, use the non-native (reversed) PowerPC vector element order; set by -fno-ppc-native-vector-element-order.
35+
ENUM_LOWERINGOPT(NoPPCNativeVecElemOrder, unsigned, 1, 0)
36+
3437
#undef LOWERINGOPT
3538
#undef ENUM_LOWERINGOPT

flang/include/flang/Optimizer/Builder/IntrinsicCall.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#ifndef FORTRAN_LOWER_INTRINSICCALL_H
1010
#define FORTRAN_LOWER_INTRINSICCALL_H
1111

12+
#include "flang/Lower/AbstractConverter.h"
1213
#include "flang/Optimizer/Builder/BoxValue.h"
1314
#include "flang/Optimizer/Builder/FIRBuilder.h"
1415
#include "flang/Optimizer/Builder/Runtime/Character.h"
@@ -34,7 +35,8 @@ class StatementContext;
3435
std::pair<fir::ExtendedValue, bool>
3536
genIntrinsicCall(fir::FirOpBuilder &, mlir::Location, llvm::StringRef name,
3637
std::optional<mlir::Type> resultType,
37-
llvm::ArrayRef<fir::ExtendedValue> args);
38+
llvm::ArrayRef<fir::ExtendedValue> args,
39+
Fortran::lower::AbstractConverter *converter = nullptr);
3840

3941
/// Enums used to templatize and share lowering of MIN and MAX.
4042
enum class Extremum { Min, Max };
@@ -124,8 +126,10 @@ struct IntrinsicArgumentLoweringRules;
124126
struct IntrinsicLibrary {
125127

126128
// Constructors.
127-
explicit IntrinsicLibrary(fir::FirOpBuilder &builder, mlir::Location loc)
128-
: builder{builder}, loc{loc} {}
129+
explicit IntrinsicLibrary(
130+
fir::FirOpBuilder &builder, mlir::Location loc,
131+
Fortran::lower::AbstractConverter *converter = nullptr)
132+
: builder{builder}, loc{loc}, converter{converter} {}
129133
IntrinsicLibrary() = delete;
130134
IntrinsicLibrary(const IntrinsicLibrary &) = delete;
131135

@@ -416,6 +420,7 @@ struct IntrinsicLibrary {
416420
fir::FirOpBuilder &builder;
417421
mlir::Location loc;
418422
bool resultMustBeFreed = false;
423+
Fortran::lower::AbstractConverter *converter = nullptr;
419424
};
420425

421426
struct IntrinsicDummyArgument {

flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,11 @@ struct PPCIntrinsicLibrary : IntrinsicLibrary {
135135
PPCIntrinsicLibrary() = delete;
136136
PPCIntrinsicLibrary(const PPCIntrinsicLibrary &) = delete;
137137

138+
// Helper functions for vector element ordering.
139+
bool isBEVecElemOrderOnLE();
140+
bool isNativeVecElemOrderOnLE();
141+
bool changeVecElemOrder();
142+
138143
// PPC MMA intrinsic generic handler
139144
template <MMAOp IntrId, MMAHandlerOp HandlerOp>
140145
void genMmaIntr(llvm::ArrayRef<fir::ExtendedValue>);

flang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -954,6 +954,11 @@ bool CompilerInvocation::createFromArgs(
954954
res.loweringOpts.setPolymorphicTypeImpl(true);
955955
}
956956

957+
// -fno-ppc-native-vector-element-order
958+
if (args.hasArg(clang::driver::options::OPT_fno_ppc_native_vec_elem_order)) {
959+
res.loweringOpts.setNoPPCNativeVecElemOrder(true);
960+
}
961+
957962
success &= parseFrontendArgs(res.getFrontendOpts(), args, diags);
958963
parseTargetArgs(res.getTargetOpts(), args);
959964
parsePreprocessorArgs(res.getPreprocessorOpts(), args);

flang/lib/Lower/ConvertExpr.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1928,7 +1928,7 @@ class ScalarExprLowering {
19281928
}
19291929
// Let the intrinsic library lower the intrinsic procedure call
19301930
return Fortran::lower::genIntrinsicCall(builder, getLoc(), name, resultType,
1931-
operands, stmtCtx);
1931+
operands, stmtCtx, &converter);
19321932
}
19331933

19341934
/// helper to detect statement functions

flang/lib/Lower/CustomIntrinsicCall.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,10 @@ Fortran::lower::genIntrinsicCall(fir::FirOpBuilder &builder, mlir::Location loc,
9898
llvm::StringRef name,
9999
std::optional<mlir::Type> resultType,
100100
llvm::ArrayRef<fir::ExtendedValue> args,
101-
Fortran::lower::StatementContext &stmtCtx) {
101+
Fortran::lower::StatementContext &stmtCtx,
102+
Fortran::lower::AbstractConverter *converter) {
102103
auto [result, mustBeFreed] =
103-
fir::genIntrinsicCall(builder, loc, name, resultType, args);
104+
fir::genIntrinsicCall(builder, loc, name, resultType, args, converter);
104105
if (mustBeFreed) {
105106
mlir::Value addr = fir::getBase(result);
106107
if (auto *box = result.getBoxOf<fir::BoxValue>())

flang/lib/Optimizer/Builder/IntrinsicCall.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5770,9 +5770,10 @@ lowerIntrinsicArgumentAs(const IntrinsicArgumentLoweringRules &rules,
57705770
std::pair<fir::ExtendedValue, bool>
57715771
genIntrinsicCall(fir::FirOpBuilder &builder, mlir::Location loc,
57725772
llvm::StringRef name, std::optional<mlir::Type> resultType,
5773-
llvm::ArrayRef<fir::ExtendedValue> args) {
5774-
return IntrinsicLibrary{builder, loc}.genIntrinsicCall(name, resultType,
5775-
args);
5773+
llvm::ArrayRef<fir::ExtendedValue> args,
5774+
Fortran::lower::AbstractConverter *converter) {
5775+
return IntrinsicLibrary{builder, loc, converter}.genIntrinsicCall(
5776+
name, resultType, args);
57765777
}
57775778

57785779
mlir::Value genMax(fir::FirOpBuilder &builder, mlir::Location loc,

flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,20 @@ checkPPCMathOperationsRange(llvm::StringRef name) {
360360
return ppcMathOps.equal_range(name);
361361
}
362362

363+
// Helper functions for vector element ordering.
364+
bool PPCIntrinsicLibrary::isBEVecElemOrderOnLE() {
365+
return (Fortran::evaluate::isHostLittleEndian &&
366+
converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
367+
}
368+
bool PPCIntrinsicLibrary::isNativeVecElemOrderOnLE() {
369+
return (Fortran::evaluate::isHostLittleEndian &&
370+
!converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
371+
}
372+
bool PPCIntrinsicLibrary::changeVecElemOrder() {
373+
return (Fortran::evaluate::isHostLittleEndian !=
374+
converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
375+
}
376+
363377
static mlir::FunctionType genMmaVpFuncType(mlir::MLIRContext *context,
364378
int quadCnt, int pairCnt, int vecCnt,
365379
int intCnt = 0,
@@ -1014,8 +1028,8 @@ PPCIntrinsicLibrary::genVecConvert(mlir::Type resultType,
10141028

10151029
mlir::Value newArgs[]{vArg1};
10161030
if (vecTyInfo.isFloat32()) {
1017-
// TODO: Handle element ordering
1018-
newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
1031+
if (changeVecElemOrder())
1032+
newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
10191033

10201034
const llvm::StringRef fname{"llvm.ppc.vsx.xvcvspdp"};
10211035
auto ftype{
@@ -1036,8 +1050,8 @@ PPCIntrinsicLibrary::genVecConvert(mlir::Type resultType,
10361050
auto mvf32Ty{mlir::VectorType::get(4, f32type)};
10371051
newArgs[0] = builder.createConvert(loc, mvf32Ty, newArgs[0]);
10381052

1039-
// TODO: Handle element ordering
1040-
newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
1053+
if (changeVecElemOrder())
1054+
newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
10411055

10421056
return builder.createConvert(loc, fvf32Ty, newArgs[0]);
10431057
}

flang/test/Driver/driver-help-hidden.f90

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@
5050
! CHECK-NEXT: -fno-automatic Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE
5151
! CHECK-NEXT: -fno-color-diagnostics Disable colors in diagnostics
5252
! CHECK-NEXT: -fno-integrated-as Disable the integrated assembler
53+
! CHECK-NEXT: -fno-ppc-native-vector-element-order
54+
! CHECK-NEXT: Specifies PowerPC non-native vector element order
5355
! CHECK-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros
5456
! CHECK-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default)
5557
! CHECK-NEXT: -fno-version-loops-for-stride
@@ -63,6 +65,8 @@
6365
! CHECK-NEXT: -foptimization-record-passes=<regex>
6466
! CHECK-NEXT: Only include passes which match a specified regular expression in the generated optimization record (by default, include all passes)
6567
! CHECK-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
68+
! CHECK-NEXT: -fppc-native-vector-element-order
69+
! CHECK-NEXT: Specifies PowerPC native vector element order
6670
! CHECK-NEXT: -freciprocal-math Allow division operations to be reassociated
6771
! CHECK-NEXT: -fsave-optimization-record=<format>
6872
! CHECK-NEXT: Generate an optimization record file in a specific format

flang/test/Driver/driver-help.f90

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@
4646
! HELP-NEXT: -fno-automatic Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE
4747
! HELP-NEXT: -fno-color-diagnostics Disable colors in diagnostics
4848
! HELP-NEXT: -fno-integrated-as Disable the integrated assembler
49+
! HELP-NEXT: -fno-ppc-native-vector-element-order
50+
! HELP-NEXT: Specifies PowerPC non-native vector element order
4951
! HELP-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros
5052
! HELP-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default)
5153
! HELP-NEXT: -fno-version-loops-for-stride
@@ -59,6 +61,8 @@
5961
! HELP-NEXT: -foptimization-record-passes=<regex>
6062
! HELP-NEXT: Only include passes which match a specified regular expression in the generated optimization record (by default, include all passes)
6163
! HELP-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
64+
! HELP-NEXT: -fppc-native-vector-element-order
65+
! HELP-NEXT: Specifies PowerPC native vector element order
6266
! HELP-NEXT: -freciprocal-math Allow division operations to be reassociated
6367
! HELP-NEXT: -fsave-optimization-record=<format>
6468
! HELP-NEXT: Generate an optimization record file in a specific format
@@ -158,6 +162,8 @@
158162
! HELP-FC1-NEXT: Do not use the analyzed objects when unparsing
159163
! HELP-FC1-NEXT: -fno-automatic Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE
160164
! HELP-FC1-NEXT: -fno-debug-pass-manager Disables debug printing for the new pass manager
165+
! HELP-FC1-NEXT: -fno-ppc-native-vector-element-order
166+
! HELP-FC1-NEXT: Specifies PowerPC non-native vector element order
161167
! HELP-FC1-NEXT: -fno-reformat Dump the cooked character stream in -E mode
162168
! HELP-FC1-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros
163169
! HELP-FC1-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default)
@@ -173,6 +179,8 @@
173179
! HELP-FC1-NEXT: Set OpenMP version (e.g. 45 for OpenMP 4.5, 50 for OpenMP 5.0). Default value is 50 for Clang and 11 for Flang
174180
! HELP-FC1-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code.
175181
! HELP-FC1-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
182+
! HELP-FC1-NEXT: -fppc-native-vector-element-order
183+
! HELP-FC1-NEXT: Specifies PowerPC native vector element order
176184
! HELP-FC1-NEXT: -freciprocal-math Allow division operations to be reassociated
177185
! HELP-FC1-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size
178186
! HELP-FC1-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages

flang/test/Driver/frontend-forwarding.f90

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
! RUN: -fversion-loops-for-stride \
1919
! RUN: -flang-experimental-polymorphism \
2020
! RUN: -flang-experimental-hlfir \
21+
! RUN: -fno-ppc-native-vector-element-order \
22+
! RUN: -fppc-native-vector-element-order \
2123
! RUN: -mllvm -print-before-all \
2224
! RUN: -save-temps=obj \
2325
! RUN: -P \
@@ -40,5 +42,7 @@
4042
! CHECK: "-fversion-loops-for-stride"
4143
! CHECK: "-flang-experimental-polymorphism"
4244
! CHECK: "-flang-experimental-hlfir"
45+
! CHECK: "-fno-ppc-native-vector-element-order"
46+
! CHECK: "-fppc-native-vector-element-order"
4347
! CHECK: "-mllvm" "-print-before-all"
4448
! CHECK: "-save-temps=obj"
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
! The CHECK prefix is enabled alongside FIR/LLVMIR so that the CHECK-LABEL
! directives in this file are actually verified by FileCheck.
! RUN: bbc -emit-fir %s -fno-ppc-native-vector-element-order=true -o - | FileCheck --check-prefixes="CHECK,FIR" %s
! RUN: %flang_fc1 -emit-llvm %s -fno-ppc-native-vector-element-order -o - | FileCheck --check-prefixes="CHECK,LLVMIR" %s
3+
! REQUIRES: target=powerpc{{.*}}
4+
5+
! CHECK-LABEL: vec_cvf_test_r4r8
6+
subroutine vec_cvf_test_r4r8(arg1)
7+
vector(real(8)), intent(in) :: arg1
8+
vector(real(4)) :: r
9+
r = vec_cvf(arg1)
10+
11+
! FIR: %[[arg:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<2:f64>>
12+
! FIR: %[[carg:.*]] = fir.convert %[[arg]] : (!fir.vector<2:f64>) -> vector<2xf64>
13+
! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.xvcvdpsp(%[[carg]]) fastmath<contract> : (vector<2xf64>) -> !fir.vector<4:f32>
14+
! FIR: %[[ccall:.*]] = fir.convert %[[call]] : (!fir.vector<4:f32>) -> vector<4xf32>
15+
! FIR: %[[r:.*]] = fir.convert %[[ccall]] : (vector<4xf32>) -> !fir.vector<4:f32>
16+
! FIR: fir.store %[[r]] to %{{.*}} : !fir.ref<!fir.vector<4:f32>>
17+
18+
! LLVMIR: %[[arg:.*]] = load <2 x double>, ptr %{{.*}}, align 16
19+
! LLVMIR: %[[call:.*]] = call contract <4 x float> @llvm.ppc.vsx.xvcvdpsp(<2 x double> %[[arg]])
20+
! LLVMIR: store <4 x float> %[[call]], ptr %{{.*}}, align 16
21+
end subroutine vec_cvf_test_r4r8
22+
23+
! CHECK-LABEL: vec_cvf_test_r8r4
24+
subroutine vec_cvf_test_r8r4(arg1)
25+
vector(real(4)), intent(in) :: arg1
26+
vector(real(8)) :: r
27+
r = vec_cvf(arg1)
28+
29+
! FIR: %[[arg:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
30+
! FIR: %[[carg:.*]] = fir.convert %[[arg]] : (!fir.vector<4:f32>) -> vector<4xf32>
31+
! FIR: %[[call:.*]] = fir.call @llvm.ppc.vsx.xvcvspdp(%[[carg]]) fastmath<contract> : (vector<4xf32>) -> !fir.vector<2:f64>
32+
! FIR: fir.store %[[call]] to %{{.*}} : !fir.ref<!fir.vector<2:f64>>
33+
34+
! LLVMIR: %[[arg:.*]] = load <4 x float>, ptr %{{.*}}, align 16
35+
! LLVMIR: %[[r:.*]] = call contract <2 x double> @llvm.ppc.vsx.xvcvspdp(<4 x float> %[[arg]])
36+
! LLVMIR: store <2 x double> %[[r]], ptr %{{.*}}, align 16
37+
end subroutine vec_cvf_test_r8r4

flang/tools/bbc/bbc.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,11 @@ static llvm::cl::opt<bool> enablePolymorphic(
186186
llvm::cl::desc("enable polymorphic type lowering (experimental)"),
187187
llvm::cl::init(false));
188188

189+
static llvm::cl::opt<bool> enableNoPPCNativeVecElemOrder(
190+
"fno-ppc-native-vector-element-order",
191+
llvm::cl::desc("no PowerPC native vector element order."),
192+
llvm::cl::init(false));
193+
189194
static llvm::cl::opt<bool> useHLFIR("hlfir",
190195
llvm::cl::desc("Lower to high level FIR"),
191196
llvm::cl::init(false));
@@ -289,6 +294,7 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
289294
// Use default lowering options for bbc.
290295
Fortran::lower::LoweringOptions loweringOptions{};
291296
loweringOptions.setPolymorphicTypeImpl(enablePolymorphic);
297+
loweringOptions.setNoPPCNativeVecElemOrder(enableNoPPCNativeVecElemOrder);
292298
loweringOptions.setLowerToHighLevelFIR(useHLFIR || emitHLFIR);
293299
auto burnside = Fortran::lower::LoweringBridge::create(
294300
ctx, semanticsContext, defKinds, semanticsContext.intrinsics(),

0 commit comments

Comments
 (0)