@@ -814,10 +814,10 @@ let arguments = (ins
814
814
}];
815
815
}
816
816
817
- class OuterProductWideBase <string mnemonic,
818
- list<Type> allowedInputVectorTypes,
819
- list<Type> allowedResultVectorTypes,
820
- int numOuterProducts> :
817
+ class OuterProductWideningBase <string mnemonic,
818
+ list<Type> allowedInputVectorTypes,
819
+ list<Type> allowedResultVectorTypes,
820
+ int numOuterProducts> :
821
821
ArmSME_Op<mnemonic, [
822
822
ArmSMETileOpInterface,
823
823
AttrSizedOperandSegments,
@@ -869,14 +869,14 @@ class OuterProductWideBase<string mnemonic,
869
869
}];
870
870
}
871
871
872
- class OuterProductWide2Way <string mnemonic,
873
- list<Type> allowedInputVectorTypes,
874
- list<Type> allowedResultVectorTypes>
875
- : OuterProductWideBase <mnemonic, allowedInputVectorTypes,
876
- allowedResultVectorTypes, /*numOuterProducts=*/2>;
872
+ class OuterProduct2Way <string mnemonic,
873
+ list<Type> allowedInputVectorTypes,
874
+ list<Type> allowedResultVectorTypes>
875
+ : OuterProductWideningBase <mnemonic, allowedInputVectorTypes,
876
+ allowedResultVectorTypes, /*numOuterProducts=*/2>;
877
877
878
- def FMopaWide2WayOp
879
- : OuterProductWide2Way<"fmopa_wide_2way ",
878
+ def FMopa2WayOp
879
+ : OuterProduct2Way<"fmopa_2way ",
880
880
[ScalableVectorOfRankAndLengthAndType<[1], [8], [F16, BF16]>],
881
881
[nxnxv4f32]> {
882
882
let summary = "Floating-point sum of 2 outer products and accumulate";
@@ -888,14 +888,14 @@ def FMopaWide2WayOp
888
888
For example (fp16 to fp32):
889
889
890
890
```mlir
891
- %result = arm_sme.fmopa_wide_2way %lhs, %rhs :
891
+ %result = arm_sme.fmopa_2way %lhs, %rhs :
892
892
vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
893
893
```
894
894
895
895
The `lhs` encodes a matrix of shape SVLSx2 and the `rhs` a matrix of
896
896
2xSVLS, where SVLS (spec [1], section B2.1) is the number of 32-bit
897
897
elements in a vector of SVL bits. To illustrate, below is a breakdown of
898
- this operation for SVL=128 (i.e., vscale=1):
898
+ this operation for fp16 to fp32, SVL=128 (i.e., vscale=1):
899
899
900
900
```
901
901
LHS RHS
@@ -960,19 +960,19 @@ def FMopaWide2WayOp
960
960
```mlir
961
961
%a_packed = "llvm.intr.experimental.vector.interleave2"(%a0, %a1) : (vector<[4]xf16>, vector<[4]xf16>) -> vector<[8]xf16>
962
962
%b_packed = "llvm.intr.experimental.vector.interleave2"(%b0, %b1) : (vector<[4]xf16>, vector<[4]xf16>) -> vector<[8]xf16>
963
- %0 = arm_sme.fmopa_wide_2way %a_packed, %b_packed : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
963
+ %0 = arm_sme.fmopa_2way %a_packed, %b_packed : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
964
964
```
965
965
966
- This is implemented in the `-arm-sme-outer-product-widening ` pass.
966
+ This is implemented in the `-arm-sme-outer-product-fusion` pass.
967
967
968
968
Example: FP16 to FP32
969
969
```mlir
970
- %result = arm_sme.fmopa_wide_2way $lhs, $rhs : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
970
+ %result = arm_sme.fmopa_2way $lhs, $rhs : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
971
971
```
972
972
973
973
Example: BF16 to FP32
974
974
```mlir
975
- %result = arm_sme.fmopa_wide_2way $lhs, $rhs : vector<[8]xbf16>, vector<[8]xbf16> into vector<[4]x[4]xf32>
975
+ %result = arm_sme.fmopa_2way $lhs, $rhs : vector<[8]xbf16>, vector<[8]xbf16> into vector<[4]x[4]xf32>
976
976
```
977
977
978
978
| Spec | Features |
@@ -989,27 +989,27 @@ def FMopaWide2WayOp
989
989
// - FMOPA 4-way FP16 to FP32
990
990
// once intrinsic support lands in the backend.
991
991
992
- def FMopsWide2WayOp
993
- : OuterProductWide2Way<"fmops_wide_2way ",
992
+ def FMops2WayOp
993
+ : OuterProduct2Way<"fmops_2way ",
994
994
[ScalableVectorOfRankAndLengthAndType<[1], [8], [F16, BF16]>],
995
995
[nxnxv4f32]> {
996
996
let summary = "Floating-point sum of 2 outer products and subtract";
997
997
let description = [{
998
- Equivalent to `fmopa_wide_2way ` but outer products are subtracted from
998
+ Equivalent to `fmopa_2way` but outer products are subtracted from
999
999
destination `result`.
1000
1000
1001
1001
Example: FP16 to FP32
1002
1002
```mlir
1003
- %result = arm_sme.fmops_wide_2way $lhs, $rhs : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
1003
+ %result = arm_sme.fmops_2way $lhs, $rhs : vector<[8]xf16>, vector<[8]xf16> into vector<[4]x[4]xf32>
1004
1004
```
1005
1005
1006
1006
Example: BF16 to FP32
1007
1007
```mlir
1008
- %result = arm_sme.fmops_wide_2way $lhs, $rhs : vector<[8]xbf16>, vector<[8]xbf16> into vector<[4]x[4]xf32>
1008
+ %result = arm_sme.fmops_2way $lhs, $rhs : vector<[8]xbf16>, vector<[8]xbf16> into vector<[4]x[4]xf32>
+ ```
1009
1009
1010
1010
Refer to
1011
- [fmopa_wide_2way ](#arm_smefmopa_wide_2way-arm_smefmopa_wide_2wayop ) for a
1012
- detailed description of 2-way outer products.
1011
+ [fmopa_2way](#arm_smefmopa_2way-arm_smefmopa_2wayop) for a detailed
1012
+ description of 2-way outer products.
1013
1013
1014
1014
| Spec | Features |
1015
1015
| ---- | -------- |
@@ -1019,19 +1019,19 @@ def FMopsWide2WayOp
1019
1019
}];
1020
1020
}
1021
1021
1022
- def SMopaWide2WayOp
1023
- : OuterProductWide2Way<"smopa_wide_2way ",
1022
+ def SMopa2WayOp
1023
+ : OuterProduct2Way<"smopa_2way ",
1024
1024
[ScalableVectorOfRankAndLengthAndType<[1], [8], [I16]>],
1025
1025
[nxnxv4i32]> {
1026
1026
let summary = "Signed integer sum of 2 outer products and accumulate";
1027
1027
let description = [{
1028
1028
Example:
1029
1029
```mlir
1030
- %result = arm_sme.smopa_wide_2way $lhs, $rhs : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
1030
+ %result = arm_sme.smopa_2way $lhs, $rhs : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
+ ```
1031
1031
1032
1032
Refer to
1033
- [fmopa_wide_2way ](#arm_smefmopa_wide_2way-arm_smefmopa_wide_2wayop ) for a
1034
- detailed description of 2-way outer products.
1033
+ [fmopa_2way](#arm_smefmopa_2way-arm_smefmopa_2wayop) for a detailed
1034
+ description of 2-way outer products.
1035
1035
1036
1036
| Spec | Features |
1037
1037
| ---- | -------- |
@@ -1040,19 +1040,19 @@ def SMopaWide2WayOp
1040
1040
}];
1041
1041
}
1042
1042
1043
- def SMopsWide2WayOp
1044
- : OuterProductWide2Way<"smops_wide_2way ",
1043
+ def SMops2WayOp
1044
+ : OuterProduct2Way<"smops_2way ",
1045
1045
[ScalableVectorOfRankAndLengthAndType<[1], [8], [I16]>],
1046
1046
[nxnxv4i32]> {
1047
1047
let summary = "Signed integer sum of 2 outer products and subtract";
1048
1048
let description = [{
1049
1049
Example:
1050
1050
```mlir
1051
- %result = arm_sme.smops_wide_2way $lhs, $rhs : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
1051
+ %result = arm_sme.smops_2way $lhs, $rhs : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
+ ```
1052
1052
1053
1053
Refer to
1054
- [fmopa_wide_2way ](#arm_smefmopa_wide_2way-arm_smefmopa_wide_2wayop ) for a
1055
- detailed description of 2-way outer products.
1054
+ [fmopa_2way](#arm_smefmopa_2way-arm_smefmopa_2wayop) for a detailed
1055
+ description of 2-way outer products.
1056
1056
1057
1057
| Spec | Features |
1058
1058
| ---- | -------- |
@@ -1061,19 +1061,19 @@ def SMopsWide2WayOp
1061
1061
}];
1062
1062
}
1063
1063
1064
- def UMopaWide2WayOp
1065
- : OuterProductWide2Way<"umopa_wide_2way ",
1064
+ def UMopa2WayOp
1065
+ : OuterProduct2Way<"umopa_2way ",
1066
1066
[ScalableVectorOfRankAndLengthAndType<[1], [8], [I16]>],
1067
1067
[nxnxv4i32]> {
1068
1068
let summary = "Unsigned integer sum of 2 outer products and accumulate";
1069
1069
let description = [{
1070
1070
Example:
1071
1071
```mlir
1072
- %result = arm_sme.umopa_wide_2way $lhs, $rhs : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
1072
+ %result = arm_sme.umopa_2way $lhs, $rhs : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
+ ```
1073
1073
1074
1074
Refer to
1075
- [fmopa_wide_2way ](#arm_smefmopa_wide_2way-arm_smefmopa_wide_2wayop ) for a
1076
- detailed description of 2-way outer products.
1075
+ [fmopa_2way](#arm_smefmopa_2way-arm_smefmopa_2wayop) for a detailed
1076
+ description of 2-way outer products.
1077
1077
1078
1078
| Spec | Features |
1079
1079
| ---- | -------- |
@@ -1082,19 +1082,19 @@ def UMopaWide2WayOp
1082
1082
}];
1083
1083
}
1084
1084
1085
- def UMopsWide2WayOp
1086
- : OuterProductWide2Way<"umops_wide_2way ",
1085
+ def UMops2WayOp
1086
+ : OuterProduct2Way<"umops_2way ",
1087
1087
[ScalableVectorOfRankAndLengthAndType<[1], [8], [I16]>],
1088
1088
[nxnxv4i32]> {
1089
1089
let summary = "Unsigned integer sum of 2 outer products and subtract";
1090
1090
let description = [{
1091
1091
Example:
1092
1092
```mlir
1093
- %result = arm_sme.umops_wide_2way $lhs, $rhs : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
1093
+ %result = arm_sme.umops_2way $lhs, $rhs : vector<[8]xi16>, vector<[8]xi16> into vector<[4]x[4]xi32>
+ ```
1094
1094
1095
1095
Refer to
1096
- [fmopa_wide_2way ](#arm_smefmopa_wide_2way-arm_smefmopa_wide_2wayop ) for a
1097
- detailed description of 2-way outer products.
1096
+ [fmopa_2way](#arm_smefmopa_2way-arm_smefmopa_2wayop) for a detailed
1097
+ description of 2-way outer products.
1098
1098
1099
1099
| Spec | Features |
1100
1100
| ---- | -------- |
0 commit comments