@@ -61,78 +61,6 @@ entry:
   ret <vscale x 2 x i64> %partial.reduce
 }
 
-define <vscale x 4 x i64> @dotp_8to64(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
-; CHECK-LABEL: dotp_8to64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov z2.s, #0 // =0x0
-; CHECK-NEXT:    udot z2.s, z0.b, z1.b
-; CHECK-NEXT:    uunpklo z0.d, z2.s
-; CHECK-NEXT:    uunpkhi z1.d, z2.s
-; CHECK-NEXT:    ret
-entry:
-  %a.wide = zext <vscale x 16 x i8> %a to <vscale x 16 x i64>
-  %b.wide = zext <vscale x 16 x i8> %b to <vscale x 16 x i64>
-  %mult = mul nuw nsw <vscale x 16 x i64> %a.wide, %b.wide
-  %partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(
-      <vscale x 4 x i64> zeroinitializer, <vscale x 16 x i64> %mult)
-  ret <vscale x 4 x i64> %partial.reduce
-}
-
-define <vscale x 4 x i64> @dotp_sext_8to64(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
-; CHECK-LABEL: dotp_sext_8to64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov z2.s, #0 // =0x0
-; CHECK-NEXT:    sdot z2.s, z0.b, z1.b
-; CHECK-NEXT:    sunpklo z0.d, z2.s
-; CHECK-NEXT:    sunpkhi z1.d, z2.s
-; CHECK-NEXT:    ret
-entry:
-  %a.wide = sext <vscale x 16 x i8> %a to <vscale x 16 x i64>
-  %b.wide = sext <vscale x 16 x i8> %b to <vscale x 16 x i64>
-  %mult = mul nuw nsw <vscale x 16 x i64> %a.wide, %b.wide
-  %partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(
-      <vscale x 4 x i64> zeroinitializer, <vscale x 16 x i64> %mult)
-  ret <vscale x 4 x i64> %partial.reduce
-}
-
-define <vscale x 4 x i64> @dotp_8to64_accumulator(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 4 x i64> %acc) {
-; CHECK-LABEL: dotp_8to64_accumulator:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov z4.s, #0 // =0x0
-; CHECK-NEXT:    udot z4.s, z0.b, z1.b
-; CHECK-NEXT:    uunpklo z0.d, z4.s
-; CHECK-NEXT:    uunpkhi z1.d, z4.s
-; CHECK-NEXT:    add z0.d, z2.d, z0.d
-; CHECK-NEXT:    add z1.d, z3.d, z1.d
-; CHECK-NEXT:    ret
-entry:
-  %a.wide = zext <vscale x 16 x i8> %a to <vscale x 16 x i64>
-  %b.wide = zext <vscale x 16 x i8> %b to <vscale x 16 x i64>
-  %mult = mul nuw nsw <vscale x 16 x i64> %a.wide, %b.wide
-  %partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(
-      <vscale x 4 x i64> %acc, <vscale x 16 x i64> %mult)
-  ret <vscale x 4 x i64> %partial.reduce
-}
-
-define <vscale x 4 x i64> @dotp_sext_8to64_accumulator(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 4 x i64> %acc) {
-; CHECK-LABEL: dotp_sext_8to64_accumulator:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov z4.s, #0 // =0x0
-; CHECK-NEXT:    sdot z4.s, z0.b, z1.b
-; CHECK-NEXT:    sunpklo z0.d, z4.s
-; CHECK-NEXT:    sunpkhi z1.d, z4.s
-; CHECK-NEXT:    add z0.d, z2.d, z0.d
-; CHECK-NEXT:    add z1.d, z3.d, z1.d
-; CHECK-NEXT:    ret
-entry:
-  %a.wide = sext <vscale x 16 x i8> %a to <vscale x 16 x i64>
-  %b.wide = sext <vscale x 16 x i8> %b to <vscale x 16 x i64>
-  %mult = mul nuw nsw <vscale x 16 x i64> %a.wide, %b.wide
-  %partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(
-      <vscale x 4 x i64> %acc, <vscale x 16 x i64> %mult)
-  ret <vscale x 4 x i64> %partial.reduce
-}
-
 define <vscale x 4 x i32> @not_dotp(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
 ; CHECK-LABEL: not_dotp:
 ; CHECK:       // %bb.0: // %entry