
Commit e0a951f (1 parent: 9808e1f)

[RISCV] Extend zvqdotq tests to cover use of accumulator operand
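The new tests exercise the i8 dot-product pattern when its result feeds an existing i32 accumulator: a vector accumulator added before the reduction (the *_accum tests), a scalar operand added after the reduction (the *_scalar_add tests), and two dot products summed together (vqdot_vv_split). A minimal IR sketch of the vector-accumulator shape, distilled from the diff below (the declare line and the function name @dot_accum_sketch are illustrative, not part of the commit):

declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)

define i32 @dot_accum_sketch(<16 x i8> %a, <16 x i8> %b, <16 x i32> %acc) {
entry:
  ; widen both i8 operands to i32 and multiply elementwise
  %a.ext = sext <16 x i8> %a to <16 x i32>
  %b.ext = sext <16 x i8> %b to <16 x i32>
  %mul = mul nuw nsw <16 x i32> %a.ext, %b.ext
  ; fold the existing vector accumulator in before the horizontal reduction
  %acc.add = add <16 x i32> %mul, %acc
  %sum = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %acc.add)
  ret i32 %sum
}

In the diff itself, the accumulator tests currently lower through widening multiply/macc instructions and vredsum (plain CHECK lines), while the scalar-add tests are split into NODOT and DOT check prefixes, with the DOT output using vqdot.vv, vqdotu.vv and vqdotsu.vv.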


llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll

Lines changed: 183 additions & 0 deletions
@@ -297,3 +297,186 @@ entry:
   %res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a.ext)
   ret i32 %res
 }
+
+define i32 @vqdot_vv_accum(<16 x i8> %a, <16 x i8> %b, <16 x i32> %x) {
+; CHECK-LABEL: vqdot_vv_accum:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vsext.vf2 v16, v9
+; CHECK-NEXT:    vwmacc.vv v12, v10, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmv.s.x v8, zero
+; CHECK-NEXT:    vredsum.vs v8, v12, v8
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a.sext = sext <16 x i8> %a to <16 x i32>
+  %b.sext = sext <16 x i8> %b to <16 x i32>
+  %mul = mul nuw nsw <16 x i32> %a.sext, %b.sext
+  %add = add <16 x i32> %mul, %x
+  %sum = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %add)
+  ret i32 %sum
+}
+
+define i32 @vqdotu_vv_accum(<16 x i8> %a, <16 x i8> %b, <16 x i32> %x) {
+; CHECK-LABEL: vqdotu_vv_accum:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vwmulu.vv v10, v8, v9
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vwaddu.wv v12, v12, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmv.s.x v8, zero
+; CHECK-NEXT:    vredsum.vs v8, v12, v8
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a.zext = zext <16 x i8> %a to <16 x i32>
+  %b.zext = zext <16 x i8> %b to <16 x i32>
+  %mul = mul nuw nsw <16 x i32> %a.zext, %b.zext
+  %add = add <16 x i32> %mul, %x
+  %sum = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %add)
+  ret i32 %sum
+}
+
+define i32 @vqdotsu_vv_accum(<16 x i8> %a, <16 x i8> %b, <16 x i32> %x) {
+; CHECK-LABEL: vqdotsu_vv_accum:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v8
+; CHECK-NEXT:    vzext.vf2 v16, v9
+; CHECK-NEXT:    vwmaccsu.vv v12, v10, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmv.s.x v8, zero
+; CHECK-NEXT:    vredsum.vs v8, v12, v8
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a.sext = sext <16 x i8> %a to <16 x i32>
+  %b.zext = zext <16 x i8> %b to <16 x i32>
+  %mul = mul nuw nsw <16 x i32> %a.sext, %b.zext
+  %add = add <16 x i32> %mul, %x
+  %sum = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %add)
+  ret i32 %sum
+}
+
+define i32 @vqdot_vv_scalar_add(<16 x i8> %a, <16 x i8> %b, i32 %x) {
+; NODOT-LABEL: vqdot_vv_scalar_add:
+; NODOT:       # %bb.0: # %entry
+; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; NODOT-NEXT:    vsext.vf2 v12, v8
+; NODOT-NEXT:    vsext.vf2 v14, v9
+; NODOT-NEXT:    vwmul.vv v8, v12, v14
+; NODOT-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; NODOT-NEXT:    vmv.s.x v12, a0
+; NODOT-NEXT:    vredsum.vs v8, v8, v12
+; NODOT-NEXT:    vmv.x.s a0, v8
+; NODOT-NEXT:    ret
+;
+; DOT-LABEL: vqdot_vv_scalar_add:
+; DOT:       # %bb.0: # %entry
+; DOT-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; DOT-NEXT:    vmv.v.i v10, 0
+; DOT-NEXT:    vqdot.vv v10, v8, v9
+; DOT-NEXT:    vmv.s.x v8, a0
+; DOT-NEXT:    vredsum.vs v8, v10, v8
+; DOT-NEXT:    vmv.x.s a0, v8
+; DOT-NEXT:    ret
+entry:
+  %a.sext = sext <16 x i8> %a to <16 x i32>
+  %b.sext = sext <16 x i8> %b to <16 x i32>
+  %mul = mul nuw nsw <16 x i32> %a.sext, %b.sext
+  %sum = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+  %add = add i32 %sum, %x
+  ret i32 %add
+}
+
+define i32 @vqdotu_vv_scalar_add(<16 x i8> %a, <16 x i8> %b, i32 %x) {
+; NODOT-LABEL: vqdotu_vv_scalar_add:
+; NODOT:       # %bb.0: # %entry
+; NODOT-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; NODOT-NEXT:    vwmulu.vv v10, v8, v9
+; NODOT-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; NODOT-NEXT:    vmv.s.x v8, a0
+; NODOT-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; NODOT-NEXT:    vwredsumu.vs v8, v10, v8
+; NODOT-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; NODOT-NEXT:    vmv.x.s a0, v8
+; NODOT-NEXT:    ret
+;
+; DOT-LABEL: vqdotu_vv_scalar_add:
+; DOT:       # %bb.0: # %entry
+; DOT-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; DOT-NEXT:    vmv.v.i v10, 0
+; DOT-NEXT:    vqdotu.vv v10, v8, v9
+; DOT-NEXT:    vmv.s.x v8, a0
+; DOT-NEXT:    vredsum.vs v8, v10, v8
+; DOT-NEXT:    vmv.x.s a0, v8
+; DOT-NEXT:    ret
+entry:
+  %a.zext = zext <16 x i8> %a to <16 x i32>
+  %b.zext = zext <16 x i8> %b to <16 x i32>
+  %mul = mul nuw nsw <16 x i32> %a.zext, %b.zext
+  %sum = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+  %add = add i32 %sum, %x
+  ret i32 %add
+}
+
+define i32 @vqdotsu_vv_scalar_add(<16 x i8> %a, <16 x i8> %b, i32 %x) {
+; NODOT-LABEL: vqdotsu_vv_scalar_add:
+; NODOT:       # %bb.0: # %entry
+; NODOT-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; NODOT-NEXT:    vsext.vf2 v12, v8
+; NODOT-NEXT:    vzext.vf2 v14, v9
+; NODOT-NEXT:    vwmulsu.vv v8, v12, v14
+; NODOT-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; NODOT-NEXT:    vmv.s.x v12, a0
+; NODOT-NEXT:    vredsum.vs v8, v8, v12
+; NODOT-NEXT:    vmv.x.s a0, v8
+; NODOT-NEXT:    ret
+;
+; DOT-LABEL: vqdotsu_vv_scalar_add:
+; DOT:       # %bb.0: # %entry
+; DOT-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; DOT-NEXT:    vmv.v.i v10, 0
+; DOT-NEXT:    vqdotsu.vv v10, v8, v9
+; DOT-NEXT:    vmv.s.x v8, a0
+; DOT-NEXT:    vredsum.vs v8, v10, v8
+; DOT-NEXT:    vmv.x.s a0, v8
+; DOT-NEXT:    ret
+entry:
+  %a.sext = sext <16 x i8> %a to <16 x i32>
+  %b.zext = zext <16 x i8> %b to <16 x i32>
+  %mul = mul nuw nsw <16 x i32> %a.sext, %b.zext
+  %sum = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+  %add = add i32 %sum, %x
+  ret i32 %add
+}
+
+define i32 @vqdot_vv_split(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: vqdot_vv_split:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsext.vf2 v14, v9
+; CHECK-NEXT:    vsext.vf2 v16, v10
+; CHECK-NEXT:    vsext.vf2 v18, v11
+; CHECK-NEXT:    vwmul.vv v8, v12, v14
+; CHECK-NEXT:    vwmacc.vv v8, v16, v18
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmv.s.x v12, zero
+; CHECK-NEXT:    vredsum.vs v8, v8, v12
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a.sext = sext <16 x i8> %a to <16 x i32>
+  %b.sext = sext <16 x i8> %b to <16 x i32>
+  %mul = mul nuw nsw <16 x i32> %a.sext, %b.sext
+  %c.sext = sext <16 x i8> %c to <16 x i32>
+  %d.sext = sext <16 x i8> %d to <16 x i32>
+  %mul2 = mul nuw nsw <16 x i32> %c.sext, %d.sext
+  %add = add <16 x i32> %mul, %mul2
+  %sum = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %add)
+  ret i32 %sum
+}
