Commit a5c7f81

[RISCV] Add test coverage for DAG store merging of floating point values

1 parent afae7c9 commit a5c7f81

1 file changed: +167 -1 lines

llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll (167 additions & 1 deletion)
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s --check-prefixes=CHECK,V
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh | FileCheck %s --check-prefixes=CHECK,ZVFH
 
 declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
 declare void @g()
@@ -327,3 +328,168 @@ define void @v16i8_v32i8(ptr %p, ptr %q) {
   store <16 x i8> %x1, ptr %q1
   ret void
 }
+
+; TODO: We fail to merge these, which would be profitable.
+define void @two_half(ptr %p, ptr %q) {
+; V-LABEL: two_half:
+; V:       # %bb.0:
+; V-NEXT:    addi sp, sp, -32
+; V-NEXT:    .cfi_def_cfa_offset 32
+; V-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; V-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; V-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; V-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; V-NEXT:    .cfi_offset ra, -8
+; V-NEXT:    .cfi_offset s0, -16
+; V-NEXT:    .cfi_offset s1, -24
+; V-NEXT:    .cfi_offset s2, -32
+; V-NEXT:    lh s1, 0(a0)
+; V-NEXT:    lh s2, 2(a0)
+; V-NEXT:    mv s0, a1
+; V-NEXT:    call g
+; V-NEXT:    sh s1, 0(s0)
+; V-NEXT:    sh s2, 2(s0)
+; V-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; V-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; V-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; V-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; V-NEXT:    .cfi_restore ra
+; V-NEXT:    .cfi_restore s0
+; V-NEXT:    .cfi_restore s1
+; V-NEXT:    .cfi_restore s2
+; V-NEXT:    addi sp, sp, 32
+; V-NEXT:    .cfi_def_cfa_offset 0
+; V-NEXT:    ret
+;
+; ZVFH-LABEL: two_half:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi sp, sp, -32
+; ZVFH-NEXT:    .cfi_def_cfa_offset 32
+; ZVFH-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT:    .cfi_offset ra, -8
+; ZVFH-NEXT:    .cfi_offset s0, -16
+; ZVFH-NEXT:    csrr a2, vlenb
+; ZVFH-NEXT:    sub sp, sp, a2
+; ZVFH-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb
+; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0)
+; ZVFH-NEXT:    addi a0, sp, 16
+; ZVFH-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFH-NEXT:    mv s0, a1
+; ZVFH-NEXT:    call g
+; ZVFH-NEXT:    addi a0, sp, 16
+; ZVFH-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT:    vse16.v v8, (s0)
+; ZVFH-NEXT:    csrr a0, vlenb
+; ZVFH-NEXT:    add sp, sp, a0
+; ZVFH-NEXT:    .cfi_def_cfa sp, 32
+; ZVFH-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT:    .cfi_restore ra
+; ZVFH-NEXT:    .cfi_restore s0
+; ZVFH-NEXT:    addi sp, sp, 32
+; ZVFH-NEXT:    .cfi_def_cfa_offset 0
+; ZVFH-NEXT:    ret
+  %p0 = getelementptr i8, ptr %p, i64 0
+  %p1 = getelementptr i8, ptr %p, i64 2
+  %x0 = load half, ptr %p0
+  %x1 = load half, ptr %p1
+  call void @g()
+  %q0 = getelementptr i8, ptr %q, i64 0
+  %q1 = getelementptr i8, ptr %q, i64 2
+  store half %x0, ptr %q0
+  store half %x1, ptr %q1
+  ret void
+}
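
The TODO above refers to the two half values being adjacent (offsets 0 and 2), so both the loads and the stores could in principle be merged into single 32-bit scalar accesses that stay in a GPR across the call. A hypothetical sketch of the merged core, assuming the wider access is legal at the known alignment (register choices illustrative, prologue/epilogue elided):

        lw s1, 0(a0)        # one i32 load covers both halves at offsets 0 and 2
        mv s0, a1
        call g
        sw s1, 0(s0)        # one i32 store writes both halves back

This would also need one callee-saved register fewer than the two lh/sh pairs in the V output above.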
+
+; TODO: This one is currently a vector which is unprofitable, we should
+; use i64 instead.
+define void @two_float(ptr %p, ptr %q) {
+; CHECK-LABEL: two_float:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -32
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset ra, -8
+; CHECK-NEXT:    .cfi_offset s0, -16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    mv s0, a1
+; CHECK-NEXT:    call g
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vse32.v v8, (s0)
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    .cfi_def_cfa sp, 32
+; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    .cfi_restore ra
+; CHECK-NEXT:    .cfi_restore s0
+; CHECK-NEXT:    addi sp, sp, 32
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    ret
+  %p0 = getelementptr i8, ptr %p, i64 0
+  %p1 = getelementptr i8, ptr %p, i64 4
+  %x0 = load float, ptr %p0
+  %x1 = load float, ptr %p1
+  call void @g()
+  %q0 = getelementptr i8, ptr %q, i64 0
+  %q1 = getelementptr i8, ptr %q, i64 4
+  store float %x0, ptr %q0
+  store float %x1, ptr %q1
+  ret void
+}
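
Per the TODO, an i64 scalar merge would keep the pair of floats in a GPR across the call and avoid the vlenb stack adjustment and the unknown-size vector spill/reload entirely. A hypothetical sketch of that form, again assuming the 8-byte access is legal at the known alignment:

        ld s1, 0(a0)        # one i64 load covers both floats at offsets 0 and 4
        mv s0, a1
        call g
        sd s1, 0(s0)        # one i64 store writes both floats back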
+
+define void @two_double(ptr %p, ptr %q) {
+; CHECK-LABEL: two_double:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -32
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset ra, -8
+; CHECK-NEXT:    .cfi_offset s0, -16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb
+; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    mv s0, a1
+; CHECK-NEXT:    call g
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT:    vse64.v v8, (s0)
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    .cfi_def_cfa sp, 32
+; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    .cfi_restore ra
+; CHECK-NEXT:    .cfi_restore s0
+; CHECK-NEXT:    addi sp, sp, 32
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    ret
+  %p0 = getelementptr i8, ptr %p, i64 0
+  %p1 = getelementptr i8, ptr %p, i64 8
+  %x0 = load double, ptr %p0
+  %x1 = load double, ptr %p1
+  call void @g()
+  %q0 = getelementptr i8, ptr %q, i64 0
+  %q1 = getelementptr i8, ptr %q, i64 8
+  store double %x0, ptr %q0
+  store double %x1, ptr %q1
+  ret void
+}
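
Here the two doubles span 16 bytes, which no single scalar access on riscv64 can cover, so the merge produces the <2 x double> vector form above. For comparison, the unmerged scalar alternative would be two ld/sd pairs through callee-saved GPRs, roughly (hypothetical sketch, prologue/epilogue elided):

        ld s1, 0(a0)        # load each double's bits as an i64
        ld s2, 8(a0)
        mv s0, a1
        call g
        sd s1, 0(s0)        # store both doubles back after the call
        sd s2, 8(s0)

Whether the vector merge pays off here depends on the cost of the unknown-size vector spill/reload around the call versus the two extra GPR spills.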
