|
17 | 17 | @ThreadGroupSize_Z = constant i32 1
|
18 | 18 |
|
19 | 19 | define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) {
|
20 |
| -entry: |
21 |
| -; CHECK: _main_0: |
| 20 | +; CHECK: _main_0: |
| 21 | +; CHECK-NEXT: mov (M1, 16) svn(0,0)<1> threadIdInGroupX(0,0)<1;1,0> |
| 22 | +; CHECK-NEXT: mov (M1, 16) nonuniform(0,0)<1> svn_0(0,0)<1;1,0> |
| 23 | +; CHECK-NEXT: add (M1_NM, 1) offset(0,0)<1> src1(0,0)<0;1,0> 0x1:w |
| 24 | +; CHECK-NEXT: mov (M1_NM, 1) offsetTrunc(0,0)<1> offset(0,0)<0;1,0> |
| 25 | +; CHECK-NEXT: mov (M1, 16) offsetTruncBroadcast(0,0)<1> offsetTrunc(0,0)<0;1,0> |
| 26 | +; CHECK-NEXT: setp (M1_NM, 16) P1 0x0:ud |
| 27 | +; CHECK-NEXT: setp (M1_NM, 16) P2 0x0:ud |
| 28 | +; CHECK-NEXT: setp (M1_NM, 16) P3 0x0:ud |
| 29 | +; CHECK-NEXT: lifetime.start call_ |
| 30 | +; |
| 31 | +; CHECK: _test1_001__opt_resource_loop: |
| 32 | +; CHECK-NEXT: setp (M1_NM, 16) P4 0x0:ud |
| 33 | +; CHECK-NEXT: setp (M1_NM, 16) P5 0x0:ud |
| 34 | +; CHECK-NEXT: cmp.eq (M1, 16) P5 V0032(0,0)<0;1,0> V0032(0,0)<0;1,0> |
| 35 | +; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5 |
| 36 | +; CHECK-NEXT: fbl (M1_NM, 1) V0034(0,0)<1> V0033(0,0)<0;1,0> |
| 37 | +; CHECK-NEXT: shl (M1_NM, 1) V0034(0,0)<1> V0034(0,0)<0;1,0> 0x2:w |
| 38 | +; CHECK-NEXT: addr_add (M1_NM, 1) A0(0)<1> &nonuniform V0035(0,0)<0;1,0> |
| 39 | +; CHECK-NEXT: mov (M1_NM, 1) V0036(0,0)<1> r[A0(0),0]<0;1,0>:d |
| 40 | +; CHECK-NEXT: cmp.eq (M1, 16) P6 V0036(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
| 41 | +; CHECK-NEXT: (P6) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0036)[offsetTruncBroadcast]:a32 |
| 42 | +; CHECK-NEXT: or (M1_NM, 16) P4 P4 P6 |
| 43 | +; CHECK-NEXT: xor (M1_NM, 16) P5 P5 P6 |
| 44 | +; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5 |
| 45 | +; CHECK-NEXT: fbl (M1_NM, 1) V0037(0,0)<1> V0033(0,0)<0;1,0> |
| 46 | +; CHECK-NEXT: and (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0xf:ud |
| 47 | +; CHECK-NEXT: shl (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0x2:w |
| 48 | +; CHECK-NEXT: addr_add (M1_NM, 1) A1(0)<1> &nonuniform V0038(0,0)<0;1,0> |
| 49 | +; CHECK-NEXT: mov (M1_NM, 1) V0039(0,0)<1> r[A1(0),0]<0;1,0>:d |
| 50 | +; CHECK-NEXT: cmp.eq (M1, 16) P7 V0039(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
| 51 | +; CHECK-NEXT: and (M1_NM, 16) P7 P7 P5 |
| 52 | +; CHECK-NEXT: (P7) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0039)[offsetTruncBroadcast]:a32 |
| 53 | +; CHECK-NEXT: or (M1_NM, 16) P4 P4 P7 |
| 54 | +; CHECK-NEXT: xor (M1_NM, 16) P5 P5 P7 |
| 55 | +; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5 |
| 56 | +; CHECK-NEXT: fbl (M1_NM, 1) V0040(0,0)<1> V0033(0,0)<0;1,0> |
| 57 | +; CHECK-NEXT: and (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0xf:ud |
| 58 | +; CHECK-NEXT: shl (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0x2:w |
| 59 | +; CHECK-NEXT: addr_add (M1_NM, 1) A2(0)<1> &nonuniform V0041(0,0)<0;1,0> |
| 60 | +; CHECK-NEXT: mov (M1_NM, 1) V0042(0,0)<1> r[A2(0),0]<0;1,0>:d |
| 61 | +; CHECK-NEXT: cmp.eq (M1, 16) P8 V0042(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
| 62 | +; CHECK-NEXT: and (M1_NM, 16) P8 P8 P5 |
| 63 | +; CHECK-NEXT: (P8) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0042)[offsetTruncBroadcast]:a32 |
| 64 | +; CHECK-NEXT: or (M1_NM, 16) P4 P4 P8 |
| 65 | +; CHECK-NEXT: xor (M1_NM, 16) P5 P5 P8 |
| 66 | +; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5 |
| 67 | +; CHECK-NEXT: fbl (M1_NM, 1) V0043(0,0)<1> V0033(0,0)<0;1,0> |
| 68 | +; CHECK-NEXT: and (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0xf:ud |
| 69 | +; CHECK-NEXT: shl (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0x2:w |
| 70 | +; CHECK-NEXT: addr_add (M1_NM, 1) A3(0)<1> &nonuniform V0044(0,0)<0;1,0> |
| 71 | +; CHECK-NEXT: mov (M1_NM, 1) V0045(0,0)<1> r[A3(0),0]<0;1,0>:d |
| 72 | +; CHECK-NEXT: cmp.eq (M1, 16) P9 V0045(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
| 73 | +; CHECK-NEXT: and (M1_NM, 16) P9 P9 P5 |
| 74 | +; CHECK-NEXT: (P9) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0045)[offsetTruncBroadcast]:a32 |
| 75 | +; CHECK-NEXT: or (M1_NM, 16) P4 P4 P9 |
| 76 | +; CHECK-NEXT: (!P4) goto (M1, 16) _test1_001__opt_resource_loop |
| 77 | +; CHECK-NEXT: mul (M1_NM, 1) V0046(0,0)<1> val_0(0,0)<0;1,0> 0x40:uw |
| 78 | +; CHECK-NEXT: addr_add (M1_NM, 1) A4(0)<1> &call_ V0046(0,0)<0;1,0> |
| 79 | +; CHECK-NEXT: mov (M1, 16) out(0,0)<1> r[A4(0),0]<8;8,1>:d |
| 80 | +; CHECK-NEXT: mov (M1_NM, 1) dst_0(0,0)<1> dst(0,0)<0;1,0> |
| 81 | +; CHECK-NEXT: mov (M1, 16) dstBroadcast_0(0,0)<2> dst_1(0,0)<0;1,0> |
| 82 | +; CHECK-NEXT: mov (M1, 16) dstBroadcast_0(0,1)<2> dst_1(0,1)<0;1,0> |
| 83 | +; CHECK-NEXT: lsc_store.ugm.wb.wb (M1, 16) flat[dstBroadcast]:a64 out:d32 |
| 84 | +; CHECK-NEXT: ret (M1, 1) |
22 | 85 |
|
23 | 86 | %svn = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32 17)
|
24 |
| -; CHECK: mov (M1, 16) svn(0,0)<1> threadIdInGroupX(0,0)<1;1,0> |
25 | 87 |
|
26 | 88 | %nonuniform = zext i16 %svn to i32
|
27 |
| -; CHECK: mov (M1, 16) nonuniform(0,0)<1> svn_0(0,0)<1;1,0> |
28 | 89 |
|
29 | 90 | %NonUniformResource = inttoptr i32 %nonuniform to <4 x float> addrspace(2621440)*
|
30 | 91 | %offset = add i32 %src1, 1
|
31 |
| -; CHECK: add (M1_NM, 1) offset(0,0)<1> src1(0,0)<0;1,0> 0x1:w |
32 |
| -; CHECK: mov (M1_NM, 1) offsetTrunc(0,0)<1> offset(0,0)<0;1,0> |
33 |
| -; CHECK: mov (M1, 16) offsetTruncBroadcast(0,0)<1> offsetTrunc(0,0)<0;1,0> |
34 | 92 |
|
35 | 93 | %call = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* %NonUniformResource, i32 %offset, i32 4, i1 false)
|
36 |
| -; CHECK: _test1_001__opt_resource_loop: |
37 |
| -; CHECK: setp (M1_NM, 16) P4 0x0:ud |
38 |
| -; CHECK: setp (M1_NM, 16) P5 0x0:ud |
39 |
| -; CHECK: cmp.eq (M1, 16) P5 V0032(0,0)<0;1,0> V0032(0,0)<0;1,0> |
40 |
| -; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5 |
41 |
| -; CHECK: fbl (M1_NM, 1) V0034(0,0)<1> V0033(0,0)<0;1,0> |
42 |
| -; CHECK: shl (M1_NM, 1) V0034(0,0)<1> V0034(0,0)<0;1,0> 0x2:w |
43 |
| -; CHECK: addr_add (M1_NM, 1) A0(0)<1> &nonuniform V0035(0,0)<0;1,0> |
44 |
| -; CHECK: mov (M1_NM, 1) V0036(0,0)<1> r[A0(0),0]<0;1,0>:d |
45 |
| -; CHECK: cmp.eq (M1, 16) P6 V0036(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
46 |
| -; CHECK: (P6) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0036)[offsetTruncBroadcast]:a32 |
47 |
| -; CHECK: or (M1_NM, 16) P4 P4 P6 |
48 |
| -; CHECK: xor (M1_NM, 16) P5 P5 P6 |
49 |
| -; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5 |
50 |
| -; CHECK: fbl (M1_NM, 1) V0037(0,0)<1> V0033(0,0)<0;1,0> |
51 |
| -; CHECK: and (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0xf:ud |
52 |
| -; CHECK: shl (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0x2:w |
53 |
| -; CHECK: addr_add (M1_NM, 1) A1(0)<1> &nonuniform V0038(0,0)<0;1,0> |
54 |
| -; CHECK: mov (M1_NM, 1) V0039(0,0)<1> r[A1(0),0]<0;1,0>:d |
55 |
| -; CHECK: cmp.eq (M1, 16) P7 V0039(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
56 |
| -; CHECK: and (M1_NM, 16) P7 P7 P5 |
57 |
| -; CHECK: (P7) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0039)[offsetTruncBroadcast]:a32 |
58 |
| -; CHECK: or (M1_NM, 16) P4 P4 P7 |
59 |
| -; CHECK: xor (M1_NM, 16) P5 P5 P7 |
60 |
| -; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5 |
61 |
| -; CHECK: fbl (M1_NM, 1) V0040(0,0)<1> V0033(0,0)<0;1,0> |
62 |
| -; CHECK: and (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0xf:ud |
63 |
| -; CHECK: shl (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0x2:w |
64 |
| -; CHECK: addr_add (M1_NM, 1) A2(0)<1> &nonuniform V0041(0,0)<0;1,0> |
65 |
| -; CHECK: mov (M1_NM, 1) V0042(0,0)<1> r[A2(0),0]<0;1,0>:d |
66 |
| -; CHECK: cmp.eq (M1, 16) P8 V0042(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
67 |
| -; CHECK: and (M1_NM, 16) P8 P8 P5 |
68 |
| -; CHECK: (P8) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0042)[offsetTruncBroadcast]:a32 |
69 |
| -; CHECK: or (M1_NM, 16) P4 P4 P8 |
70 |
| -; CHECK: xor (M1_NM, 16) P5 P5 P8 |
71 |
| -; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5 |
72 |
| -; CHECK: fbl (M1_NM, 1) V0043(0,0)<1> V0033(0,0)<0;1,0> |
73 |
| -; CHECK: and (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0xf:ud |
74 |
| -; CHECK: shl (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0x2:w |
75 |
| -; CHECK: addr_add (M1_NM, 1) A3(0)<1> &nonuniform V0044(0,0)<0;1,0> |
76 |
| -; CHECK: mov (M1_NM, 1) V0045(0,0)<1> r[A3(0),0]<0;1,0>:d |
77 |
| -; CHECK: cmp.eq (M1, 16) P9 V0045(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
78 |
| -; CHECK: and (M1_NM, 16) P9 P9 P5 |
79 |
| -; CHECK: (P9) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0045)[offsetTruncBroadcast]:a32 |
80 |
| -; CHECK: or (M1_NM, 16) P4 P4 P9 |
81 |
| -; CHECK: (!P4) goto (M1, 16) _test1_001__opt_resource_loop |
82 | 94 | %out = extractelement <3 x i32> %call, i32 %val
|
83 | 95 | store i32 %out, i32 addrspace(1)* %dst, align 1
|
84 | 96 | ret void
|
|
0 commit comments