|
17 | 17 | @ThreadGroupSize_Z = constant i32 1
|
18 | 18 |
|
19 | 19 | define spir_kernel void @test1(i32 %src1, i32 %val, i32 addrspace(1)* %dst) {
|
20 |
| -; CHECK: _main_0: |
21 |
| -; CHECK-NEXT: mov (M1, 16) svn(0,0)<1> threadIdInGroupX(0,0)<1;1,0> |
22 |
| -; CHECK-NEXT: mov (M1, 16) nonuniform(0,0)<1> svn_0(0,0)<1;1,0> |
23 |
| -; CHECK-NEXT: add (M1_NM, 1) offset(0,0)<1> src1(0,0)<0;1,0> 0x1:w |
24 |
| -; CHECK-NEXT: mov (M1_NM, 1) offsetTrunc(0,0)<1> offset(0,0)<0;1,0> |
25 |
| -; CHECK-NEXT: mov (M1, 16) offsetTruncBroadcast(0,0)<1> offsetTrunc(0,0)<0;1,0> |
26 |
| -; CHECK-NEXT: setp (M1_NM, 16) P1 0x0:ud |
27 |
| -; CHECK-NEXT: setp (M1_NM, 16) P2 0x0:ud |
28 |
| -; CHECK-NEXT: setp (M1_NM, 16) P3 0x0:ud |
29 |
| -; CHECK-NEXT: lifetime.start call_ |
30 |
| -; |
31 |
| -; CHECK: _test1_001__opt_resource_loop: |
32 |
| -; CHECK-NEXT: setp (M1_NM, 16) P4 0x0:ud |
33 |
| -; CHECK-NEXT: setp (M1_NM, 16) P5 0x0:ud |
34 |
| -; CHECK-NEXT: cmp.eq (M1, 16) P5 V0032(0,0)<0;1,0> V0032(0,0)<0;1,0> |
35 |
| -; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5 |
36 |
| -; CHECK-NEXT: fbl (M1_NM, 1) V0034(0,0)<1> V0033(0,0)<0;1,0> |
37 |
| -; CHECK-NEXT: shl (M1_NM, 1) V0034(0,0)<1> V0034(0,0)<0;1,0> 0x2:w |
38 |
| -; CHECK-NEXT: addr_add (M1_NM, 1) A0(0)<1> &nonuniform V0035(0,0)<0;1,0> |
39 |
| -; CHECK-NEXT: mov (M1_NM, 1) V0036(0,0)<1> r[A0(0),0]<0;1,0>:d |
40 |
| -; CHECK-NEXT: cmp.eq (M1, 16) P6 V0036(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
41 |
| -; CHECK-NEXT: (P6) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0036)[offsetTruncBroadcast]:a32 |
42 |
| -; CHECK-NEXT: or (M1_NM, 16) P4 P4 P6 |
43 |
| -; CHECK-NEXT: xor (M1_NM, 16) P5 P5 P6 |
44 |
| -; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5 |
45 |
| -; CHECK-NEXT: fbl (M1_NM, 1) V0037(0,0)<1> V0033(0,0)<0;1,0> |
46 |
| -; CHECK-NEXT: and (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0xf:ud |
47 |
| -; CHECK-NEXT: shl (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0x2:w |
48 |
| -; CHECK-NEXT: addr_add (M1_NM, 1) A1(0)<1> &nonuniform V0038(0,0)<0;1,0> |
49 |
| -; CHECK-NEXT: mov (M1_NM, 1) V0039(0,0)<1> r[A1(0),0]<0;1,0>:d |
50 |
| -; CHECK-NEXT: cmp.eq (M1, 16) P7 V0039(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
51 |
| -; CHECK-NEXT: and (M1_NM, 16) P7 P7 P5 |
52 |
| -; CHECK-NEXT: (P7) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0039)[offsetTruncBroadcast]:a32 |
53 |
| -; CHECK-NEXT: or (M1_NM, 16) P4 P4 P7 |
54 |
| -; CHECK-NEXT: xor (M1_NM, 16) P5 P5 P7 |
55 |
| -; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5 |
56 |
| -; CHECK-NEXT: fbl (M1_NM, 1) V0040(0,0)<1> V0033(0,0)<0;1,0> |
57 |
| -; CHECK-NEXT: and (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0xf:ud |
58 |
| -; CHECK-NEXT: shl (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0x2:w |
59 |
| -; CHECK-NEXT: addr_add (M1_NM, 1) A2(0)<1> &nonuniform V0041(0,0)<0;1,0> |
60 |
| -; CHECK-NEXT: mov (M1_NM, 1) V0042(0,0)<1> r[A2(0),0]<0;1,0>:d |
61 |
| -; CHECK-NEXT: cmp.eq (M1, 16) P8 V0042(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
62 |
| -; CHECK-NEXT: and (M1_NM, 16) P8 P8 P5 |
63 |
| -; CHECK-NEXT: (P8) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0042)[offsetTruncBroadcast]:a32 |
64 |
| -; CHECK-NEXT: or (M1_NM, 16) P4 P4 P8 |
65 |
| -; CHECK-NEXT: xor (M1_NM, 16) P5 P5 P8 |
66 |
| -; CHECK-NEXT: mov (M1_NM, 1) V0033(0,0)<1> P5 |
67 |
| -; CHECK-NEXT: fbl (M1_NM, 1) V0043(0,0)<1> V0033(0,0)<0;1,0> |
68 |
| -; CHECK-NEXT: and (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0xf:ud |
69 |
| -; CHECK-NEXT: shl (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0x2:w |
70 |
| -; CHECK-NEXT: addr_add (M1_NM, 1) A3(0)<1> &nonuniform V0044(0,0)<0;1,0> |
71 |
| -; CHECK-NEXT: mov (M1_NM, 1) V0045(0,0)<1> r[A3(0),0]<0;1,0>:d |
72 |
| -; CHECK-NEXT: cmp.eq (M1, 16) P9 V0045(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
73 |
| -; CHECK-NEXT: and (M1_NM, 16) P9 P9 P5 |
74 |
| -; CHECK-NEXT: (P9) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0045)[offsetTruncBroadcast]:a32 |
75 |
| -; CHECK-NEXT: or (M1_NM, 16) P4 P4 P9 |
76 |
| -; CHECK-NEXT: (!P4) goto (M1, 16) _test1_001__opt_resource_loop |
77 |
| -; CHECK-NEXT: mul (M1_NM, 1) V0046(0,0)<1> val_0(0,0)<0;1,0> 0x40:uw |
78 |
| -; CHECK-NEXT: addr_add (M1_NM, 1) A4(0)<1> &call_ V0046(0,0)<0;1,0> |
79 |
| -; CHECK-NEXT: mov (M1, 16) out(0,0)<1> r[A4(0),0]<8;8,1>:d |
80 |
| -; CHECK-NEXT: mov (M1_NM, 1) dst_0(0,0)<1> dst(0,0)<0;1,0> |
81 |
| -; CHECK-NEXT: mov (M1, 16) dstBroadcast_0(0,0)<2> dst_1(0,0)<0;1,0> |
82 |
| -; CHECK-NEXT: mov (M1, 16) dstBroadcast_0(0,1)<2> dst_1(0,1)<0;1,0> |
83 |
| -; CHECK-NEXT: lsc_store.ugm.wb.wb (M1, 16) flat[dstBroadcast]:a64 out:d32 |
84 |
| -; CHECK-NEXT: ret (M1, 1) |
| 20 | +entry: |
| 21 | +; CHECK: _main_0: |
85 | 22 |
|
86 | 23 | %svn = call i16 @llvm.genx.GenISA.DCL.SystemValue.i16(i32 17)
|
| 24 | +; CHECK: mov (M1, 16) svn(0,0)<1> threadIdInGroupX(0,0)<1;1,0> |
87 | 25 |
|
88 | 26 | %nonuniform = zext i16 %svn to i32
|
| 27 | +; CHECK: mov (M1, 16) nonuniform(0,0)<1> svn_0(0,0)<1;1,0> |
89 | 28 |
|
90 | 29 | %NonUniformResource = inttoptr i32 %nonuniform to <4 x float> addrspace(2621440)*
|
91 | 30 | %offset = add i32 %src1, 1
|
| 31 | +; CHECK: add (M1_NM, 1) offset(0,0)<1> src1(0,0)<0;1,0> 0x1:w |
| 32 | +; CHECK: mov (M1_NM, 1) offsetTrunc(0,0)<1> offset(0,0)<0;1,0> |
| 33 | +; CHECK: mov (M1, 16) offsetTruncBroadcast(0,0)<1> offsetTrunc(0,0)<0;1,0> |
92 | 34 |
|
93 | 35 | %call = call <3 x i32> @llvm.genx.GenISA.ldrawvector.indexed.v3i32.p2621440v4f32(<4 x float> addrspace(2621440)* %NonUniformResource, i32 %offset, i32 4, i1 false)
|
| 36 | +; CHECK: _test1_001__opt_resource_loop: |
| 37 | +; CHECK: setp (M1_NM, 16) P4 0x0:ud |
| 38 | +; CHECK: setp (M1_NM, 16) P5 0x0:ud |
| 39 | +; CHECK: cmp.eq (M1, 16) P5 V0032(0,0)<0;1,0> V0032(0,0)<0;1,0> |
| 40 | +; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5 |
| 41 | +; CHECK: fbl (M1_NM, 1) V0034(0,0)<1> V0033(0,0)<0;1,0> |
| 42 | +; CHECK: shl (M1_NM, 1) V0034(0,0)<1> V0034(0,0)<0;1,0> 0x2:w |
| 43 | +; CHECK: addr_add (M1_NM, 1) A0(0)<1> &nonuniform V0035(0,0)<0;1,0> |
| 44 | +; CHECK: mov (M1_NM, 1) V0036(0,0)<1> r[A0(0),0]<0;1,0>:d |
| 45 | +; CHECK: cmp.eq (M1, 16) P6 V0036(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
| 46 | +; CHECK: (P6) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0036)[offsetTruncBroadcast]:a32 |
| 47 | +; CHECK: or (M1_NM, 16) P4 P4 P6 |
| 48 | +; CHECK: xor (M1_NM, 16) P5 P5 P6 |
| 49 | +; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5 |
| 50 | +; CHECK: fbl (M1_NM, 1) V0037(0,0)<1> V0033(0,0)<0;1,0> |
| 51 | +; CHECK: and (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0xf:ud |
| 52 | +; CHECK: shl (M1_NM, 1) V0037(0,0)<1> V0037(0,0)<0;1,0> 0x2:w |
| 53 | +; CHECK: addr_add (M1_NM, 1) A1(0)<1> &nonuniform V0038(0,0)<0;1,0> |
| 54 | +; CHECK: mov (M1_NM, 1) V0039(0,0)<1> r[A1(0),0]<0;1,0>:d |
| 55 | +; CHECK: cmp.eq (M1, 16) P7 V0039(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
| 56 | +; CHECK: and (M1_NM, 16) P7 P7 P5 |
| 57 | +; CHECK: (P7) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0039)[offsetTruncBroadcast]:a32 |
| 58 | +; CHECK: or (M1_NM, 16) P4 P4 P7 |
| 59 | +; CHECK: xor (M1_NM, 16) P5 P5 P7 |
| 60 | +; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5 |
| 61 | +; CHECK: fbl (M1_NM, 1) V0040(0,0)<1> V0033(0,0)<0;1,0> |
| 62 | +; CHECK: and (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0xf:ud |
| 63 | +; CHECK: shl (M1_NM, 1) V0040(0,0)<1> V0040(0,0)<0;1,0> 0x2:w |
| 64 | +; CHECK: addr_add (M1_NM, 1) A2(0)<1> &nonuniform V0041(0,0)<0;1,0> |
| 65 | +; CHECK: mov (M1_NM, 1) V0042(0,0)<1> r[A2(0),0]<0;1,0>:d |
| 66 | +; CHECK: cmp.eq (M1, 16) P8 V0042(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
| 67 | +; CHECK: and (M1_NM, 16) P8 P8 P5 |
| 68 | +; CHECK: (P8) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0042)[offsetTruncBroadcast]:a32 |
| 69 | +; CHECK: or (M1_NM, 16) P4 P4 P8 |
| 70 | +; CHECK: xor (M1_NM, 16) P5 P5 P8 |
| 71 | +; CHECK: mov (M1_NM, 1) V0033(0,0)<1> P5 |
| 72 | +; CHECK: fbl (M1_NM, 1) V0043(0,0)<1> V0033(0,0)<0;1,0> |
| 73 | +; CHECK: and (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0xf:ud |
| 74 | +; CHECK: shl (M1_NM, 1) V0043(0,0)<1> V0043(0,0)<0;1,0> 0x2:w |
| 75 | +; CHECK: addr_add (M1_NM, 1) A3(0)<1> &nonuniform V0044(0,0)<0;1,0> |
| 76 | +; CHECK: mov (M1_NM, 1) V0045(0,0)<1> r[A3(0),0]<0;1,0>:d |
| 77 | +; CHECK: cmp.eq (M1, 16) P9 V0045(0,0)<0;1,0> nonuniform(0,0)<1;1,0> |
| 78 | +; CHECK: and (M1_NM, 16) P9 P9 P5 |
| 79 | +; CHECK: (P9) lsc_load.ugm.ca.ca (M1, 16) call_:d32x3 bss(V0045)[offsetTruncBroadcast]:a32 |
| 80 | +; CHECK: or (M1_NM, 16) P4 P4 P9 |
| 81 | +; CHECK: (!P4) goto (M1, 16) _test1_001__opt_resource_loop |
94 | 82 | %out = extractelement <3 x i32> %call, i32 %val
|
95 | 83 | store i32 %out, i32 addrspace(1)* %dst, align 1
|
96 | 84 | ret void
|
|
0 commit comments