@@ -24,8 +24,10 @@ def main():
24
24
help = 'A string used to seed the RNG' )
25
25
parser .add_argument ('-v' , '--verbose' , action = 'store_true' ,
26
26
help = 'Show verbose output' )
27
- parser .add_argument ('--fixed-num-shuffles' , type = int ,
28
- help = 'Specify a fixed number of shuffles to test' )
27
+ parser .add_argument ('--max-shuffle-height' , type = int , default = 16 ,
28
+ help = 'Specify a fixed height of shuffle tree to test' )
29
+ parser .add_argument ('--no-blends' , dest = 'blends' , action = 'store_false' ,
30
+ help = 'Include blends of two input vectors' )
29
31
parser .add_argument ('--fixed-bit-width' , type = int , choices = [128 , 256 ],
30
32
help = 'Specify a fixed bit width of vector to test' )
31
33
parser .add_argument ('--triple' ,
@@ -49,33 +51,46 @@ def main():
49
51
width = random .choice ([2 , 4 , 8 , 16 , 32 , 64 ])
50
52
element_type = random .choice (['i8' , 'i16' , 'i32' , 'i64' , 'f32' , 'f64' ])
51
53
52
- # FIXME: Support blends.
53
- shuffle_indices = [- 1 ] + range (width )
54
+ element_modulus = {
55
+ 'i8' : 1 << 8 , 'i16' : 1 << 16 , 'i32' : 1 << 32 , 'i64' : 1 << 64 ,
56
+ 'f32' : 1 << 32 , 'f64' : 1 << 64 }[element_type ]
54
57
55
- if args .fixed_num_shuffles is not None :
56
- num_shuffles = args .fixed_num_shuffles
57
- else :
58
- num_shuffles = random .randint (0 , 16 )
58
+ shuffle_range = (2 * width ) if args .blends else width
59
+ shuffle_indices = [- 1 ] + range (shuffle_range )
59
60
60
- shuffles = [[random .choice (shuffle_indices )
61
- for _ in itertools .repeat (None , width )]
62
- for _ in itertools .repeat (None , num_shuffles )]
61
+ shuffle_tree = [[[random .choice (shuffle_indices )
62
+ for _ in itertools .repeat (None , width )]
63
+ for _ in itertools .repeat (None , args .max_shuffle_height - i )]
64
+ for i in xrange (args .max_shuffle_height )]
63
65
64
66
if args .verbose :
65
67
# Print out the shuffle sequence in a compact form.
66
- print >> sys .stderr , 'Testing shuffle sequence "%s":' % (args .seed ,)
67
- for s in shuffles :
68
- print >> sys .stderr , ' v%d%s: %s' % (width , element_type , s )
68
+ print >> sys .stderr , ('Testing shuffle sequence "%s" (v%d%s):' %
69
+ (args .seed , width , element_type ))
70
+ for i , shuffles in enumerate (shuffle_tree ):
71
+ print >> sys .stderr , ' tree level %d:' % (i ,)
72
+ for j , s in enumerate (shuffles ):
73
+ print >> sys .stderr , ' shuffle %d: %s' % (j , s )
69
74
print >> sys .stderr , ''
70
75
71
- # Compute a round-trip of the shuffle.
72
- result = range (1 , width + 1 )
73
- for s in shuffles :
74
- result = [result [i ] if i != - 1 else - 1 for i in s ]
76
+ # Symbolically evaluate the shuffle tree.
77
+ inputs = [[int (j % element_modulus )
78
+ for j in xrange (i * width + 1 , (i + 1 ) * width + 1 )]
79
+ for i in xrange (args .max_shuffle_height + 1 )]
80
+ results = inputs
81
+ for shuffles in shuffle_tree :
82
+ results = [[((results [i ] if j < width else results [i + 1 ])[j % width ]
83
+ if j != - 1 else - 1 )
84
+ for j in s ]
85
+ for i , s in enumerate (shuffles )]
86
+ if len (results ) != 1 :
87
+ print >> sys .stderr , 'ERROR: Bad results: %s' % (results ,)
88
+ sys .exit (1 )
89
+ result = results [0 ]
75
90
76
91
if args .verbose :
77
92
print >> sys .stderr , 'Which transforms:'
78
- print >> sys .stderr , ' from: %s' % (range ( 1 , width + 1 ) ,)
93
+ print >> sys .stderr , ' from: %s' % (inputs ,)
79
94
print >> sys .stderr , ' into: %s' % (result ,)
80
95
print >> sys .stderr , ''
81
96
@@ -92,22 +107,24 @@ def main():
92
107
# Now we need to generate IR for the shuffle function.
93
108
subst = {'N' : width , 'T' : element_type , 'IT' : integral_element_type }
94
109
print """
95
- define internal <%(N)d x %(T)s> @test(<%(N)d x %(T)s> %%v) noinline nounwind {
96
- entry:""" % subst
97
-
98
- for i , s in enumerate (shuffles ):
110
+ define internal fastcc <%(N)d x %(T)s> @test(%(arguments)s) noinline nounwind {
111
+ entry:""" % dict (subst ,
112
+ arguments = ', ' .join (
113
+ ['<%(N)d x %(T)s> %%s.0.%(i)d' % dict (subst , i = i )
114
+ for i in xrange (args .max_shuffle_height + 1 )]))
115
+
116
+ for i , shuffles in enumerate (shuffle_tree ):
117
+ for j , s in enumerate (shuffles ):
99
118
print """
100
- %%s%(i)d = shufflevector <%(N)d x %(T)s> %(I)s, <%(N)d x %(T)s> undef, <%(N)d x i32> <%(S)s>
101
- """ .strip () % dict (subst ,
102
- i = i ,
103
- I = ('%%s%d' % (i - 1 )) if i != 0 else '%v' ,
104
- S = ', ' .join (['i32 %s' % (str (si ) if si != - 1 else 'undef' ,)
105
- for si in s ]))
119
+ %%s.%(next_i)d.%(j)d = shufflevector <%(N)d x %(T)s> %%s.%(i)d.%(j)d, <%(N)d x %(T)s> %%s.%(i)d.%(next_j)d, <%(N)d x i32> <%(S)s>
120
+ """ .strip ('\n ' ) % dict (subst , i = i , next_i = i + 1 , j = j , next_j = j + 1 ,
121
+ S = ', ' .join (['i32 ' + (str (si ) if si != - 1 else 'undef' )
122
+ for si in s ]))
106
123
107
124
print """
108
- ret <%(N)d x %(T)s> %%s%(i)d
125
+ ret <%(N)d x %(T)s> %%s. %(i)d.0
109
126
}
110
- """ % dict (subst , i = len (shuffles ) - 1 )
127
+ """ % dict (subst , i = len (shuffle_tree ) )
111
128
112
129
# Generate some string constants that we can use to report errors.
113
130
for i , r in enumerate (result ):
@@ -119,28 +136,39 @@ def main():
119
136
@error.%(i)d = private unnamed_addr global [128 x i8] c"%(s)s"
120
137
""" .strip () % {'i' : i , 's' : s }
121
138
139
+ # Define a wrapper function which is marked 'optnone' to prevent
140
+ # interprocedural optimizations from deleting the test.
141
+ print """
142
+ define internal fastcc <%(N)d x %(T)s> @test_wrapper(%(arguments)s) optnone noinline {
143
+ %%result = call fastcc <%(N)d x %(T)s> @test(%(arguments)s)
144
+ ret <%(N)d x %(T)s> %%result
145
+ }
146
+ """ % dict (subst ,
147
+ arguments = ', ' .join (['<%(N)d x %(T)s> %%s.%(i)d' % dict (subst , i = i )
148
+ for i in xrange (args .max_shuffle_height + 1 )]))
149
+
122
150
# Finally, generate a main function which will trap if any lanes are mapped
123
151
# incorrectly (in an observable way).
124
152
print """
125
- define i32 @main() optnone noinline {
153
+ define i32 @main() {
126
154
entry:
127
155
; Create a scratch space to print error messages.
128
156
%%str = alloca [128 x i8]
129
157
%%str.ptr = getelementptr inbounds [128 x i8]* %%str, i32 0, i32 0
130
158
131
159
; Build the input vector and call the test function.
132
- %%input = bitcast <%(N)d x %(IT)s> <%(input)s> to <%(N)d x %(T)s>
133
- %%v = call <%(N)d x %(T)s> @test(<%(N)d x %(T)s> %%input)
160
+ %%v = call fastcc <%(N)d x %(T)s> @test_wrapper(%(inputs)s)
134
161
; We need to cast this back to an integer type vector to easily check the
135
162
; result.
136
163
%%v.cast = bitcast <%(N)d x %(T)s> %%v to <%(N)d x %(IT)s>
137
164
br label %%test.0
138
165
""" % dict (subst ,
139
- input = ', ' .join (['%(IT)s %(i)s' % dict (subst , i = i )
140
- for i in xrange (1 , width + 1 )]),
141
- result = ', ' .join (['%(IT)s %(i)s' % dict (subst ,
142
- i = i if i != - 1 else 'undef' )
143
- for i in result ]))
166
+ inputs = ', ' .join (
167
+ [('<%(N)d x %(T)s> bitcast '
168
+ '(<%(N)d x %(IT)s> <%(input)s> to <%(N)d x %(T)s>)' %
169
+ dict (subst , input = ', ' .join (['%(IT)s %(i)d' % dict (subst , i = i )
170
+ for i in input ])))
171
+ for input in inputs ]))
144
172
145
173
# Test that each non-undef result lane contains the expected value.
146
174
for i , r in enumerate (result ):
0 commit comments