Skip to content

Commit 209c91e

Browse files
committed
Use load intrinsic and loop for intrinsic-test programs. Add --release flag back to intrinsic-test programs.
1 parent 79fdc5d commit 209c91e

File tree

6 files changed

+139
-109
lines changed

6 files changed

+139
-109
lines changed

crates/intrinsic-test/missing_aarch64.txt

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -67,20 +67,6 @@ vrnd64xq_f64
6767
vrnd64z_f64
6868
vrnd64zq_f64
6969

70-
# Takes too long to compile tests
71-
vcopyq_laneq_u8
72-
vcopyq_laneq_s8
73-
vcopyq_laneq_p8
74-
vcopyq_lane_u8
75-
vcopyq_lane_s8
76-
vcopyq_lane_p8
77-
vcopy_laneq_u8
78-
vcopy_laneq_s8
79-
vcopy_laneq_p8
80-
vcopy_lane_u8
81-
vcopy_lane_s8
82-
vcopy_lane_p8
83-
8470
# QEMU 6.0 doesn't support these instructions
8571
vmmlaq_s32
8672
vmmlaq_u32

crates/intrinsic-test/src/argument.rs

Lines changed: 62 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use std::ops::Range;
22

3-
use crate::types::IntrinsicType;
3+
use crate::types::{IntrinsicType, TypeKind};
44
use crate::Language;
55

66
/// An argument for the intrinsic.
@@ -90,49 +90,87 @@ impl ArgumentList {
9090
.join(", ")
9191
}
9292

93-
/// Creates a line that initializes this argument for C code.
94-
/// e.g. `int32x2_t a = { 0x1, 0x2 };`
95-
pub fn init_random_values_c(&self, pass: usize) -> String {
93+
/// Creates a line for each argument that initializes an array for C from which `loads` argument
94+
/// values can be loaded as a sliding window.
95+
/// e.g `const int32x2_t a_vals = {0x3effffff, 0x3effffff, 0x3f7fffff}`, if loads=2.
96+
pub fn gen_arglists_c(&self, loads: u32) -> String {
9697
self.iter()
9798
.filter_map(|arg| {
9899
(!arg.has_constraint()).then(|| {
99100
format!(
100-
"{ty} {name} = {{ {values} }};",
101-
ty = arg.to_c_type(),
101+
"const {ty} {name}_vals[] = {{ {values} }};",
102+
ty = arg.ty.c_scalar_type(),
102103
name = arg.name,
103-
values = arg.ty.populate_random(pass, &Language::C)
104+
values = arg.ty.populate_random(loads, &Language::C)
104105
)
105106
})
106107
})
107108
.collect::<Vec<_>>()
108-
.join("\n ")
109+
.join("\n")
109110
}
110111

111-
/// Creates a line that initializes this argument for Rust code.
112-
/// e.g. `let a = transmute([0x1, 0x2]);`
113-
pub fn init_random_values_rust(&self, pass: usize) -> String {
112+
/// Creates a line for each argument that initializes an array for Rust from which `loads` argument
113+
/// values can be loaded as a sliding window, e.g `const A_VALS: [u32; 20] = [...];`
114+
pub fn gen_arglists_rust(&self, loads: u32) -> String {
114115
self.iter()
115116
.filter_map(|arg| {
116117
(!arg.has_constraint()).then(|| {
117-
if arg.is_simd() {
118-
format!(
119-
"let {name} = ::std::mem::transmute([{values}]);",
120-
name = arg.name,
121-
values = arg.ty.populate_random(pass, &Language::Rust),
122-
)
123-
} else {
124-
format!(
125-
"let {name} = {value};",
126-
name = arg.name,
127-
value = arg.ty.populate_random(pass, &Language::Rust)
128-
)
129-
}
118+
format!(
119+
"const {upper_name}_VALS: [{ty}; {load_size}] = unsafe{{ [{values}] }};",
120+
upper_name = arg.name.to_uppercase(),
121+
ty = arg.ty.rust_scalar_type(),
122+
load_size = arg.ty.num_lanes() * arg.ty.num_vectors() + loads - 1,
123+
values = arg.ty.populate_random(loads, &Language::Rust)
124+
)
125+
})
126+
})
127+
.collect::<Vec<_>>()
128+
.join("\n")
129+
}
130+
131+
/// Creates a line for each argument that initalizes the argument from an array [arg]_vals at
132+
/// an offset i using a load intrinsic, in C.
133+
/// e.g `uint8x8_t a = vld1_u8(&a_vals[i]);`
134+
pub fn load_values_c(&self, p64_armv7_workaround: bool) -> String {
135+
self.iter()
136+
.filter_map(|arg| {
137+
// The ACLE doesn't support 64-bit polynomial loads on Armv7
138+
// This and the cast are a workaround for this
139+
let armv7_p64 = if let TypeKind::Poly = arg.ty.kind() { p64_armv7_workaround } else { false };
140+
141+
(!arg.has_constraint()).then(|| {
142+
format!(
143+
"{ty} {name} = {open_cast}{load}(&{name}_vals[i]){close_cast};",
144+
ty = arg.to_c_type(),
145+
name = arg.name,
146+
load = if arg.is_simd() { arg.ty.get_load_function(p64_armv7_workaround) } else { "*".to_string() },
147+
open_cast = if armv7_p64 { format!("cast<{}>(", arg.to_c_type()) } else { "".to_string() },
148+
close_cast = if armv7_p64{ ")".to_string() } else { "".to_string() }
149+
)
130150
})
131151
})
132152
.collect::<Vec<_>>()
133153
.join("\n ")
134154
}
135155

156+
/// Creates a line for each argument that initalizes the argument from array [ARG]_VALS at
157+
/// an offset i using a load intrinsic, in Rust.
158+
/// e.g `let a = vld1_u8(A_VALS.as_ptr().offset(i));`
159+
pub fn load_values_rust(&self) -> String {
160+
self.iter()
161+
.filter_map(|arg| {
162+
(!arg.has_constraint()).then(|| {
163+
format!("let {name} = {load}({upper_name}_VALS.as_ptr().offset(i));",
164+
name = arg.name,
165+
upper_name = arg.name.to_uppercase(),
166+
load = if arg.is_simd() { arg.ty.get_load_function(false) } else { "*".to_string() },
167+
)
168+
})
169+
})
170+
.collect::<Vec<_>>()
171+
.join("\n ")
172+
}
173+
136174
pub fn iter(&self) -> std::slice::Iter<'_, Argument> {
137175
self.args.iter()
138176
}

crates/intrinsic-test/src/intrinsic.rs

Lines changed: 32 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@ pub struct Intrinsic {
2020

2121
impl Intrinsic {
2222
/// Generates a std::cout for the intrinsics results that will match the
23-
/// rust debug output format for the return type.
24-
pub fn print_result_c(&self, index: usize, additional: &str) -> String {
23+
/// rust debug output format for the return type. The generated line assumes
24+
/// there is an int i in scope which is the current pass number.
25+
pub fn print_result_c(&self, additional: &str) -> String {
2526
let lanes = if self.results.num_vectors() > 1 {
2627
(0..self.results.num_vectors())
2728
.map(|vector| {
@@ -72,7 +73,7 @@ impl Intrinsic {
7273
};
7374

7475
format!(
75-
r#"std::cout << "Result {additional}-{idx}: {ty}" << std::fixed << std::setprecision(150) << {lanes} << "{close}" << std::endl;"#,
76+
r#"std::cout << "Result {additional}-" << i+1 << ": {ty}" << std::fixed << std::setprecision(150) << {lanes} << "{close}" << std::endl;"#,
7677
ty = if self.results.is_simd() {
7778
format!("{}(", self.results.c_type())
7879
} else {
@@ -81,45 +82,48 @@ impl Intrinsic {
8182
close = if self.results.is_simd() { ")" } else { "" },
8283
lanes = lanes,
8384
additional = additional,
84-
idx = index,
8585
)
8686
}
8787

88-
pub fn generate_pass_rust(&self, index: usize, additional: &str) -> String {
88+
pub fn generate_loop_c(&self, additional: &str, passes: u32, p64_armv7_workaround: bool) -> String {
89+
format!(
90+
r#" {{
91+
for (int i=0; i<{passes}; i++) {{
92+
{loaded_args}
93+
auto __return_value = {intrinsic_call}({args});
94+
{print_result}
95+
}}
96+
}}"#,
97+
loaded_args = self.arguments.load_values_c(p64_armv7_workaround),
98+
intrinsic_call = self.name,
99+
args = self.arguments.as_call_param_c(),
100+
print_result = self.print_result_c(additional)
101+
)
102+
}
103+
104+
pub fn generate_loop_rust(&self, additional: &str, passes: u32) -> String {
89105
let constraints = self.arguments.as_constraint_parameters_rust();
90106
let constraints = if !constraints.is_empty() {
91107
format!("::<{}>", constraints)
92108
} else {
93109
constraints
94110
};
95111

96-
format!(
97-
r#"
98-
unsafe {{
99-
{initialized_args}
100-
let res = {intrinsic_call}{const}({args});
101-
println!("Result {additional}-{idx}: {{:.150?}}", res);
102-
}}"#,
103-
initialized_args = self.arguments.init_random_values_rust(index),
104-
intrinsic_call = self.name,
105-
args = self.arguments.as_call_param_rust(),
106-
additional = additional,
107-
idx = index,
108-
const = constraints,
109-
)
110-
}
111-
112-
pub fn generate_pass_c(&self, index: usize, additional: &str) -> String {
113112
format!(
114113
r#" {{
115-
{initialized_args}
116-
auto __return_value = {intrinsic_call}({args});
117-
{print_result}
114+
for i in 0..{passes} {{
115+
unsafe {{
116+
{loaded_args}
117+
let __return_value = {intrinsic_call}{const}({args});
118+
println!("Result {additional}-{{}}: {{:.150?}}", i+1, __return_value);
119+
}}
120+
}}
118121
}}"#,
119-
initialized_args = self.arguments.init_random_values_c(index),
122+
loaded_args = self.arguments.load_values_rust(),
120123
intrinsic_call = self.name,
121-
args = self.arguments.as_call_param_c(),
122-
print_result = self.print_result_c(index, additional)
124+
const = constraints,
125+
args = self.arguments.as_call_param_rust(),
126+
additional = additional,
123127
)
124128
}
125129
}

crates/intrinsic-test/src/main.rs

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,16 @@ mod intrinsic;
2323
mod types;
2424
mod values;
2525

26+
// The number of times each intrinsic will be called.
27+
const PASSES: u32 = 20;
28+
2629
#[derive(Debug, PartialEq)]
2730
pub enum Language {
2831
Rust,
2932
C,
3033
}
3134

32-
fn gen_code_c(intrinsic: &Intrinsic, constraints: &[&Argument], name: String) -> String {
35+
fn gen_code_c(intrinsic: &Intrinsic, constraints: &[&Argument], name: String, p64_armv7_workaround: bool) -> String {
3336
if let Some((current, constraints)) = constraints.split_last() {
3437
let range = current
3538
.constraints
@@ -47,19 +50,16 @@ fn gen_code_c(intrinsic: &Intrinsic, constraints: &[&Argument], name: String) ->
4750
name = current.name,
4851
ty = current.ty.c_type(),
4952
val = i,
50-
pass = gen_code_c(intrinsic, constraints, format!("{}-{}", name, i))
53+
pass = gen_code_c(intrinsic, constraints, format!("{}-{}", name, i), p64_armv7_workaround)
5154
)
5255
})
5356
.collect()
5457
} else {
55-
(1..20)
56-
.map(|idx| intrinsic.generate_pass_c(idx, &name))
57-
.collect::<Vec<_>>()
58-
.join("\n")
58+
intrinsic.generate_loop_c(&name, PASSES, p64_armv7_workaround)
5959
}
6060
}
6161

62-
fn generate_c_program(header_files: &[&str], intrinsic: &Intrinsic) -> String {
62+
fn generate_c_program(header_files: &[&str], intrinsic: &Intrinsic, p64_armv7_workaround: bool) -> String {
6363
let constraints = intrinsic
6464
.arguments
6565
.iter()
@@ -75,7 +75,7 @@ fn generate_c_program(header_files: &[&str], intrinsic: &Intrinsic) -> String {
7575
7676
template<typename T1, typename T2> T1 cast(T2 x) {{
7777
static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same");
78-
T1 ret = 0;
78+
T1 ret{{}};
7979
memcpy(&ret, &x, sizeof(T1));
8080
return ret;
8181
}}
@@ -95,6 +95,8 @@ std::ostream& operator<<(std::ostream& os, poly128_t value) {{
9595
}}
9696
#endif
9797
98+
{arglists}
99+
98100
int main(int argc, char **argv) {{
99101
{passes}
100102
return 0;
@@ -104,7 +106,8 @@ int main(int argc, char **argv) {{
104106
.map(|header| format!("#include <{}>", header))
105107
.collect::<Vec<_>>()
106108
.join("\n"),
107-
passes = gen_code_c(intrinsic, constraints.as_slice(), Default::default()),
109+
arglists = intrinsic.arguments.gen_arglists_c(PASSES),
110+
passes = gen_code_c(intrinsic, constraints.as_slice(), Default::default(), p64_armv7_workaround),
108111
)
109112
}
110113

@@ -131,10 +134,7 @@ fn gen_code_rust(intrinsic: &Intrinsic, constraints: &[&Argument], name: String)
131134
})
132135
.collect()
133136
} else {
134-
(1..20)
135-
.map(|idx| intrinsic.generate_pass_rust(idx, &name))
136-
.collect::<Vec<_>>()
137-
.join("\n")
137+
intrinsic.generate_loop_rust(&name, PASSES)
138138
}
139139
}
140140

@@ -153,11 +153,14 @@ fn generate_rust_program(intrinsic: &Intrinsic, a32: bool) -> String {
153153
#![allow(non_upper_case_globals)]
154154
use core_arch::arch::{target_arch}::*;
155155
156+
{arglists}
157+
156158
fn main() {{
157159
{passes}
158160
}}
159161
"#,
160162
target_arch = if a32 { "arm" } else { "aarch64" },
163+
arglists = intrinsic.arguments.gen_arglists_rust(PASSES),
161164
passes = gen_code_rust(intrinsic, &constraints, Default::default())
162165
)
163166
}
@@ -203,7 +206,7 @@ fn build_c(intrinsics: &Vec<Intrinsic>, compiler: &str, a32: bool) -> bool {
203206
let c_filename = format!(r#"c_programs/{}.cpp"#, i.name);
204207
let mut file = File::create(&c_filename).unwrap();
205208

206-
let c_code = generate_c_program(&["arm_neon.h", "arm_acle.h"], &i);
209+
let c_code = generate_c_program(&["arm_neon.h", "arm_acle.h"], &i, a32);
207210
file.write_all(c_code.into_bytes().as_slice()).unwrap();
208211
compile_c(&c_filename, &i, compiler, a32)
209212
})
@@ -259,7 +262,7 @@ path = "{intrinsic}/main.rs""#,
259262
.current_dir("rust_programs")
260263
.arg("-c")
261264
.arg(format!(
262-
"cargo {toolchain} build --target {target}",
265+
"cargo {toolchain} build --target {target} --release",
263266
toolchain = toolchain,
264267
target = if a32 {
265268
"armv7-unknown-linux-gnueabihf"
@@ -407,7 +410,7 @@ fn compare_outputs(intrinsics: &Vec<Intrinsic>, toolchain: &str, runner: &str, a
407410
.current_dir("rust_programs")
408411
.arg("-c")
409412
.arg(format!(
410-
"cargo {toolchain} run --target {target} --bin {intrinsic}",
413+
"cargo {toolchain} run --target {target} --bin {intrinsic} --release",
411414
intrinsic = intrinsic.name,
412415
toolchain = toolchain,
413416
target = if a32 {

0 commit comments

Comments
 (0)