Skip to content

Commit 78126f4

Browse files
committed
Do not always enable avx2
1 parent 5f5f504 commit 78126f4

File tree

4 files changed

+49
-23
lines changed

4 files changed

+49
-23
lines changed

src/abi.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use rustc_codegen_ssa::traits::{AbiBuilderMethods, BaseTypeMethods};
33
use rustc_data_structures::fx::FxHashSet;
44
use rustc_middle::bug;
55
use rustc_middle::ty::Ty;
6+
#[cfg(feature = "master")]
67
use rustc_session::config;
78
use rustc_target::abi::call::{ArgAttributes, CastTarget, FnAbi, PassMode, Reg, RegKind};
89

src/base.rs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
11
use std::collections::HashSet;
22
use std::env;
3-
use std::sync::Arc;
43
use std::time::Instant;
54

65
use gccjit::{
76
Context,
87
FunctionType,
98
GlobalKind,
109
};
11-
#[cfg(feature="master")]
12-
use gccjit::TargetInfo;
1310
use rustc_middle::dep_graph;
1411
use rustc_middle::ty::TyCtxt;
1512
#[cfg(feature="master")]
@@ -22,8 +19,7 @@ use rustc_codegen_ssa::traits::DebugInfoMethods;
2219
use rustc_session::config::DebugInfo;
2320
use rustc_span::Symbol;
2421

25-
#[cfg(not(feature="master"))]
26-
use crate::TargetInfo;
22+
use crate::LockedTargetInfo;
2723
use crate::GccContext;
2824
use crate::builder::Builder;
2925
use crate::context::CodegenCx;
@@ -70,7 +66,7 @@ pub fn linkage_to_gcc(linkage: Linkage) -> FunctionType {
7066
}
7167
}
7268

73-
pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, target_info: Arc<TargetInfo>) -> (ModuleCodegen<GccContext>, u64) {
69+
pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, target_info: LockedTargetInfo) -> (ModuleCodegen<GccContext>, u64) {
7470
let prof_timer = tcx.prof.generic_activity("codegen_module");
7571
let start_time = Instant::now();
7672

@@ -89,7 +85,7 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, target_info: Arc<
8985
// the time we needed for codegenning it.
9086
let cost = time_to_codegen.as_secs() * 1_000_000_000 + time_to_codegen.subsec_nanos() as u64;
9187

92-
fn module_codegen(tcx: TyCtxt<'_>, (cgu_name, target_info): (Symbol, Arc<TargetInfo>)) -> ModuleCodegen<GccContext> {
88+
fn module_codegen(tcx: TyCtxt<'_>, (cgu_name, target_info): (Symbol, LockedTargetInfo)) -> ModuleCodegen<GccContext> {
9389
let cgu = tcx.codegen_unit(cgu_name);
9490
// Instantiate monomorphizations without filling out definitions yet...
9591
let context = Context::default();
@@ -111,14 +107,20 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, target_info: Arc<
111107
// TODO(antoyo): only set on x86 platforms.
112108
context.add_command_line_option("-masm=intel");
113109

114-
let features = ["64", "avxvnni", "bmi", "sse2", "avx", "avx2", "sha", "fma", "fma4", "gfni", "f16c", "aes", "bmi2", "pclmul", "rtm",
110+
// TODO: instead of setting the features manually, set the correct -march flag.
111+
let features = ["64", "avxvnni", "bmi", "sse2", "avx2", "sha", "fma", "fma4", "gfni", "f16c", "aes", "bmi2", "pclmul", "rtm",
115112
"vaes", "vpclmulqdq", "xsavec",
116113
];
117114

118115
for feature in &features {
119116
add_cpu_feature_flag(feature);
120117
}
121118

119+
// NOTE: we always enable AVX because the equivalent of llvm.x86.sse2.cmp.pd in GCC for
120+
// SSE2 is multiple builtins, so we use the AVX __builtin_ia32_cmppd instead.
121+
// FIXME(antoyo): use the proper builtins for llvm.x86.sse2.cmp.pd and similar.
122+
context.add_command_line_option("-mavx");
123+
122124
for arg in &tcx.sess.opts.cg.llvm_args {
123125
context.add_command_line_option(arg);
124126
}

src/intrinsic/llvm.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
236236
let arg2 = builder.context.new_cast(None, arg2, arg2_type);
237237
args = vec![new_args[0], arg2].into();
238238
},
239+
// These builtins are sent one more argument than needed.
239240
"__builtin_prefetch" => {
240241
let mut new_args = args.to_vec();
241242
new_args.pop();

src/lib.rs

Lines changed: 37 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ mod type_of;
7373

7474
use std::any::Any;
7575
use std::sync::Arc;
76+
use std::sync::Mutex;
7677
#[cfg(not(feature="master"))]
7778
use std::sync::atomic::AtomicBool;
7879
#[cfg(not(feature="master"))]
@@ -135,9 +136,24 @@ impl TargetInfo {
135136
}
136137
}
137138

139+
#[derive(Clone, Debug)]
140+
pub struct LockedTargetInfo {
141+
info: Arc<Mutex<TargetInfo>>,
142+
}
143+
144+
impl LockedTargetInfo {
145+
fn cpu_supports(&self, feature: &str) -> bool {
146+
self.info.lock().expect("lock").cpu_supports(feature)
147+
}
148+
149+
fn supports_128bit_int(&self) -> bool {
150+
self.info.lock().expect("lock").supports_128bit_int()
151+
}
152+
}
153+
138154
#[derive(Clone)]
139155
pub struct GccCodegenBackend {
140-
target_info: Arc<TargetInfo>,
156+
target_info: LockedTargetInfo,
141157
}
142158

143159
impl CodegenBackend for GccCodegenBackend {
@@ -146,6 +162,17 @@ impl CodegenBackend for GccCodegenBackend {
146162
}
147163

148164
fn init(&self, sess: &Session) {
165+
#[cfg(feature="master")]
166+
{
167+
let target_cpu = target_cpu(sess);
168+
169+
// Replace the initial TargetInfo with one that has the correct CPU features by setting -march to the target CPU.
170+
let context = Context::default();
171+
context.add_command_line_option(&format!("-march={}", target_cpu));
172+
173+
*self.target_info.info.lock().expect("lock") = context.get_target_info();
174+
}
175+
149176
#[cfg(feature="master")]
150177
gccjit::set_global_personality_function_name(b"rust_eh_personality\0");
151178
if sess.lto() == Lto::Thin {
@@ -161,7 +188,7 @@ impl CodegenBackend for GccCodegenBackend {
161188
let _int128_ty = check_context.new_c_type(CType::UInt128t);
162189
// NOTE: we cannot just call compile() as this would require other files than libgccjit.so.
163190
check_context.compile_to_file(gccjit::OutputKind::Assembler, temp_file.to_str().expect("path to str"));
164-
self.target_info.supports_128bit_integers.store(check_context.get_last_error() == Ok(None), Ordering::SeqCst);
191+
self.target_info.info.lock().expect("lock").supports_128bit_integers.store(check_context.get_last_error() == Ok(None), Ordering::SeqCst);
165192
}
166193
}
167194

@@ -217,7 +244,7 @@ impl ExtraBackendMethods for GccCodegenBackend {
217244
}
218245

219246
fn compile_codegen_unit(&self, tcx: TyCtxt<'_>, cgu_name: Symbol) -> (ModuleCodegen<Self::Module>, u64) {
220-
base::compile_codegen_unit(tcx, cgu_name, Arc::clone(&self.target_info))
247+
base::compile_codegen_unit(tcx, cgu_name, self.target_info.clone())
221248
}
222249

223250
fn target_machine_factory(&self, _sess: &Session, _opt_level: OptLevel, _features: &[String]) -> TargetMachineFactoryFn<Self> {
@@ -306,23 +333,18 @@ impl WriteBackendMethods for GccCodegenBackend {
306333
#[no_mangle]
307334
pub fn __rustc_codegen_backend() -> Box<dyn CodegenBackend> {
308335
#[cfg(feature="master")]
309-
let target_info = {
310-
// Get the native arch and check whether the target supports 128-bit integers.
311-
let context = Context::default();
312-
let arch = context.get_target_info().arch().unwrap();
313-
314-
// Get the second TargetInfo with the correct CPU features by setting the arch.
336+
let info = {
337+
// Get the default TargetInfo, which is later queried to check whether the target supports 128-bit integers.
315338
let context = Context::default();
316-
context.add_command_line_option(&format!("-march={}", arch.to_str().unwrap()));
317-
Arc::new(context.get_target_info())
339+
Arc::new(Mutex::new(context.get_target_info()))
318340
};
319341
#[cfg(not(feature="master"))]
320-
let target_info = Arc::new(TargetInfo {
342+
let info = Arc::new(Mutex::new(TargetInfo {
321343
supports_128bit_integers: AtomicBool::new(false),
322-
});
344+
}));
323345

324346
Box::new(GccCodegenBackend {
325-
target_info,
347+
target_info: LockedTargetInfo { info },
326348
})
327349
}
328350

@@ -356,7 +378,7 @@ pub fn target_cpu(sess: &Session) -> &str {
356378
}
357379
}
358380

359-
pub fn target_features(sess: &Session, allow_unstable: bool, target_info: &Arc<TargetInfo>) -> Vec<Symbol> {
381+
pub fn target_features(sess: &Session, allow_unstable: bool, target_info: &LockedTargetInfo) -> Vec<Symbol> {
360382
supported_target_features(sess)
361383
.iter()
362384
.filter_map(

0 commit comments

Comments
 (0)