Skip to content

Commit a0880a3

Browse files
Centri3NCGThompson
authored and committed
Add new intrinsic is_constant and optimize pow
1 parent 9480767 commit a0880a3

File tree

11 files changed

+259
-31
lines changed

11 files changed

+259
-31
lines changed

compiler/rustc_codegen_llvm/src/context.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -908,6 +908,8 @@ impl<'ll> CodegenCx<'ll, '_> {
908908
ifn!("llvm.lifetime.start.p0i8", fn(t_i64, ptr) -> void);
909909
ifn!("llvm.lifetime.end.p0i8", fn(t_i64, ptr) -> void);
910910

911+
ifn!("llvm.is.constant", fn(...) -> i1);
912+
911913
ifn!("llvm.expect.i1", fn(i1, i1) -> i1);
912914
ifn!("llvm.eh.typeid.for", fn(ptr) -> t_i32);
913915
ifn!("llvm.localescape", fn(...) -> void);

compiler/rustc_codegen_llvm/src/intrinsic.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ impl<'ll, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'_, 'll, 'tcx> {
119119
sym::likely => {
120120
self.call_intrinsic("llvm.expect.i1", &[args[0].immediate(), self.const_bool(true)])
121121
}
122+
sym::is_constant => self.call_intrinsic("llvm.is.constant", &[args[0].immediate()]),
122123
sym::unlikely => self
123124
.call_intrinsic("llvm.expect.i1", &[args[0].immediate(), self.const_bool(false)]),
124125
kw::Try => {

compiler/rustc_const_eval/src/interpret/intrinsics.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
216216
sym::copy => {
217217
self.copy_intrinsic(&args[0], &args[1], &args[2], /*nonoverlapping*/ false)?;
218218
}
219+
sym::is_constant => self.write_scalar(Scalar::from_bool(true), dest)?,
219220
sym::write_bytes => {
220221
self.write_bytes_intrinsic(&args[0], &args[1], &args[2])?;
221222
}

compiler/rustc_hir_analysis/messages.ftl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,8 @@ hir_analysis_invalid_union_field =
178178
hir_analysis_invalid_union_field_sugg =
179179
wrap the field type in `ManuallyDrop<...>`
180180
181+
hir_analysis_is_constant_zst = parameter for `is_constant` cannot be zero-sized
182+
181183
hir_analysis_late_bound_const_in_apit = `impl Trait` can only mention const parameters from an fn or impl
182184
.label = const parameter declared here
183185

compiler/rustc_hir_analysis/src/check/intrinsic.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,8 @@ pub fn intrinsic_operation_unsafety(tcx: TyCtxt<'_>, intrinsic_id: DefId) -> hir
112112
| sym::forget
113113
| sym::black_box
114114
| sym::variant_count
115-
| sym::ptr_mask => hir::Unsafety::Normal,
115+
| sym::ptr_mask
116+
| sym::is_constant => hir::Unsafety::Normal,
116117
_ => hir::Unsafety::Unsafe,
117118
};
118119

@@ -453,6 +454,11 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) {
453454

454455
sym::black_box => (1, vec![param(0)], param(0)),
455456

457+
sym::is_constant => {
458+
// FIXME: ZSTs cause an ICE. We should check for this.
459+
(1, vec![param(0)], tcx.types.bool)
460+
}
461+
456462
sym::const_eval_select => (4, vec![param(0), param(1), param(2)], param(3)),
457463

458464
sym::vtable_size | sym::vtable_align => {

compiler/rustc_span/src/symbol.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -907,6 +907,7 @@ symbols! {
907907
io_stderr,
908908
io_stdout,
909909
irrefutable_let_patterns,
910+
is_constant,
910911
isa_attribute,
911912
isize,
912913
issue,

library/core/src/intrinsics.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2511,6 +2511,25 @@ extern "rust-intrinsic" {
25112511
where
25122512
G: FnOnce<ARG, Output = RET>,
25132513
F: FnOnce<ARG, Output = RET>;
2514+
2515+
/// Returns whether the argument is known at compile-time. This opens the
2516+
/// door for additional optimizations, in that LLVM can then optimize
2517+
/// following checks to either `true` or `false`. This is often paired with
2518+
/// an `if-else` statement, as LLVM will only keep one branch (if
2519+
/// optimizations are on).
2520+
///
2521+
/// "Constant" in this context is not the same as a constant in Rust. As
2522+
/// such, this should only be used for optimizations.
2523+
///
2524+
/// Note that, unlike most intrinsics, this is safe to call;
2525+
/// it does not require an `unsafe` block.
2526+
/// Therefore, implementations must not require the user to uphold
2527+
/// any safety invariants.
2528+
#[rustc_const_stable(feature = "todo", since = "never")]
2529+
#[rustc_safe_intrinsic]
2530+
#[rustc_nounwind]
2531+
#[cfg(not(bootstrap))]
2532+
pub fn is_constant<T>(arg: T) -> bool;
25142533
}
25152534

25162535
// Some functions are defined here because they accidentally got made

library/core/src/num/int_macros.rs

Lines changed: 55 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2089,25 +2089,65 @@ macro_rules! int_impl {
20892089
#[inline]
20902090
#[rustc_inherit_overflow_checks]
20912091
pub const fn pow(self, mut exp: u32) -> Self {
2092-
if exp == 0 {
2093-
return 1;
2092+
#[cfg(not(bootstrap))]
2093+
if intrinsics::is_constant(self) && self > 0 && (self & (self - 1) == 0) {
2094+
let power_used = match self.checked_ilog2() {
2095+
Some(v) => v,
2096+
// SAFETY: We just checked that this is a power of two and above zero.
2097+
None => unsafe { core::hint::unreachable_unchecked() },
2098+
};
2099+
// With debug assertions, multiply directly so that overflow panics. Otherwise,
2100+
// use `overflowing_mul` so the result can be efficiently set to 0 on overflow.
2101+
#[cfg(debug_assertions)]
2102+
power_used * exp;
2103+
let (num_shl, overflowed) = power_used.overflowing_mul(exp);
2104+
let fine = !overflowed
2105+
& (power_used < (mem::size_of::<Self>() * 8) as u32);
2106+
(1 << num_shl) * fine as Self
2107+
} else {
2108+
if exp == 0 {
2109+
return 1;
2110+
}
2111+
let mut base = self;
2112+
let mut acc = 1;
2113+
2114+
while exp > 1 {
2115+
if (exp & 1) == 1 {
2116+
acc = acc * base;
2117+
}
2118+
exp /= 2;
2119+
base = base * base;
2120+
}
2121+
2122+
// since exp!=0, finally the exp must be 1.
2123+
// Deal with the final bit of the exponent separately, since
2124+
// squaring the base afterwards is not necessary and may cause a
2125+
// needless overflow.
2126+
acc * base
20942127
}
2095-
let mut base = self;
2096-
let mut acc = 1;
20972128

2098-
while exp > 1 {
2099-
if (exp & 1) == 1 {
2100-
acc = acc * base;
2129+
#[cfg(bootstrap)]
2130+
{
2131+
if exp == 0 {
2132+
return 1;
2133+
}
2134+
let mut base = self;
2135+
let mut acc = 1;
2136+
2137+
while exp > 1 {
2138+
if (exp & 1) == 1 {
2139+
acc = acc * base;
2140+
}
2141+
exp /= 2;
2142+
base = base * base;
21012143
}
2102-
exp /= 2;
2103-
base = base * base;
2104-
}
21052144

2106-
// since exp!=0, finally the exp must be 1.
2107-
// Deal with the final bit of the exponent separately, since
2108-
// squaring the base afterwards is not necessary and may cause a
2109-
// needless overflow.
2110-
acc * base
2145+
// since exp!=0, finally the exp must be 1.
2146+
// Deal with the final bit of the exponent separately, since
2147+
// squaring the base afterwards is not necessary and may cause a
2148+
// needless overflow.
2149+
acc * base
2150+
}
21112151
}
21122152

21132153
/// Returns the square root of the number, rounded down.

library/core/src/num/uint_macros.rs

Lines changed: 67 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1974,25 +1974,77 @@ macro_rules! uint_impl {
19741974
#[inline]
19751975
#[rustc_inherit_overflow_checks]
19761976
pub const fn pow(self, mut exp: u32) -> Self {
1977-
if exp == 0 {
1978-
return 1;
1977+
// LLVM now knows that `self` is a constant value, but not a
1978+
// constant in Rust. This allows us to compute the power used at
1979+
// compile-time.
1980+
//
1981+
// This will likely add a branch in debug builds, but this should
1982+
// be ok.
1983+
//
1984+
// This is a massive performance boost in release builds, as a power of
1985+
// two raised to the exponent can be computed with a `shl`
1986+
// instruction, but we must add a couple more checks for parity with
1987+
// our own `pow`.
1988+
#[cfg(not(bootstrap))]
1989+
if intrinsics::is_constant(self) && self.is_power_of_two() {
1990+
let power_used = match self.checked_ilog2() {
1991+
Some(v) => v,
1992+
// SAFETY: We just checked this is a power of two. `0` is not a
1993+
// power of two.
1994+
None => unsafe { core::hint::unreachable_unchecked() },
1995+
};
1996+
// With debug assertions, multiply directly so that overflow panics. Otherwise,
1997+
// use `overflowing_mul` so the result can be efficiently set to 0 on overflow.
1998+
#[cfg(debug_assertions)]
1999+
power_used * exp;
2000+
let (num_shl, overflowed) = power_used.overflowing_mul(exp);
2001+
let fine = !overflowed
2002+
& (power_used < (mem::size_of::<Self>() * 8) as u32);
2003+
(1 << num_shl) * fine as Self
2004+
} else {
2005+
if exp == 0 {
2006+
return 1;
2007+
}
2008+
let mut base = self;
2009+
let mut acc = 1;
2010+
2011+
while exp > 1 {
2012+
if (exp & 1) == 1 {
2013+
acc = acc * base;
2014+
}
2015+
exp /= 2;
2016+
base = base * base;
2017+
}
2018+
2019+
// since exp!=0, finally the exp must be 1.
2020+
// Deal with the final bit of the exponent separately, since
2021+
// squaring the base afterwards is not necessary and may cause a
2022+
// needless overflow.
2023+
acc * base
19792024
}
1980-
let mut base = self;
1981-
let mut acc = 1;
19822025

1983-
while exp > 1 {
1984-
if (exp & 1) == 1 {
1985-
acc = acc * base;
2026+
#[cfg(bootstrap)]
2027+
{
2028+
if exp == 0 {
2029+
return 1;
2030+
}
2031+
let mut base = self;
2032+
let mut acc = 1;
2033+
2034+
while exp > 1 {
2035+
if (exp & 1) == 1 {
2036+
acc = acc * base;
2037+
}
2038+
exp /= 2;
2039+
base = base * base;
19862040
}
1987-
exp /= 2;
1988-
base = base * base;
1989-
}
19902041

1991-
// since exp!=0, finally the exp must be 1.
1992-
// Deal with the final bit of the exponent separately, since
1993-
// squaring the base afterwards is not necessary and may cause a
1994-
// needless overflow.
1995-
acc * base
2042+
// since exp!=0, finally the exp must be 1.
2043+
// Deal with the final bit of the exponent separately, since
2044+
// squaring the base afterwards is not necessary and may cause a
2045+
// needless overflow.
2046+
acc * base
2047+
}
19962048
}
19972049

19982050
/// Returns the square root of the number, rounded down.

tests/codegen/is_constant.rs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// compile-flags: --crate-type=lib
2+
#![feature(core_intrinsics)]
3+
4+
use std::intrinsics::is_constant;
5+
6+
pub struct A(u32);
7+
pub enum B {
8+
Ye(u32),
9+
}
10+
11+
#[inline]
12+
pub fn tuple_struct(a: A) -> i32 {
13+
if is_constant(a) { 1 } else { 0 }
14+
}
15+
16+
// CHECK-LABEL: @tuple_struct_true(
17+
#[no_mangle]
18+
pub fn tuple_struct_true() -> i32 {
19+
// CHECK: ret i32 1
20+
tuple_struct(A(1))
21+
}
22+
23+
// CHECK-LABEL: @tuple_struct_false(
24+
#[no_mangle]
25+
pub fn tuple_struct_false(a: A) -> i32 {
26+
// CHECK: ret i32 0
27+
tuple_struct(a)
28+
}
29+
30+
#[inline]
31+
pub fn r#enum(b: B) -> i32 {
32+
if is_constant(b) { 3 } else { 2 }
33+
}
34+
35+
// CHECK-LABEL: @enum_true(
36+
#[no_mangle]
37+
pub fn enum_true() -> i32 {
38+
// CHECK: ret i32 3
39+
r#enum(B::Ye(2))
40+
}
41+
42+
// CHECK-LABEL: @enum_false(
43+
#[no_mangle]
44+
pub fn enum_false(b: B) -> i32 {
45+
// CHECK: ret i32 2
46+
r#enum(b)
47+
}

tests/codegen/pow_of_two.rs

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// compile-flags: --crate-type=lib
2+
3+
// CHECK: @b = unnamed_addr alias i64 (i32), ptr @a
4+
// CHECK-LABEL: @a(
5+
#[no_mangle]
6+
pub fn a(exp: u32) -> u64 {
7+
// CHECK: %[[R:.+]] = and i32 %exp, 63
8+
// CHECK: %[[R:.+]] = zext i32 %[[R:.+]] to i64
9+
// CHECK: %[[R:.+]].i = shl nuw i64 1, %[[R:.+]]
10+
// CHECK: ret i64 %[[R:.+]].i
11+
2u64.pow(exp)
12+
}
13+
14+
#[no_mangle]
15+
pub fn b(exp: u32) -> i64 {
16+
2i64.pow(exp)
17+
}
18+
19+
// CHECK-LABEL: @c(
20+
#[no_mangle]
21+
pub fn c(exp: u32) -> u32 {
22+
// CHECK: %[[R:.+]].0.i = shl i32 %exp, 1
23+
// CHECK: %[[R:.+]].1.i = icmp sgt i32 %exp, -1
24+
// CHECK: %[[R:.+]] = and i32 %[[R:.+]].0.i, 30
25+
// CHECK: %[[R:.+]].i = zext i1 %[[R:.+]].1.i to i32
26+
// CHECK: %[[R:.+]] = shl nuw nsw i32 %[[R:.+]].i, %[[R:.+]]
27+
// CHECK: ret i32 %[[R:.+]]
28+
4u32.pow(exp)
29+
}
30+
31+
// CHECK-LABEL: @d(
32+
#[no_mangle]
33+
pub fn d(exp: u32) -> u32 {
34+
// CHECK: %[[R:.+]] = tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %exp, i32 5)
35+
// CHECK: %[[R:.+]].0.i = extractvalue { i32, i1 } %[[R:.+]], 0
36+
// CHECK: %[[R:.+]].1.i = extractvalue { i32, i1 } %[[R:.+]], 1
37+
// CHECK: %[[R:.+]].i = xor i1 %[[R:.+]].1.i, true
38+
// CHECK: %[[R:.+]] = and i32 %[[R:.+]].0.i, 31
39+
// CHECK: %[[R:.+]].i = zext i1 %[[R:.+]].i to i32
40+
// CHECK: %[[R:.+]] = shl nuw i32 %[[R:.+]].i, %[[R:.+]]
41+
// CHECK: ret i32 %[[R:.+]]
42+
32u32.pow(exp)
43+
}
44+
45+
// CHECK-LABEL: @e(
46+
#[no_mangle]
47+
pub fn e(exp: u32) -> i32 {
48+
// CHECK: %[[R:.+]] = tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %exp, i32 5)
49+
// CHECK: %[[R:.+]].1.i = extractvalue { i32, i1 } %[[R:.+]], 1
50+
// CHECK: %[[R:.+]].i = xor i1 %[[R:.+]].1.i, true
51+
// CHECK: %[[R:.+]].i = zext i1 %[[R:.+]].i to i32
52+
// CHECK: %[[R:.+]].0.i = extractvalue { i32, i1 } %[[R:.+]], 0
53+
// CHECK: %[[R:.+]] = and i32 %[[R:.+]].0.i, 31
54+
// CHECK: %[[R:.+]].010.i = shl nuw i32 %[[R:.+]].i, %[[R:.+]]
55+
// CHECK: ret i32 %[[R:.+]].010.i
56+
32i32.pow(exp)
57+
}

0 commit comments

Comments
 (0)