Skip to content

Commit 7fc6365

Browse files
committed
Auto merge of rust-lang#116692 - Nadrieril:half-open-ranges, r=cjgillot
Match usize/isize exhaustively with half-open ranges

The long-awaited finale to the saga of [exhaustiveness checking for integers](rust-lang#50912)!

```rust
match 0usize {
    0.. => {} // exhaustive!
}
match 0usize {
    0..usize::MAX => {} // helpful error message!
}
```

Features:
- Half-open ranges behave as expected for `usize`/`isize`;
- Trying to use `0..usize::MAX` will tell you that `usize::MAX..` is missing and explain why. No more unhelpful "`_` is missing";
- Everything else stays the same.

This should unblock rust-lang#37854.

Review-wise:
- I recommend looking commit-by-commit;
- This regresses perf because of the added complexity in `IntRange`; hopefully not too much;
- I measured each `#[inline]`, they all help a bit with the perf regression (though I don't get why);
- I did not touch MIR building; I expect there's an easy PR there that would skip unnecessary comparisons when the range is half-open.
2 parents 98f5ebb + 35fe75d commit 7fc6365

26 files changed

+898
-597
lines changed

Diff for: compiler/rustc_middle/src/thir.rs

+239-10
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,19 @@ use rustc_hir::RangeEnd;
1616
use rustc_index::newtype_index;
1717
use rustc_index::IndexVec;
1818
use rustc_middle::middle::region;
19-
use rustc_middle::mir::interpret::AllocId;
19+
use rustc_middle::mir::interpret::{AllocId, Scalar};
2020
use rustc_middle::mir::{self, BinOp, BorrowKind, FakeReadCause, Mutability, UnOp};
2121
use rustc_middle::ty::adjustment::PointerCoercion;
22+
use rustc_middle::ty::layout::IntegerExt;
2223
use rustc_middle::ty::{
2324
self, AdtDef, CanonicalUserType, CanonicalUserTypeAnnotation, FnSig, GenericArgsRef, List, Ty,
24-
UpvarArgs,
25+
TyCtxt, UpvarArgs,
2526
};
2627
use rustc_span::def_id::LocalDefId;
2728
use rustc_span::{sym, ErrorGuaranteed, Span, Symbol, DUMMY_SP};
28-
use rustc_target::abi::{FieldIdx, VariantIdx};
29+
use rustc_target::abi::{FieldIdx, Integer, Size, VariantIdx};
2930
use rustc_target::asm::InlineAsmRegOrRegClass;
31+
use std::cmp::Ordering;
3032
use std::fmt;
3133
use std::ops::Index;
3234

@@ -810,12 +812,243 @@ pub enum PatKind<'tcx> {
810812
Error(ErrorGuaranteed),
811813
}
812814

815+
/// A range pattern.
816+
/// The boundaries must be of the same type and that type must be numeric.
813817
#[derive(Clone, Debug, PartialEq, HashStable, TypeVisitable)]
814818
pub struct PatRange<'tcx> {
815-
pub lo: mir::Const<'tcx>,
816-
pub hi: mir::Const<'tcx>,
819+
pub lo: PatRangeBoundary<'tcx>,
820+
pub hi: PatRangeBoundary<'tcx>,
817821
#[type_visitable(ignore)]
818822
pub end: RangeEnd,
823+
pub ty: Ty<'tcx>,
824+
}
825+
826+
impl<'tcx> PatRange<'tcx> {
827+
/// Whether this range covers the full extent of possible values (best-effort, we ignore floats).
///
/// Returns `None` when `self.ty` is neither `char`, a signed integer nor an unsigned integer.
/// Otherwise returns `Some(true)` only if the lower bound is the type's minimum (or
/// `NegInfinity`) and the upper bound is `PosInfinity` or an *inclusive* finite bound equal to
/// the type's maximum; every other combination yields `Some(false)`.
828+
#[inline]
829+
pub fn is_full_range(&self, tcx: TyCtxt<'tcx>) -> Option<bool> {
830+
// `min`/`max` are expressed in the biased (XOR-ed) encoding used for the comparisons below.
let (min, max, size, bias) = match *self.ty.kind() {
831+
ty::Char => (0, std::char::MAX as u128, Size::from_bits(32), 0),
832+
ty::Int(ity) => {
833+
let size = Integer::from_int_ty(&tcx, ity).size();
834+
let max = size.truncate(u128::MAX);
835+
// The bias is the top (sign) bit of an integer of this size.
let bias = 1u128 << (size.bits() - 1);
836+
(0, max, size, bias)
837+
}
838+
ty::Uint(uty) => {
839+
let size = Integer::from_uint_ty(&tcx, uty).size();
840+
let max = size.unsigned_int_max();
841+
(0, max, size, 0)
842+
}
843+
_ => return None,
844+
};
845+
846+
// We want to compare ranges numerically, but the order of the bitwise representation of
847+
// signed integers does not match their numeric order. Thus, to correct the ordering, we
848+
// need to shift the range of signed integers to correct the comparison. This is achieved by
849+
// XORing with a bias (see pattern/deconstruct_pat.rs for another pertinent example of this
850+
// pattern).
851+
//
852+
// Also, for performance, it's important to only do the second `try_to_bits` if necessary.
853+
let lo_is_min = match self.lo {
854+
PatRangeBoundary::NegInfinity => true,
855+
PatRangeBoundary::Finite(value) => {
856+
let lo = value.try_to_bits(size).unwrap() ^ bias;
857+
lo <= min
858+
}
859+
PatRangeBoundary::PosInfinity => false,
860+
};
861+
// The upper bound is only inspected once the lower bound is known to be the minimum.
if lo_is_min {
862+
let hi_is_max = match self.hi {
863+
PatRangeBoundary::NegInfinity => false,
864+
PatRangeBoundary::Finite(value) => {
865+
let hi = value.try_to_bits(size).unwrap() ^ bias;
866+
// An exclusive upper bound equal to `max` leaves `max` itself uncovered.
hi > max || hi == max && self.end == RangeEnd::Included
867+
}
868+
PatRangeBoundary::PosInfinity => true,
869+
};
870+
if hi_is_max {
871+
return Some(true);
872+
}
873+
}
874+
Some(false)
875+
}
876+
877+
/// Tests whether `value` lies within this range.
///
/// The lower bound is always treated as inclusive; the upper bound is inclusive only when
/// `self.end` is `RangeEnd::Included`. Returns `None` if either call to
/// `PatRangeBoundary::compare_with` returns `None`.
///
/// In debug builds, asserts that `value` has the same type as the range.
#[inline]
878+
pub fn contains(
879+
&self,
880+
value: mir::Const<'tcx>,
881+
tcx: TyCtxt<'tcx>,
882+
param_env: ty::ParamEnv<'tcx>,
883+
) -> Option<bool> {
884+
use Ordering::*;
885+
debug_assert_eq!(self.ty, value.ty());
886+
let ty = self.ty;
887+
// Wrap the value as a finite boundary so it can be compared against `lo` and `hi`.
let value = PatRangeBoundary::Finite(value);
888+
// For performance, it's important to only do the second comparison if necessary.
889+
Some(
890+
// First check: `lo <= value`.
match self.lo.compare_with(value, ty, tcx, param_env)? {
891+
Less | Equal => true,
892+
Greater => false,
893+
// Second check: `value < hi`, or `value <= hi` for an inclusive range.
} && match value.compare_with(self.hi, ty, tcx, param_env)? {
894+
Less => true,
895+
Equal => self.end == RangeEnd::Included,
896+
Greater => false,
897+
},
898+
)
899+
}
900+
901+
/// Tests whether this range and `other` have at least one value in common.
///
/// Two conditions must both hold: `other.lo` is below `self.hi` (or equal to it when `self`
/// is inclusive), and `self.lo` is below `other.hi` (or equal to it when `other` is
/// inclusive). Returns `None` if either call to `PatRangeBoundary::compare_with` returns
/// `None`.
///
/// In debug builds, asserts that both ranges have the same type.
#[inline]
902+
pub fn overlaps(
903+
&self,
904+
other: &Self,
905+
tcx: TyCtxt<'tcx>,
906+
param_env: ty::ParamEnv<'tcx>,
907+
) -> Option<bool> {
908+
use Ordering::*;
909+
debug_assert_eq!(self.ty, other.ty);
910+
// For performance, it's important to only do the second comparison if necessary.
911+
Some(
912+
// `other` must start before `self` ends.
match other.lo.compare_with(self.hi, self.ty, tcx, param_env)? {
913+
Less => true,
914+
Equal => self.end == RangeEnd::Included,
915+
Greater => false,
916+
// `self` must start before `other` ends.
} && match self.lo.compare_with(other.hi, self.ty, tcx, param_env)? {
917+
Less => true,
918+
Equal => other.end == RangeEnd::Included,
919+
Greater => false,
920+
},
921+
)
922+
}
923+
}
924+
925+
/// Formats the range as the finite lower bound (if any), followed by either the range-end
/// token and the finite upper bound, or a bare `..` when the upper bound is not finite.
impl<'tcx> fmt::Display for PatRange<'tcx> {
926+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
927+
// A non-finite lower bound prints nothing.
if let PatRangeBoundary::Finite(value) = &self.lo {
928+
write!(f, "{value}")?;
929+
}
930+
if let PatRangeBoundary::Finite(value) = &self.hi {
931+
write!(f, "{}", self.end)?;
932+
write!(f, "{value}")?;
933+
} else {
934+
// `0..` is parsed as an inclusive range, we must display it correctly.
935+
write!(f, "..")?;
936+
}
937+
Ok(())
938+
}
939+
}
940+
941+
/// A (possibly open) boundary of a range pattern.
942+
/// If present, the const must be of a numeric type.
943+
#[derive(Copy, Clone, Debug, PartialEq, HashStable, TypeVisitable)]
944+
pub enum PatRangeBoundary<'tcx> {
945+
/// An explicit constant boundary.
Finite(mir::Const<'tcx>),
946+
/// An open lower end; `to_const`/`eval_bits` resolve it to the minimum value of the type.
NegInfinity,
947+
/// An open upper end; `to_const`/`eval_bits` resolve it to the maximum value of the type.
PosInfinity,
948+
}
949+
950+
impl<'tcx> PatRangeBoundary<'tcx> {
951+
/// Returns `true` if this boundary is the `Finite` variant.
#[inline]
952+
pub fn is_finite(self) -> bool {
953+
matches!(self, Self::Finite(..))
954+
}
955+
/// Returns the constant held by a `Finite` boundary, or `None` for either infinity.
#[inline]
956+
pub fn as_finite(self) -> Option<mir::Const<'tcx>> {
957+
match self {
958+
Self::Finite(value) => Some(value),
959+
Self::NegInfinity | Self::PosInfinity => None,
960+
}
961+
}
962+
/// Converts this boundary into a concrete constant of type `ty`.
///
/// A `Finite` boundary returns its constant unchanged; `NegInfinity` and `PosInfinity` are
/// replaced by `ty.numeric_min_val(tcx)` and `ty.numeric_max_val(tcx)` respectively.
///
/// The `unwrap` calls rely on `ty` being numeric, as the inline comments state.
#[inline]
963+
pub fn to_const(self, ty: Ty<'tcx>, tcx: TyCtxt<'tcx>) -> mir::Const<'tcx> {
964+
match self {
965+
Self::Finite(value) => value,
966+
Self::NegInfinity => {
967+
// Unwrap is ok because the type is known to be numeric.
968+
let c = ty.numeric_min_val(tcx).unwrap();
969+
mir::Const::from_ty_const(c, tcx)
970+
}
971+
Self::PosInfinity => {
972+
// Unwrap is ok because the type is known to be numeric.
973+
let c = ty.numeric_max_val(tcx).unwrap();
974+
mir::Const::from_ty_const(c, tcx)
975+
}
976+
}
977+
}
978+
/// Evaluates this boundary to its raw bits for type `ty`.
///
/// A `Finite` boundary delegates to the constant's own `eval_bits`; `NegInfinity` and
/// `PosInfinity` take the first and second component of
/// `ty.numeric_min_and_max_as_bits(tcx)` respectively.
///
/// The `unwrap` calls rely on `ty` being numeric, as the inline comments state.
pub fn eval_bits(self, ty: Ty<'tcx>, tcx: TyCtxt<'tcx>, param_env: ty::ParamEnv<'tcx>) -> u128 {
979+
match self {
980+
Self::Finite(value) => value.eval_bits(tcx, param_env),
981+
Self::NegInfinity => {
982+
// Unwrap is ok because the type is known to be numeric.
983+
ty.numeric_min_and_max_as_bits(tcx).unwrap().0
984+
}
985+
Self::PosInfinity => {
986+
// Unwrap is ok because the type is known to be numeric.
987+
ty.numeric_min_and_max_as_bits(tcx).unwrap().1
988+
}
989+
}
990+
}
991+
992+
/// Compares this boundary with `other`, both interpreted as values of type `ty`.
///
/// Identical infinities compare `Equal` immediately, and pairs of finite `Uint`/`Char`
/// constants are compared without evaluation. Every other case evaluates both sides with
/// `eval_bits` and compares them according to `ty`: as IEEE floats for `f32`/`f64` (via
/// `partial_cmp`, whose result is returned as-is), as sign-extended `i128` for signed
/// integers, and as raw `u128` for unsigned integers and `char`.
///
/// Any other `ty` hits `bug!()`.
#[instrument(skip(tcx, param_env), level = "debug", ret)]
993+
pub fn compare_with(
994+
self,
995+
other: Self,
996+
ty: Ty<'tcx>,
997+
tcx: TyCtxt<'tcx>,
998+
param_env: ty::ParamEnv<'tcx>,
999+
) -> Option<Ordering> {
1000+
use PatRangeBoundary::*;
1001+
match (self, other) {
1002+
// When comparing with infinities, we must remember that `0u8..` and `0u8..=255`
1003+
// describe the same range. These two shortcuts are ok, but for the rest we must check
1004+
// bit values.
1005+
(PosInfinity, PosInfinity) => return Some(Ordering::Equal),
1006+
(NegInfinity, NegInfinity) => return Some(Ordering::Equal),
1007+
1008+
// This code is hot when compiling matches with many ranges. So we
1009+
// special-case extraction of evaluated scalars for speed, for types where
1010+
// raw data comparisons are appropriate. E.g. `unicode-normalization` has
1011+
// many ranges such as '\u{037A}'..='\u{037F}', and chars can be compared
1012+
// in this way.
1013+
(Finite(mir::Const::Ty(a)), Finite(mir::Const::Ty(b)))
1014+
if matches!(ty.kind(), ty::Uint(_) | ty::Char) =>
1015+
{
1016+
return Some(a.kind().cmp(&b.kind()));
1017+
}
1018+
(
1019+
Finite(mir::Const::Val(mir::ConstValue::Scalar(Scalar::Int(a)), _)),
1020+
Finite(mir::Const::Val(mir::ConstValue::Scalar(Scalar::Int(b)), _)),
1021+
) if matches!(ty.kind(), ty::Uint(_) | ty::Char) => return Some(a.cmp(&b)),
1022+
_ => {}
1023+
}
1024+
1025+
// Slow path: resolve both boundaries (including infinities) to raw bits.
let a = self.eval_bits(ty, tcx, param_env);
1026+
let b = other.eval_bits(ty, tcx, param_env);
1027+
1028+
match ty.kind() {
1029+
ty::Float(ty::FloatTy::F32) => {
1030+
use rustc_apfloat::Float;
1031+
let a = rustc_apfloat::ieee::Single::from_bits(a);
1032+
let b = rustc_apfloat::ieee::Single::from_bits(b);
1033+
a.partial_cmp(&b)
1034+
}
1035+
ty::Float(ty::FloatTy::F64) => {
1036+
use rustc_apfloat::Float;
1037+
let a = rustc_apfloat::ieee::Double::from_bits(a);
1038+
let b = rustc_apfloat::ieee::Double::from_bits(b);
1039+
a.partial_cmp(&b)
1040+
}
1041+
ty::Int(ity) => {
1042+
use rustc_middle::ty::layout::IntegerExt;
1043+
let size = rustc_target::abi::Integer::from_int_ty(&tcx, *ity).size();
1044+
// Sign-extend so that the comparison follows numeric rather than bitwise order.
let a = size.sign_extend(a) as i128;
1045+
let b = size.sign_extend(b) as i128;
1046+
Some(a.cmp(&b))
1047+
}
1048+
ty::Uint(_) | ty::Char => Some(a.cmp(&b)),
1049+
_ => bug!(),
1050+
}
1051+
}
8191052
}
8201053

8211054
impl<'tcx> fmt::Display for Pat<'tcx> {
@@ -944,11 +1177,7 @@ impl<'tcx> fmt::Display for Pat<'tcx> {
9441177
PatKind::InlineConstant { def: _, ref subpattern } => {
9451178
write!(f, "{} (from inline const)", subpattern)
9461179
}
947-
PatKind::Range(box PatRange { lo, hi, end }) => {
948-
write!(f, "{lo}")?;
949-
write!(f, "{end}")?;
950-
write!(f, "{hi}")
951-
}
1180+
PatKind::Range(ref range) => write!(f, "{range}"),
9521181
PatKind::Slice { ref prefix, ref slice, ref suffix }
9531182
| PatKind::Array { ref prefix, ref slice, ref suffix } => {
9541183
write!(f, "[")?;

Diff for: compiler/rustc_middle/src/ty/util.rs

+43-35
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use rustc_index::bit_set::GrowableBitSet;
1919
use rustc_macros::HashStable;
2020
use rustc_session::Limit;
2121
use rustc_span::sym;
22-
use rustc_target::abi::{Integer, IntegerType, Size};
22+
use rustc_target::abi::{Integer, IntegerType, Primitive, Size};
2323
use rustc_target::spec::abi::Abi;
2424
use smallvec::SmallVec;
2525
use std::{fmt, iter};
@@ -919,54 +919,62 @@ impl<'tcx> TypeFolder<TyCtxt<'tcx>> for OpaqueTypeExpander<'tcx> {
919919
}
920920

921921
impl<'tcx> Ty<'tcx> {
922+
/// Returns the `Size` for primitive types (bool, uint, int, char, float).
///
/// `bool` is 1 byte and `char` is 4 bytes; integer and float sizes are looked up through
/// `tcx`. Any other type hits `bug!("non primitive type")`.
923+
pub fn primitive_size(self, tcx: TyCtxt<'tcx>) -> Size {
924+
match *self.kind() {
925+
ty::Bool => Size::from_bytes(1),
926+
ty::Char => Size::from_bytes(4),
927+
ty::Int(ity) => Integer::from_int_ty(&tcx, ity).size(),
928+
ty::Uint(uty) => Integer::from_uint_ty(&tcx, uty).size(),
929+
ty::Float(ty::FloatTy::F32) => Primitive::F32.size(&tcx),
930+
ty::Float(ty::FloatTy::F64) => Primitive::F64.size(&tcx),
931+
_ => bug!("non primitive type"),
932+
}
933+
}
934+
922935
pub fn int_size_and_signed(self, tcx: TyCtxt<'tcx>) -> (Size, bool) {
923-
let (int, signed) = match *self.kind() {
924-
ty::Int(ity) => (Integer::from_int_ty(&tcx, ity), true),
925-
ty::Uint(uty) => (Integer::from_uint_ty(&tcx, uty), false),
936+
match *self.kind() {
937+
ty::Int(ity) => (Integer::from_int_ty(&tcx, ity).size(), true),
938+
ty::Uint(uty) => (Integer::from_uint_ty(&tcx, uty).size(), false),
926939
_ => bug!("non integer discriminant"),
927-
};
928-
(int.size(), signed)
940+
}
929941
}
930942

931-
/// Returns the maximum value for the given numeric type (including `char`s)
932-
/// or returns `None` if the type is not numeric.
933-
pub fn numeric_max_val(self, tcx: TyCtxt<'tcx>) -> Option<ty::Const<'tcx>> {
934-
let val = match self.kind() {
943+
/// Returns the minimum and maximum values for the given numeric type (including `char`s) or
944+
/// returns `None` if the type is not numeric.
945+
pub fn numeric_min_and_max_as_bits(self, tcx: TyCtxt<'tcx>) -> Option<(u128, u128)> {
946+
use rustc_apfloat::ieee::{Double, Single};
947+
Some(match self.kind() {
935948
ty::Int(_) | ty::Uint(_) => {
936949
let (size, signed) = self.int_size_and_signed(tcx);
937-
let val =
950+
let min = if signed { size.truncate(size.signed_int_min() as u128) } else { 0 };
951+
let max =
938952
if signed { size.signed_int_max() as u128 } else { size.unsigned_int_max() };
939-
Some(val)
953+
(min, max)
940954
}
941-
ty::Char => Some(std::char::MAX as u128),
942-
ty::Float(fty) => Some(match fty {
943-
ty::FloatTy::F32 => rustc_apfloat::ieee::Single::INFINITY.to_bits(),
944-
ty::FloatTy::F64 => rustc_apfloat::ieee::Double::INFINITY.to_bits(),
945-
}),
946-
_ => None,
947-
};
955+
ty::Char => (0, std::char::MAX as u128),
956+
ty::Float(ty::FloatTy::F32) => {
957+
((-Single::INFINITY).to_bits(), Single::INFINITY.to_bits())
958+
}
959+
ty::Float(ty::FloatTy::F64) => {
960+
((-Double::INFINITY).to_bits(), Double::INFINITY.to_bits())
961+
}
962+
_ => return None,
963+
})
964+
}
948965

949-
val.map(|v| ty::Const::from_bits(tcx, v, ty::ParamEnv::empty().and(self)))
966+
/// Returns the maximum value for the given numeric type (including `char`s)
967+
/// or returns `None` if the type is not numeric.
968+
pub fn numeric_max_val(self, tcx: TyCtxt<'tcx>) -> Option<ty::Const<'tcx>> {
969+
self.numeric_min_and_max_as_bits(tcx)
970+
.map(|(_, max)| ty::Const::from_bits(tcx, max, ty::ParamEnv::empty().and(self)))
950971
}
951972

952973
/// Returns the minimum value for the given numeric type (including `char`s)
953974
/// or returns `None` if the type is not numeric.
954975
pub fn numeric_min_val(self, tcx: TyCtxt<'tcx>) -> Option<ty::Const<'tcx>> {
955-
let val = match self.kind() {
956-
ty::Int(_) | ty::Uint(_) => {
957-
let (size, signed) = self.int_size_and_signed(tcx);
958-
let val = if signed { size.truncate(size.signed_int_min() as u128) } else { 0 };
959-
Some(val)
960-
}
961-
ty::Char => Some(0),
962-
ty::Float(fty) => Some(match fty {
963-
ty::FloatTy::F32 => (-::rustc_apfloat::ieee::Single::INFINITY).to_bits(),
964-
ty::FloatTy::F64 => (-::rustc_apfloat::ieee::Double::INFINITY).to_bits(),
965-
}),
966-
_ => None,
967-
};
968-
969-
val.map(|v| ty::Const::from_bits(tcx, v, ty::ParamEnv::empty().and(self)))
976+
self.numeric_min_and_max_as_bits(tcx)
977+
.map(|(min, _)| ty::Const::from_bits(tcx, min, ty::ParamEnv::empty().and(self)))
970978
}
971979

972980
/// Checks whether values of this type `T` are *moved* or *copied*

Diff for: compiler/rustc_mir_build/src/build/matches/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1035,7 +1035,7 @@ enum TestKind<'tcx> {
10351035
ty: Ty<'tcx>,
10361036
},
10371037

1038-
/// Test whether the value falls within an inclusive or exclusive range
1038+
/// Test whether the value falls within an inclusive or exclusive range.
10391039
Range(Box<PatRange<'tcx>>),
10401040

10411041
/// Test that the length of the slice is equal to `len`.

0 commit comments

Comments
 (0)