Skip to content

Commit 8bf93e9

Browse files
committed
Auto merge of rust-lang#95855 - Dylan-DPC:rollup-h45xmpw, r=Dylan-DPC
Rollup of 7 pull requests Successful merges: - rust-lang#94794 (Clarify indexing into Strings) - rust-lang#95361 (Make non-power-of-two alignments a validity error in `Layout`) - rust-lang#95369 (Fix `x test src/librustdoc` with `download-rustc` enabled ) - rust-lang#95805 (Left overs of rust-lang#95761) - rust-lang#95808 (expand: Remove `ParseSess::missing_fragment_specifiers`) - rust-lang#95817 (hide another #[allow] directive from a docs example) - rust-lang#95831 (Use bitwise XOR in to_ascii_uppercase) Failed merges: r? `@ghost` `@rustbot` modify labels: rollup
2 parents 8c1fb2e + 7726265 commit 8bf93e9

26 files changed

+569
-95
lines changed

compiler/rustc_expand/src/lib.rs

-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
#![feature(associated_type_bounds)]
33
#![feature(associated_type_defaults)]
44
#![feature(crate_visibility_modifier)]
5-
#![feature(decl_macro)]
65
#![feature(if_let_guard)]
76
#![feature(let_chains)]
87
#![feature(let_else)]

compiler/rustc_expand/src/mbe/macro_check.rs

+13-2
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ use rustc_ast::token::{DelimToken, Token, TokenKind};
110110
use rustc_ast::{NodeId, DUMMY_NODE_ID};
111111
use rustc_data_structures::fx::FxHashMap;
112112
use rustc_errors::MultiSpan;
113-
use rustc_session::lint::builtin::META_VARIABLE_MISUSE;
113+
use rustc_session::lint::builtin::{META_VARIABLE_MISUSE, MISSING_FRAGMENT_SPECIFIER};
114114
use rustc_session::parse::ParseSess;
115115
use rustc_span::symbol::kw;
116116
use rustc_span::{symbol::MacroRulesNormalizedIdent, Span};
@@ -261,7 +261,18 @@ fn check_binders(
261261
}
262262
}
263263
// Similarly, this can only happen when checking a toplevel macro.
264-
TokenTree::MetaVarDecl(span, name, _kind) => {
264+
TokenTree::MetaVarDecl(span, name, kind) => {
265+
if kind.is_none() && node_id != DUMMY_NODE_ID {
266+
// FIXME: Report this as a hard error eventually and remove equivalent errors from
267+
// `parse_tt_inner` and `nameize`. Until then the error may be reported twice, once
268+
// as a hard error and then once as a buffered lint.
269+
sess.buffer_lint(
270+
MISSING_FRAGMENT_SPECIFIER,
271+
span,
272+
node_id,
273+
&format!("missing fragment specifier"),
274+
);
275+
}
265276
if !macros.is_empty() {
266277
sess.span_diagnostic.span_bug(span, "unexpected MetaVarDecl in nested lhs");
267278
}

compiler/rustc_expand/src/mbe/macro_parser.rs

+6-12
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,6 @@ impl TtParser {
411411
/// track of through the mps generated.
412412
fn parse_tt_inner(
413413
&mut self,
414-
sess: &ParseSess,
415414
matcher: &[MatcherLoc],
416415
token: &Token,
417416
) -> Option<NamedParseResult> {
@@ -519,11 +518,9 @@ impl TtParser {
519518
self.bb_mps.push(mp);
520519
}
521520
} else {
521+
// E.g. `$e` instead of `$e:expr`, reported as a hard error if actually used.
522522
// Both this check and the one in `nameize` are necessary, surprisingly.
523-
if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() {
524-
// E.g. `$e` instead of `$e:expr`.
525-
return Some(Error(span, "missing fragment specifier".to_string()));
526-
}
523+
return Some(Error(span, "missing fragment specifier".to_string()));
527524
}
528525
}
529526
MatcherLoc::Eof => {
@@ -549,7 +546,7 @@ impl TtParser {
549546
// Need to take ownership of the matches from within the `Lrc`.
550547
Lrc::make_mut(&mut eof_mp.matches);
551548
let matches = Lrc::try_unwrap(eof_mp.matches).unwrap().into_iter();
552-
self.nameize(sess, matcher, matches)
549+
self.nameize(matcher, matches)
553550
}
554551
EofMatcherPositions::Multiple => {
555552
Error(token.span, "ambiguity: multiple successful parses".to_string())
@@ -587,7 +584,7 @@ impl TtParser {
587584

588585
// Process `cur_mps` until either we have finished the input or we need to get some
589586
// parsing from the black-box parser done.
590-
if let Some(res) = self.parse_tt_inner(&parser.sess, matcher, &parser.token) {
587+
if let Some(res) = self.parse_tt_inner(matcher, &parser.token) {
591588
return res;
592589
}
593590

@@ -694,7 +691,6 @@ impl TtParser {
694691

695692
fn nameize<I: Iterator<Item = NamedMatch>>(
696693
&self,
697-
sess: &ParseSess,
698694
matcher: &[MatcherLoc],
699695
mut res: I,
700696
) -> NamedParseResult {
@@ -711,11 +707,9 @@ impl TtParser {
711707
}
712708
};
713709
} else {
710+
// E.g. `$e` instead of `$e:expr`, reported as a hard error if actually used.
714711
// Both this check and the one in `parse_tt_inner` are necessary, surprisingly.
715-
if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() {
716-
// E.g. `$e` instead of `$e:expr`.
717-
return Error(span, "missing fragment specifier".to_string());
718-
}
712+
return Error(span, "missing fragment specifier".to_string());
719713
}
720714
}
721715
}

compiler/rustc_expand/src/mbe/quoted.rs

+2-6
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@ use crate::mbe::macro_parser::count_metavar_decls;
22
use crate::mbe::{Delimited, KleeneOp, KleeneToken, MetaVarExpr, SequenceRepetition, TokenTree};
33

44
use rustc_ast::token::{self, Token};
5-
use rustc_ast::tokenstream;
6-
use rustc_ast::{NodeId, DUMMY_NODE_ID};
5+
use rustc_ast::{tokenstream, NodeId};
76
use rustc_ast_pretty::pprust;
87
use rustc_feature::Features;
98
use rustc_session::parse::{feature_err, ParseSess};
@@ -104,10 +103,7 @@ pub(super) fn parse(
104103
}
105104
tree => tree.as_ref().map_or(start_sp, tokenstream::TokenTree::span),
106105
};
107-
if node_id != DUMMY_NODE_ID {
108-
// Macros loaded from other crates have dummy node ids.
109-
sess.missing_fragment_specifiers.borrow_mut().insert(span, node_id);
110-
}
106+
111107
result.push(TokenTree::MetaVarDecl(span, ident, None));
112108
}
113109

compiler/rustc_interface/src/passes.rs

-16
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ use rustc_resolve::{Resolver, ResolverArenas};
3030
use rustc_serialize::json;
3131
use rustc_session::config::{CrateType, Input, OutputFilenames, OutputType};
3232
use rustc_session::cstore::{MetadataLoader, MetadataLoaderDyn};
33-
use rustc_session::lint;
3433
use rustc_session::output::{filename_for_input, filename_for_metadata};
3534
use rustc_session::search_paths::PathKind;
3635
use rustc_session::{Limit, Session};
@@ -349,23 +348,8 @@ pub fn configure_and_expand(
349348
ecx.check_unused_macros();
350349
});
351350

352-
let mut missing_fragment_specifiers: Vec<_> = ecx
353-
.sess
354-
.parse_sess
355-
.missing_fragment_specifiers
356-
.borrow()
357-
.iter()
358-
.map(|(span, node_id)| (*span, *node_id))
359-
.collect();
360-
missing_fragment_specifiers.sort_unstable_by_key(|(span, _)| *span);
361-
362351
let recursion_limit_hit = ecx.reduced_recursion_limit.is_some();
363352

364-
for (span, node_id) in missing_fragment_specifiers {
365-
let lint = lint::builtin::MISSING_FRAGMENT_SPECIFIER;
366-
let msg = "missing fragment specifier";
367-
resolver.lint_buffer().buffer_lint(lint, node_id, span, msg);
368-
}
369353
if cfg!(windows) {
370354
env::set_var("PATH", &old_path);
371355
}

compiler/rustc_session/src/parse.rs

-2
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,6 @@ pub struct ParseSess {
140140
pub config: CrateConfig,
141141
pub check_config: CrateCheckConfig,
142142
pub edition: Edition,
143-
pub missing_fragment_specifiers: Lock<FxHashMap<Span, NodeId>>,
144143
/// Places where raw identifiers were used. This is used to avoid complaining about idents
145144
/// clashing with keywords in new editions.
146145
pub raw_identifier_spans: Lock<Vec<Span>>,
@@ -195,7 +194,6 @@ impl ParseSess {
195194
config: FxHashSet::default(),
196195
check_config: CrateCheckConfig::default(),
197196
edition: ExpnId::root().expn_data().edition,
198-
missing_fragment_specifiers: Default::default(),
199197
raw_identifier_spans: Lock::new(Vec::new()),
200198
bad_unicode_identifiers: Lock::new(Default::default()),
201199
source_map,

library/alloc/src/rc.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,7 @@ impl<T> Rc<T> {
393393
/// # Examples
394394
///
395395
/// ```
396-
/// #![allow(dead_code)]
396+
/// # #![allow(dead_code)]
397397
/// use std::rc::{Rc, Weak};
398398
///
399399
/// struct Gadget {

library/alloc/src/string.rs

+82-10
Original file line numberDiff line numberDiff line change
@@ -117,27 +117,99 @@ use crate::vec::Vec;
117117
///
118118
/// # UTF-8
119119
///
120-
/// `String`s are always valid UTF-8. This has a few implications, the first of
121-
/// which is that if you need a non-UTF-8 string, consider [`OsString`]. It is
122-
/// similar, but without the UTF-8 constraint. The second implication is that
123-
/// you cannot index into a `String`:
120+
/// `String`s are always valid UTF-8. If you need a non-UTF-8 string, consider
121+
/// [`OsString`]. It is similar, but without the UTF-8 constraint. Because UTF-8
122+
/// is a variable-width encoding, `String`s are typically smaller than an array of
123+
/// the same `char`s:
124+
///
125+
/// ```
126+
/// use std::mem;
127+
///
128+
/// // `s` is ASCII which represents each `char` as one byte
129+
/// let s = "hello";
130+
/// assert_eq!(s.len(), 5);
131+
///
132+
/// // A `char` array with the same contents would be longer because
133+
/// // every `char` is four bytes
134+
/// let s = ['h', 'e', 'l', 'l', 'o'];
135+
/// let size: usize = s.into_iter().map(|c| mem::size_of_val(&c)).sum();
136+
/// assert_eq!(size, 20);
137+
///
138+
/// // However, for non-ASCII strings, the difference will be smaller
139+
/// // and sometimes they are the same
140+
/// let s = "💖💖💖💖💖";
141+
/// assert_eq!(s.len(), 20);
142+
///
143+
/// let s = ['💖', '💖', '💖', '💖', '💖'];
144+
/// let size: usize = s.into_iter().map(|c| mem::size_of_val(&c)).sum();
145+
/// assert_eq!(size, 20);
146+
/// ```
147+
///
148+
/// This raises interesting questions as to how `s[i]` should work.
149+
/// What should `i` be here? Several options include byte indices and
150+
/// `char` indices but, because of UTF-8 encoding, only byte indices
151+
/// would provide constant time indexing. Getting the `i`th `char`, for
152+
/// example, is available using [`chars`]:
153+
///
154+
/// ```
155+
/// let s = "hello";
156+
/// let third_character = s.chars().nth(2);
157+
/// assert_eq!(third_character, Some('l'));
158+
///
159+
/// let s = "💖💖💖💖💖";
160+
/// let third_character = s.chars().nth(2);
161+
/// assert_eq!(third_character, Some('💖'));
162+
/// ```
163+
///
164+
/// Next, what should `s[i]` return? Because indexing returns a reference
165+
/// to underlying data it could be `&u8`, `&[u8]`, or something else similar.
166+
/// Since we're only providing one index, `&u8` makes the most sense but that
167+
/// might not be what the user expects and can be explicitly achieved with
168+
/// [`as_bytes()`]:
169+
///
170+
/// ```
171+
/// // The first byte is 104 - the byte value of `'h'`
172+
/// let s = "hello";
173+
/// assert_eq!(s.as_bytes()[0], 104);
174+
/// // or
175+
/// assert_eq!(s.as_bytes()[0], b'h');
176+
///
177+
/// // The first byte is 240 which isn't obviously useful
178+
/// let s = "💖💖💖💖💖";
179+
/// assert_eq!(s.as_bytes()[0], 240);
180+
/// ```
181+
///
182+
/// Due to these ambiguities/restrictions, indexing with a `usize` is simply
183+
/// forbidden:
124184
///
125185
/// ```compile_fail,E0277
126186
/// let s = "hello";
127187
///
128-
/// println!("The first letter of s is {}", s[0]); // ERROR!!!
188+
/// // The following will not compile!
189+
/// println!("The first letter of s is {}", s[0]);
129190
/// ```
130191
///
192+
/// It is more clear, however, how `&s[i..j]` should work (that is,
193+
/// indexing with a range). It should accept byte indices (to be constant-time)
194+
/// and return a `&str` which is UTF-8 encoded. This is also called "string slicing".
195+
/// Note this will panic if the byte indices provided are not character
196+
/// boundaries - see [`is_char_boundary`] for more details. See the implementations
197+
/// for [`SliceIndex<str>`] for more details on string slicing. For a non-panicking
198+
/// version of string slicing, see [`get`].
199+
///
131200
/// [`OsString`]: ../../std/ffi/struct.OsString.html "ffi::OsString"
201+
/// [`SliceIndex<str>`]: core::slice::SliceIndex
202+
/// [`as_bytes()`]: str::as_bytes
203+
/// [`get`]: str::get
204+
/// [`is_char_boundary`]: str::is_char_boundary
132205
///
133-
/// Indexing is intended to be a constant-time operation, but UTF-8 encoding
134-
/// does not allow us to do this. Furthermore, it's not clear what sort of
135-
/// thing the index should return: a byte, a codepoint, or a grapheme cluster.
136-
/// The [`bytes`] and [`chars`] methods return iterators over the first
137-
/// two, respectively.
206+
/// The [`bytes`] and [`chars`] methods return iterators over the bytes and
207+
/// codepoints of the string, respectively. To iterate over codepoints along
208+
/// with byte indices, use [`char_indices`].
138209
///
139210
/// [`bytes`]: str::bytes
140211
/// [`chars`]: str::chars
212+
/// [`char_indices`]: str::char_indices
141213
///
142214
/// # Deref
143215
///

library/core/src/alloc/layout.rs

+5-6
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
use crate::cmp;
22
use crate::fmt;
3-
use crate::mem;
4-
use crate::num::NonZeroUsize;
3+
use crate::mem::{self, ValidAlign};
54
use crate::ptr::NonNull;
65

76
// While this function is used in one place and its implementation
@@ -40,7 +39,7 @@ pub struct Layout {
4039
//
4140
// (However, we do not analogously require `align >= sizeof(void*)`,
4241
// even though that is *also* a requirement of `posix_memalign`.)
43-
align_: NonZeroUsize,
42+
align_: ValidAlign,
4443
}
4544

4645
impl Layout {
@@ -97,8 +96,8 @@ impl Layout {
9796
#[must_use]
9897
#[inline]
9998
pub const unsafe fn from_size_align_unchecked(size: usize, align: usize) -> Self {
100-
// SAFETY: the caller must ensure that `align` is greater than zero.
101-
Layout { size_: size, align_: unsafe { NonZeroUsize::new_unchecked(align) } }
99+
// SAFETY: the caller must ensure that `align` is a power of two.
100+
Layout { size_: size, align_: unsafe { ValidAlign::new_unchecked(align) } }
102101
}
103102

104103
/// The minimum size in bytes for a memory block of this layout.
@@ -117,7 +116,7 @@ impl Layout {
117116
without modifying the layout"]
118117
#[inline]
119118
pub const fn align(&self) -> usize {
120-
self.align_.get()
119+
self.align_.as_nonzero().get()
121120
}
122121

123122
/// Constructs a `Layout` suitable for holding a value of type `T`.

library/core/src/mem/mod.rs

+6
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@ mod maybe_uninit;
2121
#[stable(feature = "maybe_uninit", since = "1.36.0")]
2222
pub use maybe_uninit::MaybeUninit;
2323

24+
mod valid_align;
25+
// For now this type is left crate-local. It could potentially make sense to expose
26+
// it publicly, as it would be a nice parameter type for methods which need to take
27+
// alignment as a parameter, such as `Layout::padding_needed_for`.
28+
pub(crate) use valid_align::ValidAlign;
29+
2430
#[stable(feature = "rust1", since = "1.0.0")]
2531
#[doc(inline)]
2632
pub use crate::intrinsics::transmute;

0 commit comments

Comments
 (0)