Skip to content

Commit a168807

Browse files
pierrechevalier83Byron
authored andcommitted
fix!: Hide the internal representation of EntryMode
Change the gix-object API so that client code can't explicitely rely on the internal representation of `EntryMode`. This is necessary as we need to change it to allow round-trip behaviour for modes like `b"040000"` and `b"40000"` which currently share the `0o40000u16` representation. Use the opportunity to sprinkle a couple of optimizations in the parsing of the EntryMode since we had to go deep in understanding this code anyway, so may as well. Mostly, don't `reverse` the bytes when parsing. ``` TreeRefIter() time: [46.251 ns 46.390 ns 46.549 ns] change: [-17.685% -16.512% -15.664%] (p = 0.00 < 0.05) Performance has improved. Found 1 outliers among 100 measurements (1.00%) ``` Also add a test that shows the current incorrect behaviour.
1 parent 420e730 commit a168807

File tree

3 files changed

+125
-92
lines changed

3 files changed

+125
-92
lines changed

gix-object/src/tree/mod.rs

Lines changed: 97 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,98 @@ pub struct Editor<'a> {
4444
/// create it by converting [`EntryKind`] into `EntryMode`.
4545
#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)]
4646
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
47-
pub struct EntryMode(pub u16);
47+
pub struct EntryMode {
48+
// Represents the value read from Git, except that "040000" is represented with 0o140000 but
49+
// "40000" is represented with 0o40000
50+
internal: u16,
51+
}
52+
53+
impl TryFrom<u32> for tree::EntryMode {
54+
type Error = u32;
55+
fn try_from(mode: u32) -> Result<Self, Self::Error> {
56+
Ok(match mode {
57+
0o40000 | 0o120000 | 0o160000 => EntryMode { internal: mode as u16 },
58+
blob_mode if blob_mode & 0o100000 == 0o100000 => EntryMode { internal: mode as u16 },
59+
_ => return Err(mode),
60+
})
61+
}
62+
}
63+
64+
impl EntryMode {
65+
/// Expose the value as a u16 (lossy, unlike the internal representation that is hidden)
66+
pub const fn value(self) -> u16 {
67+
self.internal
68+
}
69+
70+
/// Return the representation as used in the git internal format, which is octal and written
71+
/// to the `backing` buffer. The respective sub-slice that was written to is returned.
72+
pub fn as_bytes<'a>(&self, backing: &'a mut [u8; 6]) -> &'a BStr {
73+
if self.internal == 0 {
74+
std::slice::from_ref(&b'0')
75+
} else {
76+
for (idx, backing_octet) in backing.iter_mut().enumerate() {
77+
let bit_pos = 3 /* because base 8 and 2^3 == 8*/ * (6 - idx - 1);
78+
let oct_mask = 0b111 << bit_pos;
79+
let digit = (self.internal & oct_mask) >> bit_pos;
80+
*backing_octet = b'0' + digit as u8;
81+
}
82+
if backing[1] == b'4' {
83+
&backing[1..6]
84+
} else {
85+
&backing[0..6]
86+
}
87+
}
88+
.into()
89+
}
90+
91+
/// Construct an EntryMode from bytes represented as in the git internal format
92+
/// Return the mode and the remainder of the bytes
93+
pub(crate) fn extract_from_bytes(i: &[u8]) -> Option<(Self, &'_ [u8])> {
94+
let mut mode = 0;
95+
let mut idx = 0;
96+
let mut space_pos = 0;
97+
if i.is_empty() {
98+
return None;
99+
}
100+
// const fn, this is why we can't have nice things (like `.iter().any()`)
101+
while idx < i.len() {
102+
let b = i[idx];
103+
// Delimiter, return what we got
104+
if b == b' ' {
105+
space_pos = idx;
106+
break;
107+
}
108+
// Not a pure octal input
109+
// Performance matters here, so `!(b'0'..=b'7').contains(&b)` won't do
110+
#[allow(clippy::manual_range_contains)]
111+
if b < b'0' || b > b'7' {
112+
return None;
113+
}
114+
// More than 6 octal digits we must have hit the delimiter or the input was malformed
115+
if idx > 6 {
116+
return None;
117+
}
118+
mode = (mode << 3) + (b - b'0') as u16;
119+
idx += 1;
120+
}
121+
Some((Self { internal: mode }, &i[(space_pos + 1)..]))
122+
}
123+
124+
/// Construct an EntryMode from bytes represented as in the git internal format
125+
pub fn from_bytes(i: &[u8]) -> Option<Self> {
126+
Self::extract_from_bytes(i).map(|(mode, _rest)| mode)
127+
}
128+
}
48129

49130
impl std::fmt::Debug for EntryMode {
50131
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
51-
write!(f, "EntryMode({:#o})", self.0)
132+
write!(f, "EntryMode(0o{})", self.as_bytes(&mut Default::default()))
133+
}
134+
}
135+
136+
impl std::fmt::Octal for EntryMode {
137+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
138+
write!(f, "{}", self.as_bytes(&mut Default::default()))
52139
}
53140
}
54141

@@ -74,7 +161,7 @@ pub enum EntryKind {
74161

75162
impl From<EntryKind> for EntryMode {
76163
fn from(value: EntryKind) -> Self {
77-
EntryMode(value as u16)
164+
EntryMode { internal: value as u16 }
78165
}
79166
}
80167

@@ -100,22 +187,14 @@ impl EntryKind {
100187
}
101188
}
102189

103-
impl std::ops::Deref for EntryMode {
104-
type Target = u16;
105-
106-
fn deref(&self) -> &Self::Target {
107-
&self.0
108-
}
109-
}
110-
111190
const IFMT: u16 = 0o170000;
112191

113192
impl EntryMode {
114193
/// Discretize the raw mode into an enum with well-known state while dropping unnecessary details.
115194
pub const fn kind(&self) -> EntryKind {
116-
let etype = self.0 & IFMT;
195+
let etype = self.value() & IFMT;
117196
if etype == 0o100000 {
118-
if self.0 & 0o000100 == 0o000100 {
197+
if self.value() & 0o000100 == 0o000100 {
119198
EntryKind::BlobExecutable
120199
} else {
121200
EntryKind::Blob
@@ -131,27 +210,27 @@ impl EntryMode {
131210

132211
/// Return true if this entry mode represents a Tree/directory
133212
pub const fn is_tree(&self) -> bool {
134-
self.0 & IFMT == EntryKind::Tree as u16
213+
self.value() & IFMT == EntryKind::Tree as u16
135214
}
136215

137216
/// Return true if this entry mode represents the commit of a submodule.
138217
pub const fn is_commit(&self) -> bool {
139-
self.0 & IFMT == EntryKind::Commit as u16
218+
self.value() & IFMT == EntryKind::Commit as u16
140219
}
141220

142221
/// Return true if this entry mode represents a symbolic link
143222
pub const fn is_link(&self) -> bool {
144-
self.0 & IFMT == EntryKind::Link as u16
223+
self.value() & IFMT == EntryKind::Link as u16
145224
}
146225

147226
/// Return true if this entry mode represents anything BUT Tree/directory
148227
pub const fn is_no_tree(&self) -> bool {
149-
self.0 & IFMT != EntryKind::Tree as u16
228+
self.value() & IFMT != EntryKind::Tree as u16
150229
}
151230

152231
/// Return true if the entry is any kind of blob.
153232
pub const fn is_blob(&self) -> bool {
154-
self.0 & IFMT == 0o100000
233+
self.value() & IFMT == 0o100000
155234
}
156235

157236
/// Return true if the entry is an executable blob.
@@ -178,27 +257,6 @@ impl EntryMode {
178257
Commit => "commit",
179258
}
180259
}
181-
182-
/// Return the representation as used in the git internal format, which is octal and written
183-
/// to the `backing` buffer. The respective sub-slice that was written to is returned.
184-
pub fn as_bytes<'a>(&self, backing: &'a mut [u8; 6]) -> &'a BStr {
185-
if self.0 == 0 {
186-
std::slice::from_ref(&b'0')
187-
} else {
188-
let mut nb = 0;
189-
let mut n = self.0;
190-
while n > 0 {
191-
let remainder = (n % 8) as u8;
192-
backing[nb] = b'0' + remainder;
193-
n /= 8;
194-
nb += 1;
195-
}
196-
let res = &mut backing[..nb];
197-
res.reverse();
198-
res
199-
}
200-
.into()
201-
}
202260
}
203261

204262
impl TreeRef<'_> {

gix-object/src/tree/ref_iter.rs

Lines changed: 3 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -171,54 +171,18 @@ impl<'a> TryFrom<&'a [u8]> for tree::EntryMode {
171171
type Error = &'a [u8];
172172

173173
fn try_from(mode: &'a [u8]) -> Result<Self, Self::Error> {
174-
mode_from_decimal(mode)
175-
.map(|(mode, _rest)| tree::EntryMode(mode as u16))
176-
.ok_or(mode)
177-
}
178-
}
179-
180-
fn mode_from_decimal(i: &[u8]) -> Option<(u32, &[u8])> {
181-
let mut mode = 0u32;
182-
let mut spacer_pos = 1;
183-
for b in i.iter().take_while(|b| **b != b' ') {
184-
if *b < b'0' || *b > b'7' {
185-
return None;
186-
}
187-
mode = (mode << 3) + u32::from(b - b'0');
188-
spacer_pos += 1;
189-
}
190-
if i.len() < spacer_pos {
191-
return None;
192-
}
193-
let (_, i) = i.split_at(spacer_pos);
194-
Some((mode, i))
195-
}
196-
197-
impl TryFrom<u32> for tree::EntryMode {
198-
type Error = u32;
199-
200-
fn try_from(mode: u32) -> Result<Self, Self::Error> {
201-
Ok(match mode {
202-
0o40000 | 0o120000 | 0o160000 => tree::EntryMode(mode as u16),
203-
blob_mode if blob_mode & 0o100000 == 0o100000 => tree::EntryMode(mode as u16),
204-
_ => return Err(mode),
205-
})
174+
tree::EntryMode::from_bytes(mode).ok_or(mode)
206175
}
207176
}
208177

209178
mod decode {
210179
use bstr::ByteSlice;
211180
use winnow::{error::ParserError, prelude::*};
212181

213-
use crate::{
214-
tree,
215-
tree::{ref_iter::mode_from_decimal, EntryRef},
216-
TreeRef,
217-
};
182+
use crate::{tree, tree::EntryRef, TreeRef};
218183

219184
pub fn fast_entry(i: &[u8]) -> Option<(&[u8], EntryRef<'_>)> {
220-
let (mode, i) = mode_from_decimal(i)?;
221-
let mode = tree::EntryMode::try_from(mode).ok()?;
185+
let (mode, i) = tree::EntryMode::extract_from_bytes(i)?;
222186
let (filename, i) = i.split_at(i.find_byte(0)?);
223187
let i = &i[1..];
224188
const HASH_LEN_FIXME: usize = 20; // TODO(SHA256): know actual/desired length or we may overshoot

gix-object/tests/object/tree/entry_mode.rs

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,14 @@ fn is_methods() {
1616
}
1717

1818
assert!(mode(EntryKind::Blob).is_blob());
19-
assert!(EntryMode(0o100645).is_blob());
20-
assert_eq!(EntryMode(0o100645).kind(), EntryKind::Blob);
21-
assert!(!EntryMode(0o100675).is_executable());
22-
assert!(EntryMode(0o100700).is_executable());
23-
assert_eq!(EntryMode(0o100700).kind(), EntryKind::BlobExecutable);
19+
assert!(EntryMode::from_bytes(b"100645").unwrap().is_blob());
20+
assert_eq!(EntryMode::from_bytes(b"100645").unwrap().kind(), EntryKind::Blob);
21+
assert!(!EntryMode::from_bytes(b"100675").unwrap().is_executable());
22+
assert!(EntryMode::from_bytes(b"100700").unwrap().is_executable());
23+
assert_eq!(
24+
EntryMode::from_bytes(b"100700").unwrap().kind(),
25+
EntryKind::BlobExecutable
26+
);
2427
assert!(!mode(EntryKind::Blob).is_link());
2528
assert!(mode(EntryKind::BlobExecutable).is_blob());
2629
assert!(mode(EntryKind::BlobExecutable).is_executable());
@@ -29,17 +32,19 @@ fn is_methods() {
2932

3033
assert!(!mode(EntryKind::Link).is_blob());
3134
assert!(mode(EntryKind::Link).is_link());
32-
assert!(EntryMode(0o121234).is_link());
33-
assert_eq!(EntryMode(0o121234).kind(), EntryKind::Link);
35+
assert!(EntryMode::from_bytes(b"121234").unwrap().is_link());
36+
assert_eq!(EntryMode::from_bytes(b"121234").unwrap().kind(), EntryKind::Link);
3437
assert!(mode(EntryKind::Link).is_blob_or_symlink());
3538
assert!(mode(EntryKind::Tree).is_tree());
36-
assert!(EntryMode(0o040101).is_tree());
37-
assert_eq!(EntryMode(0o040101).kind(), EntryKind::Tree);
39+
assert!(EntryMode::from_bytes(b"040101").unwrap().is_tree());
40+
assert_eq!(EntryMode::from_bytes(b"040101").unwrap().kind(), EntryKind::Tree);
41+
assert!(EntryMode::from_bytes(b"40101").unwrap().is_tree());
42+
assert_eq!(EntryMode::from_bytes(b"40101").unwrap().kind(), EntryKind::Tree);
3843
assert!(mode(EntryKind::Commit).is_commit());
39-
assert!(EntryMode(0o167124).is_commit());
40-
assert_eq!(EntryMode(0o167124).kind(), EntryKind::Commit);
44+
assert!(EntryMode::from_bytes(b"167124").unwrap().is_commit());
45+
assert_eq!(EntryMode::from_bytes(b"167124").unwrap().kind(), EntryKind::Commit);
4146
assert_eq!(
42-
EntryMode(0o000000).kind(),
47+
EntryMode::from_bytes(b"000000").unwrap().kind(),
4348
EntryKind::Commit,
4449
"commit is really 'anything else' as `kind()` can't fail"
4550
);
@@ -58,13 +63,19 @@ fn as_bytes() {
5863
(EntryKind::Link.into(), EntryKind::Link.as_octal_str()),
5964
(EntryKind::Commit.into(), EntryKind::Commit.as_octal_str()),
6065
(
61-
EntryMode::try_from(b"100744 ".as_ref()).expect("valid"),
66+
EntryMode::from_bytes(b"100744 ".as_ref()).expect("valid"),
6267
"100744".into(),
6368
),
6469
(
65-
EntryMode::try_from(b"100644 ".as_ref()).expect("valid"),
70+
EntryMode::from_bytes(b"100644 ".as_ref()).expect("valid"),
6671
"100644".into(),
6772
),
73+
// Show incorrect behaviour: b"040000" doesn't round-trip
74+
(
75+
EntryMode::from_bytes(b"040000".as_ref()).expect("valid"),
76+
"40000".into(),
77+
),
78+
(EntryMode::from_bytes(b"40000".as_ref()).expect("valid"), "40000".into()),
6879
] {
6980
assert_eq!(mode.as_bytes(&mut buf), expected);
7081
}

0 commit comments

Comments
 (0)