Skip to content

Commit c1d7ca0

Browse files
committed
Convert Ascii into an enum.
Allows static initialization, nicer pattern matches and comparisons. Naming conventions: * alphabetic: just use the letter itself. This breaks the CamelCase convention, but is worth it for simplicity. * digits: prepend _ to make them valid identifiers while keeping them short. * non-alphanumeric but visible: use the Wikipedia names in CamelCase, but remove -Mark endings. * rarely used control codes: use the uppercase acronym to deter use. * more commonly used control codes: expand and CamelCase the acronym to preserve meaning, e.g. LineFeed, not NewLine or LineBreak. I'm not certain which control codes should be expanded; currently BEL and DEL are, but should they be? I'm not sure whether this is a breaking change.
1 parent f94d00a commit c1d7ca0

File tree

1 file changed

+192
-27
lines changed

1 file changed

+192
-27
lines changed

src/ascii.rs

Lines changed: 192 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,178 @@
1+
use std::mem::transmute;
12
use std::fmt;
23
#[cfg(feature="unstable")]
34
use std::ascii::AsciiExt;
45

56
use AsciiCast;
67

8+
#[allow(non_camel_case_types)]
79
/// Datatype to hold one ascii character. It is a `#[repr(u8)]` enum whose discriminants are the 7-bit codes, so the highest bit is always zero.
810
#[derive(Clone, PartialEq, PartialOrd, Ord, Eq, Hash, Copy)]
9-
pub struct Ascii { chr: u8 }
11+
#[repr(u8)]
12+
pub enum Ascii {
13+
/**`'\0'`*/NUL = 0,
14+
/** [Start Of Heading](http://en.wikipedia.org/wiki/Start_of_Heading)
15+
*/ SOH = 1,
16+
/** [Start Of teXt](http://en.wikipedia.org/wiki/Start_of_Text)
17+
*/ SOX = 2,
18+
/** [End Of teXt](http://en.wikipedia.org/wiki/End-of-Text_character)
19+
*/ ETX = 3,
20+
/** [End Of Transmission](http://en.wikipedia.org/wiki/End-of-Transmission_character)
21+
*/ EOT = 4,
22+
/** [Enquiry](http://en.wikipedia.org/wiki/Enquiry_character)
23+
*/ ENQ = 5,
24+
/** [Acknowledgement](http://en.wikipedia.org/wiki/Acknowledge_character)
25+
*/ ACK = 6,
26+
/** [bell / alarm / audible](http://en.wikipedia.org/wiki/Bell_character)
27+
* '\a' is not supported by Rust.
28+
*/ Bell = 7,
29+
/** [Backspace character](http://en.wikipedia.org/wiki/Backspace)
30+
* '\b' is not supported by Rust.
31+
*/ BackSpace = 8,
32+
/**`'\t'`*/Tab = 9,
33+
/**`'\n'`*/LineFeed = 10,
34+
/** [Vertical tab](http://en.wikipedia.org/wiki/Vertical_Tab). Written `'\v'` in some other languages.
35+
* '\v' is not supported by Rust.
36+
*/ VT = 11,
37+
/** [Form Feed](http://en.wikipedia.org/wiki/Form_Feed)
38+
* '\f' is not supported by Rust.
39+
*/ FF = 12,
40+
/**`'\r'`*/CarriageReturn = 13,
41+
/** [Shift Out](http://en.wikipedia.org/wiki/Shift_Out_and_Shift_In_characters)
42+
*/ SO = 14,
43+
/** [Shift In](http://en.wikipedia.org/wiki/Shift_Out_and_Shift_In_characters)
44+
*/ SI = 15,
45+
/** [Data Link Escape](http://en.wikipedia.org/wiki/Data_Link_Escape)
46+
*/ DLE = 16,
47+
/** [Device control 1, often XON](http://en.wikipedia.org/wiki/Device_Control_1)
48+
*/ DC1 = 17,
49+
/** Device control 2
50+
*/ DC2 = 18,
51+
/** Device control 3, Often XOFF
52+
*/ DC3 = 19,
53+
/** Device control 4
54+
*/ DC4 = 20,
55+
/** [Negative Acknowledgement](http://en.wikipedia.org/wiki/Negative-acknowledge_character)
56+
*/ NAK = 21,
57+
/** [Synchronous idle](http://en.wikipedia.org/wiki/Synchronous_Idle)
58+
*/ SYN = 22,
59+
/** [End of Transmission Block](http://en.wikipedia.org/wiki/End-of-Transmission-Block_character)
60+
*/ ETB = 23,
61+
/** [Cancel](http://en.wikipedia.org/wiki/Cancel_character)
62+
*/ CAN = 24,
63+
/** [End of Medium](http://en.wikipedia.org/wiki/End_of_Medium)
64+
*/ EM = 25,
65+
/** [Substitute](http://en.wikipedia.org/wiki/Substitute_character)
66+
*/ SUB = 26,
67+
/** [Escape](http://en.wikipedia.org/wiki/Escape_character)
68+
* '\e' is not supported in Rust.
69+
*/ Escape = 27,
70+
/** [File Separator](http://en.wikipedia.org/wiki/File_separator)
71+
*/ FS = 28,
72+
/** [Group Separator](http://en.wikipedia.org/wiki/Group_separator)
73+
*/ GS = 29,
74+
/** [Record Separator](http://en.wikipedia.org/wiki/Record_separator)
75+
*/ RS = 30,
76+
/** [Unit Separator](http://en.wikipedia.org/wiki/Unit_separator)
77+
*/ US = 31,
78+
/**`' '`*/ Space = 32,
79+
/**`'!'`*/ Exclamation = 33,
80+
/**`'"'`*/ Quotation = 34,
81+
/**`'#'`*/ Hash = 35,
82+
/**`'$'`*/ Dollar = 36,
83+
/**`'%'`*/ Percent = 37,
84+
/**`'&'`*/ Ampersand = 38,
85+
/**`'''`*/ Apostrophe = 39,
86+
/**`'('`*/ ParenOpen = 40,
87+
/**`')'`*/ ParenClose = 41,
88+
/**`'*'`*/ Asterisk = 42,
89+
/**`'+'`*/ Plus = 43,
90+
/**`','`*/ Comma = 44,
91+
/**`'-'`*/ Minus = 45,
92+
/**`'.'`*/ Dot = 46,
93+
/**`'/'`*/ Slash = 47,
94+
/**`'0'`*/ _0 = 48,
95+
/**`'1'`*/ _1 = 49,
96+
/**`'2'`*/ _2 = 50,
97+
/**`'3'`*/ _3 = 51,
98+
/**`'4'`*/ _4 = 52,
99+
/**`'5'`*/ _5 = 53,
100+
/**`'6'`*/ _6 = 54,
101+
/**`'7'`*/ _7 = 55,
102+
/**`'8'`*/ _8 = 56,
103+
/**`'9'`*/ _9 = 57,
104+
/**`':'`*/ Colon = 58,
105+
/**`';'`*/ SemiColon = 59,
106+
/**`'<'`*/ LessThan = 60,
107+
/**`'='`*/ Equal = 61,
108+
/**`'>'`*/ GreaterThan = 62,
109+
/**`'?'`*/ Question = 63,
110+
/**`'@'`*/ At = 64,
111+
/**`'A'`*/ A = 65,
112+
/**`'B'`*/ B = 66,
113+
/**`'C'`*/ C = 67,
114+
/**`'D'`*/ D = 68,
115+
/**`'E'`*/ E = 69,
116+
/**`'F'`*/ F = 70,
117+
/**`'G'`*/ G = 71,
118+
/**`'H'`*/ H = 72,
119+
/**`'I'`*/ I = 73,
120+
/**`'J'`*/ J = 74,
121+
/**`'K'`*/ K = 75,
122+
/**`'L'`*/ L = 76,
123+
/**`'M'`*/ M = 77,
124+
/**`'N'`*/ N = 78,
125+
/**`'O'`*/ O = 79,
126+
/**`'P'`*/ P = 80,
127+
/**`'Q'`*/ Q = 81,
128+
/**`'R'`*/ R = 82,
129+
/**`'S'`*/ S = 83,
130+
/**`'T'`*/ T = 84,
131+
/**`'U'`*/ U = 85,
132+
/**`'V'`*/ V = 86,
133+
/**`'W'`*/ W = 87,
134+
/**`'X'`*/ X = 88,
135+
/**`'Y'`*/ Y = 89,
136+
/**`'Z'`*/ Z = 90,
137+
/**`'['`*/ BracketOpen = 91,
138+
/**`'\'`*/ BackSlash = 92,
139+
/**`']'`*/ BracketClose = 93,
140+
/**`'^'`*/ Caret = 94,
141+
/**`'_'`*/ UnderScore = 95,
142+
/**`'`'`*/ Grave = 96,
143+
/**`'a'`*/ a = 97,
144+
/**`'b'`*/ b = 98,
145+
/**`'c'`*/ c = 99,
146+
/**`'d'`*/ d = 100,
147+
/**`'e'`*/ e = 101,
148+
/**`'f'`*/ f = 102,
149+
/**`'g'`*/ g = 103,
150+
/**`'h'`*/ h = 104,
151+
/**`'i'`*/ i = 105,
152+
/**`'j'`*/ j = 106,
153+
/**`'k'`*/ k = 107,
154+
/**`'l'`*/ l = 108,
155+
/**`'m'`*/ m = 109,
156+
/**`'n'`*/ n = 110,
157+
/**`'o'`*/ o = 111,
158+
/**`'p'`*/ p = 112,
159+
/**`'q'`*/ q = 113,
160+
/**`'r'`*/ r = 114,
161+
/**`'s'`*/ s = 115,
162+
/**`'t'`*/ t = 116,
163+
/**`'u'`*/ u = 117,
164+
/**`'v'`*/ v = 118,
165+
/**`'w'`*/ w = 119,
166+
/**`'x'`*/ x = 120,
167+
/**`'y'`*/ y = 121,
168+
/**`'z'`*/ z = 122,
169+
/**`'{'`*/ CurlyBraceOpen = 123,
170+
/**`'|'`*/ VerticalBar = 124,
171+
/**`'}'`*/ CurlyBraceClose = 125,
172+
/**`'~'`*/ Tilde = 126,
173+
/**[Delete](http://en.wikipedia.org/wiki/Delete_character)
174+
*/ Delete = 127,
175+
}
10176

11177
impl Ascii {
12178
/// Constructs an Ascii character from a `char`.
@@ -24,7 +190,7 @@ impl Ascii {
24190
#[inline]
25191
pub fn from(ch: char) -> Result<Ascii, ()> {
26192
if ch as u32 <= 0x7F {
27-
return Ok( Ascii { chr: ch as u8 });
193+
return Ok(unsafe{ ch.to_ascii_nocheck() });
28194
}
29195
Err(())
30196
}
@@ -45,35 +211,36 @@ impl Ascii {
45211
#[inline]
46212
pub fn from_byte(ch: u8) -> Result<Ascii, ()> {
47213
if ch <= 0x7F {
48-
return Ok( Ascii { chr: ch });
214+
return Ok(unsafe{ ch.to_ascii_nocheck() });
49215
}
50216
Err(())
51217
}
52218

53219
/// Converts an ascii character into a `u8`.
54220
#[inline]
55221
pub fn as_byte(&self) -> u8 {
56-
self.chr
222+
*self as u8 // `Ascii` is a fieldless `#[repr(u8)]` enum, so a plain cast yields its byte value without `unsafe`
57223
}
58224

59225
/// Converts an ascii character into a `char`.
60226
#[inline]
61227
pub fn as_char(&self) -> char {
62-
self.chr as char
228+
self.as_byte() as char
63229
}
64230

65231
// the following methods are like ctype, and the implementation is inspired by musl
66232

67233
/// Check if the character is a letter (a-z, A-Z)
68234
#[inline]
69235
pub fn is_alphabetic(&self) -> bool {
70-
(self.chr >= 0x41 && self.chr <= 0x5A) || (self.chr >= 0x61 && self.chr <= 0x7A)
236+
(self >= &Ascii::a && self <= &Ascii::z) ||
237+
(self >= &Ascii::A && self <= &Ascii::Z)
71238
}
72239

73240
/// Check if the character is a number (0-9)
74241
#[inline]
75242
pub fn is_digit(&self) -> bool {
76-
self.chr >= 0x30 && self.chr <= 0x39
243+
self >= &Ascii::_0 && self <= &Ascii::_9
77244
}
78245

79246
/// Check if the character is a letter or number
@@ -85,7 +252,7 @@ impl Ascii {
85252
/// Check if the character is a space or horizontal tab
86253
#[inline]
87254
pub fn is_blank(&self) -> bool {
88-
self.chr == b' ' || self.chr == b'\t'
255+
*self == Ascii::Space || *self == Ascii::Tab
89256
}
90257

91258
/// Check if the character is a control character
@@ -101,7 +268,7 @@ impl Ascii {
101268
/// ```
102269
#[inline]
103270
pub fn is_control(&self) -> bool {
104-
self.chr < 0x20 || self.chr == 0x7F
271+
self.as_byte() < 0x20 || *self == Ascii::Delete
105272
}
106273

107274
/// Checks if the character is printable (except space)
@@ -116,7 +283,7 @@ impl Ascii {
116283
/// ```
117284
#[inline]
118285
pub fn is_graph(&self) -> bool {
119-
self.chr.wrapping_sub(0x21) < 0x5E
286+
self.as_byte().wrapping_sub(0x21) < 0x5E
120287
}
121288

122289
/// Checks if the character is printable (including space)
@@ -131,7 +298,7 @@ impl Ascii {
131298
/// ```
132299
#[inline]
133300
pub fn is_print(&self) -> bool {
134-
self.chr.wrapping_sub(0x20) < 0x5F
301+
self.as_byte().wrapping_sub(0x20) < 0x5F
135302
}
136303

137304
/// Checks if the character is alphabetic and lowercase
@@ -146,7 +313,7 @@ impl Ascii {
146313
/// ```
147314
#[inline]
148315
pub fn is_lowercase(&self) -> bool {
149-
self.chr.wrapping_sub(b'a') < 26
316+
self.as_byte().wrapping_sub(b'a') < 26
150317
}
151318

152319
/// Checks if the character is alphabetic and uppercase
@@ -161,7 +328,7 @@ impl Ascii {
161328
/// ```
162329
#[inline]
163330
pub fn is_uppercase(&self) -> bool {
164-
self.chr.wrapping_sub(b'A') < 26
331+
self.as_byte().wrapping_sub(b'A') < 26
165332
}
166333

167334
/// Checks if the character is punctuation
@@ -193,7 +360,7 @@ impl Ascii {
193360
/// ```
194361
#[inline]
195362
pub fn is_hex(&self) -> bool {
196-
self.is_digit() || (self.chr | 32u8).wrapping_sub(b'a') < 6
363+
self.is_digit() || (self.as_byte() | 32u8).wrapping_sub(b'a') < 6
197364
}
198365
}
199366

@@ -219,25 +386,25 @@ impl AsciiExt for Ascii {
219386
}
220387

221388
fn to_ascii_uppercase(&self) -> Ascii {
222-
Ascii{chr: self.chr.to_ascii_uppercase()}
389+
unsafe{ self.as_byte().to_ascii_uppercase().to_ascii_nocheck() }
223390
}
224391

225392
fn to_ascii_lowercase(&self) -> Ascii {
226-
Ascii{chr: self.chr.to_ascii_lowercase()}
393+
unsafe{ self.as_byte().to_ascii_lowercase().to_ascii_nocheck() } // SAFETY: lowercasing an ASCII byte yields an ASCII byte
227394
}
228395

229396
fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
230-
self.chr.eq_ignore_ascii_case(&other.chr)
397+
self.as_byte().eq_ignore_ascii_case(&other.as_byte())
231398
}
232399

233400
#[inline]
234401
fn make_ascii_uppercase(&mut self) {
235-
self.chr.make_ascii_uppercase()
402+
*self = self.to_ascii_uppercase();
236403
}
237404

238405
#[inline]
239406
fn make_ascii_lowercase(&mut self) {
240-
self.chr.make_ascii_lowercase()
407+
*self = self.to_ascii_lowercase();
241408
}
242409
}
243410

@@ -246,7 +413,7 @@ impl<'a> AsciiCast<'a> for u8 {
246413

247414
#[inline]
248415
unsafe fn to_ascii_nocheck(&self) -> Ascii {
249-
Ascii{ chr: *self }
416+
transmute(*self)
250417
}
251418
}
252419

@@ -255,7 +422,7 @@ impl<'a> AsciiCast<'a> for char {
255422

256423
#[inline]
257424
unsafe fn to_ascii_nocheck(&self) -> Ascii {
258-
Ascii{ chr: *self as u8 }
425+
(*self as u8).to_ascii_nocheck()
259426
}
260427
}
261428

@@ -266,10 +433,10 @@ mod tests {
266433

267434
#[test]
268435
fn to_ascii() {
269-
assert_eq!(65_u8.to_ascii(), Ok(Ascii { chr: 65_u8 }));
436+
assert_eq!(65_u8.to_ascii(), Ok(Ascii::A));
270437
assert_eq!(255_u8.to_ascii(), Err(()));
271438

272-
assert_eq!('A'.to_ascii(), Ok(Ascii { chr: 65_u8 }));
439+
assert_eq!('A'.to_ascii(), Ok(Ascii::A));
273440
assert_eq!('λ'.to_ascii(), Err(()));
274441
}
275442

@@ -302,13 +469,11 @@ mod tests {
302469

303470
#[test]
304471
fn fmt_display_ascii() {
305-
let s = Ascii { chr: b't' };
306-
assert_eq!(format!("{}", s), "t".to_string());
472+
assert_eq!(format!("{}", Ascii::t), "t".to_string());
307473
}
308474

309475
#[test]
310476
fn fmt_debug_ascii() {
311-
let c = Ascii { chr: b't' };
312-
assert_eq!(format!("{:?}", c), "'t'".to_string());
477+
assert_eq!(format!("{:?}", Ascii::t), "'t'".to_string());
313478
}
314479
}

0 commit comments

Comments
 (0)