Skip to content

Commit 58e7598

Browse files
committed
Added a gensym_copy mechanism to ensure sharing of pointers in the interner.
This makes comparisons constant-time and enables spelling-comparison of identifiers, which is crucial in many parts of resolve.
1 parent 9d33001 commit 58e7598

File tree

2 files changed

+88
-48
lines changed

2 files changed

+88
-48
lines changed

src/libsyntax/parse/token.rs

Lines changed: 33 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,12 @@ use parse::token;
1515
use util::interner::StrInterner;
1616
use util::interner;
1717

18+
use std::cast;
1819
use std::char;
1920
use std::cmp::Equiv;
2021
use std::local_data;
2122
use std::rand;
2223
use std::rand::RngUtil;
23-
use std::ptr::to_unsafe_ptr;
2424

2525
#[deriving(Clone, Encodable, Decodable, Eq, IterBytes)]
2626
pub enum binop {
@@ -382,30 +382,8 @@ pub fn token_to_binop(tok: &Token) -> Option<ast::BinOp> {
382382
}
383383
}
384384

385-
pub struct ident_interner {
386-
priv interner: StrInterner,
387-
}
388-
389-
impl ident_interner {
390-
pub fn intern(&self, val: &str) -> Name {
391-
self.interner.intern(val)
392-
}
393-
pub fn gensym(&self, val: &str) -> Name {
394-
self.interner.gensym(val)
395-
}
396-
pub fn get(&self, idx: Name) -> @str {
397-
self.interner.get(idx)
398-
}
399-
// is this really something that should be exposed?
400-
pub fn len(&self) -> uint {
401-
self.interner.len()
402-
}
403-
pub fn find_equiv<Q:Hash + IterBytes + Equiv<@str>>(&self, val: &Q)
404-
-> Option<Name> {
405-
self.interner.find_equiv(val)
406-
}
407-
}
408-
385+
// ident_interner is now only an alias for StrInterner;
// looks like we can get rid of this completely...
386+
pub type ident_interner = StrInterner;
409387

410388
// return a fresh interner, preloaded with special identifiers.
411389
fn mk_fresh_ident_interner() -> @ident_interner {
@@ -486,9 +464,7 @@ fn mk_fresh_ident_interner() -> @ident_interner {
486464
"typeof", // 67
487465
];
488466

489-
@ident_interner {
490-
interner: interner::StrInterner::prefill(init_vec)
491-
}
467+
@interner::StrInterner::prefill(init_vec)
492468
}
493469

494470
// if an interner exists in TLS, return it. Otherwise, prepare a
@@ -509,7 +485,7 @@ pub fn get_ident_interner() -> @ident_interner {
509485
/* for when we don't care about the contents; doesn't interact with TLD or
510486
serialization */
511487
pub fn mk_fake_ident_interner() -> @ident_interner {
512-
@ident_interner { interner: interner::StrInterner::new() }
488+
@interner::StrInterner::new()
513489
}
514490

515491
// maps a string to its interned representation
@@ -545,10 +521,11 @@ pub fn gensym_ident(str : &str) -> ast::Ident {
545521
}
546522

547523
// create a fresh name that maps to the same string as the old one.
548-
// note that this guarantees that ptr_eq(ident_to_str(src),interner_get(fresh_name(src)));
524+
// note that this guarantees that str_ptr_eq(ident_to_str(src),interner_get(fresh_name(src)));
549525
// that is, that the new name and the old one are connected to ptr_eq strings.
550526
pub fn fresh_name(src : &ast::Ident) -> Name {
551-
gensym(ident_to_str(src))
527+
let interner = get_ident_interner();
528+
interner.gensym_copy(src.name)
552529
// following: debug version. Could work in final except that it's incompatible with
553530
// good error messages and uses of struct names in ambiguous could-be-binding
554531
// locations. Also definitely destroys the guarantee given above about ptr_eq.
@@ -557,18 +534,26 @@ pub fn fresh_name(src : &ast::Ident) -> Name {
557534
}
558535

559536
// it looks like there oughta be a str_ptr_eq fn, but no one bothered to implement it?
560-
pub fn str_ptr_eq<T>(a: @str, b: @str) -> bool {
561-
// doesn't compile! ...because of rebase mangling. this should be fixed
562-
// in the commit that follows this.
563-
let (a_ptr, b_ptr): (*uint, *uint) = (to_unsafe_ptr(a), to_unsafe_ptr(b));
564-
a_ptr == b_ptr
565-
}
566-
567537

538+
// determine whether two @str values are pointer-equal, i.e. whether the
// two pointers themselves compare equal, not whether the strings they
// point at have the same contents.
539+
pub fn str_ptr_eq(a : @str, b : @str) -> bool {
540+
unsafe {
541+
// reinterpret each @str as a plain uint so the raw values can be compared
let p : uint = cast::transmute(a);
542+
let q : uint = cast::transmute(b);
543+
let result = p == q;
544+
// got to transmute them back, to make sure the ref count is correct:
// junk1 and junk2 only turn p and q back into @str values; they are never read.
545+
let junk1 : @str = cast::transmute(p);
546+
let junk2 : @str = cast::transmute(q);
547+
result
548+
}
549+
}
568550

569551
// return true when two identifiers refer (through the intern table) to the same ptr_eq
570552
// string. This is used to compare identifiers in places where hygienic comparison is
571553
// not wanted (i.e. not lexical vars).
554+
// compare two identifiers by spelling only: look up the interned string for
// each identifier's `name` and report whether the two are pointer-equal
// (via str_ptr_eq). Only the `name` field of each identifier is consulted.
pub fn ident_spelling_eq(a : &ast::Ident, b : &ast::Ident) -> bool {
555+
str_ptr_eq(interner_get(a.name),interner_get(b.name))
556+
}
572557

573558
// create a fresh mark.
574559
pub fn fresh_mark() -> Mrk {
@@ -721,13 +706,21 @@ mod test {
721706
use ast_util;
722707

723708

724-
#[test] fn t1() {
709+
// str_ptr_eq must accept a copied pointer and reject a separately written
// literal with the same text.
#[test] fn str_ptr_eq_tests(){
710+
// a and b are written as two separate literals with identical contents
// (presumably two distinct allocations -- the final assert relies on that)
let a = @"abc";
711+
let b = @"abc";
712+
// c is a copy of the pointer a
let c = a;
713+
assert!(str_ptr_eq(a,c));
714+
assert!(!str_ptr_eq(a,b));
715+
}
716+
717+
#[test] fn fresh_name_pointer_sharing() {
725718
let ghi = str_to_ident("ghi");
726719
assert_eq!(ident_to_str(&ghi),@"ghi");
720+
assert!(str_ptr_eq(ident_to_str(&ghi),ident_to_str(&ghi)))
727721
let fresh = ast::Ident::new(fresh_name(&ghi));
728722
assert_eq!(ident_to_str(&fresh),@"ghi");
729723
assert!(str_ptr_eq(ident_to_str(&ghi),ident_to_str(&fresh)));
730-
assert_eq!(3,4);
731724
}
732725

733726
}

src/libsyntax/util/interner.rs

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,23 @@ impl StrInterner {
117117
new_idx
118118
}
119119

120+
// I want these gensyms to share name pointers
121+
// with existing entries. This would be automatic,
122+
// except that the existing gensym creates its
123+
// own managed ptr using to_managed. I think that
124+
// adding this utility function is the most
125+
// lightweight way to get what I want, though not
126+
// necessarily the cleanest.
127+
128+
// create a gensym with the same name as an existing
129+
// entry.
// `idx` is the index of the existing entry; the value stored at vect[idx]
// is pushed onto vect again and the index recorded before the push is returned.
130+
pub fn gensym_copy(&self, idx : uint) -> uint {
131+
// taken before the push; presumably the slot the new entry will occupy
// (assumes len() reports the length of vect -- TODO confirm)
let new_idx = self.len();
132+
// leave out of map to avoid colliding
// (only vect is touched here)
133+
self.vect.push(self.vect[idx]);
134+
new_idx
135+
}
136+
120137
// this isn't "pure" in the traditional sense, because it can go from
121138
// failing to returning a value as items are interned. But for typestate,
122139
// where we first check a pred and then rely on it, ceasing to fail is ok.
@@ -144,23 +161,23 @@ mod tests {
144161
}
145162

146163
#[test]
147-
fn i2 () {
164+
fn interner_tests () {
148165
let i : Interner<@str> = Interner::new();
149166
// first one is zero:
150-
assert_eq!(i.intern (@"dog"), 0);
167+
assert_eq!(i.intern(@"dog"), 0);
151168
// re-use gets the same entry:
152-
assert_eq!(i.intern (@"dog"), 0);
169+
assert_eq!(i.intern(@"dog"), 0);
153170
// different string gets a different #:
154-
assert_eq!(i.intern (@"cat"), 1);
155-
assert_eq!(i.intern (@"cat"), 1);
171+
assert_eq!(i.intern(@"cat"), 1);
172+
assert_eq!(i.intern(@"cat"), 1);
156173
// dog is still at zero
157-
assert_eq!(i.intern (@"dog"), 0);
174+
assert_eq!(i.intern(@"dog"), 0);
158175
// gensym gets 3
159-
assert_eq!(i.gensym (@"zebra" ), 2);
176+
assert_eq!(i.gensym(@"zebra" ), 2);
160177
// gensym of same string gets new number :
161178
assert_eq!(i.gensym (@"zebra" ), 3);
162179
// gensym of *existing* string gets new number:
163-
assert_eq!(i.gensym (@"dog"), 4);
180+
assert_eq!(i.gensym(@"dog"), 4);
164181
assert_eq!(i.get(0), @"dog");
165182
assert_eq!(i.get(1), @"cat");
166183
assert_eq!(i.get(2), @"zebra");
@@ -176,4 +193,34 @@ mod tests {
176193
assert_eq!(i.get(2), @"Carol");
177194
assert_eq!(i.intern(@"Bob"), 1);
178195
}
196+
197+
// exercises intern, gensym, gensym_copy and get on a fresh StrInterner,
// checking the index each call returns and the string stored at each index.
#[test]
198+
fn string_interner_tests() {
199+
let i : StrInterner = StrInterner::new();
200+
// first one is zero:
201+
assert_eq!(i.intern("dog"), 0);
202+
// re-use gets the same entry:
203+
assert_eq!(i.intern ("dog"), 0);
204+
// different string gets a different #:
205+
assert_eq!(i.intern("cat"), 1);
206+
assert_eq!(i.intern("cat"), 1);
207+
// dog is still at zero
208+
assert_eq!(i.intern("dog"), 0);
209+
// gensym gets the next index (2)
210+
assert_eq!(i.gensym("zebra"), 2);
211+
// gensym of same string gets new number :
212+
assert_eq!(i.gensym("zebra"), 3);
213+
// gensym of *existing* string gets new number:
214+
assert_eq!(i.gensym("dog"), 4);
215+
// gensym tests again with gensym_copy:
// each copy of entry 2 gets a fresh index and reads back as "zebra"
// (string equality only; pointer sharing is not checked here)
216+
assert_eq!(i.gensym_copy(2), 5);
217+
assert_eq!(i.get(5), @"zebra");
218+
assert_eq!(i.gensym_copy(2), 6);
219+
assert_eq!(i.get(6), @"zebra");
220+
// the earlier entries still read back unchanged
assert_eq!(i.get(0), @"dog");
221+
assert_eq!(i.get(1), @"cat");
222+
assert_eq!(i.get(2), @"zebra");
223+
assert_eq!(i.get(3), @"zebra");
224+
assert_eq!(i.get(4), @"dog");
225+
}
179226
}

0 commit comments

Comments
 (0)