Skip to content

Commit 84e9a61

Browse files
committed
metadata: Implement relaxation of short RBML lengths.
We try to move the data when the length can be encoded in a much smaller number of bytes. This interferes with indices and type abbreviations, however, so this commit introduces a public interface to get and mark a "stable" (i.e. not affected by relaxation) position of the current pointer. The relaxation logic only moves small data, currently at most 256 bytes, as moving the data can be costly. There might be further opportunities to allow more relaxation by moving fields around, which I didn't seriously try.
1 parent de00b85 commit 84e9a61

File tree

4 files changed

+138
-102
lines changed

4 files changed

+138
-102
lines changed

src/librbml/lib.rs

Lines changed: 62 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -729,21 +729,24 @@ pub mod writer {
729729
use std::num::Int;
730730
use std::old_io::{Writer, Seek};
731731
use std::old_io;
732+
use std::slice::bytes;
732733

733734
use super::{ EsVec, EsMap, EsEnum, EsSub8, EsSub32, EsVecElt, EsMapKey,
734735
EsU64, EsU32, EsU16, EsU8, EsInt, EsI64, EsI32, EsI16, EsI8,
735736
EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal, EsUint,
736737
EsOpaque, NUM_IMPLICIT_TAGS, NUM_TAGS };
738+
use super::io::SeekableMemWriter;
737739

738740
use serialize;
739741

740742

741743
pub type EncodeResult = old_io::IoResult<()>;
742744

743745
// rbml writing
744-
pub struct Encoder<'a, W:'a> {
745-
pub writer: &'a mut W,
746+
pub struct Encoder<'a> {
747+
pub writer: &'a mut SeekableMemWriter,
746748
size_positions: Vec<uint>,
749+
relax_limit: u64, // do not move encoded bytes before this position
747750
}
748751

749752
fn write_tag<W: Writer>(w: &mut W, n: uint) -> EncodeResult {
@@ -788,19 +791,21 @@ pub mod writer {
788791
})
789792
}
790793

791-
impl<'a, W: Writer + Seek> Encoder<'a, W> {
792-
pub fn new(w: &'a mut W) -> Encoder<'a, W> {
794+
impl<'a> Encoder<'a> {
795+
pub fn new(w: &'a mut SeekableMemWriter) -> Encoder<'a> {
793796
Encoder {
794797
writer: w,
795798
size_positions: vec!(),
799+
relax_limit: 0,
796800
}
797801
}
798802

799803
/// FIXME(pcwalton): Workaround for badness in trans. DO NOT USE ME.
800-
pub unsafe fn unsafe_clone(&self) -> Encoder<'a, W> {
804+
pub unsafe fn unsafe_clone(&self) -> Encoder<'a> {
801805
Encoder {
802806
writer: mem::transmute_copy(&self.writer),
803807
size_positions: self.size_positions.clone(),
808+
relax_limit: self.relax_limit,
804809
}
805810
}
806811

@@ -822,11 +827,29 @@ pub mod writer {
822827
let cur_pos = try!(self.writer.tell());
823828
try!(self.writer.seek(last_size_pos as i64, old_io::SeekSet));
824829
let size = cur_pos as uint - last_size_pos - 4;
825-
try!(write_sized_vuint(self.writer, size, 4));
826-
let r = try!(self.writer.seek(cur_pos as i64, old_io::SeekSet));
830+
831+
// relax the size encoding for small tags (bigger tags are costly to move).
832+
// we should never try to move the stable positions, however.
833+
const RELAX_MAX_SIZE: uint = 0x100;
834+
if size <= RELAX_MAX_SIZE && last_size_pos >= self.relax_limit as uint {
835+
// we can't alter the buffer in place, so have a temporary buffer
836+
let mut buf = [0u8; RELAX_MAX_SIZE];
837+
{
838+
let data = &self.writer.get_ref()[last_size_pos+4..cur_pos as uint];
839+
bytes::copy_memory(&mut buf, data);
840+
}
841+
842+
// overwrite the size and data and continue
843+
try!(write_vuint(self.writer, size));
844+
try!(self.writer.write_all(&buf[..size]));
845+
} else {
846+
// overwrite the size with an overlong encoding and skip past the data
847+
try!(write_sized_vuint(self.writer, size, 4));
848+
try!(self.writer.seek(cur_pos as i64, old_io::SeekSet));
849+
}
827850

828851
debug!("End tag (size = {:?})", size);
829-
Ok(r)
852+
Ok(())
830853
}
831854

832855
pub fn wr_tag<F>(&mut self, tag_id: uint, blk: F) -> EncodeResult where
@@ -933,12 +956,19 @@ pub mod writer {
933956
debug!("Write str: {:?}", s);
934957
self.writer.write_all(s.as_bytes())
935958
}
936-
}
937959

938-
// FIXME (#2743): optionally perform "relaxations" on end_tag to more
939-
// efficiently encode sizes; this is a fixed point iteration
960+
/// Returns the current position while marking it stable, i.e.
961+
/// generated bytes so far wouldn't be affected by relaxation.
962+
pub fn mark_stable_position(&mut self) -> u64 {
963+
let pos = self.writer.tell().unwrap();
964+
if self.relax_limit < pos {
965+
self.relax_limit = pos;
966+
}
967+
pos
968+
}
969+
}
940970

941-
impl<'a, W: Writer + Seek> Encoder<'a, W> {
971+
impl<'a> Encoder<'a> {
942972
// used internally to emit things like the vector length and so on
943973
fn _emit_tagged_sub(&mut self, v: uint) -> EncodeResult {
944974
if let Some(v) = v.to_u8() {
@@ -955,15 +985,15 @@ pub mod writer {
955985
}
956986

957987
pub fn emit_opaque<F>(&mut self, f: F) -> EncodeResult where
958-
F: FnOnce(&mut Encoder<W>) -> EncodeResult,
988+
F: FnOnce(&mut Encoder) -> EncodeResult,
959989
{
960990
try!(self.start_tag(EsOpaque as uint));
961991
try!(f(self));
962992
self.end_tag()
963993
}
964994
}
965995

966-
impl<'a, W: Writer + Seek> serialize::Encoder for Encoder<'a, W> {
996+
impl<'a> serialize::Encoder for Encoder<'a> {
967997
type Error = old_io::IoError;
968998

969999
fn emit_nil(&mut self) -> EncodeResult {
@@ -1023,7 +1053,7 @@ pub mod writer {
10231053
}
10241054

10251055
fn emit_enum<F>(&mut self, _name: &str, f: F) -> EncodeResult where
1026-
F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult,
1056+
F: FnOnce(&mut Encoder<'a>) -> EncodeResult,
10271057
{
10281058
try!(self.start_tag(EsEnum as uint));
10291059
try!(f(self));
@@ -1035,14 +1065,14 @@ pub mod writer {
10351065
v_id: uint,
10361066
_: uint,
10371067
f: F) -> EncodeResult where
1038-
F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult,
1068+
F: FnOnce(&mut Encoder<'a>) -> EncodeResult,
10391069
{
10401070
try!(self._emit_tagged_sub(v_id));
10411071
f(self)
10421072
}
10431073

10441074
fn emit_enum_variant_arg<F>(&mut self, _: uint, f: F) -> EncodeResult where
1045-
F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult,
1075+
F: FnOnce(&mut Encoder<'a>) -> EncodeResult,
10461076
{
10471077
f(self)
10481078
}
@@ -1052,7 +1082,7 @@ pub mod writer {
10521082
v_id: uint,
10531083
cnt: uint,
10541084
f: F) -> EncodeResult where
1055-
F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult,
1085+
F: FnOnce(&mut Encoder<'a>) -> EncodeResult,
10561086
{
10571087
self.emit_enum_variant(v_name, v_id, cnt, f)
10581088
}
@@ -1061,62 +1091,62 @@ pub mod writer {
10611091
_: &str,
10621092
idx: uint,
10631093
f: F) -> EncodeResult where
1064-
F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult,
1094+
F: FnOnce(&mut Encoder<'a>) -> EncodeResult,
10651095
{
10661096
self.emit_enum_variant_arg(idx, f)
10671097
}
10681098

10691099
fn emit_struct<F>(&mut self, _: &str, _len: uint, f: F) -> EncodeResult where
1070-
F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult,
1100+
F: FnOnce(&mut Encoder<'a>) -> EncodeResult,
10711101
{
10721102
f(self)
10731103
}
10741104

10751105
fn emit_struct_field<F>(&mut self, _name: &str, _: uint, f: F) -> EncodeResult where
1076-
F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult,
1106+
F: FnOnce(&mut Encoder<'a>) -> EncodeResult,
10771107
{
10781108
f(self)
10791109
}
10801110

10811111
fn emit_tuple<F>(&mut self, len: uint, f: F) -> EncodeResult where
1082-
F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult,
1112+
F: FnOnce(&mut Encoder<'a>) -> EncodeResult,
10831113
{
10841114
self.emit_seq(len, f)
10851115
}
10861116
fn emit_tuple_arg<F>(&mut self, idx: uint, f: F) -> EncodeResult where
1087-
F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult,
1117+
F: FnOnce(&mut Encoder<'a>) -> EncodeResult,
10881118
{
10891119
self.emit_seq_elt(idx, f)
10901120
}
10911121

10921122
fn emit_tuple_struct<F>(&mut self, _: &str, len: uint, f: F) -> EncodeResult where
1093-
F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult,
1123+
F: FnOnce(&mut Encoder<'a>) -> EncodeResult,
10941124
{
10951125
self.emit_seq(len, f)
10961126
}
10971127
fn emit_tuple_struct_arg<F>(&mut self, idx: uint, f: F) -> EncodeResult where
1098-
F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult,
1128+
F: FnOnce(&mut Encoder<'a>) -> EncodeResult,
10991129
{
11001130
self.emit_seq_elt(idx, f)
11011131
}
11021132

11031133
fn emit_option<F>(&mut self, f: F) -> EncodeResult where
1104-
F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult,
1134+
F: FnOnce(&mut Encoder<'a>) -> EncodeResult,
11051135
{
11061136
self.emit_enum("Option", f)
11071137
}
11081138
fn emit_option_none(&mut self) -> EncodeResult {
11091139
self.emit_enum_variant("None", 0, 0, |_| Ok(()))
11101140
}
11111141
fn emit_option_some<F>(&mut self, f: F) -> EncodeResult where
1112-
F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult,
1142+
F: FnOnce(&mut Encoder<'a>) -> EncodeResult,
11131143
{
11141144

11151145
self.emit_enum_variant("Some", 1, 1, f)
11161146
}
11171147

11181148
fn emit_seq<F>(&mut self, len: uint, f: F) -> EncodeResult where
1119-
F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult,
1149+
F: FnOnce(&mut Encoder<'a>) -> EncodeResult,
11201150
{
11211151

11221152
try!(self.start_tag(EsVec as uint));
@@ -1126,7 +1156,7 @@ pub mod writer {
11261156
}
11271157

11281158
fn emit_seq_elt<F>(&mut self, _idx: uint, f: F) -> EncodeResult where
1129-
F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult,
1159+
F: FnOnce(&mut Encoder<'a>) -> EncodeResult,
11301160
{
11311161

11321162
try!(self.start_tag(EsVecElt as uint));
@@ -1135,7 +1165,7 @@ pub mod writer {
11351165
}
11361166

11371167
fn emit_map<F>(&mut self, len: uint, f: F) -> EncodeResult where
1138-
F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult,
1168+
F: FnOnce(&mut Encoder<'a>) -> EncodeResult,
11391169
{
11401170

11411171
try!(self.start_tag(EsMap as uint));
@@ -1145,7 +1175,7 @@ pub mod writer {
11451175
}
11461176

11471177
fn emit_map_elt_key<F>(&mut self, _idx: uint, f: F) -> EncodeResult where
1148-
F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult,
1178+
F: FnOnce(&mut Encoder<'a>) -> EncodeResult,
11491179
{
11501180

11511181
try!(self.start_tag(EsMapKey as uint));
@@ -1154,7 +1184,7 @@ pub mod writer {
11541184
}
11551185

11561186
fn emit_map_elt_val<F>(&mut self, _idx: uint, f: F) -> EncodeResult where
1157-
F: FnOnce(&mut Encoder<'a, W>) -> EncodeResult,
1187+
F: FnOnce(&mut Encoder<'a>) -> EncodeResult,
11581188
{
11591189
try!(self.start_tag(EsMapVal as uint));
11601190
try!(f(self));

0 commit comments

Comments
 (0)