Skip to content

BWT overflow fix + Ari terminator #12

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 13, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions bwt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,6 @@ pub fn encode_brute(input: &[u8], suf: &mut [Suffix], fn_out: |u8|) -> Suffix {
}
}

assert!( origin.is_some() );
origin.unwrap()
}

Expand Down Expand Up @@ -335,7 +334,7 @@ impl<R: Reader> Reader for Decoder<R> {
self.header = true;
}
let mut amt = dst.len();
let len = amt;
let dst_len = amt;

while amt > 0 {
if self.output.len() == self.start {
Expand All @@ -344,19 +343,19 @@ impl<R: Reader> Reader for Decoder<R> {
break
}
}
let n = num::min( amt, self.output.len() - self.start );
let n = num::min(amt, self.output.len() - self.start);
vec::bytes::copy_memory(
dst.mut_slice_from(len - amt),
self.output.slice_from(self.start)
dst.mut_slice_from(dst_len - amt),
self.output.slice(self.start, self.start + n)
);
self.start += n;
amt -= n;
}

if len == amt {
if dst_len == amt {
Err(io::standard_error(io::EndOfFile))
} else {
Ok(len - amt)
Ok(dst_len - amt)
}
}
}
Expand Down
48 changes: 30 additions & 18 deletions entropy/ari.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ use compress::entropy::ari;
let text = "some text";
let mut e = ari::ByteEncoder::new(MemWriter::new());
e.write_str(text);
let (encoded, _) = e.encoder.finish();
let (encoded, _) = e.finish();

// Decode the encoded text
let mut d = ari::ByteDecoder::new(MemReader::new(encoded.unwrap()));
let decoded = d.read_bytes(text.len()).unwrap();
let decoded = d.read_to_end().unwrap();
```

# Credit
Expand Down Expand Up @@ -435,6 +435,7 @@ impl Model for FrequencyTable {


/// A basic byte-oriented arithmetic encoder.
/// Uses a special terminator code to mark the end of the stream.
pub struct ByteEncoder<W> {
/// A lower level encoder
encoder: Encoder<W>,
Expand All @@ -448,9 +449,16 @@ impl<W: Writer> ByteEncoder<W> {
let freq_max = range_default_threshold >> 2;
ByteEncoder {
encoder: Encoder::new(w),
freq: FrequencyTable::new_flat(symbol_total, freq_max),
freq: FrequencyTable::new_flat(symbol_total+1, freq_max),
}
}

/// Finish encoding & write the terminator symbol.
///
/// Consumes the encoder and returns the underlying writer together with the
/// combined result of the two final steps. The writer is handed back even
/// when one of those steps failed.
pub fn finish(mut self) -> (W, io::IoResult<()>) {
// `symbol_total` is emitted last, as the end-of-stream marker
let ret = self.encoder.encode(symbol_total, &self.freq);
// the low-level encoder is finished whether or not the terminator was encoded successfully
let (w,r2) = self.encoder.finish();
// report the terminator error if there was one, otherwise the result of the low-level finish
(w, ret.and(r2))
}
}

impl<W: Writer> Writer for ByteEncoder<W> {
Expand All @@ -470,21 +478,24 @@ impl<W: Writer> Writer for ByteEncoder<W> {


/// A basic byte-oriented arithmetic decoder.
/// Expects a special terminator code marking the end of the stream.
pub struct ByteDecoder<R> {
/// A lower level decoder
decoder: Decoder<R>,
/// A basic frequency table
freq: FrequencyTable,
/// Remember if we found the terminator code;
/// once set, further reads report end-of-file
priv is_eof: bool,
}

impl<R: Reader> ByteDecoder<R> {
/// Create a decoder on top of a given Reader
/// the output size need not be known: the end of the stream is marked by the terminator code
pub fn new(r: R) -> ByteDecoder<R> {
let freq_max = range_default_threshold >> 2;
ByteDecoder {
decoder: Decoder::new(r),
freq: FrequencyTable::new_flat(symbol_total, freq_max),
freq: FrequencyTable::new_flat(symbol_total+1, freq_max),
is_eof: false,
}
}
}
Expand All @@ -494,20 +505,21 @@ impl<R: Reader> Reader for ByteDecoder<R> {
if self.decoder.tell() == 0 {
if_ok!(self.decoder.start());
}
let mut ret = Ok(dst.len());
if self.is_eof {
return Err(io::standard_error(io::EndOfFile))
}
let mut amount = 0u;
for out_byte in dst.mut_iter() {
match self.decoder.decode(&self.freq) {
Ok(value) => {
self.freq.update(value, 10, 1);
*out_byte = value as u8;
},
Err(e) => {
ret = Err(e);
break
}
let value = if_ok!(self.decoder.decode(&self.freq));
if value == symbol_total {
self.is_eof = true;
break
}
self.freq.update(value, 10, 1);
*out_byte = value as u8;
amount += 1;
}
ret
Ok(amount)
}
}

Expand All @@ -523,12 +535,12 @@ mod test {
info!("Roundtrip Ari of size {}", bytes.len());
let mut e = ByteEncoder::new(MemWriter::new());
e.write(bytes).unwrap();
let (e, r) = e.encoder.finish();
let (e, r) = e.finish();
r.unwrap();
let encoded = e.unwrap();
debug!("Roundtrip input {:?} encoded {:?}", bytes, encoded);
let mut d = ByteDecoder::new(BufReader::new(encoded));
let decoded = d.read_bytes(bytes.len()).unwrap();
let decoded = d.read_to_end().unwrap();
assert_eq!(bytes.as_slice(), decoded.as_slice());
}

Expand Down