1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
/*!

Arithmetic encoder/decoder using the Range encoder underneath. Requires the `entropy` feature, which is enabled by default.
Can be used as a general-purpose entropy coding stage. Designed to be fast.

# Links

http://en.wikipedia.org/wiki/Arithmetic_coding
http://en.wikipedia.org/wiki/Range_encoding

# Example
```rust
# #![allow(unused_must_use)]
use std::io::{BufWriter, BufReader, Read, Write};
use compress::entropy::ari;

// Encode some text
let text = "some text";
let mut e = ari::ByteEncoder::new(BufWriter::new(Vec::new()));
e.write_all(text.as_bytes()).unwrap();
let (encoded, _) = e.finish();
let inner = encoded.into_inner().unwrap();

// Decode the encoded text
let mut d = ari::ByteDecoder::new(BufReader::new(&inner[..]));
let mut decoded = Vec::new();
d.read_to_end(&mut decoded).unwrap();
```
# Credit

This is an original implementation.

*/

#![allow(missing_docs)]

use std::fmt::Display;
use std::io::{self, Read, Write};

use super::super::byteorder::{BigEndian, WriteBytesExt, ReadBytesExt};
use super::super::byteorder_err_to_io;

pub use self::table::{ByteDecoder, ByteEncoder};

pub mod apm;
pub mod bin;
pub mod table;
#[cfg(test)]
mod test;

/// A single unit of coded output (one byte)
pub type Symbol = u8;
/// Number of bits in one `Symbol`
const SYMBOL_BITS: usize = 8;
/// Number of distinct `Symbol` values
const SYMBOL_TOTAL: usize = 1<<SYMBOL_BITS;

/// Integer type used for range borders, probability bounds and the code value
pub type Border = u32;
/// Size of a `Border` in bytes
const BORDER_BYTES: usize = 4;
/// Size of a `Border` in bits
const BORDER_BITS: usize = BORDER_BYTES * 8;
/// Number of `Border` bits that lie below its most significant symbol
const BORDER_EXCESS: usize = BORDER_BITS-SYMBOL_BITS;
/// Mask selecting the most significant symbol (the top `SYMBOL_BITS` bits) of a `Border`
const BORDER_SYMBOL_MASK: u32 = ((SYMBOL_TOTAL-1) << BORDER_EXCESS) as u32;

/// Threshold that `Encoder::new` and `Decoder::new` pass to `RangeEncoder::new`
pub const RANGE_DEFAULT_THRESHOLD: Border = 1<<14;


/// Range Encoder basic primitive
/// Gets probability ranges on the input, produces whole bytes of code on the output,
/// where the code is an arbitrary fixed-point value inside the resulting probability range.
pub struct RangeEncoder {
    /// Lower border of the current range
    low: Border,
    /// Upper border of the current range
    hai: Border,
    /// The minimum distance between low and hai to keep at all times,
    /// has to be at least the largest incoming 'total',
    /// and optimally many times larger
    pub threshold: Border,
    /// Tuning parameters: running sums of the `count_bits` estimates taken in `process`.
    /// Both stay at 0.0 unless built with `cfg(tune)`, because `count_bits` returns 0.0 otherwise.
    /// Bits lost when the range is cut down to a single leading symbol
    bits_lost_on_threshold_cut: f32,
    /// Bits lost to the integer division of the range by 'total'
    bits_lost_on_division: f32,
}

impl RangeEncoder {
    /// Create a new instance covering the full `Border` range [0, !0).
    /// 'max_range' is stored as `threshold`: `process` keeps emitting symbols
    /// until the active range is wider than this value.
    /// Debug builds assert that 'max_range' exceeds the number of distinct symbols.
    pub fn new(max_range: Border) -> RangeEncoder {
        debug_assert!(max_range > (SYMBOL_TOTAL as Border));
        RangeEncoder {
            low: 0,
            hai: !0,
            threshold: max_range,
            bits_lost_on_threshold_cut: 0.0,
            bits_lost_on_division: 0.0,
        }
    }

    /// Reset the current range to the full [0, !0) span.
    /// The threshold and the tuning counters are left untouched.
    pub fn reset(&mut self) {
        self.low = 0;
        self.hai = !0;
    }

    /// Tuning build: number of bits lost when 'total' shrinks to 'range',
    /// computed as -log2(range/total)
    #[cfg(tune)]
    fn count_bits(range: Border, total: Border) -> f32 {
        -((range as f32) / (total as f32)).log2()
    }

    /// Regular build: loss accounting is disabled, always reports 0.0
    #[cfg(not(tune))]
    fn count_bits(_range: Border, _total: Border) -> f32 {
        0.0
    }

    /// Return the number of bits lost due to threshold cuts and integer operations
    /// as a (threshold_cut, division) pair
    #[cfg(tune)]
    pub fn get_bits_lost(&self) -> (f32, f32) {
        (self.bits_lost_on_threshold_cut, self.bits_lost_on_division)
    }

    /// Process a given interval [from/total,to/total) into the current range
    /// write into the output slice, and return the number of symbols produced
    ///
    /// # Panics
    /// Panics if 'total' is zero (division by zero), or if 'output' is shorter
    /// than the number of symbols produced, since it is indexed directly.
    /// Debug builds additionally assert `from < to <= total` and a non-zero scaled range.
    pub fn process(&mut self, total: Border, from: Border, to: Border, output: &mut [Symbol]) -> usize {
        debug_assert!(from<to && to<=total);
        let old_range = self.hai - self.low;
        // Width of one probability unit; integer division drops the remainder
        let range = old_range / total;
        debug_assert!(range>0, "RangeCoder range is too narrow [{}-{}) for the total {}",
            self.low, self.hai, total);
        debug!("\t\tProcessing [{}-{})/{} with range {}", from, to, total, range);
        // Narrow the current range down to the requested sub-interval
        let mut lo = self.low + range*from;
        let mut hi = self.low + range*to;
        self.bits_lost_on_division += RangeEncoder::count_bits(range*total, old_range);
        let mut num_shift = 0;
        loop {
            // Do the borders differ in their most significant symbol?
            if (lo^hi) & BORDER_SYMBOL_MASK != 0 {
                // Leading symbol is still undecided: stop if the range is wide enough
                if hi-lo > self.threshold {
                    break
                }
                // Range is too narrow: cut it at 'lim', which is 'hi' with everything
                // below its leading symbol cleared, and keep the larger side
                let old_range = hi-lo;
                let lim = hi & BORDER_SYMBOL_MASK;
                if hi-lim >= lim-lo {lo=lim}
                else {hi=lim-1};
                debug_assert!(lo < hi);
                self.bits_lost_on_threshold_cut += RangeEncoder::count_bits(hi-lo, old_range);
            }

            // Emit the leading symbol of 'lo' and shift it out of both borders
            debug!("\t\tShifting on [{}-{}) to symbol {}", lo, hi, lo>>BORDER_EXCESS);
            output[num_shift] = (lo>>BORDER_EXCESS) as Symbol;
            num_shift += 1;
            lo<<=SYMBOL_BITS; hi<<=SYMBOL_BITS;
            debug_assert!(lo < hi);
        }
        self.low = lo;
        self.hai = hi;
        num_shift
    }

    /// Query the value encoded by 'code' in range [0,total)
    /// Computed as `(code - low) / ((hai - low) / total)`; the range itself is not modified.
    /// Debug builds assert that 'code' lies inside the current [low, hai) range.
    pub fn query(&self, total: Border, code: Border) -> Border {
        debug!("\t\tQuerying code {} of total {} under range [{}-{})",
            code, total, self.low, self.hai);
        debug_assert!(self.low <= code && code < self.hai);
        let range = (self.hai - self.low) / total;
        (code - self.low) / range
    }

    /// Get the code tail and close the range
    /// used at the end of encoding
    /// Returns the current lower border and sets both borders to zero.
    pub fn get_code_tail(&mut self) -> Border {
        let tail = self.low;
        self.low = 0;
        self.hai = 0;
        tail
    }
}


/// A source of probability ranges for values of type `V`.
/// Implementations may be a table, a mix of tables, or just a smart function.
pub trait Model<V: Copy + Display> {
    /// Return the probability range assigned to `value`
    fn get_range(&self, value: V) -> (Border,Border);
    /// Look up the value for a given probability offset, returned together with its range
    fn find_value(&self, offset: Border) -> (V,Border,Border);
    /// Return the sum of all probabilities
    fn get_denominator(&self) -> Border;

    /// Encode `value` through the range encoder `re`, placing the code symbols in `out`.
    /// Returns the number of symbols written.
    fn encode(&self, value: V, re: &mut RangeEncoder, out: &mut [Symbol]) -> usize {
        let (from, to) = self.get_range(value);
        let denominator = self.get_denominator();
        debug!("\tEncoding value {} of range [{}-{}) with total {}", value, from, to, denominator);
        re.process(denominator, from, to, out)
    }

    /// Decode one value from `code` using the range encoder `re`.
    /// Returns a (value, num_symbols_to_shift) pair.
    fn decode(&self, code: Border, re: &mut RangeEncoder) -> (V, usize) {
        let denominator = self.get_denominator();
        let offset = re.query(denominator, code);
        let (value, from, to) = self.find_value(offset);
        debug!("\tDecoding value {} of offset {} with total {}", value, offset, denominator);
        // Replay the encoding step so the range state advances the same way the encoder's did
        let mut scratch = [0 as Symbol; BORDER_BYTES];
        let consumed = re.process(denominator, from, to, &mut scratch[..]);
        if cfg!(debug_assertions) {
            // The replayed symbols must match the leading bytes of the code
            let code_prefix = if consumed==0 {0} else {code>>(BORDER_BITS - consumed*8)};
            let replayed = scratch[..consumed].iter()
                .fold(0 as Border, |acc,&byte| (acc<<8)+(byte as Border));
            assert_eq!(code_prefix, replayed);
        }
        (value, consumed)
    }
}


/// An arithmetic encoder helper
/// Couples a `RangeEncoder` with the output stream that receives the produced code bytes.
pub struct Encoder<W> {
    /// Destination of the encoded bytes
    stream: W,
    /// Range coder state carried across `encode` calls
    range: RangeEncoder,
}

impl<W: Write> Encoder<W> {
    /// Create a new encoder on top of a given Writer,
    /// using `RANGE_DEFAULT_THRESHOLD` for the underlying range encoder
    pub fn new(w: W) -> Encoder<W> {
        Encoder {
            stream: w,
            range: RangeEncoder::new(RANGE_DEFAULT_THRESHOLD),
        }
    }

    /// Encode an abstract value under the given Model
    /// and write the produced code bytes (possibly none) to the stream.
    ///
    /// # Errors
    /// Returns any error reported by the underlying writer.
    pub fn encode<V: Copy + Display, M: Model<V>>(&mut self, value: V, model: &M) -> io::Result<()> {
        let mut buf = [0 as Symbol; BORDER_BYTES];
        let num = model.encode(value, &mut self.range, &mut buf[..]);
        // `write` is allowed to accept only part of the buffer, which would silently
        // drop code bytes and corrupt the stream; `write_all` writes every byte or fails.
        self.stream.write_all(&buf[..num])
    }

    /// Finish encoding by writing the code tail word (big-endian) and flushing the stream.
    /// Returns the stream together with the outcome; the flush is always attempted,
    /// and a failure to write the tail takes precedence over a flush failure.
    pub fn finish(mut self) -> (W, io::Result<()>) {
        debug_assert!(BORDER_BITS == 32);
        let code = self.range.get_code_tail();
        let result = self.stream.write_u32::<BigEndian>(code)
                                .map_err(byteorder_err_to_io);
        let result = result.and(self.stream.flush());
        (self.stream, result)
    }

    /// Flush the output stream
    pub fn flush(&mut self) -> io::Result<()> {
        self.stream.flush()
    }

    /// Return the number of bytes lost due to threshold cuts and integer operations
    #[cfg(tune)]
    pub fn get_bytes_lost(&self) -> (f32, f32) {
        let (a,b) = self.range.get_bits_lost();
        (a/8.0, b/8.0)
    }
}

/// An arithmetic decoder helper
/// Couples a `RangeEncoder` with the input stream that supplies the code bytes.
pub struct Decoder<R> {
    /// Source of the encoded bytes
    stream: R,
    /// Range coder state carried across `decode` calls
    range: RangeEncoder,
    /// Current window of code bytes, passed to the model on each `decode`
    code: Border,
    /// Number of bytes still to be read into `code` before the next decode
    bytes_pending: usize,
}

impl<R: Read> Decoder<R> {
    /// Create a decoder on top of a given Reader.
    /// Nothing is read here: the initial `BORDER_BYTES` bytes of code are marked
    /// as pending and are pulled in by the first `decode` (or `finish`) call.
    pub fn new(r: R) -> Decoder<R> {
        Decoder {
            stream: r,
            range: RangeEncoder::new(RANGE_DEFAULT_THRESHOLD),
            code: 0,
            bytes_pending: BORDER_BYTES,
        }
    }

    /// Read the pending bytes from the stream, shifting each one into the low end of `code`.
    /// On a read error the bytes consumed so far stay accounted for,
    /// so a later call resumes where this one stopped.
    fn feed(&mut self) -> io::Result<()> {
        while self.bytes_pending != 0 {
            let b = try!(self.stream.read_u8());
            self.code = (self.code<<8) + (b as Border);
            self.bytes_pending -= 1;
        }
        Ok(())
    }

    /// Decode an abstract value based on the given Model
    ///
    /// # Errors
    /// Returns the I/O error if the pending code bytes cannot be read,
    /// for example when the stream is truncated.
    pub fn decode<V: Copy + Display, M: Model<V>>(&mut self, model: &M) -> io::Result<V> {
        // Propagate read failures to the caller instead of panicking on them
        try!(self.feed());
        let (value, shift) = model.decode(self.code, &mut self.range);
        // The bytes consumed by this value are read lazily, right before the next decode
        self.bytes_pending = shift;
        Ok(value)
    }

    /// Finish decoding: read any bytes still pending and hand back the stream
    /// together with the outcome of that final read
    pub fn finish(mut self) -> (R, io::Result<()>)  {
        let err = self.feed();
        (self.stream, err)
    }
}