lz_str/
decompress.rs

1use crate::IntoWideIter;
2use crate::constants::BASE64_KEY;
3use crate::constants::CLOSE_CODE;
4use crate::constants::START_CODE_BITS;
5use crate::constants::U8_CODE;
6use crate::constants::U16_CODE;
7use crate::constants::URI_KEY;
8use std::convert::TryFrom;
9use std::convert::TryInto;
10
11#[derive(Debug)]
12pub struct DecompressContext<I> {
13    val: u16,
14    compressed_data: I,
15    position: u16,
16    reset_val: u16,
17}
18
19impl<I> DecompressContext<I>
20where
21    I: Iterator<Item = u16>,
22{
23    /// Make a new [`DecompressContext`].
24    ///
25    /// # Errors
26    /// Returns `None` if the iterator is empty.
27    ///
28    /// # Panics
29    /// Panics if `bits_per_char` is greater than the number of bits in a `u16`.
30    #[inline]
31    pub fn new(mut compressed_data: I, bits_per_char: u8) -> Option<Self> {
32        assert!(usize::from(bits_per_char) <= std::mem::size_of::<u16>() * 8);
33
34        let reset_val_pow = bits_per_char - 1;
35        // (1 << 15) <= u16::MAX
36        let reset_val: u16 = 1 << reset_val_pow;
37
38        Some(DecompressContext {
39            val: compressed_data.next()?,
40            compressed_data,
41            position: reset_val,
42            reset_val,
43        })
44    }
45
46    #[inline]
47    pub fn read_bit(&mut self) -> Option<bool> {
48        let res = self.val & self.position;
49        self.position >>= 1;
50
51        if self.position == 0 {
52            self.position = self.reset_val;
53            self.val = self.compressed_data.next()?;
54        }
55
56        Some(res != 0)
57    }
58
59    /// Read n bits.
60    ///
61    /// `u32` is the return type as we expect all possible codes to be within that type's range.
62    #[inline]
63    pub fn read_bits(&mut self, n: u8) -> Option<u32> {
64        let mut res = 0;
65        let max_power: u32 = 1 << n;
66        let mut power: u32 = 1;
67        while power != max_power {
68            res |= u32::from(self.read_bit()?) * power;
69            power <<= 1;
70        }
71
72        Some(res)
73    }
74}
75
76/// Decompress a string into a [`Vec<u16>`].
77/// The result contains possibly invalid UTF16.
78///
79/// # Errors
80/// Returns `None` if the decompression fails.
81#[inline]
82pub fn decompress(compressed: impl IntoWideIter) -> Option<Vec<u16>> {
83    decompress_internal(compressed.into_wide_iter(), 16)
84}
85
86/// Decompress a [`&str`] compressed with [`crate::compress_to_utf16`].
87///
88/// # Errors
89/// Returns an error if the compressed data could not be decompressed.
90#[inline]
91pub fn decompress_from_utf16(compressed: &str) -> Option<Vec<u16>> {
92    decompress_internal(compressed.encode_utf16().map(|c| c - 32), 15)
93}
94
95/// Decompress a [`&str`] compressed with [`crate::compress_to_encoded_uri_component`].
96///
97/// # Errors
98/// Returns an error if the compressed data could not be decompressed.
99#[inline]
100pub fn decompress_from_encoded_uri_component(compressed: &str) -> Option<Vec<u16>> {
101    let compressed: Option<Vec<u16>> = compressed
102        .encode_utf16()
103        .map(|c| {
104            if c == u16::from(b' ') {
105                u16::from(b'+')
106            } else {
107                c
108            }
109        })
110        .map(u32::from)
111        .flat_map(|c| {
112            URI_KEY
113                .iter()
114                .position(|k| u8::try_from(c) == Ok(*k))
115                .map(|n| u16::try_from(n).ok())
116        })
117        .collect();
118
119    decompress_internal(compressed?.into_iter(), 6)
120}
121
122/// Decompress a [`&str`] compressed with [`crate::compress_to_base64`].
123///
124/// # Errors
125/// Returns an error if the compressed data could not be decompressed.
126#[inline]
127pub fn decompress_from_base64(compressed: &str) -> Option<Vec<u16>> {
128    let compressed: Option<Vec<u16>> = compressed
129        .encode_utf16()
130        .flat_map(|c| {
131            BASE64_KEY
132                .iter()
133                .position(|k| u8::try_from(c) == Ok(*k))
134                .map(|n| u16::try_from(n).ok())
135        })
136        .collect();
137
138    decompress_internal(compressed?.into_iter(), 6)
139}
140
141/// Decompress a byte slice compressed with [`crate::compress_to_uint8_array`].
142///
143/// # Errors
144/// Returns an error if the compressed data could not be decompressed.
145#[inline]
146pub fn decompress_from_uint8_array(compressed: &[u8]) -> Option<Vec<u16>> {
147    // The buffer is a UCS2 big endian encoded string.
148    // If it is not a multiple of 2, it is invalid.
149    let compressed_len = compressed.len();
150    if compressed_len & 1 == 1 {
151        return None;
152    }
153
154    let buffer: Vec<u16> = compressed
155        .chunks(2)
156        .map(|slice| {
157            // The slice is always guaranteed to be 2 here.
158            // We check to see if the length is a multiple of 2 earlier.
159            u16::from_be_bytes(slice.try_into().unwrap())
160        })
161        .collect();
162
163    decompress(buffer)
164}
165
166/// The internal decompress function.
167///
168/// All other decompress functions are built on top of this one.
169/// It generally should not be used directly.
170///
171/// # Errors
172/// Returns an error if the compressed data could not be decompressed.
173///
174/// # Panics
175/// Panics if `bits_per_char` is greater than the number of bits in a `u16`.
176#[inline]
177pub fn decompress_internal<I>(compressed: I, bits_per_char: u8) -> Option<Vec<u16>>
178where
179    I: Iterator<Item = u16>,
180{
181    let mut ctx = match DecompressContext::new(compressed, bits_per_char) {
182        Some(ctx) => ctx,
183        None => return Some(Vec::new()),
184    };
185
186    let mut dictionary: Vec<Vec<u16>> = Vec::with_capacity(16);
187    for i in 0_u16..3_u16 {
188        dictionary.push(vec![i]);
189    }
190
191    // u8::MAX > u2::MAX
192    let code = u8::try_from(ctx.read_bits(START_CODE_BITS)?).unwrap();
193    let first_entry = match code {
194        U8_CODE | U16_CODE => {
195            let bits_to_read = (code * 8) + 8;
196            // bits_to_read == 8 or 16 <= 16
197            u16::try_from(ctx.read_bits(bits_to_read)?).unwrap()
198        }
199        CLOSE_CODE => return Some(Vec::new()),
200        _ => return None,
201    };
202    dictionary.push(vec![first_entry]);
203
204    let mut w = vec![first_entry];
205    let mut result = vec![first_entry];
206    let mut num_bits: u8 = 3;
207    let mut enlarge_in: u64 = 4;
208    let mut entry;
209    loop {
210        let mut code = ctx.read_bits(num_bits)?;
211        match u8::try_from(code) {
212            Ok(code_u8 @ (U8_CODE | U16_CODE)) => {
213                let bits_to_read = (code_u8 * 8) + 8;
214                // if cc == 0 {
215                // if (errorCount++ > 10000) return "Error"; // TODO: Error logic
216                // }
217
218                // bits_to_read == 8 or 16 <= 16
219                let bits = u16::try_from(ctx.read_bits(bits_to_read)?).unwrap();
220                dictionary.push(vec![bits]);
221                code = u32::try_from(dictionary.len() - 1).ok()?;
222                enlarge_in -= 1;
223            }
224            Ok(CLOSE_CODE) => return Some(result),
225            _ => {}
226        }
227
228        if enlarge_in == 0 {
229            enlarge_in = 1 << num_bits;
230            num_bits += 1;
231        }
232
233        // Return error if code cannot be converted to dictionary index
234        let code_usize = usize::try_from(code).ok()?;
235        if let Some(entry_value) = dictionary.get(code_usize) {
236            entry = entry_value.clone();
237        } else if code_usize == dictionary.len() {
238            entry = w.clone();
239            entry.push(*w.first()?);
240        } else {
241            return None;
242        }
243
244        result.extend(&entry);
245
246        // Add w+entry[0] to the dictionary.
247        let mut to_be_inserted = w.clone();
248        to_be_inserted.push(*entry.first()?);
249        dictionary.push(to_be_inserted);
250        enlarge_in -= 1;
251
252        w = entry;
253
254        if enlarge_in == 0 {
255            enlarge_in = 1 << num_bits;
256            num_bits += 1;
257        }
258    }
259}