1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
use enum_iterator::IntoEnumIterator;
use std::io::{self, BufReader, Read, Write};

/// Compression algorithms understood by [`compress`] and [`decompress`].
///
/// `compress` writes the bincode-serialized variant at the front of its output, and
/// `decompress` reads that prefix back to pick the matching decoder.
// NOTE(review): bincode presumably encodes the variant by its position in this list, so
// reordering or inserting variants in the middle would likely make previously written data
// unreadable — confirm before changing the order.
#[derive(Debug, Serialize, Deserialize, IntoEnumIterator)]
pub enum CompressionMethod {
    /// The payload is stored as-is.
    NoCompression,
    /// The payload is a bzip2 stream.
    Bzip2,
    /// The payload is a gzip stream.
    Gzip,
    /// The payload is a zstd stream.
    Zstd,
}

/// Wraps `stream` in a reader that yields the decoded bytes for `method`.
///
/// The input is always placed behind a `BufReader` first; for
/// `CompressionMethod::NoCompression` that buffered reader is returned unchanged.
///
/// # Errors
///
/// Returns the `io::Error` reported while constructing the zstd decoder.
fn decompress_reader<'a, R: Read + 'a>(
    method: CompressionMethod,
    stream: R,
) -> Result<Box<dyn Read + 'a>, io::Error> {
    let buffered = BufReader::new(stream);
    let decoder: Box<dyn Read + 'a> = match method {
        CompressionMethod::NoCompression => Box::new(buffered),
        CompressionMethod::Bzip2 => Box::new(bzip2::bufread::BzDecoder::new(buffered)),
        CompressionMethod::Gzip => Box::new(flate2::read::GzDecoder::new(buffered)),
        CompressionMethod::Zstd => {
            // Only the zstd constructor is fallible; surface its error to the caller.
            let zstd_decoder = zstd::stream::read::Decoder::new(buffered)?;
            Box::new(zstd_decoder)
        }
    };
    Ok(decoder)
}

/// Decodes a buffer produced by [`compress`] back into the original bytes.
///
/// The buffer starts with a bincode-serialized [`CompressionMethod`]; everything after that
/// prefix is decoded with the matching decompressor.
///
/// # Errors
///
/// Returns an `io::Error` when `data` is shorter than the method prefix, when the prefix
/// cannot be deserialized, or when the decoder fails while reading the payload.
pub fn decompress(data: &[u8]) -> Result<Vec<u8>, io::Error> {
    // Every variant is fieldless, so any variant can be used to measure the prefix width.
    let prefix_len = bincode::serialized_size(&CompressionMethod::NoCompression).unwrap();
    let available = data.len() as u64;
    if available < prefix_len {
        let message = format!("data len too small: {}", data.len());
        return Err(io::Error::new(io::ErrorKind::Other, message));
    }

    // The guard above ensures the split point lies within `data`.
    let (prefix, payload) = data.split_at(prefix_len as usize);
    let method = bincode::deserialize(prefix).map_err(|err| {
        io::Error::new(
            io::ErrorKind::Other,
            format!("method deserialize failed: {}", err),
        )
    })?;

    let mut decoder = decompress_reader(method, payload)?;
    let mut decoded = Vec::new();
    decoder.read_to_end(&mut decoded)?;
    Ok(decoded)
}

/// Compresses `data` with `method` and prefixes the result with the serialized method.
///
/// The returned buffer is the bincode encoding of `method` followed by the encoded payload,
/// which is the layout [`decompress`] expects.
///
/// # Errors
///
/// Returns an `io::Error` when the method prefix cannot be serialized, when an encoder
/// cannot be created, or when writing to or finishing an encoder fails.
pub fn compress(method: CompressionMethod, data: &[u8]) -> Result<Vec<u8>, io::Error> {
    // Report serialization failures the same way `decompress` reports deserialization ones,
    // instead of panicking.
    let mut compressed_data = bincode::serialize(&method).map_err(|err| {
        io::Error::new(
            io::ErrorKind::Other,
            format!("method serialize failed: {}", err),
        )
    })?;

    match method {
        CompressionMethod::Bzip2 => {
            let mut e = bzip2::write::BzEncoder::new(Vec::new(), bzip2::Compression::Best);
            e.write_all(data)?;
            compressed_data.extend_from_slice(&e.finish()?);
        }
        CompressionMethod::Gzip => {
            let mut e =
                flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
            e.write_all(data)?;
            compressed_data.extend_from_slice(&e.finish()?);
        }
        CompressionMethod::Zstd => {
            // Encoder construction is fallible; propagate the error rather than unwrapping.
            let mut e = zstd::stream::write::Encoder::new(Vec::new(), 0)?;
            e.write_all(data)?;
            compressed_data.extend_from_slice(&e.finish()?);
        }
        // Append the input directly; no intermediate copy is needed.
        CompressionMethod::NoCompression => compressed_data.extend_from_slice(data),
    }

    Ok(compressed_data)
}

/// Compresses `data` with every [`CompressionMethod`] and returns the shortest output.
///
/// When several methods produce outputs of equal length, the one that comes first in the
/// enum's iteration order is returned.
///
/// # Errors
///
/// Returns the first `io::Error` produced by [`compress`]; later methods are not attempted.
pub fn compress_best(data: &[u8]) -> Result<Vec<u8>, io::Error> {
    let attempts = CompressionMethod::into_enum_iter()
        .map(|method| compress(method, data))
        .collect::<Result<Vec<_>, io::Error>>()?;

    // The enum has at least one variant, so there is always a candidate to pick.
    let smallest = attempts
        .into_iter()
        .min_by_key(|encoded| encoded.len())
        .unwrap();
    Ok(smallest)
}

#[cfg(test)]
mod test {
    use super::*;

    /// A short byte sequence survives a `compress_best` / `decompress` round trip.
    #[test]
    fn test_compress_uncompress() {
        let original = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
        let packed = compress_best(&original).expect("compress_best");
        let unpacked = decompress(&packed).expect("decompress");
        assert_eq!(unpacked, original);
    }

    /// A run of 256 zero bytes compresses to fewer bytes than the input, prefix included.
    #[test]
    fn test_compress() {
        let zeros = vec![0; 256];
        let packed_len = compress_best(&zeros).expect("compress_best").len();
        assert!(packed_len < zeros.len());
    }
}