1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
use rand;
use std::cmp::min;
// `unused_imports` is allowed alongside `deprecated` so that this import cannot fail a
// lint-denying build when no code path references it.
#[allow(deprecated, unused_imports)]
use std::mem::uninitialized;
use std::ptr::{copy_nonoverlapping, read_unaligned};

/// Produce four random bytes to be used as a frame mask.
///
/// The bytes come straight from `rand::random`.
#[inline]
pub fn generate_mask() -> [u8; 4] {
    rand::random::<[u8; 4]>()
}

/// Mask or unmask the frame data in `buf`, in place, using the 4-byte `mask`.
///
/// This is the public entry point; the work is delegated to `apply_mask_fast32`.
#[inline]
pub fn apply_mask(buf: &mut [u8], mask: [u8; 4]) {
    apply_mask_fast32(buf, mask);
}

/// Straightforward, fully safe mask application.
///
/// Every byte of `buf` is XOR-ed in place with the mask byte at the same position modulo 4,
/// i.e. the four mask bytes repeat cyclically over the buffer.
#[inline]
#[allow(dead_code)]
fn apply_mask_fallback(buf: &mut [u8], mask: [u8; 4]) {
    buf.iter_mut()
        .zip(mask.iter().cycle())
        .for_each(|(byte, key)| *byte ^= *key);
}

/// Faster version of `apply_mask()` which operates on 4-byte blocks.
///
/// The buffer is processed in three phases:
///
/// 1. up to three leading bytes, until the data pointer is 4-byte aligned;
/// 2. whole aligned `u32` words, XOR-ed one word at a time;
/// 3. the remaining one to three trailing bytes, if any.
///
/// After phase 1 the mask word is rotated by the number of bytes consumed so that its
/// bytes keep lining up with the data bytes they belong to.
#[inline]
#[allow(dead_code, clippy::cast_ptr_alignment)]
fn apply_mask_fast32(buf: &mut [u8], mask: [u8; 4]) {
    // SAFETY: `mask` is exactly four readable bytes and `read_unaligned` imposes no
    // alignment requirement on the source pointer.
    let mask_u32: u32 = unsafe { read_unaligned(mask.as_ptr() as *const u32) };

    let mut ptr = buf.as_mut_ptr();
    let mut len = buf.len();

    // Possible first unaligned block: bytes needed to reach 4-byte alignment, capped at `len`
    // for buffers that end before the next aligned address.
    let head = min(len, (4 - (ptr as usize & 3)) & 3);
    let mask_u32 = if head > 0 {
        // SAFETY: `head <= len`, so the `head` bytes at `ptr` are inside `buf`, and advancing
        // by `head` stays within (or one past the end of) `buf`.
        unsafe {
            xor_mem(ptr, mask_u32, head);
            ptr = ptr.add(head);
        }
        len -= head;
        // Rotate the mask so that mask byte `head` is the first byte in memory order; the
        // direction depends on how the target lays out the bytes of a `u32`.
        if cfg!(target_endian = "big") {
            mask_u32.rotate_left(8 * head as u32)
        } else {
            mask_u32.rotate_right(8 * head as u32)
        }
    } else {
        mask_u32
    };

    if len > 0 {
        debug_assert_eq!(ptr as usize % 4, 0);
    }

    // Properly aligned middle of the data. The bound is `>= 4` so that a final full word is
    // handled here as well, leaving at most three bytes for the tail.
    while len >= 4 {
        // SAFETY: `ptr` is 4-byte aligned (see the assertion above) and at least four bytes
        // of `buf` remain, so the word access and the 4-byte advance stay in bounds.
        unsafe {
            *(ptr as *mut u32) ^= mask_u32;
            ptr = ptr.add(4);
        }
        len -= 4;
    }

    // Possible last block (one to three bytes).
    if len > 0 {
        // SAFETY: exactly `len` bytes of `buf` remain at `ptr`.
        unsafe {
            xor_mem(ptr, mask_u32, len);
        }
    }
}

/// XOR the first `len` bytes at `ptr`, in place, with the first `len` bytes of `mask`.
///
/// `mask` is interpreted in native byte order: byte `i` in memory at `ptr` is combined with
/// byte `i` of the in-memory representation of `mask`. Bytes past `len` are neither read nor
/// written.
///
/// The scratch buffer is zero-initialised; reading a partially uninitialised `u32`
/// (as `mem::uninitialized` would produce) is undefined behaviour.
///
/// # Safety
///
/// `ptr` must be valid for reads and writes of `len` bytes, and `len` must not exceed 4.
#[inline]
// TODO: copy_nonoverlapping here compiles to call memcpy. While it is not so inefficient,
// it could be done better. The compiler does not see that len is at most 4.
unsafe fn xor_mem(ptr: *mut u8, mask: u32, len: usize) {
    debug_assert!(len <= 4);
    let mut bytes = [0u8; 4];
    copy_nonoverlapping(ptr, bytes.as_mut_ptr(), len);
    let masked = (u32::from_ne_bytes(bytes) ^ mask).to_ne_bytes();
    copy_nonoverlapping(masked.as_ptr(), ptr, len);
}

#[cfg(test)]
mod tests {

    use super::{apply_mask_fallback, apply_mask_fast32};

    /// The block-wise implementation must produce exactly what the byte-wise reference does.
    #[test]
    fn test_apply_mask() {
        let mask = [0x6d, 0xb6, 0xb2, 0x80];
        let unmasked = vec![
            0xf3, 0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x82, 0xff, 0xfe, 0x00, 0x17, 0x74, 0xf9,
            0x12, 0x03,
        ];

        // Offset 0 masks the whole buffer from its start; offset 1 skips the first byte so
        // the slice begins one byte further along and exercises a different alignment.
        for &offset in &[0usize, 1] {
            let mut expected = unmasked.clone();
            apply_mask_fallback(&mut expected[offset..], mask);

            let mut actual = unmasked.clone();
            apply_mask_fast32(&mut actual[offset..], mask);

            assert_eq!(expected, actual);
        }
    }
}