1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
use ::iter::Bytes;

pub enum Scan {
    /// Returned when an implementation finds a noteworthy token.
    Found,
    /// Returned when an implementation couldn't keep running because the input was too short.
    TooShort,
}


pub unsafe fn parse_uri_batch_32<'a>(bytes: &mut Bytes<'a>) -> Scan {
    while bytes.as_ref().len() >= 32 {
        let advance = match_url_char_32_avx(bytes.as_ref());
        bytes.advance(advance);

        if advance != 32 {
            return Scan::Found;
        }
    }
    Scan::TooShort
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
#[inline]
#[allow(non_snake_case, overflowing_literals)]
unsafe fn match_url_char_32_avx(buf: &[u8]) -> usize {
    debug_assert!(buf.len() >= 32);

    /*
    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    */
    use core::arch::x86_64::*;

    let ptr = buf.as_ptr();

    let LSH: __m256i = _mm256_set1_epi8(0x0f);
    let URI: __m256i = _mm256_setr_epi8(
        0xb8, 0xfc, 0xf8, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
        0xfc, 0xfc, 0xfc, 0x7c, 0x54, 0x7c, 0xd4, 0x7c,
        0xb8, 0xfc, 0xf8, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
        0xfc, 0xfc, 0xfc, 0x7c, 0x54, 0x7c, 0xd4, 0x7c,
    );
    let ARF: __m256i = _mm256_setr_epi8(
        0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    );

    let data = _mm256_lddqu_si256(ptr as *const _);
    let rbms = _mm256_shuffle_epi8(URI, data);
    let cols = _mm256_and_si256(LSH, _mm256_srli_epi16(data, 4));
    let bits = _mm256_and_si256(_mm256_shuffle_epi8(ARF, cols), rbms);

    let v = _mm256_cmpeq_epi8(bits, _mm256_setzero_si256());
    let r = 0xffffffff_00000000 | _mm256_movemask_epi8(v) as u64;

    _tzcnt_u64(r) as usize
}

#[cfg(target_arch = "x86")]
unsafe fn match_url_char_32_avx(_: &[u8]) -> usize {
    unreachable!("AVX2 detection should be disabled for x86");
}

pub unsafe fn match_header_value_batch_32(bytes: &mut Bytes) -> Scan {
    while bytes.as_ref().len() >= 32 {
        let advance = match_header_value_char_32_avx(bytes.as_ref());
        bytes.advance(advance);

        if advance != 32 {
            return Scan::Found;
        }
    }
    Scan::TooShort
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
#[inline]
#[allow(non_snake_case)]
unsafe fn match_header_value_char_32_avx(buf: &[u8]) -> usize {
    debug_assert!(buf.len() >= 32);

    /*
    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    */
    use core::arch::x86_64::*;

    let ptr = buf.as_ptr();

    // %x09 %x20-%x7e %x80-%xff
    let TAB: __m256i = _mm256_set1_epi8(0x09);
    let DEL: __m256i = _mm256_set1_epi8(0x7f);
    let LOW: __m256i = _mm256_set1_epi8(0x1f);

    let dat = _mm256_lddqu_si256(ptr as *const _);
    let low = _mm256_cmpgt_epi8(dat, LOW);
    let tab = _mm256_cmpeq_epi8(dat, TAB);
    let del = _mm256_cmpeq_epi8(dat, DEL);
    let bit = _mm256_andnot_si256(del, _mm256_or_si256(low, tab));
    let rev = _mm256_cmpeq_epi8(bit, _mm256_setzero_si256());
    let res = 0xffffffff_00000000 | _mm256_movemask_epi8(rev) as u64;

    _tzcnt_u64(res) as usize
}

#[cfg(target_arch = "x86")]
unsafe fn match_header_value_char_32_avx(_: &[u8]) -> usize {
    unreachable!("AVX2 detection should be disabled for x86");
}