From 5a6dc8c4f5fdf06420b16f848582f6e17b9ff83e Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Tue, 29 May 2018 17:50:13 +0200 Subject: [PATCH] Add SSE2 accelerated version of FileMap analysis. --- src/Cargo.lock | 1 + src/libsyntax_pos/Cargo.toml | 1 + src/libsyntax_pos/analyze_filemap.rs | 434 +++++++++++++++++++++++++++ src/libsyntax_pos/lib.rs | 77 +---- 4 files changed, 445 insertions(+), 68 deletions(-) create mode 100644 src/libsyntax_pos/analyze_filemap.rs diff --git a/src/Cargo.lock b/src/Cargo.lock index b74587e5662..a9339055264 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -2779,6 +2779,7 @@ name = "syntax_pos" version = "0.0.0" dependencies = [ "arena 0.0.0", + "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "rustc_data_structures 0.0.0", "scoped-tls 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "serialize 0.0.0", diff --git a/src/libsyntax_pos/Cargo.toml b/src/libsyntax_pos/Cargo.toml index a9147b394f7..08ee2e0f376 100644 --- a/src/libsyntax_pos/Cargo.toml +++ b/src/libsyntax_pos/Cargo.toml @@ -14,3 +14,4 @@ rustc_data_structures = { path = "../librustc_data_structures" } arena = { path = "../libarena" } scoped-tls = { version = "0.1.1", features = ["nightly"] } unicode-width = "0.1.4" +cfg-if = "0.1.2" diff --git a/src/libsyntax_pos/analyze_filemap.rs b/src/libsyntax_pos/analyze_filemap.rs new file mode 100644 index 00000000000..7828c55ce78 --- /dev/null +++ b/src/libsyntax_pos/analyze_filemap.rs @@ -0,0 +1,434 @@ +// Copyright 2018 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms.
+ +use unicode_width::UnicodeWidthChar; +use super::*; + +/// Find all newlines, multi-byte characters, and non-narrow characters in a +/// FileMap. +/// +/// This function will use an SSE2 enhanced implementation if hardware support +/// is detected at runtime. +pub fn analyze_filemap( + src: &str, + filemap_start_pos: BytePos) + -> (Vec<BytePos>, Vec<MultiByteChar>, Vec<NonNarrowChar>) +{ + let mut lines = vec![filemap_start_pos]; + let mut multi_byte_chars = vec![]; + let mut non_narrow_chars = vec![]; + + // Calls the right implementation, depending on hardware support available. + analyze_filemap_dispatch(src, + filemap_start_pos, + &mut lines, + &mut multi_byte_chars, + &mut non_narrow_chars); + + // The code above optimistically registers a new line *after* each \n + // it encounters. If that point is already outside the filemap, remove + // it again. + if let Some(&last_line_start) = lines.last() { + let file_map_end = filemap_start_pos + BytePos::from_usize(src.len()); + assert!(file_map_end >= last_line_start); + if last_line_start == file_map_end { + lines.pop(); + } + } + + (lines, multi_byte_chars, non_narrow_chars) +} + +cfg_if! { + if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), + not(stage0)))] { + fn analyze_filemap_dispatch(src: &str, + filemap_start_pos: BytePos, + lines: &mut Vec<BytePos>, + multi_byte_chars: &mut Vec<MultiByteChar>, + non_narrow_chars: &mut Vec<NonNarrowChar>) { + if is_x86_feature_detected!("sse2") { + unsafe { + analyze_filemap_sse2(src, + filemap_start_pos, + lines, + multi_byte_chars, + non_narrow_chars); + } + } else { + analyze_filemap_generic(src, + src.len(), + filemap_start_pos, + lines, + multi_byte_chars, + non_narrow_chars); + + } + } + + /// Check 16 byte chunks of text at a time. If the chunk contains + /// something other than printable ASCII characters and newlines, the + /// function falls back to the generic implementation. Otherwise it uses + /// SSE2 intrinsics to quickly find all newlines.
+ #[target_feature(enable = "sse2")] + unsafe fn analyze_filemap_sse2(src: &str, + output_offset: BytePos, + lines: &mut Vec<BytePos>, + multi_byte_chars: &mut Vec<MultiByteChar>, + non_narrow_chars: &mut Vec<NonNarrowChar>) { + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + + const CHUNK_SIZE: usize = 16; + + let src_bytes = src.as_bytes(); + + let chunk_count = src.len() / CHUNK_SIZE; + + // This variable keeps track of where we should start decoding a + // chunk. If a multi-byte character spans across chunk boundaries, + // we need to skip that part in the next chunk because we already + // handled it. + let mut intra_chunk_offset = 0; + + for chunk_index in 0 .. chunk_count { + let ptr = src_bytes.as_ptr() as *const __m128i; + let chunk = _mm_loadu_si128(ptr.offset(chunk_index as isize)); + + // For each character in the chunk, see if its byte value is < 0, which + // indicates that it's part of a UTF-8 char. + let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0)); + // Create a bit mask from the comparison results. + let multibyte_mask = _mm_movemask_epi8(multibyte_test); + + // If the bit mask is all zero, we only have ASCII chars here: + if multibyte_mask == 0 { + assert!(intra_chunk_offset == 0); + + // Check if there are any control characters in the chunk. All + // control characters that we can encounter at this point have a + // byte value less than 32 or ... + let control_char_test0 = _mm_cmplt_epi8(chunk, _mm_set1_epi8(32)); + let control_char_mask0 = _mm_movemask_epi8(control_char_test0); + + // ... it's the ASCII 'DEL' character with a value of 127.
+ let control_char_test1 = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(127)); + let control_char_mask1 = _mm_movemask_epi8(control_char_test1); + + let control_char_mask = control_char_mask0 | control_char_mask1; + + if control_char_mask != 0 { + // Check for newlines in the chunk + let newlines_test = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8)); + let newlines_mask = _mm_movemask_epi8(newlines_test); + + if control_char_mask == newlines_mask { + // All control characters are newlines, record them + let mut newlines_mask = 0xFFFF0000 | newlines_mask as u32; + let output_offset = output_offset + + BytePos::from_usize(chunk_index * CHUNK_SIZE + 1); + + loop { + let index = newlines_mask.trailing_zeros(); + + if index >= CHUNK_SIZE as u32 { + // We have arrived at the end of the chunk. + break + } + + lines.push(BytePos(index) + output_offset); + + // Clear the bit, so we can find the next one. + newlines_mask &= (!1) << index; + } + + // We are done for this chunk. All control characters were + // newlines and we took care of those. + continue + } else { + // Some of the control characters are not newlines, + // fall through to the slow path below. + } + } else { + // No control characters, nothing to record for this chunk + continue + } + } + + // The slow path. + // There are control chars in here, fallback to generic decoding. + let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset; + intra_chunk_offset = analyze_filemap_generic( + &src[scan_start .. 
], + CHUNK_SIZE - intra_chunk_offset, + BytePos::from_usize(scan_start) + output_offset, + lines, + multi_byte_chars, + non_narrow_chars + ); + } + + // There might still be a tail left to analyze + let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset; + if tail_start < src.len() { + analyze_filemap_generic(&src[tail_start as usize ..], + src.len() - tail_start, + output_offset + BytePos::from_usize(tail_start), + lines, + multi_byte_chars, + non_narrow_chars); + } + } + } else { + + // The target (or compiler version) does not support SSE2 ... + fn analyze_filemap_dispatch(src: &str, + filemap_start_pos: BytePos, + lines: &mut Vec<BytePos>, + multi_byte_chars: &mut Vec<MultiByteChar>, + non_narrow_chars: &mut Vec<NonNarrowChar>) { + analyze_filemap_generic(src, + src.len(), + filemap_start_pos, + lines, + multi_byte_chars, + non_narrow_chars); + } + } +} + +// `scan_len` determines the number of bytes in `src` to scan. Note that the +// function can read past `scan_len` if a multi-byte character starts within the +// range but extends past it. The overflow is returned by the function. +fn analyze_filemap_generic(src: &str, + scan_len: usize, + output_offset: BytePos, + lines: &mut Vec<BytePos>, + multi_byte_chars: &mut Vec<MultiByteChar>, + non_narrow_chars: &mut Vec<NonNarrowChar>) + -> usize +{ + assert!(src.len() >= scan_len); + let mut i = 0; + let src_bytes = src.as_bytes(); + + while i < scan_len { + let byte = unsafe { + // We verified that i < scan_len <= src.len() + *src_bytes.get_unchecked(i as usize) + }; + + // How much to advance in order to get to the next UTF-8 char in the + // string. + let mut char_len = 1; + + if byte < 32 { + // This is an ASCII control character, it could be one of the cases + // that are interesting to us.
+ + let pos = BytePos::from_usize(i) + output_offset; + + match byte { + b'\n' => { + lines.push(pos + BytePos(1)); + } + b'\t' => { + non_narrow_chars.push(NonNarrowChar::Tab(pos)); + } + _ => { + non_narrow_chars.push(NonNarrowChar::ZeroWidth(pos)); + } + } + } else if byte >= 127 { + // The slow path: + // This is either ASCII control character "DEL" or the beginning of + // a multibyte char. Just decode to `char`. + let c = (&src[i..]).chars().next().unwrap(); + char_len = c.len_utf8(); + + let pos = BytePos::from_usize(i) + output_offset; + + if char_len > 1 { + assert!(char_len >=2 && char_len <= 4); + let mbc = MultiByteChar { + pos, + bytes: char_len as u32, + }; + multi_byte_chars.push(mbc); + } + + // Assume control characters are zero width. + // FIXME: How can we decide between `width` and `width_cjk`? + let char_width = UnicodeWidthChar::width(c).unwrap_or(0); + + if char_width != 1 { + non_narrow_chars.push(NonNarrowChar::new(pos, char_width)); + } + } + + i += char_len; + } + + i - scan_len +} + + + +macro_rules! 
test { + (case: $test_name:ident, + text: $text:expr, + filemap_start_pos: $filemap_start_pos:expr, + lines: $lines:expr, + multi_byte_chars: $multi_byte_chars:expr, + non_narrow_chars: $non_narrow_chars:expr,) => ( + + #[test] + fn $test_name() { + + let (lines, multi_byte_chars, non_narrow_chars) = + analyze_filemap($text, BytePos($filemap_start_pos)); + + let expected_lines: Vec<BytePos> = $lines + .into_iter() + .map(|pos| BytePos(pos)) + .collect(); + + assert_eq!(lines, expected_lines); + + let expected_mbcs: Vec<MultiByteChar> = $multi_byte_chars + .into_iter() + .map(|(pos, bytes)| MultiByteChar { + pos: BytePos(pos), + bytes, + }) + .collect(); + + assert_eq!(multi_byte_chars, expected_mbcs); + + let expected_nncs: Vec<NonNarrowChar> = $non_narrow_chars + .into_iter() + .map(|(pos, width)| { + NonNarrowChar::new(BytePos(pos), width) + }) + .collect(); + + assert_eq!(non_narrow_chars, expected_nncs); + }) +} + +test!( + case: empty_text, + text: "", + filemap_start_pos: 0, + lines: vec![], + multi_byte_chars: vec![], + non_narrow_chars: vec![], +); + +test!( + case: newlines_short, + text: "a\nc", + filemap_start_pos: 0, + lines: vec![0, 2], + multi_byte_chars: vec![], + non_narrow_chars: vec![], +); + +test!( + case: newlines_long, + text: "012345678\nabcdef012345678\na", + filemap_start_pos: 0, + lines: vec![0, 10, 26], + multi_byte_chars: vec![], + non_narrow_chars: vec![], +); + +test!( + case: newline_and_multi_byte_char_in_same_chunk, + text: "01234β789\nbcdef0123456789abcdef", + filemap_start_pos: 0, + lines: vec![0, 11], + multi_byte_chars: vec![(5, 2)], + non_narrow_chars: vec![], +); + +test!( + case: newline_and_control_char_in_same_chunk, + text: "01234\u{07}6789\nbcdef0123456789abcdef", + filemap_start_pos: 0, + lines: vec![0, 11], + multi_byte_chars: vec![], + non_narrow_chars: vec![(5, 0)], +); + +test!( + case: multi_byte_char_short, + text: "aβc", + filemap_start_pos: 0, + lines: vec![0], + multi_byte_chars: vec![(1, 2)], + non_narrow_chars: vec![], +); + +test!( + case:
multi_byte_char_long, + text: "0123456789abcΔf012345β", + filemap_start_pos: 0, + lines: vec![0], + multi_byte_chars: vec![(13, 2), (22, 2)], + non_narrow_chars: vec![], +); + +test!( + case: multi_byte_char_across_chunk_boundary, + text: "0123456789abcdeΔ123456789abcdef01234", + filemap_start_pos: 0, + lines: vec![0], + multi_byte_chars: vec![(15, 2)], + non_narrow_chars: vec![], +); + +test!( + case: multi_byte_char_across_chunk_boundary_tail, + text: "0123456789abcdeΔ....", + filemap_start_pos: 0, + lines: vec![0], + multi_byte_chars: vec![(15, 2)], + non_narrow_chars: vec![], +); + +test!( + case: non_narrow_short, + text: "0\t2", + filemap_start_pos: 0, + lines: vec![0], + multi_byte_chars: vec![], + non_narrow_chars: vec![(1, 4)], +); + +test!( + case: non_narrow_long, + text: "01\t3456789abcdef01234567\u{07}9", + filemap_start_pos: 0, + lines: vec![0], + multi_byte_chars: vec![], + non_narrow_chars: vec![(2, 4), (24, 0)], +); + +test!( + case: output_offset_all, + text: "01\t345\n789abcΔf01234567\u{07}9\nbcΔf", + filemap_start_pos: 1000, + lines: vec![0 + 1000, 7 + 1000, 27 + 1000], + multi_byte_chars: vec![(13 + 1000, 2), (29 + 1000, 2)], + non_narrow_chars: vec![(2 + 1000, 4), (24 + 1000, 0)], +); diff --git a/src/libsyntax_pos/lib.rs b/src/libsyntax_pos/lib.rs index 93b65dac288..90f3ae90c2f 100644 --- a/src/libsyntax_pos/lib.rs +++ b/src/libsyntax_pos/lib.rs @@ -24,6 +24,7 @@ #![feature(optin_builtin_traits)] #![allow(unused_attributes)] #![feature(specialization)] +#![feature(stdsimd)] use std::borrow::Cow; use std::cell::Cell; @@ -47,6 +48,9 @@ use serialize::{Encodable, Decodable, Encoder, Decoder}; extern crate serialize; extern crate serialize as rustc_serialize; // used by deriving +#[macro_use] +extern crate cfg_if; + extern crate unicode_width; pub mod edition; @@ -58,6 +62,8 @@ pub use span_encoding::{Span, DUMMY_SP}; pub mod symbol; +mod analyze_filemap; + pub struct Globals { symbol_interner: Lock, span_interner: Lock, @@ -652,7 +658,7 @@ impl 
From> for MultiSpan { pub const NO_EXPANSION: SyntaxContext = SyntaxContext::empty(); /// Identifies an offset of a multi-byte character in a FileMap -#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq)] +#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq, Debug)] pub struct MultiByteChar { /// The absolute offset of the character in the CodeMap pub pos: BytePos, @@ -661,7 +667,7 @@ pub struct MultiByteChar { } /// Identifies an offset of a non-narrow character in a FileMap -#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq)] +#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq, Debug)] pub enum NonNarrowChar { /// Represents a zero-width character ZeroWidth(BytePos), @@ -950,7 +956,7 @@ impl FileMap { let end_pos = start_pos.to_usize() + src.len(); let (lines, multibyte_chars, non_narrow_chars) = - Self::find_newlines_and_special_chars(&src[..], start_pos); + analyze_filemap::analyze_filemap(&src[..], start_pos); FileMap { name, @@ -969,71 +975,6 @@ impl FileMap { } } - fn find_newlines_and_special_chars(src: &str, filemap_start_pos: BytePos) - -> (Vec, Vec, Vec) { - - let mut index = 0; - let mut lines = vec![filemap_start_pos]; - let mut multibyte_chars = vec![]; - let mut non_narrow_chars = vec![]; - - while index < src.len() { - let byte_pos = BytePos::from_usize(index) + filemap_start_pos; - let byte = src.as_bytes()[index]; - - if byte.is_ascii() { - match byte { - b'\n' => { - lines.push(byte_pos + BytePos(1)); - } - b'\t' => { - // Tabs will consume 4 columns. - non_narrow_chars.push(NonNarrowChar::new(byte_pos, 4)); - } - c => if c.is_ascii_control() { - // Assume control characters are zero width. 
- non_narrow_chars.push(NonNarrowChar::new(byte_pos, 0)); - } - } - - index += 1; - } else { - let c = (&src[index..]).chars().next().unwrap(); - let c_len = c.len_utf8(); - - if c_len > 1 { - assert!(c_len >=2 && c_len <= 4); - let mbc = MultiByteChar { - pos: byte_pos, - bytes: c_len, - }; - multibyte_chars.push(mbc); - } - - // Assume control characters are zero width. - // FIXME: How can we decide between `width` and `width_cjk`? - let c_width = unicode_width::UnicodeWidthChar::width(c).unwrap_or(0); - - if c_width != 1 { - non_narrow_chars.push(NonNarrowChar::new(byte_pos, c_width)); - } - - index += c_len; - } - } - - // The loop above optimistically registers a new line *after* each of \n - // it encounters. If that point is already outside the filemap, remove - // it again. - if let Some(&last_line_start) = lines.last() { - if last_line_start == filemap_start_pos + BytePos::from_usize(src.len()) { - lines.pop(); - } - } - - (lines, multibyte_chars, non_narrow_chars) - } - /// Return the BytePos of the beginning of the current line. pub fn line_begin_pos(&self) -> BytePos { match self.lines.last() {