diff --git a/library/alloc/src/str.rs b/library/alloc/src/str.rs index afbe5cfaf8ef9..27d14d4c63b6e 100644 --- a/library/alloc/src/str.rs +++ b/library/alloc/src/str.rs @@ -11,7 +11,7 @@ use core::borrow::{Borrow, BorrowMut}; use core::iter::FusedIterator; use core::mem; use core::ptr; -use core::str::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher}; +use core::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher}; use core::unicode::conversions; use crate::borrow::ToOwned; @@ -20,8 +20,6 @@ use crate::slice::{Concat, Join, SliceIndex}; use crate::string::String; use crate::vec::Vec; -#[stable(feature = "rust1", since = "1.0.0")] -pub use core::str::pattern; #[stable(feature = "encode_utf16", since = "1.8.0")] pub use core::str::EncodeUtf16; #[stable(feature = "split_ascii_whitespace", since = "1.34.0")] @@ -268,7 +266,7 @@ impl str { without modifying the original"] #[stable(feature = "rust1", since = "1.0.0")] #[inline] - pub fn replace<'a, P: Pattern<'a>>(&'a self, from: P, to: &str) -> String { + pub fn replace<'a, P: Pattern<&'a str>>(&'a self, from: P, to: &str) -> String { let mut result = String::new(); let mut last_end = 0; for (start, part) in self.match_indices(from) { @@ -308,7 +306,7 @@ impl str { #[must_use = "this returns the replaced string as a new allocation, \ without modifying the original"] #[stable(feature = "str_replacen", since = "1.16.0")] - pub fn replacen<'a, P: Pattern<'a>>(&'a self, pat: P, to: &str, count: usize) -> String { + pub fn replacen<'a, P: Pattern<&'a str>>(&'a self, pat: P, to: &str, count: usize) -> String { // Hope to reduce the times of re-allocation let mut result = String::with_capacity(32); let mut last_end = 0; diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs index 2b843647dd510..ea4da19bce11a 100644 --- a/library/alloc/src/string.rs +++ b/library/alloc/src/string.rs @@ -57,7 +57,7 @@ use core::ops::Bound::{Excluded, Included, Unbounded}; use core::ops::{self, 
Index, IndexMut, Range, RangeBounds}; use core::ptr; use core::slice; -use core::str::pattern::Pattern; +use core::pattern::Pattern; #[cfg(not(no_global_oom_handling))] use core::str::Utf8Chunks; @@ -1371,9 +1371,9 @@ impl String { #[unstable(feature = "string_remove_matches", reason = "new API", issue = "72826")] pub fn remove_matches<'a, P>(&'a mut self, pat: P) where - P: for<'x> Pattern<'x>, + P: for<'x> Pattern<&'x str>, { - use core::str::pattern::Searcher; + use core::pattern::Searcher; let rejections = { let mut searcher = pat.into_searcher(self); @@ -2174,10 +2174,10 @@ impl<'a> Extend> for String { reason = "API not fully fleshed out and ready to be stabilized", issue = "27721" )] -impl<'a, 'b> Pattern<'a> for &'b String { - type Searcher = <&'b str as Pattern<'a>>::Searcher; +impl<'a, 'b> Pattern<&'a str> for &'b String { + type Searcher = <&'b str as Pattern<&'a str>>::Searcher; - fn into_searcher(self, haystack: &'a str) -> <&'b str as Pattern<'a>>::Searcher { + fn into_searcher(self, haystack: &'a str) -> <&'b str as Pattern<&'a str>>::Searcher { self[..].into_searcher(haystack) } diff --git a/library/alloc/tests/str.rs b/library/alloc/tests/str.rs index 4d182be02c9e9..7ba183edc77f7 100644 --- a/library/alloc/tests/str.rs +++ b/library/alloc/tests/str.rs @@ -1856,14 +1856,14 @@ fn test_repeat() { } mod pattern { - use std::str::pattern::SearchStep::{self, Done, Match, Reject}; - use std::str::pattern::{Pattern, ReverseSearcher, Searcher}; + use core::pattern::SearchStep::{self, Done, Match, Reject}; + use core::pattern::{Pattern, ReverseSearcher, Searcher}; macro_rules! 
make_test { ($name:ident, $p:expr, $h:expr, [$($e:expr,)*]) => { #[allow(unused_imports)] mod $name { - use std::str::pattern::SearchStep::{Match, Reject}; + use core::pattern::SearchStep::{Match, Reject}; use super::{cmp_search_to_vec}; #[test] fn fwd() { @@ -1879,7 +1879,7 @@ mod pattern { fn cmp_search_to_vec<'a>( rev: bool, - pat: impl Pattern<'a, Searcher: ReverseSearcher<'a>>, + pat: impl Pattern<&'a str, Searcher: ReverseSearcher<&'a str>>, haystack: &'a str, right: Vec, ) { @@ -2139,11 +2139,11 @@ generate_iterator_test! { #[test] fn different_str_pattern_forwarding_lifetimes() { - use std::str::pattern::Pattern; + use core::pattern::Pattern; fn foo<'a, P>(p: P) where - for<'b> &'b P: Pattern<'a>, + for<'b> &'b P: Pattern<&'a str>, { for _ in 0..3 { "asdf".find(&p); diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs index dc0702c467a4e..14cfd8a119497 100644 --- a/library/core/src/lib.rs +++ b/library/core/src/lib.rs @@ -357,6 +357,7 @@ pub mod sync; pub mod fmt; pub mod hash; +pub mod pattern; pub mod slice; pub mod str; pub mod time; diff --git a/library/core/src/pattern.rs b/library/core/src/pattern.rs new file mode 100644 index 0000000000000..dfc633e774a3c --- /dev/null +++ b/library/core/src/pattern.rs @@ -0,0 +1,362 @@ +//! The Pattern API. +//! +//! The Pattern API provides a generic mechanism for using different pattern +//! types when searching through different objects. +//! +//! For more details, see the traits [`Pattern`], [`Haystack`], [`Searcher`], +//! [`ReverseSearcher`] and [`DoubleEndedSearcher`]. Although this API is +//! unstable, it is exposed via stable APIs on the [`str`] type. +//! +//! # Examples +//! +//! [`Pattern`] is [implemented][pattern-impls] in the stable API for +//! [`&str`][`str`], [`char`], slices of [`char`], and functions and closures +//! implementing `FnMut(char) -> bool`. +//! +//! ``` +//! let s = "Can you find a needle in a haystack?"; +//! +//! // &str pattern +//! 
assert_eq!(s.find("you"), Some(4)); +//! // char pattern +//! assert_eq!(s.find('n'), Some(2)); +//! // array of chars pattern +//! assert_eq!(s.find(&['a', 'e', 'i', 'o', 'u']), Some(1)); +//! // slice of chars pattern +//! assert_eq!(s.find(&['a', 'e', 'i', 'o', 'u'][..]), Some(1)); +//! // closure pattern +//! assert_eq!(s.find(|c: char| c.is_ascii_punctuation()), Some(35)); +//! ``` +//! +//! [pattern-impls]: Pattern#implementors + +#![unstable( + feature = "pattern", + reason = "API not fully fleshed out and ready to be stabilized", + issue = "27721" +)] + +use crate::marker::PhantomData; + +/// A pattern which can be matched against a [`Haystack`]. +/// +/// A `Pattern` expresses that the implementing type can be used as a pattern +/// for searching in a `H`. +/// +/// For example, character `'a'` and string `"aa"` are patterns that would match +/// at index `1` in the string `"baaaab"`. +/// +/// The trait itself acts as a builder for an associated +/// [`Searcher`] type, which does the actual work of finding +/// occurrences of the pattern in a string. +/// +/// Depending on the type of the pattern, the behaviour of methods like +/// [`str::find`] and [`str::contains`] can change. The table below describes +/// some of those behaviours. 
+/// +/// | Pattern type | Match condition | +/// |--------------------------|-------------------------------------------| +/// | `&str` | is substring | +/// | `char` | is contained in string | +/// | `&[char]` | any char in slice is contained in string | +/// | `F: FnMut(char) -> bool` | `F` returns `true` for a char in string | +/// | `&&str` | is substring | +/// | `&String` | is substring | +/// +/// # Examples +/// +/// ``` +/// // &str +/// assert_eq!("abaaa".find("ba"), Some(1)); +/// assert_eq!("abaaa".find("bac"), None); +/// +/// // char +/// assert_eq!("abaaa".find('a'), Some(0)); +/// assert_eq!("abaaa".find('b'), Some(1)); +/// assert_eq!("abaaa".find('c'), None); +/// +/// // &[char; N] +/// assert_eq!("ab".find(&['b', 'a']), Some(0)); +/// assert_eq!("abaaa".find(&['a', 'z']), Some(0)); +/// assert_eq!("abaaa".find(&['c', 'd']), None); +/// +/// // &[char] +/// assert_eq!("ab".find(&['b', 'a'][..]), Some(0)); +/// assert_eq!("abaaa".find(&['a', 'z'][..]), Some(0)); +/// assert_eq!("abaaa".find(&['c', 'd'][..]), None); +/// +/// // FnMut(char) -> bool +/// assert_eq!("abcdef_z".find(|ch| ch > 'd' && ch < 'y'), Some(4)); +/// assert_eq!("abcddd_z".find(|ch| ch > 'd' && ch < 'y'), None); +/// ``` +#[rustc_has_incoherent_inherent_impls] +pub trait Pattern: Sized { + /// Associated searcher for this pattern + type Searcher: Searcher; + + /// Constructs the associated searcher from + /// `self` and the `haystack` to search in. 
+ fn into_searcher(self, haystack: H) -> Self::Searcher; + + /// Checks whether the pattern matches anywhere in the haystack + fn is_contained_in(self, haystack: H) -> bool { + self.into_searcher(haystack).next_match().is_some() + } + + /// Checks whether the pattern matches at the front of the haystack + fn is_prefix_of(self, haystack: H) -> bool { + matches!( + self.into_searcher(haystack).next(), + SearchStep::Match(start, _) if start == haystack.cursor_at_front() + ) + } + + /// Checks whether the pattern matches at the back of the haystack + fn is_suffix_of(self, haystack: H) -> bool + where Self::Searcher: ReverseSearcher { + matches!( + self.into_searcher(haystack).next_back(), + SearchStep::Match(_, end) if end == haystack.cursor_at_back() + ) + } + + /// Removes the pattern from the front of haystack, if it matches. + fn strip_prefix_of(self, haystack: H) -> Option { + if let SearchStep::Match(start, end) = self.into_searcher(haystack).next() { + // This cannot be debug_assert_eq because StartCursor isn’t Debug. + debug_assert!(start == haystack.cursor_at_front(), + "The first search step from Searcher \ + must include the first character"); + // SAFETY: `Searcher` is known to return valid indices. + Some(unsafe { haystack.split_at_cursor_unchecked(end) }.1) + } else { + None + } + } + + /// Removes the pattern from the back of haystack, if it matches. + fn strip_suffix_of(self, haystack: H) -> Option + where Self::Searcher: ReverseSearcher { + if let SearchStep::Match(start, end) = self.into_searcher(haystack).next_back() { + // This cannot be debug_assert_eq because StartCursor isn’t Debug. + debug_assert!(end == haystack.cursor_at_back(), + "The first search step from ReverseSearcher \ + must include the last character"); + // SAFETY: `Searcher` is known to return valid indices. + Some(unsafe { haystack.split_at_cursor_unchecked(start) }.0) + } else { + None + } + } +} + + +/// A type which can be searched in using a [`Pattern`]. 
+/// +/// The trait is used in combination with [`Pattern`] trait to express a pattern +/// that can be used to search for elements in given haystack. +pub trait Haystack: Sized + Copy { + /// A cursor representing position in the haystack or its end. + type Cursor: Copy + PartialOrd; + + /// Returns cursor pointing at the beginning of the haystack. + fn cursor_at_front(&self) -> Self::Cursor; + + /// Returns cursor pointing at the end of the haystack. + fn cursor_at_back(&self) -> Self::Cursor; + + /// Splits haystack into two at given cursor position. + /// + /// Note that splitting a haystack isn’t guaranteed to preserve total + /// length. That is, each separate part’s length may be longer than length + /// of the original haystack. This property is preserved for `&str` and + /// `&[T]` haystacks but not for `&OsStr`. + unsafe fn split_at_cursor_unchecked(self, cursor: Self::Cursor) -> (Self, Self); +} + + +/// Result of calling [`Searcher::next()`] or [`ReverseSearcher::next_back()`]. +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +pub enum SearchStep { + /// Expresses that a match of the pattern has been found at + /// `haystack[a..b]`. + Match(T, T), + /// Expresses that `haystack[a..b]` has been rejected as a possible match + /// of the pattern. + /// + /// Note that there might be more than one `Reject` between two `Match`es, + /// there is no requirement for them to be combined into one. + Reject(T, T), + /// Expresses that every byte of the haystack has been visited, ending + /// the iteration. + Done, +} + +/// A searcher for a string pattern. +/// +/// This trait provides methods for searching for non-overlapping +/// matches of a pattern starting from the front (left) of a string. +/// +/// It will be implemented by associated `Searcher` +/// types of the [`Pattern`] trait. +/// +/// The trait is marked unsafe because the indices returned by the +/// [`next()`][Searcher::next] methods are required to lie on valid utf8 +/// boundaries in the haystack. 
This enables consumers of this trait to +/// slice the haystack without additional runtime checks. +pub unsafe trait Searcher { + /// Getter for the underlying string to be searched in + /// + /// Will always return the same [`&str`][str]. + fn haystack(&self) -> H; + + /// Performs the next search step starting from the front. + /// + /// - Returns [`Match(a, b)`][SearchStep::Match] if `haystack[a..b]` matches + /// the pattern. + /// - Returns [`Reject(a, b)`][SearchStep::Reject] if `haystack[a..b]` can + /// not match the pattern, even partially. + /// - Returns [`Done`][SearchStep::Done] if every byte of the haystack has + /// been visited. + /// + /// The stream of [`Match`][SearchStep::Match] and + /// [`Reject`][SearchStep::Reject] values up to a [`Done`][SearchStep::Done] + /// will contain index ranges that are adjacent, non-overlapping, + /// covering the whole haystack, and laying on utf8 boundaries. + /// + /// A [`Match`][SearchStep::Match] result needs to contain the whole matched + /// pattern, however [`Reject`][SearchStep::Reject] results may be split up + /// into arbitrary many adjacent fragments. Both ranges may have zero length. + /// + /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"` + /// might produce the stream + /// `[Reject(0, 1), Reject(1, 2), Match(2, 5), Reject(5, 8)]` + fn next(&mut self) -> SearchStep; + + /// Finds the next [`Match`][SearchStep::Match] result. See [`next()`][Searcher::next]. + /// + /// Unlike [`next()`][Searcher::next], there is no guarantee that the returned ranges + /// of this and [`next_reject`][Searcher::next_reject] will overlap. This will return + /// `(start_match, end_match)`, where start_match is the index of where + /// the match begins, and end_match is the index after the end of the match. + fn next_match(&mut self) -> Option<(H::Cursor, H::Cursor)> { + loop_next::(|| self.next()) + } + + /// Finds the next [`Reject`][SearchStep::Reject] result. 
See [`next()`][Searcher::next] + /// and [`next_match()`][Searcher::next_match]. + /// + /// Unlike [`next()`][Searcher::next], there is no guarantee that the returned ranges + /// of this and [`next_match`][Searcher::next_match] will overlap. + fn next_reject(&mut self) -> Option<(H::Cursor, H::Cursor)> { + loop_next::(|| self.next()) + } +} + +/// A reverse searcher for a string pattern. +/// +/// This trait provides methods for searching for non-overlapping +/// matches of a pattern starting from the back (right) of a string. +/// +/// It will be implemented by associated [`Searcher`] +/// types of the [`Pattern`] trait if the pattern supports searching +/// for it from the back. +/// +/// The index ranges returned by this trait are not required +/// to exactly match those of the forward search in reverse. +/// +/// For the reason why this trait is marked unsafe, see the +/// parent trait [`Searcher`]. +pub unsafe trait ReverseSearcher: Searcher { + /// Performs the next search step starting from the back. + /// + /// - Returns [`Match(a, b)`][SearchStep::Match] if `haystack[a..b]` + /// matches the pattern. + /// - Returns [`Reject(a, b)`][SearchStep::Reject] if `haystack[a..b]` + /// can not match the pattern, even partially. + /// - Returns [`Done`][SearchStep::Done] if every byte of the haystack + /// has been visited + /// + /// The stream of [`Match`][SearchStep::Match] and + /// [`Reject`][SearchStep::Reject] values up to a [`Done`][SearchStep::Done] + /// will contain index ranges that are adjacent, non-overlapping, + /// covering the whole haystack, and laying on utf8 boundaries. + /// + /// A [`Match`][SearchStep::Match] result needs to contain the whole matched + /// pattern, however [`Reject`][SearchStep::Reject] results may be split up + /// into arbitrary many adjacent fragments. Both ranges may have zero length. 
+ /// + /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"` + /// might produce the stream + /// `[Reject(7, 8), Match(4, 7), Reject(1, 4), Reject(0, 1)]`. + fn next_back(&mut self) -> SearchStep; + + /// Finds the next [`Match`][SearchStep::Match] result. + /// See [`next_back()`][ReverseSearcher::next_back]. + fn next_match_back(&mut self) -> Option<(H::Cursor, H::Cursor)> { + loop_next::(|| self.next_back()) + } + + /// Finds the next [`Reject`][SearchStep::Reject] result. + /// See [`next_back()`][ReverseSearcher::next_back]. + fn next_reject_back(&mut self) -> Option<(H::Cursor, H::Cursor)> { + loop_next::(|| self.next_back()) + } +} + +/// A marker trait to express that a [`ReverseSearcher`] +/// can be used for a [`DoubleEndedIterator`] implementation. +/// +/// For this, the impl of [`Searcher`] and [`ReverseSearcher`] need +/// to follow these conditions: +/// +/// - All results of `next()` need to be identical +/// to the results of `next_back()` in reverse order. +/// - `next()` and `next_back()` need to behave as +/// the two ends of a range of values, that is they +/// can not "walk past each other". +/// +/// # Examples +/// +/// `char::Searcher` is a `DoubleEndedSearcher` because searching for a +/// [`char`] only requires looking at one at a time, which behaves the same +/// from both ends. +/// +/// `(&str)::Searcher` is not a `DoubleEndedSearcher` because +/// the pattern `"aa"` in the haystack `"aaa"` matches as either +/// `"[aa]a"` or `"a[aa]"`, depending from which side it is searched. 
+pub trait DoubleEndedSearcher: ReverseSearcher {} + + +/// XXX TODO placeholder +#[derive(Clone, Debug)] +pub struct Predicate(F, PhantomData<*const T>); + +/// XXX TODO placeholder +pub fn predicate bool>(pred: F) -> Predicate { + Predicate(pred, PhantomData) +} + +impl bool> Predicate { + /// XXX TODO placeholder + pub fn test(&mut self, element: T) -> bool { self.0(element) } + + /// XXX TODO placeholder + pub fn as_fn(&mut self) -> &mut F { &mut self.0 } +} + + +/// Calls callback until it returns `SearchStep::Done` or either `Match` or +/// `Reject` depending on the `MATCH` generic argument. +pub(super) fn loop_next( + mut next: impl FnMut() -> SearchStep, +) -> Option<(T, T)> { + loop { + match next() { + SearchStep::Done => break None, + SearchStep::Match(start, end) if MATCH => break Some((start, end)), + SearchStep::Reject(start, end) if !MATCH => break Some((start, end)), + _ => (), + } + } +} diff --git a/library/core/src/slice/cmp.rs b/library/core/src/slice/cmp.rs index 5e1b218e507bd..a9b0abae21fa5 100644 --- a/library/core/src/slice/cmp.rs +++ b/library/core/src/slice/cmp.rs @@ -227,34 +227,286 @@ impl_marker_for!(BytewiseEquality, u8 i8 u16 i16 u32 i32 u64 i64 u128 i128 usize isize char bool); pub(super) trait SliceContains: Sized { - fn slice_contains(&self, x: &[Self]) -> bool; + fn slice_contains_element(hs: &[Self], needle: &Self) -> bool; + fn slice_contains_slice(hs: &[Self], needle: &[Self]) -> bool; } impl SliceContains for T where T: PartialEq, { - default fn slice_contains(&self, x: &[Self]) -> bool { - x.iter().any(|y| *y == *self) + default fn slice_contains_element(hs: &[Self], needle: &Self) -> bool { + hs.iter().any(|element| *element == *needle) + } + + default fn slice_contains_slice(hs: &[Self], needle: &[Self]) -> bool { + default_slice_contains_slice(hs, needle) } } impl SliceContains for u8 { #[inline] - fn slice_contains(&self, x: &[Self]) -> bool { - memchr::memchr(*self, x).is_some() + fn slice_contains_element(hs: &[Self], 
needle: &Self) -> bool { + memchr::memchr(*needle, hs).is_some() + } + + #[inline] + fn slice_contains_slice(hs: &[Self], needle: &[Self]) -> bool { + if needle.len() <= 32 { + if let Some(result) = simd_contains(hs, needle) { + return result; + } + } + default_slice_contains_slice(hs, needle) } } +unsafe fn bytes_of(slice: &[T]) -> &[u8] { + // SAFETY: caller promises that `T` and `u8` have the same memory layout, + // thus casting `x.as_ptr()` as `*const u8` is safe. The `x.as_ptr()` comes + // from a reference and is thus guaranteed to be valid for reads for the + // length of the slice `x.len()`, which cannot be larger than + // `isize::MAX`. The returned slice is never mutated. + unsafe { from_raw_parts(slice.as_ptr() as *const u8, slice.len()) } +} + impl SliceContains for i8 { #[inline] - fn slice_contains(&self, x: &[Self]) -> bool { - let byte = *self as u8; - // SAFETY: `i8` and `u8` have the same memory layout, thus casting `x.as_ptr()` - // as `*const u8` is safe. The `x.as_ptr()` comes from a reference and is thus guaranteed - // to be valid for reads for the length of the slice `x.len()`, which cannot be larger - // than `isize::MAX`. The returned slice is never mutated. - let bytes: &[u8] = unsafe { from_raw_parts(x.as_ptr() as *const u8, x.len()) }; - memchr::memchr(byte, bytes).is_some() + fn slice_contains_element(hs: &[Self], needle: &Self) -> bool { + // SAFETY: i8 and u8 have the same memory layout + u8::slice_contains_element(unsafe { bytes_of(hs) }, &(*needle as u8)) + } + + #[inline] + fn slice_contains_slice(hs: &[Self], needle: &[Self]) -> bool { + // SAFETY: i8 and u8 have the same memory layout + unsafe { u8::slice_contains_slice(bytes_of(hs), bytes_of(needle)) } + } +} + +impl SliceContains for bool { + #[inline] + fn slice_contains_element(hs: &[Self], needle: &Self) -> bool { + // SAFETY: bool and u8 have the same memory layout and all valid bool + // bit patterns are valid u8 bit patterns. 
+ u8::slice_contains_element(unsafe { bytes_of(hs) }, &(*needle as u8)) + } + + #[inline] + fn slice_contains_slice(hs: &[Self], needle: &[Self]) -> bool { + // SAFETY: bool and u8 have the same memory layout and all valid bool + // bit patterns are valid u8 bit patterns. + unsafe { u8::slice_contains_slice(bytes_of(hs), bytes_of(needle)) } + } +} + +fn default_slice_contains_slice(hs: &[T], needle: &[T]) -> bool { + super::pattern::NaiveSearcherState::new(hs.len()) + .next_match(hs, needle) + .is_some() +} + + +/// SIMD search for short needles based on +/// Wojciech Muła's "SIMD-friendly algorithms for substring searching"[0] +/// +/// It skips ahead by the vector width on each iteration (rather than the needle length as two-way +/// does) by probing the first and last byte of the needle for the whole vector width +/// and only doing full needle comparisons when the vectorized probe indicated potential matches. +/// +/// Since the x86_64 baseline only offers SSE2 we only use u8x16 here. +/// If we ever ship std with for x86-64-v3 or adapt this for other platforms then wider vectors +/// should be evaluated. +/// +/// For haystacks smaller than vector-size + needle length it falls back to +/// a naive O(n*m) search so this implementation should not be called on larger needles. +/// +/// [0]: http://0x80.pl/articles/simd-strfind.html#sse-avx2 +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +#[inline] +fn simd_contains(haystack: &[u8], needle: &[u8]) -> Option { + debug_assert!(needle.len() > 1); + + use crate::ops::BitAnd; + use crate::simd::mask8x16 as Mask; + use crate::simd::u8x16 as Block; + use crate::simd::{SimdPartialEq, ToBitMask}; + + let first_probe = needle[0]; + let last_byte_offset = needle.len() - 1; + + // the offset used for the 2nd vector + let second_probe_offset = if needle.len() == 2 { + // never bail out on len=2 needles because the probes will fully cover them and have + // no degenerate cases. 
+ 1 + } else { + // try a few bytes in case first and last byte of the needle are the same + let Some(second_probe_offset) = (needle.len().saturating_sub(4)..needle.len()).rfind(|&idx| needle[idx] != first_probe) else { + // fall back to other search methods if we can't find any different bytes + // since we could otherwise hit some degenerate cases + return None; + }; + second_probe_offset + }; + + // do a naive search if the haystack is too small to fit + if haystack.len() < Block::LANES + last_byte_offset { + return Some(haystack.windows(needle.len()).any(|c| c == needle)); + } + + let first_probe: Block = Block::splat(first_probe); + let second_probe: Block = Block::splat(needle[second_probe_offset]); + // first byte are already checked by the outer loop. to verify a match only the + // remainder has to be compared. + let trimmed_needle = &needle[1..]; + + // this #[cold] is load-bearing, benchmark before removing it... + let check_mask = #[cold] + |idx, mask: u16, skip: bool| -> bool { + if skip { + return false; + } + + // and so is this. optimizations are weird. + let mut mask = mask; + + while mask != 0 { + let trailing = mask.trailing_zeros(); + let offset = idx + trailing as usize + 1; + // SAFETY: mask is between 0 and 15 trailing zeroes, we skip one additional byte that was already compared + // and then take trimmed_needle.len() bytes. 
This is within the bounds defined by the outer loop + unsafe { + let sub = haystack.get_unchecked(offset..).get_unchecked(..trimmed_needle.len()); + if small_slice_eq(sub, trimmed_needle) { + return true; + } + } + mask &= !(1 << trailing); + } + return false; + }; + + let test_chunk = |idx| -> u16 { + // SAFETY: this requires at least LANES bytes being readable at idx + // that is ensured by the loop ranges (see comments below) + let a: Block = unsafe { haystack.as_ptr().add(idx).cast::().read_unaligned() }; + // SAFETY: this requires LANES + block_offset bytes being readable at idx + let b: Block = unsafe { + haystack.as_ptr().add(idx).add(second_probe_offset).cast::().read_unaligned() + }; + let eq_first: Mask = a.simd_eq(first_probe); + let eq_last: Mask = b.simd_eq(second_probe); + let both = eq_first.bitand(eq_last); + let mask = both.to_bitmask(); + + return mask; + }; + + let mut i = 0; + let mut result = false; + // The loop condition must ensure that there's enough headroom to read LANE bytes, + // and not only at the current index but also at the index shifted by block_offset + const UNROLL: usize = 4; + while i + last_byte_offset + UNROLL * Block::LANES < haystack.len() && !result { + let mut masks = [0u16; UNROLL]; + for j in 0..UNROLL { + masks[j] = test_chunk(i + j * Block::LANES); + } + for j in 0..UNROLL { + let mask = masks[j]; + if mask != 0 { + result |= check_mask(i + j * Block::LANES, mask, result); + } + } + i += UNROLL * Block::LANES; + } + while i + last_byte_offset + Block::LANES < haystack.len() && !result { + let mask = test_chunk(i); + if mask != 0 { + result |= check_mask(i, mask, result); + } + i += Block::LANES; + } + + // Process the tail that didn't fit into LANES-sized steps. + // This simply repeats the same procedure but as right-aligned chunk instead + // of a left-aligned one. The last byte must be exactly flush with the string end so + // we don't miss a single byte or read out of bounds. 
+ let i = haystack.len() - last_byte_offset - Block::LANES; + let mask = test_chunk(i); + if mask != 0 { + result |= check_mask(i, mask, result); + } + + Some(result) +} + +/// Compares short slices for equality. +/// +/// It avoids a call to libc's memcmp which is faster on long slices +/// due to SIMD optimizations but it incurs a function call overhead. +/// +/// # Safety +/// +/// Both slices must have the same length. +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] // only called on x86 +#[inline] +unsafe fn small_slice_eq(x: &[u8], y: &[u8]) -> bool { + debug_assert_eq!(x.len(), y.len()); + // This function is adapted from + // https://github.com/BurntSushi/memchr/blob/8037d11b4357b0f07be2bb66dc2659d9cf28ad32/src/memmem/util.rs#L32 + + // If we don't have enough bytes to do 4-byte at a time loads, then + // fall back to the naive slow version. + // + // Potential alternative: We could do a copy_nonoverlapping combined with a mask instead + // of a loop. Benchmark it. + if x.len() < 4 { + for (&b1, &b2) in x.iter().zip(y) { + if b1 != b2 { + return false; + } + } + return true; + } + // When we have 4 or more bytes to compare, then proceed in chunks of 4 at + // a time using unaligned loads. + // + // Also, why do 4 byte loads instead of, say, 8 byte loads? The reason is + // that this particular version of memcmp is likely to be called with tiny + // needles. That means that if we do 8 byte loads, then a higher proportion + // of memcmp calls will use the slower variant above. With that said, this + // is a hypothesis and is only loosely supported by benchmarks. There's + // likely some improvement that could be made here. The main thing here + // though is to optimize for latency, not throughput. + + // SAFETY: Via the conditional above, we know that both `px` and `py` + // have the same length, so `px < pxend` implies that `py < pyend`. + // Thus, derefencing both `px` and `py` in the loop below is safe. 
+ // + // Moreover, we set `pxend` and `pyend` to be 4 bytes before the actual + // end of `px` and `py`. Thus, the final dereference outside of the + // loop is guaranteed to be valid. (The final comparison will overlap with + // the last comparison done in the loop for lengths that aren't multiples + // of four.) + // + // Finally, we needn't worry about alignment here, since we do unaligned + // loads. + unsafe { + let (mut px, mut py) = (x.as_ptr(), y.as_ptr()); + let (pxend, pyend) = (px.add(x.len() - 4), py.add(y.len() - 4)); + while px < pxend { + let vx = (px as *const u32).read_unaligned(); + let vy = (py as *const u32).read_unaligned(); + if vx != vy { + return false; + } + px = px.add(4); + py = py.add(4); + } + let vx = (pxend as *const u32).read_unaligned(); + let vy = (pyend as *const u32).read_unaligned(); + vx == vy } } diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs index 6ea16bf643071..0ffd2eb285384 100644 --- a/library/core/src/slice/mod.rs +++ b/library/core/src/slice/mod.rs @@ -15,6 +15,7 @@ use crate::num::NonZeroUsize; use crate::ops::{Bound, FnMut, OneSidedRange, Range, RangeBounds}; use crate::option::Option; use crate::option::Option::{None, Some}; +use crate::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher}; use crate::ptr; use crate::result::Result; use crate::result::Result::{Err, Ok}; @@ -40,6 +41,7 @@ mod ascii; mod cmp; mod index; mod iter; +mod pattern; mod raw; mod rotate; mod specialize; @@ -2213,11 +2215,14 @@ impl [T] { RSplitNMut::new(self.rsplit_mut(pred), n) } - /// Returns `true` if the slice contains an element with the given value. + /// Returns `true` if the slice contains given pattern; returns `false` + /// otherwise. /// - /// This operation is *O*(*n*). + /// This may be used to look for a single element (in which case the + /// operation is *O*(*n*)) or with more complex patterns. /// - /// Note that if you have a sorted slice, [`binary_search`] may be faster. 
+ /// Note that if you have a sorted slice and are looking for a single + /// element, [`binary_search`] may be faster. /// /// [`binary_search`]: slice::binary_search /// @@ -2227,11 +2232,15 @@ impl [T] { /// let v = [10, 40, 30]; /// assert!(v.contains(&30)); /// assert!(!v.contains(&50)); + /// + /// assert!(v.contains(&[])); + /// assert!(v.contains(&[40, 30])); + /// assert!(!v.contains(&[30, 40])); /// ``` /// - /// If you do not have a `&T`, but some other value that you can compare - /// with one (for example, `String` implements `PartialEq`), you can - /// use `iter().any`: + /// If you’re looking for a single element and don’t have a `&T`, but some + /// other value that you can compare with one (for example, `String` + /// implements `PartialEq`), you can use `iter().any`: /// /// ``` /// let v = [String::from("hello"), String::from("world")]; // slice of `String` @@ -2241,44 +2250,42 @@ impl [T] { #[stable(feature = "rust1", since = "1.0.0")] #[inline] #[must_use] - pub fn contains(&self, x: &T) -> bool - where - T: PartialEq, - { - cmp::SliceContains::slice_contains(x, self) + pub fn contains<'a, P: Pattern<&'a [T]>>(&'a self, pat: P) -> bool { + pat.is_contained_in(self) } - /// Returns `true` if `needle` is a prefix of the slice. + /// Returns `true` if `pattern` matches at the beginning of the slice. 
///
     /// # Examples
     ///
     /// ```
     /// let v = [10, 40, 30];
+    ///
+    /// assert!(v.starts_with(&[]));
     /// assert!(v.starts_with(&[10]));
     /// assert!(v.starts_with(&[10, 40]));
     /// assert!(!v.starts_with(&[50]));
     /// assert!(!v.starts_with(&[10, 50]));
+    ///
+    /// assert!(v.starts_with(&10));
+    /// assert!(!v.starts_with(&30));
     /// ```
     ///
-    /// Always returns `true` if `needle` is an empty slice:
+    /// Always returns `true` if `pattern` is an empty slice:
     ///
     /// ```
     /// let v = &[10, 40, 30];
     /// assert!(v.starts_with(&[]));
     /// let v: &[u8] = &[];
     /// assert!(v.starts_with(&[]));
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
     #[must_use]
-    pub fn starts_with(&self, needle: &[T]) -> bool
-    where
-        T: PartialEq,
-    {
-        let n = needle.len();
-        self.len() >= n && needle == &self[..n]
+    pub fn starts_with<'a, P: Pattern<&'a [T]>>(&'a self, pattern: P) -> bool {
+        pattern.is_prefix_of(self)
     }
 
-    /// Returns `true` if `needle` is a suffix of the slice.
+    /// Returns `true` if `pattern` matches at the end of the slice.
     ///
     /// # Examples
     ///
@@ -2288,9 +2295,12 @@ impl<T> [T] {
     /// assert!(v.ends_with(&[40, 30]));
     /// assert!(!v.ends_with(&[50]));
     /// assert!(!v.ends_with(&[50, 30]));
+    ///
+    /// assert!(v.ends_with(&30));
+    /// assert!(!v.ends_with(&10));
     /// ```
     ///
-    /// Always returns `true` if `needle` is an empty slice:
+    /// Always returns `true` if `pattern` is an empty slice:
     ///
     /// ```
     /// let v = &[10, 40, 30];
@@ -2300,20 +2310,15 @@ impl<T> [T] {
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
     #[must_use]
-    pub fn ends_with(&self, needle: &[T]) -> bool
-    where
-        T: PartialEq,
-    {
-        let (m, n) = (self.len(), needle.len());
-        m >= n && needle == &self[m - n..]
+    pub fn ends_with<'a, P>(&'a self, pattern: P) -> bool
+    where P: Pattern<&'a [T], Searcher: ReverseSearcher<&'a [T]>> {
+        pattern.is_suffix_of(self)
     }
 
     /// Returns a subslice with the prefix removed.
/// - /// If the slice starts with `prefix`, returns the subslice after the prefix, wrapped in `Some`. - /// If `prefix` is empty, simply returns the original slice. - /// - /// If the slice does not start with `prefix`, returns `None`. + /// If `prefix` matches at the beginning of the slice, returns the subslice + /// after the prefix, wrapped in `Some`. Otherwise returns `None`. /// /// # Examples /// @@ -2324,34 +2329,20 @@ impl [T] { /// assert_eq!(v.strip_prefix(&[50]), None); /// assert_eq!(v.strip_prefix(&[10, 50]), None); /// - /// let prefix : &str = "he"; - /// assert_eq!(b"hello".strip_prefix(prefix.as_bytes()), + /// let prefix: &[u8] = b"he"; + /// assert_eq!(b"hello".strip_prefix(prefix), /// Some(b"llo".as_ref())); /// ``` #[must_use = "returns the subslice without modifying the original"] #[stable(feature = "slice_strip", since = "1.51.0")] - pub fn strip_prefix + ?Sized>(&self, prefix: &P) -> Option<&[T]> - where - T: PartialEq, - { - // This function will need rewriting if and when SlicePattern becomes more sophisticated. - let prefix = prefix.as_slice(); - let n = prefix.len(); - if n <= self.len() { - let (head, tail) = self.split_at(n); - if head == prefix { - return Some(tail); - } - } - None + pub fn strip_prefix<'a, P: Pattern<&'a [T]>>(&'a self, prefix: P) -> Option<&'a [T]> { + prefix.strip_prefix_of(self) } /// Returns a subslice with the suffix removed. /// - /// If the slice ends with `suffix`, returns the subslice before the suffix, wrapped in `Some`. - /// If `suffix` is empty, simply returns the original slice. - /// - /// If the slice does not end with `suffix`, returns `None`. + /// If `suffix` matches at the end of the slice, returns the subslice before + /// the suffix, wrapped in `Some`. Otherwise returns `None`. 
///
     /// # Examples
     ///
@@ -2364,20 +2355,200 @@ impl<T> [T] {
     /// ```
     #[must_use = "returns the subslice without modifying the original"]
     #[stable(feature = "slice_strip", since = "1.51.0")]
-    pub fn strip_suffix<P: SlicePattern<Item = T> + ?Sized>(&self, suffix: &P) -> Option<&[T]>
+    pub fn strip_suffix<'a, P>(&'a self, suffix: P) -> Option<&'a [T]>
     where
-        T: PartialEq,
+        P: Pattern<&'a [T]>,
+        <P as Pattern<&'a [T]>>::Searcher: ReverseSearcher<&'a [T]>,
     {
-        // This function will need rewriting if and when SlicePattern becomes more sophisticated.
-        let suffix = suffix.as_slice();
-        let (len, n) = (self.len(), suffix.len());
-        if n <= len {
-            let (head, tail) = self.split_at(len - n);
-            if tail == suffix {
-                return Some(head);
-            }
+        suffix.strip_suffix_of(self)
+    }
+
+    /// Returns index of the first occurrence of the specified `pattern` in the
+    /// slice.
+    ///
+    /// Returns [`None`] if the pattern doesn't match.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(pattern)]
+    ///
+    /// let nums = &[10, 40, 30, 40];
+    /// assert_eq!(nums.find(&40), Some(1));
+    /// assert_eq!(nums.find(&[40, 30]), Some(1));
+    /// assert_eq!(nums.find(&42), None);
+    ///
+    /// let s = b"The swift brown fox";
+    ///
+    /// assert_eq!(s.find(b"w"), Some(5));
+    /// assert_eq!(s.find(&b'w'), Some(5));
+    /// assert_eq!(s.find(b"swift"), Some(4));
+    /// assert_eq!(s.find(b"slow"), None);
+    /// ```
+    #[unstable(feature = "pattern", issue = "27721")]
+    pub fn find<'a, P: Pattern<&'a [T]>>(&'a self, pattern: P) -> Option<usize> {
+        pattern.into_searcher(self).next_match().map(|(i, _)| i)
+    }
+
+    /// Returns index of the last occurrence of the specified `pattern` in the
+    /// slice.
+    ///
+    /// Returns [`None`] if the pattern doesn't match.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(pattern)]
+    ///
+    /// let nums = &[10, 40, 30, 40];
+    /// assert_eq!(nums.rfind(&40), Some(3));
+    /// assert_eq!(nums.rfind(&[40, 30]), Some(1));
+    /// assert_eq!(nums.rfind(&42), None);
+    ///
+    /// let s = b"The swift brown fox";
+    ///
+    /// assert_eq!(s.rfind(b"w"), Some(13));
+    /// assert_eq!(s.rfind(&b'w'), Some(13));
+    /// assert_eq!(s.rfind(b"swift"), Some(4));
+    /// assert_eq!(s.rfind(b"slow"), None);
+    /// ```
+    #[unstable(feature = "pattern", issue = "27721")]
+    pub fn rfind<'a, P>(&'a self, pat: P) -> Option<usize>
+    where
+        P: Pattern<&'a [T], Searcher: ReverseSearcher<&'a [T]>>,
+    {
+        pat.into_searcher(self).next_match_back().map(|(i, _)| i)
+    }
+
+    /// Splits the slice on the first occurrence of the specified `delimiter`
+    /// [pattern] and returns prefix before delimiter and suffix after delimiter.
+    ///
+    /// Returns [`None`] if the pattern doesn't match.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(pattern)]
+    ///
+    /// let s = b"Durarara";
+    ///
+    /// assert_eq!(s.split_once(b"ra"), Some((&b"Du"[..], &b"rara"[..])));
+    /// assert_eq!(s.split_once(b"!"), None);
+    /// ```
+    ///
+    /// [pattern]: crate::slice::pattern
+    #[unstable(feature = "pattern", issue = "27721")]
+    pub fn split_once<'a, P: Pattern<&'a [T]>>(&'a self, delimiter: P) -> Option<(&'a [T], &'a [T])> {
+        let (start, end) = delimiter.into_searcher(self).next_match()?;
+        // SAFETY: `Searcher` is known to return valid indices.
+        unsafe { Some((self.get_unchecked(..start), self.get_unchecked(end..))) }
+    }
+
+    /// Splits the slice on the last occurrence of the specified `delimiter`
+    /// [pattern] and returns prefix before delimiter and suffix after delimiter.
+    ///
+    /// Returns [`None`] if the pattern doesn't match.
+ /// + /// # Examples + /// + /// Simple patterns: + /// + /// ``` + /// # #![feature(pattern)] + /// + /// let s = b"Durarara"; + /// + /// assert_eq!(s.rsplit_once(b"ra"), Some((&b"Durara"[..], &b""[..]))); + /// assert_eq!(s.rsplit_once(b"!"), None); + /// ``` + /// + /// [pattern]: crate::slice::pattern + #[unstable(feature = "pattern", issue = "27721")] + pub fn rsplit_once<'a, P>(&'a self, delimiter: P) -> Option<(&'a [T], &'a [T])> + where + P: Pattern<&'a [T], Searcher: ReverseSearcher<&'a [T]>>, + { + let (start, end) = delimiter.into_searcher(self).next_match_back()?; + // SAFETY: `Searcher` is known to return valid indices. + unsafe { Some((self.get_unchecked(..start), self.get_unchecked(end..))) } + } + + /// Returns a slice with all prefixes and suffixes that match the `pattern` + /// repeatedly removed. + /// + /// # Examples + /// + /// ``` + /// # #![feature(pattern)] + /// + /// let s = b"111foo1bar111".as_ref(); + /// assert_eq!(s.trim_matches(&b'1'), &b"foo1bar"[..]); + /// ``` + #[unstable(feature = "pattern", issue = "27721")] + pub fn trim_matches<'a, P>(&'a self, pat: P) -> &'a [T] + where + P: Pattern<&'a [T], Searcher: DoubleEndedSearcher<&'a [T]>>, + { + let mut i = 0; + let mut j = 0; + let mut matcher = pat.into_searcher(self); + if let Some((a, b)) = matcher.next_reject() { + i = a; + j = b; // Remember earliest known match, correct it below if + // last match is different + } + if let Some((_, b)) = matcher.next_reject_back() { + j = b; } - None + // SAFETY: `Searcher` is known to return valid indices. 
+        unsafe { self.get_unchecked(i..j) }
+    }
+
+    /// Returns a slice with all prefixes that match the `pat` pattern repeatedly removed.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(pattern)]
+    ///
+    /// let s = b"111foo1bar111".as_ref();
+    /// assert_eq!(s.trim_start_matches(&b'1'), &b"foo1bar111"[..]);
+    /// assert_eq!(s.trim_start_matches(b"11".as_ref()), &b"1foo1bar111"[..]);
+    /// ```
+    #[unstable(feature = "pattern", issue = "27721")]
+    pub fn trim_start_matches<'a, P: Pattern<&'a [T]>>(&'a self, pat: P) -> &'a [T] {
+        let mut i = self.len();
+        let mut matcher = pat.into_searcher(self);
+        if let Some((a, _)) = matcher.next_reject() {
+            i = a;
+        }
+        // SAFETY: `Searcher` is known to return valid indices.
+        unsafe { self.get_unchecked(i..self.len()) }
+    }
+
+    /// Returns a slice with all suffixes that match the `pat` pattern repeatedly removed.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(pattern)]
+    ///
+    /// let s = b"111foo1bar111".as_ref();
+    /// assert_eq!(s.trim_end_matches(&b'1'), &b"111foo1bar"[..]);
+    /// assert_eq!(s.trim_end_matches(b"11".as_ref()), &b"111foo1bar1"[..]);
+    /// ```
+    #[unstable(feature = "pattern", issue = "27721")]
+    pub fn trim_end_matches<'a, P>(&'a self, pat: P) -> &'a [T]
+    where
+        P: Pattern<&'a [T], Searcher: ReverseSearcher<&'a [T]>>,
+    {
+        let mut j = 0;
+        let mut matcher = pat.into_searcher(self);
+        if let Some((_, b)) = matcher.next_reject_back() {
+            j = b;
+        }
+        // SAFETY: `Searcher` is known to return valid indices.
+        unsafe { self.get_unchecked(0..j) }
     }
 
     /// Binary searches this slice for a given element.
@@ -4407,38 +4578,6 @@ impl<T> const Default for &mut [T] {
     }
 }
 
-#[unstable(feature = "slice_pattern", reason = "stopgap trait for slice patterns", issue = "56345")]
-/// Patterns in slices - currently, only used by `strip_prefix` and `strip_suffix`. At a future
-/// point, we hope to generalise `core::str::Pattern` (which at the time of writing is limited to
-/// `str`) to slices, and then this trait will be replaced or abolished.
-pub trait SlicePattern {
-    /// The element type of the slice being matched on.
- type Item; - - /// Currently, the consumers of `SlicePattern` need a slice. - fn as_slice(&self) -> &[Self::Item]; -} - -#[stable(feature = "slice_strip", since = "1.51.0")] -impl SlicePattern for [T] { - type Item = T; - - #[inline] - fn as_slice(&self) -> &[Self::Item] { - self - } -} - -#[stable(feature = "slice_strip", since = "1.51.0")] -impl SlicePattern for [T; N] { - type Item = T; - - #[inline] - fn as_slice(&self) -> &[Self::Item] { - self - } -} - /// This checks every index against each other, and against `len`. /// /// This will do `binomial(N + 1, 2) = N * (N + 1) / 2 = 0, 1, 3, 6, 10, ..` diff --git a/library/core/src/slice/pattern.rs b/library/core/src/slice/pattern.rs new file mode 100644 index 0000000000000..baa81ad007001 --- /dev/null +++ b/library/core/src/slice/pattern.rs @@ -0,0 +1,769 @@ +#![unstable( + feature = "pattern", + reason = "API not fully fleshed out and ready to be stabilized", + issue = "27721" +)] + +use crate::pattern::{Haystack, Pattern, Predicate, SearchStep}; +use crate::pattern; + +use super::cmp::SliceContains; + +///////////////////////////////////////////////////////////////////////////// +// Impl for Haystack +///////////////////////////////////////////////////////////////////////////// + +impl<'a, T> Haystack for &'a [T] { + type Cursor = usize; + + fn cursor_at_front(&self) -> usize { 0 } + fn cursor_at_back(&self) -> usize { self.len() } + + unsafe fn split_at_cursor_unchecked(self, pos: usize) -> (Self, Self) { + // SAFETY: Caller promises cursor is valid. + unsafe { (self.get_unchecked(..pos), self.get_unchecked(pos..)) } + } +} + +///////////////////////////////////////////////////////////////////////////// +// Impl Pattern for &T +///////////////////////////////////////////////////////////////////////////// + +/// Pattern implementation for searching for an element in a slice. +/// +/// The pattern matches a single element in a slice. 
+/// +/// # Examples +/// +/// ``` +/// # #![feature(pattern)] +/// +/// let nums = &[10, 40, 30, 40]; +/// assert_eq!(nums.find(&40), Some(1)); +/// assert_eq!(nums.find(&42), None); +/// ``` +impl<'hs, 'p, T: PartialEq> Pattern<&'hs [T]> for &'p T { + type Searcher = ElementSearcher<'hs, 'p, T>; + + fn into_searcher(self, haystack: &'hs [T]) -> Self::Searcher { + // TODO: We probably should specialise this for u8 and i8 the same way + // we specialise SliceContains + Self::Searcher::new(haystack, self) + } + + fn is_contained_in(self, haystack: &'hs [T]) -> bool { + T::slice_contains_element(haystack, self) + } + + fn is_prefix_of(self, haystack: &'hs [T]) -> bool { + haystack.first() == Some(self) + } + + fn is_suffix_of(self, haystack: &'hs [T]) -> bool { + haystack.last() == Some(self) + } + + fn strip_prefix_of(self, haystack: &'hs [T]) -> Option<&'hs [T]> { + match haystack.split_first() { + Some((first, tail)) if first == self => Some(tail), + _ => None, + } + } + + fn strip_suffix_of(self, haystack: &'hs [T]) -> Option<&'hs [T]> { + match haystack.split_last() { + Some((last, head)) if last == self => Some(head), + _ => None, + } + } +} + +#[derive(Clone, Debug)] +pub struct ElementSearcher<'hs, 'p, T> { + /// Haystack we’re searching in. + haystack: &'hs [T], + /// Element we’re searching for. + needle: &'p T, + /// Internal state of the searcher. 
+ state: PredicateSearchState, +} + +impl<'hs, 'p, T> ElementSearcher<'hs, 'p, T> { + fn new(haystack: &'hs [T], needle: &'p T) -> Self { + Self { + haystack, + needle, + state: PredicateSearchState::new(haystack.len()) + } + } +} + +unsafe impl<'hs, 'p, T: PartialEq> pattern::Searcher<&'hs [T]> for ElementSearcher<'hs, 'p, T> { + fn haystack(&self) -> &'hs [T] { self.haystack } + + fn next(&mut self) -> SearchStep { + self.state.next(self.haystack, &mut |element| element == self.needle) + } + + fn next_match(&mut self) -> Option<(usize, usize)> { + self.state.next_match(self.haystack, &mut |element| element == self.needle) + } + + fn next_reject(&mut self) -> Option<(usize, usize)> { + self.state.next_reject(self.haystack, &mut |element| element == self.needle) + } +} + +unsafe impl<'hs, 'p, T: PartialEq> pattern::ReverseSearcher<&'hs [T]> for ElementSearcher<'hs, 'p, T> { + fn next_back(&mut self) -> SearchStep { + self.state.next_back(self.haystack, &mut |element| element == self.needle) + } + + fn next_match_back(&mut self) -> Option<(usize, usize)> { + self.state.next_match_back(self.haystack, &mut |element| element == self.needle) + } + + fn next_reject_back(&mut self) -> Option<(usize, usize)> { + self.state.next_reject_back(self.haystack, &mut |element| element == self.needle) + } +} + +impl<'hs, 'p, T: PartialEq> pattern::DoubleEndedSearcher<&'hs [T]> for ElementSearcher<'hs, 'p, T> {} + +///////////////////////////////////////////////////////////////////////////// +// Impl Pattern for Predicate +///////////////////////////////////////////////////////////////////////////// + +/// Pattern implementation for searching for an element matching given +/// predicate. 
+/// +/// # Examples +/// +/// ``` +/// # #![feature(pattern)] +/// use core::pattern::predicate; +/// +/// let nums = &[10, 40, 30, 40]; +/// assert_eq!(nums.find(predicate(|n| n % 3 == 0)), Some(2)); +/// assert_eq!(nums.find(predicate(|n| n % 2 == 1)), None); +/// ``` +impl<'hs, T, F: FnMut(&'hs T) -> bool> Pattern<&'hs [T]> for Predicate<&'hs T, F> { + type Searcher = PredicateSearcher<'hs, T, F>; + + fn into_searcher(self, haystack: &'hs [T]) -> Self::Searcher { + Self::Searcher::new(haystack, self) + } + + fn is_contained_in(mut self, haystack: &'hs [T]) -> bool { + haystack.iter().any(|element| self.test(element)) + } + + fn is_prefix_of(mut self, haystack: &'hs [T]) -> bool { + haystack.first().filter(|element| self.test(element)).is_some() + } + + fn is_suffix_of(mut self, haystack: &'hs [T]) -> bool { + haystack.last().filter(|element| self.test(element)).is_some() + } + + fn strip_prefix_of(mut self, haystack: &'hs [T]) -> Option<&'hs [T]> { + match haystack.split_first() { + Some((first, tail)) if self.test(first) => Some(tail), + _ => None, + } + } + + fn strip_suffix_of(mut self, haystack: &'hs [T]) -> Option<&'hs [T]> { + match haystack.split_last() { + Some((last, head)) if self.test(last) => Some(head), + _ => None, + } + } +} + +#[derive(Clone, Debug)] +pub struct PredicateSearcher<'hs, T, F> { + /// Haystack we’re searching in. + haystack: &'hs [T], + /// Predicate used to match elements. + pred: Predicate<&'hs T, F>, + /// Internal state of the searcher. 
+ state: PredicateSearchState, +} + +impl<'hs, T, F> PredicateSearcher<'hs, T, F> { + fn new(haystack: &'hs [T], pred: Predicate<&'hs T, F>) -> Self { + let state = PredicateSearchState::new(haystack.len()); + Self { haystack, pred, state } + } +} + +unsafe impl<'hs, T, F: FnMut(&'hs T) -> bool> pattern::Searcher<&'hs [T]> for PredicateSearcher<'hs, T, F> { + fn haystack(&self) -> &'hs [T] { self.haystack } + + fn next(&mut self) -> SearchStep { + self.state.next(self.haystack, self.pred.as_fn()) + } + + fn next_match(&mut self) -> Option<(usize, usize)> { + self.state.next_match(self.haystack, self.pred.as_fn()) + } + + fn next_reject(&mut self) -> Option<(usize, usize)> { + self.state.next_reject(self.haystack, self.pred.as_fn()) + } +} + +unsafe impl<'hs, T, F: FnMut(&'hs T) -> bool> pattern::ReverseSearcher<&'hs [T]> for PredicateSearcher<'hs, T, F> { + fn next_back(&mut self) -> SearchStep { + self.state.next_back(self.haystack, self.pred.as_fn()) + } + + fn next_match_back(&mut self) -> Option<(usize, usize)> { + self.state.next_match_back(self.haystack, self.pred.as_fn()) + } + + fn next_reject_back(&mut self) -> Option<(usize, usize)> { + self.state.next_reject_back(self.haystack, self.pred.as_fn()) + } +} + +///////////////////////////////////////////////////////////////////////////// +// Impl Pattern for &[T] and &[T; N] +///////////////////////////////////////////////////////////////////////////// + +/// Pattern implementation for searching a subslice in a slice. +/// +/// The pattern matches a subslice of a larger slice. An empty pattern matches +/// around every character in a slice. +/// +/// Note: Other than with slice patterns matching `str`, this pattern matches +/// a subslice rather than a single element of haystack being equal to element +/// of the pattern. 
+/// +/// # Examples +/// +/// ``` +/// # #![feature(pattern)] +/// use core::pattern::{Pattern, Searcher}; +/// +/// // Simple usage +/// let nums: &[i32] = &[10, 40, 30, 40]; +/// assert_eq!(nums.find(&[40]), Some(1)); +/// assert_eq!(nums.find(&[40, 30]), Some(1)); +/// assert_eq!(nums.find(&[42, 30]), None); +/// +/// // Empty pattern +/// let empty: &[i32] = &[]; +/// let mut s = empty.into_searcher(nums); +/// assert_eq!(s.next_match(), Some((0, 0))); +/// assert_eq!(s.next_match(), Some((1, 1))); +/// assert_eq!(s.next_match(), Some((2, 2))); +/// assert_eq!(s.next_match(), Some((3, 3))); +/// assert_eq!(s.next_match(), Some((4, 4))); +/// assert_eq!(s.next_match(), None); +/// +/// // Difference with str patterns. +/// assert_eq!("Foo".find(&['f', 'o']), Some(1)); +/// // -- "Foo" contains letter 'o' at index 1. +/// assert_eq!(b"Foo".find(&[b'f', b'o']), None); +/// // -- b"Foo" doesn’t contain subslice b"fo". +/// ``` +impl<'hs, 'p, T: PartialEq> Pattern<&'hs [T]> for &'p [T] { + type Searcher = Searcher<'hs, 'p, T>; + + fn into_searcher(self, haystack: &'hs [T]) -> Self::Searcher { + Searcher::new(haystack, self) + } + + fn is_contained_in(self, haystack: &'hs [T]) -> bool { + if self.len() == 0 { + true + } else if self.len() == 1 { + T::slice_contains_element(haystack, &self[0]) + } else if self.len() < haystack.len() { + T::slice_contains_slice(haystack, self) + } else if self.len() == haystack.len() { + self == haystack + } else { + false + } + } + #[inline] + fn is_prefix_of(self, haystack: &'hs [T]) -> bool { + haystack.get(..self.len()).map_or(false, |prefix| prefix == self) + } + + + #[inline] + fn is_suffix_of(self, haystack: &'hs [T]) -> bool { + haystack + .len() + .checked_sub(self.len()) + .map_or(false, |n| &haystack[n..] == self) + } + + #[inline] + fn strip_prefix_of(self, haystack: &'hs [T]) -> Option<&'hs [T]> { + self.is_prefix_of(haystack).then(|| { + // SAFETY: prefix was just verified to exist. 
+ unsafe { haystack.get_unchecked(self.len()..) } + }) + } + + #[inline] + fn strip_suffix_of(self, haystack: &'hs [T]) -> Option<&'hs [T]> { + self.is_suffix_of(haystack).then(|| { + let n = haystack.len() - self.len(); + // SAFETY: suffix was just verified to exist. + unsafe { haystack.get_unchecked(..n) } + }) + } +} + +/// Pattern implementation for searching a subslice in a slice. +/// +/// This is identical to a slice pattern: the pattern matches a subslice of +/// a larger slice. An empty array matches around every character in a slice. +/// +/// Note: Other than with slice patterns matching `str`, this pattern matches +/// a subslice rather than a single element of haystack being equal to element +/// of the pattern. +/// +/// # Examples +/// +/// ``` +/// # #![feature(pattern)] +/// +/// let slice: &[u8] = b"The quick brown fox"; +/// assert_eq!(slice.find(b"quick"), Some(4)); +/// assert_eq!(slice.find(b"slow"), None); +/// assert_eq!(slice.find(b""), Some(0)); +/// ``` +impl<'hs, 'p, T: PartialEq, const N: usize> Pattern<&'hs [T]> for &'p [T; N] { + type Searcher = Searcher<'hs, 'p, T>; + + fn into_searcher(self, haystack: &'hs [T]) -> Searcher<'hs, 'p, T> { + Searcher::new(haystack, &self[..]) + } + + #[inline(always)] + fn is_contained_in(self, haystack: &'hs [T]) -> bool { + (&self[..]).is_contained_in(haystack) + } + + #[inline(always)] + fn is_prefix_of(self, haystack: &'hs [T]) -> bool { + (&self[..]).is_prefix_of(haystack) + } + + #[inline(always)] + fn is_suffix_of(self, haystack: &'hs [T]) -> bool { + (&self[..]).is_suffix_of(haystack) + } + + #[inline(always)] + fn strip_prefix_of(self, haystack: &'hs [T]) -> Option<&'hs [T]> { + (&self[..]).strip_prefix_of(haystack) + } + + #[inline(always)] + fn strip_suffix_of(self, haystack: &'hs [T]) -> Option<&'hs [T]> { + (&self[..]).strip_suffix_of(haystack) + } +} + +#[derive(Clone, Debug)] +/// Associated type for `<&'p [T] as Pattern<&'hs [T]>>::Searcher`. 
+pub struct Searcher<'hs, 'p, T> { + /// Haystack we’re searching in. + haystack: &'hs [T], + /// Subslice we’re searching for. + needle: &'p [T], + /// Internal state of the searcher. + state: SearcherState, +} + +#[derive(Clone, Debug)] +enum SearcherState { + Empty(EmptySearcherState), + Element(PredicateSearchState), + Naive(NaiveSearcherState), +} + +impl<'hs, 'p, T: PartialEq> Searcher<'hs, 'p, T> { + fn new(haystack: &'hs [T], needle: &'p [T]) -> Searcher<'hs, 'p, T> { + let state = match needle.len() { + 0 => SearcherState::Empty(EmptySearcherState::new(haystack.len())), + 1 => SearcherState::Element(PredicateSearchState::new(haystack.len())), + _ => SearcherState::Naive(NaiveSearcherState::new(haystack.len())), + }; + Searcher { haystack, needle, state } + } +} + +macro_rules! delegate { + ($method:ident -> $ret:ty) => { + fn $method(&mut self) -> $ret { + match &mut self.state { + SearcherState::Empty(state) => state.$method(), + SearcherState::Element(state) => state.$method(self.haystack, &mut |element| { + // SAFETY: SearcherState::Element is created if and only if + // needle.len() == 1. 
+ element == unsafe { self.needle.get_unchecked(0) } + }), + SearcherState::Naive(state) => state.$method(self.haystack, self.needle), + } + } + } +} + +unsafe impl<'hs, 'p, T: PartialEq> pattern::Searcher<&'hs [T]> for Searcher<'hs, 'p, T> { + fn haystack(&self) -> &'hs [T] { + self.haystack + } + + delegate!(next -> SearchStep); + delegate!(next_match -> Option<(usize, usize)>); + delegate!(next_reject -> Option<(usize, usize)>); +} + +unsafe impl<'hs, 'p, T: PartialEq> pattern::ReverseSearcher<&'hs [T]> for Searcher<'hs, 'p, T> { + delegate!(next_back -> SearchStep); + delegate!(next_match_back -> Option<(usize, usize)>); + delegate!(next_reject_back -> Option<(usize, usize)>); +} + +///////////////////////////////////////////////////////////////////////////// +// Searching for an empty pattern +///////////////////////////////////////////////////////////////////////////// + +#[derive(Clone, Debug)] +struct EmptySearcherState { + start: usize, + end: usize, + is_match_fw: bool, + is_match_bw: bool, + // Needed in case of an empty haystack, see #85462 + is_finished: bool, +} + +impl EmptySearcherState { + fn new(haystack_length: usize) -> Self { + Self { + start: 0, + end: haystack_length, + is_match_fw: true, + is_match_bw: true, + is_finished: false, + } + } + + fn next(&mut self) -> SearchStep { + if self.is_finished { + return SearchStep::Done; + } + let is_match = self.is_match_fw; + self.is_match_fw = !self.is_match_fw; + let pos = self.start; + if is_match { + SearchStep::Match(pos, pos) + } else if self.start < self.end { + self.start += 1; + SearchStep::Reject(pos, pos + 1) + } else { + self.is_finished = true; + SearchStep::Done + } + } + + fn next_back(&mut self) -> SearchStep { + if self.is_finished { + return SearchStep::Done; + } + let is_match = self.is_match_bw; + self.is_match_bw = !self.is_match_bw; + let end = self.end; + if is_match { + SearchStep::Match(end, end) + } else if self.end <= self.start { + self.is_finished = true; + 
SearchStep::Done + } else { + self.end -= 1; + SearchStep::Reject(end - 1, end) + } + } + + fn next_match(&mut self) -> Option<(usize, usize)> { + pattern::loop_next::(|| self.next()) + } + + fn next_reject(&mut self) -> Option<(usize, usize)> { + pattern::loop_next::(|| self.next()) + } + + fn next_match_back(&mut self) -> Option<(usize, usize)> { + pattern::loop_next::(|| self.next_back()) + } + + fn next_reject_back(&mut self) -> Option<(usize, usize)> { + pattern::loop_next::(|| self.next_back()) + } +} + +///////////////////////////////////////////////////////////////////////////// +// Searching for a single element +///////////////////////////////////////////////////////////////////////////// + +/// State of a searcher which tests one element at a time using a provided +/// predicate. +/// +/// Matches are always one-element long. Rejects can be arbitrarily long. +#[derive(Clone, Debug)] +struct PredicateSearchState { + /// Position to start searching from. Updated as we find new matches. + start: usize, + /// Position to end searching at. Updated as we find new matches. + end: usize, + /// If true, we’re finished searching or haystack[start] is a match. + is_match_fw: bool, + /// If true, we’re finished searching or haystack[end-1] is a match. 
+ is_match_bw: bool +} + +impl PredicateSearchState { + fn new(haystack_length: usize) -> Self { + Self { + start: 0, + end: haystack_length, + is_match_fw: false, + is_match_bw: false, + } + } + + fn next<'hs, T, F>(&mut self, hs: &'hs [T], pred: &mut F) -> SearchStep + where F: FnMut(&'hs T) -> bool, + { + if self.start >= self.end { + return SearchStep::Done; + } + let count = if self.is_match_fw { + self.is_match_fw = false; + 0 + } else { + self.count(false, hs, pred) + }; + if count == 0 { + self.start += 1; + SearchStep::Match(self.start - 1, self.start) + } else { + self.is_match_fw = true; + let pos = self.start; + self.start += count; + SearchStep::Reject(pos, self.start) + } + } + + fn next_match<'hs, T, F>(&mut self, hs: &'hs [T], pred: &mut F) -> Option<(usize, usize)> + where F: FnMut(&'hs T) -> bool, + { + pattern::loop_next::(|| self.next(hs, pred)) + } + + fn next_reject<'hs, T, F>(&mut self, hs: &'hs [T], pred: &mut F) -> Option<(usize, usize)> + where F: FnMut(&'hs T) -> bool, + { + if self.start >= self.end { + return None; + } + + if self.is_match_fw { + self.start += 1; + } + self.start += self.count(true, hs, pred); + + let count = self.count(false, hs, pred); + if count == 0 { + None + } else { + self.is_match_fw = true; + let pos = self.start; + self.start += count; + Some((pos, self.start)) + } + } + + fn next_back<'hs, T, F>(&mut self, hs: &'hs [T], pred: &mut F) -> SearchStep + where F: FnMut(&'hs T) -> bool, + { + if self.start >= self.end { + return SearchStep::Done + } + let count = if self.is_match_bw { + self.is_match_bw = false; + 0 + } else { + self.count_back(false, hs, pred) + }; + let pos = self.end; + if count == 0 { + self.end -= 1; + SearchStep::Match(self.end, pos) + } else { + self.is_match_bw = true; + self.end -= count; + SearchStep::Reject(self.end, pos) + } + } + + fn next_match_back<'hs, T, F>(&mut self, hs: &'hs [T], pred: &mut F) -> Option<(usize, usize)> + where F: FnMut(&'hs T) -> bool, + { + 
pattern::loop_next::<true, _>(|| self.next_back(hs, pred))
+    }
+
+    /// Returns the last not-yet-reported run of elements for which `pred` is
+    /// false, as a `(start, end)` index pair, or `None` if only matching
+    /// elements (or nothing) remain in `hs[self.start..self.end]`.
+    fn next_reject_back<'hs, T, F>(&mut self, hs: &'hs [T], pred: &mut F) -> Option<(usize, usize)>
+    where F: FnMut(&'hs T) -> bool,
+    {
+        if self.start >= self.end {
+            return None;
+        }
+
+        // The backward flag describes `hs[end - 1]`; the forward flag describes
+        // `hs[start]` and must not be consulted here, otherwise an unmatched
+        // last element is dropped after a forward search has run.
+        if self.is_match_bw {
+            self.end -= 1;
+        }
+        // Skip the trailing run of matching elements.
+        self.end -= self.count_back(true, hs, pred);
+
+        let count = self.count_back(false, hs, pred);
+        if count == 0 {
+            None
+        } else {
+            self.is_match_bw = true;
+            let pos = self.end;
+            self.end -= count;
+            Some((self.end, pos))
+        }
+    }
+
+    /// Counts how many leading elements of `hs[self.start..self.end]` make
+    /// `pred` return `want`.
+    fn count<'hs, T, F>(&self, want: bool, hs: &'hs [T], pred: &mut F) -> usize
+    where F: FnMut(&'hs T) -> bool,
+    {
+        hs[self.start..self.end]
+            .iter()
+            .map(pred)
+            .take_while(|&matches| matches == want)
+            .count()
+    }
+
+    /// Counts how many trailing elements of `hs[self.start..self.end]` make
+    /// `pred` return `want`.
+    fn count_back<'hs, T, F>(&self, want: bool, hs: &'hs [T], pred: &mut F) -> usize
+    where F: FnMut(&'hs T) -> bool,
+    {
+        hs[self.start..self.end]
+            .iter()
+            .rev()
+            .map(pred)
+            .take_while(|&matches| matches == want)
+            .count()
+    }
+}
+
+/////////////////////////////////////////////////////////////////////////////
+// Searching for a subslice element
+/////////////////////////////////////////////////////////////////////////////
+
+// TODO: Implement something smarter perhaps?  Or have specialisation for
+// different T?  We’re not using core::str::pattern::TwoWaySearcher because it
+// requires PartialOrd elements.  Specifically, TwoWaySearcher::maximal_suffix
+// and TwoWaySearcher::reverse_maximal_suffix methods compare elements.  For the
+// time being, use naive O(nk) search.
+#[derive(Clone, Debug)]
+/// State of a searcher which looks for a needle of two or more elements by
+/// comparing it against every window of the haystack.
+///
+/// `start..end` is the part of the haystack not yet reported.  The state is
+/// meant to be driven from one end only (either the `next*` or the
+/// `next*_back` methods).
+pub(super) struct NaiveSearcherState {
+    start: usize,
+    end: usize,
+    is_match_fw: bool,
+    is_match_bw: bool,
+}
+
+impl NaiveSearcherState {
+    pub(super) fn new(haystack_length: usize) -> Self {
+        Self {
+            start: 0,
+            end: haystack_length,
+            is_match_fw: false,
+            is_match_bw: false,
+        }
+    }
+
+    /// Returns the next step when searching from the front.
+    pub(super) fn next<T: PartialEq>(&mut self, haystack: &[T], needle: &[T]) -> SearchStep {
+        if self.end - self.start < needle.len() {
+            // Too few elements are left for another match.  Whatever remains
+            // must still be reported as a reject so that the steps cover the
+            // whole haystack; otherwise `next_reject` misses a trailing
+            // remainder that directly follows a match.
+            if self.start < self.end {
+                let pos = self.start;
+                self.start = self.end;
+                SearchStep::Reject(pos, self.end)
+            } else {
+                SearchStep::Done
+            }
+        } else if self.is_match_fw {
+            let pos = self.start;
+            self.start += needle.len();
+            self.is_match_fw = false;
+            SearchStep::Match(pos, self.start)
+        } else {
+            let count = haystack[self.start..self.end]
+                .windows(needle.len())
+                .take_while(|window| *window != needle)
+                .count();
+            let pos = self.start;
+            if count == 0 {
+                self.start += needle.len();
+                SearchStep::Match(pos, self.start)
+            } else {
+                self.start += count;
+                // We’ve either run out of windows or stopped at the start
+                // of a match, so mark is_match_fw.
+                self.is_match_fw = true;
+                SearchStep::Reject(pos, self.start)
+            }
+        }
+    }
+
+    /// Returns the next step when searching from the back.
+    pub(super) fn next_back<T: PartialEq>(&mut self, haystack: &[T], needle: &[T]) -> SearchStep {
+        if self.end - self.start < needle.len() {
+            // Mirror of `next`: report the leftover head as a final reject.
+            if self.start < self.end {
+                let pos = self.end;
+                self.end = self.start;
+                SearchStep::Reject(self.start, pos)
+            } else {
+                SearchStep::Done
+            }
+        } else if self.is_match_bw {
+            let pos = self.end;
+            self.end -= needle.len();
+            self.is_match_bw = false;
+            SearchStep::Match(self.end, pos)
+        } else {
+            let count = haystack[self.start..self.end]
+                .windows(needle.len())
+                .rev()
+                .take_while(|window| *window != needle)
+                .count();
+            let pos = self.end;
+            if count == 0 {
+                self.end -= needle.len();
+                SearchStep::Match(self.end, pos)
+            } else {
+                self.end -= count;
+                // We’ve either run out of windows or stopped at the end
+                // of a match, so mark is_match_bw.
+                self.is_match_bw = true;
+                SearchStep::Reject(self.end, pos)
+            }
+        }
+    }
+
+    pub(super) fn next_match<T: PartialEq>(&mut self, haystack: &[T], needle: &[T]) -> Option<(usize, usize)> {
+        pattern::loop_next::<true, _>(|| self.next(haystack, needle))
+    }
+
+    pub(super) fn next_reject<T: PartialEq>(&mut self, haystack: &[T], needle: &[T]) -> Option<(usize, usize)> {
+        pattern::loop_next::<false, _>(|| self.next(haystack, needle))
+    }
+
+    pub(super) fn next_match_back<T: PartialEq>(&mut self, haystack: &[T], needle: &[T]) -> Option<(usize, usize)> {
+        pattern::loop_next::<true, _>(|| self.next_back(haystack, needle))
+    }
+
+    pub(super) fn next_reject_back<T: PartialEq>(&mut self, haystack: &[T], needle: &[T]) -> Option<(usize, usize)> {
+        pattern::loop_next::<false, _>(|| self.next_back(haystack, needle))
+    }
+}
diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs
index 95c682f42d0c9..b323a0709530b 100644
--- a/library/core/src/str/iter.rs
+++ b/library/core/src/str/iter.rs
@@ -7,11 +7,10 @@ use crate::iter::{Copied, Filter, FusedIterator, Map, TrustedLen};
 use crate::iter::{TrustedRandomAccess, TrustedRandomAccessNoCoerce};
 use crate::ops::Try;
 use crate::option;
+use crate::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher};
 use crate::slice::{self, Split as SliceSplit};
 
 use super::from_utf8_unchecked;
-use super::pattern::Pattern;
-use super::pattern::{DoubleEndedSearcher, ReverseSearcher, Searcher};
 use super::validations::{next_code_point, next_code_point_reverse};
 use super::LinesAnyMap;
 use super::{BytesIsNotEmpty, UnsafeBytesToStr};
@@ -361,7 +360,7 @@ macro_rules! derive_pattern_clone {
     (clone $t:ident with |$s:ident| $e:expr) => {
         impl<'a, P> Clone for $t<'a, P>
         where
-            P: Pattern<'a, Searcher: Clone>,
+            P: Pattern<&'a str, Searcher: Clone>,
         {
             fn clone(&self) -> Self {
                 let $s = self;
@@ -374,7 +373,7 @@ macro_rules! derive_pattern_clone {
 /// This macro generates two public iterator structs
 /// wrapping a private internal one that makes use of the `Pattern` API.
/// -/// For all patterns `P: Pattern<'a>` the following items will be +/// For all patterns `P: Pattern<&'a str>` the following items will be /// generated (generics omitted): /// /// struct $forward_iterator($internal_iterator); @@ -434,12 +433,14 @@ macro_rules! generate_pattern_iterators { } => { $(#[$forward_iterator_attribute])* $(#[$common_stability_attribute])* - pub struct $forward_iterator<'a, P: Pattern<'a>>(pub(super) $internal_iterator<'a, P>); + pub struct $forward_iterator<'a, P: Pattern<&'a str>>( + pub(super) $internal_iterator<'a, P> + ); $(#[$common_stability_attribute])* impl<'a, P> fmt::Debug for $forward_iterator<'a, P> where - P: Pattern<'a, Searcher: fmt::Debug>, + P: Pattern<&'a str, Searcher: fmt::Debug>, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_tuple(stringify!($forward_iterator)) @@ -449,7 +450,7 @@ macro_rules! generate_pattern_iterators { } $(#[$common_stability_attribute])* - impl<'a, P: Pattern<'a>> Iterator for $forward_iterator<'a, P> { + impl<'a, P: Pattern<&'a str>> Iterator for $forward_iterator<'a, P> { type Item = $iterty; #[inline] @@ -461,7 +462,7 @@ macro_rules! generate_pattern_iterators { $(#[$common_stability_attribute])* impl<'a, P> Clone for $forward_iterator<'a, P> where - P: Pattern<'a, Searcher: Clone>, + P: Pattern<&'a str, Searcher: Clone>, { fn clone(&self) -> Self { $forward_iterator(self.0.clone()) @@ -470,12 +471,14 @@ macro_rules! 
generate_pattern_iterators { $(#[$reverse_iterator_attribute])* $(#[$common_stability_attribute])* - pub struct $reverse_iterator<'a, P: Pattern<'a>>(pub(super) $internal_iterator<'a, P>); + pub struct $reverse_iterator<'a, P: Pattern<&'a str>>( + pub(super) $internal_iterator<'a, P> + ); $(#[$common_stability_attribute])* impl<'a, P> fmt::Debug for $reverse_iterator<'a, P> where - P: Pattern<'a, Searcher: fmt::Debug>, + P: Pattern<&'a str, Searcher: fmt::Debug>, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_tuple(stringify!($reverse_iterator)) @@ -487,7 +490,7 @@ macro_rules! generate_pattern_iterators { $(#[$common_stability_attribute])* impl<'a, P> Iterator for $reverse_iterator<'a, P> where - P: Pattern<'a, Searcher: ReverseSearcher<'a>>, + P: Pattern<&'a str, Searcher: ReverseSearcher<&'a str>>, { type Item = $iterty; @@ -500,7 +503,7 @@ macro_rules! generate_pattern_iterators { $(#[$common_stability_attribute])* impl<'a, P> Clone for $reverse_iterator<'a, P> where - P: Pattern<'a, Searcher: Clone>, + P: Pattern<&'a str, Searcher: Clone>, { fn clone(&self) -> Self { $reverse_iterator(self.0.clone()) @@ -508,12 +511,12 @@ macro_rules! generate_pattern_iterators { } #[stable(feature = "fused", since = "1.26.0")] - impl<'a, P: Pattern<'a>> FusedIterator for $forward_iterator<'a, P> {} + impl<'a, P: Pattern<&'a str>> FusedIterator for $forward_iterator<'a, P> {} #[stable(feature = "fused", since = "1.26.0")] impl<'a, P> FusedIterator for $reverse_iterator<'a, P> where - P: Pattern<'a, Searcher: ReverseSearcher<'a>>, + P: Pattern<&'a str, Searcher: ReverseSearcher<&'a str>>, {} generate_pattern_iterators!($($t)* with $(#[$common_stability_attribute])*, @@ -528,7 +531,7 @@ macro_rules! 
generate_pattern_iterators { $(#[$common_stability_attribute])* impl<'a, P> DoubleEndedIterator for $forward_iterator<'a, P> where - P: Pattern<'a, Searcher: DoubleEndedSearcher<'a>>, + P: Pattern<&'a str, Searcher: DoubleEndedSearcher<&'a str>>, { #[inline] fn next_back(&mut self) -> Option<$iterty> { @@ -539,7 +542,7 @@ macro_rules! generate_pattern_iterators { $(#[$common_stability_attribute])* impl<'a, P> DoubleEndedIterator for $reverse_iterator<'a, P> where - P: Pattern<'a, Searcher: DoubleEndedSearcher<'a>>, + P: Pattern<&'a str, Searcher: DoubleEndedSearcher<&'a str>>, { #[inline] fn next_back(&mut self) -> Option<$iterty> { @@ -559,7 +562,7 @@ derive_pattern_clone! { with |s| SplitInternal { matcher: s.matcher.clone(), ..*s } } -pub(super) struct SplitInternal<'a, P: Pattern<'a>> { +pub(super) struct SplitInternal<'a, P: Pattern<&'a str>> { pub(super) start: usize, pub(super) end: usize, pub(super) matcher: P::Searcher, @@ -569,7 +572,7 @@ pub(super) struct SplitInternal<'a, P: Pattern<'a>> { impl<'a, P> fmt::Debug for SplitInternal<'a, P> where - P: Pattern<'a, Searcher: fmt::Debug>, + P: Pattern<&'a str, Searcher: fmt::Debug>, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("SplitInternal") @@ -582,7 +585,7 @@ where } } -impl<'a, P: Pattern<'a>> SplitInternal<'a, P> { +impl<'a, P: Pattern<&'a str>> SplitInternal<'a, P> { #[inline] fn get_end(&mut self) -> Option<&'a str> { if !self.finished { @@ -639,7 +642,7 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> { #[inline] fn next_back(&mut self) -> Option<&'a str> where - P::Searcher: ReverseSearcher<'a>, + P::Searcher: ReverseSearcher<&'a str>, { if self.finished { return None; @@ -676,7 +679,7 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> { #[inline] fn next_back_inclusive(&mut self) -> Option<&'a str> where - P::Searcher: ReverseSearcher<'a>, + P::Searcher: ReverseSearcher<&'a str>, { if self.finished { return None; @@ -746,7 +749,7 @@ generate_pattern_iterators! 
{ delegate double ended; } -impl<'a, P: Pattern<'a>> Split<'a, P> { +impl<'a, P: Pattern<&'a str>> Split<'a, P> { /// Returns remainder of the split string. /// /// If the iterator is empty, returns `None`. @@ -769,7 +772,7 @@ impl<'a, P: Pattern<'a>> Split<'a, P> { } } -impl<'a, P: Pattern<'a>> RSplit<'a, P> { +impl<'a, P: Pattern<&'a str>> RSplit<'a, P> { /// Returns remainder of the split string. /// /// If the iterator is empty, returns `None`. @@ -810,7 +813,7 @@ generate_pattern_iterators! { delegate double ended; } -impl<'a, P: Pattern<'a>> SplitTerminator<'a, P> { +impl<'a, P: Pattern<&'a str>> SplitTerminator<'a, P> { /// Returns remainder of the split string. /// /// If the iterator is empty, returns `None`. @@ -833,7 +836,7 @@ impl<'a, P: Pattern<'a>> SplitTerminator<'a, P> { } } -impl<'a, P: Pattern<'a>> RSplitTerminator<'a, P> { +impl<'a, P: Pattern<&'a str>> RSplitTerminator<'a, P> { /// Returns remainder of the split string. /// /// If the iterator is empty, returns `None`. @@ -861,7 +864,7 @@ derive_pattern_clone! 
{ with |s| SplitNInternal { iter: s.iter.clone(), ..*s } } -pub(super) struct SplitNInternal<'a, P: Pattern<'a>> { +pub(super) struct SplitNInternal<'a, P: Pattern<&'a str>> { pub(super) iter: SplitInternal<'a, P>, /// The number of splits remaining pub(super) count: usize, @@ -869,7 +872,7 @@ pub(super) struct SplitNInternal<'a, P: Pattern<'a>> { impl<'a, P> fmt::Debug for SplitNInternal<'a, P> where - P: Pattern<'a, Searcher: fmt::Debug>, + P: Pattern<&'a str, Searcher: fmt::Debug>, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("SplitNInternal") @@ -879,7 +882,7 @@ where } } -impl<'a, P: Pattern<'a>> SplitNInternal<'a, P> { +impl<'a, P: Pattern<&'a str>> SplitNInternal<'a, P> { #[inline] fn next(&mut self) -> Option<&'a str> { match self.count { @@ -898,7 +901,7 @@ impl<'a, P: Pattern<'a>> SplitNInternal<'a, P> { #[inline] fn next_back(&mut self) -> Option<&'a str> where - P::Searcher: ReverseSearcher<'a>, + P::Searcher: ReverseSearcher<&'a str>, { match self.count { 0 => None, @@ -937,7 +940,7 @@ generate_pattern_iterators! { delegate single ended; } -impl<'a, P: Pattern<'a>> SplitN<'a, P> { +impl<'a, P: Pattern<&'a str>> SplitN<'a, P> { /// Returns remainder of the split string. /// /// If the iterator is empty, returns `None`. @@ -960,7 +963,7 @@ impl<'a, P: Pattern<'a>> SplitN<'a, P> { } } -impl<'a, P: Pattern<'a>> RSplitN<'a, P> { +impl<'a, P: Pattern<&'a str>> RSplitN<'a, P> { /// Returns remainder of the split string. /// /// If the iterator is empty, returns `None`. @@ -988,18 +991,18 @@ derive_pattern_clone! 
{ with |s| MatchIndicesInternal(s.0.clone()) } -pub(super) struct MatchIndicesInternal<'a, P: Pattern<'a>>(pub(super) P::Searcher); +pub(super) struct MatchIndicesInternal<'a, P: Pattern<&'a str>>(pub(super) P::Searcher); impl<'a, P> fmt::Debug for MatchIndicesInternal<'a, P> where - P: Pattern<'a, Searcher: fmt::Debug>, + P: Pattern<&'a str, Searcher: fmt::Debug>, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_tuple("MatchIndicesInternal").field(&self.0).finish() } } -impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> { +impl<'a, P: Pattern<&'a str>> MatchIndicesInternal<'a, P> { #[inline] fn next(&mut self) -> Option<(usize, &'a str)> { self.0 @@ -1011,7 +1014,7 @@ impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> { #[inline] fn next_back(&mut self) -> Option<(usize, &'a str)> where - P::Searcher: ReverseSearcher<'a>, + P::Searcher: ReverseSearcher<&'a str>, { self.0 .next_match_back() @@ -1043,18 +1046,18 @@ derive_pattern_clone! { with |s| MatchesInternal(s.0.clone()) } -pub(super) struct MatchesInternal<'a, P: Pattern<'a>>(pub(super) P::Searcher); +pub(super) struct MatchesInternal<'a, P: Pattern<&'a str>>(pub(super) P::Searcher); impl<'a, P> fmt::Debug for MatchesInternal<'a, P> where - P: Pattern<'a, Searcher: fmt::Debug>, + P: Pattern<&'a str, Searcher: fmt::Debug>, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_tuple("MatchesInternal").field(&self.0).finish() } } -impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> { +impl<'a, P: Pattern<&'a str>> MatchesInternal<'a, P> { #[inline] fn next(&mut self) -> Option<&'a str> { // SAFETY: `Searcher` guarantees that `start` and `end` lie on unicode boundaries. @@ -1067,7 +1070,7 @@ impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> { #[inline] fn next_back(&mut self) -> Option<&'a str> where - P::Searcher: ReverseSearcher<'a>, + P::Searcher: ReverseSearcher<&'a str>, { // SAFETY: `Searcher` guarantees that `start` and `end` lie on unicode boundaries. 
self.0.next_match_back().map(|(a, b)| unsafe { @@ -1213,7 +1216,7 @@ pub struct SplitAsciiWhitespace<'a> { /// /// [`split_inclusive`]: str::split_inclusive #[stable(feature = "split_inclusive", since = "1.51.0")] -pub struct SplitInclusive<'a, P: Pattern<'a>>(pub(super) SplitInternal<'a, P>); +pub struct SplitInclusive<'a, P: Pattern<&'a str>>(pub(super) SplitInternal<'a, P>); #[stable(feature = "split_whitespace", since = "1.1.0")] impl<'a> Iterator for SplitWhitespace<'a> { @@ -1335,7 +1338,7 @@ impl<'a> SplitAsciiWhitespace<'a> { } #[stable(feature = "split_inclusive", since = "1.51.0")] -impl<'a, P: Pattern<'a>> Iterator for SplitInclusive<'a, P> { +impl<'a, P: Pattern<&'a str>> Iterator for SplitInclusive<'a, P> { type Item = &'a str; #[inline] @@ -1345,7 +1348,7 @@ impl<'a, P: Pattern<'a>> Iterator for SplitInclusive<'a, P> { } #[stable(feature = "split_inclusive", since = "1.51.0")] -impl<'a, P: Pattern<'a, Searcher: fmt::Debug>> fmt::Debug for SplitInclusive<'a, P> { +impl<'a, P: Pattern<&'a str, Searcher: fmt::Debug>> fmt::Debug for SplitInclusive<'a, P> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("SplitInclusive").field("0", &self.0).finish() } @@ -1353,14 +1356,14 @@ impl<'a, P: Pattern<'a, Searcher: fmt::Debug>> fmt::Debug for SplitInclusive<'a, // FIXME(#26925) Remove in favor of `#[derive(Clone)]` #[stable(feature = "split_inclusive", since = "1.51.0")] -impl<'a, P: Pattern<'a, Searcher: Clone>> Clone for SplitInclusive<'a, P> { +impl<'a, P: Pattern<&'a str, Searcher: Clone>> Clone for SplitInclusive<'a, P> { fn clone(&self) -> Self { SplitInclusive(self.0.clone()) } } #[stable(feature = "split_inclusive", since = "1.51.0")] -impl<'a, P: Pattern<'a, Searcher: ReverseSearcher<'a>>> DoubleEndedIterator +impl<'a, P: Pattern<&'a str, Searcher: ReverseSearcher<&'a str>>> DoubleEndedIterator for SplitInclusive<'a, P> { #[inline] @@ -1370,9 +1373,9 @@ impl<'a, P: Pattern<'a, Searcher: ReverseSearcher<'a>>> DoubleEndedIterator 
} #[stable(feature = "split_inclusive", since = "1.51.0")] -impl<'a, P: Pattern<'a>> FusedIterator for SplitInclusive<'a, P> {} +impl<'a, P: Pattern<&'a str>> FusedIterator for SplitInclusive<'a, P> {} -impl<'a, P: Pattern<'a>> SplitInclusive<'a, P> { +impl<'a, P: Pattern<&'a str>> SplitInclusive<'a, P> { /// Returns remainder of the split string. /// /// If the iterator is empty, returns `None`. diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs index ab2f8520ecb33..778a0e23c0407 100644 --- a/library/core/src/str/mod.rs +++ b/library/core/src/str/mod.rs @@ -13,11 +13,9 @@ mod iter; mod traits; mod validations; -use self::pattern::Pattern; -use self::pattern::{DoubleEndedSearcher, ReverseSearcher, Searcher}; - use crate::char::{self, EscapeDebugExtArgs}; use crate::mem; +use crate::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher}; use crate::slice::{self, SliceIndex}; pub mod pattern; @@ -70,7 +68,7 @@ pub use iter::SplitAsciiWhitespace; pub use iter::SplitInclusive; #[unstable(feature = "str_internals", issue = "none")] -pub use validations::{next_code_point, utf8_char_width}; +pub use validations::{next_code_point, try_first_code_point, utf8_char_width}; use iter::MatchIndicesInternal; use iter::SplitInternal; @@ -1067,7 +1065,7 @@ impl str { /// ``` #[stable(feature = "rust1", since = "1.0.0")] #[inline] - pub fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool { + pub fn contains<'a, P: Pattern<&'a str>>(&'a self, pat: P) -> bool { pat.is_contained_in(self) } @@ -1093,7 +1091,7 @@ impl str { /// assert!(!bananas.starts_with("nana")); /// ``` #[stable(feature = "rust1", since = "1.0.0")] - pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool { + pub fn starts_with<'a, P: Pattern<&'a str>>(&'a self, pat: P) -> bool { pat.is_prefix_of(self) } @@ -1121,7 +1119,7 @@ impl str { #[stable(feature = "rust1", since = "1.0.0")] pub fn ends_with<'a, P>(&'a self, pat: P) -> bool where - P: Pattern<'a, Searcher: 
ReverseSearcher<'a>>, + P: Pattern<&'a str, Searcher: ReverseSearcher<&'a str>>, { pat.is_suffix_of(self) } @@ -1170,7 +1168,7 @@ impl str { /// ``` #[stable(feature = "rust1", since = "1.0.0")] #[inline] - pub fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option { + pub fn find<'a, P: Pattern<&'a str>>(&'a self, pat: P) -> Option { pat.into_searcher(self).next_match().map(|(i, _)| i) } @@ -1218,7 +1216,7 @@ impl str { #[inline] pub fn rfind<'a, P>(&'a self, pat: P) -> Option where - P: Pattern<'a, Searcher: ReverseSearcher<'a>>, + P: Pattern<&'a str, Searcher: ReverseSearcher<&'a str>>, { pat.into_searcher(self).next_match_back().map(|(i, _)| i) } @@ -1338,7 +1336,7 @@ impl str { /// [`split_whitespace`]: str::split_whitespace #[stable(feature = "rust1", since = "1.0.0")] #[inline] - pub fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> { + pub fn split<'a, P: Pattern<&'a str>>(&'a self, pat: P) -> Split<'a, P> { Split(SplitInternal { start: 0, end: self.len(), @@ -1378,7 +1376,7 @@ impl str { /// ``` #[stable(feature = "split_inclusive", since = "1.51.0")] #[inline] - pub fn split_inclusive<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitInclusive<'a, P> { + pub fn split_inclusive<'a, P: Pattern<&'a str>>(&'a self, pat: P) -> SplitInclusive<'a, P> { SplitInclusive(SplitInternal { start: 0, end: self.len(), @@ -1435,7 +1433,7 @@ impl str { #[inline] pub fn rsplit<'a, P>(&'a self, pat: P) -> RSplit<'a, P> where - P: Pattern<'a, Searcher: ReverseSearcher<'a>>, + P: Pattern<&'a str, Searcher: ReverseSearcher<&'a str>>, { RSplit(self.split(pat).0) } @@ -1484,7 +1482,7 @@ impl str { /// ``` #[stable(feature = "rust1", since = "1.0.0")] #[inline] - pub fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> { + pub fn split_terminator<'a, P: Pattern<&'a str>>(&'a self, pat: P) -> SplitTerminator<'a, P> { SplitTerminator(SplitInternal { allow_trailing_empty: false, ..self.split(pat).0 }) } @@ -1532,7 +1530,7 @@ impl str { 
#[inline] pub fn rsplit_terminator<'a, P>(&'a self, pat: P) -> RSplitTerminator<'a, P> where - P: Pattern<'a, Searcher: ReverseSearcher<'a>>, + P: Pattern<&'a str, Searcher: ReverseSearcher<&'a str>>, { RSplitTerminator(self.split_terminator(pat).0) } @@ -1585,7 +1583,7 @@ impl str { /// ``` #[stable(feature = "rust1", since = "1.0.0")] #[inline] - pub fn splitn<'a, P: Pattern<'a>>(&'a self, n: usize, pat: P) -> SplitN<'a, P> { + pub fn splitn<'a, P: Pattern<&'a str>>(&'a self, n: usize, pat: P) -> SplitN<'a, P> { SplitN(SplitNInternal { iter: self.split(pat).0, count: n }) } @@ -1636,7 +1634,7 @@ impl str { #[inline] pub fn rsplitn<'a, P>(&'a self, n: usize, pat: P) -> RSplitN<'a, P> where - P: Pattern<'a, Searcher: ReverseSearcher<'a>>, + P: Pattern<&'a str, Searcher: ReverseSearcher<&'a str>>, { RSplitN(self.splitn(n, pat).0) } @@ -1654,7 +1652,7 @@ impl str { /// ``` #[stable(feature = "str_split_once", since = "1.52.0")] #[inline] - pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a str)> { + pub fn split_once<'a, P: Pattern<&'a str>>(&'a self, delimiter: P) -> Option<(&'a str, &'a str)> { let (start, end) = delimiter.into_searcher(self).next_match()?; // SAFETY: `Searcher` is known to return valid indices. unsafe { Some((self.get_unchecked(..start), self.get_unchecked(end..))) } @@ -1674,7 +1672,7 @@ impl str { #[inline] pub fn rsplit_once<'a, P>(&'a self, delimiter: P) -> Option<(&'a str, &'a str)> where - P: Pattern<'a, Searcher: ReverseSearcher<'a>>, + P: Pattern<&'a str, Searcher: ReverseSearcher<&'a str>>, { let (start, end) = delimiter.into_searcher(self).next_match_back()?; // SAFETY: `Searcher` is known to return valid indices. 
@@ -1714,7 +1712,7 @@ impl str { /// ``` #[stable(feature = "str_matches", since = "1.2.0")] #[inline] - pub fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> { + pub fn matches<'a, P: Pattern<&'a str>>(&'a self, pat: P) -> Matches<'a, P> { Matches(MatchesInternal(pat.into_searcher(self))) } @@ -1752,7 +1750,7 @@ impl str { #[inline] pub fn rmatches<'a, P>(&'a self, pat: P) -> RMatches<'a, P> where - P: Pattern<'a, Searcher: ReverseSearcher<'a>>, + P: Pattern<&'a str, Searcher: ReverseSearcher<&'a str>>, { RMatches(self.matches(pat).0) } @@ -1796,7 +1794,7 @@ impl str { /// ``` #[stable(feature = "str_match_indices", since = "1.5.0")] #[inline] - pub fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> { + pub fn match_indices<'a, P: Pattern<&'a str>>(&'a self, pat: P) -> MatchIndices<'a, P> { MatchIndices(MatchIndicesInternal(pat.into_searcher(self))) } @@ -1840,7 +1838,7 @@ impl str { #[inline] pub fn rmatch_indices<'a, P>(&'a self, pat: P) -> RMatchIndices<'a, P> where - P: Pattern<'a, Searcher: ReverseSearcher<'a>>, + P: Pattern<&'a str, Searcher: ReverseSearcher<&'a str>>, { RMatchIndices(self.match_indices(pat).0) } @@ -2057,7 +2055,7 @@ impl str { #[stable(feature = "rust1", since = "1.0.0")] pub fn trim_matches<'a, P>(&'a self, pat: P) -> &'a str where - P: Pattern<'a, Searcher: DoubleEndedSearcher<'a>>, + P: Pattern<&'a str, Searcher: DoubleEndedSearcher<&'a str>>, { let mut i = 0; let mut j = 0; @@ -2104,7 +2102,7 @@ impl str { #[must_use = "this returns the trimmed string as a new slice, \ without modifying the original"] #[stable(feature = "trim_direction", since = "1.30.0")] - pub fn trim_start_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str { + pub fn trim_start_matches<'a, P: Pattern<&'a str>>(&'a self, pat: P) -> &'a str { let mut i = self.len(); let mut matcher = pat.into_searcher(self); if let Some((a, _)) = matcher.next_reject() { @@ -2137,7 +2135,7 @@ impl str { #[must_use = "this returns the 
remaining substring as a new slice, \ without modifying the original"] #[stable(feature = "str_strip", since = "1.45.0")] - pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a str> { + pub fn strip_prefix<'a, P: Pattern<&'a str>>(&'a self, prefix: P) -> Option<&'a str> { prefix.strip_prefix_of(self) } @@ -2166,8 +2164,8 @@ impl str { #[stable(feature = "str_strip", since = "1.45.0")] pub fn strip_suffix<'a, P>(&'a self, suffix: P) -> Option<&'a str> where - P: Pattern<'a>, -

<P as Pattern<'a>>::Searcher: ReverseSearcher<'a>, + P: Pattern<&'a str>, +

<P as Pattern<&'a str>>::Searcher: ReverseSearcher<&'a str>, { suffix.strip_suffix_of(self) } @@ -2210,7 +2208,7 @@ impl str { #[stable(feature = "trim_direction", since = "1.30.0")] pub fn trim_end_matches<'a, P>(&'a self, pat: P) -> &'a str where - P: Pattern<'a, Searcher: ReverseSearcher<'a>>, + P: Pattern<&'a str, Searcher: ReverseSearcher<&'a str>>, { let mut j = 0; let mut matcher = pat.into_searcher(self); @@ -2254,7 +2252,7 @@ impl str { note = "superseded by `trim_start_matches`", suggestion = "trim_start_matches" )] - pub fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str { + pub fn trim_left_matches<'a, P: Pattern<&'a str>>(&'a self, pat: P) -> &'a str { self.trim_start_matches(pat) } @@ -2299,7 +2297,7 @@ impl str { )] pub fn trim_right_matches<'a, P>(&'a self, pat: P) -> &'a str where - P: Pattern<'a, Searcher: ReverseSearcher<'a>>, + P: Pattern<&'a str, Searcher: ReverseSearcher<&'a str>>, { self.trim_end_matches(pat) } diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs index 19da6d2fbecbc..d98780f960da1 100644 --- a/library/core/src/str/pattern.rs +++ b/library/core/src/str/pattern.rs @@ -1,36 +1,47 @@ -//! The string Pattern API. +//! [The Pattern API] implementation for searching in `&str`. //! -//! The Pattern API provides a generic mechanism for using different pattern -//! types when searching through a string. +//! The implementation provides a generic mechanism for using different pattern +//! types when searching through a string. Although this API is unstable, it is +//! exposed via stable APIs on the [`str`] type. //! -//! For more details, see the traits [`Pattern`], [`Searcher`], -//! [`ReverseSearcher`], and [`DoubleEndedSearcher`]. +//! Depending on the type of the pattern, the behaviour of methods like +//! [`str::find`] and [`str::contains`] can change. The table below describes +//! some of those behaviours. //! -//! Although this API is unstable, it is exposed via stable APIs on the -//! [`str`] type. +//! 
| Pattern type | Match condition | +//! |--------------------------|-------------------------------------------| +//! | `&str` | is substring | +//! | `char` | is contained in string | +//! | `&[char]` | any char in slice is contained in string | +//! | `F: FnMut(char) -> bool` | `F` returns `true` for a char in string | +//! | `&&str` | is substring | +//! | `&String` | is substring | //! //! # Examples //! -//! [`Pattern`] is [implemented][pattern-impls] in the stable API for -//! [`&str`][`str`], [`char`], slices of [`char`], and functions and closures -//! implementing `FnMut(char) -> bool`. -//! //! ``` //! let s = "Can you find a needle in a haystack?"; //! //! // &str pattern //! assert_eq!(s.find("you"), Some(4)); +//! assert_eq!(s.find("thou"), None); +//! //! // char pattern //! assert_eq!(s.find('n'), Some(2)); -//! // array of chars pattern +//! assert_eq!(s.find('N'), None); +//! +//! // Array of chars pattern and slices thereof //! assert_eq!(s.find(&['a', 'e', 'i', 'o', 'u']), Some(1)); -//! // slice of chars pattern //! assert_eq!(s.find(&['a', 'e', 'i', 'o', 'u'][..]), Some(1)); -//! // closure pattern +//! assert_eq!(s.find(&['q', 'v', 'x']), None); +//! +//! // Predicate closure //! assert_eq!(s.find(|c: char| c.is_ascii_punctuation()), Some(35)); +//! assert_eq!(s.find(|c: char| c.is_lowercase()), Some(1)); +//! assert_eq!(s.find(|c: char| !c.is_ascii()), None); //! ``` //! -//! [pattern-impls]: Pattern#implementors +//! [The Pattern API]: crate::pattern #![unstable( feature = "pattern", @@ -39,319 +50,37 @@ )] use crate::cmp; -use crate::cmp::Ordering; use crate::fmt; +use crate::pattern::{DoubleEndedSearcher, Haystack, Pattern, ReverseSearcher, Searcher, SearchStep}; use crate::slice::memchr; -// Pattern - -/// A string pattern. -/// -/// A `Pattern<'a>` expresses that the implementing type -/// can be used as a string pattern for searching in a [`&'a str`][str]. 
-/// -/// For example, both `'a'` and `"aa"` are patterns that -/// would match at index `1` in the string `"baaaab"`. -/// -/// The trait itself acts as a builder for an associated -/// [`Searcher`] type, which does the actual work of finding -/// occurrences of the pattern in a string. -/// -/// Depending on the type of the pattern, the behaviour of methods like -/// [`str::find`] and [`str::contains`] can change. The table below describes -/// some of those behaviours. -/// -/// | Pattern type | Match condition | -/// |--------------------------|-------------------------------------------| -/// | `&str` | is substring | -/// | `char` | is contained in string | -/// | `&[char]` | any char in slice is contained in string | -/// | `F: FnMut(char) -> bool` | `F` returns `true` for a char in string | -/// | `&&str` | is substring | -/// | `&String` | is substring | -/// -/// # Examples -/// -/// ``` -/// // &str -/// assert_eq!("abaaa".find("ba"), Some(1)); -/// assert_eq!("abaaa".find("bac"), None); -/// -/// // char -/// assert_eq!("abaaa".find('a'), Some(0)); -/// assert_eq!("abaaa".find('b'), Some(1)); -/// assert_eq!("abaaa".find('c'), None); -/// -/// // &[char; N] -/// assert_eq!("ab".find(&['b', 'a']), Some(0)); -/// assert_eq!("abaaa".find(&['a', 'z']), Some(0)); -/// assert_eq!("abaaa".find(&['c', 'd']), None); -/// -/// // &[char] -/// assert_eq!("ab".find(&['b', 'a'][..]), Some(0)); -/// assert_eq!("abaaa".find(&['a', 'z'][..]), Some(0)); -/// assert_eq!("abaaa".find(&['c', 'd'][..]), None); -/// -/// // FnMut(char) -> bool -/// assert_eq!("abcdef_z".find(|ch| ch > 'd' && ch < 'y'), Some(4)); -/// assert_eq!("abcddd_z".find(|ch| ch > 'd' && ch < 'y'), None); -/// ``` -pub trait Pattern<'a>: Sized { - /// Associated searcher for this pattern - type Searcher: Searcher<'a>; - - /// Constructs the associated searcher from - /// `self` and the `haystack` to search in. 
- fn into_searcher(self, haystack: &'a str) -> Self::Searcher; - - /// Checks whether the pattern matches anywhere in the haystack - #[inline] - fn is_contained_in(self, haystack: &'a str) -> bool { - self.into_searcher(haystack).next_match().is_some() - } - - /// Checks whether the pattern matches at the front of the haystack - #[inline] - fn is_prefix_of(self, haystack: &'a str) -> bool { - matches!(self.into_searcher(haystack).next(), SearchStep::Match(0, _)) - } - - /// Checks whether the pattern matches at the back of the haystack - #[inline] - fn is_suffix_of(self, haystack: &'a str) -> bool - where - Self::Searcher: ReverseSearcher<'a>, - { - matches!(self.into_searcher(haystack).next_back(), SearchStep::Match(_, j) if haystack.len() == j) - } - - /// Removes the pattern from the front of haystack, if it matches. - #[inline] - fn strip_prefix_of(self, haystack: &'a str) -> Option<&'a str> { - if let SearchStep::Match(start, len) = self.into_searcher(haystack).next() { - debug_assert_eq!( - start, 0, - "The first search step from Searcher \ - must include the first character" - ); - // SAFETY: `Searcher` is known to return valid indices. - unsafe { Some(haystack.get_unchecked(len..)) } - } else { - None - } - } - - /// Removes the pattern from the back of haystack, if it matches. - #[inline] - fn strip_suffix_of(self, haystack: &'a str) -> Option<&'a str> - where - Self::Searcher: ReverseSearcher<'a>, - { - if let SearchStep::Match(start, end) = self.into_searcher(haystack).next_back() { - debug_assert_eq!( - end, - haystack.len(), - "The first search step from ReverseSearcher \ - must include the last character" - ); - // SAFETY: `Searcher` is known to return valid indices. - unsafe { Some(haystack.get_unchecked(..start)) } - } else { - None - } - } -} - -// Searcher - -/// Result of calling [`Searcher::next()`] or [`ReverseSearcher::next_back()`]. 
-#[derive(Copy, Clone, Eq, PartialEq, Debug)] -pub enum SearchStep { - /// Expresses that a match of the pattern has been found at - /// `haystack[a..b]`. - Match(usize, usize), - /// Expresses that `haystack[a..b]` has been rejected as a possible match - /// of the pattern. - /// - /// Note that there might be more than one `Reject` between two `Match`es, - /// there is no requirement for them to be combined into one. - Reject(usize, usize), - /// Expresses that every byte of the haystack has been visited, ending - /// the iteration. - Done, -} - -/// A searcher for a string pattern. -/// -/// This trait provides methods for searching for non-overlapping -/// matches of a pattern starting from the front (left) of a string. -/// -/// It will be implemented by associated `Searcher` -/// types of the [`Pattern`] trait. -/// -/// The trait is marked unsafe because the indices returned by the -/// [`next()`][Searcher::next] methods are required to lie on valid utf8 -/// boundaries in the haystack. This enables consumers of this trait to -/// slice the haystack without additional runtime checks. -pub unsafe trait Searcher<'a> { - /// Getter for the underlying string to be searched in - /// - /// Will always return the same [`&str`][str]. - fn haystack(&self) -> &'a str; - - /// Performs the next search step starting from the front. - /// - /// - Returns [`Match(a, b)`][SearchStep::Match] if `haystack[a..b]` matches - /// the pattern. - /// - Returns [`Reject(a, b)`][SearchStep::Reject] if `haystack[a..b]` can - /// not match the pattern, even partially. - /// - Returns [`Done`][SearchStep::Done] if every byte of the haystack has - /// been visited. - /// - /// The stream of [`Match`][SearchStep::Match] and - /// [`Reject`][SearchStep::Reject] values up to a [`Done`][SearchStep::Done] - /// will contain index ranges that are adjacent, non-overlapping, - /// covering the whole haystack, and laying on utf8 boundaries. 
- /// - /// A [`Match`][SearchStep::Match] result needs to contain the whole matched - /// pattern, however [`Reject`][SearchStep::Reject] results may be split up - /// into arbitrary many adjacent fragments. Both ranges may have zero length. - /// - /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"` - /// might produce the stream - /// `[Reject(0, 1), Reject(1, 2), Match(2, 5), Reject(5, 8)]` - fn next(&mut self) -> SearchStep; - - /// Finds the next [`Match`][SearchStep::Match] result. See [`next()`][Searcher::next]. - /// - /// Unlike [`next()`][Searcher::next], there is no guarantee that the returned ranges - /// of this and [`next_reject`][Searcher::next_reject] will overlap. This will return - /// `(start_match, end_match)`, where start_match is the index of where - /// the match begins, and end_match is the index after the end of the match. - #[inline] - fn next_match(&mut self) -> Option<(usize, usize)> { - loop { - match self.next() { - SearchStep::Match(a, b) => return Some((a, b)), - SearchStep::Done => return None, - _ => continue, - } - } - } +///////////////////////////////////////////////////////////////////////////// +// Impl for Haystack +///////////////////////////////////////////////////////////////////////////// - /// Finds the next [`Reject`][SearchStep::Reject] result. See [`next()`][Searcher::next] - /// and [`next_match()`][Searcher::next_match]. - /// - /// Unlike [`next()`][Searcher::next], there is no guarantee that the returned ranges - /// of this and [`next_match`][Searcher::next_match] will overlap. - #[inline] - fn next_reject(&mut self) -> Option<(usize, usize)> { - loop { - match self.next() { - SearchStep::Reject(a, b) => return Some((a, b)), - SearchStep::Done => return None, - _ => continue, - } - } - } -} +impl<'a> Haystack for &'a str { + type Cursor = usize; -/// A reverse searcher for a string pattern. 
-/// -/// This trait provides methods for searching for non-overlapping -/// matches of a pattern starting from the back (right) of a string. -/// -/// It will be implemented by associated [`Searcher`] -/// types of the [`Pattern`] trait if the pattern supports searching -/// for it from the back. -/// -/// The index ranges returned by this trait are not required -/// to exactly match those of the forward search in reverse. -/// -/// For the reason why this trait is marked unsafe, see the -/// parent trait [`Searcher`]. -pub unsafe trait ReverseSearcher<'a>: Searcher<'a> { - /// Performs the next search step starting from the back. - /// - /// - Returns [`Match(a, b)`][SearchStep::Match] if `haystack[a..b]` - /// matches the pattern. - /// - Returns [`Reject(a, b)`][SearchStep::Reject] if `haystack[a..b]` - /// can not match the pattern, even partially. - /// - Returns [`Done`][SearchStep::Done] if every byte of the haystack - /// has been visited - /// - /// The stream of [`Match`][SearchStep::Match] and - /// [`Reject`][SearchStep::Reject] values up to a [`Done`][SearchStep::Done] - /// will contain index ranges that are adjacent, non-overlapping, - /// covering the whole haystack, and laying on utf8 boundaries. - /// - /// A [`Match`][SearchStep::Match] result needs to contain the whole matched - /// pattern, however [`Reject`][SearchStep::Reject] results may be split up - /// into arbitrary many adjacent fragments. Both ranges may have zero length. - /// - /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"` - /// might produce the stream - /// `[Reject(7, 8), Match(4, 7), Reject(1, 4), Reject(0, 1)]`. - fn next_back(&mut self) -> SearchStep; - - /// Finds the next [`Match`][SearchStep::Match] result. - /// See [`next_back()`][ReverseSearcher::next_back]. 
- #[inline] - fn next_match_back(&mut self) -> Option<(usize, usize)> { - loop { - match self.next_back() { - SearchStep::Match(a, b) => return Some((a, b)), - SearchStep::Done => return None, - _ => continue, - } - } - } + #[inline(always)] + fn cursor_at_front(&self) -> usize { 0 } + #[inline(always)] + fn cursor_at_back(&self) -> usize { self.len() } - /// Finds the next [`Reject`][SearchStep::Reject] result. - /// See [`next_back()`][ReverseSearcher::next_back]. - #[inline] - fn next_reject_back(&mut self) -> Option<(usize, usize)> { - loop { - match self.next_back() { - SearchStep::Reject(a, b) => return Some((a, b)), - SearchStep::Done => return None, - _ => continue, - } - } + #[inline(always)] + unsafe fn split_at_cursor_unchecked(self, cursor: usize) -> (Self, Self) { + // SAFETY: Caller promises position is a character boundary. + unsafe { (self.get_unchecked(..cursor), self.get_unchecked(cursor..)) } } } -/// A marker trait to express that a [`ReverseSearcher`] -/// can be used for a [`DoubleEndedIterator`] implementation. -/// -/// For this, the impl of [`Searcher`] and [`ReverseSearcher`] need -/// to follow these conditions: -/// -/// - All results of `next()` need to be identical -/// to the results of `next_back()` in reverse order. -/// - `next()` and `next_back()` need to behave as -/// the two ends of a range of values, that is they -/// can not "walk past each other". -/// -/// # Examples -/// -/// `char::Searcher` is a `DoubleEndedSearcher` because searching for a -/// [`char`] only requires looking at one at a time, which behaves the same -/// from both ends. -/// -/// `(&str)::Searcher` is not a `DoubleEndedSearcher` because -/// the pattern `"aa"` in the haystack `"aaa"` matches as either -/// `"[aa]a"` or `"a[aa]"`, depending from which side it is searched. 
-pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {} - ///////////////////////////////////////////////////////////////////////////// // Impl for char ///////////////////////////////////////////////////////////////////////////// -/// Associated type for `>::Searcher`. +/// Associated type for `>::Searcher`. #[derive(Clone, Debug)] -pub struct CharSearcher<'a> { - haystack: &'a str, +pub struct CharSearcher { + haystack: H, // safety invariant: `finger`/`finger_back` must be a valid utf8 byte index of `haystack` // This invariant can be broken *within* next_match and next_match_back, however // they must exit with fingers on valid code point boundaries. @@ -359,12 +88,12 @@ pub struct CharSearcher<'a> { /// Imagine that it exists before the byte at its index, i.e. /// `haystack[finger]` is the first byte of the slice we must inspect during /// forward searching - finger: usize, + finger: H::Cursor, /// `finger_back` is the current byte index of the reverse search. /// Imagine that it exists after the byte at its index, i.e. /// haystack[finger_back - 1] is the last byte of the slice we must inspect during /// forward searching (and thus the first byte to be inspected when calling next_back()). 
- finger_back: usize, + finger_back: H::Cursor, /// The character being searched for needle: char, @@ -375,7 +104,7 @@ pub struct CharSearcher<'a> { utf8_encoded: [u8; 4], } -unsafe impl<'a> Searcher<'a> for CharSearcher<'a> { +unsafe impl<'a> Searcher<&'a str> for CharSearcher<&'a str> { #[inline] fn haystack(&self) -> &'a str { self.haystack @@ -453,7 +182,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> { // let next_reject use the default implementation from the Searcher trait } -unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> { +unsafe impl<'a> ReverseSearcher<&'a str> for CharSearcher<&'a str> { #[inline] fn next_back(&mut self) -> SearchStep { let old_finger = self.finger_back; @@ -527,7 +256,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> { // let next_reject_back use the default implementation from the Searcher trait } -impl<'a> DoubleEndedSearcher<'a> for CharSearcher<'a> {} +impl<'a> DoubleEndedSearcher<&'a str> for CharSearcher<&'a str> {} /// Searches for chars that are equal to a given [`char`]. 
/// @@ -536,8 +265,8 @@ impl<'a> DoubleEndedSearcher<'a> for CharSearcher<'a> {} /// ``` /// assert_eq!("Hello world".find('o'), Some(4)); /// ``` -impl<'a> Pattern<'a> for char { - type Searcher = CharSearcher<'a>; +impl<'a> Pattern<&'a str> for char { + type Searcher = CharSearcher<&'a str>; #[inline] fn into_searcher(self, haystack: &'a str) -> Self::Searcher { @@ -559,34 +288,37 @@ impl<'a> Pattern<'a> for char { haystack.as_bytes().contains(&(self as u8)) } else { let mut buffer = [0u8; 4]; - self.encode_utf8(&mut buffer).is_contained_in(haystack) + let chr: &str = self.encode_utf8(&mut buffer); + chr.is_contained_in(haystack) } } #[inline] fn is_prefix_of(self, haystack: &'a str) -> bool { - self.encode_utf8(&mut [0u8; 4]).is_prefix_of(haystack) + let mut buffer = [0u8; 4]; + let chr: &str = self.encode_utf8(&mut buffer); + chr.is_prefix_of(haystack) } #[inline] fn strip_prefix_of(self, haystack: &'a str) -> Option<&'a str> { - self.encode_utf8(&mut [0u8; 4]).strip_prefix_of(haystack) + let mut buffer = [0u8; 4]; + let chr: &str = self.encode_utf8(&mut buffer); + chr.strip_prefix_of(haystack) } #[inline] - fn is_suffix_of(self, haystack: &'a str) -> bool - where - Self::Searcher: ReverseSearcher<'a>, - { - self.encode_utf8(&mut [0u8; 4]).is_suffix_of(haystack) + fn is_suffix_of(self, haystack: &'a str) -> bool { + let mut buffer = [0u8; 4]; + let chr: &str = self.encode_utf8(&mut buffer); + chr.is_suffix_of(haystack) } #[inline] - fn strip_suffix_of(self, haystack: &'a str) -> Option<&'a str> - where - Self::Searcher: ReverseSearcher<'a>, - { - self.encode_utf8(&mut [0u8; 4]).strip_suffix_of(haystack) + fn strip_suffix_of(self, haystack: &'a str) -> Option<&'a str> { + let mut buffer = [0u8; 4]; + let chr: &str = self.encode_utf8(&mut buffer); + chr.strip_suffix_of(haystack) } } @@ -639,7 +371,7 @@ struct MultiCharEqSearcher<'a, C: MultiCharEq> { char_indices: super::CharIndices<'a>, } -impl<'a, C: MultiCharEq> Pattern<'a> for MultiCharEqPattern { +impl<'a, 
C: MultiCharEq> Pattern<&'a str> for MultiCharEqPattern { type Searcher = MultiCharEqSearcher<'a, C>; #[inline] @@ -648,7 +380,7 @@ impl<'a, C: MultiCharEq> Pattern<'a> for MultiCharEqPattern { } } -unsafe impl<'a, C: MultiCharEq> Searcher<'a> for MultiCharEqSearcher<'a, C> { +unsafe impl<'a, C: MultiCharEq> Searcher<&'a str> for MultiCharEqSearcher<'a, C> { #[inline] fn haystack(&self) -> &'a str { self.haystack @@ -673,7 +405,7 @@ unsafe impl<'a, C: MultiCharEq> Searcher<'a> for MultiCharEqSearcher<'a, C> { } } -unsafe impl<'a, C: MultiCharEq> ReverseSearcher<'a> for MultiCharEqSearcher<'a, C> { +unsafe impl<'a, C: MultiCharEq> ReverseSearcher<&'a str> for MultiCharEqSearcher<'a, C> { #[inline] fn next_back(&mut self) -> SearchStep { let s = &mut self.char_indices; @@ -693,7 +425,7 @@ unsafe impl<'a, C: MultiCharEq> ReverseSearcher<'a> for MultiCharEqSearcher<'a, } } -impl<'a, C: MultiCharEq> DoubleEndedSearcher<'a> for MultiCharEqSearcher<'a, C> {} +impl<'a, C: MultiCharEq> DoubleEndedSearcher<&'a str> for MultiCharEqSearcher<'a, C> {} ///////////////////////////////////////////////////////////////////////////// @@ -724,7 +456,7 @@ macro_rules! pattern_methods { #[inline] fn is_suffix_of(self, haystack: &'a str) -> bool where - $t: ReverseSearcher<'a>, + $t: ReverseSearcher<&'a str>, { ($pmap)(self).is_suffix_of(haystack) } @@ -732,7 +464,7 @@ macro_rules! pattern_methods { #[inline] fn strip_suffix_of(self, haystack: &'a str) -> Option<&'a str> where - $t: ReverseSearcher<'a>, + $t: ReverseSearcher<&'a str>, { ($pmap)(self).strip_suffix_of(haystack) } @@ -774,16 +506,16 @@ macro_rules! searcher_methods { }; } -/// Associated type for `<[char; N] as Pattern<'a>>::Searcher`. +/// Associated type for `<[char; N] as Pattern<&'a str>>::Searcher`. #[derive(Clone, Debug)] pub struct CharArraySearcher<'a, const N: usize>( - as Pattern<'a>>::Searcher, + as Pattern<&'a str>>::Searcher, ); -/// Associated type for `<&[char; N] as Pattern<'a>>::Searcher`. 
+/// Associated type for `<&[char; N] as Pattern<&'a str>>::Searcher`. #[derive(Clone, Debug)] pub struct CharArrayRefSearcher<'a, 'b, const N: usize>( - as Pattern<'a>>::Searcher, + as Pattern<&'a str>>::Searcher, ); /// Searches for chars that are equal to any of the [`char`]s in the array. @@ -794,15 +526,15 @@ pub struct CharArrayRefSearcher<'a, 'b, const N: usize>( /// assert_eq!("Hello world".find(['l', 'l']), Some(2)); /// assert_eq!("Hello world".find(['l', 'l']), Some(2)); /// ``` -impl<'a, const N: usize> Pattern<'a> for [char; N] { +impl<'a, const N: usize> Pattern<&'a str> for [char; N] { pattern_methods!(CharArraySearcher<'a, N>, MultiCharEqPattern, CharArraySearcher); } -unsafe impl<'a, const N: usize> Searcher<'a> for CharArraySearcher<'a, N> { +unsafe impl<'a, const N: usize> Searcher<&'a str> for CharArraySearcher<'a, N> { searcher_methods!(forward); } -unsafe impl<'a, const N: usize> ReverseSearcher<'a> for CharArraySearcher<'a, N> { +unsafe impl<'a, const N: usize> ReverseSearcher<&'a str> for CharArraySearcher<'a, N> { searcher_methods!(reverse); } @@ -814,15 +546,15 @@ unsafe impl<'a, const N: usize> ReverseSearcher<'a> for CharArraySearcher<'a, N> /// assert_eq!("Hello world".find(&['l', 'l']), Some(2)); /// assert_eq!("Hello world".find(&['l', 'l']), Some(2)); /// ``` -impl<'a, 'b, const N: usize> Pattern<'a> for &'b [char; N] { +impl<'a, 'b, const N: usize> Pattern<&'a str> for &'b [char; N] { pattern_methods!(CharArrayRefSearcher<'a, 'b, N>, MultiCharEqPattern, CharArrayRefSearcher); } -unsafe impl<'a, 'b, const N: usize> Searcher<'a> for CharArrayRefSearcher<'a, 'b, N> { +unsafe impl<'a, 'b, const N: usize> Searcher<&'a str> for CharArrayRefSearcher<'a, 'b, N> { searcher_methods!(forward); } -unsafe impl<'a, 'b, const N: usize> ReverseSearcher<'a> for CharArrayRefSearcher<'a, 'b, N> { +unsafe impl<'a, 'b, const N: usize> ReverseSearcher<&'a str> for CharArrayRefSearcher<'a, 'b, N> { searcher_methods!(reverse); } @@ -832,19 +564,19 @@ 
unsafe impl<'a, 'b, const N: usize> ReverseSearcher<'a> for CharArrayRefSearcher // Todo: Change / Remove due to ambiguity in meaning. -/// Associated type for `<&[char] as Pattern<'a>>::Searcher`. +/// Associated type for `<&[char] as Pattern<&'a str>>::Searcher`. #[derive(Clone, Debug)] -pub struct CharSliceSearcher<'a, 'b>( as Pattern<'a>>::Searcher); +pub struct CharSliceSearcher<'a, 'b>( as Pattern<&'a str>>::Searcher); -unsafe impl<'a, 'b> Searcher<'a> for CharSliceSearcher<'a, 'b> { +unsafe impl<'a, 'b> Searcher<&'a str> for CharSliceSearcher<'a, 'b> { searcher_methods!(forward); } -unsafe impl<'a, 'b> ReverseSearcher<'a> for CharSliceSearcher<'a, 'b> { +unsafe impl<'a, 'b> ReverseSearcher<&'a str> for CharSliceSearcher<'a, 'b> { searcher_methods!(reverse); } -impl<'a, 'b> DoubleEndedSearcher<'a> for CharSliceSearcher<'a, 'b> {} +impl<'a, 'b> DoubleEndedSearcher<&'a str> for CharSliceSearcher<'a, 'b> {} /// Searches for chars that are equal to any of the [`char`]s in the slice. /// @@ -854,7 +586,7 @@ impl<'a, 'b> DoubleEndedSearcher<'a> for CharSliceSearcher<'a, 'b> {} /// assert_eq!("Hello world".find(&['l', 'l'] as &[_]), Some(2)); /// assert_eq!("Hello world".find(&['l', 'l'][..]), Some(2)); /// ``` -impl<'a, 'b> Pattern<'a> for &'b [char] { +impl<'a, 'b> Pattern<&'a str> for &'b [char] { pattern_methods!(CharSliceSearcher<'a, 'b>, MultiCharEqPattern, CharSliceSearcher); } @@ -862,9 +594,9 @@ impl<'a, 'b> Pattern<'a> for &'b [char] { // Impl for F: FnMut(char) -> bool ///////////////////////////////////////////////////////////////////////////// -/// Associated type for `>::Searcher`. +/// Associated type for `>::Searcher`. 
#[derive(Clone)] -pub struct CharPredicateSearcher<'a, F>( as Pattern<'a>>::Searcher) +pub struct CharPredicateSearcher<'a, F>( as Pattern<&'a str>>::Searcher) where F: FnMut(char) -> bool; @@ -879,21 +611,21 @@ where .finish() } } -unsafe impl<'a, F> Searcher<'a> for CharPredicateSearcher<'a, F> +unsafe impl<'a, F> Searcher<&'a str> for CharPredicateSearcher<'a, F> where F: FnMut(char) -> bool, { searcher_methods!(forward); } -unsafe impl<'a, F> ReverseSearcher<'a> for CharPredicateSearcher<'a, F> +unsafe impl<'a, F> ReverseSearcher<&'a str> for CharPredicateSearcher<'a, F> where F: FnMut(char) -> bool, { searcher_methods!(reverse); } -impl<'a, F> DoubleEndedSearcher<'a> for CharPredicateSearcher<'a, F> where F: FnMut(char) -> bool {} +impl<'a, F> DoubleEndedSearcher<&'a str> for CharPredicateSearcher<'a, F> where F: FnMut(char) -> bool {} /// Searches for [`char`]s that match the given predicate. /// @@ -903,7 +635,7 @@ impl<'a, F> DoubleEndedSearcher<'a> for CharPredicateSearcher<'a, F> where F: Fn /// assert_eq!("Hello world".find(char::is_uppercase), Some(0)); /// assert_eq!("Hello world".find(|c| "aeiou".contains(c)), Some(1)); /// ``` -impl<'a, F> Pattern<'a> for F +impl<'a, F> Pattern<&'a str> for F where F: FnMut(char) -> bool, { @@ -915,7 +647,7 @@ where ///////////////////////////////////////////////////////////////////////////// /// Delegates to the `&str` impl. 
-impl<'a, 'b, 'c> Pattern<'a> for &'c &'b str { +impl<'a, 'b, 'c> Pattern<&'a str> for &'c &'b str { pattern_methods!(StrSearcher<'a, 'b>, |&s| s, |s| s); } @@ -933,7 +665,7 @@ impl<'a, 'b, 'c> Pattern<'a> for &'c &'b str { /// ``` /// assert_eq!("Hello world".find("world"), Some(6)); /// ``` -impl<'a, 'b> Pattern<'a> for &'b str { +impl<'a, 'b> Pattern<&'a str> for &'b str { type Searcher = StrSearcher<'a, 'b>; #[inline] @@ -950,27 +682,7 @@ impl<'a, 'b> Pattern<'a> for &'b str { /// Checks whether the pattern matches anywhere in the haystack #[inline] fn is_contained_in(self, haystack: &'a str) -> bool { - if self.len() == 0 { - return true; - } - - match self.len().cmp(&haystack.len()) { - Ordering::Less => { - if self.len() == 1 { - return haystack.as_bytes().contains(&self.as_bytes()[0]); - } - - #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] - if self.len() <= 32 { - if let Some(result) = simd_contains(self, haystack) { - return result; - } - } - - self.into_searcher(haystack).next_match().is_some() - } - _ => self == haystack, - } + self.as_bytes().is_contained_in(haystack.as_bytes()) } /// Removes the pattern from the front of haystack, if it matches. @@ -1008,7 +720,7 @@ impl<'a, 'b> Pattern<'a> for &'b str { ///////////////////////////////////////////////////////////////////////////// #[derive(Clone, Debug)] -/// Associated type for `<&str as Pattern<'a>>::Searcher`. +/// Associated type for `<&str as Pattern<&'a str>>::Searcher`. 
pub struct StrSearcher<'a, 'b> { haystack: &'a str, needle: &'b str, @@ -1059,7 +771,7 @@ impl<'a, 'b> StrSearcher<'a, 'b> { } } -unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> { +unsafe impl<'a, 'b> Searcher<&'a str> for StrSearcher<'a, 'b> { #[inline] fn haystack(&self) -> &'a str { self.haystack @@ -1149,7 +861,7 @@ unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> { } } -unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> { +unsafe impl<'a, 'b> ReverseSearcher<&'a str> for StrSearcher<'a, 'b> { #[inline] fn next_back(&mut self) -> SearchStep { match self.searcher { @@ -1711,210 +1423,3 @@ impl TwoWayStrategy for RejectAndMatch { SearchStep::Match(a, b) } } - -/// SIMD search for short needles based on -/// Wojciech Muła's "SIMD-friendly algorithms for substring searching"[0] -/// -/// It skips ahead by the vector width on each iteration (rather than the needle length as two-way -/// does) by probing the first and last byte of the needle for the whole vector width -/// and only doing full needle comparisons when the vectorized probe indicated potential matches. -/// -/// Since the x86_64 baseline only offers SSE2 we only use u8x16 here. -/// If we ever ship std with for x86-64-v3 or adapt this for other platforms then wider vectors -/// should be evaluated. -/// -/// For haystacks smaller than vector-size + needle length it falls back to -/// a naive O(n*m) search so this implementation should not be called on larger needles. 
-/// -/// [0]: http://0x80.pl/articles/simd-strfind.html#sse-avx2 -#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] -#[inline] -fn simd_contains(needle: &str, haystack: &str) -> Option { - let needle = needle.as_bytes(); - let haystack = haystack.as_bytes(); - - debug_assert!(needle.len() > 1); - - use crate::ops::BitAnd; - use crate::simd::mask8x16 as Mask; - use crate::simd::u8x16 as Block; - use crate::simd::{SimdPartialEq, ToBitMask}; - - let first_probe = needle[0]; - let last_byte_offset = needle.len() - 1; - - // the offset used for the 2nd vector - let second_probe_offset = if needle.len() == 2 { - // never bail out on len=2 needles because the probes will fully cover them and have - // no degenerate cases. - 1 - } else { - // try a few bytes in case first and last byte of the needle are the same - let Some(second_probe_offset) = (needle.len().saturating_sub(4)..needle.len()).rfind(|&idx| needle[idx] != first_probe) else { - // fall back to other search methods if we can't find any different bytes - // since we could otherwise hit some degenerate cases - return None; - }; - second_probe_offset - }; - - // do a naive search if the haystack is too small to fit - if haystack.len() < Block::LANES + last_byte_offset { - return Some(haystack.windows(needle.len()).any(|c| c == needle)); - } - - let first_probe: Block = Block::splat(first_probe); - let second_probe: Block = Block::splat(needle[second_probe_offset]); - // first byte are already checked by the outer loop. to verify a match only the - // remainder has to be compared. - let trimmed_needle = &needle[1..]; - - // this #[cold] is load-bearing, benchmark before removing it... - let check_mask = #[cold] - |idx, mask: u16, skip: bool| -> bool { - if skip { - return false; - } - - // and so is this. optimizations are weird. 
- let mut mask = mask; - - while mask != 0 { - let trailing = mask.trailing_zeros(); - let offset = idx + trailing as usize + 1; - // SAFETY: mask is between 0 and 15 trailing zeroes, we skip one additional byte that was already compared - // and then take trimmed_needle.len() bytes. This is within the bounds defined by the outer loop - unsafe { - let sub = haystack.get_unchecked(offset..).get_unchecked(..trimmed_needle.len()); - if small_slice_eq(sub, trimmed_needle) { - return true; - } - } - mask &= !(1 << trailing); - } - return false; - }; - - let test_chunk = |idx| -> u16 { - // SAFETY: this requires at least LANES bytes being readable at idx - // that is ensured by the loop ranges (see comments below) - let a: Block = unsafe { haystack.as_ptr().add(idx).cast::().read_unaligned() }; - // SAFETY: this requires LANES + block_offset bytes being readable at idx - let b: Block = unsafe { - haystack.as_ptr().add(idx).add(second_probe_offset).cast::().read_unaligned() - }; - let eq_first: Mask = a.simd_eq(first_probe); - let eq_last: Mask = b.simd_eq(second_probe); - let both = eq_first.bitand(eq_last); - let mask = both.to_bitmask(); - - return mask; - }; - - let mut i = 0; - let mut result = false; - // The loop condition must ensure that there's enough headroom to read LANE bytes, - // and not only at the current index but also at the index shifted by block_offset - const UNROLL: usize = 4; - while i + last_byte_offset + UNROLL * Block::LANES < haystack.len() && !result { - let mut masks = [0u16; UNROLL]; - for j in 0..UNROLL { - masks[j] = test_chunk(i + j * Block::LANES); - } - for j in 0..UNROLL { - let mask = masks[j]; - if mask != 0 { - result |= check_mask(i + j * Block::LANES, mask, result); - } - } - i += UNROLL * Block::LANES; - } - while i + last_byte_offset + Block::LANES < haystack.len() && !result { - let mask = test_chunk(i); - if mask != 0 { - result |= check_mask(i, mask, result); - } - i += Block::LANES; - } - - // Process the tail that didn't 
fit into LANES-sized steps. - // This simply repeats the same procedure but as right-aligned chunk instead - // of a left-aligned one. The last byte must be exactly flush with the string end so - // we don't miss a single byte or read out of bounds. - let i = haystack.len() - last_byte_offset - Block::LANES; - let mask = test_chunk(i); - if mask != 0 { - result |= check_mask(i, mask, result); - } - - Some(result) -} - -/// Compares short slices for equality. -/// -/// It avoids a call to libc's memcmp which is faster on long slices -/// due to SIMD optimizations but it incurs a function call overhead. -/// -/// # Safety -/// -/// Both slices must have the same length. -#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] // only called on x86 -#[inline] -unsafe fn small_slice_eq(x: &[u8], y: &[u8]) -> bool { - debug_assert_eq!(x.len(), y.len()); - // This function is adapted from - // https://github.com/BurntSushi/memchr/blob/8037d11b4357b0f07be2bb66dc2659d9cf28ad32/src/memmem/util.rs#L32 - - // If we don't have enough bytes to do 4-byte at a time loads, then - // fall back to the naive slow version. - // - // Potential alternative: We could do a copy_nonoverlapping combined with a mask instead - // of a loop. Benchmark it. - if x.len() < 4 { - for (&b1, &b2) in x.iter().zip(y) { - if b1 != b2 { - return false; - } - } - return true; - } - // When we have 4 or more bytes to compare, then proceed in chunks of 4 at - // a time using unaligned loads. - // - // Also, why do 4 byte loads instead of, say, 8 byte loads? The reason is - // that this particular version of memcmp is likely to be called with tiny - // needles. That means that if we do 8 byte loads, then a higher proportion - // of memcmp calls will use the slower variant above. With that said, this - // is a hypothesis and is only loosely supported by benchmarks. There's - // likely some improvement that could be made here. 
The main thing here - // though is to optimize for latency, not throughput. - - // SAFETY: Via the conditional above, we know that both `px` and `py` - // have the same length, so `px < pxend` implies that `py < pyend`. - // Thus, derefencing both `px` and `py` in the loop below is safe. - // - // Moreover, we set `pxend` and `pyend` to be 4 bytes before the actual - // end of `px` and `py`. Thus, the final dereference outside of the - // loop is guaranteed to be valid. (The final comparison will overlap with - // the last comparison done in the loop for lengths that aren't multiples - // of four.) - // - // Finally, we needn't worry about alignment here, since we do unaligned - // loads. - unsafe { - let (mut px, mut py) = (x.as_ptr(), y.as_ptr()); - let (pxend, pyend) = (px.add(x.len() - 4), py.add(y.len() - 4)); - while px < pxend { - let vx = (px as *const u32).read_unaligned(); - let vy = (py as *const u32).read_unaligned(); - if vx != vy { - return false; - } - px = px.add(4); - py = py.add(4); - } - let vx = (pxend as *const u32).read_unaligned(); - let vy = (pyend as *const u32).read_unaligned(); - vx == vy - } -} diff --git a/library/core/src/str/validations.rs b/library/core/src/str/validations.rs index 2acef432f2063..0d40b536863c0 100644 --- a/library/core/src/str/validations.rs +++ b/library/core/src/str/validations.rs @@ -120,6 +120,38 @@ const fn contains_nonascii(x: usize) -> bool { (x & NONASCII_MASK) != 0 } +/// Reads the first code point out of a byte slice validating whether it’s +/// valid. +/// +/// This is different than [`next_code_point`] in that it doesn’t assume +/// argument is well-formed UTF-8-like string. Together with the character its +/// encoded length is returned. 
+/// +/// ``` +/// #![feature(str_internals)] +/// use core::str::try_first_code_point; +/// +/// assert_eq!(Some(('f', 1)), try_first_code_point(b"foo".as_ref())); +/// assert_eq!(Some(('Ż', 2)), try_first_code_point("Żółw".as_bytes())); +/// assert_eq!(None, try_first_code_point(b"\xffoo".as_ref())); +/// ``` +#[unstable(feature = "str_internals", issue = "none")] +#[inline] +pub const fn try_first_code_point(bytes: &[u8]) -> Option<(char, usize)> { + let first = match bytes.first() { + Some(&byte) => byte, + None => return None, + }; + let (value, length) = if first < 0x80 { + (first as u32, 1) + } else if let Ok((cp, len)) = try_finish_byte_sequence(first, bytes, 0) { + (cp, len) + } else { + return None + }; + Some((unsafe { char::from_u32_unchecked(value) }, length)) +} + /// Walks through `v` checking that it's a valid UTF-8 sequence, /// returning `Ok(())` in that case, or, if it is invalid, `Err(err)`. #[inline(always)] @@ -134,78 +166,13 @@ pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> { let align = v.as_ptr().align_offset(usize_bytes); while index < len { - let old_offset = index; - macro_rules! err { - ($error_len: expr) => { - return Err(Utf8Error { valid_up_to: old_offset, error_len: $error_len }) - }; - } - - macro_rules! next { - () => {{ - index += 1; - // we needed data, but there was none: error! 
- if index >= len { - err!(None) - } - v[index] - }}; - } - + let valid_up_to = index; let first = v[index]; if first >= 128 { - let w = utf8_char_width(first); - // 2-byte encoding is for codepoints \u{0080} to \u{07ff} - // first C2 80 last DF BF - // 3-byte encoding is for codepoints \u{0800} to \u{ffff} - // first E0 A0 80 last EF BF BF - // excluding surrogates codepoints \u{d800} to \u{dfff} - // ED A0 80 to ED BF BF - // 4-byte encoding is for codepoints \u{1000}0 to \u{10ff}ff - // first F0 90 80 80 last F4 8F BF BF - // - // Use the UTF-8 syntax from the RFC - // - // https://tools.ietf.org/html/rfc3629 - // UTF8-1 = %x00-7F - // UTF8-2 = %xC2-DF UTF8-tail - // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / - // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) - // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / - // %xF4 %x80-8F 2( UTF8-tail ) - match w { - 2 => { - if next!() as i8 >= -64 { - err!(Some(1)) - } - } - 3 => { - match (first, next!()) { - (0xE0, 0xA0..=0xBF) - | (0xE1..=0xEC, 0x80..=0xBF) - | (0xED, 0x80..=0x9F) - | (0xEE..=0xEF, 0x80..=0xBF) => {} - _ => err!(Some(1)), - } - if next!() as i8 >= -64 { - err!(Some(2)) - } - } - 4 => { - match (first, next!()) { - (0xF0, 0x90..=0xBF) | (0xF1..=0xF3, 0x80..=0xBF) | (0xF4, 0x80..=0x8F) => {} - _ => err!(Some(1)), - } - if next!() as i8 >= -64 { - err!(Some(2)) - } - if next!() as i8 >= -64 { - err!(Some(3)) - } - } - _ => err!(Some(1)), + match try_finish_byte_sequence(first, v, index) { + Ok((_value, length)) => index += length, + Err(error_len) => return Err(Utf8Error { valid_up_to, error_len }), } - index += 1; } else { // Ascii case, try to skip forward quickly. // When the pointer is aligned, read 2 words of data per iteration @@ -241,6 +208,95 @@ pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> { Ok(()) } +/// Try to finish an UTF-8 byte sequence. 
+/// +/// Assumes that `bytes[index] == first` and that `first >= 128`, i.e. that +/// `index` points at the beginning of a non-ASCII UTF-8 sequence in `bytes`. +/// +/// If the byte sequence at the index is correct, returns the decoded code point and +/// length of the sequence. If it is invalid, returns the number of invalid bytes, +/// or `None` if the read was cut short. +#[inline(always)] +#[rustc_const_unstable(feature = "str_internals", issue = "none")] +const fn try_finish_byte_sequence( + first: u8, + bytes: &[u8], + index: usize, +) -> Result<(u32, usize), Option<u8>> { + macro_rules! get { + (raw $offset:expr) => { + match bytes.get(index + $offset) { + Some(byte) => *byte, + None => return Err(None), + } + }; + (cont $offset:expr) => {{ + let byte = get!(raw $offset); + if !utf8_is_cont_byte(byte) { + return Err(Some($offset as u8)) + } + byte + }} + } + + // 2-byte encoding is for codepoints \u{0080} to \u{07ff} + // first C2 80 last DF BF + // 3-byte encoding is for codepoints \u{0800} to \u{ffff} + // first E0 A0 80 last EF BF BF + // excluding surrogates codepoints \u{d800} to \u{dfff} + // ED A0 80 to ED BF BF + // 4-byte encoding is for codepoints \u{1000}0 to \u{10ff}ff + // first F0 90 80 80 last F4 8F BF BF + // + // Use the UTF-8 syntax from the RFC + // + // https://tools.ietf.org/html/rfc3629 + // UTF8-1 = %x00-7F + // UTF8-2 = %xC2-DF UTF8-tail + // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / + // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) + // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / + // %xF4 %x80-8F 2( UTF8-tail ) + match utf8_char_width(first) { + 2 => { + let second = get!(cont 1); + let value = utf8_first_byte(first, 2); + let value = utf8_acc_cont_byte(value, second); + Ok((value, 2)) + } + 3 => { + let second = get!(raw 1); + match (first, second) { + (0xE0 , 0xA0..=0xBF) | + (0xE1..=0xEC, 0x80..=0xBF) | + (0xED , 0x80..=0x9F) | + (0xEE..=0xEF, 0x80..=0xBF) => {} + _ => return
Err(Some(1)), + } + let value = utf8_first_byte(first, 3); + let value = utf8_acc_cont_byte(value, second); + let value = utf8_acc_cont_byte(value, get!(cont 2)); + Ok((value, 3)) + } + 4 => { + let second = get!(raw 1); + match (first, second) { + (0xF0 , 0x90..=0xBF) | + (0xF1..=0xF3, 0x80..=0xBF) | + (0xF4 , 0x80..=0x8F) => {} + _ => return Err(Some(1)), + } + let value = utf8_first_byte(first, 4); + let value = utf8_acc_cont_byte(value, second); + let value = utf8_acc_cont_byte(value, get!(cont 2)); + let value = utf8_acc_cont_byte(value, get!(cont 3)); + Ok((value, 4)) + } + _ => Err(Some(1)), + } +} + + // https://tools.ietf.org/html/rfc3629 const UTF8_CHAR_WIDTH: &[u8; 256] = &[ // 1 2 3 4 5 6 7 8 9 A B C D E F diff --git a/library/core/tests/pattern.rs b/library/core/tests/pattern.rs index d4bec996d89a1..0e943bd80ec7f 100644 --- a/library/core/tests/pattern.rs +++ b/library/core/tests/pattern.rs @@ -1,4 +1,4 @@ -use std::str::pattern::*; +use std::pattern::*; // This macro makes it easier to write // tests that do a series of iterations diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs index 80ed34157e6dc..4c3a72b662bfe 100644 --- a/library/std/src/ffi/os_str.rs +++ b/library/std/src/ffi/os_str.rs @@ -8,11 +8,13 @@ use crate::fmt; use crate::hash::{Hash, Hasher}; use crate::iter::Extend; use crate::ops; +use crate::pattern::{DoubleEndedSearcher, Pattern, Searcher, SearchStep, ReverseSearcher}; use crate::rc::Rc; use crate::str::FromStr; use crate::sync::Arc; use crate::sys::os_str::{Buf, Slice}; +use crate::sys::os_str_pattern::Slice as Slice2; use crate::sys_common::{AsInner, FromInner, IntoInner}; /// A type that can represent owned, mutable platform-native strings, but is @@ -979,6 +981,82 @@ impl OsStr { pub fn eq_ignore_ascii_case<S: AsRef<OsStr>>(&self, other: S) -> bool { self.inner.eq_ignore_ascii_case(&other.as_ref().inner) } + + + /// Returns `true` if the given pattern matches a prefix of this `OsStr`. + /// + /// # Examples + /// + /// ``` + /// # #![feature(pattern)]
+ /// use std::ffi::OsStr; + /// + /// assert!(OsStr::new("foo").starts_with('f')); + /// assert!(!OsStr::new("foo").starts_with('F')); + /// ``` + #[unstable(feature = "pattern", issue = "27721")] + pub fn starts_with<'a, P: Pattern<&'a OsStr>>(&'a self, pat: P) -> bool { + pat.is_prefix_of(self) + } + + /// Returns `true` if the given pattern matches a suffix of this `OsStr`. + /// + /// # Examples + /// + /// ``` + /// # #![feature(pattern)] + /// use std::ffi::OsStr; + /// + /// assert!(OsStr::new("foo").ends_with('o')); + /// assert!(!OsStr::new("foo").ends_with('O')); + /// ``` + #[unstable(feature = "pattern", issue = "27721")] + pub fn ends_with<'a, P>(&'a self, pat: P) -> bool + where P: Pattern<&'a OsStr, Searcher: ReverseSearcher<&'a OsStr>>, + { + pat.is_suffix_of(self) + } + + /// Returns this `OsStr` with the given prefix removed, or `None` if it does not start with the prefix. + /// + /// # Examples + /// + /// ``` + /// # #![feature(pattern)] + /// use std::ffi::OsStr; + /// + /// assert_eq!(Some(OsStr::new("oo")), OsStr::new("foo").strip_prefix('f')); + /// assert_eq!(None, OsStr::new("foo").strip_prefix('F')); + /// ``` + #[must_use = "this returns the remaining substring as a new slice, \ without modifying the original"] + #[unstable(feature = "pattern", issue = "27721")] + pub fn strip_prefix<'a, P>(&'a self, prefix: P) -> Option<&'a OsStr> + where P: Pattern<&'a OsStr> + { + prefix.strip_prefix_of(self) + } + + /// Returns this `OsStr` with the given suffix removed, or `None` if it does not end with the suffix. + /// + /// # Examples + /// + /// ``` + /// # #![feature(pattern)] + /// use std::ffi::OsStr; + /// + /// assert_eq!(Some(OsStr::new("fo")), OsStr::new("foo").strip_suffix('o')); + /// assert_eq!(None, OsStr::new("foo").strip_suffix('O')); + /// ``` + #[must_use = "this returns the remaining substring as a new slice, \ without modifying the original"] + #[unstable(feature = "pattern", issue = "27721")] + pub fn strip_suffix<'a, P>(&'a self, suffix: P) -> Option<&'a OsStr> + where P: Pattern<&'a OsStr>, +

>::Searcher: ReverseSearcher<&'a OsStr>, + { + suffix.strip_suffix_of(self) + } } #[stable(feature = "box_from_os_str", since = "1.17.0")] @@ -1446,3 +1524,94 @@ impl<'a> FromIterator> for OsString { } } } + +#[unstable(feature = "pattern", issue = "27721")] +impl<'hs> crate::pattern::Haystack for &'hs OsStr { + type Cursor = usize; + + fn cursor_at_front(&self) -> usize { 0 } + fn cursor_at_back(&self) -> usize { self.inner.inner.len() } + + unsafe fn split_at_cursor_unchecked(self, cursor: usize) -> (Self, Self) { + let bytes = &self.inner.inner; + unsafe { + let head = bytes.get_unchecked(..cursor); + // XXX + let head = core::mem::transmute(head); + + let tail = bytes.get_unchecked(cursor..); + // XXX + let tail = core::mem::transmute(tail); + + (head, tail) + } + } +} + +macro_rules! define_pattern { + ($pattern:ty) => { + #[unstable(feature = "pattern", issue = "27721")] + impl<'hs> Pattern<&'hs OsStr> for $pattern { + type Searcher = SearcherImpl<<$pattern as Pattern<&'hs Slice2>>::Searcher>; + + fn into_searcher(self, haystack: &'hs OsStr) -> Self::Searcher { + let haystack: &'hs Slice2 = unsafe { core::mem::transmute(haystack) }; + Self::Searcher::new(self.into_searcher(haystack)) + } + + fn is_contained_in(self, haystack: &'hs OsStr) -> bool { + let haystack: &'hs Slice2 = unsafe { core::mem::transmute(haystack) }; + self.is_contained_in(haystack) + } + + fn is_prefix_of(self, haystack: &'hs OsStr) -> bool { + let haystack: &'hs Slice2 = unsafe { core::mem::transmute(haystack) }; + self.is_prefix_of(haystack) + } + + fn is_suffix_of(self, haystack: &'hs OsStr) -> bool { + let haystack: &'hs Slice2 = unsafe { core::mem::transmute(haystack) }; + self.is_suffix_of(haystack) + } + + fn strip_prefix_of(self, haystack: &'hs OsStr) -> Option<&'hs OsStr> { + let haystack: &'hs Slice2 = unsafe { core::mem::transmute(haystack) }; + self.strip_prefix_of(haystack).map(core::mem::transmute) + } + + fn strip_suffix_of(self, haystack: &'hs OsStr) -> Option<&'hs OsStr> { 
+ let haystack: &'hs Slice2 = unsafe { core::mem::transmute(haystack) }; + self.strip_suffix_of(haystack).map(core::mem::transmute) + } + } + + #[unstable(feature = "pattern", issue = "27721")] + pub struct SearcherImpl(S); + + #[unstable(feature = "pattern", issue = "27721")] + unsafe impl<'hs, S> Searcher<&'hs OsStr> for SearcherImpl + where S: Searcher<&'hs Slice2> + { + fn haystack(&self) -> &'hs OsStr { core::mem::transmute(self.0.haystack()) } + + fn next(&mut self) -> SearchStep { self.0.next() } + fn next_match(&mut self) -> Option<(usize, usize)> { self.0.next_match() } + fn next_reject(&mut self) -> Option<(usize, usize)> { self.0.next_reject() } + } + + #[unstable(feature = "pattern", issue = "27721")] + unsafe impl<'hs, S> ReverseSearcher<&'hs OsStr> for SearcherImpl + where S: ReverseSearcher<&'hs Slice2> + { + fn next_back(&mut self) -> SearchStep { self.0.next_back() } + fn next_match_back(&mut self) -> Option<(usize, usize)> { self.0.next_match_back() } + fn next_reject_back(&mut self) -> Option<(usize, usize)> { self.0.next_reject_back() } + } + + #[unstable(feature = "pattern", issue = "27721")] + impl<'hs, S> DoubleEndedSearcher<&'hs OsStr> for SearcherImpl + where S: DoubleEndedSearcher<&'hs Slice2> {} + } +} + +define_pattern!(char); diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs index 363a266717467..836e35f427b14 100644 --- a/library/std/src/lib.rs +++ b/library/std/src/lib.rs @@ -238,6 +238,7 @@ #![feature(allocator_internals)] #![feature(allow_internal_unsafe)] #![feature(allow_internal_unstable)] +#![feature(associated_type_bounds)] #![feature(c_unwind)] #![feature(cfg_target_thread_local)] #![feature(concat_idents)] @@ -294,6 +295,7 @@ #![feature(panic_can_unwind)] #![feature(panic_info_message)] #![feature(panic_internals)] +#![feature(pattern)] #![feature(pointer_byte_offsets)] #![feature(pointer_is_aligned)] #![feature(portable_simd)] @@ -478,6 +480,8 @@ pub use core::mem; pub use core::ops; #[stable(feature = "rust1", 
since = "1.0.0")] pub use core::option; +#[unstable(feature = "pattern", issue = "27721")] +pub use core::pattern; #[stable(feature = "pin", since = "1.33.0")] pub use core::pin; #[stable(feature = "rust1", since = "1.0.0")] diff --git a/library/std/src/sys/unix/mod.rs b/library/std/src/sys/unix/mod.rs index 30a96be14300a..3a6c4ca3b1211 100644 --- a/library/std/src/sys/unix/mod.rs +++ b/library/std/src/sys/unix/mod.rs @@ -31,6 +31,7 @@ pub mod net; pub use self::l4re::net; pub mod os; pub mod os_str; +pub mod os_str_pattern; pub mod path; pub mod pipe; pub mod process; diff --git a/library/std/src/sys/unix/os_str.rs b/library/std/src/sys/unix/os_str.rs index 017e2af29d4f4..5c2480f6b94aa 100644 --- a/library/std/src/sys/unix/os_str.rs +++ b/library/std/src/sys/unix/os_str.rs @@ -14,7 +14,6 @@ use crate::sys_common::{AsInner, IntoInner}; use core::str::Utf8Chunks; #[cfg(test)] -#[path = "../unix/os_str/tests.rs"] mod tests; #[derive(Hash)] diff --git a/library/std/src/sys/unix/os_str_pattern.rs b/library/std/src/sys/unix/os_str_pattern.rs new file mode 100644 index 0000000000000..22fbb655b2061 --- /dev/null +++ b/library/std/src/sys/unix/os_str_pattern.rs @@ -0,0 +1,419 @@ +#![unstable( + feature = "pattern", + reason = "API not fully fleshed out and ready to be stabilized", + issue = "27721" +)] + +use core::pattern::{Haystack, Pattern, SearchStep}; +use core::pattern; +use core::str::try_first_code_point; + +#[derive(Debug)] +pub struct Slice { + pub inner: [u8], +} + +impl Slice { + #[inline] + fn from_u8_slice(s: &[u8]) -> &Slice { + unsafe { core::mem::transmute(s) } + } +} + +///////////////////////////////////////////////////////////////////////////// +// Impl for Haystack +///////////////////////////////////////////////////////////////////////////// + +impl<'hs> Haystack for &'hs Slice { + type Cursor = usize; + + fn cursor_at_front(&self) -> usize { 0 } + fn cursor_at_back(&self) -> usize { self.inner.len() } + + unsafe fn split_at_cursor_unchecked(self, 
pos: usize) -> (Self, Self) { + // SAFETY: Caller promises cursor is valid. + unsafe { (get_unchecked(&self, ..pos), get_unchecked(&self, pos..)) } + } +} + +///////////////////////////////////////////////////////////////////////////// +// Impl Pattern for char +///////////////////////////////////////////////////////////////////////////// + +impl<'hs> Pattern<&'hs Slice> for char { + type Searcher = CharSearcher<'hs>; + + fn into_searcher(self, slice: &'hs Slice) -> Self::Searcher { + Self::Searcher::new(slice, self) + } + + fn is_contained_in(self, slice: &'hs Slice) -> bool { + let mut buf = [0; 4]; + slice.inner.contains(self.encode_utf8(&mut buf).as_bytes()) + } + + fn is_prefix_of(self, slice: &'hs Slice) -> bool { + let mut buf = [0; 4]; + slice.inner.starts_with(self.encode_utf8(&mut buf).as_bytes()) + } + + fn is_suffix_of(self, slice: &'hs Slice) -> bool { + let mut buf = [0; 4]; + slice.inner.ends_with(self.encode_utf8(&mut buf).as_bytes()) + } + + fn strip_prefix_of(self, slice: &'hs Slice) -> Option<&'hs Slice> { + let mut buf = [0; 4]; + let needle = self.encode_utf8(&mut buf).as_bytes(); + slice.inner.starts_with(needle).then(|| { + // SAFETY: We’ve just checked slice starts with needle. + unsafe { get_unchecked(slice, needle.len()..) } + }) + } + + fn strip_suffix_of(self, slice: &'hs Slice) -> Option<&'hs Slice> { + let mut buf = [0; 4]; + let needle = self.encode_utf8(&mut buf).as_bytes(); + slice.inner.ends_with(needle).then(|| { + // SAFETY: We’ve just checked slice starts with needle. + unsafe { get_unchecked(slice, ..slice.inner.len() - needle.len()) } + }) + } +} + +#[derive(Clone, Debug)] +pub struct CharSearcher<'hs> { + /// Zero-padded UTF-8 encoded character we’re searching for. + _needle: Box<[u8; 4]>, + /// Slice searcher over the slice. 
+    searcher: <&'hs [u8] as Pattern<&'hs [u8]>>::Searcher,
+}
+
+impl<'hs> CharSearcher<'hs> {
+    /// Builds a searcher looking for the UTF-8 encoding of `chr` in `slice`.
+    fn new(slice: &'hs Slice, chr: char) -> Self {
+        let mut buf = [0; 4];
+        let len = chr.encode_utf8(&mut buf).len();
+        // Boxed so that the bytes keep a stable address when `Self` is moved.
+        let needle = Box::new(buf);
+        // XXX: This is potentially unsound?  We’re transmuting needle’s
+        // lifetime to 'hs which is definitely not true, but at the same time
+        // Searcher dies when needle dies so it won’t reference it after it
+        // dies.
+        let pattern: &'hs [u8] = unsafe { core::mem::transmute(&needle[..len]) };
+        Self {
+            _needle: needle,
+            searcher: pattern.into_searcher(&slice.inner)
+        }
+    }
+}
+
+unsafe impl<'hs> pattern::Searcher<&'hs Slice> for CharSearcher<'hs> {
+    fn haystack(&self) -> &'hs Slice {
+        Slice::from_u8_slice(self.searcher.haystack())
+    }
+
+    fn next(&mut self) -> SearchStep {
+        self.searcher.next()
+    }
+
+    fn next_match(&mut self) -> Option<(usize, usize)> {
+        self.searcher.next_match()
+    }
+
+    fn next_reject(&mut self) -> Option<(usize, usize)> {
+        // Must forward to the reject search: callers expect the next
+        // non-matching range here, not the next match.
+        self.searcher.next_reject()
+    }
+}
+
+unsafe impl<'hs> pattern::ReverseSearcher<&'hs Slice> for CharSearcher<'hs> {
+    fn next_back(&mut self) -> SearchStep {
+        self.searcher.next_back()
+    }
+
+    fn next_match_back(&mut self) -> Option<(usize, usize)> {
+        self.searcher.next_match_back()
+    }
+
+    fn next_reject_back(&mut self) -> Option<(usize, usize)> {
+        // Must forward to the reverse reject search, mirroring `next_reject`.
+        self.searcher.next_reject_back()
+    }
+}
+
+impl<'hs> pattern::DoubleEndedSearcher<&'hs Slice> for CharSearcher<'hs> {}
+
+/////////////////////////////////////////////////////////////////////////////
+// Impl Pattern for &FnMut(char)
+/////////////////////////////////////////////////////////////////////////////
+
+// XXX TODO
+// This is work-around of the following:
+//     error[E0210]: type parameter `F` must be covered by another type when it
+//                   appears before the first local type (`pattern::Slice`)
+//        --> library/std/src/sys/unix/os_str/pattern.rs:148:11
+//         |
+//     148 | impl<'hs, F: FnMut(char) -> bool> Pattern<&'hs Slice> for F {
+// | ^ type parameter `F` must be covered by another type when +// it appears before the first local type (`pattern::Slice`) +// | +pub struct Predicate(F); + +#[rustc_has_incoherent_inherent_impls] +impl<'hs, F: FnMut(char) -> bool> Pattern<&'hs Slice> for F { + type Searcher = PredicateSearcher<'hs, F>; + + fn into_searcher(self, slice: &'hs Slice) -> Self::Searcher { + Self::Searcher::new(slice, self) + } + + fn is_prefix_of(mut self, slice: &'hs Slice) -> bool { + matches!(try_first_code_point(&slice.inner), + Some((chr, _)) if self(chr)) + } + + fn is_suffix_of(mut self, slice: &'hs Slice) -> bool { + matches!(try_last_code_point(&slice.inner), + Some((chr, _)) if self(chr)) + } + + fn strip_prefix_of(mut self, slice: &'hs Slice) -> Option<&'hs Slice> { + let bytes = &slice.inner; + if let Some((chr, len)) = try_first_code_point(bytes) { + if self(chr) { + return Some(Slice::from_u8_slice(&bytes[len..])); + } + } + None + } + + fn strip_suffix_of(mut self, slice: &'hs Slice) -> Option<&'hs Slice> { + let bytes = &slice.inner; + if let Some((chr, len)) = try_last_code_point(bytes) { + if self(chr) { + return Some(Slice::from_u8_slice(&bytes[..bytes.len() - len])); + } + } + None + } +} + +#[derive(Clone, Debug)] +pub struct PredicateSearcher<'hs, F> { + slice: &'hs Slice, + pred: F, + + start: usize, + end: usize, + fw_match_len: usize, + bw_match_len: usize, +} + +impl<'hs, F: FnMut(char) -> bool> PredicateSearcher<'hs, F> { + fn new(slice: &'hs Slice, pred: F) -> Self { + Self { + slice: slice, + pred, + start: 0, + end: 0, + fw_match_len: 0, + bw_match_len: 0, + } + } + + /// Looks for the next match and returns its position and length. Doesn’t + /// update searcher’s state. 
+    fn next_match_impl(&mut self) -> Option<(usize, usize)> {
+        // Never look past `end`: that part was already consumed from the back.
+        let bytes = &self.slice.inner[..self.end];
+        let mut pos = self.start;
+        while pos < bytes.len() {
+            // Skip stray continuation bytes; they cannot start a character.
+            pos += count_utf8_cont_bytes(bytes[pos..].iter());
+            if let Some((chr, len)) = try_first_code_point(&bytes[pos..]) {
+                if (self.pred)(chr) {
+                    return Some((pos, len))
+                }
+                pos += len;
+            } else {
+                pos += 1;
+            }
+        }
+        None
+    }
+
+    /// Implementation of Searcher::next, Searcher::next_match and
+    /// Searcher::next_reject functions.
+    ///
+    /// The return type `R` selects which kinds of steps are reported; steps
+    /// of the other kind are consumed silently and the search continues.
+    fn next_impl<R: SearchReturn>(&mut self) -> R {
+        while self.start < self.end {
+            if self.fw_match_len == 0 {
+                let (pos, len) = self.next_match_impl().unwrap_or((self.end, 0));
+                self.fw_match_len = len;
+                let start = self.start;
+                if pos != start {
+                    self.start = pos;
+                    if let Some(ret) = R::rejecting(start, pos) {
+                        return ret;
+                    }
+                }
+                if len == 0 {
+                    // No match is left and the rest of the haystack has just
+                    // been consumed as a reject.  Stop here instead of
+                    // reporting a bogus empty match at `end`.
+                    break;
+                }
+            }
+
+            debug_assert_ne!(0, self.fw_match_len);
+            let pos = self.start;
+            self.start += self.fw_match_len;
+            self.fw_match_len = 0;
+            if let Some(ret) = R::matching(pos, self.start) {
+                return ret;
+            }
+        }
+        R::DONE
+    }
+
+    /// Looks for the next match back and returns its position and length.
+    /// Doesn’t update searcher’s state.
+    fn next_match_back_impl(&mut self) -> Option<(usize, usize)> {
+        let mut bytes = &self.slice.inner[self.start..self.end];
+        while !bytes.is_empty() {
+            // Step back over trailing continuation bytes to the byte that
+            // precedes them; bail out if there is no such byte.
+            let pos = bytes.len() - count_utf8_cont_bytes(bytes.iter().rev());
+            let pos = pos.checked_sub(1)?;
+            if let Some((chr, len)) = try_first_code_point(&bytes[pos..]) {
+                if (self.pred)(chr) {
+                    // `pos` is relative to `start`; report an absolute offset.
+                    return Some((pos + self.start, len))
+                }
+            }
+            bytes = &bytes[..pos]
+        }
+        None
+    }
+
+    /// Implementation of ReverseSearcher::next and ReverseSearcher::next_match
+    /// functions.
+    fn next_back_impl<R: SearchReturn>(&mut self) -> R {
+        while self.start < self.end {
+            if self.bw_match_len == 0 {
+                let end = self.end;
+                // With no match left, everything in `start..end` is a reject,
+                // so pretend an empty match sits at `start`.
+                let (pos, len) = self.next_match_back_impl().unwrap_or((self.start, 0));
+                self.bw_match_len = len;
+                if pos + len != end {
+                    self.end = pos + len;
+                    if let Some(ret) = R::rejecting(self.end, end) {
+                        return ret;
+                    }
+                }
+                if len == 0 {
+                    // Nothing matched and the remaining range has been
+                    // consumed as a reject; do not report an empty match.
+                    break;
+                }
+            }
+
+            debug_assert_ne!(0, self.bw_match_len);
+            let end = self.end;
+            self.end -= self.bw_match_len;
+            self.bw_match_len = 0;
+            if let Some(ret) = R::matching(self.end, end) {
+                return ret;
+            }
+        }
+        R::DONE
+    }
+}
+
+unsafe impl<'hs, F: FnMut(char) -> bool> pattern::Searcher<&'hs Slice> for PredicateSearcher<'hs, F> {
+    fn haystack(&self) -> &'hs Slice { self.slice }
+
+    fn next(&mut self) -> SearchStep {
+        self.next_impl()
+    }
+
+    fn next_match(&mut self) -> Option<(usize, usize)> {
+        self.next_impl::<MatchOnly>().0
+    }
+
+    fn next_reject(&mut self) -> Option<(usize, usize)> {
+        self.next_impl::<RejectOnly>().0
+    }
+}
+
+unsafe impl<'hs, F: FnMut(char) -> bool> pattern::ReverseSearcher<&'hs Slice> for PredicateSearcher<'hs, F> {
+    fn next_back(&mut self) -> SearchStep {
+        self.next_back_impl()
+    }
+
+    fn next_match_back(&mut self) -> Option<(usize, usize)> {
+        self.next_back_impl::<MatchOnly>().0
+    }
+
+    fn next_reject_back(&mut self) -> Option<(usize, usize)> {
+        self.next_back_impl::<RejectOnly>().0
+    }
+}
+
+impl<'hs, F: FnMut(char) -> bool> pattern::DoubleEndedSearcher<&'hs Slice> for PredicateSearcher<'hs, F> {}
+
+/////////////////////////////////////////////////////////////////////////////
+
+/// Possible return type of a search.
+///
+/// It abstracts over the differences between the `next`, `next_match` and
+/// `next_reject` methods.  Depending on the return type, an implementation of
+/// those functions will generate matches and rejects, only matches or only
+/// rejects.
+trait SearchReturn: Sized { + const DONE: Self; + fn matching(start: usize, end: usize) -> Option; + fn rejecting(start: usize, end: usize) -> Option; +} + +struct MatchOnly(Option<(usize, usize)>); +struct RejectOnly(Option<(usize, usize)>); + +impl SearchReturn for SearchStep { + const DONE: Self = SearchStep::Done; + fn matching(s: usize, e: usize) -> Option { + Some(SearchStep::Match(s, e)) + } + fn rejecting(s: usize, e: usize) ->Option { + Some(SearchStep::Reject(s, e)) + } +} + +impl SearchReturn for MatchOnly { + const DONE: Self = Self(None); + fn matching(s: usize, e: usize) -> Option { Some(Self(Some((s, e)))) } + fn rejecting(_s: usize, _e: usize) -> Option { None } +} + +impl SearchReturn for RejectOnly { + const DONE: Self = Self(None); + fn matching(_s: usize, _e: usize) -> Option { None } + fn rejecting(s: usize, e: usize) -> Option { Some(Self(Some((s, e)))) } +} + + +unsafe fn get_unchecked(slice: &Slice, index: I) -> &Slice +where I: core::slice::SliceIndex<[u8], Output = [u8]>, +{ + // SAFETY: Caller Promises index is valid. + Slice::from_u8_slice(unsafe { slice.inner.get_unchecked(index) }) +} + + +/// Tries to extract UTF-8 sequence from the end of the slice. +/// +/// If last bytes of the slice don’t form a valid UTF-8 sequence (or if slice is +/// empty), returns `None`. If they do, decodes the character and returns its +/// encoded length. +fn try_last_code_point(bytes: &[u8]) -> Option<(char, usize)> { + // Fast path: ASCII + let last = *bytes.last()?; + if last < 0x80 { + return Some((unsafe { char::from_u32_unchecked(last as u32) }, 1)); + } + + // Count how many continuation bytes there are at the end. + let count = count_utf8_cont_bytes(bytes.iter().rev().take(4)); + if count == bytes.len() || count >= 4 { + return None; + } + let pos = bytes.len() - count - 1; + + // Try decode. If length matches, we have ourselves a character. 
+ let (chr, len) = try_first_code_point(&bytes[pos..])?; + (len == count + 1).then_some((chr, len)) +} + + +/// Counts UTF-8 continuation bytes at the beginning of the iterator. +#[inline] +fn count_utf8_cont_bytes<'a>(bytes: impl Iterator) -> usize { + bytes.take_while(|&&byte| (byte as i8) < -64).count() +} diff --git a/tests/run-make-fulldeps/save-analysis-fail/foo.rs b/tests/run-make-fulldeps/save-analysis-fail/foo.rs index c5a70605e04cb..dccf83f6ec3a1 100644 --- a/tests/run-make-fulldeps/save-analysis-fail/foo.rs +++ b/tests/run-make-fulldeps/save-analysis-fail/foo.rs @@ -403,17 +403,17 @@ impl Iterator for nofields { } } -trait Pattern<'a> { +trait Pattern<&'a str> { type Searcher; } struct CharEqPattern; -impl<'a> Pattern<'a> for CharEqPattern { +impl<'a> Pattern<&'a str> for CharEqPattern { type Searcher = CharEqPattern; } -struct CharSearcher<'a>(>::Searcher); +struct CharSearcher<'a>(>::Searcher); pub trait Error {} diff --git a/tests/run-make-fulldeps/save-analysis/foo.rs b/tests/run-make-fulldeps/save-analysis/foo.rs index 384589de3b480..5c6fe02cbdc12 100644 --- a/tests/run-make-fulldeps/save-analysis/foo.rs +++ b/tests/run-make-fulldeps/save-analysis/foo.rs @@ -402,17 +402,17 @@ impl Iterator for nofields { } } -trait Pattern<'a> { +trait Pattern<&'a str> { type Searcher; } struct CharEqPattern; -impl<'a> Pattern<'a> for CharEqPattern { +impl<'a> Pattern<&'a str> for CharEqPattern { type Searcher = CharEqPattern; } -struct CharSearcher<'a>(>::Searcher); +struct CharSearcher<'a>(>::Searcher); pub trait Error {} diff --git a/tests/rustdoc/async-fn.rs b/tests/rustdoc/async-fn.rs index 70bcbcb6ff44a..3f641473d308a 100644 --- a/tests/rustdoc/async-fn.rs +++ b/tests/rustdoc/async-fn.rs @@ -46,7 +46,7 @@ impl Foo { pub async fn mut_self(mut self, mut first: usize) {} } -pub trait Pattern<'a> {} +pub trait Pattern<&'a str> {} pub trait Trait {} // @has async_fn/fn.const_generics.html @@ -91,5 +91,5 @@ impl Foo { // @has - 
'//pre[@class="rust item-decl"]' "pub async fn named<'a, 'b>(foo: &'a str) -> &'b str" pub async fn named<'a, 'b>(foo: &'a str) -> &'b str {} // @has async_fn/fn.named_trait.html -// @has - '//pre[@class="rust item-decl"]' "pub async fn named_trait<'a, 'b>(foo: impl Pattern<'a>) -> impl Pattern<'b>" -pub async fn named_trait<'a, 'b>(foo: impl Pattern<'a>) -> impl Pattern<'b> {} +// @has - '//pre[@class="rust item-decl"]' "pub async fn named_trait<'a, 'b>(foo: impl Pattern<&'a str>) -> impl Pattern<'b>" +pub async fn named_trait<'a, 'b>(foo: impl Pattern<&'a str>) -> impl Pattern<'b> {} diff --git a/tests/ui/traits/bound/assoc-fn-bound-root-obligation.rs b/tests/ui/traits/bound/assoc-fn-bound-root-obligation.rs index f9a9347641143..8a047a082c4a4 100644 --- a/tests/ui/traits/bound/assoc-fn-bound-root-obligation.rs +++ b/tests/ui/traits/bound/assoc-fn-bound-root-obligation.rs @@ -3,7 +3,7 @@ fn strip_lf(s: &str) -> &str { //~^ ERROR expected a `FnMut<(char,)>` closure, found `u8` //~| NOTE expected an `FnMut<(char,)>` closure, found `u8` //~| HELP the trait `FnMut<(char,)>` is not implemented for `u8` - //~| HELP the following other types implement trait `Pattern<'a>`: + //~| HELP the following other types implement trait `Pattern<&'a str>`: //~| NOTE required for `u8` to implement `Pattern<'_>` } diff --git a/tests/ui/traits/bound/assoc-fn-bound-root-obligation.stderr b/tests/ui/traits/bound/assoc-fn-bound-root-obligation.stderr index ce9ab2d811ae1..e97aaa6834309 100644 --- a/tests/ui/traits/bound/assoc-fn-bound-root-obligation.stderr +++ b/tests/ui/traits/bound/assoc-fn-bound-root-obligation.stderr @@ -5,7 +5,7 @@ LL | s.strip_suffix(b'\n').unwrap_or(s) | ^^^^^^^^^^^^ expected an `FnMut<(char,)>` closure, found `u8` | = help: the trait `FnMut<(char,)>` is not implemented for `u8` - = help: the following other types implement trait `Pattern<'a>`: + = help: the following other types implement trait 
`Pattern<&'a str>`: &'b String &'b [char; N] &'b [char]