Skip to content

Commit b5cee02

Browse files
author
Clar Charr
committed
Add str::split_ascii_whitespace.
1 parent 23b5516 commit b5cee02

File tree

3 files changed

+158
-4
lines changed

3 files changed

+158
-4
lines changed

src/liballoc/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@
108108
#![cfg_attr(stage0, feature(repr_transparent))]
109109
#![feature(rustc_attrs)]
110110
#![feature(specialization)]
111+
#![feature(split_ascii_whitespace)]
111112
#![feature(staged_api)]
112113
#![feature(str_internals)]
113114
#![feature(trusted_len)]

src/liballoc/str.rs

+2
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ pub use core::str::SplitWhitespace;
7878
pub use core::str::pattern;
7979
#[stable(feature = "encode_utf16", since = "1.8.0")]
8080
pub use core::str::EncodeUtf16;
81+
#[unstable(feature = "split_ascii_whitespace", issue = "48656")]
82+
pub use core::str::SplitAsciiWhitespace;
8183

8284
#[unstable(feature = "slice_concat_ext",
8385
reason = "trait should not have to exist",

src/libcore/str/mod.rs

+155-4
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use char;
2121
use fmt;
2222
use iter::{Map, Cloned, FusedIterator, TrustedLen, Filter};
2323
use iter_private::TrustedRandomAccess;
24-
use slice::{self, SliceIndex};
24+
use slice::{self, SliceIndex, Split as SliceSplit};
2525
use mem;
2626

2727
pub mod pattern;
@@ -2722,7 +2722,10 @@ impl str {
27222722
/// the original string slice, separated by any amount of whitespace.
27232723
///
27242724
/// 'Whitespace' is defined according to the terms of the Unicode Derived
2725-
/// Core Property `White_Space`.
2725+
/// Core Property `White_Space`. If you only want to split on ASCII whitespace
2726+
/// instead, use [`split_ascii_whitespace`].
2727+
///
2728+
/// [`split_ascii_whitespace`]: #method.split_ascii_whitespace
27262729
///
27272730
/// # Examples
27282731
///
@@ -2756,6 +2759,53 @@ impl str {
27562759
SplitWhitespace { inner: self.split(IsWhitespace).filter(IsNotEmpty) }
27572760
}
27582761

2762+
/// Splits a string slice by ASCII whitespace.
2763+
///
2764+
/// The iterator returned will return string slices that are sub-slices of
2765+
/// the original string slice, separated by any amount of ASCII whitespace.
2766+
///
2767+
/// To split by Unicode `White_Space` instead, use [`split_whitespace`].
2768+
///
2769+
/// [`split_whitespace`]: #method.split_whitespace
2770+
///
2771+
/// # Examples
2772+
///
2773+
/// Basic usage:
2774+
///
2775+
/// ```
2776+
/// #![feature(split_ascii_whitespace)]
2777+
/// let mut iter = "A few words".split_ascii_whitespace();
2778+
///
2779+
/// assert_eq!(Some("A"), iter.next());
2780+
/// assert_eq!(Some("few"), iter.next());
2781+
/// assert_eq!(Some("words"), iter.next());
2782+
///
2783+
/// assert_eq!(None, iter.next());
2784+
/// ```
2785+
///
2786+
/// All kinds of ASCII whitespace are considered:
2787+
///
2788+
/// ```
2789+
/// #![feature(split_ascii_whitespace)]
/// let mut iter = " Mary had\ta little \n\t lamb".split_ascii_whitespace();
2790+
/// assert_eq!(Some("Mary"), iter.next());
2791+
/// assert_eq!(Some("had"), iter.next());
2792+
/// assert_eq!(Some("a"), iter.next());
2793+
/// assert_eq!(Some("little"), iter.next());
2794+
/// assert_eq!(Some("lamb"), iter.next());
2795+
///
2796+
/// assert_eq!(None, iter.next());
2797+
/// ```
2798+
#[unstable(feature = "split_ascii_whitespace", issue = "48656")]
2799+
#[inline]
2800+
pub fn split_ascii_whitespace(&self) -> SplitAsciiWhitespace {
2801+
// Work on the raw bytes: split at every ASCII whitespace byte, discard the
// empty pieces that adjacent separators (or leading/trailing ones) produce,
// and turn each remaining piece back into a `&str`.
//
// The final conversion is unchecked. It relies on the fact that an ASCII
// byte never occurs inside a multi-byte UTF-8 sequence, so cutting a valid
// `str` at ASCII bytes always yields valid UTF-8 pieces.
let inner = self
2802+
.as_bytes()
2803+
.split(IsAsciiWhitespace)
2804+
.filter(IsNotEmpty)
2805+
.map(UnsafeBytesToStr);
2806+
SplitAsciiWhitespace { inner }
2807+
}
2808+
27592809
/// An iterator over the lines of a string, as string slices.
27602810
///
27612811
/// Lines are ended with either a newline (`\n`) or a carriage return with
@@ -3895,6 +3945,20 @@ pub struct SplitWhitespace<'a> {
38953945
inner: Filter<Split<'a, IsWhitespace>, IsNotEmpty>,
38963946
}
38973947

3948+
/// An iterator over the non-ASCII-whitespace substrings of a string,
3949+
/// separated by any amount of ASCII whitespace.
3950+
///
3951+
/// This struct is created by the [`split_ascii_whitespace`] method on [`str`].
3952+
/// See its documentation for more.
3953+
///
3954+
/// [`split_ascii_whitespace`]: ../../std/primitive.str.html#method.split_ascii_whitespace
3955+
/// [`str`]: ../../std/primitive.str.html
3956+
#[unstable(feature = "split_ascii_whitespace", issue = "48656")]
3957+
#[derive(Clone, Debug)]
3958+
pub struct SplitAsciiWhitespace<'a> {
3959+
// Adapter stack, innermost first: a byte-slice split using
// `IsAsciiWhitespace` as the separator predicate, a filter keeping only
// non-empty pieces, and a map converting each piece back to `&str`.
// The named function-object types are what allow this type to be spelled out.
inner: Map<Filter<SliceSplit<'a, u8, IsAsciiWhitespace>, IsNotEmpty>, UnsafeBytesToStr>,
3960+
}
3961+
38983962
#[derive(Clone)]
38993963
struct IsWhitespace;
39003964

@@ -3914,37 +3978,98 @@ impl FnMut<(char, )> for IsWhitespace {
39143978
}
39153979
}
39163980

3981+
// Stateless function object: the separator predicate for the byte-slice split
// behind `SplitAsciiWhitespace`. Being a named type (rather than a closure) is
// what lets it appear in that struct's field type.
#[derive(Clone)]
3982+
struct IsAsciiWhitespace;
3983+
3984+
impl<'a> FnOnce<(&'a u8, )> for IsAsciiWhitespace {
3985+
type Output = bool;
3986+
3987+
#[inline]
3988+
extern "rust-call" fn call_once(mut self, arg: (&u8, )) -> bool {
3989+
// No state to consume; simply forward to the `FnMut` implementation.
self.call_mut(arg)
3990+
}
3991+
}
3992+
3993+
impl<'a> FnMut<(&'a u8, )> for IsAsciiWhitespace {
3994+
#[inline]
3995+
extern "rust-call" fn call_mut(&mut self, arg: (&u8, )) -> bool {
3996+
// `arg` is the one-element argument tuple; `.0` is the byte under test.
arg.0.is_ascii_whitespace()
3997+
}
3998+
}
3999+
39174000
// Stateless function object used as a `filter` predicate: true when the slice
// it is handed is non-empty. It is implemented for two argument types:
// `&&str` (the filter inside `SplitWhitespace`) and `&&[u8]` (the filter
// inside `SplitAsciiWhitespace`).
#[derive(Clone)]
39184001
struct IsNotEmpty;
39194002

39204003
impl<'a, 'b> FnOnce<(&'a &'b str, )> for IsNotEmpty {
39214004
type Output = bool;
39224005

39234006
#[inline]
3924-
extern "rust-call" fn call_once(mut self, arg: (&&str, )) -> bool {
4007+
extern "rust-call" fn call_once(mut self, arg: (&'a &'b str, )) -> bool {
39254008
// No state to consume; simply forward to the `FnMut` implementation.
self.call_mut(arg)
39264009
}
39274010
}
39284011

39294012
impl<'a, 'b> FnMut<(&'a &'b str, )> for IsNotEmpty {
39304013
#[inline]
3931-
extern "rust-call" fn call_mut(&mut self, arg: (&&str, )) -> bool {
4014+
extern "rust-call" fn call_mut(&mut self, arg: (&'a &'b str, )) -> bool {
4015+
// `arg.0` is the `&&str` being tested.
!arg.0.is_empty()
4016+
}
4017+
}
4018+
4019+
// Byte-slice counterpart of the `&&str` implementation above.
impl<'a, 'b> FnOnce<(&'a &'b [u8], )> for IsNotEmpty {
4020+
type Output = bool;
4021+
4022+
#[inline]
4023+
extern "rust-call" fn call_once(mut self, arg: (&'a &'b [u8], )) -> bool {
4024+
self.call_mut(arg)
4025+
}
4026+
}
4027+
4028+
impl<'a, 'b> FnMut<(&'a &'b [u8], )> for IsNotEmpty {
4029+
#[inline]
4030+
extern "rust-call" fn call_mut(&mut self, arg: (&'a &'b [u8], )) -> bool {
39324031
// `arg.0` is the `&&[u8]` being tested.
!arg.0.is_empty()
39334032
}
39344033
}
39354034

4035+
// Stateless function object that reinterprets a byte slice as `&str` WITHOUT
// validating it. Callers are responsible for only passing valid UTF-8.
#[derive(Clone)]
4036+
struct UnsafeBytesToStr;
4037+
4038+
impl<'a> FnOnce<(&'a [u8], )> for UnsafeBytesToStr {
4039+
type Output = &'a str;
4040+
4041+
#[inline]
4042+
extern "rust-call" fn call_once(mut self, arg: (&'a [u8], )) -> &'a str {
4043+
// No state to consume; simply forward to the `FnMut` implementation.
self.call_mut(arg)
4044+
}
4045+
}
4046+
4047+
impl<'a> FnMut<(&'a [u8], )> for UnsafeBytesToStr {
4048+
#[inline]
4049+
extern "rust-call" fn call_mut(&mut self, arg: (&'a [u8], )) -> &'a str {
4050+
// SAFETY: nothing is checked here. The only use visible in this file is
// `split_ascii_whitespace`, which passes pieces of a `str`'s bytes cut at
// ASCII whitespace bytes. ASCII bytes never appear inside a multi-byte
// UTF-8 sequence, so every such piece is valid UTF-8. Any new caller must
// uphold the same guarantee.
unsafe { from_utf8_unchecked(arg.0) }
4051+
}
4052+
}
4053+
39364054

39374055
// `SplitWhitespace` is a thin wrapper: every method forwards to `inner`.
#[stable(feature = "split_whitespace", since = "1.1.0")]
39384056
impl<'a> Iterator for SplitWhitespace<'a> {
39394057
type Item = &'a str;
39404058

4059+
#[inline]
39414060
fn next(&mut self) -> Option<&'a str> {
39424061
self.inner.next()
39434062
}
4063+
4064+
#[inline]
4065+
fn size_hint(&self) -> (usize, Option<usize>) {
4066+
// Report the wrapped iterator's bounds rather than the trait's default.
self.inner.size_hint()
4067+
}
39444068
}
39454069

39464070
#[stable(feature = "split_whitespace", since = "1.1.0")]
39474071
impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
4072+
#[inline]
39484073
fn next_back(&mut self) -> Option<&'a str> {
39494074
self.inner.next_back()
39504075
}
@@ -3953,6 +4078,32 @@ impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
39534078
// Marker impl: promises that once `next` has returned `None` it will keep
// returning `None`.
#[stable(feature = "fused", since = "1.26.0")]
39544079
impl<'a> FusedIterator for SplitWhitespace<'a> {}
39554080

4081+
// `SplitAsciiWhitespace` is a thin wrapper: every method forwards to `inner`.
#[unstable(feature = "split_ascii_whitespace", issue = "48656")]
4082+
impl<'a> Iterator for SplitAsciiWhitespace<'a> {
4083+
type Item = &'a str;
4084+
4085+
#[inline]
4086+
fn next(&mut self) -> Option<&'a str> {
4087+
self.inner.next()
4088+
}
4089+
4090+
#[inline]
4091+
fn size_hint(&self) -> (usize, Option<usize>) {
4092+
// Report the wrapped iterator's bounds rather than the trait's default.
self.inner.size_hint()
4093+
}
4094+
}
4095+
4096+
// Iteration from the back is forwarded to the wrapped iterator as well.
#[unstable(feature = "split_ascii_whitespace", issue = "48656")]
4097+
impl<'a> DoubleEndedIterator for SplitAsciiWhitespace<'a> {
4098+
#[inline]
4099+
fn next_back(&mut self) -> Option<&'a str> {
4100+
self.inner.next_back()
4101+
}
4102+
}
4103+
4104+
// Marker impl: promises that once `next` has returned `None` it will keep
// returning `None`.
#[unstable(feature = "split_ascii_whitespace", issue = "48656")]
4104+
impl<'a> FusedIterator for SplitAsciiWhitespace<'a> {}
4106+
39564107
/// An iterator of [`u16`] over the string encoded as UTF-16.
39574108
///
39584109
/// [`u16`]: ../../std/primitive.u16.html

0 commit comments

Comments
 (0)