8
8
// option. This file may not be copied, modified, or distributed
9
9
// except according to those terms.
10
10
11
- //! An owned, growable string that enforces that its contents are valid UTF-8.
11
+ //! A UTF-8 encoded, growable string.
12
+ //!
13
+ //! This module contains the [`String`] type, the [`ToString`] trait for
14
+ //! converting to strings, and several error types that may result from working with
15
+ //! [`String`]s.
16
+ //!
17
+ //! [`String`]: struct.String.html
18
+ //! [`ToString`]: trait.ToString.html
12
19
13
20
#![ stable( feature = "rust1" , since = "1.0.0" ) ]
14
21
@@ -29,7 +36,190 @@ use str::{self, FromStr, Utf8Error, Chars};
29
36
use vec:: Vec ;
30
37
use boxed:: Box ;
31
38
32
- /// A growable string stored as a UTF-8 encoded buffer.
39
+ /// A UTF-8 encoded, growable string.
40
+ ///
41
+ /// The `String` type is the most common string type that has ownership over the
42
+ /// contents of the string. It has a close relationship with its borrowed
43
+ /// counterpart, the primitive [`str`].
44
+ ///
45
+ /// [`str`]: ../primitive.str.html
46
+ ///
47
+ /// # Examples
48
+ ///
49
+ /// You can create a `String` from a literal string with `String::from`:
50
+ ///
51
+ /// ```
52
+ /// let hello = String::from("Hello, world!");
53
+ /// ```
54
+ ///
55
+ /// You can append a [`char`] to a `String` with the [`push()`] method, and
56
+ /// append a [`&str`] with the [`push_str()`] method:
57
+ ///
58
+ /// ```
59
+ /// let mut hello = String::from("Hello, ");
60
+ ///
61
+ /// hello.push('w');
62
+ /// hello.push_str("orld!");
63
+ /// ```
64
+ ///
65
+ /// [`push()`]: #method.push
66
+ /// [`push_str()`]: #method.push_str
67
+ ///
68
+ /// If you have a vector of UTF-8 bytes, you can create a `String` from it with
69
+ /// the [`from_utf8()`] method:
70
+ ///
71
+ /// ```
72
+ /// // some bytes, in a vector
73
+ /// let sparkle_heart = vec![240, 159, 146, 150];
74
+ ///
75
+ /// // We know these bytes are valid, so we'll use `unwrap()`.
76
+ /// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
77
+ ///
78
+ /// assert_eq!("💖", sparkle_heart);
79
+ /// ```
80
+ ///
81
+ /// [`from_utf8()`]: #method.from_utf8
82
+ ///
83
+ /// # UTF-8
84
+ ///
85
+ /// `String`s are always valid UTF-8. This has a few implications, the first of
86
+ /// which is that if you need a non-UTF-8 string, consider [`OsString`]. It is
87
+ /// similar, but without the UTF-8 constraint. The second implication is that
88
+ /// you cannot index into a `String`:
89
+ ///
90
+ /// ```ignore
91
+ /// let s = "hello";
92
+ ///
93
+ /// println!("The first letter of s is {}", s[0]); // ERROR!!!
94
+ /// ```
95
+ ///
96
+ /// [`OsString`]: ../ffi/struct.OsString.html
97
+ ///
98
+ /// Indexing is intended to be a constant-time operation, but UTF-8 encoding
99
+ /// does not allow us to do this. Furthermore, it's not clear what sort of
100
+ /// thing the index should return: a byte, a codepoint, or a grapheme cluster.
101
+ /// The [`as_bytes()`] and [`chars()`] methods give access to the first
102
+ /// two, respectively: as a byte slice and as an iterator over `char`s.
103
+ ///
104
+ /// [`as_bytes()`]: #method.as_bytes
105
+ /// [`chars()`]: #method.chars
106
+ ///
107
+ /// # Deref
108
+ ///
109
+ /// `String`s implement [`Deref`]`<Target=str>`, and so inherit all of [`str`]'s
110
+ /// methods. In addition, this means that you can pass a `String` to any
111
+ /// function which takes a [`&str`] by using an ampersand (`&`):
112
+ ///
113
+ /// ```
114
+ /// fn takes_str(s: &str) { }
115
+ ///
116
+ /// let s = String::from("Hello");
117
+ ///
118
+ /// takes_str(&s);
119
+ /// ```
120
+ ///
121
+ /// [`&str`]: ../primitive.str.html
122
+ /// [`Deref`]: ../ops/trait.Deref.html
123
+ ///
124
+ /// This will create a [`&str`] from the `String` and pass it in. This
125
+ /// conversion is very inexpensive, and so generally, functions will accept
126
+ /// [`&str`]s as arguments unless they need a `String` for some specific reason.
127
+ ///
128
+ ///
129
+ /// # Representation
130
+ ///
131
+ /// A `String` is made up of three components: a pointer to some bytes, a
132
+ /// length, and a capacity. The pointer points to an internal buffer `String`
133
+ /// uses to store its data. The length is the number of bytes currently stored
134
+ /// in the buffer, and the capacity is the size of the buffer in bytes. As such,
135
+ /// the length will always be less than or equal to the capacity.
136
+ ///
137
+ /// This buffer is always stored on the heap.
138
+ ///
139
+ /// You can look at these with the [`as_ptr()`], [`len()`], and [`capacity()`]
140
+ /// methods:
141
+ ///
142
+ /// ```
143
+ /// use std::mem;
144
+ ///
145
+ /// let story = String::from("Once upon a time...");
146
+ ///
147
+ /// let ptr = story.as_ptr();
148
+ /// let len = story.len();
149
+ /// let capacity = story.capacity();
150
+ ///
151
+ /// // story has nineteen bytes
152
+ /// assert_eq!(19, len);
153
+ ///
154
+ /// // Now that we have our parts, we throw the story away.
155
+ /// mem::forget(story);
156
+ ///
157
+ /// // We can re-build a String out of ptr, len, and capacity. This is all
158
+ /// // unsafe because we are responsible for making sure the components are
159
+ /// // valid:
160
+ /// let s = unsafe { String::from_raw_parts(ptr as *mut _, len, capacity) } ;
161
+ ///
162
+ /// assert_eq!(String::from("Once upon a time..."), s);
163
+ /// ```
164
+ ///
165
+ /// [`as_ptr()`]: #method.as_ptr
166
+ /// [`len()`]: #method.len
167
+ /// [`capacity()`]: #method.capacity
168
+ ///
169
+ /// If a `String` has enough capacity, adding elements to it will not
170
+ /// re-allocate. For example, consider this program:
171
+ ///
172
+ /// ```
173
+ /// let mut s = String::new();
174
+ ///
175
+ /// println!("{}", s.capacity());
176
+ ///
177
+ /// for _ in 0..5 {
178
+ /// s.push_str("hello");
179
+ /// println!("{}", s.capacity());
180
+ /// }
181
+ /// ```
182
+ ///
183
+ /// This will output the following:
184
+ ///
185
+ /// ```text
186
+ /// 0
187
+ /// 5
188
+ /// 10
189
+ /// 20
190
+ /// 20
191
+ /// 40
192
+ /// ```
193
+ ///
194
+ /// At first, we have no memory allocated at all, but as we append to the
195
+ /// string, it increases its capacity appropriately. If we instead use the
196
+ /// [`with_capacity()`] method to allocate the correct capacity initially:
197
+ ///
198
+ /// ```
199
+ /// let mut s = String::with_capacity(25);
200
+ ///
201
+ /// println!("{}", s.capacity());
202
+ ///
203
+ /// for _ in 0..5 {
204
+ /// s.push_str("hello");
205
+ /// println!("{}", s.capacity());
206
+ /// }
207
+ /// ```
208
+ ///
209
+ /// [`with_capacity()`]: #method.with_capacity
210
+ ///
211
+ /// We end up with a different output:
212
+ ///
213
+ /// ```text
214
+ /// 25
215
+ /// 25
216
+ /// 25
217
+ /// 25
218
+ /// 25
219
+ /// 25
220
+ /// ```
221
+ ///
222
+ /// Here, there's no need to allocate more memory inside the loop.
33
223
#[ derive( PartialOrd , Eq , Ord ) ]
34
224
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
35
225
pub struct String {
@@ -139,7 +329,7 @@ impl String {
139
329
/// // some bytes, in a vector
140
330
/// let sparkle_heart = vec![240, 159, 146, 150];
141
331
///
142
- /// // We know these bytes are valid, so just use `unwrap()`.
332
+ /// // We know these bytes are valid, so we'll use `unwrap()`.
143
333
/// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
144
334
///
145
335
/// assert_eq!("💖", sparkle_heart);
@@ -201,7 +391,7 @@ impl String {
201
391
/// // some bytes, in a vector
202
392
/// let sparkle_heart = vec![240, 159, 146, 150];
203
393
///
204
- /// // We know these bytes are valid, so just use `unwrap()`.
394
+ /// // We know these bytes are valid, so we'll use `unwrap()`.
205
395
/// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
206
396
///
207
397
/// assert_eq!("💖", sparkle_heart);
0 commit comments