Skip to content

Commit 6b423ec

Browse files
committed
Improve performance of pg_strtointNN functions
Experiments have shown that modern versions of both gcc and clang are unable to fully optimize the multiplication by 10 that we're doing in the pg_strtointNN functions. Both compilers seem to be making use of "imul", which is not the most efficient way to multiply by 10. This seems to be due to the overflow checking that we're doing. Without the overflow checks, both those compilers switch to a more efficient method of multiplying by 10. In the absence of overflow concerns, integer multiplication by 10 can be done by bit-shifting left 3 places to multiply by 8 and then adding the original value twice. To allow compilers this flexibility, here we adjust the code so that we accumulate the number as an unsigned version of the type and remove the use of pg_mul_sNN_overflow() and pg_sub_sNN_overflow(). The overflow checking can be done simply by checking if the accumulated value has gone beyond a 10th of the maximum *signed* value for the given type. If it has, then the accumulation of the next digit will cause an overflow. After this is done, we do a final overflow check before converting the unsigned version of the number back to its signed counterpart. Testing has shown about an 8% speedup of a COPY into a table containing 2 INT columns. Author: David Rowley, Dean Rasheed Discussion: https://postgr.es/m/CAApHDvrL6_+wKgPqRHr7gH_6xy3hXM6a3QCsZ5ForurjDFfenA@mail.gmail.com Discussion: https://postgr.es/m/CAApHDvrdYByjfj-=WbmVNFgmVZg88-dE7heukw8p55aJ+W=qxQ@mail.gmail.com
1 parent 29452de commit 6b423ec

File tree

1 file changed

+42
-44
lines changed

1 file changed

+42
-44
lines changed

src/backend/utils/adt/numutils.c

+42-44
Original file line numberDiff line numberDiff line change
@@ -91,15 +91,15 @@ decimalLength64(const uint64 v)
9191
* Allows any number of leading or trailing whitespace characters. Will throw
9292
* ereport() upon bad input format or overflow.
9393
*
94-
* NB: Accumulate input as a negative number, to deal with two's complement
94+
* NB: Accumulate input as an unsigned number, to deal with two's complement
9595
* representation of the most negative number, which can't be represented as a
96-
* positive number.
96+
* signed positive number.
9797
*/
9898
int16
9999
pg_strtoint16(const char *s)
100100
{
101101
const char *ptr = s;
102-
int16 tmp = 0;
102+
uint16 tmp = 0;
103103
bool neg = false;
104104

105105
/* skip leading spaces */
@@ -122,11 +122,10 @@ pg_strtoint16(const char *s)
122122
/* process digits */
123123
while (*ptr && isdigit((unsigned char) *ptr))
124124
{
125-
int8 digit = (*ptr++ - '0');
126-
127-
if (unlikely(pg_mul_s16_overflow(tmp, 10, &tmp)) ||
128-
unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
125+
if (unlikely(tmp > (PG_INT16_MAX / 10)))
129126
goto out_of_range;
127+
128+
tmp = tmp * 10 + (*ptr++ - '0');
130129
}
131130

132131
/* allow trailing whitespace, but not other trailing chars */
@@ -136,15 +135,18 @@ pg_strtoint16(const char *s)
136135
if (unlikely(*ptr != '\0'))
137136
goto invalid_syntax;
138137

139-
if (!neg)
138+
if (neg)
140139
{
141-
/* could fail if input is most negative number */
142-
if (unlikely(tmp == PG_INT16_MIN))
140+
/* check the negative equivalent will fit without overflowing */
141+
if (tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1)
143142
goto out_of_range;
144-
tmp = -tmp;
143+
return -((int16) tmp);
145144
}
146145

147-
return tmp;
146+
if (tmp > PG_INT16_MAX)
147+
goto out_of_range;
148+
149+
return (int16) tmp;
148150

149151
out_of_range:
150152
ereport(ERROR,
@@ -167,15 +169,15 @@ pg_strtoint16(const char *s)
167169
* Allows any number of leading or trailing whitespace characters. Will throw
168170
* ereport() upon bad input format or overflow.
169171
*
170-
* NB: Accumulate input as a negative number, to deal with two's complement
172+
* NB: Accumulate input as an unsigned number, to deal with two's complement
171173
* representation of the most negative number, which can't be represented as a
172-
* positive number.
174+
* signed positive number.
173175
*/
174176
int32
175177
pg_strtoint32(const char *s)
176178
{
177179
const char *ptr = s;
178-
int32 tmp = 0;
180+
uint32 tmp = 0;
179181
bool neg = false;
180182

181183
/* skip leading spaces */
@@ -198,11 +200,10 @@ pg_strtoint32(const char *s)
198200
/* process digits */
199201
while (*ptr && isdigit((unsigned char) *ptr))
200202
{
201-
int8 digit = (*ptr++ - '0');
202-
203-
if (unlikely(pg_mul_s32_overflow(tmp, 10, &tmp)) ||
204-
unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
203+
if (unlikely(tmp > (PG_INT32_MAX / 10)))
205204
goto out_of_range;
205+
206+
tmp = tmp * 10 + (*ptr++ - '0');
206207
}
207208

208209
/* allow trailing whitespace, but not other trailing chars */
@@ -212,15 +213,18 @@ pg_strtoint32(const char *s)
212213
if (unlikely(*ptr != '\0'))
213214
goto invalid_syntax;
214215

215-
if (!neg)
216+
if (neg)
216217
{
217-
/* could fail if input is most negative number */
218-
if (unlikely(tmp == PG_INT32_MIN))
218+
/* check the negative equivalent will fit without overflowing */
219+
if (tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1)
219220
goto out_of_range;
220-
tmp = -tmp;
221+
return -((int32) tmp);
221222
}
222223

223-
return tmp;
224+
if (tmp > PG_INT32_MAX)
225+
goto out_of_range;
226+
227+
return (int32) tmp;
224228

225229
out_of_range:
226230
ereport(ERROR,
@@ -243,25 +247,17 @@ pg_strtoint32(const char *s)
243247
* Allows any number of leading or trailing whitespace characters. Will throw
244248
* ereport() upon bad input format or overflow.
245249
*
246-
* NB: Accumulate input as a negative number, to deal with two's complement
250+
* NB: Accumulate input as an unsigned number, to deal with two's complement
247251
* representation of the most negative number, which can't be represented as a
248-
* positive number.
252+
* signed positive number.
249253
*/
250254
int64
251255
pg_strtoint64(const char *s)
252256
{
253257
const char *ptr = s;
254-
int64 tmp = 0;
258+
uint64 tmp = 0;
255259
bool neg = false;
256260

257-
/*
258-
* Do our own scan, rather than relying on sscanf which might be broken
259-
* for long long.
260-
*
261-
* As INT64_MIN can't be stored as a positive 64 bit integer, accumulate
262-
* value as a negative number.
263-
*/
264-
265261
/* skip leading spaces */
266262
while (*ptr && isspace((unsigned char) *ptr))
267263
ptr++;
@@ -282,11 +278,10 @@ pg_strtoint64(const char *s)
282278
/* process digits */
283279
while (*ptr && isdigit((unsigned char) *ptr))
284280
{
285-
int8 digit = (*ptr++ - '0');
286-
287-
if (unlikely(pg_mul_s64_overflow(tmp, 10, &tmp)) ||
288-
unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
281+
if (unlikely(tmp > (PG_INT64_MAX / 10)))
289282
goto out_of_range;
283+
284+
tmp = tmp * 10 + (*ptr++ - '0');
290285
}
291286

292287
/* allow trailing whitespace, but not other trailing chars */
@@ -296,15 +291,18 @@ pg_strtoint64(const char *s)
296291
if (unlikely(*ptr != '\0'))
297292
goto invalid_syntax;
298293

299-
if (!neg)
294+
if (neg)
300295
{
301-
/* could fail if input is most negative number */
302-
if (unlikely(tmp == PG_INT64_MIN))
296+
/* check the negative equivalent will fit without overflowing */
297+
if (tmp > (uint64) (-(PG_INT64_MIN + 1)) + 1)
303298
goto out_of_range;
304-
tmp = -tmp;
299+
return -((int64) tmp);
305300
}
306301

307-
return tmp;
302+
if (tmp > PG_INT64_MAX)
303+
goto out_of_range;
304+
305+
return (int64) tmp;
308306

309307
out_of_range:
310308
ereport(ERROR,

0 commit comments

Comments
 (0)