Skip to content

Commit d1b307e

Browse files
committed
Optimise numeric division for one and two base-NBASE digit divisors.
Formerly div_var() had "fast path" short division code that was significantly faster when the divisor was just one base-NBASE digit, but otherwise used long division. This commit adds a new function div_var_int() that divides by an arbitrary 32-bit integer, using the fast short division algorithm, and updates both div_var() and div_var_fast() to use it for one and two digit divisors. In the case of div_var(), this is slightly faster in the one-digit case, because it avoids some digit array copying, and is much faster in the two-digit case where it replaces long division. For div_var_fast(), it is much faster in both cases because the main div_var_fast() algorithm is optimised for larger inputs. Additionally, optimise exp() and ln() by using div_var_int(), allowing a NumericVar to be replaced by an int in a couple of places, most notably in the Taylor series code. This produces a significant speedup of exp(), ln() and the numeric_big regression test. Dean Rasheed, reviewed by Tom Lane. Discussion: https://postgr.es/m/CAEZATCVwsBi-ND-t82Cuuh1=8ee6jdOpzsmGN+CUZB6yjLg9jw@mail.gmail.com
1 parent d996d64 commit d1b307e

File tree

1 file changed

+180
-43
lines changed

1 file changed

+180
-43
lines changed

src/backend/utils/adt/numeric.c

+180-43
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,8 @@ static void div_var(const NumericVar *var1, const NumericVar *var2,
551551
int rscale, bool round);
552552
static void div_var_fast(const NumericVar *var1, const NumericVar *var2,
553553
NumericVar *result, int rscale, bool round);
554+
static void div_var_int(const NumericVar *var, int ival, int ival_weight,
555+
NumericVar *result, int rscale, bool round);
554556
static int select_div_scale(const NumericVar *var1, const NumericVar *var2);
555557
static void mod_var(const NumericVar *var1, const NumericVar *var2,
556558
NumericVar *result);
@@ -8451,8 +8453,33 @@ div_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result,
84518453
errmsg("division by zero")));
84528454

84538455
/*
8454-
* Now result zero check
8456+
* If the divisor has just one or two digits, delegate to div_var_int(),
8457+
* which uses fast short division.
84558458
*/
8459+
if (var2ndigits <= 2)
8460+
{
8461+
int idivisor;
8462+
int idivisor_weight;
8463+
8464+
idivisor = var2->digits[0];
8465+
idivisor_weight = var2->weight;
8466+
if (var2ndigits == 2)
8467+
{
8468+
idivisor = idivisor * NBASE + var2->digits[1];
8469+
idivisor_weight--;
8470+
}
8471+
if (var2->sign == NUMERIC_NEG)
8472+
idivisor = -idivisor;
8473+
8474+
div_var_int(var1, idivisor, idivisor_weight, result, rscale, round);
8475+
return;
8476+
}
8477+
8478+
/*
8479+
* Otherwise, perform full long division.
8480+
*/
8481+
8482+
/* Result zero check */
84568483
if (var1ndigits == 0)
84578484
{
84588485
zero_var(result);
@@ -8510,23 +8537,6 @@ div_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result,
85108537
alloc_var(result, res_ndigits);
85118538
res_digits = result->digits;
85128539

8513-
if (var2ndigits == 1)
8514-
{
8515-
/*
8516-
* If there's only a single divisor digit, we can use a fast path (cf.
8517-
* Knuth section 4.3.1 exercise 16).
8518-
*/
8519-
divisor1 = divisor[1];
8520-
carry = 0;
8521-
for (i = 0; i < res_ndigits; i++)
8522-
{
8523-
carry = carry * NBASE + dividend[i + 1];
8524-
res_digits[i] = carry / divisor1;
8525-
carry = carry % divisor1;
8526-
}
8527-
}
8528-
else
8529-
{
85308540
/*
85318541
* The full multiple-place algorithm is taken from Knuth volume 2,
85328542
* Algorithm 4.3.1D.
@@ -8659,7 +8669,6 @@ div_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result,
86598669
/* And we're done with this quotient digit */
86608670
res_digits[j] = qhat;
86618671
}
8662-
}
86638672

86648673
pfree(dividend);
86658674

@@ -8735,8 +8744,33 @@ div_var_fast(const NumericVar *var1, const NumericVar *var2,
87358744
errmsg("division by zero")));
87368745

87378746
/*
8738-
* Now result zero check
8747+
* If the divisor has just one or two digits, delegate to div_var_int(),
8748+
* which uses fast short division.
87398749
*/
8750+
if (var2ndigits <= 2)
8751+
{
8752+
int idivisor;
8753+
int idivisor_weight;
8754+
8755+
idivisor = var2->digits[0];
8756+
idivisor_weight = var2->weight;
8757+
if (var2ndigits == 2)
8758+
{
8759+
idivisor = idivisor * NBASE + var2->digits[1];
8760+
idivisor_weight--;
8761+
}
8762+
if (var2->sign == NUMERIC_NEG)
8763+
idivisor = -idivisor;
8764+
8765+
div_var_int(var1, idivisor, idivisor_weight, result, rscale, round);
8766+
return;
8767+
}
8768+
8769+
/*
8770+
* Otherwise, perform full long division.
8771+
*/
8772+
8773+
/* Result zero check */
87408774
if (var1ndigits == 0)
87418775
{
87428776
zero_var(result);
@@ -9008,6 +9042,118 @@ div_var_fast(const NumericVar *var1, const NumericVar *var2,
90089042
}
90099043

90109044

9045+
/*
9046+
* div_var_int() -
9047+
*
9048+
* Divide a numeric variable by a 32-bit integer with the specified weight.
9049+
* The quotient var / (ival * NBASE^ival_weight) is stored in result.
9050+
*/
9051+
static void
9052+
div_var_int(const NumericVar *var, int ival, int ival_weight,
9053+
NumericVar *result, int rscale, bool round)
9054+
{
9055+
NumericDigit *var_digits = var->digits;
9056+
int var_ndigits = var->ndigits;
9057+
int res_sign;
9058+
int res_weight;
9059+
int res_ndigits;
9060+
NumericDigit *res_buf;
9061+
NumericDigit *res_digits;
9062+
uint32 divisor;
9063+
int i;
9064+
9065+
/* Guard against division by zero */
9066+
if (ival == 0)
9067+
ereport(ERROR,
9068+
errcode(ERRCODE_DIVISION_BY_ZERO),
9069+
errmsg("division by zero"));
9070+
9071+
/* Result zero check */
9072+
if (var_ndigits == 0)
9073+
{
9074+
zero_var(result);
9075+
result->dscale = rscale;
9076+
return;
9077+
}
9078+
9079+
/*
9080+
* Determine the result sign, weight and number of digits to calculate.
9081+
* The weight figured here is correct if the emitted quotient has no
9082+
* leading zero digits; otherwise strip_var() will fix things up.
9083+
*/
9084+
if (var->sign == NUMERIC_POS)
9085+
res_sign = ival > 0 ? NUMERIC_POS : NUMERIC_NEG;
9086+
else
9087+
res_sign = ival > 0 ? NUMERIC_NEG : NUMERIC_POS;
9088+
res_weight = var->weight - ival_weight;
9089+
/* The number of accurate result digits we need to produce: */
9090+
res_ndigits = res_weight + 1 + (rscale + DEC_DIGITS - 1) / DEC_DIGITS;
9091+
/* ... but always at least 1 */
9092+
res_ndigits = Max(res_ndigits, 1);
9093+
/* If rounding needed, figure one more digit to ensure correct result */
9094+
if (round)
9095+
res_ndigits++;
9096+
9097+
res_buf = digitbuf_alloc(res_ndigits + 1);
9098+
res_buf[0] = 0; /* spare digit for later rounding */
9099+
res_digits = res_buf + 1;
9100+
9101+
/*
9102+
* Now compute the quotient digits. This is the short division algorithm
9103+
* described in Knuth volume 2, section 4.3.1 exercise 16, except that we
9104+
* allow the divisor to exceed the internal base.
9105+
*
9106+
* In this algorithm, the carry from one digit to the next is at most
9107+
* divisor - 1. Therefore, while processing the next digit, carry may
9108+
* become as large as divisor * NBASE - 1, and so it requires a 64-bit
9109+
* integer if this exceeds UINT_MAX.
9110+
*/
9111+
divisor = Abs(ival);
9112+
9113+
if (divisor <= UINT_MAX / NBASE)
9114+
{
9115+
/* carry cannot overflow 32 bits */
9116+
uint32 carry = 0;
9117+
9118+
for (i = 0; i < res_ndigits; i++)
9119+
{
9120+
carry = carry * NBASE + (i < var_ndigits ? var_digits[i] : 0);
9121+
res_digits[i] = (NumericDigit) (carry / divisor);
9122+
carry = carry % divisor;
9123+
}
9124+
}
9125+
else
9126+
{
9127+
/* carry may exceed 32 bits */
9128+
uint64 carry = 0;
9129+
9130+
for (i = 0; i < res_ndigits; i++)
9131+
{
9132+
carry = carry * NBASE + (i < var_ndigits ? var_digits[i] : 0);
9133+
res_digits[i] = (NumericDigit) (carry / divisor);
9134+
carry = carry % divisor;
9135+
}
9136+
}
9137+
9138+
/* Store the quotient in result */
9139+
digitbuf_free(result->buf);
9140+
result->ndigits = res_ndigits;
9141+
result->buf = res_buf;
9142+
result->digits = res_digits;
9143+
result->weight = res_weight;
9144+
result->sign = res_sign;
9145+
9146+
/* Round or truncate to target rscale (and set result->dscale) */
9147+
if (round)
9148+
round_var(result, rscale);
9149+
else
9150+
trunc_var(result, rscale);
9151+
9152+
/* Strip leading/trailing zeroes */
9153+
strip_var(result);
9154+
}
9155+
9156+
90119157
/*
90129158
* Default scale selection for division
90139159
*
@@ -9783,7 +9929,7 @@ exp_var(const NumericVar *arg, NumericVar *result, int rscale)
97839929
{
97849930
NumericVar x;
97859931
NumericVar elem;
9786-
NumericVar ni;
9932+
int ni;
97879933
double val;
97889934
int dweight;
97899935
int ndiv2;
@@ -9792,7 +9938,6 @@ exp_var(const NumericVar *arg, NumericVar *result, int rscale)
97929938

97939939
init_var(&x);
97949940
init_var(&elem);
9795-
init_var(&ni);
97969941

97979942
set_var_from_var(arg, &x);
97989943

@@ -9820,29 +9965,24 @@ exp_var(const NumericVar *arg, NumericVar *result, int rscale)
98209965

98219966
/*
98229967
* Reduce x to the range -0.01 <= x <= 0.01 (approximately) by dividing by
9823-
* 2^n, to improve the convergence rate of the Taylor series.
9968+
* 2^ndiv2, to improve the convergence rate of the Taylor series.
9969+
*
9970+
* Note that the overflow check above ensures that Abs(x) < 6000, which
9971+
* means that ndiv2 <= 20 here.
98249972
*/
98259973
if (Abs(val) > 0.01)
98269974
{
9827-
NumericVar tmp;
9828-
9829-
init_var(&tmp);
9830-
set_var_from_var(&const_two, &tmp);
9831-
98329975
ndiv2 = 1;
98339976
val /= 2;
98349977

98359978
while (Abs(val) > 0.01)
98369979
{
98379980
ndiv2++;
98389981
val /= 2;
9839-
add_var(&tmp, &tmp, &tmp);
98409982
}
98419983

98429984
local_rscale = x.dscale + ndiv2;
9843-
div_var_fast(&x, &tmp, &x, local_rscale, true);
9844-
9845-
free_var(&tmp);
9985+
div_var_int(&x, 1 << ndiv2, 0, &x, local_rscale, true);
98469986
}
98479987
else
98489988
ndiv2 = 0;
@@ -9870,16 +10010,16 @@ exp_var(const NumericVar *arg, NumericVar *result, int rscale)
987010010
add_var(&const_one, &x, result);
987110011

987210012
mul_var(&x, &x, &elem, local_rscale);
9873-
set_var_from_var(&const_two, &ni);
9874-
div_var_fast(&elem, &ni, &elem, local_rscale, true);
10013+
ni = 2;
10014+
div_var_int(&elem, ni, 0, &elem, local_rscale, true);
987510015

987610016
while (elem.ndigits != 0)
987710017
{
987810018
add_var(result, &elem, result);
987910019

988010020
mul_var(&elem, &x, &elem, local_rscale);
9881-
add_var(&ni, &const_one, &ni);
9882-
div_var_fast(&elem, &ni, &elem, local_rscale, true);
10021+
ni++;
10022+
div_var_int(&elem, ni, 0, &elem, local_rscale, true);
988310023
}
988410024

988510025
/*
@@ -9899,7 +10039,6 @@ exp_var(const NumericVar *arg, NumericVar *result, int rscale)
989910039

990010040
free_var(&x);
990110041
free_var(&elem);
9902-
free_var(&ni);
990310042
}
990410043

990510044

@@ -9993,7 +10132,7 @@ ln_var(const NumericVar *arg, NumericVar *result, int rscale)
999310132
{
999410133
NumericVar x;
999510134
NumericVar xx;
9996-
NumericVar ni;
10135+
int ni;
999710136
NumericVar elem;
999810137
NumericVar fact;
999910138
int nsqrt;
@@ -10012,7 +10151,6 @@ ln_var(const NumericVar *arg, NumericVar *result, int rscale)
1001210151

1001310152
init_var(&x);
1001410153
init_var(&xx);
10015-
init_var(&ni);
1001610154
init_var(&elem);
1001710155
init_var(&fact);
1001810156

@@ -10073,13 +10211,13 @@ ln_var(const NumericVar *arg, NumericVar *result, int rscale)
1007310211
set_var_from_var(result, &xx);
1007410212
mul_var(result, result, &x, local_rscale);
1007510213

10076-
set_var_from_var(&const_one, &ni);
10214+
ni = 1;
1007710215

1007810216
for (;;)
1007910217
{
10080-
add_var(&ni, &const_two, &ni);
10218+
ni += 2;
1008110219
mul_var(&xx, &x, &xx, local_rscale);
10082-
div_var_fast(&xx, &ni, &elem, local_rscale, true);
10220+
div_var_int(&xx, ni, 0, &elem, local_rscale, true);
1008310221

1008410222
if (elem.ndigits == 0)
1008510223
break;
@@ -10095,7 +10233,6 @@ ln_var(const NumericVar *arg, NumericVar *result, int rscale)
1009510233

1009610234
free_var(&x);
1009710235
free_var(&xx);
10098-
free_var(&ni);
1009910236
free_var(&elem);
1010010237
free_var(&fact);
1010110238
}

0 commit comments

Comments
 (0)