Add support for the error functions erf() and erfc().

deanrasheed · deanrasheed · commit d5d574146d71 · 2023-03-14T09:17:36.000Z
Expose the standard error functions as SQL-callable functions. These are expected to be useful to people working with normal distributions, and we use them here to test the distribution from random_normal(). Since these functions are defined in the POSIX and C99 standards, they should in theory be available on all supported platforms. If that turns out not to be the case, more work will be needed. On all platforms tested so far, using extra_float_digits = -1 in the regression tests is sufficient to allow for variations between implementations. However, past experience has shown that there are almost certainly going to be additional unexpected portability issues, so these tests may well need further adjustments, based on the buildfarm results. Dean Rasheed, reviewed by Nathan Bossart and Thomas Munro. Discussion: https://postgr.es/m/CAEZATCXv5fi7+Vu-POiyai+ucF95+YMcCMafxV+eZuN1B-=MkQ@mail.gmail.com
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
@@ -1286,6 +1286,41 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
        </para></entry>
       </row>
 
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>erf</primary>
+        </indexterm>
+        <function>erf</function> ( <type>double precision</type> )
+        <returnvalue>double precision</returnvalue>
+       </para>
+       <para>
+        Error function
+       </para>
+       <para>
+        <literal>erf(1.0)</literal>
+        <returnvalue>0.8427007929497149</returnvalue>
+       </para></entry>
+      </row>
+
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>erfc</primary>
+        </indexterm>
+        <function>erfc</function> ( <type>double precision</type> )
+        <returnvalue>double precision</returnvalue>
+       </para>
+       <para>
+        Complementary error function (<literal>1 - erf(x)</literal>, without
+        loss of precision for large inputs)
+       </para>
+       <para>
+        <literal>erfc(1.0)</literal>
+        <returnvalue>0.15729920705028513</returnvalue>
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
@@ -2742,6 +2742,53 @@ datanh(PG_FUNCTION_ARGS)
 }
 
 
+/* ========== ERROR FUNCTIONS ========== */
+
+
+/*
+ *		derf			- returns the error function: erf(arg1)
+ */
+Datum
+derf(PG_FUNCTION_ARGS)
+{
+	float8		arg1 = PG_GETARG_FLOAT8(0);
+	float8		result;
+
+	/*
+	 * For erf, we don't need an errno check because it never overflows.
+	 */
+	result = erf(arg1);
+
+	if (unlikely(isinf(result)))
+		float_overflow_error();
+
+	PG_RETURN_FLOAT8(result);
+}
+
+/*
+ *		derfc			- returns the complementary error function: 1 - erf(arg1)
+ */
+Datum
+derfc(PG_FUNCTION_ARGS)
+{
+	float8		arg1 = PG_GETARG_FLOAT8(0);
+	float8		result;
+
+	/*
+	 * For erfc, we don't need an errno check because it never overflows.
+	 */
+	result = erfc(arg1);
+
+	if (unlikely(isinf(result)))
+		float_overflow_error();
+
+	PG_RETURN_FLOAT8(result);
+}
+
+
+/* ========== RANDOM FUNCTIONS ========== */
+
+
 /*
  * initialize_drandom_seed - initialize drandom_seed if not yet done
  */
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
@@ -57,6 +57,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	202303131
+#define CATALOG_VERSION_NO	202303141
 
 #endif
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
@@ -3465,6 +3465,13 @@
   proname => 'atanh', prorettype => 'float8', proargtypes => 'float8',
   prosrc => 'datanh' },
 
+{ oid => '8788', descr => 'error function',
+  proname => 'erf', prorettype => 'float8', proargtypes => 'float8',
+  prosrc => 'derf' },
+{ oid => '8789', descr => 'complementary error function',
+  proname => 'erfc', prorettype => 'float8', proargtypes => 'float8',
+  prosrc => 'derfc' },
+
 { oid => '1618',
   proname => 'interval_mul', prorettype => 'interval',
   proargtypes => 'interval float8', prosrc => 'interval_mul' },
diff --git a/src/test/regress/expected/float8.out b/src/test/regress/expected/float8.out
@@ -790,6 +790,45 @@ SELECT atanh(float8 'nan');
    NaN
 (1 row)
 
+-- error functions
+-- we run these with extra_float_digits = -1, to get consistently rounded
+-- results on all platforms.
+SET extra_float_digits = -1;
+SELECT x,
+       erf(x),
+       erfc(x)
+FROM (VALUES (float8 '-infinity'),
+      (-28), (-6), (-3.4), (-2.1), (-1.1), (-0.45),
+      (-1.2e-9), (-2.3e-13), (-1.2e-17), (0),
+      (1.2e-17), (2.3e-13), (1.2e-9),
+      (0.45), (1.1), (2.1), (3.4), (6), (28),
+      (float8 'infinity'), (float8 'nan')) AS t(x);
+     x     |         erf          |        erfc         
+-----------+----------------------+---------------------
+ -Infinity |                   -1 |                   2
+       -28 |                   -1 |                   2
+        -6 |                   -1 |                   2
+      -3.4 |    -0.99999847800664 |     1.9999984780066
+      -2.1 |    -0.99702053334367 |     1.9970205333437
+      -1.1 |    -0.88020506957408 |     1.8802050695741
+     -0.45 |    -0.47548171978692 |     1.4754817197869
+  -1.2e-09 | -1.3540550005146e-09 |     1.0000000013541
+  -2.3e-13 | -2.5952720843197e-13 |     1.0000000000003
+  -1.2e-17 | -1.3540550005146e-17 |                   1
+         0 |                    0 |                   1
+   1.2e-17 |  1.3540550005146e-17 |                   1
+   2.3e-13 |  2.5952720843197e-13 |    0.99999999999974
+   1.2e-09 |  1.3540550005146e-09 |    0.99999999864595
+      0.45 |     0.47548171978692 |    0.52451828021308
+       1.1 |     0.88020506957408 |    0.11979493042592
+       2.1 |     0.99702053334367 |   0.002979466656333
+       3.4 |     0.99999847800664 | 1.5219933628623e-06
+         6 |                    1 | 2.1519736712499e-17
+        28 |                    1 |                   0
+  Infinity |                    1 |                   0
+       NaN |                  NaN |                 NaN
+(22 rows)
+
 RESET extra_float_digits;
 -- test for over- and underflow
 INSERT INTO FLOAT8_TBL(f1) VALUES ('10e400');
diff --git a/src/test/regress/expected/random.out b/src/test/regress/expected/random.out
@@ -88,6 +88,38 @@ GROUP BY r;
  -10 |   100
 (1 row)
 
+-- Check standard normal distribution using the Kolmogorov-Smirnov test.
+CREATE FUNCTION ks_test_normal_random()
+RETURNS boolean AS
+$$
+DECLARE
+  n int := 1000;        -- Number of samples
+  c float8 := 1.94947;  -- Critical value for 99.9% confidence
+  ok boolean;
+BEGIN
+  ok := (
+    WITH samples AS (
+      SELECT random_normal() r FROM generate_series(1, n) ORDER BY 1
+    ), indexed_samples AS (
+      SELECT (row_number() OVER())-1.0 i, r FROM samples
+    )
+    SELECT max(abs((1+erf(r/sqrt(2)))/2 - i/n)) < c / sqrt(n)
+    FROM indexed_samples
+  );
+  RETURN ok;
+END
+$$
+LANGUAGE plpgsql;
+-- As above, ks_test_normal_random() returns true about 99.9%
+-- of the time, so try it 3 times and accept if any test passes.
+SELECT ks_test_normal_random() OR
+       ks_test_normal_random() OR
+       ks_test_normal_random() AS standard_normal;
+ standard_normal 
+-----------------
+ t
+(1 row)
+
 -- setseed() should produce a reproducible series of random() values.
 SELECT setseed(0.5);
  setseed 
diff --git a/src/test/regress/sql/float8.sql b/src/test/regress/sql/float8.sql
@@ -229,6 +229,20 @@ SELECT atanh(float8 'infinity');
 SELECT atanh(float8 '-infinity');
 SELECT atanh(float8 'nan');
 
+-- error functions
+-- we run these with extra_float_digits = -1, to get consistently rounded
+-- results on all platforms.
+SET extra_float_digits = -1;
+SELECT x,
+       erf(x),
+       erfc(x)
+FROM (VALUES (float8 '-infinity'),
+      (-28), (-6), (-3.4), (-2.1), (-1.1), (-0.45),
+      (-1.2e-9), (-2.3e-13), (-1.2e-17), (0),
+      (1.2e-17), (2.3e-13), (1.2e-9),
+      (0.45), (1.1), (2.1), (3.4), (6), (28),
+      (float8 'infinity'), (float8 'nan')) AS t(x);
+
 RESET extra_float_digits;
 
 -- test for over- and underflow
diff --git a/src/test/regress/sql/random.sql b/src/test/regress/sql/random.sql
@@ -70,6 +70,36 @@ SELECT r, count(*)
 FROM (SELECT random_normal(-10, 0) r FROM generate_series(1, 100)) ss
 GROUP BY r;
 
+-- Check standard normal distribution using the Kolmogorov-Smirnov test.
+
+CREATE FUNCTION ks_test_normal_random()
+RETURNS boolean AS
+$$
+DECLARE
+  n int := 1000;        -- Number of samples
+  c float8 := 1.94947;  -- Critical value for 99.9% confidence
+  ok boolean;
+BEGIN
+  ok := (
+    WITH samples AS (
+      SELECT random_normal() r FROM generate_series(1, n) ORDER BY 1
+    ), indexed_samples AS (
+      SELECT (row_number() OVER())-1.0 i, r FROM samples
+    )
+    SELECT max(abs((1+erf(r/sqrt(2)))/2 - i/n)) < c / sqrt(n)
+    FROM indexed_samples
+  );
+  RETURN ok;
+END
+$$
+LANGUAGE plpgsql;
+
+-- As above, ks_test_normal_random() returns true about 99.9%
+-- of the time, so try it 3 times and accept if any test passes.
+SELECT ks_test_normal_random() OR
+       ks_test_normal_random() OR
+       ks_test_normal_random() AS standard_normal;
+
 -- setseed() should produce a reproducible series of random() values.
 
 SELECT setseed(0.5);