author     Peter Eisentraut    2019-10-13 20:10:38 +0000
committer  Peter Eisentraut    2019-10-13 20:10:38 +0000
commit     bdb839cbdebe851c200b2c7c03aec7483573d631
tree       2fe5e45e59f12231529aec985ad98246af24a650
parent     9abb2bfc046070b22e3be28173a0736da31cab5a
Update unicode.org URLs
Use https, consistent host name, remove references to ftp. Also update the URLs for CLDR, which has moved from Trac to GitHub.
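
For anyone verifying the migration, a quick way to confirm the rewritten URLs resolve is a HEAD request against each one. A minimal sketch, not part of the commit; it assumes Python 3, network access, and that the servers answer HEAD requests:

    import urllib.request

    # A couple of the URLs introduced by this commit.
    URLS = [
        "https://www.unicode.org/Public/8.0.0/ucd/UnicodeData.txt",
        "https://raw.githubusercontent.com/unicode-org/cldr/release-34/common/transforms/Latin-ASCII.xml",
    ]

    for url in URLS:
        req = urllib.request.Request(url, method="HEAD")
        with urllib.request.urlopen(req, timeout=30) as resp:
            print(resp.status, url)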
-rw-r--r--   contrib/unaccent/generate_unaccent_rules.py   16
-rw-r--r--   doc/src/sgml/acronyms.sgml                      2
-rw-r--r--   doc/src/sgml/charset.sgml                       8
-rw-r--r--   src/backend/utils/mb/Unicode/Makefile          14
-rwxr-xr-x   src/backend/utils/mb/Unicode/UCS_to_BIG5.pl     4
-rwxr-xr-x   src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl    4
-rwxr-xr-x   src/backend/utils/mb/Unicode/UCS_to_most.pl     4
-rw-r--r--   src/common/unicode/Makefile                     2
-rw-r--r--   src/common/unicode_norm.c                       8
9 files changed, 31 insertions, 31 deletions
diff --git a/contrib/unaccent/generate_unaccent_rules.py b/contrib/unaccent/generate_unaccent_rules.py
index 7a0a96e04f7..acfb4f0b686 100644
--- a/contrib/unaccent/generate_unaccent_rules.py
+++ b/contrib/unaccent/generate_unaccent_rules.py
@@ -24,9 +24,9 @@
# Latin-ASCII.xml, the latest data sets released can be browsed directly
# via [3]. Note that this script is compatible with at least release 29.
#
-# [1] http://unicode.org/Public/8.0.0/ucd/UnicodeData.txt
-# [2] http://unicode.org/cldr/trac/export/14746/tags/release-34/common/transforms/Latin-ASCII.xml
-# [3] https://unicode.org/cldr/trac/browser/tags
+# [1] https://www.unicode.org/Public/8.0.0/ucd/UnicodeData.txt
+# [2] https://raw.githubusercontent.com/unicode-org/cldr/release-34/common/transforms/Latin-ASCII.xml
+# [3] https://github.com/unicode-org/cldr/tags
# BEGIN: Python 2/3 compatibility - remove when Python 2 compatibility dropped
# The approach is to be Python3 compatible with Python2 "backports".
@@ -113,7 +113,7 @@ def is_mark(codepoint):
def is_letter_with_marks(codepoint, table):
"""Returns true for letters combined with one or more marks."""
- # See http://www.unicode.org/reports/tr44/tr44-14.html#General_Category_Values
+ # See https://www.unicode.org/reports/tr44/tr44-14.html#General_Category_Values
# Letter may have no combining characters, in which case it has
# no marks.
@@ -226,7 +226,7 @@ def special_cases():
return charactersSet
def main(args):
- # http://www.unicode.org/reports/tr44/tr44-14.html#Character_Decomposition_Mappings
+ # https://www.unicode.org/reports/tr44/tr44-14.html#Character_Decomposition_Mappings
decomposition_type_pattern = re.compile(" *<[^>]*> *")
table = {}
@@ -243,7 +243,7 @@ def main(args):
for line in unicodeDataFile:
fields = line.split(";")
if len(fields) > 5:
- # http://www.unicode.org/reports/tr44/tr44-14.html#UnicodeData.txt
+ # https://www.unicode.org/reports/tr44/tr44-14.html#UnicodeData.txt
general_category = fields[2]
decomposition = fields[5]
decomposition = re.sub(decomposition_type_pattern, ' ', decomposition)
@@ -281,8 +281,8 @@ def main(args):
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='This script builds unaccent.rules on standard output when given the contents of UnicodeData.txt and Latin-ASCII.xml given as arguments.')
- parser.add_argument("--unicode-data-file", help="Path to formatted text file corresponding to UnicodeData.txt. See <http://unicode.org/Public/8.0.0/ucd/UnicodeData.txt>.", type=str, required=True, dest='unicodeDataFilePath')
- parser.add_argument("--latin-ascii-file", help="Path to XML file from Unicode Common Locale Data Repository (CLDR) corresponding to Latin-ASCII transliterator (Latin-ASCII.xml). See <http://unicode.org/cldr/trac/export/12304/tags/release-28/common/transforms/Latin-ASCII.xml>.", type=str, dest='latinAsciiFilePath')
+ parser.add_argument("--unicode-data-file", help="Path to formatted text file corresponding to UnicodeData.txt.", type=str, required=True, dest='unicodeDataFilePath')
+ parser.add_argument("--latin-ascii-file", help="Path to XML file from Unicode Common Locale Data Repository (CLDR) corresponding to Latin-ASCII transliterator (Latin-ASCII.xml).", type=str, dest='latinAsciiFilePath')
parser.add_argument("--no-ligatures-expansion", help="Do not expand ligatures and do not use Unicode CLDR Latin-ASCII transliterator. By default, this option is not enabled and \"--latin-ascii-file\" argument is required. If this option is enabled, \"--latin-ascii-file\" argument is optional and ignored.", action="store_true", dest='noLigaturesExpansion')
args = parser.parse_args()
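
For context on the hunks above: the script splits each UnicodeData.txt line on ";" and reads field 2 (general category) and field 5 (decomposition mapping), stripping any <...> decomposition-type tag with the same pattern shown in the diff. A minimal sketch using the U+00E9 record (sample line quoted from UnicodeData.txt; see TR44 for the field layout):

    import re

    # UnicodeData.txt record for U+00E9 LATIN SMALL LETTER E WITH ACUTE.
    line = "00E9;LATIN SMALL LETTER E WITH ACUTE;Ll;0;L;0065 0301;;;;N;LATIN SMALL LETTER E ACUTE;;00C9;;00C9"
    fields = line.split(";")
    general_category = fields[2]                            # "Ll"
    decomposition = re.sub(" *<[^>]*> *", " ", fields[5])   # strip <compat>-style tags
    print(general_category, decomposition.split())          # Ll ['0065', '0301']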
diff --git a/doc/src/sgml/acronyms.sgml b/doc/src/sgml/acronyms.sgml
index 411e368a9c6..f638665dc92 100644
--- a/doc/src/sgml/acronyms.sgml
+++ b/doc/src/sgml/acronyms.sgml
@@ -728,7 +728,7 @@
<term><acronym>UTF</acronym></term>
<listitem>
<para>
- <ulink url="http://www.unicode.org/">Unicode Transformation
+ <ulink url="https://www.unicode.org/">Unicode Transformation
Format</ulink>
</para>
</listitem>
diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml
index b672da47d0a..45290bd27bf 100644
--- a/doc/src/sgml/charset.sgml
+++ b/doc/src/sgml/charset.sgml
@@ -832,12 +832,12 @@ CREATE COLLATION german (provider = libc, locale = 'de_DE');
</varlistentry>
</variablelist>
- See <ulink url="http://unicode.org/reports/tr35/tr35-collation.html">Unicode
+ See <ulink url="https://www.unicode.org/reports/tr35/tr35-collation.html">Unicode
Technical Standard #35</ulink>
and <ulink url="https://tools.ietf.org/html/bcp47">BCP 47</ulink> for
details. The list of possible collation types (<literal>co</literal>
subtag) can be found in
- the <ulink url="http://www.unicode.org/repos/cldr/trunk/common/bcp47/collation.xml">CLDR
+ the <ulink url="https://github.com/unicode-org/cldr/blob/master/common/bcp47/collation.xml">CLDR
repository</ulink>.
The <ulink url="https://ssl.icu-project.org/icu-bin/locexp">ICU Locale
Explorer</ulink> can be used to check the details of a particular locale
@@ -900,7 +900,7 @@ CREATE COLLATION french FROM "fr-x-icu";
different Unicode normal forms. It is up to the collation provider to
actually implement such insensitive comparisons; the deterministic flag
only determines whether ties are to be broken using bytewise comparison.
- See also <ulink url="https://unicode.org/reports/tr10">Unicode Technical
+ See also <ulink url="https://www.unicode.org/reports/tr10">Unicode Technical
Standard 10</ulink> for more information on the terminology.
</para>
@@ -1926,7 +1926,7 @@ RESET client_encoding;
</varlistentry>
<varlistentry>
- <term><ulink url="http://www.unicode.org/"></ulink></term>
+ <term><ulink url="https://www.unicode.org/"></ulink></term>
<listitem>
<para>
diff --git a/src/backend/utils/mb/Unicode/Makefile b/src/backend/utils/mb/Unicode/Makefile
index a97e1c6cd72..63710f9ea73 100644
--- a/src/backend/utils/mb/Unicode/Makefile
+++ b/src/backend/utils/mb/Unicode/Makefile
@@ -119,7 +119,7 @@ DOWNLOAD = wget -O $@ --no-use-server-timestamps
#DOWNLOAD = curl -o $@
BIG5.TXT CNS11643.TXT:
- $(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/$(@F)
+ $(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/$(@F)
euc-jis-2004-std.txt sjis-0213-2004-std.txt:
$(DOWNLOAD) http://x0213.org/codetable/$(@F)
@@ -131,19 +131,19 @@ GB2312.TXT:
$(DOWNLOAD) 'http://trac.greenstone.org/browser/trunk/gsdl/unicode/MAPPINGS/EASTASIA/GB/GB2312.TXT?rev=1842&format=txt'
JIS0212.TXT:
- $(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/$(@F)
+ $(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/$(@F)
JOHAB.TXT KSX1001.TXT:
- $(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC/$(@F)
+ $(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC/$(@F)
KOI8-R.TXT KOI8-U.TXT:
- $(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/$(@F)
+ $(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/$(@F)
$(ISO8859TEXTS):
- $(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/ISO8859/$(@F)
+ $(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/ISO8859/$(@F)
$(filter-out CP8%,$(WINTEXTS)) CP932.TXT CP950.TXT:
- $(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/$(@F)
+ $(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/$(@F)
$(filter CP8%,$(WINTEXTS)):
- $(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/$(@F)
+ $(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/$(@F)
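
In these rules, $(@F) is the file part of the target, so "make BIG5.TXT" runs wget -O BIG5.TXT against the OBSOLETE/EASTASIA/OTHER directory. A rough Python equivalent of one such download, as a sketch rather than part of the patch:

    import urllib.request

    # Same effect as the Makefile rule for the BIG5.TXT target.
    urllib.request.urlretrieve(
        "https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT",
        "BIG5.TXT")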
diff --git a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl b/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl
index bcdd29b686f..297f7b98938 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl
@@ -8,8 +8,8 @@
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
-# you have to obtain the map files from the organization's ftp site.
-# ftp://www.unicode.org/Public/MAPPINGS/
+# you have to obtain the map files from the organization's download site.
+# https://www.unicode.org/Public/MAPPINGS/
#
# Our "big5" comes from BIG5.TXT, with the addition of the characters
# in the range 0xf9d6-0xf9dc from CP950.TXT.
diff --git a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl
index 222093dff22..8645a7ea6ef 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl
@@ -8,8 +8,8 @@
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
-# you have to obtain the map files from the organization's ftp site.
-# ftp://www.unicode.org/Public/MAPPINGS/
+# you have to obtain the map files from the organization's download site.
+# https://www.unicode.org/Public/MAPPINGS/
# We assume the file include three tab-separated columns:
# JOHAB code in hex
# UCS-2 code in hex
diff --git a/src/backend/utils/mb/Unicode/UCS_to_most.pl b/src/backend/utils/mb/Unicode/UCS_to_most.pl
index 647417b4bf8..2290feddf4e 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_most.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_most.pl
@@ -8,8 +8,8 @@
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
-# you have to obtain the map files from the organization's ftp site.
-# ftp://www.unicode.org/Public/MAPPINGS/
+# you have to obtain the map files from the organization's download site.
+# https://www.unicode.org/Public/MAPPINGS/
# We assume the file include three tab-separated columns:
# source character set code in hex
# UCS-2 code in hex
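
The three-column format these UCS_to_*.pl scripts expect (source code point in hex, UCS-2 code in hex, then a "#" comment carrying the character name) parses the same way in any language. A minimal Python sketch; the sample line follows the published BIG5.TXT layout, and its exact text is an assumption:

    def parse_map_line(line):
        data = line.split("#", 1)[0].split()   # drop the name comment
        if len(data) < 2:
            return None                        # header or blank line
        code, ucs = data[:2]
        return int(code, 16), int(ucs, 16)

    # (41280, 12288), i.e. Big5 0xA140 -> U+3000
    print(parse_map_line("0xA140\t0x3000\t# IDEOGRAPHIC SPACE"))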
diff --git a/src/common/unicode/Makefile b/src/common/unicode/Makefile
index e20ef778f34..334859c9848 100644
--- a/src/common/unicode/Makefile
+++ b/src/common/unicode/Makefile
@@ -23,7 +23,7 @@ DOWNLOAD = wget -O $@ --no-use-server-timestamps
# These files are part of the Unicode Character Database. Download
# them on demand.
UnicodeData.txt CompositionExclusions.txt NormalizationTest.txt:
- $(DOWNLOAD) http://unicode.org/Public/UNIDATA/$(@F)
+ $(DOWNLOAD) https://www.unicode.org/Public/UNIDATA/$(@F)
# Generation of conversion tables used for string normalization with
# UTF-8 strings.
diff --git a/src/common/unicode_norm.c b/src/common/unicode_norm.c
index 89c55332128..7509f814379 100644
--- a/src/common/unicode_norm.c
+++ b/src/common/unicode_norm.c
@@ -3,7 +3,7 @@
* Normalize a Unicode string to NFKC form
*
* This implements Unicode normalization, per the documentation at
- * http://www.unicode.org/reports/tr15/.
+ * https://www.unicode.org/reports/tr15/.
*
* Portions Copyright (c) 2017-2019, PostgreSQL Global Development Group
*
@@ -109,7 +109,7 @@ get_decomposed_size(pg_wchar code)
/*
* Fast path for Hangul characters not stored in tables to save memory as
* decomposition is algorithmic. See
- * http://unicode.org/reports/tr15/tr15-18.html, annex 10 for details on
+ * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details on
* the matter.
*/
if (code >= SBASE && code < SBASE + SCOUNT)
@@ -234,7 +234,7 @@ decompose_code(pg_wchar code, pg_wchar **result, int *current)
/*
* Fast path for Hangul characters not stored in tables to save memory as
* decomposition is algorithmic. See
- * http://unicode.org/reports/tr15/tr15-18.html, annex 10 for details on
+ * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details on
* the matter.
*/
if (code >= SBASE && code < SBASE + SCOUNT)
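
The algorithmic Hangul fast path referenced in both hunks follows the arithmetic in the Unicode standard (TR15 and the conjoining-jamo section of the core spec); a sketch with the standard constants that SBASE/SCOUNT refer to:

    # Standard Hangul decomposition constants.
    SBASE, LBASE, VBASE, TBASE = 0xAC00, 0x1100, 0x1161, 0x11A7
    LCOUNT, VCOUNT, TCOUNT = 19, 21, 28
    NCOUNT = VCOUNT * TCOUNT      # 588
    SCOUNT = LCOUNT * NCOUNT      # 11172

    def decompose_hangul(s):
        """Decompose a precomposed Hangul syllable into jamo."""
        i = s - SBASE
        l = LBASE + i // NCOUNT                # leading consonant
        v = VBASE + (i % NCOUNT) // TCOUNT     # vowel
        t = TBASE + i % TCOUNT                 # optional trailing consonant
        return (l, v, t) if t != TBASE else (l, v)

    # The spec's worked example: U+D4DB -> U+1111 U+1171 U+11B6.
    print([hex(c) for c in decompose_hangul(0xD4DB)])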
@@ -362,7 +362,7 @@ unicode_normalize_kc(const pg_wchar *input)
continue;
/*
- * Per Unicode (http://unicode.org/reports/tr15/tr15-18.html) annex 4,
+ * Per Unicode (https://www.unicode.org/reports/tr15/tr15-18.html) annex 4,
* a sequence of two adjacent characters in a string is an
* exchangeable pair if the combining class (from the Unicode
* Character Database) for the first character is greater than the
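
The comment in this last hunk is cut off by the hunk context, but the rule it cites (TR15's Canonical Ordering Algorithm) says two adjacent characters form an exchangeable pair when the first's combining class is greater than the second's and the second's is nonzero. A sketch of that reordering step, using Python's unicodedata as the combining-class source:

    from unicodedata import combining as ccc   # returns 0 for starters

    def canonical_order(chars):
        """Bubble exchangeable pairs into canonical order."""
        chars = list(chars)
        for i in range(1, len(chars)):
            j = i
            while j > 0 and ccc(chars[j]) != 0 and ccc(chars[j - 1]) > ccc(chars[j]):
                chars[j - 1], chars[j] = chars[j], chars[j - 1]
                j -= 1
        return "".join(chars)

    # U+0323 (class 220, below) must sort before U+0301 (class 230, above):
    print([hex(ord(c)) for c in canonical_order("q\u0301\u0323")])  # ['0x71', '0x323', '0x301']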