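# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0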
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Syntax:
# "source" => "target"
# "source".length() > 0 (source cannot be empty.)
# "target".length() >= 0 (target can be empty.)
# ⁰ [SUPERSCRIPT ZERO]
"\u2070" => "0"
# ₀ [SUBSCRIPT ZERO]
"\u2080" => "0"
# ¹ [SUPERSCRIPT ONE]
"\u00B9" => "1"
# ₁ [SUBSCRIPT ONE]
"\u2081" => "1"
# ² [SUPERSCRIPT TWO]
"\u00B2" => "2"
# ₂ [SUBSCRIPT TWO]
"\u2082" => "2"
# ② [CIRCLED DIGIT TWO]
"\u2461" => "2"
# ³ [SUPERSCRIPT THREE]
"\u00B3" => "3"
# ₃ [SUBSCRIPT THREE]
"\u2083" => "3"
# ⁴ [SUPERSCRIPT FOUR]
"\u2074" => "4"
# ₄ [SUBSCRIPT FOUR]
"\u2084" => "4"
# ④ [CIRCLED DIGIT FOUR]
"\u2463" => "4"
# ⁵ [SUPERSCRIPT FIVE]
"\u2075" => "5"
# ₅ [SUBSCRIPT FIVE]
"\u2085" => "5"
# ⁶ [SUPERSCRIPT SIX]
"\u2076" => "6"
# ₆ [SUBSCRIPT SIX]
"\u2086" => "6"
# ⁷ [SUPERSCRIPT SEVEN]
"\u2077" => "7"
# ₇ [SUBSCRIPT SEVEN]
"\u2087" => "7"
# ⁸ [SUPERSCRIPT EIGHT]
"\u2078" => "8"
# ₈ [SUBSCRIPT EIGHT]
"\u2088" => "8"
# ⁹ [SUPERSCRIPT NINE]
"\u2079" => "9"
# ₉ [SUBSCRIPT NINE]
"\u2089" => "9"
# ″ [DOUBLE PRIME]
"\u2033" => "\""
# ′ [PRIME]
"\u2032" => "\'"
# ‵ [REVERSED PRIME]
"\u2035" => "\'"
# ＇ [FULLWIDTH APOSTROPHE]
"\uFF07" => "\'"
# ‐ [HYPHEN]
"\u2010" => "-"
# ‑ [NON-BREAKING HYPHEN]
"\u2011" => "-"
# ‒ [FIGURE DASH]
"\u2012" => "-"
# – [EN DASH]
"\u2013" => "-"
# — [EM DASH]
"\u2014" => "-"
# ⁻ [SUPERSCRIPT MINUS]
"\u207B" => "-"
# ₋ [SUBSCRIPT MINUS]
"\u208B" => "-"
# － [FULLWIDTH HYPHEN-MINUS]
"\uFF0D" => "-"
# ⁎ [LOW ASTERISK]
"\u204E" => "*"
# ＊ [FULLWIDTH ASTERISK]
"\uFF0A" => "*"
# ， [FULLWIDTH COMMA]
"\uFF0C" => ","
# ⁄ [FRACTION SLASH]
"\u2044" => "/"
# ／ [FULLWIDTH SOLIDUS]
"\uFF0F" => "/"
# ： [FULLWIDTH COLON]
"\uFF1A" => ":"
# ⁏ [REVERSED SEMICOLON]
"\u204F" => ";"
# ； [FULLWIDTH SEMICOLON]
"\uFF1B" => ";"
# ‸ [CARET]
"\u2038" => "^"
# ⁓ [SWUNG DASH]
"\u2053" => "~"
# ～ [FULLWIDTH TILDE]
"\uFF5E" => "~"
################################################################
# Below is the Perl script used to generate the above mappings #
# from ASCIIFoldingFilter.java: #
################################################################
#
# #!/usr/bin/perl
#
# use warnings;
# use strict;
#
# my @source_chars = ();
# my @source_char_descriptions = ();
# my $target = '';
#
# while (<>) {
#   if (/case\s+'(\\u[A-F0-9]+)':\s*\/\/\s*(.*)/i) {
#     push @source_chars, $1;
#     push @source_char_descriptions, $2;
#     next;
#   }
#   if (/output\[[^\]]+\]\s*=\s*'(\\'|\\\\|.)'/) {
#     $target .= $1;
#     next;
#   }
#   if (/break;/) {
#     $target = "\\\"" if ($target eq '"');
#     for my $source_char_num (0..$#source_chars) {
#       print "# $source_char_descriptions[$source_char_num]\n";
#       print "\"$source_chars[$source_char_num]\" => \"$target\"\n\n";
#     }
#     @source_chars = ();
#     @source_char_descriptions = ();
#     $target = '';
#   }
# }