Regular Expression Pocket Reference - Regular Expressions For Perl, Ruby, PHP, Python, C, Java and .NET (Pocket Reference (O'Reilly) ) (PDFDrive)
Regular Expression Pocket Reference - Regular Expressions For Perl, Ruby, PHP, Python, C, Java and .NET (Pocket Reference (O'Reilly) ) (PDFDrive)
cat|category
alert backspace escape character form feed newline
carriage return horizontal tab vertical tab
\n
\b
backspace
\b backspace
\num
\015\012
\x0D\x0A
\cchar
char
\cH
[...] [^...]
[...]
[^...]
-
[a-z]
\w \d \s \W \D \S
\s
\S
\d
[0-9]
[:alnum:]
[:lower:]
[[:lower:]] [a-z]
Alnum
Alpha
Blank
Cntrl
Digit
Graph
Lower
Print
Punct
Space
Upper
Xdigit
\p{prop} \P{prop}
Is In \p{Ll}
a α
\X
\P{M}\p{M} \X è;
e'
\p{L}
\p{Ll}
\p{Lm}
\p{Lo}
\p{Lt}
\p{Lu}
\p{C}
\p{Cc}
\p{Cf}
\p{Cn}
\p{Co}
\p{Cs}
\p{M}
\p{Mc}
\p{Me}
\p{Mn}
\p{N}
\p{Nd}
\p{Nl}
\p{No}
\p{P}
\p{Pc}
\p{Pd}
\p{Pe} \p{Ps}
\p{Pi}
\p{Pf}
\p{Po}
\p{Ps}
\p{S}
\p{Sc}
\p{Sk}
\p{Sm}
\p{So}
\p{Z}
\p{Zl}
\p{Zp}
\p{Zs}
^ \A
^
\A
$ \Z \z
$ $
\Z
\z
\G
\G
\b \B \< \>
\b
\B
\< \>
(?=...) (?!...)
(?<=...) (?<!...)
foo(?=bar)
foo foobar food
m
^ $
s
.
i
x
(?mod)
(?-mod)
( mod ...)
.{0,80}(?#Field limit
is 80 chars)
.{0,80}
\Q...\E
\Q \E
\Q(.*)\E \(\.\*\)
(...) \1 \2
\1 \2
\b(\w+)\b
\s+\1\b the the
(?:...)
(?:foobar)
foobar
(?<name>...)
name Subject:(?<subject>.*)
Subject
subject
(?>...)
\b(foo|bar)\b
foo bar
(?(if)then |else)
if
then else
if then
else (<)?foo(?(1)>|bar)
<foo> foobar
* + ? {num num }
(ab)+ ababababab
*? +? ?? {num num }?
(an)+? an banana
*+ ++ ?+ {num num }+
(ab)++ab ababababab
\w
è; e
/pattern/mode
s/pattern/replacement/mode
s/^\s+//
s/\s+$//
/^\d{1,6}$/
42 678234
10,000
/^#([a-fA-F0-9]){3}(([a-fA-F0-9]){3})?$/
/^\d{3}-\d{2}-\d{4}$/
078-05-1120
078051120 1234-12-12
/^\d{5}(-\d{4})?$/
94941-3232 10024
949413232
/^\$(\d{1,3}(\,\d{3})*|\d+)(\.\d{2})?$/
$20 $15,000.01
$1.001 $.99
/^\d\d\/\d\d\/\d\d\d\d \d\d:\d\d:\d\d$/
04/30/1978 20:45:38
4/30/1978 20:45:38 4/30/78
/^.*\//
/usr/local/bin/apachectl
C:\\System\foo.exe
/^(\d|[01]?\d\d|2[0-4]\d|25[0-5])\.(\d|[01]?\d\d|2[0-4]
\d|25[0-5])\.
(\d|[01]?\d\d|2[0-4]\d|25[0-5])\.(\d|[01]?\d\d|2[0-4]
\d|25[0-5])$/
127.0.0.1 224.22.5.110
127.1
/^([0-9a-fA-F]{2}:){5}[0-9a-fA-F]{2}$/
01:23:45:67:89:ab
01:23:45 0123456789ab
/^[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z_+])*@([0-9a-zA-Z][-\w]*
[0-9a-zA-Z]\.)+[a-zA-Z]{2,9}$/
tony@mail.
example.museum
[email protected]
/(https?):\/\/([0-9a-zA-Z][-\w]*[0-9a-zA-Z]\.)+
([a-zA-Z]{2,9})
(:\d{1,4})?([-\w\/#~:.?+=&%@~]*)/
http://foo.com:8080/bar.html
\a
\b
\e x1B
\n x0A x0D
\r x0D x0A
\f x0C
\t x09
\octal
\xhex
\x{hex}
\cchar
\N{name}
PATH_TO_PERLLIB/unicode/Names.txt use
charnames ':full'
[...]
[^...]
[:class:]
. /s
\C
\X
\w \p{IsWord}
\W \P{IsWord}
\d \p{IsDigit}
\D \P{IsDigit}
\s \p{IsSpace}
\S \P{IsSpace}
\p{prop}
\P{prop}
^ /m
\A
$
/m
\Z
\z
\G
\b
\B
(?=...)
(?!...)
(?<=...)
(?<!...)
/i
/m ^ $ \n
/s .
/x #
/o
(?mode) xsmi
(?-mode) xsmi
(?mode:...) xsmi
(?-mode:...) xsmi
(?#...)
#... /x
\u
\l
\U
\L
\Q
\E \U \L \Q
(...) \1 \2 $1
$2
\n n
(?:...)
(?>...)
...|...
*
+
?
{n} n
{n,} n
{x,y} x y
*?
+?
??
{n,}? n
{x,y}? x y
(?(COND)...| COND
...)
(?(COND)...)
(?{CODE})
(??{CODE})
(?<name>...)
(?'name'...)
\k<name>
\k'name'
%+
$+{foo}
%-
$-{foo}[0]
\g{n} \gn n
\g{-n} \g-n n
(?n) n
(?&NAME)
(?R)
(?(DEFINE)...)
(*FAIL)
(*ACCEPT)
(*PRUNE)
(*MARK:name)
$REGMARK
(*SKIP:name) MARK
(*THEN)
(*COMMIT)
/p ${^PREMATCH} ${^MATCH}
${^POSTMATCH}
\K
qr//
m// s/// split
/.../
m
/.../ #same as m/.../
\N{name} \u \l \U \L \Q
\E
qr/PATTERN/ismxo
PATTERN
/ismxo
m/PATTERN/imsxocg
PATTERN
(1)
()
1 "" /imsxo
/cg /g
/g
/g
/cg
s/PATTERN/REPLACEMENT/egimosx
PATTERN
REPLACEMENT /imosx
/g PATTERN
/e REPLACEMENT
\w \d \s \b
use
locale i \L \l \U \u \w
\W
Is
In
IsASCII [\x00-\x7f]
IsAlnum [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Nd}]
IsAlpha [\p{Ll}\p{Lu}\p{Lt}\p{Lo}]
IsCntrl \p{C}
IsDigit \p{Nd}
IsGraph [^\p{C}\p{Space}]
IsLower \p{Ll}
IsPrint \P{C}
IsPunct \p{P}
IsSpace [\t\n\f\r\p{Z}]
IsUpper [\p{Lu}\p{Lt}]
IsWord [_\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Nd}]
IsXDigit [0-9a-fA-F]
perlre
java.
util.regex
java.util.regex
\a
\b x08
\e x1B
\n x0A
\r x0D
\f x0C
\t x09
\0octal
\xhex
\uhex
\cchar
[...]
[^...]
. DOTALL
\w [a-zA-Z0-9_]
\W [^a-zA-Z0-9_]
\d [0-9]
\D [^0-9]
\s [ \t\n\f\r\x0B]
\S [^ \t\n\f\r\x0B]
\p{prop}
\P{prop}
^
MULTILINE
\A
$
MULTILINE
\Z
\z
\b
\B
\G
(?=...)
(?!...)
(?<=...)
(?<!...)
Pattern.UNIX_LINES d \n
Pattern.DOTALL s .
Pattern.MULTILINE m ^ $
Pattern.COMMENTS x
#
Pattern.CASE_INSENSITIVE i
Pattern.UNICODE_CASE u
Pattern.CANON_EQ
(?mode)
idmsux
(?-mode)
idmsux
(?mode:...)
idmsux
(?-mode:...)
idmsux
#...
/x
(...) \1 \2
$1 $2
\n n
$n
n
(?:...)
(?>...)
...|...
*
+
?
{n} n
{n,} n
{x,y} x y
*?
+?
??
{n,}? n
{x,y}? x y
*+
++
?+
{n}+ n
{n,}+ n
{x,y}+ x y
java.util.regex.Pattern java.util.regex.Matcher
java.util.regex.PatternSyntaxException
CharSequence String
CharSequence
Pattern
CharSequence
Matcher
Pattern
CharSequence
String
\n \\n
String
regex
String [ ] split(String regex, int limit)
limit-1
regex
String replaceFirst(String regex, String replacement)
regex replacement
String replaceAll(String regex, String replacement)
regex replacement
Pattern input
String[ ] split(CharSequence input, int limit)
limit
regex
Matcher appendReplacement(StringBuffer sb, String replacement)
replacement sb
StringBuffer appendTail(StringBuffer sb)
sb
int end( )
boolean hasTransparentBounds( )
Matcher
boolean hitEnd( )
boolean lookingAt( )
boolean matches( )
Pattern
Pattern pattern( )
Pattern Matcher
static String quoteReplacement(String string)
start end
int regionStart( )
int regionEnd( )
Matcher reset( )
Matcher usePattern(Pattern p)
Matcher
Matcher useTransparentBounds(boolean b)
Matcher
String getDescription( )
int getIndex( )
String getMessage( )
String getPattern( )
char charAt(int index)
index
int length( )
\w \W \d \D \s
\S
\p{L} \P{L} \p{Nd} \P{Nd} \p{Z} \P{Z}
\b \B
\p{Lu} \p{Lowercase_Letter}
In
\p{InGreekExtended} \p{In_
Greek_Extended} \p{In Greek Extended}
import java.util.regex.*;
if (dailyBugle.matches(regex)) {
System.out.println("Matched: " + dailyBugle);
}
}
}
Matcher m = p.matcher(date);
if (m.find( )) {
String month = m.group(1);
String day = m.group(2);
String year = m.group(3);
System.out.printf("Found %s-%s-%s\n", year, month, day);
}
}
}
Pattern p = Pattern.compile(regex,
Pattern.CASE_INSENSITIVE + Pattern.COMMENTS);
Matcher m = p.matcher(text);
String result = m.replaceAll("<a href=\"$1\">$1</a>");
System.out.println(result);
}
}
\a \x07
\b \x08
\e \x1B
\n \x0A
\r \x0D
\f \x0C
\t \x09
\v \x0B
\0octal
\xhex
\uhex
\cchar
[...]
[^...]
.
s
\w [\p{Ll}\p{Lu}\p{Lt}\p{Lo}
\p{Nd}\p{Pc}] [a-zA-Z_0-9]
ECMAScript
\W [^\p{Ll}\p{Lu}\p{Lt}
\p{Lo}\p{Nd}\p{Pc}] [^a-zA-Z_0-9]
ECMAScript
\d \p{Nd} [0-9] ECMAScript
\D \P{Nd} [^0-9] ECMAScript
\s [ \f\n\r\t\v\x85\p{Z}]
[ \f\n\r\t\v] ECMAScript
\S [^ \f\n\r\t\v\x85
\p{Z}] [^ \f\n\r\t\v] ECMAScript
\p{prop}
\P{prop}
^
MULTILINE
\A
$
MULTILINE
\Z
\z
\b \w \W
\B
\G
(?=...)
(?!...)
(?<=...)
(?<!...)
Singleline s .
Multiline m ^ $
IgnorePatternWhitespace x
#
IgnoreCase i
CultureInvariant i
ExplicitCapture n
Compiled
RightToLeft
ECMAScript ECMAScript
IgnoreCase Multiline
(?imnsx-imnsx)
(?imnsx-imnsx:...)
(?#...)
#...
/x
(...) \1 \2 $1 $2
\n
n
$n n
(?<name>...) name
(?:...)
(?>...)
...|...
*
+
?
{n} n
{n,} n
{x,y} x y
*?
+?
??
{n,}? n
{x,y}? x y
System.
Text.RegularExpressions RegExp( )
RegExp Groups
Match
@""
RegExp
#
public static bool IsMatch(string input, string pattern)
public static bool IsMatch(string input, string pattern,
RegexOptions options)
public bool IsMatch(string input)
public bool IsMatch(string input, int startat)
input
pattern options
startat
input
pattern options
startat length
pattern options
startat
input
public static string Replace(string input, string pattern,
MatchEvaluator evaluator)
public static string Replace(string input, string pattern,
MatchEvaluator evaluator, RegexOptions options)
public static string Replace(string input, string pattern, string
replacement)
public static string Replace(string input, string pattern, string
replacement, RegexOptions options)
public string Replace(string input, MatchEvaluator evaluator)
public string Replace(string input, MatchEvaluator evaluator,
int count)
public string Replace(string input, MatchEvaluator evaluator,
int count, int startat)
public string Replace(string input, string replacement)
public string Replace(string input, string replacement, int count)
public string Replace(string input, string replacement, int
count, int startat)
input
replacement
MatchEvaluator replacement
$n ${name}
options
count
startat
input
public static string[ ] Split(string input, string pattern)
public static string[ ] Split(string input, string pattern,
RegexOptions options)
public string[ ] Split(string input)
public string[ ] Split(string input, int count)
public string[ ] Split(string input, int count, int
startat)
count
input startat
public bool Success
\w \d \s
ECMAScript
Thread.
CurrentCulture CultureInvariant
Is
class SimpleMatchTest
{
static void Main( )
{
string dailybugle = "Spider-Man Menaces City!";
/// <summary>
/// Example: use capture groups to split a date string into month, day and year.
/// </summary>
class MatchTest
{
    static void Main( )
    {
        // Two digits, a '-' or '/' separator, two digits, a separator,
        // then a two- or four-digit year; anchored to the whole string.
        const string DatePattern = @"^(\d\d)[-/](\d\d)[-/](\d\d(?:\d\d)?)$";
        string date = "12/30/1969";

        Match found = new Regex(DatePattern).Match(date);
        if (!found.Success)
        {
            return;
        }

        // Group 0 is the whole match; groups 1-3 are the parenthesized parts.
        GroupCollection parts = found.Groups;
        string month = parts[1].Value;
        string day = parts[2].Value;
        string year = parts[3].Value;
    }
}
/// <summary>
/// Example: replace every "&lt;br&gt;" in a string with "&lt;br /&gt;", ignoring case.
/// </summary>
class SimpleSubstitutionTest
{
    static void Main( )
    {
        string text = "Hello world. <br>";

        // Build the pattern once with IgnoreCase, then substitute all matches.
        Regex lineBreak = new Regex("<br>", RegexOptions.IgnoreCase);
        string result = lineBreak.Replace(text, "<br />");
    }
}
\a \x07
\b \x08
\e \x1B
\n \x0A
\r \x0D
\f \x0C
\t x09
\octal
\xhex
\x{hex}
\cchar
[...]
[^...]
[:class:]
. /s
\C
\w [a-zA-Z0-9_]
\W [^a-zA-Z0-9_]
\d [0-9]
\D [^0-9]
\s [\n\r\f\t ]
\S [^\n\r\f\t ]
^
/m
\A
$
/m
\Z
\z
\G
\b \w
\W
\B
(?=...)
(?!...)
(?<=...)
(?<!...)
i
m ^ $ \n
s .
x #
U * *?
A
D $
(?mode) imsxU
(?-mode) imsxU
(?mode:...) xsmi
(?-mode:...) xsmi
(?#...)
#... x
\Q
\E \Q
(...) \1
\2
(?P<name>...)
name
\n n
(?:...)
(?>...)
...|...
*
+
?
{n} n
{n,} n
{x,y} x y
*?
+?
??
{n,}? n
{x,y}? x y
*+
++
?+
{n,}+ n
{x,y}+ x y
matches
matches
n matches
n
matches[7][3]
pattern
subject
matches
PREG_SET_ORDER PREG_SET_ORDER
matches
PREG_
OFFSET_CAPTURE
subject
int preg_match (string pattern, string subject [, array
matches [, int flags ]])
pattern subject
matches
matches[0]
PREG_OFFSET_CAPTURE matches
str
mixed preg_replace_callback (mixed pattern, callback
callback, mixed subject [, int limit ])
subject pattern
callback
limit
limit
pattern
callback subject
mixed preg_replace (mixed pattern, mixed replacement, mixed
subject [, int limit ])
subject pattern
replacement limit
limit
$n \n
pattern /e replacement
pattern
replacement replacement
replacement subject
PREG_SPLIT_OFFSET_CAPTURE
subject
if (preg_match($regex, $dailybugle)) {
//do something
}
//Match dates formatted like MM/DD/YYYY, MM-DD-YY,...
// On a match, $matches[1] is the month, $matches[2] the day and
// $matches[3] the two- or four-digit year.
$date = "12/30/1969";
// "!" is the pattern delimiter, so the "/" inside the class needs no escaping.
$p = "!^(\\d\\d)[-/](\\d\\d)[-/](\\d\\d(?:\\d\\d)?)$!";
// Fixed: the condition was missing its closing parenthesis, a parse error.
if (preg_match($p,$date,$matches)) {
    $month = $matches[1];
    $day = $matches[2];
    $year = $matches[3];
}
$pattern = "{<br>}i";
re
re
\a \x07
\b \x08
\n \x0A
\r \x0D
\f \x0C
\t \x09
\v \x0B
\octal
\xhh
\uhhhh
\Uhhhhhhhh
[...]
[^...]
. DOTALL
\w [a-zA-Z0-9_] LOCALE
UNICODE
\W [^a-zA-Z0-9_] LOCALE
UNICODE
\d [0-9]
\D [^0-9]
\s [ \t\n\r\f\v]
\S [^ \t\n\r\f\v]
^
MULTILINE
\A
$
MULTILINE
\Z
\b
\B
(?=...)
(?!...)
(?<=...)
(?<!...)
I IGNORECASE i
L LOCALE L \w \W \b \B
M MULTILINE (?m) m ^ $
\n
S DOTALL (?s) s .
U UNICODE (?u) u \w \W \b \B
X VERBOSE (?x) x
#
(?mode)
iLmsux
(?#...)
#...
VERBOSE
(...) \1 \2
(?P<name> ...)
name
(?P=name)
name
\n n
(?:...)
...|...
*
+
?
{n} n
{x,y} x y
*?
+?
??
{x,y}? x y
re
r'' r""
r'\n'
\\n
r'''text''' r"""text"""
re
compile(pattern[, flags])
flags
match(pattern, string[, flags])
pattern string
None
search(pattern, string[, flags])
pattern string
None
split(pattern, string[, maxsplit])
string pattern
maxsplit
findall(pattern, string)
pattern string pattern
finditer(pattern, string)
pattern string
escape(string)
string
exception error
re.compile
flags
groupindex
pattern
match find
pos
endpos
pos endpos search match
re
match search
string
match search
group([g1, g2, ...])
None
groups([default])
None default
groupdict([default])
None
default
start([group])
group
group
end([group])
group
group
span([group])
group
group
expand([template])
template
lastgroup
None
lastindex
None
re
\u UNICODE \w
\W \b \B
re
#Find Spider-Man, Spiderman, SPIDER-MAN, etc.
import re

# Split a date such as MM/DD/YYYY or MM-DD-YY into its parts.
# Groups: 1 = month, 2 = day, 3 = two- or four-digit year.
date = '12/30/1969'
regex = re.compile(r'^(\d\d)[-/](\d\d)[-/](\d\d(?:\d\d)?)$')
match = regex.match(date)
if match:
    # Fixed: the body of this `if` had lost its indentation, which is an
    # IndentationError in Python.
    month = match.group(1) #12
    day = match.group(2) #30
    year = match.group(3) #1969
result = regex.sub(repl,text)
#urlify - turn URLs into HTML links
import re
# Pattern text for locating URLs. It is laid out one element per line with
# inline "#" remarks, so it is only meaningful when compiled in verbose mode
# (re.VERBOSE / re.X); the compile step is not part of this fragment.
# Group 1 captures the whole URL; group 2 captures just the scheme.
# The lazy "+?" plus the lookahead keeps trailing punctuation (. : ? -)
# out of the captured URL.
pattern = r'''
\b # start at word boundary
( # capture to \1
(https?|telnet|gopher|file|wais|ftp) :
# resource and colon
[\w/#~:.?+=&%@!\-] +? # one or more valid chars
# take little as possible
)
(?= # lookahead
[.:?\-] * # for possible punc
(?: [^\w/#~:.?+=&%@!\-] # invalid character
| $ ) # or end of string
)'''
Regexp String
\a \x07
\b \x08
\e \x1B
\n \x0A
\r \x0D
\f \x0C
\t \x09
\v \x0B
\0octal
\xhex
\cchar
[...]
[^...]
.
s
\w
\W
\d
\D
\s [ \f\n\r\t\v]
\S [^ \f\n\r\t\v]
^
\A
$
\Z
\z
\b \w \W
\B
\G
(?=...)
(?!...)
m .
x
#
i
n
o #{...}
(?imns-imns)
(?imns-imns:
...)
(?#...)
#... /x
(?<=...)
(?<!...)
(...) \1 \2 $1 $2
(?<name>...) \k<name>
\n n
$n n
\k<name>
(?:...)
(?>...)
...|...
*
+
?
{n} n
{n,} n
{x,y} x y
*?
+?
??
{n,}? n
{x,y}? x y
Regexp MatchData
String
/.../ =~
/.../
Regexp.new =~ String#match /.../
Regexp
"foo, bar, frog".split(/,\s*/)
string =~ regexp => fixnum or nil
regexp
nil
regexp === string => boolean
regexp matches the string
regexp
nil
split(pattern=$;, [limit]) => anArray
Regexp
limit limit limit
limit
Regexp
Regexp
Regexp
source => string
(?imns-imns:...)
n
begin(n) => integer
n
captures => array
MatchData#to_a
end(n) => integer
n
length => integer
size => integer
n
post_match => string
$'
pre_match => string
$`
select([index]*) => array
index
$KCODE = "UTF8"
\w \d \s \b
Regexp.new
/n
if dailybugle.match(/spider[- ]?man./i)
puts dailybugle
end
# Split an MM/DD/YYYY-style date into month, day and year via capture groups.
date = '12/30/1969'
regexp = Regexp.new('^(\d\d)[-/](\d\d)[-/](\d\d(?:\d\d)?)$')
md = regexp.match(date)
# captures lists groups 1..3 in order ("12", "30", "1969" here);
# all three variables stay nil when the date does not match.
month, day, year = md.captures if md
# URL-finding pattern, built from a single-quoted string so the backslash
# sequences reach the regex engine unchanged. Regexp::EXTENDED makes the
# engine ignore the whitespace and "#" remarks inside the pattern text.
# Group 1 captures the whole URL; group 2 captures just the scheme.
regexp = Regexp.new('
\b # start at word boundary
( # capture to \1
(https?|telnet|gopher|file|wais|ftp) :
# resource and colon
[\w/#~:.?+=&%@!\-] +? # one or more valid chars
# take little as possible
)
(?= # lookahead
[.:?\-] * # for possible punc
(?: [^\w/#~:.?+=&%@!\-] # invalid character
| $ ) # or end of string
)', Regexp::EXTENDED)
. [^\x0A\x0D\u2028\u2029]
\w [a-zA-Z0-9_]
\W [^a-zA-Z0-9_]
\d [0-9]
\D [^0-9]
\s
\S
^
/m
$
/m
\b
\B
(?=...)
(?!...)
m ^ $
i
(...) \1 \2
$1 $2
\n n
$n n
(?:...)
...|...
*
+
?
{n} n
{n,} n
{x,y} x y
*?
+?
??
{n,}? n
{x,y}? x y
String RegExp
\\w
\w
/pattern/img
pattern RegExp
search(pattern)
pattern
-1
replace(pattern, replacement)
pattern
replacement pattern
pattern
$n
n pattern
match(pattern)
pattern
null
g pattern
split(pattern, limit)
pattern limit
limit
pattern pattern
SyntaxError
pattern attributes
TypeError pattern RegExp
attributes
global
RegExp g
ignoreCase
RegExp i
lastIndex
multiline
RegExp m
source
exec(text)
text
null
g lastIndex
test(text)
true RegExp text test( )
exec( )
lastIndex
//Find Spider-Man, Spiderman, SPIDER-MAN, etc.
var dailybugle = "Spider-Man Menaces City!";
// String.prototype.search() returns the index of the first match, or -1
// when nothing matches. Using the raw return value as a condition is
// wrong both ways: a match at index 0 is falsy and "no match" (-1) is
// truthy. Compare against -1 explicitly.
// `regex` is not defined in this fragment; it must be supplied by the
// surrounding example.
if (dailybugle.search(regex) !== -1) {
  //do something
}
\x{hex}
\cchar
\p{prop}
\P{prop}
[...]
[^...]
[:class:]
.
PCRE_DOTALL
\C
\w [a-zA-Z0-9_]
\W [^a-zA-Z0-9_]
\d [0-9]
\D [^0-9]
\s [\n\r\f\t\v ]
\S [^\n\r\f\t\v ]
\R
^
PCRE_MULTILINE
\A
$
PCRE_MULTILINE
\Z
\z
\G
\b \w
\W
\B
(?=...)
(?!...)
(?<=...)
(?<!...)
PCRE_CASELESS i
PCRE_MULTILINE m ^ $
\n
PCRE_DOTALL s .
PCRE_EXTENDED x
#
PCRE_UNGREEDY U
*
*?
PCRE_ANCHORED
PCRE_DOLLAR_ $
ENDONLY
PCRE_NO_AUTO_
CAPTURE
PCRE_UTF8
PCRE_AUTO_CALLOUT
PCRE_DUPNAMES
PCRE_FIRSTLINE
PCRE_NEWLINE_CR
PCRE_NEWLINE_LF
PCRE_NEWLINE_CRLF
PCRE_NEWLINE_ANY
PCRE_NOTBOL
PCRE_NOTEOL
PCRE_NOTEMPTY
PCRE_NO_UTF8_CHECK
PCRE_PARTIAL
PCRE_ERROR_PARTIAL PCRE_ERROR_NOMATCH
(?mode)
imsxU
(?-mode)
imsxU
(?mode:...)
imsx
(?-mode:...)
imsx
\Q
\E \Q
(?#...)
#...
PCRE_EXTENDED
(...)
\1 \2
(?P<name>...),
(?<name>...),(?'name'...) name
(?P=name),\k<name>, \k'name'
\n, \gn, \g{n} n
(?:...)
(?>...)
...|...
*
+
?
{n} n
{n,} n
{x,y} x
y
*?
+?
??
{n,}? n
{x,y}? x y
*+
++
?+
{n}+ n
{n,}+ n
{x,y}+ x y
(?(condition)...|...)
condition
(?(condition)...)
condition
pcre.h libpcre.a
-lpcre
pcre_
compile( )
pcre_exec( )
pcre_free_substring( ) pcre_free_substring_
list( )
code subject
length
ovector ovector
ovector
options pcre_extra
pcre_study( )
pcre_extra *pcre_study(const pcre *code, int options, const char
**errptr)
pcre_exec( )
code options
errptr
int pcre_copy_named_substring(const pcre *code, const char
*subject, int *ovector, int stringcount, const char *stringname,
char *buffer, int buffersize)
name
int pcre_get_substring(const char *subject, int *ovector, int
stringcount, int stringnumber, const char **stringptr)
stringptr
stringnumber stringcount
ovector
pcre_exec( )
int pcre_get_substring_list(const char *subject, int *ovector,
int stringcount, const char ***listptr)
listptr
void pcre_free_substring(const char *stringptr)
stringptr pcre_
get_named_substring( ) pcre_get_substring_list( )
void pcre_free_substring_list(const char **stringptr)
stringptr pcre_
get_substring_list( )
const unsigned char *pcre_maketables(void)
void *(*pcre_malloc)(size_t)
malloc( )
void (*pcre_free)(void *)
pcre_free( )
int (*pcre_callout)(pcre_callout_block *)
PCRE_UTF8
setlocale(LC_CTYPE, "fr");
tables = pcre_maketables( );
re = pcre_compile(..., tables);
#include <stdio.h>
#include <string.h>
#include <pcre.h>
/* Compile Regex */
regex = pcre_compile(
pattern,
PCRE_CASELESS, /* OR'd mode modifiers */
&error, /* error message */
&erroffset, /* position in regex where error occurred */
NULL); /* use default locale */
/* Handle Errors */
/* A NULL result from pcre_compile() means the pattern did not compile;
   error and erroffset were filled in by that call.
   Fixed: "= =" is not a valid C operator; the equality test is "==". */
if (regex == NULL)
{
    printf("Compilation failed at offset %d: %s\n", erroffset,
           error);
    return 1;
}
/* Try Match */
rc = pcre_exec(
regex, /* compiled regular expression */
NULL, /* optional results from pcre_study */
text, /* input string */
(int)strlen(text), /* length of input string */
0, /* starting position in input string */
0, /* OR'd options */
capturevector, /* holds results of capture groups */
CAPTUREVECTORSIZE);
/* Handle Errors */
if (rc < 0)
{
switch(rc)
{
case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
default: printf("Matching error %d\n", rc); break;
}
return 1;
}
return 0;
}
#include <stdio.h>
#include <string.h>
#include <pcre.h>
#define CAPTUREVECTORSIZE 30 /* should be a multiple of 3 */
rc = pcre_exec(
regex, /* compiled regular expression */
NULL, /* optional results from pcre_study */
text, /* input string */
(int)strlen(text), /* length of input string */
0, /* starting position in input string */
0, /* OR'd options */
capturevector, /* holds results of capture groups */
CAPTUREVECTORSIZE);
/* Handle Match Errors */
if (rc < 0)
{
switch(rc)
{
case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
/*
Handle other special cases if you like
*/
default: printf("Matching error %d\n", rc); break;
}
return 1;
}
/* Match succeeded */
printf("Match succeeded\n");
return 0;
}
RewriteRule LocationMatch DirectoryMatch FilesMatch
ProxyMatch AliasMatch
\octal
\xhex
\x{hex}
\cchar
[...]
[^...]
[:class:]
.
/s
\C
\w [a-zA-Z0-9_]
\W [^a-zA-Z0-9_]
\d [0-9]
\D [^0-9]
\s [\n\r\f\t ]
\S [^\n\r\f\t ]
^
$
\b \w
\W
\B
(?=...)
(?!...)
(?<=...)
(?<!...)
NC
(?mode) imsxU
(?-mode) imsxU
(?mode:...) xsmi
(?-mode:...) xsmi
(?#...)
#... x
\Q
\E \Q
(...) \1
\2
(?P<name>...)
name
\n n
(?:...)
(?>...)
...|...
*
+
?
{n} n
{n,} n
{x,y} x y
*?
+?
??
{n,}? n
{x,y}? x y
*+
++
?+
{n}+ n
{n,}+ n
{x,y}+ x y
RewriteEngine
On RewriteRule
RewriteCond RewriteRule
RewriteRule pattern substitution [[FLAG1, FLAG2, ...]]
substitution
pattern substitution
$N RewriteRule
%N RewriteCond
%{VARNAME} ${mapname:
key|default}
RewriteCond teststring pattern
RewriteRule RewriteCond
RewriteRule AND
OR teststring
$N RewriteRule
%N RewriteCond
%{VARNAME}
${mapname:key|default}
CO=NAME:VAL:domain
[:lifetime[:path]]
E=VAR:VAL
F 403
G 401
H=Content-handler
L
N
NC
NE
NS
PT
Alias ScriptAlias
Redirect
QSA
R[=Code]
302
S=num num
T=MIME-type
NC
OR OR
AND
HTTP_USER_AGENT REMOTE_ADDR
HTTP_REFERER REMOTE_HOST
HTTP_COOKIE REMOTE_PORT
HTTP_FORWARDED REMOTE_USER
HTTP_HOST REMOTE_IDENT
HTTP_PROXY_CONNECTION REQUEST_METHOD
HTTP_ACCEPT SCRIPT_FILENAME
PATH_INFO
DOCUMENT_ROOT AUTH_TYPE
SERVER_ADMIN
SERVER_ADDR TIME_YEAR
SERVER_PORT TIME_MON
SERVER_PROTOCOL TIME_DAY
SERVER_SOFTWARE TIME_HOUR
TIME_MIN
API_VERSION TIME_WDAY
THE_REQUEST TIME
REQUEST_URI
REQUEST_FILENAME
IS_SUBREQ
HTTPS
$1...$n
<DirectoryMatch pattern> ... </DirectoryMatch>
pattern
<FilesMatch pattern> ... </FilesMatch>
pattern
<LocationMatch pattern> ... </LocationMatch>
pattern
<ProxyMatch pattern> ... </ProxyMatch>
pattern
\b \x08
\e \x1B
\n \x0A
\r \x0D
\t \x09
[...]
[^...]
[:class:]
. /s
\w [a-zA-Z0-9_]
\W [^a-zA-Z0-9_]
\a [a-zA-Z]
\A [^a-zA-Z]
\h [a-zA-Z_]
\H [^a-zA-Z_]
\d [0-9]
\D [^0-9]
\s [ \t]
\S [^ \t]
\x [a-fA-F0-9]
\X [^a-fA-F0-9]
\o [0-7]
\O [^0-7]
\l [a-z]
\L [^a-z]
\u [A-Z]
\U [^A-Z]
\i isident
\I
\k iskeyword
\K
\f isfname
\F
\p isprint x20-x7E
\P
\<
\>
:set ic
:set noic
\u
\l
\U
\L
\E \e \U \L
\(...\) \1 \2
\n n
&
\+
\=
\{n} n
\{n,} n
\{,n} n
\{x,y} x y
/pattern ?pattern
pattern ?pattern
n N
:[addr1[,addr2]]s/pattern/replacement/[cgp]
pattern replacement
addr1
addr2
c
g
p
.
$
%
't t
/...[/]
?...[?]
\/
\?
\&
Find spider-man, Spider-Man, Spider Man
/[Ss]pider[- ][Mm]an
\f awk, sed
\n awk, sed
\r awk, sed
\t awk, sed
\v awk, sed
\ooctal sed
\octal awk
\w egrep, sed
[a-zA-Z0-9_]
\W egrep, sed
[^a-zA-Z0-9_]
[:prop:] awk, sed
\< egrep
\> egrep
i I sed
-i egrep
IGNORECASE awk
non-zero
(PATTERN) awk
\(PATTERN\) sed
\1,
\2 \9
\n n sed
/.../
address1
address2
& \n replacement
& pattern
\n
n
n n
g
pattern
p
w file
file
MM/DD/YYYY DD.MM.YYYY
$ echo '12/30/1969' |
sed 's!\([0-9][0-9]\)/\([0-9][0-9]\)/\([0-9]\{2,4\}\)!
\2.\1.\3!g'
match(text, pattern)
pattern text text
RSTART
RLENGTH
gsub(pattern, replacement, text)
pattern text replacement
$0 text
$0 text
$ cat sub.awk
# For every input line, wrap each http/https URL in an HTML anchor whose
# href and link text are the URL itself, then print the line.
{
    # In gsub's replacement text a bare "&" stands for the matched text.
    # The original wrote "\&" inside the string literal, which POSIX leaves
    # undefined: some awks drop the backslash, while others pass "\&"
    # through to gsub, which then emits a literal "&" instead of the URL.
    # NOTE(review): inside the bracket expression "\\w" is a literal
    # backslash and "w", not a word-character class, so digits and
    # upper-case letters end the match -- confirm whether that is intended.
    gsub(/https?:\/\/[a-z_.\\w\/\\#~:?+=&;%@!-]*/,
         "<a href=\"&\">&</a>");
    print
}