index.html - codemirror-mode-pcre (master) - kindwolf.org Git repositories

Tree @master (Download .tar.gz)

index.html @master — raw · history · blame

<!doctype html>

<html lang="en">
	<head>
		<meta charset="utf-8">
		<title>CodeMirror PCRE mode</title>
		<link
			rel="stylesheet"
			href="https://cdnjs.cloudflare.com/ajax/libs/codemirror/5.65.12/codemirror.min.css"
			integrity="sha512-uf06llspW44/LZpHzHT6qBOIVODjWtv4MxCricRxkzvopAlSWnTf6hpZTFxuuZcuNE9CBQhqE0Seu1CoRk84nQ=="
			crossorigin="anonymous"
			referrerpolicy="no-referrer" />
		<link rel="stylesheet" href="src/pcre.css">
		<style>
			.CodeMirror {
				border-top: 1px solid black;
				border-bottom: 1px solid black;
				height: 100%;
			}
		</style>
	</head>
	<body>
		<h1>CodeMirror PCRE mode</h1>
		<p>
			This is a <a href="https://codemirror.net/">CodeMirror</a> mode that brings
			syntax highlighting for <a href="https://www.pcre.org/">Perl Compatible Regular Expressions (PCRE)</a>.
		</p>
		<p>
			<strong>MIME types defined:</strong>
		</p>
		<ul>
			<li>text/x-regex</li>
			<li>text/x-pcre-regex</li>
		</ul>
		<p>
			<strong>Options:</strong>
		</p>
		<ul>
			<li>extended: boolean: initial state of the 'x' flag; default: true.</li>
		</ul>
		<p>
			<strong>Table of contents:</strong>
		</p>
		<ul>
			<li><a href="#h2nested">Use as nested mode</a></li>
			<li><a href="#h2pcresyntax">man pcresyntax</a></li>
			<li><a href="#h2pcrepattern">Examples from man pcrepattern</a></li>
			<li><a href="#h2details">Other details</a></li>
		</ul>
		<h2 id="h2nested">Use as nested mode</h2>
		<p>
			Below are demonstrations of how the PCRE mode can be used to highlight regular expressions within other languages.
			This first example is an nginx configuration snippet:
		</p>
		<textarea id="nested-nginx">
server {
	location ~ "(?x) # Enable PCRE extended mode
		^
		/user
		/(?<action>login|logout|profile) # Also include 'profile' because...
		/(?<tail>.*)
	"
	{
		if ($tail ~ "^(some|[^/]+/really|compl(?:ex|icated)|stuff|t?here)$") {
			return 307 "${scheme}://${host}/somewhere/else/${tail}${is_args}${args}";
		}
		# ...
	}
}</textarea>
		<p>This second example is a simple list of one-line regexes with comments:</p>
		<textarea id="nested-regex-list">
# This is a comment about this first regex:
^hello

  # This is another comment, this time with leading spaces:
(?i)this is (?<what>a regex ) # not a comment

^(?:good|bye )bye$</textarea>
		<h2 id="h2pcresyntax">man pcresyntax</h2>
		<p>
			This is a slightly adjusted copy of <code>man pcresyntax</code>. This man page reflects most PCRE syntactic
			structures in a colourful way thanks to CodeMirror and the PCRE mode.
		</p>
		<textarea id="pcresyntax">
# PCRESYNTAX(3)                                  Library Functions Manual                                 PCRESYNTAX(3)
#
# NAME
#        PCRE - Perl-compatible regular expressions
#
# PCRE REGULAR EXPRESSION SYNTAX SUMMARY
#
#        The  full  syntax  and  semantics  of  the regular expressions that are supported by PCRE are described in the
#        pcrepattern documentation. This document contains a quick-reference summary of the syntax.
#
# QUOTING

          \x         where x is non-alphanumeric is a literal x
          \Q...\E    treat enclosed characters as literal

# CHARACTERS

          \a       # alarm, that is, the BEL character (hex 07)
          \cx      # "control-x", where x is any ASCII character
          \e       # escape (hex 1B)
          \f       # form feed (hex 0C)
          \n       # newline (hex 0A)
          \r       # carriage return (hex 0D)
          \t       # tab (hex 09)
          \077     # character with octal code 0dd
          \777     # character with octal code ddd, or backreference
          \o{777}  # character with octal code ddd..
          \xff     # character with hex code hh
          \x{fffe} # character with hex code hhh..

#        Note that \0dd is always an octal code, and that \8 and \9 are the literal characters "8" and "9".
#
# CHARACTER TYPES

          .        # any character except newline;
                   #   in dotall mode, any character whatsoever
          \C       # one data unit, even in UTF mode (best avoided)
          \d       # a decimal digit
          \D       # a character that is not a decimal digit
          \h       # a horizontal white space character
          \H       # a character that is not a horizontal white space character
          \N       # a character that is not a newline
          \p{Pi}   # a character with the xx property
          \P{Pi}   # a character without the xx property
          \R       # a newline sequence
          \s       # a white space character
          \S       # a character that is not a white space character
          \v       # a vertical white space character
          \V       # a character that is not a vertical white space character
          \w       # a "word" character
          \W       # a "non-word" character
          \X       # a Unicode extended grapheme cluster

#        By default, \d, \s, and \w match only ASCII characters, even in UTF-8 mode or in the 16- bit  and  32-bit  li‐
#        braries.  However,  if  locale-specific  matching  is happening, \s and \w may also match characters with code
#        points in the range 128-255. If the PCRE_UCP option is set, the behaviour of these escape sequences is changed
#        to use Unicode properties and they match many more characters.

# GENERAL CATEGORY PROPERTIES FOR \p and \P

       \p{C}       # Other
       \p{Cc}      # Control
       \p{Cf}      # Format
       \p{Cn}      # Unassigned
       \p{Co}      # Private use
       \p{Cs}      # Surrogate

       \p{L}       # Letter
       \p{Ll}      # Lower case letter
       \p{Lm}      # Modifier letter
       \p{Lo}      # Other letter
       \p{Lt}      # Title case letter
       \p{Lu}      # Upper case letter
       \p{L&}      # Ll, Lu, or Lt

       \p{M}       # Mark
       \p{Mc}      # Spacing mark
       \p{Me}      # Enclosing mark
       \p{Mn}      # Non-spacing mark

       \p{N}       # Number
       \p{Nd}      # Decimal number
       \p{Nl}      # Letter number
       \p{No}      # Other number

       \p{P}       # Punctuation
       \p{Pc}      # Connector punctuation
       \p{Pd}      # Dash punctuation
       \p{Pe}      # Close punctuation
       \p{Pf}      # Final punctuation
       \p{Pi}      # Initial punctuation
       \p{Po}      # Other punctuation
       \p{Ps}      # Open punctuation

       \p{S}       # Symbol
       \p{Sc}      # Currency symbol
       \p{Sk}      # Modifier symbol
       \p{Sm}      # Mathematical symbol
       \p{So}      # Other symbol

       \p{Z}       # Separator
       \p{Zl}      # Line separator
       \p{Zp}      # Paragraph separator
       \p{Zs}      # Space separator

# PCRE SPECIAL CATEGORY PROPERTIES FOR \p and \P

       \p{Xan}     # Alphanumeric: union of properties L and N
       \p{Xps}     # POSIX space: property Z or tab, NL, VT, FF, CR
       \p{Xsp}     # Perl space: property Z or tab, NL, VT, FF, CR
       \p{Xuc}     # Universally-named character: one that can be
                   #   represented by a Universal Character Name
       \p{Xwd}     # Perl word: property Xan or underscore

#        Perl  and  POSIX  space  are  now  the same. Perl added VT to its space character set at release 5.18 and PCRE
#        changed at release 8.34.

# SCRIPT NAMES FOR \p AND \P

       \p{Arabic}, \p{Armenian}, \p{Avestan}, \p{Balinese}, \p{Bamum}, \p{Bassa_Vah}, \p{Batak}, \p{Bengali},
       \p{Bopomofo}, \p{Brahmi}, \p{Braille}, \p{Buginese}, \p{Buhid}, \p{Canadian_Aboriginal}, \p{Carian},
       \p{Caucasian_Albanian}, \p{Chakma}, \p{Cham}, \p{Cherokee}, \p{Common}, \p{Coptic}, \p{Cuneiform},
       \p{Cypriot}, \p{Cyrillic}, \p{Deseret}, \p{Devanagari}, \p{Duployan}, \p{Egyptian_Hieroglyphs}, \p{Elbasan},
       \p{Ethiopic}, \p{Georgian}, \p{Glagolitic}, \p{Gothic}, \p{Grantha}, \p{Greek}, \p{Gujarati}, \p{Gurmukhi},
       \p{Han}, \p{Hangul}, \p{Hanunoo}, \p{Hebrew}, \p{Hiragana}, \p{Imperial_Aramaic}, \p{Inherited},
       \p{Inscriptional_Pahlavi}, \p{Inscriptional_Parthian}, \p{Javanese}, \p{Kaithi}, \p{Kannada}, \p{Katakana},
       \p{Kayah_Li}, \p{Kharoshthi}, \p{Khmer}, \p{Khojki}, \p{Khudawadi}, \p{Lao}, \p{Latin}, \p{Lepcha}, \p{Limbu},
       \p{Linear_A}, \p{Linear_B}, \p{Lisu}, \p{Lycian}, \p{Lydian}, \p{Mahajani}, \p{Malayalam}, \p{Mandaic},
       \p{Manichaean}, \p{Meetei_Mayek}, \p{Mende_Kikakui}, \p{Meroitic_Cursive}, \p{Meroitic_Hieroglyphs}, \p{Miao},
       \p{Modi}, \p{Mongolian}, \p{Mro}, \p{Myanmar}, \p{Nabataean}, \p{New_Tai_Lue}, \p{Nko}, \p{Ogham},
       \p{Ol_Chiki}, \p{Old_Italic}, \p{Old_North_Arabian}, \p{Old_Permic}, \p{Old_Persian}, \p{Old_South_Arabian},
       \p{Old_Turkic}, \p{Oriya}, \p{Osmanya}, \p{Pahawh_Hmong}, \p{Palmyrene}, \p{Pau_Cin_Hau}, \p{Phags_Pa},
       \p{Phoenician}, \p{Psalter_Pahlavi}, \p{Rejang}, \p{Runic}, \p{Samaritan}, \p{Saurashtra}, \p{Sharada},
       \p{Shavian}, \p{Siddham}, \p{Sinhala}, \p{Sora_Sompeng}, \p{Sundanese}, \p{Syloti_Nagri}, \p{Syriac},
       \p{Tagalog}, \p{Tagbanwa}, \p{Tai_Le}, \p{Tai_Tham}, \p{Tai_Viet}, \p{Takri}, \p{Tamil}, \p{Telugu},
       \p{Thaana}, \p{Thai}, \p{Tibetan}, \p{Tifinagh}, \p{Tirhuta}, \p{Ugaritic}, \p{Vai}, \p{Warang_Citi}, \p{Yi}.

# CHARACTER CLASSES

       [...]        # positive character class
       [^...]       # negative character class
       [x-y]        # range (can be used for hex characters)
       [[:word:]]   # positive POSIX named set
       [[:^word:]]  # negative POSIX named set

       [[:alnum:]]  # alphanumeric
       [[:alpha:]]  # alphabetic
       [[:ascii:]]  # 0-127
       [[:blank:]]  # space or tab
       [[:cntrl:]]  # control character
       [[:digit:]]  # decimal digit
       [[:graph:]]  # printing, excluding space
       [[:lower:]]  # lower case letter
       [[:print:]]  # printing, including space
       [[:punct:]]  # printing, excluding alphanumeric
       [[:space:]]  # white space
       [[:upper:]]  # upper case letter
       [[:word:]]   # same as \w
       [[:xdigit:]] # hexadecimal digit

#        In  PCRE,  POSIX  character set names recognize only ASCII characters by default, but some of them use Unicode
#        properties if PCRE_UCP is set. You can use \Q...\E inside a character class.

# QUANTIFIERS

          ?         # 0 or 1, greedy
          ?+        # 0 or 1, possessive
          ??        # 0 or 1, lazy
          *         # 0 or more, greedy
          *+        # 0 or more, possessive
          *?        # 0 or more, lazy
          +         # 1 or more, greedy
          ++        # 1 or more, possessive
          +?        # 1 or more, lazy
          {1}       # exactly n
          {1,6}     # at least n, no more than m, greedy
          {1,6}+    # at least n, no more than m, possessive
          {1,6}?    # at least n, no more than m, lazy
          {1,}      # n or more, greedy
          {1,}+     # n or more, possessive
          {1,}?     # n or more, lazy

# ANCHORS AND SIMPLE ASSERTIONS

          \b        # word boundary
          \B        # not a word boundary
          ^         # start of subject
                    #  also after internal newline in multiline mode
          \A        # start of subject
          $         # end of subject
                    #  also before newline at end of subject
                    #  also before internal newline in multiline mode
          \Z        # end of subject
                    #  also before newline at end of subject
          \z        # end of subject
          \G        # first matching position in subject

# MATCH POINT RESET

          \K        # reset start of match

#        \K is honoured in positive assertions, but ignored in negative ones.

# ALTERNATION

          expr|expr|expr...

# CAPTURING

          (...)         # capturing group
          (?<name>...)  # named capturing group (Perl)
          (?'name'...)  # named capturing group (Perl)
          (?P<name>...) # named capturing group (Python)
          (?:...)       # non-capturing group
          (?|...)       # non-capturing group; reset group numbers for
                        #  capturing groups in each alternative

# ATOMIC GROUPS

          (?>...)       # atomic, non-capturing group

# COMMENT

          (?#....)      # comment (not nestable)

# OPTION SETTING

          (?i)          # caseless
          (?J)          # allow duplicate names
          (?m)          # multiline
          (?s)          # single line (dotall)
          (?U)          # default ungreedy (lazy)
          (?x)          # extended (ignore white space)
          (?-iUs)       # unset option(s)

#        The following are recognized only at the very start of a pattern or after one of the  newline  or  \R  options
#        with similar syntax. More than one of them may appear.

          (*LIMIT_MATCH=4)     # set the match limit to d (decimal number)
          (*LIMIT_RECURSION=3) # set the recursion limit to d (decimal number)
          (*NO_AUTO_POSSESS)   # no auto-possessification (PCRE_NO_AUTO_POSSESS)
          (*NO_START_OPT)      # no start-match optimization (PCRE_NO_START_OPTIMIZE)
          (*UTF8)              # set UTF-8 mode: 8-bit library (PCRE_UTF8)
          (*UTF16)             # set UTF-16 mode: 16-bit library (PCRE_UTF16)
          (*UTF32)             # set UTF-32 mode: 32-bit library (PCRE_UTF32)
          (*UTF)               # set appropriate UTF mode for the library in use
          (*UCP)               # set PCRE_UCP (use Unicode properties for \d etc)

#        Note  that  LIMIT_MATCH  and  LIMIT_RECURSION  can  only  reduce  the value of the limits set by the caller of
#        pcre_exec(), not increase them.

# NEWLINE CONVENTION

#        These are recognized only at the very start of the pattern or after option settings with a similar syntax.

          (*CR)         # carriage return only
          (*LF)         # linefeed only
          (*CRLF)       # carriage return followed by linefeed
          (*ANYCRLF)    # all three of the above
          (*ANY)        # any Unicode newline sequence

# WHAT \R MATCHES

#        These are recognized only at the very start of the pattern or after option setting with a similar syntax.

          (*BSR_ANYCRLF) # CR, LF, or CRLF
          (*BSR_UNICODE) # any Unicode newline sequence

# LOOKAHEAD AND LOOKBEHIND ASSERTIONS

          (?=...)       # positive look ahead
          (?!...)       # negative look ahead
          (?<=...)      # positive look behind
          (?<!...)      # negative look behind

#        Each top-level branch of a look behind must be of a fixed length.

# BACKREFERENCES

          \1            # reference by number (can be ambiguous)
          \g2           # reference by number
          \g{3}         # reference by number
          \g{-4}        # relative reference by number
          \k<name>      # reference by name (Perl)
          \k'name'      # reference by name (Perl)
          \g{name}      # reference by name (Perl)
          \k{name}      # reference by name (.NET)
          (?P=name)     # reference by name (Python)

# SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)

          (?R)          # recurse whole pattern
          (?1)          # call subpattern by absolute number
          (?+2)         # call subpattern by relative number
          (?-3)         # call subpattern by relative number
          (?&name)      # call subpattern by name (Perl)
          (?P>name)     # call subpattern by name (Python)
          \g<name>      # call subpattern by name (Oniguruma)
          \g'name'      # call subpattern by name (Oniguruma)
          \g<4>         # call subpattern by absolute number (Oniguruma)
          \g'5'         # call subpattern by absolute number (Oniguruma)
          \g<+6>        # call subpattern by relative number (PCRE extension)
          \g'+7'        # call subpattern by relative number (PCRE extension)
          \g<-8>        # call subpattern by relative number (PCRE extension)
          \g'-9'        # call subpattern by relative number (PCRE extension)

# CONDITIONAL PATTERNS

          (?(condition)yes-pattern)
          (?(condition)yes-pattern|no-pattern)

          (?(1)...)      # absolute reference condition
          (?(+2)...)     # relative reference condition
          (?(-3)...)     # relative reference condition
          (?(<name>)...) # named reference condition (Perl)
          (?('name')...) # named reference condition (Perl)
          (?(name)...)   # named reference condition (PCRE)
          (?(R)...)      # overall recursion condition
          (?(R4)...)     # specific group recursion condition
          (?(R&name)...) # specific recursion condition
          (?(DEFINE)...) # define subpattern for reference
          (?(?=assert).) # assertion condition

# BACKTRACKING CONTROL

#        The following act immediately they are reached:

          (*ACCEPT)     # force successful match
          (*FAIL)       (?# force backtrack; synonym) (*F)
          (*MARK:NAME)  (?# set name to be passed back; synonym) (*:NAME)

#        The following act only when a subsequent match failure causes a backtrack to reach  them.  They  all  force  a
#        match  failure,  but they differ in what happens afterwards. Those that advance the start-of-match point do so
#        only if the pattern is not anchored.

          (*COMMIT)     # overall failure, no advance of starting point
          (*PRUNE)      # advance to next starting character
          (*PRUNE:NAME) # equivalent to (*MARK:NAME)(*PRUNE)
          (*SKIP)       # advance to current matching position
          (*SKIP:NAME)  # advance to position corresponding to an earlier
                        # (*MARK:NAME); if not found, the (*SKIP) is ignored
          (*THEN)       # local failure, backtrack to next alternation
          (*THEN:NAME)  # equivalent to (*MARK:NAME)(*THEN)

# CALLOUTS

          (?C)    # callout
          (?C255) # callout with data n

# SEE ALSO
#
#        pcrepattern(3), pcreapi(3), pcrecallout(3), pcrematching(3), pcre(3).
#
# AUTHOR
#
#        Philip Hazel
#        University Computing Service
#        Cambridge CB2 3QH, England.
#
# REVISION
#
#        Last updated: 08 January 2014
#        Copyright (c) 1997-2014 University of Cambridge.
#
# PCRE 8.35                                          08 January 2014                                      PCRESYNTAX(3)</textarea>
		<h2 id="h2pcrepattern">Examples from man pcrepattern</h2>
		<p>
			<code>man pcrepattern</code> is much longer than <code>man pcresyntax</code>, which is why only its examples were reproduced below.
		</p>
		<textarea id="pcrepattern">
(*CR)a.b

\Qabc$xyz\E \Qabc\$xyz\E \Qabc\E\$\Qxyz\E

\040  # is another way of writing an ASCII space
\40   # is the same, provided there are fewer than 40 previous capturing subpatterns
\7    # is always a back reference
\11   # might be a back reference, or another way of writing a tab
\011  # is always a tab
\0113 # is a tab followed by the character "3"
\113  # might be a back reference, otherwise the character with octal code 113
\377  # might be a back reference, otherwise the value 255 (decimal)
\81   # is either a back reference, or the two characters "8" and "1"

\xdc is exactly the same as \x{dc}, or \u00dc in JavaScript mode.

[In addition, inside a character class, \b is interpreted as the backspace character (hex 08).]

[\N is not allowed in a character class.]
[\B, \R, and \X are not special inside a character class.]

# In 8-bit non-UTF-8 mode \R is equivalent to the following:
(?>\r\n|\n|\x0b|\f|\r|\x85)

foo\Kbar
(foo)\Kbar
^abc$

(?|	(?=[\x00-\x7f])(\C) |
	(?=[\x80-\x{7ff}])(\C)(\C) |
	(?=[\x{800}-\x{ffff}])(\C)(\C)(\C) |
	(?=[\x{10000}-\x{1fffff}])(\C)(\C)(\C)(\C))

[aeiou] [^aeiou] [d-m]
[W-]46] [W-\]46]
[z-\xff]
[A-\d] [A-[:digit:]]
[\000-\037]
(?i)[W-c] [][\\^_`wxyzabc]
[\xc8-\xcb]
# If a closing square bracket is required as a member of the class, it should be the first data character in the class
# (after an initial circumflex, if present) or escaped with a backslash.
[]\\^_`wxyzabc]

[01[:alpha:]%]
[12[:^digit:]]

[[:<:]]  is converted to  \b(?=\w)
[[:>:]]  is converted to  \b(?<=\w)
# Only these exact character sequences are recognized. A sequence such as
[a[:<:]b]
# provokes error for an unrecognized POSIX class name.

gilbert|sullivan
(a(?i)b)c
(a(?i)b|c)

cat(aract|erpillar|)
 ((red|white) (king|queen))
((?:red|white) (king|queen))

(?i:saturday|sunday)
(?:(?i)saturday|sunday)

(?|(Sat)ur|(Sun))day
/(?|(abc)|(def))\1/
/(?|(abc)|(def))(?1)/

(?<DN>Mon|Fri|Sun)(?:day)?|
(?<DN>Tue)(?:sday)?|
(?<DN>Wed)(?:nesday)?|
(?<DN>Thu)(?:rsday)?|
(?<DN>Sat)(?:urday)?


(?:(?<n>foo)|(?<n>bar))\k<n>
z{2,4}
[aeiou]{3,}
\d{8}
(a?)*
/\*.*\*/
/\*.*?\*/
\d??\d
(.*)abc\1
(?>.*?a)b
(tweedle[dume]{3}\s*)+
/(a|(b))+/
\d+foo
(?>\d+)foo
\d++foo
(abc|xyz){2,3}+
(\D+|<\d+>)*[!?]
((?>\D+)|<\d+>)*[!?]
(ring), \1
(ring), \g1
(ring), \g{1}
(abc(def)ghi)\g{-1}
(sens|respons)e and \1ibility
((?i)rah)\s+\1
(?<p1>(?i)rah)\s+\k<p1>
(?'p1'(?i)rah)\s+\k{p1}
(?P<p1>(?i)rah)\s+(?P=p1)
(?<p1>(?i)rah)\s+\g{p1}
(a|(bc))\2
(a\1)
(a|b\1)+
\w+(?=;)
foo(?!bar)
(?!foo)bar
(?<!foo)bar
(?<=bullock|donkey)
(?<!dogs?|cats?)
(?<=ab(c|de))
(?<=abc|abde)
abcd$
^.*abcd$
^.*+(?<=abcd)
(?<=\d{3})(?<!999)foo
(?<=\d{3}...)(?<!999)foo
(?<=(?<!foo)bar)baz
(?<=\d{3}(?!999)...)foo

(?(condition)yes-pattern)
(?(condition)yes-pattern|no-pattern)

(?(1) (A|B|C) | (D | (?(2)E|F) | E) )

( \( )?    [^()]+    (?(1) \) )

( \( )?    [^()]+    (?(-1) \) )

(?<OPEN> \( )?    [^()]+    (?(<OPEN>) \) )

(?(R3)...) or (?(R&name)...)

(?(DEFINE) (?<byte> 2[0-4]\d | 25[0-5] | 1\d\d | [1-9]?\d) )
\b (?&byte) (\.(?&byte)){3} \b


(?(?=[^a-z]*[a-z])
\d{2}-[a-z]{3}-\d{2}  |  \d{2}-\d{2}-\d{2} )

abc #comment \n still comment
( \( ( [^()]++ | (?1) )* \) )
(?<pn> \( ( [^()]++ | (?&pn) )* \) )
(ab(cd)ef)
< (?: (?(R) \d++  | [^<>]*+) | (?R)) * >
^(.|(.)(?1)\2)$
^((.)(?1)\2|.)$
^((.)(?1)\2|.?)$
^(?:((.)(?1)\2|)|((.)(?3)\4|.))
^\W*+(?:((.)\W*+(?1)\W*+\2|)|((.)\W*+(?3)\W*+\4|\W*+.\W*+))\W*+$
^(.)(\1|a(?2))

(...(absolute)...)...(?2)...
(...(relative)...)...(?-1)...
(...(?+1)...(relative)...)
(sens|respons)e and \1ibility
(sens|respons)e and (?1)ibility
(abc)(?i:(?-1))
(?<pn> \( ( (?>[^()]+) | \g<pn> )* \) )
(sens|respons)e and \g'1'ibility
(abc)(?i:\g<-1>)
(?C1)abc(?C2)def
# An explicit callout may also be set at this position, as in this example:
# Note that this applies only to assertion conditions, not to other types of condition.
(?(?C9)(?=a)abc|def)
A((?:A|B(*ACCEPT)|C)D)
a+(?C)(*FAIL)
/X(*MARK:A)Y|X(*MARK:B)Z/K
a+(*COMMIT)b
/(*COMMIT)abc/
a+(*SKIP)b
( COND1 (*THEN) FOO | COND2 (*THEN) BAR | COND3 (*THEN) BAZ ) ...
A (B(*THEN)C) | D
A (B(*THEN)C | (*FAIL)) | D
^.*? (?(?=a) a | b(*THEN)c )
(A(*COMMIT)B(*THEN)C|ABD)
...(*COMMIT)(*PRUNE)...
/(a(*COMMIT)b)+ac/</textarea>
		<h2 id="h2details">Other details</h2>
		<textarea id="details">
# Option setting: extended mode (x) is enabled by default
(?-i) (?-J) (?-m) (?-s) (?-U) (?-x) # This part is no longer in extended mode so this is not a comment
(?iJm) (?-iJm) (?iJm-sUx) (?xJm-s-U-i) # Back to extended mode: this is a comment

# Reset group numbers:
(?|(reset)|(group)|(numbers))

## Everything that can take a name:
# Capturing groups:
 (?<named>capturing_group)  (?<1badname>capturing_group)  (?<nametoolooooooooooooooooooooooooong>capturing_group)
(?P<named>capturing_group) (?P<2badname>capturing_group) (?P<nametoolooooooooooooooooooooooooong>capturing_group)
 (?'named'capturing_group)  (?'3badname'capturing_group)  (?'nametoolooooooooooooooooooooooooong'capturing_group)

# Backreferences:
 \g{name}  \g{1badname}  \g{nametoolooooooooooooooooooooooooong}
 \k{name}  \k{1badname}  \k{nametoolooooooooooooooooooooooooong}
 \k<name>  \k<1badname>  \k<nametoolooooooooooooooooooooooooong>
 \k'name'  \k'1badname'  \k'nametoolooooooooooooooooooooooooong'
(?P=name) (?P=1badname) (?P=nametoolooooooooooooooooooooooooong)

# Subroutines:
(?P>name) (?P>1badname) (?P>nametoolooooooooooooooooooooooooong)
 (?&name)  (?&1badname)  (?&nametoolooooooooooooooooooooooooong)
 \g<name>  \g<1badname>  \g<nametoolooooooooooooooooooooooooong>
 \g'name'  \g'1badname'  \g'nametoolooooooooooooooooooooooooong'

# Conditions:
 (?(<name>)...) (?(<1badname>)...) (?(<nametoolooooooooooooooooooooooooong>)...)
 (?('name')...) (?('1badname')...) (?('nametoolooooooooooooooooooooooooong')...)
  (?(name)...)   (?(1badname)...)   (?(nametoolooooooooooooooooooooooooong)...)
(?(R&name)...) (?(R&1badname)...) (?(R&nametoolooooooooooooooooooooooooong)...)

# Verbs:
     (*:name)      (*:1badname)      (*:nametoolooooooooooooooooooooooooong)
 (*MARK:name)  (*MARK:1badname)  (*MARK:nametoolooooooooooooooooooooooooong)
(*PRUNE:name) (*PRUNE:1badname) (*PRUNE:nametoolooooooooooooooooooooooooong)
 (*SKIP:name)  (*SKIP:1badname)  (*SKIP:nametoolooooooooooooooooooooooooong)
 (*THEN:name)  (*THEN:1badname)  (*THEN:nametoolooooooooooooooooooooooooong)

# Nested groups on a single line:
  (?<level1>  (?<level2>  (?<level3>  (?<level4>  (?<level5>  (?<level6>  (?<level7>  (?<level8>  8)  7)  6)  5)  4)  3)  2)  1)

# Nested groups on multiple lines:
	(?<level1>
		(?<level2>
			(?<level3>
				(?<level4>
					(?<level5>
						(?<level6>
							(?<level7>
								(?<level8>
									This ought to be enough for everyone ©
								)
							)
						)
					)
				)
			)
		)
	)

\[\\\#/\]       # [\#/] escaped

# Better \Q...\E test:
\Q Everything between \Q and \ E is escaped: !@#$%^&*()_-+[]{} \E

# \Q...\E in a character class:
[123\QYou can use \Q...\ E inside character classes\E456]

# It remains ugly in extended mode though:
\Q hello
   I span
   multiple lines
   and this is ugly
\E

# Octal notations:
\0 \07 \077 \123 \o{456}

# Hexadecimal notations:
\x \xa \xaa \xAA \x{bb} \u12ff

(?C) (?C0) (?C1) (?C23) (?C234) (?C255) (?C256) # callouts

\d \D \h \H \v \V \w \W
\n \N # LF (line feed, 0x0A) character vs any character that is not a newline

# More \p tests:
\p{C}   \P{C}   \p{^C}   \P{^C}
\p{L&}  \P{L&}  \p{^L&}  \P{^L&}
\p{Xwd} \P{Xwd} \p{^Xwd} \P{^Xwd}
\p{Han} \P{Han} \p{^Han} \P{^Han}
\p{Hna} \P{Hna} \p{^Hna} \P{^Hna} # The Han/Hna typo is made visible

# POSIX named classes are supported only within a class:
[:word:] [:^space:]
[
	[:alnum:]  [:alpha:]  [:ascii:]  [:blank:]   [:cntrl:]
	[:digit:]  [:graph:]  [:lower:]  [:print:]   [:punct:]
	[:space:]  [:upper:]  [:word:]   [:xdigit:]  [:fake:]
	[:^alnum:] [:^alpha:] [:^ascii:] [:^blank:]  [:^cntrl:]
	[:^digit:] [:^graph:] [:^lower:] [:^print:]  [:^punct:]
	[:^space:] [:^upper:] [:^word:]  [:^xdigit:] [:^fake:]
]

# Quantifiers: there are no {,m} quantifiers:
x{,6}      # For example, {,6} is not  a  quantifier, but a literal string of four characters.

# Conditional patterns: DEFINE should not prevent names starting with "DEFINE"
(?(DEFINER)yes-pattern|no-pattern)</textarea>
		<script
			src="https://cdnjs.cloudflare.com/ajax/libs/codemirror/5.65.12/codemirror.min.js"
			integrity="sha512-05P5yOM5/yfeUDgnwTL0yEVQa0Cg6j3alVSbWSQtBxz24fERIyW3jeBdp7ZSHcgPMRYJWoa26IIWhJ2/UComLA=="
			crossorigin="anonymous"
			referrerpolicy="no-referrer">
		</script>
		<script
			src="https://cdnjs.cloudflare.com/ajax/libs/codemirror/5.65.12/addon/edit/matchbrackets.js"
			integrity="sha512-xUUWekUNHRMUrO8BL11fcEbz6rcJW55X6LkW9MNcAGgCRbxGmMXmXG5Ds5O6v2BLb90AVT1I+ualk0G9IYx3bw=="
			crossorigin="anonymous"
			referrerpolicy="no-referrer">
		</script>
		<script src="src/pcre.js"></script>
		<script>
			/*
			 * Demonstration mode: minimalistic nginx highlighting that hands the contents of
			 * double-quoted strings following a '~' or '~*' operator over to the PCRE mode.
			 *
			 * State fields:
			 *   current      - '' outside quotes, 'string' inside a plain string, 'regex' inside
			 *                  a string whose contents are tokenized by the PCRE mode;
			 *   pcre_state   - state object of the nested PCRE mode;
			 *   expect_regex - whether the next string should be a regex.
			 */
			CodeMirror.defineMode('nginx-mini+regex', function(editor_options, mode_options) {
				var pcre_mode = CodeMirror.getMode(editor_options, {name: 'pcre', extended: false});
				// Looked up through hasOwnProperty() so that inherited names (e.g. "constructor") never match:
				var keywords = {'if': true, 'return': true, 'server': true, 'location': true};
				return {
					startState: function() {
						return {
							current: '',
							pcre_state: CodeMirror.startState(pcre_mode),
							expect_regex: false, // whether the next string should be a regex
						};
					},
					copyState: function(os) { // os = original state
						return {
							current: os.current,
							pcre_state: CodeMirror.copyState(pcre_mode, os.pcre_state),
							expect_regex: os.expect_regex,
						};
					},
					token: function(stream, state) {
						if (state.current === 'string' || state.current === 'regex') {
							// A double quote closes the current string or regex:
							if (stream.eat('"')) {
								state.current = '';
								return 'string';
							}
							if (state.current === 'regex') return pcre_mode.token(stream, state.pcre_state);
							// Plain string: consume a backslash together with the character it escapes,
							// so that the second half of \\ is not mistaken for the start of \":
							if (!stream.match(/^\\./)) stream.next();
							return state.current;
						}
						// Regexes given to stream.match() are anchored with ^ as CodeMirror expects;
						// longest alternatives come first, otherwise '~' would shadow '~*':
						var operator = stream.match(/^(~\*|\^~|~|=)/);
						if (operator) {
							state.expect_regex = operator[1].charAt(0) === '~'; // ~ and ~* imply a regex
							return 'operator';
						}
						if (stream.eat('"')) {
							state.current = state.expect_regex ? 'regex' : 'string';
							// New regex, new PCRE state object:
							if (state.expect_regex) state.pcre_state = CodeMirror.startState(pcre_mode);
							state.expect_regex = false;
							return 'string';
						}
						/* Minimalistic nginx syntax highlighting, just for demonstration purposes: */
						if (stream.eat('#')) {
							stream.skipToEnd();
							return 'comment';
						}
						// Consume whole words at once: the stream only exposes the text from the current
						// position onwards, so a leading \b cannot tell "if" apart from the end of "gif".
						var word = stream.match(/^[A-Za-z_]\w*/);
						if (word) return keywords.hasOwnProperty(word[0]) ? 'keyword' : null;
						if (stream.match(/^\$\w+/)) return 'variable-3';
						if (stream.match(/^\d+/)) return 'number';
						stream.next();
						return null;
					},
				};
			});
		</script>
		<script>
			/*
			 * Demonstration mode for a list of one-line regexes: a line whose first
			 * non-blank character is '#' is a comment, any other line is tokenized by
			 * the PCRE mode, starting from a fresh PCRE state.
			 */
			CodeMirror.defineMode('pcre-list', function(editor_options, mode_options) {
				var pcre_mode = CodeMirror.getMode(editor_options, {name: 'pcre', extended: false});

				// This mode's state is only a holder for the nested PCRE state.
				function wrap(inner_state) {
					return { pcre_state: inner_state };
				}

				function startState() {
					return wrap(CodeMirror.startState(pcre_mode));
				}

				function copyState(original) {
					return wrap(CodeMirror.copyState(pcre_mode, original.pcre_state));
				}

				function token(stream, state) {
					var at_line_start = stream.sol();
					// Whole-line comment, possibly preceded by whitespace:
					if (at_line_start && stream.match(/^\s*#.*$/)) return 'comment';
					// Any other line is a new regex and thus gets a new PCRE state:
					if (at_line_start) state.pcre_state = CodeMirror.startState(pcre_mode);
					return pcre_mode.token(stream, state.pcre_state);
				}

				return {
					startState: startState,
					copyState: copyState,
					token: token,
				};
			});
		</script>
		<script>
			// Settings shared by all editors of this page; `mode` is set before each
			// editor is created.
			var conf = {
				lineNumbers: true,
				indentWithTabs: true,
				showCursorWhenSelecting: true,
				// Bracket matching covers {} [] and () but deliberately not <>:
				// syntaxes that feature a single '<' or '>' (specifically: atomic
				// groups, lookbehinds and Python subpatterns) trigger
				// undesirable behaviour.
				matchBrackets: true,
			};
			// The three plain PCRE editors share one mode specification object:
			var extended_pcre = {
				name: 'pcre',
				extended: true,
			};
			// [textarea id, mode specification], listed in creation order:
			[
				['pcresyntax', extended_pcre],
				['pcrepattern', extended_pcre],
				['details', extended_pcre],
				['nested-nginx', {name: 'nginx-mini+regex'}],
				['nested-regex-list', {name: 'pcre-list'}],
			].forEach(function(editor) {
				conf.mode = editor[1];
				CodeMirror.fromTextArea(document.getElementById(editor[0]), conf);
			});
		</script>
	</body>
</html>