kindwolf.org Git repositories pecoregex / master tests / test_pcre.py
master

Tree @master (Download .tar.gz)

test_pcre.py @master raw · history · blame

"""
Tests for pecoregex.pcre.
"""
import re
from ctypes import CDLL
import pytest
from pecoregex import pcre

# pylint: disable=C0111

def test_nametable_entry():
	"""Check that nametable_entry() splits a raw entry into (index, name)."""
	# Raw entry layout used here: two index bytes (1, 0), the group name,
	# then 28 NUL bytes. The index bytes are expected to decode to 256.
	raw_entry = b''.join((bytes((1, 0)), b'hello', bytes(28)))
	group_index, group_name = pcre.nametable_entry(raw_entry)
	assert group_index == 256
	assert group_name == 'hello'

def test_pcre_lib_constructor():
	"""Check the attributes exposed by PCRELibrary after construction."""
	lib = pcre.PCRELibrary(ovector_size=40, soname='libpcre.so.1', encode='ascii')
	# `decode` is not passed to the constructor; 'utf-8' is the value
	# expected in that case.
	expected_attributes = (
		('ovector_size', 40),
		('encode', 'ascii'),
		('decode', 'utf-8'),
		('shared_object_name', 'libpcre.so.1'),
	)
	for attribute, expected in expected_attributes:
		assert getattr(lib, attribute) == expected

def test_pcre_lib_get_lib():
	"""Check that get_lib() returns a ctypes CDLL instance."""
	library = pcre.PCRELibrary()
	handle = library.get_lib()
	assert isinstance(handle, CDLL)

def test_pcre_lib_version():
	"""Check that version() returns a 'major.minor YYYY-MM-DD' style string."""
	version_re = re.compile(r'''
^
\d+\.\d+        # version itself
\s+             # whitespace
\d{4}-\d\d-\d\d # date
$
''', re.VERBOSE)
	lib = pcre.PCRELibrary()
	version = lib.version()
	# Shown by pytest only when output capture is disabled or the test fails.
	print(version)
	assert isinstance(version, str)
	assert version_re.match(version)

def test_pcre_lib_config():
	"""Check config() for option codes 0 to 12.

	The UTF-16 and UTF-32 queries are expected to raise PCREErrorBadOption;
	every other option must yield a value of the type registered for it in
	pcre.CONFIG_OUTPUT_TYPE.
	"""
	lib = pcre.PCRELibrary()
	rejected = (pcre.PCRE_CONFIG_UTF16, pcre.PCRE_CONFIG_UTF32)
	for option in rejected:
		with pytest.raises(pcre.PCREErrorBadOption):
			lib.config(option)
	# pcre.PCRE_CONFIG_PARENS_LIMIT (13) is left out on purpose: it may not be
	# present everywhere.
	accepted = [option for option in range(13) if option not in rejected]
	for option in accepted:
		assert isinstance(lib.config(option), pcre.CONFIG_OUTPUT_TYPE[option])

def test_pcre_lib_caseless():
	"""Check that supports_caseless_utf8() returns a bool."""
	answer = pcre.PCRELibrary().supports_caseless_utf8()
	assert isinstance(answer, bool)

def test_pcre_lib_compile():
	"""Check compile() with valid patterns, broken patterns and bad options."""
	lib = pcre.PCRELibrary()

	# Valid calls: a simple pattern, then a simple pattern with legal options.
	valid_calls = (
		('^hello',),
		('hello', pcre.PCRE_ANCHORED|pcre.PCRE_CASELESS),
	)
	for compile_args in valid_calls:
		assert lib.compile(*compile_args) is not None

	# Incorrect patterns must be reported through PCRECompileException:
	broken_patterns = ('^hello(', '^(?<namé>hello)')
	for broken in broken_patterns:
		with pytest.raises(pcre.PCRECompileException):
			lib.compile(broken)

	# So must a correct pattern combined with conflicting options:
	with pytest.raises(pcre.PCRECompileException):
		lib.compile('hello', pcre.PCRE_UTF8|pcre.PCRE_NEVER_UTF)

def test_pcre_lib_exec():
	"""Check exec() on simple matches, captures and caseless UTF-8 matching."""
	lib = pcre.PCRELibrary()

	# Two ways to get an anchored, caseless 'hello': inline in the pattern,
	# then through compile options. Both must behave the same.
	anchored_caseless = (
		(r'^(?i)hello',),
		(r'hello', pcre.PCRE_CASELESS|pcre.PCRE_ANCHORED),
	)
	for compile_args in anchored_caseless:
		code = lib.compile(*compile_args)
		assert lib.exec(code, 'Hello!')
		assert lib.exec(code, 'Oh, hello!') is False

	# Captures, by position and by name:
	code = lib.compile(r'It is raining (?<rain1>\S+) and (?<rain2>\S+)')
	captures = lib.exec(code, 'It is raining cats and dogs')
	assert captures
	expected_by_index = ('It is raining cats and dogs', 'cats', 'dogs')
	for position, text in enumerate(expected_by_index):
		assert captures['by_index'][position] == text
	for group, text in (('rain1', 'cats'), ('rain2', 'dogs')):
		assert captures['by_name'][group] == text

	# UTF-8 matching, only when the library reports support for it:
	if not lib.supports_caseless_utf8():
		return
	utf8_options = pcre.PCRE_UTF8|pcre.PCRE_CASELESS|pcre.PCRE_ANCHORED
	code = lib.compile(r'éléphant', utf8_options)
	assert lib.exec(code, 'Éléphant!')
	assert lib.exec(code, 'Oh, éléphant!') is False

def test_pcre_lib_exec_data_unit():
	"""Check captures that split multi-byte characters, then free the code."""
	# Subjects are encoded as UTF-8 and captures decoded as ISO-8859-15, so a
	# capture that cuts through a two-byte character can still be decoded.
	lib = pcre.PCRELibrary(encode='utf-8', decode='iso-8859-15')
	# In PCRE, \C matches a single data unit: the group below is expected to
	# hold the second byte of the first accented letter, the 'l', and the
	# first byte of the next accented letter.
	code = lib.compile(r'\C(\Cl\C)\Cphant', pcre.PCRE_UTF8|pcre.PCRE_CASELESS)
	expectations = (
		('éléphant', '©lÃ'),
		('ÉLÉPHANT', '\x89LÃ'),
	)
	for subject, first_group in expectations:
		captures = lib.exec(code, subject)
		assert captures
		assert captures['by_index'][1] == first_group
	lib.free(code)
	assert True # still alive / no segfault

def test_pcre_lib_info_nametable():
	"""Check the name-table helpers against a pattern with four named groups."""
	pattern = r'''(?x)
^
(?<all>
	(?<dotdotdot>
		\.\.\.
	)
	(?<dashdashdash>
		---
	)
	(?<dotdotdotagain>
		\.\.\.
	)
)
$
'''
	lib = pcre.PCRELibrary()
	code = lib.compile(pattern)

	# Expected entry size: 2 leading bytes plus the longest name and its
	# terminating NUL.
	name_count = 4
	entry_size = len('dotdotdotagain\x00') + 2
	table_size = name_count * entry_size
	assert lib.info_namecount(code) == name_count
	assert lib.info_nameentrysize(code) == entry_size

	# Reading the whole raw table must neither crash nor come up short.
	raw_table = lib.info_nametable_data(code)
	raw_bytes = raw_table[0:table_size]
	assert len(raw_bytes) == table_size # still alive

	parsed_table = lib.nametable(code)
	assert parsed_table
	assert len(parsed_table) == name_count

	# Position 0 is expected to be None; names then follow in group order.
	by_position = lib.ordered_nametable(code)
	assert by_position == [None, 'all', 'dotdotdot', 'dashdashdash', 'dotdotdotagain']

def test_pcre_lib_match():
	"""Check match() results, by index and by name, on a three-part path."""
	pattern = r'''
/(?<prefix>[^/]+)
/(?<action>[^/]+)
/(?<value>.+)
'''
	lib = pcre.PCRELibrary()
	# NOTE(review): the two trailing arguments are presumably the compile
	# options and the exec options, in that order; confirm against
	# PCRELibrary.match.
	result = lib.match(pattern, '/just/do/it', pcre.PCRE_EXTENDED, pcre.PCRE_ANCHORED)
	assert result
	assert result['by_index'] == ['/just/do/it', 'just', 'do', 'it']
	assert result['by_name'] == {'action': 'do', 'prefix': 'just', 'value': 'it'}

def test_alternative_match():
	"""Check captures when only one branch of an alternation takes part."""
	lib = pcre.PCRELibrary()
	code = lib.compile(r'''
^(?:
	(?<name1>abc)
	(?<name2>def)
|
	(?<name3>ghi)
	(?<name4>jkl)
)$
''', pcre.PCRE_EXTENDED)

	# Each case: (subject, expected by_index, expected by_name).
	# First branch matched: the unset trailing groups are missing from
	# by_index and None in by_name. Second branch matched: the unset leading
	# groups show up as empty strings in both views.
	cases = (
		(
			'abcdef',
			['abcdef', 'abc', 'def'],
			{'name1': 'abc', 'name2': 'def', 'name3': None, 'name4': None},
		),
		(
			'ghijkl',
			['ghijkl', '', '', 'ghi', 'jkl'],
			{'name1': '', 'name2': '', 'name3': 'ghi', 'name4': 'jkl'},
		),
	)
	for subject, by_index, by_name in cases:
		result = lib.exec(code, subject)
		assert result
		assert result['by_index'] == by_index
		assert result['by_name'] == by_name