Python - regex 模块

文章目录

    • Examples
      • Added POSIX matching (leftmost longest)
      • Added partial matches
      • regex.sub
      • match
      • captures
      • fullmatch
    • subf and subfn
    • 参考



PYPI:https://pypi.org/project/regex/

代码:https://bitbucket.org/mrabarnett/mrab-regex/src/hg/


安装:

(base) $ pip install regex

regex支持Python 2.5+和Python 3.1+


Examples

>>> regex.match(r'(?(?=\d)\d+|\w+)', '123abc')
<regex.Match object; span=(0, 3), match='123'>
>>> regex.match(r'(?(?=\d)\d+|\w+)', 'abc123')
<regex.Match object; span=(0, 6), match='abc123'>
>>> print(regex.match(r'(?:(?=\d)\d+\b|\w+)', '123abc'))
<regex.Match object; span=(0, 6), match='123abc'>
>>> print(regex.match(r'(?(?=\d)\d+\b|\w+)', '123abc'))
None

Added POSIX matching (leftmost longest)


>>> # Normal matching.
>>> regex.search(r'Mr|Mrs', 'Mrs')
<regex.Match object; span=(0, 2), match='Mr'>
>>> regex.search(r'one(self)?(selfsufficient)?', 'oneselfsufficient')
<regex.Match object; span=(0, 7), match='oneself'>
>>> # POSIX matching.
>>> regex.search(r'(?p)Mr|Mrs', 'Mrs')
<regex.Match object; span=(0, 3), match='Mrs'>
>>> regex.search(r'(?p)one(self)?(selfsufficient)?', 'oneselfsufficient')
<regex.Match object; span=(0, 17), match='oneselfsufficient'>

>>> m = regex.search(r'(\w\w\K\w\w\w)', 'abcdef')
>>> m[0]
'cde'
>>> m[1]
'abcde'
>>>
>>> m = regex.search(r'(?r)(\w\w\K\w\w\w)', 'abcdef')
>>> m[0]
'bc'
>>> m[1]
'bcdef'

>>> m = regex.match(r"(\w)+", "abc")
>>> m.expandf("{1}")
'c'
>>> m.expandf("{1[0]} {1[1]} {1[2]}")
'a b c'
>>> m.expandf("{1[-1]} {1[-2]} {1[-3]}")
'c b a'
>>>
>>> m = regex.match(r"(?P<letter>\w)+", "abc")
>>> m.expandf("{letter}")
'c'
>>> m.expandf("{letter[0]} {letter[1]} {letter[2]}")
'a b c'
>>> m.expandf("{letter[-1]} {letter[-2]} {letter[-3]}")
'c b a'

Added partial matches


>>> pattern = regex.compile(r'\d{4}')
>>> # Initially, nothing has been entered:
>>> print(pattern.fullmatch('', partial=True))
<regex.Match object; span=(0, 0), match='', partial=True>
>>> # An empty string is OK, but it's only a partial match.
>>> # The user enters a letter:
>>> print(pattern.fullmatch('a', partial=True))
None
>>> # It'll never match.
>>> # The user deletes that and enters a digit:
>>> print(pattern.fullmatch('1', partial=True))
<regex.Match object; span=(0, 1), match='1', partial=True>
>>> # It matches this far, but it's only a partial match.
>>> # The user enters 2 more digits:
>>> print(pattern.fullmatch('123', partial=True))
<regex.Match object; span=(0, 3), match='123', partial=True>
>>> # It matches this far, but it's only a partial match.
>>> # The user enters another digit:
>>> print(pattern.fullmatch('1234', partial=True))
<regex.Match object; span=(0, 4), match='1234'>
>>> # It's a complete match.
>>> # If the user enters another digit:
>>> print(pattern.fullmatch('12345', partial=True))
None
>>> # It's no longer a match.
>>> # This is a partial match:
>>> pattern.match('123', partial=True).partial
True
>>> # This is a complete match:
>>> pattern.match('1233', partial=True).partial
False

regex.sub

# Python 3.7 and later
>>> regex.sub('.*', 'x', 'test')
'xx'
>>> regex.sub('.*?', '|', 'test')
'|||||||||'

# Python 3.6 and earlier
>>> regex.sub('(?V0).*', 'x', 'test')
'x'
>>> regex.sub('(?V1).*', 'x', 'test')
'xx'
>>> regex.sub('(?V0).*?', '|', 'test')
'|t|e|s|t|'
>>> regex.sub('(?V1).*?', '|', 'test')
'|||||||||'

match

>>> m = regex.match(r"(?:(?P<word>\w+) (?P<digits>\d+)\n)+", "one 1\ntwo 2\nthree 3\n")
>>>
>>> m.groupdict()
{'word': 'three', 'digits': '3'}
>>> m.captures("word")
['one', 'two', 'three']
>>> m.captures("digits")
['1', '2', '3']
>>> m.capturesdict()
{'word': ['one', 'two', 'three'], 'digits': ['1', '2', '3']}

captures

>>> # With optional groups:
>>>
>>> # Both groups capture, the second capture 'overwriting' the first.
>>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", "first or second")
>>> m.group("item")
'second'
>>> m.captures("item")
['first', 'second']
>>> # Only the second group captures.
>>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", " or second")
>>> m.group("item")
'second'
>>> m.captures("item")
['second']
>>> # Only the first group captures.
>>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", "first or ")
>>> m.group("item")
'first'
>>> m.captures("item")
['first']
>>> # With mandatory groups:
>>> # Both groups capture, the second capture 'overwriting' the first.
>>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)?", "first or second")
>>> m.group("item")
'second'
>>> m.captures("item")
['first', 'second']
>>> # Again, both groups capture, the second capture 'overwriting' the first.
>>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)", " or second")
>>> m.group("item")
'second'
>>> m.captures("item")
['', 'second']
>>> # And yet again, both groups capture, the second capture 'overwriting' the first.
>>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)", "first or ")
>>> m.group("item")
''
>>> m.captures("item")
['first', '']

fullmatch

>>> print(regex.fullmatch(r"abc", "abc").span())
(0, 3)
>>> print(regex.fullmatch(r"abc", "abcx"))
None
>>> print(regex.fullmatch(r"abc", "abcx", endpos=3).span())
(0, 3)
>>> print(regex.fullmatch(r"abc", "xabcy", pos=1, endpos=4).span())
(1, 4)
>>> regex.match(r"a.*?", "abcd").group(0)
'a'
>>> regex.fullmatch(r"a.*?", "abcd").group(0)
'abcd'

subf and subfn

>>> regex.subf(r"(\w+) (\w+)", "{0} => {2} {1}", "foo bar")
'foo bar => bar foo'
>>> regex.subf(r"(?P<word1>\w+) (?P<word2>\w+)", "{word2} {word1}", "foo bar")
'bar foo'

Added expandf to match object

>>> m = regex.match(r"(\w+) (\w+)", "foo bar")
>>> m.expandf("{0} => {2} {1}")
'foo bar => bar foo'
>>>
>>> m = regex.match(r"(?P<word1>\w+) (?P<word2>\w+)", "foo bar")
>>> m.expandf("{word2} {word1}")
'bar foo'

>>> m = regex.search(r"\w+", "Hello world")
>>> print(m.group())
Hello
>>> print(m.string)
Hello world
>>> m.detach_string()
>>> print(m.group())
Hello
>>> print(m.string)
None

>>> regex.match(r"(Tarzan|Jane) loves (?1)", "Tarzan loves Jane").groups()
('Tarzan',)
>>> regex.match(r"(Tarzan|Jane) loves (?1)", "Jane loves Tarzan").groups()
('Jane',)
>>> m = regex.search(r"(\w)(?:(?R)|(\w?))\1", "kayak")
>>> m.group(0, 1, 2)
('kayak', 'k', None)


>>> regex.match(r"(?iV1)strasse", "stra\N{LATIN SMALL LETTER SHARP S}e").span()
(0, 6)
>>> regex.match(r"(?iV1)stra\N{LATIN SMALL LETTER SHARP S}e", "STRASSE").span()
(0, 7)

>>> # A 'raw' fuzzy match:
>>> regex.fullmatch(r"(?:cats|cat){e<=1}", "cat").fuzzy_counts
(0, 0, 1)
>>> # 0 substitutions, 0 insertions, 1 deletion.
>>> # A better match might be possible if the ENHANCEMATCH flag used:
>>> regex.fullmatch(r"(?e)(?:cats|cat){e<=1}", "cat").fuzzy_counts
(0, 0, 0)
>>> # 0 substitutions, 0 insertions, 0 deletions.


>>> m = regex.search('(fuu){i<=2,d<=2,e<=5}', 'anaconda foo bar')
>>> m
<regex.Match object; span=(7, 10), match='a f', fuzzy_counts=(0, 2, 2)>
>>> m.fuzzy_changes
([], [7, 8], [10, 11])


>>> p = regex.compile(r"first|second|third|fourth|fifth")
>>> option_set = ["first", "second", "third", "fourth", "fifth"]
>>> p = regex.compile(r"\L<options>", options=option_set)
>>> print(p.named_lists)
# Python 3
{'options': frozenset({'fifth', 'first', 'fourth', 'second', 'third'})}
# Python 2
{'options': frozenset(['fifth', 'fourth', 'second', 'third', 'first'])}
>>> option_set = ["first", "second", "third", "fourth", "fifth"]
>>> p = regex.compile(r"\L<options>", options=option_set, other_options=[])
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "C:\Python37\lib\site-packages\regex\regex.py", line 348, in compile
    return _compile(pattern, flags, ignore_unused, kwargs)
  File "C:\Python37\lib\site-packages\regex\regex.py", line 585, in _compile
    raise ValueError('unused keyword argument {!a}'.format(any_one))
ValueError: unused keyword argument 'other_options'
>>> p = regex.compile(r"\L<options>", options=option_set, other_options=[], ignore_unused=True)


>>> m = regex.search(r"(\w{3})+", "123456789")
>>> m.group(1)
'789'
>>> m.captures(1)
['123', '456', '789']
>>> m.start(1)
6
>>> m.starts(1)
[0, 3, 6]
>>> m.end(1)
9
>>> m.ends(1)
[3, 6, 9]
>>> m.span(1)
(6, 9)
>>> m.spans(1)
[(0, 3), (3, 6), (6, 9)]


>>> m = regex.search(r"(?P<before>.*?)(?P<num>\d+)(?P<after>.*)", "pqr123stu")
>>> print(m["before"])
pqr
>>> print(len(m))
4
>>> print(m[:])
('pqr123stu', 'pqr', '123', 'stu')

findall


>>> regex.findall(r".", "abc")
['a', 'b', 'c']
>>> regex.findall(r"(?r).", "abc")
['c', 'b', 'a']
>>> regex.findall(r"..", "abcde")
['ab', 'cd']
>>> regex.findall(r"(?r)..", "abcde")
['de', 'bc']

Branch reset

>>> regex.match(r"(?|(first)|(second))", "first").groups()
('first',)
>>> regex.match(r"(?|(first)|(second))", "second").groups()
('second',)

\p{han} 可以匹配汉字, \p{Latin} 可以匹配拉丁字母


参考

  • regex
    https://www.cnblogs.com/animalize/p/4949219.html
  • re 模块
    https://docs.python.org/3/library/re.html
  • 熊清亮:Python 正则表达式引擎 Regex vs RE
    https://seealso.cn/post/python-regex-vs-re-regex-engine/


本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!

相关文章

立即
投稿

微信公众账号

微信扫一扫加关注

返回
顶部