[] 用法

In [94]:
# [...] character class: matches any ONE of the characters listed inside the brackets.
import re
re.findall("[abc]", "12a3456b78c90")
Out[94]:
['a', 'b', 'c']
In [95]:
# [^...] negated character class: matches any ONE character that is NOT listed inside the brackets.
re.findall("[^abc]", "12a3456b78c90")
Out[95]:
['1', '2', '3', '4', '5', '6', '7', '8', '9', '0']

*+? 用法

In [68]:
# * repeats the preceding item 0 or more times. It applies only to the final "n" here,
# so "pytho" followed by any number of "n" characters (including none) matches.
re.findall("python*", "pytho0python6pythonn2")
Out[68]:
['pytho', 'python', 'pythonn']
In [69]:
# + repeats the preceding item 1 or more times. It applies only to the final "n" here,
# so "pytho" must be followed by at least one "n" to match.
re.findall("python+", "pytho0python6pythonn2")
Out[69]:
['python', 'pythonn']
In [70]:
# ? makes the preceding item optional (0 or 1 times). It applies only to the final "n" here,
# so at most one "n" is consumed: only the first "n" of "pythonn" is part of the match.
re.findall("python?", "pytho0python6pythonn2")
Out[70]:
['pytho', 'python', 'python']

贪婪与非贪婪

In [66]:
# {m,n} is greedy: it consumes as many repetitions as possible (up to 6 lowercase letters here),
# so "pythonn" yields "python" and the leftover single "n" is too short to match.
re.findall("[a-z]{3,6}", "pythonn1111java678php")
Out[66]:
['python', 'java', 'php']
In [67]:
# {m,n}? is NON-greedy (lazy): it stops at the minimum number of repetitions (3 letters here),
# so "pythonn" is split into "pyt" and "hon", and "java" yields only "jav".
re.findall("[a-z]{3,6}?", "pythonn1111java678php")
Out[67]:
['pyt', 'hon', 'jav', 'php']

() 组的用法

查找组

使用 re.findall 注意事项:

  • r = re.findall('正则表达式', '字符串', flags = re.I | re.S)
  • re.I 不区分大小写
  • re.S 使 . 也匹配换行符
In [92]:
# Sample text: three e-mail addresses embedded in surrounding Chinese text.
# (The addresses are the ones reported by this cell's recorded output.)
str_emails = 'Dave邮箱:dave@google.com,奥斯卡Steve邮箱:steve@gmail.com,as京东方士大夫Rob:rob@gmail.com埃'
# Group 1: local part, "@" and the domain up to (not including) its last suffix.
# Group 2: the last suffix, a dot followed by 2-4 letters (e.g. ".com").
patternn = r'([A-Z0-9._%+-]+@[A-Z0-9.-]+)(\.[A-Z]{2,4})'
# Find every e-mail address. Because the pattern contains two groups, findall
# returns one (group 1, group 2) tuple per match instead of the whole match.
# re.IGNORECASE lets the upper-case character classes match lower-case letters too.
find_r = re.findall(patternn, str_emails, flags=re.IGNORECASE)
find_r
Out[92]:
[('dave@google', '.com'), ('steve@gmail', '.com'), ('rob@gmail', '.com')]

替换组

In [91]:
# Replace each e-mail's trailing suffix (group 2 of patternn, e.g. ".com") with ".cn":
# \g<1> re-inserts group 1 unchanged and the literal ".cn" is placed after it.
sub_r = re.sub(patternn, r'\g<1>.cn', str_emails, flags=re.IGNORECASE)
sub_r
# Expected output when str_emails contains dave@google.com, steve@gmail.com and rob@gmail.com:
# Out: Dave邮箱:dave@google.cn,奥斯卡Steve邮箱:steve@gmail.cn,as京东方士大夫Rob:rob@gmail.cn埃
Out[91]:
'Dave邮箱:dave@google.cn,奥斯卡Steve邮箱:steve@gmail.cn,as京东方士大夫Rob:rob@gmail.cn埃'

re.sub() 传参为方法示例

In [77]:
lanuage = 'PythonC#JavaC#PHPC#'


def convert(value):
    """Return the text matched by `value`, wrapped in '!!' on both sides.

    `value` is the match object that re.sub passes to a callable replacement.
    """
    return ''.join(['!!', value.group(), '!!'])


# re.sub accepts a callable as the replacement: it is called once per match
# and its return value is substituted for that match.
r = re.sub(pattern='C#', repl=convert, string=lanuage)
r
Out[77]:
'Python!!C#!!Java!!C#!!PHP!!C#!!'