[] 用法¶

# [] 匹配或
import re
re.findall("[abc]", "12a3456b78c90")

['a', 'b', 'c']

# [^] 匹配非
re.findall("[^abc]", "12a3456b78c90")

['1', '2', '3', '4', '5', '6', '7', '8', '9', '0']

*+? 用法¶

# * 匹配0次或无限多次
re.findall("python*", "pytho0python6pythonn2")

['pytho', 'python', 'pythonn']

# + 匹配1次或无限多次
re.findall("python+", "pytho0python6pythonn2")

['python', 'pythonn']

# ? 匹配0次或1次
re.findall("python?", "pytho0python6pythonn2")

['pytho', 'python', 'python']

贪婪非贪婪¶

# {} 贪婪
re.findall("[a-z]{3,6}", "pythonn1111java678php")

['python', 'java', 'php']

# {}? 非贪婪
re.findall("[a-z]{3,6}?", "pythonn1111java678php")

['pyt', 'hon', 'jav', 'php']

() 组的用法¶

查找组¶

使用 re.findall 注意事项:

r = re.findall('正则表达式', '字符串', flags = re.I | re.S)
re.I 不区分大小写
re.S 使 . 也匹配换行符

# Sample text: three email addresses embedded in mixed Chinese/English text.
# (The addresses had been replaced by an "[email protected]" scraping
# placeholder, which contains no "@" and therefore could never match.)
str_emails = 'Dave邮箱：dave@google.com,奥斯卡Steve邮箱：steve@gmail.com,as京东方士大夫Rob:rob@gmail.com埃'
# Group 1 captures "user@domain"; group 2 captures the final dotted suffix
# of 2-4 letters (e.g. ".com").
patternn = r'([A-Z0-9._%+-]+@[A-Z0-9.-]+)(\.[A-Z]{2,4})'
# Match the emails. Because the pattern has two groups, findall returns a
# list of (group1, group2) tuples; IGNORECASE lets [A-Z] match lowercase too.
find_r = re.findall(patternn, str_emails, flags=re.IGNORECASE)
find_r

[('dave@google', '.com'), ('steve@gmail', '.com'), ('rob@gmail', '.com')]

替换组¶

# Replace each email's suffix (group 2, e.g. ".com") with ".cn" while keeping
# group 1 ("user@domain") via the \g<1> backreference.
sub_r = re.compile(patternn, flags=re.IGNORECASE).sub(r'\g<1>.cn', str_emails)
sub_r
# Out: Dave邮箱：dave@google.cn,奥斯卡Steve邮箱：steve@gmail.cn,as京东方士大夫Rob:rob@gmail.cn埃

'Dave邮箱：dave@google.cn,奥斯卡Steve邮箱：steve@gmail.cn,as京东方士大夫Rob:rob@gmail.cn埃'

re.sub() 传参为方法示例¶

# Text in which every "C#" occurrence gets decorated below.
lanuage = 'PythonC#JavaC#PHPC#'


def convert(value):
    """Return the text matched by ``value`` wrapped in ``!!`` on both sides.

    ``value`` is the match object that ``re.sub`` passes to a callable
    replacement.
    """
    return ''.join(('!!', value.group(0), '!!'))


# Pass the function itself as the replacement: it is called once per match
# and its return value is substituted for the matched text.
r = re.compile('C#').sub(convert, lanuage)
r

'Python!!C#!!Java!!C#!!PHP!!C#!!'