Python版本为3.4.3
直接看官方文档。。。记录当笔记
https://docs.python.org/3/library/re.html?highlight=re#module-re
和一般的正则表达式不同,在Python的普通字符串中想表示一个'\'必须写成'\\',而正则表达式本身也是用'\\'表示一个反斜杠的,所以在Python中要用'\\\\'才能匹配一个'\',太麻烦;所以可以在字符串前面加上'r',表示这里面的字符都是不需要转义的原生(raw)字符。
现在我们所要分析的字符串为"fsfxczf//sadfDOTA2s////dafasdsfDOTA2wwwwwwwwwww"。
re提供了两种简单的匹配方式(查找字符串是否有某个字符串),一种是search,另一种是match(匹配开头)。
import re

# Sample text to search; a raw string, so no character in it is an escape.
game = r'doTa2fsfxczf//sadfDOTA2s////dafasdsfDOTA2wwwwwdotawwwwww'

# re.match takes three arguments: the pattern, the string to search, and the
# flags (the flags may also be given as their integer value).
# match() only succeeds at the very beginning of the string; re.I makes the
# comparison case-insensitive, so 'dota2' matches the leading 'doTa2'.
game2 = re.match('dota2', game, re.I)
print(game2)
# <_sre.SRE_Match object; span=(0, 5), match='doTa2'>
这个模块中的方法如下:
import re

# Sample text to search; a raw string, so no character in it is an escape.
game = r'doTa2fsfxczf//sadfDOTA2s////dafasdsfDOTA2wwwwwdotawwwwww'

# 'DOTA' followed by any single character.
style = 'DOTA.'

# Compile once with IGNORECASE, then reuse the compiled pattern object.
rengine = re.compile(style, re.I)

# findall returns every non-overlapping match as a list of strings.
gamename = rengine.findall(game)
print(gamename)
# ['doTa2', 'DOTA2', 'DOTA2', 'dotaw']
re.A/re.ASCII | 值为256,仅匹配ASCII码值 |
re.I/re.IGNORECASE | 值为2,无视大小写 |
re.DEBUG | 值为128,显示编译表达式的debug信息 |
re.L/re.LOCALE | 值为4,使\w、\W、\b、\B以及忽略大小写的匹配依赖于当前的区域设置(locale);从Python 3.6起只能用于bytes模式,官方不推荐使用 |
re.M/re.MULTILINE | 值为8,使得'^'和'$'除了匹配整个字符串的开头和结尾之外,还能匹配每一行的行首和行尾 |
# re.fullmatch succeeds only when the WHOLE string matches the pattern.
temp = r'this is a temp'
print(re.fullmatch(temp, 'this is a temp'))
print(re.fullmatch(temp, 'this is b temp'))
# <_sre.SRE_Match object; span=(0, 14), match='this is a temp'>
# None
# re.split(pattern, string, maxsplit, flags): maxsplit=0 means "no limit".
# maxsplit and flags are passed by keyword because passing them positionally
# is deprecated in recent Python versions.
temp = r'this is DOTA2!'
print(re.split(' ', temp, maxsplit=0, flags=re.I))
print(re.split(' ', temp, maxsplit=1, flags=re.I))
# ['this', 'is', 'DOTA2!']
# ['this', 'is DOTA2!']
# re.findall returns all non-overlapping matches as a list of strings.
temp = r'this is dota,and this is DOTA2.'
print(re.findall('dota.', temp, re.I))
# ['dota,', 'DOTA2']
# re.finditer yields one match object per non-overlapping match, lazily.
temp = r'this is dota,and this is DOTA2.'
ite = re.finditer('dota.', temp, re.I)
for obj in ite:
    print(obj.group())
# dota,
# DOTA2
temp = r'this-is-dota!'

# '*' means "zero or more", so '6*' matches the empty string at every
# position (including both ends) and a space is inserted at each of them.
print(re.sub('6*', ' ', temp))
#  t h i s - i s - d o t a !


def show(repl):
    """Replacement callback for re.sub.

    re.sub calls this once per match and passes the match object, so the
    function must accept exactly one parameter. It prints 'play' and returns
    the replacement text '+'.
    """
    print('play')
    return '+'


# When the replacement is a callable, its return value replaces each match.
print(re.sub('-', show, temp))
# play
# play
# this+is+dota!
# re.subn works like re.sub but returns a tuple:
# (new_string, number_of_substitutions_made).
temp = r'this-is-dota!'
print(re.subn('-', '*', temp))
# ('this*is*dota!', 2)
# re.escape backslash-escapes characters so the text can be used as a
# literal inside a pattern.
temp = r'<script>'
print(re.escape(temp))
# \<script\>
# NOTE: the output above is for Python < 3.7. From 3.7 on, only characters
# that are special in a regex are escaped, so '<' and '>' are left alone and
# the output is: <script>
之后就是经过re.compile()之后的regex对象了
变成regex对象之后的方法和直接使用re加载正则匹配模式的方法大体上差不多,只不过多了以下几种方法:
具体如下:
temp = 'this is dota,that is dota2,this is war3!'

# Three groups: 'dota', the next character, and a named group 'nickname'
# holding the four characters after that.
r0 = re.compile('(dota)(.)(?P<nickname>....)', re.I)

print(r0.flags)       # flags given to compile() plus implicit ones
print(r0.groups)      # number of capturing groups
print(r0.groupindex)  # mapping of group name -> group number
print(r0.pattern)     # the pattern string the object was compiled from

# Search once and reuse the match object for both lookups.
found = r0.search(temp)
print(found.group('nickname'))
print(found.group(2))
# 34    (re.I == 2 plus the implicit re.UNICODE == 32 for a str pattern)
# 3
# {'nickname': 3}
# (dota)(.)(?P<nickname>....)
# that
# ,
最后就是match对象了
temp = 'this is dota,that is dota2,this is war3!'
r0 = re.compile('(....)(.)(..)')
matchobj = r0.search(temp)
print(matchobj)

# Match.expand substitutes backreferences (\1, \2, ... or \g<name>) in the
# template with the corresponding groups. The references must be written
# with backslashes; here the three groups are emitted in reverse order.
expanded = matchobj.expand(r'\3\2\1')
print(expanded)
# <_sre.SRE_Match object; span=(0, 7), match='this is'>
# is this
temp = 'this*is dota,that is dota2,this is war3!'
r0 = re.compile('(....)(?P<second>.)(..)')
tem = r0.search(temp)

print(tem.group(0))         # the entire match
print(tem.group(1))
print(tem.group(2))
print(tem.group('second'))  # a named group is also reachable by its name
print(tem.group(3))
print(tem.group(1, 2))      # several arguments -> a tuple
# this*is
# this
# *
# *
# is
# ('this', '*')
temp = 'this is dota,that is dota2,this is war3!'
m = re.match('(....)(.)(..)', temp)

# Match.groups() returns all subgroups of the match as a tuple.
print(m.groups())
print(type(m.groups()))
# ('this', ' ', 'is')
# <class 'tuple'>
temp = 'this is dota,that is dota2,this is war3!'
# Group names are case-sensitive; 'word1' is spelled in lower case so the
# result matches the documented output below.
m = re.match('(....)(?P<word1>.)(?P<word2>..)', temp)
print(m.groupdict())  # returns every named group as {name: matched text}
# {'word1': ' ', 'word2': 'is'}
temp = 'this is dota,that is dota2,this is war3!'
reg = re.compile('dota')

# Restrict the search to temp[10:30] via the pos / endpos arguments.
m = reg.search(temp, 10, 30)
print(m.pos)     # the pos value passed to search(): where the search began
print(m.endpos)  # the endpos value passed to search(): the index the engine
                 # will not go beyond (NOT the end of the matched text)
# 10
# 30
temp = 'this is dota,that is dota2,this is war3!'
m = re.match('(....)(?P<Tom>.)(?P<Mary>..)', temp)

print(m.lastindex)  # index of the last matched group; None if no group matched
print(m.lastgroup)  # name of the last matched group; None if it has no name
print(m.re)         # the pattern object that produced this match
print(m.string)     # the string that was passed to match()
# 3
# Mary
# re.compile('(....)(?P<Tom>.)(?P<Mary>..)')
# this is dota,that is dota2,this is war3!
temp = 'this is dota,that is dota2,this is war3!'
m = re.match('(....)(?P<word1>.)(?P<word2>..)', temp)

print(m.start(1))  # start index of group 1
print(m.end(2))    # end index (exclusive) of group 2
print(m.span(2))   # (start, end) of group 2
# 0
# 5
# (4, 5)
以上
新闻热点
疑难解答