re模块--python正则表达式

上一篇 / 下一篇 2017-08-28 16:39:29 / 个人分类：python

步骤方法

1.用import re 导入正则表达式模块

2.用re.compile()函数创建一个Regex对象【记得使用原始字符串，即在模式字符串的引号前加小写r，这样反斜杠不会被Python当作转义字符处理】

3.向Regex对象的search()或findall()方法传入想要查找的字符串。search()方法返回一个Match对象（没有匹配时返回None），findall()方法返回一个字符串列表（正则表达式中有分组时，返回的是分组内容的列表）

4.调用Match对象的group()或groups()方法，返回实际匹配文本的字符串

正则表达式符号

1）?

匹配零次或一次前面的分组（表示可选匹配）

2）*

匹配零次或多次前面的分组（即星号之前的分组可以出现任意次）

3）+

匹配一次或多次前面的分组（至少出现一次）

4）{n}

匹配n次前面的分组（匹配特定次数）

5）{n,m}

匹配至少n次，至多m次前面的分组（匹配范围内的次数）。默认为贪心模式（取最大次数）

6）{n,m}? 或 *? 或 +?

对前面分组进行非贪心匹配

7）^spam

意味着字符串必须以spam开始

8）spam$

意味着字符串必须以spam结束

9）^spam$

意味着字符串必须为spam

10）.

匹配所有字符，换行字符除外（匹配一个字符）

11）re.compile('.*',re.DOTALL)

匹配所有字符，包括换行字符（\n)

12).*

匹配所有字符，换行字符除外（匹配多个字符）

13）\d、\w 、\s

分别匹配数字字符、单词字符（字母、数字或下划线）和空白字符（空格、制表符、换行符等）

14）\D、\W、\S

分别匹配除数字字符、单词字符、空白字符以外的任意字符

15）[abc]

匹配括号内任意字符，诸如a、b、c

16）[^abc]

匹配不在括号内的任意字符

17）re.compile(r'''()''',re.VERBOSE)

忽略正则表达式字符串中的空白字符和注释

18）

re.compile(r'''()''',re.I)

re.compile(r'''()''',re.IGNORECASE)

让正则表达式不区分大小写

19）mo1=re.compile(r'(^\s+)')	   #字符串开头为一个或多个空白字符

mo1.sub('a ', '  It is beautiful')

将'  It is beautiful'中符合表达式的部分（开头的两个空格）替换为'a '（即输出为'a It is beautiful'）

练习代码

#-*- coding:utf-8 -*-

import re
import sys

import pyperclip

# Python 2 only: reload() re-exposes sys.setdefaultencoding().
reload(sys)
sys.setdefaultencoding('utf-8')

# Steps for searching text with a regular expression:
#   1) import the re module
#   2) call re.compile() to create a Regex (pattern) object
#   3) call the pattern's search() method on the text to scan; it returns a
#      Match object, or None when nothing matches
#   4) call the Match object's group() method to get the matched text

# A raw string (r'...') keeps backslashes literal, so the pattern does not
# need the doubled '\\d' escapes.
obMo = re.compile(r'\d\d\d-\d\d\d\d\d\d\d\d')
mo = obMo.search('this phone number is 189-26769482')
print(mo.group())

# Parentheses split the pattern into groups that can be retrieved separately.
getArea = re.compile(r'(\d\d\d)-(\d\d\d\d\d\d\d\d)')
moArea = getArea.search('this phone number is 189-26769482')
print(moArea.group(1))  # text captured by the first group only
print(moArea.groups())  # all captured groups at once, as a tuple

# The pipe character '|' matches any one of several alternatives. search()
# returns the first place in the text where one of the alternatives matches.
pipeline = re.compile(r'huanhuan|lele|JJ')
# The prefix 'Bat' is shared by all alternatives in the group, so this matches
# 'Bathuanhuan', 'Batlele' or 'BatJJ'.
pipeline2 = re.compile(r'Bat(huanhuan|lele|JJ)')

choiceOne = pipeline.search('huanhuan and lele and JJ')
choiceOne2 = pipeline.search('lele is there')
choiceOne3 = pipeline2.search('Batlele is there and huanhuan JJ')
choiceOne4 = pipeline2.search('Bathuanhuan is there and huanhuan JJ')

print(choiceOne.group())
print(choiceOne2.group())
print(choiceOne3.group())
print(choiceOne4.group(1))  # group(1) is only the part after the 'Bat' prefix

# A question mark makes the preceding group optional (zero or one occurrence).
batRegex = re.compile(r'Bat(wo)?man')
# Both 'Batwoman' and 'Batman' occur in the text; search() only returns the
# first match it finds.
mo1 = batRegex.search('The Adeventures of  Batwoman and Batman')
print(mo1.group())

# Use the optional '?' group to make the area code of a phone number optional
# (it may appear zero times or once).
batNumber = re.compile(r'(\d\d\d\d-)?\d\d\d\d\d\d\d')
num1 = batNumber.search('5927739')
num2 = batNumber.search('0755-5927739')
# Single formatted argument so the output is the same on Python 2 and 3.
print('%s %s' % (num1.group(), num2.group()))

# An asterisk matches the preceding group zero or more times.
betReget2 = re.compile(r'Bat(wo)*man')
mo3 = betReget2.search('there hava Batwowowowoman')
print(mo3.group())

# A plus sign matches the preceding group one or more times.
batReget3 = re.compile(r'Bat(wo)+man')
# '+' requires at least one 'wo': batReget3.search('there hava Batman')
# returns None, so calling .group() on that result would raise AttributeError.
mo5 = batReget3.search('there hava Batwowowoman')
# Single formatted argument so the output is the same on Python 2 and 3.
print('mo5 is %s' % mo5.group())

# Curly braces match the preceding group a specific number of times.
batReget4 = re.compile(r'Bat(wo){2}man')
# {2,5} matches 2 to 5 repetitions and is greedy: it takes as many
# repetitions as are available, up to 5.
batReget5 = re.compile(r'Bat(wo){2,5}man')
# A trailing '?' makes the quantifier non-greedy: it takes as few repetitions
# as still let the whole pattern match. Nothing follows the group here, so it
# stops at the minimum of 2.
batReget6 = re.compile(r'Bat(wo){2,5}?')

# {2} accepts exactly two repetitions: batReget4.search('Batwowowoman')
# returns None, so calling .group() on that result would raise AttributeError.
mo6 = batReget4.search('there hava Batwowoman')
mo7 = batReget5.search('there hava Batwowowowoman')
mo8 = batReget6.search('there hava Batwowowowoman')
# Single formatted argument so the output is the same on Python 2 and 3.
print('mo6、mo7、mo8 is %s %s %s' % (mo6.group(), mo7.group(), mo8.group()))

# findall() returns a list of every non-overlapping match. When the pattern
# has no groups the list holds the full matched strings; when it has exactly
# one group the list holds only that group's text for each match.
batReget7 = re.compile(r'\d\d\d\d-\d\d\d\d\d\d\d')
batReget8 = re.compile(r'(\d\d\d\d-)?\d\d\d\d\d\d\d')

# One group in the pattern, so mo10 is ['0755-', '0734-'].
mo10 = batReget8.findall('0755-57927739 and 0734-5927739')
# No groups in the pattern, so mo9 holds the full matched numbers.
mo9 = batReget7.findall('0755-57927739 and 0734-5927739')
# Single formatted argument so the output is the same on Python 2 and 3.
print('mo9、mo10 is %s %s' % (mo9, mo10))

# Shorthand character classes: \d digit, \w word character (letter, digit or
# underscore), \s whitespace; \D, \W and \S match the respective opposites.
batReget9 = re.compile(r'\d+\s+\w+')
mo11 = batReget9.findall('123123213    wer')
# Single formatted argument so the output is the same on Python 2 and 3.
print('mo11 is %s' % (mo11,))

# Square brackets define a custom character class. A caret placed right after
# the opening bracket negates the class.
vowelReget = re.compile(r'[aeiouAEIOU]')  # any vowel
# Any character that is NOT a vowel; this includes spaces and punctuation,
# not just consonant letters.
consonantReget = re.compile(r'[^aeiouAEIOU]')

mo12 = vowelReget.findall('sdf sdfwe uowe aeiou')
mo13 = consonantReget.findall('sdf sdfwe uowe aeiou')
# Single formatted argument so the output is the same on Python 2 and 3.
print('mo12 is %s' % (mo12,))
print('mo13 is %s' % (mo13,))

# A caret at the start of the pattern means the match must begin at the start
# of the searched text.
beginWithHello = re.compile(r'^Hello')
mo14 = beginWithHello.search('Hello world')
print(mo14.group())
# 'Hello' is not at the start of this text, so search() returns None.
print(beginWithHello.search('he said Hello') is None)
print(mo14 is None)

# A dollar sign at the end of the pattern means the text must end with the
# pattern.
endWith = re.compile(r'\d+$')
# Only the trailing run of digits ('123') is matched; the earlier digits are
# not at the end of the text.
mo15 = endWith.search('7989sd3f123')
# Single formatted argument so the output is the same on Python 2 and 3.
print('mo15 is %s' % mo15.group())

# Using ^ and $ together means the entire text must match the pattern.
theAll = re.compile(r'^Hello\d+$')
# There is other text between 'Hello' and the trailing digits, so this search
# returns None; calling mo16.group() would raise AttributeError.
mo16 = theAll.search('Hello the 123 world 78910')
mo17 = theAll.search('Hello78910')
# Single formatted argument so the output is the same on Python 2 and 3.
print('mo17 is %s' % mo17.group())

# Wildcard: a dot '.' matches any single character except a newline, so '.at'
# matches 'at' together with the one character right before it.
atReget = re.compile(r'.at')
mo18 = atReget.findall('you mat muat tttyoat eeeaa yy')
# Single formatted argument so the output is the same on Python 2 and 3.
print('mo18 is %s' % (mo18,))

# '.*' matches any run of characters except newlines. It is greedy by
# default; the non-greedy form is '.*?'.
nameReget = re.compile(r'First Name:(.*)Last Name:(.*)')
mo18 = nameReget.search('First Name: All Last Name: Sweigart')
print(mo18.group())

nameReget2 = re.compile(r'<.*?>')  # non-greedy: stops at the first '>'
nameReget3 = re.compile(r'<.*>')   # greedy: extends to the last '>'
mo19 = nameReget2.search('<the name><the name two>>')
mo20 = nameReget3.search('<the name><the name two>>')
# Single formatted argument so the output is the same on Python 2 and 3.
print('mo19 is %s' % mo19.group())
print('mo20 is %s' % mo20.group())

# Without the re.DOTALL flag the dot does not match a newline, so '.*' stops
# at the end of the first line. Passing re.DOTALL as the second argument to
# re.compile() makes the dot match newlines as well.
newLineRegex = re.compile(r'.*')
mo21 = newLineRegex.search(
    'Server he public trust.\nprotect the innocent.'
    '\nUphold the law.'
).group()
# Single formatted argument so the output is the same on Python 2 and 3.
print('mo21 is  %s' % mo21)

newLineRegex2=re.compile('.*',re.DOTALL)

mo22=newLineRegex2.search('\n\nServer he public trust.\nprotect the innocent.'

'\nUphold the law\n

TAG: