Reguläre Ausdrücke in Python
main.py
import re
def lines( source ):
    """Split *source* at newline characters and return the list of lines.

    A trailing newline in *source* yields a final empty string.
    """
    return source.split( "\n" )


def iter( s, p ):
    """Yield a match object for every non-overlapping match of pattern *p* in *s*.

    NOTE: the name shadows the built-in ``iter``; it is kept because the
    rest of this listing calls the function under this name.
    """
    yield from re.finditer( p, s )


def matches( line ):
    """Yield a match object for every stand-alone word ``ADD`` in *line*."""
    # \b on both sides rejects "ADD3" but accepts "ADD.MOV",
    # because the dot is not a word character.
    yield from iter( line, r'(\bADD\b)' )


def show( line, match ):
    """Print *line* with the text matched by *match* enclosed in parentheses."""
    start, end = match.span( 0 )
    print( line[ :start ], end = '(' )
    print( line[ start:end ], end = ')' )
    print( line[ end: ])


source = "ADD R1, R2\nADD3 R4, R5, R6\nADD.MOV R1, R2, [0x10]\n"
for line in lines( source ):
    for match in matches( line ):
        show( line, match )
# - transcript
(ADD) R1, R2
(ADD).MOV R1, R2, [0x10]
main.py
import re
def iter( s, p ):
    """Yield the text of the first capturing group for every match of *p* in *s*.

    Matches are found left to right and do not overlap. The pattern *p*
    must contain at least one capturing group, otherwise IndexError is raised.

    NOTE: the name shadows the built-in ``iter``; it is kept because the
    loop below calls the function under this name.
    """
    for m in re.finditer( p, s ):
        yield m.group( 1 )


for s in iter( 'a=x,a=y,a=z', 'a=(.)' ):
    print( s )
# - Protokoll (transcript of the program output follows)
x
y
z
main.py
import re
# Read the whole input file; the with-statement closes it again right away.
with open( "tmp.txt" ) as source_file:
    s = source_file.read()

# One capturing group: "http", then as few of the listed characters as
# possible, then "jpg".
# Written as a raw string so that "\[", "\]" and "\|" reach the regex engine
# without relying on invalid string escapes; the compiled pattern is the same
# as before (the former "\\|" in a non-raw string also produced "\|").
pattern = r"(http[0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ,.!/()=?`*;:_{}\[\]\|~%^-]*?jpg)"
it = re.finditer( pattern, s )
seen = set()
for m in it:
    u = m.group( 1 )
    if u not in seen:
        # Print every distinct URL only once, as a quoted, comma-terminated line.
        print( f'"{u}",' )
        seen.add( u )
# main.py (file-name label of the next listing)
# section: a regular expression for terms with balanced parentheses
# (using ideas probably developed by "user3489112")
# The pattern is assembled from three raw pieces: 18 group openers, the
# innermost parenthesis-free text, and 18 matching group closers.
balanced = "".join((
    r"[^()]*?(?:\(" * 18,
    r"[^()]*?",
    r"\)[^()]*?)*?" * 18,
))
main.py
import re
# Rewrite every ":name" placeholder as "${name}" and print the resulting text.
placeholder = re.compile( r':(\w+)' )
rewritten = placeholder.sub( r'${\1}', "abc :def ghi :jkl mno" )
print( rewritten )
- Protokoll
abc ${def} ghi ${jkl} mno
main.py
import re
# re.search takes (pattern, string, flags); the former call to re.sub received
# the flag in the position of the subject string and could not produce a match object.
match = re.search( r'a.*b', 'a222222b', re.DOTALL ) # searches first occurrence, "." matches newlines
print( match ) # prints a summary of the match
print( match.group() ) # prints the full text of the match
- Protokoll
<_sre.SRE_Match object; span=(0, 8), match='a222222b'>
a222222b
main.py
def _evaluated_term( term ):
    """Return the value of the matched arithmetic term, converted to a string."""
    # NOTE(review): eval only ever receives text matched by the pattern below,
    # i.e. digits, one of the operators + - * /, and digits again.
    # Do not widen the pattern without replacing eval.
    value = eval( term.group( 0 ))
    return str( value )


# Replace every "<digits><operator><digits>" term in s by its computed value.
s = re.sub( r'(\d+[+\-*/]\d+)', _evaluated_term, s )
main.py
import re
import builtins
import _sre
# Sample input: three identical HTML paragraph lines, each containing one
# telephone number and each terminated by a newline.
source = "<p>Viel Text +1 234 555 6789 Viel Text\n" * 3
def hyperlink_of( text:str, reference :str )-> str:
    """Return an HTML anchor element that displays *text* and links to *reference*.

    Both arguments are inserted verbatim; no HTML escaping is applied.

    Raises:
        AssertionError: if *text* or *reference* is not a string
            (only while assertions are enabled).
    """
    assert isinstance( text, str )
    assert isinstance( reference, str )
    # An f-string always evaluates to a str, so the result needs no further check.
    return f'<a href="{reference}">{text}</a>'
def matchobject_formatted_as_telephone_number( matchobject ):
    """Return a ``tel:`` hyperlink for the telephone number matched by *matchobject*.

    The link text is the matched text preceded by one space; the link target
    is ``tel:`` followed by the matched text with all spaces removed.
    """
    telephone_number = matchobject.group( 0 )
    telephone_number_without_spaces = telephone_number.replace( ' ', '' )
    return hyperlink_of(
        text = ' ' + telephone_number,
        reference = 'tel:' + telephone_number_without_spaces )


# A plus sign, then any run of digits and spaces that ends in a digit.
# Raw string, so that "\+" is passed to the regex engine as written and is
# not treated as an (invalid) string escape.
pattern_for_telephone_number = r'\+[ 0-9]*[0-9]'
# re.sub calls the function once per match and inserts its return value.
result = re.sub( pattern_for_telephone_number, matchobject_formatted_as_telephone_number, source )
print( result )
Protokoll
<class '_sre.SRE_Match'>
<class '_sre.SRE_Match'>
<class '_sre.SRE_Match'>
<p>Viel Text <a href="tel:+12345556789"> +1 234 555 6789</a> Viel Text
<p>Viel Text <a href="tel:+12345556789"> +1 234 555 6789</a> Viel Text
<p>Viel Text <a href="tel:+12345556789"> +1 234 555 6789</a> Viel Text
main.py
import re
def iter( s, p ):
    """Yield the text of the first capturing group for every match of *p* in *s*.

    Matches are found left to right and do not overlap. The pattern *p*
    must contain at least one capturing group, otherwise IndexError is raised.

    NOTE: the name shadows the built-in ``iter``; it is kept because the
    code that follows calls the function under this name.
    """
    for m in re.finditer( p, s ):
        yield m.group( 1 )
# ...
# Call example() with the captured part of the link target (the text after
# "example/") of every matching hyperlink found in sourcetext.
for s in iter( sourcetext, r'<a href="example/([^"]*)">example</a>' ):
    example( s )