Reguläre Ausdrücke in Python

main.py

import re
def lines(source):
    """Split *source* at every newline character and return the pieces as a list.

    A trailing newline in *source* produces a final empty string.
    """
    pieces = source.split("\n")
    return pieces
def iter(s, p):
    """Yield each non-overlapping match object of pattern *p* found in *s*.

    Note: within this module the name shadows the built-in ``iter``.
    """
    yield from re.finditer(p, s)
def matches(line):
    """Yield a match object for every standalone word ``ADD`` in *line*.

    ``ADD`` must sit between word boundaries, so ``ADD3`` is skipped while
    ``ADD.MOV`` still produces a match.
    """
    # ``iter`` is this module's regex helper, not the built-in.
    yield from iter(line, r'(\bADD\b)')
def show(line, match):
    """Print *line* with the text matched by *match* wrapped in parentheses."""
    begin, stop = match.span(0)
    before, hit, after = line[:begin], line[begin:stop], line[stop:]
    print(f"{before}({hit}){after}")
# Sample listing, one instruction per line; every standalone "ADD" found in
# it is printed in parentheses together with the rest of its line.
source = (
    "ADD R1, R2\n"
    "ADD3 R4, R5, R6\n"
    "ADD.MOV R1, R2, [0x10]\n"
)
for line in lines(source):
    for match in matches(line):
        show(line, match)

Protokoll

(ADD) R1, R2
(ADD).MOV R1, R2, [0x10]

main.py

import re
def iter(s, p):
    """Yield the first capture group of every match of pattern *p* in *s*.

    Note: within this module the name shadows the built-in ``iter``.
    """
    yield from (found.groups()[0] for found in re.finditer(p, s))
# Print the character captured after each "a=", one per line.
for s in iter(s='a=x,a=y,a=z', p='a=(.)'):
    print(s)

Protokoll

x
y
z

main.py

import re
# Print every distinct substring of tmp.txt that starts with "http" and ends
# at the nearest following "jpg", each once, as a quoted, comma-terminated
# line in order of first appearance.
with open( "tmp.txt" ) as file:     # the with-block closes the file again
    s = file.read()
# Raw string with the same regex as before: the former non-raw literal relied
# on the invalid string escapes "\[" and "\]", and its "\\|" collapsed to the
# regex escape "\|".
# NOTE(review): a literal backslash is therefore not among the accepted
# characters - confirm that this is intended.
pattern = r"(http[0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ,.!/()=?`*;:_{}\[\]\|~%^-]*?jpg)"
it = re.finditer( pattern, s )
seen = set()    # URLs that were already printed
for m in it:
    u = m.group( 1 )
    if u not in seen:
        seen.add( u )
        print( f'"{u}",' )

main.py

# section: a regular expression for balanced terms
# using ideas probably developed by "user3489112"
#
# The pattern is built from an opening piece and a closing piece, each
# repeated once per supported nesting level, around an innermost piece that
# contains no parentheses.
_DEPTH = 18                 # number of parenthesis nesting levels spelled out
_OPEN = r"[^()]*?(?:\("     # text without parentheses, then "(" opens a level
_CORE = r"[^()]*?"          # innermost text without parentheses
_CLOSE = r"\)[^()]*?)*?"    # ")" closes a level, then text without parentheses
balanced = _OPEN * _DEPTH + _CORE + _CLOSE * _DEPTH

main.py

import re
# Rewrite every ":name" placeholder as "${name}" and print the result.
placeholder = re.compile(r':(\w+)')
print(placeholder.sub(r'${\1}', "abc :def ghi :jkl mno"))

Protokoll

abc ${def} ghi ${jkl} mno

main.py

import re
# re.search (not re.sub) returns the match object that the next lines inspect;
# re.sub would treat 'a222222b' as the replacement and re.DOTALL as the text.
match = re.search( r'a.*b', 'a222222b', re.DOTALL ) # searches first occurrence, "." matches newlines
print( match )              # prints a summary of the match
print( match.group() )      # prints the full text of the match

Protokoll

<_sre.SRE_Match object; span=(0, 8), match='a222222b'>
a222222b

main.py

# Replace every "<digits><operator><digits>" term in s (operator is one of
# + - * /) by the text of its computed value.
# NOTE(review): eval() only ever receives text matched by the pattern below
# (digits, one operator, digits); it can still raise, for example on a
# division by zero. Keep the pattern this strict.
def _evaluated( term ):
    return str( eval( term.group( 0 )))
s = re.sub( r'(\d+[+\-*/]\d+)', _evaluated, s )

main.py

import re
import builtins
import _sre
# Sample HTML: three identical paragraphs, each containing one phone number.
source = (
    '<p>Viel Text +1 234 555 6789 Viel Text\n'
    '<p>Viel Text +1 234 555 6789 Viel Text\n'
    '<p>Viel Text +1 234 555 6789 Viel Text\n'
)
def hyperlink_of(text: str, reference: str) -> str:
    """Return an HTML anchor element that links *text* to *reference*.

    Both arguments must be exactly of type ``str``; this is checked with
    ``assert``. Neither value is HTML-escaped.
    """
    for argument in (text, reference):
        assert type(argument) == str
    result = '<a href="{}">{}</a>'.format(reference, text)
    assert type(result) == str
    return result
def matchobject_formatted_as_telephone_number(matchobject):
    """Turn a regex match of a telephone number into a ``tel:`` hyperlink.

    The link text is the matched number preceded by one space; the link
    target is ``tel:`` followed by the number with all spaces removed.
    """
    number = matchobject.group(0)
    compact_number = "".join(number.split(" "))
    return hyperlink_of(text=" " + number, reference="tel:" + compact_number)
# A telephone number here is "+" followed by digits and spaces, ending in a
# digit. Raw string: "\+" is a regex escape, not a valid string escape, so a
# non-raw literal triggers an invalid-escape-sequence warning.
pattern_for_telephone_number = r'\+[ 0-9]*[0-9]'
# re.sub calls the formatter for each match and inserts its return value.
result = re.sub( pattern_for_telephone_number, matchobject_formatted_as_telephone_number, source )
print( result )

Protokoll

<class '_sre.SRE_Match'>
<class '_sre.SRE_Match'>
<class '_sre.SRE_Match'>

<p>Viel Text <a href="tel:+12345556789"> +1 234 555 6789</a> Viel Text

<p>Viel Text <a href="tel:+12345556789"> +1 234 555 6789</a> Viel Text

<p>Viel Text <a href="tel:+12345556789"> +1 234 555 6789</a> Viel Text

main.py

import re
def iter(s, p):
    """Yield the first capture group of every match of pattern *p* in *s*.

    Note: within this module the name shadows the built-in ``iter``.
    """
    yield from (found.groups()[0] for found in re.finditer(p, s))
# ...
# Pass the part captured after "example/" of each matching link to example().
for s in iter(s=sourcetext, p=r'<a href="example/([^"]*)">example</a>'):
    example(s)