Pyparsing

90 min | Última modificación: Diciembre 17, 2020

[1]:

import pyparsing as pp

[2]:

##
## Hello world
##

##
## The grammar is defined as:
##
## greet -> string ',' string '!'
##
greet = pp.And(
    [
        pp.Word(pp.alphas),
        pp.Literal(","),
        pp.Word(pp.alphas),
        pp.Literal("!"),
    ]
)


## parse greetings written in several languages
greeting_samples = (
    "Hello, World!",
    "Bonjour, Monde!",
    "Hola, Mundo!",
    "Hallo, Welt!",
)
for greeting_str in greeting_samples:
    greeting = greet.parseString(greeting_str)
    print(greeting)

['Hello', ',', 'World', '!']
['Bonjour', ',', 'Monde', '!']
['Hola', ',', 'Mundo', '!']
['Hallo', ',', 'Welt', '!']

[3]:

##
## Parsing of an IP address followed by a phone number
## in US format:
##
##    111.222.333.444(123)456-7890
##    131.322.393.458(599)353-7800
##

## each IP field is a run of at most three digits
ipField = pp.Word(pp.nums, max=3)
ipAddr = pp.Combine(
    ipField
    + pp.Literal(".")
    + ipField
    + pp.Literal(".")
    + ipField
    + pp.Literal(".")
    + ipField
)
## Combine() glues the matched pieces into a single token
phoneNum = pp.Combine(
    pp.And(
        [
            pp.Literal("("),
            pp.Word(pp.nums, exact=3),
            pp.Literal(")"),
            pp.Word(pp.nums, exact=3),
            pp.Literal("-"),
            pp.Word(pp.nums, exact=4),
        ]
    )
)
userdata = ipAddr + phoneNum

for text in (
    "111.222.333.444(123)456-7890",
    "131.322.393.458(599)353-7800",
):
    parsed_text = userdata.parseString(text)
    print(parsed_text)

['111.222.333.444', '(123)456-7890']
['131.322.393.458', '(599)353-7800']

[4]:

##
## Example grammar for assignment statements
## as found in programming languages.
##
## Examples:
##
##    a = 10
##    a_2=100
##    pi=3.14159
##    goldenRatio = 1.61803
##    E = mc2
##

## an identifier starts with a letter, then letters, digits or "_"
identifier = pp.Word(pp.alphas, pp.alphanums + "_")
## a number is any run of digits and dots
number = pp.Word(pp.nums + ".")
## the right-hand side is tried as an identifier first, then as a number
assignmentExpr = pp.And(
    [
        identifier,
        pp.Literal("="),
        pp.MatchFirst([identifier, number]),
    ]
)

for text in (
    "a = 10",
    "a_2=100",
    "pi=3.14159",
    "goldenRatio = 1.61803",
    "E = mc2",
):
    parsed_text = assignmentExpr.parseString(text)
    print(parsed_text)

['a', '=', '10']
['a_2', '=', '100']
['pi', '=', '3.14159']
['goldenRatio', '=', '1.61803']
['E', '=', 'mc2']

[5]:

##
## Naming parts of the expression
## with setResultsName()
##
identifier = pp.Word(pp.alphas, pp.alphanums + "_")
number = pp.Word(pp.nums + ".")

## attach a results name to each side of the assignment
assignmentExpr = pp.And(
    [
        identifier.setResultsName("lhs"),
        pp.Literal("="),
        pp.MatchFirst([identifier, number]).setResultsName("rhs"),
    ]
)

## run the parser
assignmentTokens = assignmentExpr.parseString("pi=3.14159")

## print both sides, looked up through their assigned names
print(assignmentTokens["rhs"], "is assigned to", assignmentTokens["lhs"])

3.14159 is assigned to pi

[6]:

##
## A more complex grammar, able to parse
## the following texts:
##
##    Hello, World!
##    Hi, Mom!
##    Good morning, Miss Crabtree!
##    Yo, Adrian!
##    Whattup, G?
##    How's it goin', Dude?
##    Hey, Jude!
##    Goodbye, Mr. Chips!
##

## words may contain apostrophes and periods ("goin'", "Mr.")
word = pp.Word(pp.alphas + "'.")
salutation = pp.OneOrMore(word)
comma = pp.Literal(",")
greetee = pp.OneOrMore(word)
endpunc = pp.oneOf(["!", "?"])
greeting = pp.And([salutation, comma, greetee, endpunc])

for text in (
    "Hello, World!",
    "Hi, Mom!",
    "Good morning, Miss Crabtree!",
    "Yo, Adrian!",
    "Whattup, G?",
    "How's it goin', Dude?",
    "Hey, Jude!",
    "Goodbye, Mr. Chips!",
):
    parsed_text = greeting.parseString(text)
    print(parsed_text)

['Hello', ',', 'World', '!']
['Hi', ',', 'Mom', '!']
['Good', 'morning', ',', 'Miss', 'Crabtree', '!']
['Yo', ',', 'Adrian', '!']
['Whattup', ',', 'G', '?']
["How's", 'it', "goin'", ',', 'Dude', '?']
['Hey', ',', 'Jude', '!']
['Goodbye', ',', 'Mr.', 'Chips', '!']

[7]:

##
## Extracting the salutation part
##
for t in (
    "Hello, World!",
    "Hi, Mom!",
    "Good morning, Miss Crabtree!",
    "Yo, Adrian!",
    "Whattup, G?",
    "How's it goin', Dude?",
    "Hey, Jude!",
    "Goodbye, Mr. Chips!",
):
    results = greeting.parseString(t)
    tokens = results.asList()
    ## the grammar always emits a "," token; everything before it is the salutation
    salutation = tokens[: tokens.index(",")]
    print(salutation)

['Hello']
['Hi']
['Good', 'morning']
['Yo']
['Whattup']
["How's", 'it', "goin'"]
['Hey']
['Goodbye']

[8]:

##
## Adding groups with Group()
##
word = pp.Word(pp.alphas + "'.")
## rule changed: the salutation words are nested in their own sub-list
salutation = pp.Group(pp.OneOrMore(word))
comma = pp.Literal(",")
## rule changed: the greetee words are nested in their own sub-list
greetee = pp.Group(pp.OneOrMore(word))
endpunc = pp.oneOf(["!", "?"])
greeting = pp.And([salutation, comma, greetee, endpunc])

for t in (
    "Hello, World!",
    "Hi, Mom!",
    "Good morning, Miss Crabtree!",
    "Yo, Adrian!",
    "Whattup, G?",
    "How's it goin', Dude?",
    "Hey, Jude!",
    "Goodbye, Mr. Chips!",
):
    parsed_text = greeting.parseString(t)
    print(parsed_text)

[['Hello'], ',', ['World'], '!']
[['Hi'], ',', ['Mom'], '!']
[['Good', 'morning'], ',', ['Miss', 'Crabtree'], '!']
[['Yo'], ',', ['Adrian'], '!']
[['Whattup'], ',', ['G'], '?']
[["How's", 'it', "goin'"], ',', ['Dude'], '?']
[['Hey'], ',', ['Jude'], '!']
[['Goodbye'], ',', ['Mr.', 'Chips'], '!']

[9]:

##
## Each part can be bound to its own
## variable for later use
##
## NOTE: the unpacking below rebinds the names salutation, greetee and
## endpunc to parse results.
for t in (
    "Hello, World!",
    "Hi, Mom!",
    "Good morning, Miss Crabtree!",
    "Yo, Adrian!",
    "Whattup, G?",
    "How's it goin', Dude?",
    "Hey, Jude!",
    "Goodbye, Mr. Chips!",
):
    parsed = greeting.parseString(t)
    ## four tokens: salutation group, ",", greetee group, end punctuation
    salutation, _, greetee, endpunc = parsed
    print(salutation, greetee, endpunc)

['Hello'] ['World'] !
['Hi'] ['Mom'] !
['Good', 'morning'] ['Miss', 'Crabtree'] !
['Yo'] ['Adrian'] !
['Whattup'] ['G'] ?
["How's", 'it', "goin'"] ['Dude'] ?
['Hey'] ['Jude'] !
['Goodbye'] ['Mr.', 'Chips'] !

[10]:

##
## Dropping elements from the results with Suppress
##
word = pp.Word(pp.alphas + "'.")
salutation = pp.Group(pp.OneOrMore(word))
## rule changed: the comma is still required but no longer appears in the results
comma = pp.Literal(",").suppress()
greetee = pp.Group(pp.OneOrMore(word))
endpunc = pp.oneOf(["!", "?"])
greeting = pp.And([salutation, comma, greetee, endpunc])


for t in (
    "Hello, World!",
    "Hi, Mom!",
    "Good morning, Miss Crabtree!",
    "Yo, Adrian!",
    "Whattup, G?",
    "How's it goin', Dude?",
    "Hey, Jude!",
    "Goodbye, Mr. Chips!",
):
    parsed = greeting.parseString(t)
    ## only three tokens remain now that the comma is suppressed
    salutation, greetee, endpunc = parsed
    print(salutation, greetee, endpunc)

['Hello'] ['World'] !
['Hi'] ['Mom'] !
['Good', 'morning'] ['Miss', 'Crabtree'] !
['Yo'] ['Adrian'] !
['Whattup'] ['G'] ?
["How's", 'it', "goin'"] ['Dude'] ?
['Hey'] ['Jude'] !
['Goodbye'] ['Mr.', 'Chips'] !

[11]:

##
## Splitting the parts into separate lists
##
salutes = []   ## (salutation text, end punctuation) pairs
greetees = []  ## greetee names as plain strings

for t in (
    "Hello, World!",
    "Hi, Mom!",
    "Good morning, Miss Crabtree!",
    "Yo, Adrian!",
    "Whattup, G?",
    "How's it goin', Dude?",
    "Hey, Jude!",
    "Goodbye, Mr. Chips!",
):
    salutation, greetee, endpunc = greeting.parseString(t)
    salute_text = " ".join(salutation)
    greetee_text = " ".join(greetee)
    salutes.append((salute_text, endpunc))
    greetees.append(greetee_text)

print(salutes, "---", greetees, sep="\n")

[('Hello', '!'), ('Hi', '!'), ('Good morning', '!'), ('Yo', '!'), ('Whattup', '?'), ("How's it goin'", '?'), ('Hey', '!'), ('Goodbye', '!')]
---
['World', 'Mom', 'Miss Crabtree', 'Adrian', 'G', 'Dude', 'Jude', 'Mr. Chips']

[12]:

##
## Generating random strings
##
import random

for _ in range(20):
    ## draw the salutation first and the greetee second
    salute = random.choice(salutes)
    greetee = random.choice(greetees)
    salute_text, punctuation = salute
    print("{:s}, {:s}{:s}".format(salute_text, greetee, punctuation))

Goodbye, G!
Hello, World!
Hi, Dude!
Hey, Mom!
Goodbye, Jude!
Yo, World!
Hey, World!
Goodbye, Mr. Chips!
Hey, Adrian!
Good morning, Adrian!
Hello, Miss Crabtree!
Hello, Miss Crabtree!
Hello, Mom!
How's it goin', Miss Crabtree?
Goodbye, Miss Crabtree!
Yo, Miss Crabtree!
Hi, Jude!
Yo, Mr. Chips!
Hey, Miss Crabtree!
Hey, Jude!

[13]:

##
## Another example of random sentences
##
for _ in range(20):
    ## draws happen in this order: speaker, phrase, listener
    speaker = random.choice(greetees)
    ## each entry of salutes is (text, punctuation); join them into one phrase
    phrase = "".join(random.choice(salutes))
    listener = random.choice(greetees)
    print('{:s}, say "{:s}" to {:s}.'.format(speaker, phrase, listener))

Adrian, say "Hey!" to Mom.
G, say "Goodbye!" to Adrian.
Mom, say "Goodbye!" to Dude.
Mr. Chips, say "Goodbye!" to Miss Crabtree.
Jude, say "Good morning!" to Mr. Chips.
Adrian, say "Yo!" to Dude.
Jude, say "Yo!" to Dude.
Mom, say "Hi!" to World.
World, say "Hi!" to G.
G, say "Good morning!" to Mom.
Dude, say "Hi!" to G.
G, say "Goodbye!" to Jude.
Jude, say "Good morning!" to Adrian.
Adrian, say "Hi!" to G.
World, say "Whattup?" to Miss Crabtree.
Dude, say "Hi!" to Jude.
World, say "Hey!" to Miss Crabtree.
Adrian, say "Good morning!" to Jude.
G, say "Hello!" to Mom.
Miss Crabtree, say "Yo!" to Adrian.

[14]:

##
## Sample text
##
## 09/04/2004  Virginia        44   Temple             14
## 09/04/2004  LSU             22   Oregon State       21
## 09/09/2004  Troy State      24   Missouri           14
## 01/02/2003  Florida State  103   University of Miami 2
##


##
## Basic grammar to capture the data
##
num = pp.Word(pp.nums)
date = pp.And([num, pp.Literal("/"), num, pp.Literal("/"), num])
## a school name is one or more alphabetic words
schoolName = pp.OneOrMore(pp.Word(pp.alphas))
score = pp.Word(pp.nums)
schoolAndScore = pp.And([schoolName, score])
## one line = a date followed by two (school, score) pairs
gameResult = pp.And([date, schoolAndScore, schoolAndScore])

tests = """\
09/04/2004  Virginia        44   Temple             14
09/04/2004  LSU             22   Oregon State       21
09/09/2004  Troy State      24   Missouri           14
01/02/2003  Florida State  103   University of Miami 2""".splitlines()

for test in tests:
    stats = gameResult.parseString(test)
    as_list = stats.asList()
    print(as_list)

['09', '/', '04', '/', '2004', 'Virginia', '44', 'Temple', '14']
['09', '/', '04', '/', '2004', 'LSU', '22', 'Oregon', 'State', '21']
['09', '/', '09', '/', '2004', 'Troy', 'State', '24', 'Missouri', '14']
['01', '/', '02', '/', '2003', 'Florida', 'State', '103', 'University', 'of', 'Miami', '2']

[15]:

##
## Merge the date pieces into one string
## with Combine
##
num = pp.Word(pp.nums)
slash = pp.Literal("/")
date = pp.Combine(num + slash + num + slash + num)
schoolName = pp.OneOrMore(pp.Word(pp.alphas))
score = pp.Word(pp.nums)
schoolAndScore = pp.And([schoolName, score])
gameResult = pp.And([date, schoolAndScore, schoolAndScore])

tests = """\
09/04/2004  Virginia        44   Temple             14
09/04/2004  LSU             22   Oregon State       21
09/09/2004  Troy State      24   Missouri           14
01/02/2003  Florida State  103   University of Miami 2""".splitlines()

for test in tests:
    stats = gameResult.parseString(test)
    as_list = stats.asList()
    print(as_list)

['09/04/2004', 'Virginia', '44', 'Temple', '14']
['09/04/2004', 'LSU', '22', 'Oregon', 'State', '21']
['09/09/2004', 'Troy', 'State', '24', 'Missouri', '14']
['01/02/2003', 'Florida', 'State', '103', 'University', 'of', 'Miami', '2']

[16]:

##
## Merge the words of each school name into one string
##
num = pp.Word(pp.nums)
date = pp.Combine(num + "/" + num + "/" + num)
## change: a parse action joins the matched words with single spaces
schoolName = pp.OneOrMore(pp.Word(pp.alphas)).setParseAction(
    lambda tokens: " ".join(tokens)
)
score = pp.Word(pp.nums)
schoolAndScore = pp.And([schoolName, score])
gameResult = pp.And([date, schoolAndScore, schoolAndScore])

tests = """\
09/04/2004  Virginia        44   Temple             14
09/04/2004  LSU             22   Oregon State       21
09/09/2004  Troy State      24   Missouri           14
01/02/2003  Florida State  103   University of Miami 2""".splitlines()

for test in tests:
    stats = gameResult.parseString(test)
    as_list = stats.asList()
    print(as_list)

['09/04/2004', 'Virginia', '44', 'Temple', '14']
['09/04/2004', 'LSU', '22', 'Oregon State', '21']
['09/09/2004', 'Troy State', '24', 'Missouri', '14']
['01/02/2003', 'Florida State', '103', 'University of Miami', '2']

[17]:

##
## Date validation
##
num = pp.Word(pp.nums)
date = pp.Combine(num + "/" + num + "/" + num)
## school-name words are joined into a single string
schoolName = pp.OneOrMore(pp.Word(pp.alphas)).setParseAction(
    lambda tokens: " ".join(tokens)
)
score = pp.Word(pp.nums)
schoolAndScore = pp.And([schoolName, score])
gameResult = pp.And([date, schoolAndScore, schoolAndScore])

import time
def validateDateString(tokens):
    """Parse action that rejects strings which are not valid MM/DD/YYYY dates.

    Args:
        tokens: matched tokens; ``tokens[0]`` is the date text to check.

    Returns:
        None, so the matched tokens are left unchanged.

    Raises:
        pp.ParseException: if ``tokens[0]`` does not match ``%m/%d/%Y``.
    """
    try:
        time.strptime(tokens[0], "%m/%d/%Y")
    except ValueError as err:
        # Chain explicitly so the traceback shows the strptime failure as the cause.
        raise pp.ParseException("Invalid date string (%s)" % tokens[0]) from err

## run the validator every time a date is matched
date.setParseAction(validateDateString)

## the first line was altered (month 19) to trigger the error
tests = """\
19/04/2004  Virginia        44   Temple             14
09/04/2004  LSU             22   Oregon State       21
09/09/2004  Troy State      24   Missouri           14
01/02/2003  Florida State  103   University of Miami 2""".splitlines()

for test in tests:
    stats = gameResult.parseString(test)
    as_list = stats.asList()
    print(as_list)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-17-86e9c1e9bf6b> in validateDateString(tokens)
     14     try:
---> 15         time.strptime(tokens[0], "%m/%d/%Y")
     16     except ValueError:

/usr/lib/python3.6/_strptime.py in _strptime_time(data_string, format)
    558     format string."""
--> 559     tt = _strptime(data_string, format)[0]
    560     return time.struct_time(tt[:time._STRUCT_TM_ITEMS])

/usr/lib/python3.6/_strptime.py in _strptime(data_string, format)
    361         raise ValueError("time data %r does not match format %r" %
--> 362                          (data_string, format))
    363     if len(data_string) != found.end():

ValueError: time data '19/04/2004' does not match format '%m/%d/%Y'

During handling of the above exception, another exception occurred:

ParseException                            Traceback (most recent call last)
<ipython-input-17-86e9c1e9bf6b> in <module>
     27
     28 for test in tests:
---> 29     stats = gameResult.parseString(test)
     30     print(stats.asList())

/usr/local/lib/python3.6/dist-packages/pyparsing.py in parseString(self, instring, parseAll)
   1953                 if getattr(exc, '__traceback__', None) is not None:
   1954                     exc.__traceback__ = self._trim_traceback(exc.__traceback__)
-> 1955                 raise exc
   1956         else:
   1957             return tokens

<ipython-input-17-86e9c1e9bf6b> in validateDateString(tokens)
     15         time.strptime(tokens[0], "%m/%d/%Y")
     16     except ValueError:
---> 17         raise pp.ParseException("Invalid date string (%s)" % tokens[0])
     18
     19 date.setParseAction(validateDateString)

ParseException: Invalid date string (19/04/2004)  (at char 0), (line:1, col:1)

[18]:

##
## The score is converted to an integer and each
## (school, score) pair is grouped
##
num = pp.Word(pp.nums)
date = pp.Combine(num + "/" + num + "/" + num)
schoolName = pp.OneOrMore(pp.Word(pp.alphas)).setParseAction(
    lambda tokens: " ".join(tokens)
)
## change: the matched digits are converted to an int
score = pp.Word(pp.nums)
score.setParseAction(lambda tokens: int(tokens[0]))
## change: each (school, score) pair becomes its own sub-list
schoolAndScore = pp.Group(pp.And([schoolName, score]))
gameResult = pp.And([date, schoolAndScore, schoolAndScore])

import time
def validateDateString(tokens):
    """Parse action that rejects strings which are not valid MM/DD/YYYY dates.

    Args:
        tokens: matched tokens; ``tokens[0]`` is the date text to check.

    Returns:
        None, so the matched tokens are left unchanged.

    Raises:
        pp.ParseException: if ``tokens[0]`` does not match ``%m/%d/%Y``.
    """
    try:
        time.strptime(tokens[0], "%m/%d/%Y")
    except ValueError as err:
        # Chain explicitly so the traceback shows the strptime failure as the cause.
        raise pp.ParseException("Invalid date string (%s)" % tokens[0]) from err

## run the validator every time a date is matched
date.setParseAction(validateDateString)

## all four sample lines carry valid dates here
tests = """\
09/04/2004  Virginia        44   Temple             14
09/04/2004  LSU             22   Oregon State       21
09/09/2004  Troy State      24   Missouri           14
01/02/2003  Florida State  103   University of Miami 2""".splitlines()

for test in tests:
    stats = gameResult.parseString(test)
    as_list = stats.asList()
    print(as_list)

['09/04/2004', ['Virginia', 44], ['Temple', 14]]
['09/04/2004', ['LSU', 22], ['Oregon State', 21]]
['09/09/2004', ['Troy State', 24], ['Missouri', 14]]
['01/02/2003', ['Florida State', 103], ['University of Miami', 2]]

[19]:

##
## Adding explanatory text
##
for test in tests:
    stats = gameResult.parseString(test)
    ## stats[1] and stats[2] are the two [school, score] groups
    (school1, score1), (school2, score2) = stats[1], stats[2]
    if score1 == score2:
        result = "tied"
    elif score1 > score2:
        result = "won by " + school1
    else:
        result = "won by " + school2
    summary = "{:s} {:s}({:d}) {:s}({:d}), {:s}".format(
        stats[0], school1, score1, school2, score2, result
    )
    print(summary)

09/04/2004 Virginia(44) Temple(14), won by Virginia
09/04/2004 LSU(22) Oregon State(21), won by LSU
09/09/2004 Troy State(24) Missouri(14), won by Troy State
01/02/2003 Florida State(103) University of Miami(2), won by Florida State

[20]:

##
## Using results names to improve readability
##
num = pp.Word(pp.nums)
date = pp.Combine(num + "/" + num + "/" + num)
## Attach the validator BEFORE naming the expression: setResultsName()
## returns a copy, so a parse action added to `date` afterwards would not
## reach the copy used inside gameResult and dates would go unvalidated.
date.setParseAction(validateDateString)
schoolName = pp.OneOrMore(pp.Word(pp.alphas))
schoolName.setParseAction(lambda tokens: " ".join(tokens))
score = pp.Word(pp.nums).setParseAction(
    lambda tokens: int(tokens[0])
)
schoolAndScore = pp.Group(
    schoolName.setResultsName("school") + score.setResultsName("score")
)
gameResult = (
    date.setResultsName("date")
    + schoolAndScore.setResultsName("team1")
    + schoolAndScore.setResultsName("team2")
)


for test in tests:
    stats = gameResult.parseString(test)
    ## the named fields replace the positional indexes used before
    if stats.team1.score != stats.team2.score:
        if stats.team1.score > stats.team2.score:
            result = "won by " + stats.team1.school
        else:
            result = "won by " + stats.team2.school
    else:
        result = "tied"
    print(
        "{:s} {:s}({:d}) {:s}({:d}), {:s}".format(
            stats.date,
            stats.team1.school,
            stats.team1.score,
            stats.team2.school,
            stats.team2.score,
            result,
        )
    )

09/04/2004 Virginia(44) Temple(14), won by Virginia
09/04/2004 LSU(22) Oregon State(21), won by LSU
09/09/2004 Troy State(24) Missouri(14), won by Troy State
01/02/2003 Florida State(103) University of Miami(2), won by Florida State

[21]:

##
## dump() can be used to print the parsed
## information for inspection
##
print(stats.dump())

['01/02/2003', ['Florida State', 103], ['University of Miami', 2]]
- date: '01/02/2003'
- team1: ['Florida State', 103]
  - school: 'Florida State'
  - score: 103
- team2: ['University of Miami', 2]
  - school: 'University of Miami'
  - score: 2

[22]:

##
## XML can be generated from the results
##
## NOTE(review): asXML() appears to be unavailable in pyparsing 3.x;
## confirm against the installed version before relying on it.
print(stats.asXML("GAME"))


<GAME>
  <date>01/02/2003</date>
  <team1>
    <school>Florida State</school>
    <score>103</score>
  </team1>
  <team2>
    <school>University of Miami</school>
    <score>2</score>
  </team2>
</GAME>

[23]:

##
## Reading HTML
##
from pyparsing import makeHTMLTags
## `import urllib` alone does not guarantee the `request` submodule is loaded
import urllib.request

url = "https://www.cia.gov/library/publications/the-world-factbook/docs/refmaps.html"
## close the connection deterministically and decode the raw bytes so the
## parser scans real text rather than the repr of a bytes object
with urllib.request.urlopen(url) as response:
    charset = response.headers.get_content_charset() or "utf-8"
    html = response.read().decode(charset, errors="replace")

## define the expression for the <img> tag
imgTag, endImgTag = makeHTMLTags("img")

## search for the tag and print its attributes
for img in imgTag.searchString(html):
    ## an <img> without src/alt must not abort the scan with a KeyError
    src = img.get("src", "")
    if src.endswith("jpg"):
        print("'{:s}' : {:s}".format(img.get("alt", ""), src))

'About Menu' : ../images/image-about.jpg
'Careers Menu' : ../images/image-careers.jpg
'Offices Menu' : ../images/image-offices.jpg
'News Menu' : ../images/image-news.jpg
'Library Menu' : ../images/image-library.jpg

[24]:

##
## Reading the components of a table
## (the following code still needs fixing)
##

## `import urllib` alone does not guarantee the `request` submodule is loaded
import urllib.request
## star import kept: other cells may use unqualified pyparsing names
from pyparsing import *

url = (
    "https://www.cia.gov/library/"
    "publications/the-world-factbook/"
    "appendix/appendix-g.html"
)

## open the page, read it and decode the bytes into text; the `with`
## block closes the connection even if reading fails
with urllib.request.urlopen(url) as page:
    charset = page.headers.get_content_charset() or "utf-8"
    html = page.read().decode(charset, errors="replace")

## create the table tags
tdStart, tdEnd = makeHTMLTags("td")
trStart, trEnd = makeHTMLTags("tr")

## parser specification for the table components
decimalNumber = Word(nums + ",") + Optional("." + OneOrMore(Word(nums)))
## the three parse actions run in order: join the pieces, drop the
## thousands separators, then convert the text to float
joinTokens = lambda tokens: "".join(tokens)
stripCommas = lambda tokens: tokens[0].replace(",", "")
convertToFloat = lambda tokens: float(tokens[0])
decimalNumber.setParseAction(joinTokens, stripCommas, convertToFloat)
conversionValue = tdStart + decimalNumber.setResultsName("factor") + tdEnd
## a unit is whatever text precedes the closing </td>
units = SkipTo(tdEnd)

## rutina auxiliar para limpiar la tabla
def htmlCleanup(t):
    """Return the text of a table cell with ``<br>`` tags and extra whitespace removed.

    Args:
        t: matched tokens; ``t[0]`` is the raw cell text.

    Returns:
        The cell text without ``<br>`` tags (any letter case, optional
        self-closing slash), with every whitespace run collapsed to one
        space and no leading or trailing whitespace.
    """
    import re

    # Remove the tags first: collapsing whitespace beforehand would leave a
    # double space wherever a tag sat between two spaces.
    withoutBreaks = re.sub(r"<br\s*/?>", "", t[0], flags=re.IGNORECASE)
    return " ".join(withoutBreaks.split())

## clean every unit cell as it is matched
units.setParseAction(htmlCleanup)

## parser pieces that read one table row
fromUnit = tdStart + units.setResultsName("fromUnit") + tdEnd
toUnit = tdStart + units.setResultsName("toUnit") + tdEnd
conversion = trStart + fromUnit + toUnit + conversionValue + trEnd

## print the results
for tokens, _start, _end in conversion.scanString(html):
    print(tokens, '>')
    ## "factor" is a float (see convertToFloat), so it cannot use the
    ## string-only "{:s}" format code
    print(
        "{:s} : {:s} : {}".format(
            tokens["fromUnit"], tokens["toUnit"], tokens["factor"]
        )
    )

[25]:

##
## S-expression parser example
##
##   1
##   x
##   (+ 1 2)
##   (* (+ 1 2) (+ 3 4))
##

alphaword = pp.Word(pp.alphas)
integer = pp.Word(pp.nums)

## Forward() declares the recursive rule before its body exists; the
## pp. prefix keeps this cell independent of any star import
sexp = pp.Forward()

LPAREN = pp.Suppress("(")
RPAREN = pp.Suppress(")")

## "<<=" fills in the forward declaration; unlike "<<" it cannot be
## broken by the operator precedence of "|"
sexp <<= alphaword | integer | (LPAREN + pp.ZeroOrMore(sexp) + RPAREN)

tests = """\
    red
    100
    ( red 100 blue )
    ( green ( ( 1 2 ) mauve ) plaid () )""".splitlines()
for t in tests:
    print(t)
    print(sexp.parseString(t))
    print()

    red
['red']

    100
['100']

    ( red 100 blue )
['red', '100', 'blue']

    ( green ( ( 1 2 ) mauve ) plaid () )
['green', '1', '2', 'mauve', 'plaid']

[26]:

##
## Grouping the S-expressions
##
alphaword = pp.Word(pp.alphas)
integer = pp.Word(pp.nums)

## the pp. prefix keeps this cell independent of any star import
sexp = pp.Forward()

LPAREN = pp.Suppress("(")
RPAREN = pp.Suppress(")")

## change: Group() turns every parenthesised list into a nested sub-list;
## "<<=" fills in the forward declaration safely with respect to "|"
sexp <<= alphaword | integer | pp.Group(LPAREN + pp.ZeroOrMore(sexp) + RPAREN)

tests = """\
    red
    100
    ( red 100 blue )
    ( green ( ( 1 2 ) mauve ) plaid () )""".splitlines()
for t in tests:
    print(t)
    print(sexp.parseString(t))
    print()

    red
['red']

    100
['100']

    ( red 100 blue )
[['red', '100', 'blue']]

    ( green ( ( 1 2 ) mauve ) plaid () )
[['green', [['1', '2'], 'mauve'], 'plaid', []]]

[27]:

##
## Parser for search expressions
##
##    wood and blue or red
##    wood and (blue or red)
##    (steel or iron) and "lime green"
##    not steel or iron and "lime green"
##    not(steel or iron) and "lime green"
##

## Keywords only match whole words, so a term such as "orange" is not
## split into the operator "or" followed by "ange"
and_ = pp.CaselessKeyword("and")
or_ = pp.CaselessKeyword("or")
not_ = pp.CaselessKeyword("not")
## copy() keeps the parse action off the shared module-level quotedString
searchTerm = pp.Word(pp.alphanums) | pp.quotedString.copy().setParseAction(
    pp.removeQuotes
)
## infixNotation is the current name of the deprecated operatorPrecedence;
## operators are listed from highest to lowest precedence
searchExpr = pp.infixNotation(
    searchTerm,
    [
        (not_, 1, pp.opAssoc.RIGHT),
        (and_, 2, pp.opAssoc.LEFT),
        (or_, 2, pp.opAssoc.LEFT),
    ],
)
tests = """\
    wood and blue or red
    wood and (blue or red)
    (steel or iron) and "lime green"
    not steel or iron and "lime green"
    not(steel or iron) and "lime green" """.splitlines()

for t in tests:
    print(t.strip())
    print(searchExpr.parseString(t)[0])
    print()

wood and blue or red
[['wood', 'and', 'blue'], 'or', 'red']

wood and (blue or red)
['wood', 'and', ['blue', 'or', 'red']]

(steel or iron) and "lime green"
[['steel', 'or', 'iron'], 'and', 'lime green']

not steel or iron and "lime green"
[['not', 'steel'], 'or', ['iron', 'and', 'lime green']]

not(steel or iron) and "lime green"
[['not', ['steel', 'or', 'iron']], 'and', 'lime green']