Pyparsing
90 min | Última modificación: Diciembre 17, 2020
[1]:
import pyparsing as pp
[2]:
##
## Hello world
##
## The grammar is:
##
##     greet -> word ',' word '!'
##
greet = pp.Word(pp.alphas) + pp.Literal(",") + pp.Word(pp.alphas) + pp.Literal("!")

## Parse the same greeting written in several languages and print the
## token list produced for each one.
for greeting_str in (
    "Hello, World!",
    "Bonjour, Monde!",
    "Hola, Mundo!",
    "Hallo, Welt!",
):
    greeting = greet.parseString(greeting_str)
    print(greeting)
['Hello', ',', 'World', '!']
['Bonjour', ',', 'Monde', '!']
['Hola', ',', 'Mundo', '!']
['Hallo', ',', 'Welt', '!']
[3]:
##
## Parsing an IP address immediately followed by a phone number
## in US format:
##
##     111.222.333.444(123)456-7890
##     131.322.393.458(599)353-7800
##
## Combine() glues the matched pieces of each part into a single token.
##
ipField = pp.Word(pp.nums, max=3)
ipAddr = pp.Combine(
    ipField
    + pp.Literal(".")
    + ipField
    + pp.Literal(".")
    + ipField
    + pp.Literal(".")
    + ipField
)
phoneNum = pp.Combine(
    pp.Literal("(")
    + pp.Word(pp.nums, exact=3)
    + pp.Literal(")")
    + pp.Word(pp.nums, exact=3)
    + pp.Literal("-")
    + pp.Word(pp.nums, exact=4)
)
userdata = ipAddr + phoneNum

for text in (
    "111.222.333.444(123)456-7890",
    "131.322.393.458(599)353-7800",
):
    parsed_text = userdata.parseString(text)
    print(parsed_text)
['111.222.333.444', '(123)456-7890']
['131.322.393.458', '(599)353-7800']
[4]:
##
## Example grammar for assignment statements as found in
## programming languages.
##
## Sample inputs:
##
##     a = 10
##     a_2=100
##     pi=3.14159
##     goldenRatio = 1.61803
##     E = mc2
##
## An identifier starts with a letter and continues with letters,
## digits or underscores; a number is any run of digits and dots.
identifier = pp.Word(pp.alphas, pp.alphanums + "_")
number = pp.Word(pp.nums + ".")

## The right-hand side tries `identifier` first, then `number`.
assignmentExpr = identifier + pp.Literal("=") + pp.MatchFirst([identifier, number])

for text in (
    "a = 10",
    "a_2=100",
    "pi=3.14159",
    "goldenRatio = 1.61803",
    "E = mc2",
):
    parsed_text = assignmentExpr.parseString(text)
    print(parsed_text)
['a', '=', '10']
['a_2', '=', '100']
['pi', '=', '3.14159']
['goldenRatio', '=', '1.61803']
['E', '=', 'mc2']
[5]:
##
## Giving names to parts of the expression with setResultsName()
##
identifier = pp.Word(pp.alphas, pp.alphanums + "_")
number = pp.Word(pp.nums + ".")

## Name the left-hand and right-hand sides of the assignment.
assignmentExpr = (
    identifier.setResultsName("lhs")
    + pp.Literal("=")
    + pp.MatchFirst([identifier, number]).setResultsName("rhs")
)

## Run the parser on one sample.
assignmentTokens = assignmentExpr.parseString("pi=3.14159")

## Print the components, looking them up by their assigned names.
print(assignmentTokens["rhs"], "is assigned to", assignmentTokens["lhs"])
3.14159 is assigned to pi
[6]:
##
## A more elaborate grammar, able to parse the following texts:
##
##     Hello, World!
##     Hi, Mom!
##     Good morning, Miss Crabtree!
##     Yo, Adrian!
##     Whattup, G?
##     How's it goin', Dude?
##     Hey, Jude!
##     Goodbye, Mr. Chips!
##
## Words may contain apostrophes and periods ("How's", "Mr.").
word = pp.Word(pp.alphas + "'.")
comma = pp.Literal(",")
endpunc = pp.oneOf("! ?")

## Both the salutation and the greetee are one or more words.
salutation = pp.OneOrMore(word)
greetee = pp.OneOrMore(word)
greeting = salutation + comma + greetee + endpunc

for text in (
    "Hello, World!",
    "Hi, Mom!",
    "Good morning, Miss Crabtree!",
    "Yo, Adrian!",
    "Whattup, G?",
    "How's it goin', Dude?",
    "Hey, Jude!",
    "Goodbye, Mr. Chips!",
):
    parsed_text = greeting.parseString(text)
    print(parsed_text)
['Hello', ',', 'World', '!']
['Hi', ',', 'Mom', '!']
['Good', 'morning', ',', 'Miss', 'Crabtree', '!']
['Yo', ',', 'Adrian', '!']
['Whattup', ',', 'G', '?']
["How's", 'it', "goin'", ',', 'Dude', '?']
['Hey', ',', 'Jude', '!']
['Goodbye', ',', 'Mr.', 'Chips', '!']
[7]:
##
## Extracting the salutation part: every token that comes before
## the comma.
##
for t in (
    "Hello, World!",
    "Hi, Mom!",
    "Good morning, Miss Crabtree!",
    "Yo, Adrian!",
    "Whattup, G?",
    "How's it goin', Dude?",
    "Hey, Jude!",
    "Goodbye, Mr. Chips!",
):
    results = greeting.parseString(t)
    tokens = results.asList()
    ## The grammar requires a comma, so index(",") always succeeds.
    salutation = tokens[: tokens.index(",")]
    print(salutation)
['Hello']
['Hi']
['Good', 'morning']
['Yo']
['Whattup']
["How's", 'it', "goin'"]
['Hey']
['Goodbye']
[8]:
##
## Adding structure to the results with Group()
##
word = pp.Word(pp.alphas + "'.")
comma = pp.Literal(",")
endpunc = pp.oneOf("! ?")

## Modified rules: each run of words is now returned as its own sub-list.
salutation = pp.Group(pp.OneOrMore(word))
greetee = pp.Group(pp.OneOrMore(word))
greeting = salutation + comma + greetee + endpunc

for t in (
    "Hello, World!",
    "Hi, Mom!",
    "Good morning, Miss Crabtree!",
    "Yo, Adrian!",
    "Whattup, G?",
    "How's it goin', Dude?",
    "Hey, Jude!",
    "Goodbye, Mr. Chips!",
):
    parsed_text = greeting.parseString(t)
    print(parsed_text)
[['Hello'], ',', ['World'], '!']
[['Hi'], ',', ['Mom'], '!']
[['Good', 'morning'], ',', ['Miss', 'Crabtree'], '!']
[['Yo'], ',', ['Adrian'], '!']
[['Whattup'], ',', ['G'], '?']
[["How's", 'it', "goin'"], ',', ['Dude'], '?']
[['Hey'], ',', ['Jude'], '!']
[['Goodbye'], ',', ['Mr.', 'Chips'], '!']
[9]:
##
## Each part of the result can be bound to a variable for later use.
## The parsed result has four items: salutation group, comma,
## greetee group and end punctuation; the comma is skipped.
##
for t in (
    "Hello, World!",
    "Hi, Mom!",
    "Good morning, Miss Crabtree!",
    "Yo, Adrian!",
    "Whattup, G?",
    "How's it goin', Dude?",
    "Hey, Jude!",
    "Goodbye, Mr. Chips!",
):
    parsed = greeting.parseString(t)
    salutation = parsed[0]
    greetee = parsed[2]
    endpunc = parsed[3]
    print(salutation, greetee, endpunc)
['Hello'] ['World'] !
['Hi'] ['Mom'] !
['Good', 'morning'] ['Miss', 'Crabtree'] !
['Yo'] ['Adrian'] !
['Whattup'] ['G'] ?
["How's", 'it', "goin'"] ['Dude'] ?
['Hey'] ['Jude'] !
['Goodbye'] ['Mr.', 'Chips'] !
[10]:
##
## Dropping tokens from the results with Suppress
##
word = pp.Word(pp.alphas + "'.")
endpunc = pp.oneOf("! ?")
salutation = pp.Group(pp.OneOrMore(word))
greetee = pp.Group(pp.OneOrMore(word))

## Modified rule: the comma is still required, but it is no longer
## reported in the parsed output.
comma = pp.Literal(",").suppress()
greeting = salutation + comma + greetee + endpunc

for t in (
    "Hello, World!",
    "Hi, Mom!",
    "Good morning, Miss Crabtree!",
    "Yo, Adrian!",
    "Whattup, G?",
    "How's it goin', Dude?",
    "Hey, Jude!",
    "Goodbye, Mr. Chips!",
):
    parsed = greeting.parseString(t)
    salutation = parsed[0]
    greetee = parsed[1]
    endpunc = parsed[2]
    print(salutation, greetee, endpunc)
['Hello'] ['World'] !
['Hi'] ['Mom'] !
['Good', 'morning'] ['Miss', 'Crabtree'] !
['Yo'] ['Adrian'] !
['Whattup'] ['G'] ?
["How's", 'it', "goin'"] ['Dude'] ?
['Hey'] ['Jude'] !
['Goodbye'] ['Mr.', 'Chips'] !
[11]:
##
## Splitting the parsed pieces into lists:
##   salutes  -> (salutation text, end punctuation) pairs
##   greetees -> greetee text
##
salutes = []
greetees = []
for t in (
    "Hello, World!",
    "Hi, Mom!",
    "Good morning, Miss Crabtree!",
    "Yo, Adrian!",
    "Whattup, G?",
    "How's it goin', Dude?",
    "Hey, Jude!",
    "Goodbye, Mr. Chips!",
):
    parsed = greeting.parseString(t)
    salutation = parsed[0]
    greetee = parsed[1]
    endpunc = parsed[2]
    ## Each group is a sequence of words; rebuild the phrase with spaces.
    salutes.append((" ".join(salutation), endpunc))
    greetees.append(" ".join(greetee))

print(salutes)
print('---')
print(greetees)
[('Hello', '!'), ('Hi', '!'), ('Good morning', '!'), ('Yo', '!'), ('Whattup', '?'), ("How's it goin'", '?'), ('Hey', '!'), ('Goodbye', '!')]
---
['World', 'Mom', 'Miss Crabtree', 'Adrian', 'G', 'Dude', 'Jude', 'Mr. Chips']
[12]:
##
## Generating random greetings by pairing a random salutation
## (with its punctuation) with a random greetee.
##
import random

for _ in range(20):
    ## Draw the salutation first and the greetee second.
    salute = random.choice(salutes)
    greetee = random.choice(greetees)
    opening, closing = salute
    print("{:s}, {:s}{:s}".format(opening, greetee, closing))
Goodbye, G!
Hello, World!
Hi, Dude!
Hey, Mom!
Goodbye, Jude!
Yo, World!
Hey, World!
Goodbye, Mr. Chips!
Hey, Adrian!
Good morning, Adrian!
Hello, Miss Crabtree!
Hello, Miss Crabtree!
Hello, Mom!
How's it goin', Miss Crabtree?
Goodbye, Miss Crabtree!
Yo, Miss Crabtree!
Hi, Jude!
Yo, Mr. Chips!
Hey, Miss Crabtree!
Hey, Jude!
[13]:
##
## Another example of random sentences
##
for _ in range(20):
    ## Draw order: first greetee, salutation, second greetee.
    speaker = random.choice(greetees)
    ## A salute is a (text, punctuation) pair; join it into one string.
    phrase = "".join(random.choice(salutes))
    listener = random.choice(greetees)
    print('{:s}, say "{:s}" to {:s}.'.format(speaker, phrase, listener))
Adrian, say "Hey!" to Mom.
G, say "Goodbye!" to Adrian.
Mom, say "Goodbye!" to Dude.
Mr. Chips, say "Goodbye!" to Miss Crabtree.
Jude, say "Good morning!" to Mr. Chips.
Adrian, say "Yo!" to Dude.
Jude, say "Yo!" to Dude.
Mom, say "Hi!" to World.
World, say "Hi!" to G.
G, say "Good morning!" to Mom.
Dude, say "Hi!" to G.
G, say "Goodbye!" to Jude.
Jude, say "Good morning!" to Adrian.
Adrian, say "Hi!" to G.
World, say "Whattup?" to Miss Crabtree.
Dude, say "Hi!" to Jude.
World, say "Hey!" to Miss Crabtree.
Adrian, say "Good morning!" to Jude.
G, say "Hello!" to Mom.
Miss Crabtree, say "Yo!" to Adrian.
[14]:
##
## Sample text:
##
##     09/04/2004 Virginia 44 Temple 14
##     09/04/2004 LSU 22 Oregon State 21
##     09/09/2004 Troy State 24 Missouri 14
##     01/02/2003 Florida State 103 University of Miami 2
##
##
## Basic grammar to capture the data: a date followed by two
## (school name, score) pairs.
##
num = pp.Word(pp.nums)
score = pp.Word(pp.nums)
date = num + pp.Literal("/") + num + pp.Literal("/") + num
schoolName = pp.OneOrMore(pp.Word(pp.alphas))
schoolAndScore = schoolName + score
gameResult = date + schoolAndScore + schoolAndScore

tests = """\
09/04/2004 Virginia 44 Temple 14
09/04/2004 LSU 22 Oregon State 21
09/09/2004 Troy State 24 Missouri 14
01/02/2003 Florida State 103 University of Miami 2""".splitlines()

for test in tests:
    stats = gameResult.parseString(test)
    print(stats.asList())
['09', '/', '04', '/', '2004', 'Virginia', '44', 'Temple', '14']
['09', '/', '04', '/', '2004', 'LSU', '22', 'Oregon', 'State', '21']
['09', '/', '09', '/', '2004', 'Troy', 'State', '24', 'Missouri', '14']
['01', '/', '02', '/', '2003', 'Florida', 'State', '103', 'University', 'of', 'Miami', '2']
[15]:
##
## Merge the pieces of the date into a single string
## using Combine
##
num = pp.Word(pp.nums)
score = pp.Word(pp.nums)
date = pp.Combine(num + pp.Literal("/") + num + pp.Literal("/") + num)
schoolName = pp.OneOrMore(pp.Word(pp.alphas))
schoolAndScore = schoolName + score
gameResult = date + schoolAndScore + schoolAndScore

tests = """\
09/04/2004 Virginia 44 Temple 14
09/04/2004 LSU 22 Oregon State 21
09/09/2004 Troy State 24 Missouri 14
01/02/2003 Florida State 103 University of Miami 2""".splitlines()

for test in tests:
    stats = gameResult.parseString(test)
    print(stats.asList())
['09/04/2004', 'Virginia', '44', 'Temple', '14']
['09/04/2004', 'LSU', '22', 'Oregon', 'State', '21']
['09/09/2004', 'Troy', 'State', '24', 'Missouri', '14']
['01/02/2003', 'Florida', 'State', '103', 'University', 'of', 'Miami', '2']
[16]:
##
## Merge the words of each school name into a single string
##
num = pp.Word(pp.nums)
score = pp.Word(pp.nums)
date = pp.Combine(num + pp.Literal("/") + num + pp.Literal("/") + num)

## Modification: a parse action joins the name's words with spaces.
schoolName = pp.OneOrMore(pp.Word(pp.alphas)).setParseAction(
    lambda tokens: " ".join(tokens)
)
schoolAndScore = schoolName + score
gameResult = date + schoolAndScore + schoolAndScore

tests = """\
09/04/2004 Virginia 44 Temple 14
09/04/2004 LSU 22 Oregon State 21
09/09/2004 Troy State 24 Missouri 14
01/02/2003 Florida State 103 University of Miami 2""".splitlines()

for test in tests:
    stats = gameResult.parseString(test)
    print(stats.asList())
['09/04/2004', 'Virginia', '44', 'Temple', '14']
['09/04/2004', 'LSU', '22', 'Oregon State', '21']
['09/09/2004', 'Troy State', '24', 'Missouri', '14']
['01/02/2003', 'Florida State', '103', 'University of Miami', '2']
[17]:
##
## Validating the dates
##
import time

num = pp.Word(pp.nums)
score = pp.Word(pp.nums)
date = pp.Combine(num + pp.Literal("/") + num + pp.Literal("/") + num)
schoolName = pp.OneOrMore(pp.Word(pp.alphas)).setParseAction(
    lambda tokens: " ".join(tokens)
)
schoolAndScore = schoolName + score
gameResult = date + schoolAndScore + schoolAndScore


def validateDateString(tokens):
    """Parse action that rejects dates not matching month/day/year.

    Raises pp.ParseException when time.strptime cannot read tokens[0]
    with the "%m/%d/%Y" format.
    """
    candidate = tokens[0]
    try:
        time.strptime(candidate, "%m/%d/%Y")
    except ValueError:
        raise pp.ParseException("Invalid date string (%s)" % candidate)


date.setParseAction(validateDateString)

## The first line carries an invalid month (19) on purpose, so parsing
## stops with an error on the first test.
tests = """\
19/04/2004 Virginia 44 Temple 14
09/04/2004 LSU 22 Oregon State 21
09/09/2004 Troy State 24 Missouri 14
01/02/2003 Florida State 103 University of Miami 2""".splitlines()

for test in tests:
    stats = gameResult.parseString(test)
    print(stats.asList())
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-17-86e9c1e9bf6b> in validateDateString(tokens)
14 try:
---> 15 time.strptime(tokens[0], "%m/%d/%Y")
16 except ValueError:
/usr/lib/python3.6/_strptime.py in _strptime_time(data_string, format)
558 format string."""
--> 559 tt = _strptime(data_string, format)[0]
560 return time.struct_time(tt[:time._STRUCT_TM_ITEMS])
/usr/lib/python3.6/_strptime.py in _strptime(data_string, format)
361 raise ValueError("time data %r does not match format %r" %
--> 362 (data_string, format))
363 if len(data_string) != found.end():
ValueError: time data '19/04/2004' does not match format '%m/%d/%Y'
During handling of the above exception, another exception occurred:
ParseException Traceback (most recent call last)
<ipython-input-17-86e9c1e9bf6b> in <module>
27
28 for test in tests:
---> 29 stats = gameResult.parseString(test)
30 print(stats.asList())
/usr/local/lib/python3.6/dist-packages/pyparsing.py in parseString(self, instring, parseAll)
1953 if getattr(exc, '__traceback__', None) is not None:
1954 exc.__traceback__ = self._trim_traceback(exc.__traceback__)
-> 1955 raise exc
1956 else:
1957 return tokens
<ipython-input-17-86e9c1e9bf6b> in validateDateString(tokens)
15 time.strptime(tokens[0], "%m/%d/%Y")
16 except ValueError:
---> 17 raise pp.ParseException("Invalid date string (%s)" % tokens[0])
18
19 date.setParseAction(validateDateString)
ParseException: Invalid date string (19/04/2004) (at char 0), (line:1, col:1)
[18]:
##
## The score is converted to an integer and each school is grouped
## together with its score
##
import time

num = pp.Word(pp.nums)
date = pp.Combine(num + pp.Literal("/") + num + pp.Literal("/") + num)
schoolName = pp.OneOrMore(pp.Word(pp.alphas)).setParseAction(
    lambda tokens: " ".join(tokens)
)

## Modification: the score token becomes an int instead of a string.
score = pp.Word(pp.nums).setParseAction(lambda tokens: int(tokens[0]))
schoolAndScore = pp.Group(schoolName + score)
gameResult = date + schoolAndScore + schoolAndScore


def validateDateString(tokens):
    """Parse action that rejects dates not matching month/day/year.

    Raises pp.ParseException when time.strptime cannot read tokens[0]
    with the "%m/%d/%Y" format.
    """
    candidate = tokens[0]
    try:
        time.strptime(candidate, "%m/%d/%Y")
    except ValueError:
        raise pp.ParseException("Invalid date string (%s)" % candidate)


date.setParseAction(validateDateString)

## All dates below are valid, so every line parses.
tests = """\
09/04/2004 Virginia 44 Temple 14
09/04/2004 LSU 22 Oregon State 21
09/09/2004 Troy State 24 Missouri 14
01/02/2003 Florida State 103 University of Miami 2""".splitlines()

for test in tests:
    stats = gameResult.parseString(test)
    print(stats.asList())
['09/04/2004', ['Virginia', 44], ['Temple', 14]]
['09/04/2004', ['LSU', 22], ['Oregon State', 21]]
['09/09/2004', ['Troy State', 24], ['Missouri', 14]]
['01/02/2003', ['Florida State', 103], ['University of Miami', 2]]
[19]:
##
## Adding explanatory text to each parsed result
##
for test in tests:
    stats = gameResult.parseString(test)
    ## stats[1] and stats[2] are the two [school, score] groups.
    first, second = stats[1], stats[2]
    if first[1] > second[1]:
        result = "won by " + first[0]
    elif first[1] < second[1]:
        result = "won by " + second[0]
    else:
        result = "tied"
    summary = "{:s} {:s}({:d}) {:s}({:d}), {:s}".format(
        stats[0], first[0], first[1], second[0], second[1], result
    )
    print(summary)
09/04/2004 Virginia(44) Temple(14), won by Virginia
09/04/2004 LSU(22) Oregon State(21), won by LSU
09/09/2004 Troy State(24) Missouri(14), won by Troy State
01/02/2003 Florida State(103) University of Miami(2), won by Florida State
[20]:
##
## Using results names to improve readability
##
num = pp.Word(pp.nums)
date = pp.Combine(num + "/" + num + "/" + num)
## The validation must be attached BEFORE the expression is named:
## setResultsName() returns a copy, so a parse action added to `date`
## afterwards would never reach the copy stored inside gameResult and
## invalid dates would be accepted silently.
date.setParseAction(validateDateString)
schoolName = pp.OneOrMore(pp.Word(pp.alphas))
schoolName.setParseAction(lambda tokens: " ".join(tokens))
score = pp.Word(pp.nums).setParseAction(lambda tokens: int(tokens[0]))
schoolAndScore = pp.Group(
    schoolName.setResultsName("school") + score.setResultsName("score")
)
gameResult = (
    date.setResultsName("date")
    + schoolAndScore.setResultsName("team1")
    + schoolAndScore.setResultsName("team2")
)

for test in tests:
    stats = gameResult.parseString(test)
    ## Fields are now reached by name instead of by position.
    if stats.team1.score != stats.team2.score:
        if stats.team1.score > stats.team2.score:
            result = "won by " + stats.team1.school
        else:
            result = "won by " + stats.team2.school
    else:
        result = "tied"
    print(
        "{:s} {:s}({:d}) {:s}({:d}), {:s}".format(
            stats.date,
            stats.team1.school,
            stats.team1.score,
            stats.team2.school,
            stats.team2.score,
            result,
        )
    )
09/04/2004 Virginia(44) Temple(14), won by Virginia
09/04/2004 LSU(22) Oregon State(21), won by LSU
09/09/2004 Troy State(24) Missouri(14), won by Troy State
01/02/2003 Florida State(103) University of Miami(2), won by Florida State
[21]:
##
## dump() can be used to print the parsed information
## for inspection
##
print(stats.dump())
['01/02/2003', ['Florida State', 103], ['University of Miami', 2]]
- date: '01/02/2003'
- team1: ['Florida State', 103]
- school: 'Florida State'
- score: 103
- team2: ['University of Miami', 2]
- school: 'University of Miami'
- score: 2
[22]:
##
## The results can also be rendered as XML; the argument is used
## as the name of the root tag
##
## NOTE(review): asXML() is deprecated in pyparsing and appears to be
## absent from the 3.x series -- confirm before upgrading.
##
print(stats.asXML("GAME"))
<GAME>
<date>01/02/2003</date>
<team1>
<school>Florida State</school>
<score>103</score>
</team1>
<team2>
<school>University of Miami</school>
<score>2</score>
</team2>
</GAME>
[23]:
##
## Reading HTML
##
## `import urllib` alone does not load the `urllib.request` submodule,
## so it is imported explicitly here.
import urllib.request

from pyparsing import makeHTMLTags

url = "https://www.cia.gov/library/publications/the-world-factbook/docs/refmaps.html"

## The context manager closes the connection even if reading fails.
## The payload is decoded to text because pyparsing works on str; raw
## bytes would be parsed through their repr() instead of their content.
with urllib.request.urlopen(url) as response:
    html = response.read().decode("utf-8", errors="replace")

## Build the expressions for the opening and closing <img> tags.
imgTag, endImgTag = makeHTMLTags("img")

## Find every <img> tag and print the attributes of the JPEG ones.
for img in imgTag.searchString(html):
    if img["src"].endswith("jpg"):
        print("'{:s}' : {:s}".format(img["alt"], img["src"]))
'About Menu' : ../images/image-about.jpg
'Careers Menu' : ../images/image-careers.jpg
'Offices Menu' : ../images/image-offices.jpg
'News Menu' : ../images/image-news.jpg
'Library Menu' : ../images/image-library.jpg
[24]:
##
## Reading the cells of an HTML table
##
## `import urllib` alone does not load the `urllib.request` submodule,
## so it is imported explicitly here.
import urllib.request

from pyparsing import *

url = (
    "https://www.cia.gov/library/"
    "publications/the-world-factbook/"
    "appendix/appendix-g.html"
)

## Open the page and read it. The context manager closes the connection
## even on error; the payload is decoded because pyparsing works on str.
with urllib.request.urlopen(url) as page:
    html = page.read().decode("utf-8", errors="replace")

## Expressions for the table tags.
tdStart, tdEnd = makeHTMLTags("td")
trStart, trEnd = makeHTMLTags("tr")

## Parser for the numeric cell: digits with thousands separators and an
## optional fractional part.
decimalNumber = Word(nums + ",") + Optional("." + OneOrMore(Word(nums)))


def joinTokens(tokens):
    """Concatenate the matched pieces into a single string."""
    return "".join(tokens)


def stripCommas(tokens):
    """Remove the thousands separators from the joined number."""
    return tokens[0].replace(",", "")


def convertToFloat(tokens):
    """Convert the cleaned-up number text to a float."""
    return float(tokens[0])


## The three actions run in order, each receiving the previous result.
decimalNumber.setParseAction(joinTokens, stripCommas, convertToFloat)
conversionValue = tdStart + decimalNumber.setResultsName("factor") + tdEnd
units = SkipTo(tdEnd)


## Helper that tidies the text of a unit cell.
def htmlCleanup(t):
    """Collapse whitespace runs and drop <br> tags from the cell text."""
    unitText = t[0]
    unitText = " ".join(unitText.split())
    unitText = unitText.replace("<br>", "")
    return unitText


units.setParseAction(htmlCleanup)

## Parser for one table row: from-unit, to-unit and conversion factor.
fromUnit = tdStart + units.setResultsName("fromUnit") + tdEnd
toUnit = tdStart + units.setResultsName("toUnit") + tdEnd
conversion = trStart + fromUnit + toUnit + conversionValue + trEnd

## Print the results.
for tokens, start, end in conversion.scanString(html):
    print(tokens, '>')
    ## "factor" is a float (see convertToFloat), so it cannot use the
    ## "{:s}" format spec; "{}" prints its default representation.
    print(
        "{:s} : {:s} : {}".format(
            tokens["fromUnit"], tokens["toUnit"], tokens["factor"]
        )
    )
[25]:
##
## S-expression parser example
##
##     1
##     x
##     (+ 1 2)
##     (* (+ 1 2) (+ 3 4))
##
alphaword = pp.Word(pp.alphas)
integer = pp.Word(pp.nums)

## Forward() declares the expression before it is defined, which is what
## lets the grammar refer to itself. Every pyparsing name goes through
## the `pp` alias so this cell does not depend on a star import having
## been executed somewhere else.
sexp = pp.Forward()
LPAREN = pp.Suppress("(")
RPAREN = pp.Suppress(")")

## `<<=` fills in the forward-declared expression.
sexp <<= alphaword | integer | (LPAREN + pp.ZeroOrMore(sexp) + RPAREN)

tests = """\
red
100
( red 100 blue )
( green ( ( 1 2 ) mauve ) plaid () )""".splitlines()

## Without grouping, nested lists come back flattened into one list.
for t in tests:
    print(t)
    print(sexp.parseString(t))
    print()
red
['red']
100
['100']
( red 100 blue )
['red', '100', 'blue']
( green ( ( 1 2 ) mauve ) plaid () )
['green', '1', '2', 'mauve', 'plaid']
[26]:
##
## Grouping the S-expressions
##
alphaword = pp.Word(pp.alphas)
integer = pp.Word(pp.nums)

## Every pyparsing name goes through the `pp` alias so this cell does
## not depend on a star import having been executed somewhere else.
sexp = pp.Forward()
LPAREN = pp.Suppress("(")
RPAREN = pp.Suppress(")")

## Modification: each parenthesised list is wrapped in Group(), so the
## nesting of the input is preserved in the results.
sexp <<= alphaword | integer | pp.Group(LPAREN + pp.ZeroOrMore(sexp) + RPAREN)

tests = """\
red
100
( red 100 blue )
( green ( ( 1 2 ) mauve ) plaid () )""".splitlines()

for t in tests:
    print(t)
    print(sexp.parseString(t))
    print()
red
['red']
100
['100']
( red 100 blue )
[['red', '100', 'blue']]
( green ( ( 1 2 ) mauve ) plaid () )
[['green', [['1', '2'], 'mauve'], 'plaid', []]]
[27]:
##
## Parser for search expressions
##
##     wood and blue or red
##     wood and (blue or red)
##     (steel or iron) and "lime green"
##     not steel or iron and "lime green"
##     not(steel or iron) and "lime green"
##
from pyparsing import *

and_ = CaselessLiteral("and")
or_ = CaselessLiteral("or")
not_ = CaselessLiteral("not")

## quotedString is a shared module-level expression; work on a copy so
## the removeQuotes action does not leak into other users of it.
searchTerm = Word(alphanums) | quotedString.copy().setParseAction(removeQuotes)

## infixNotation is the current name of the helper formerly called
## operatorPrecedence. Operators are listed from highest to lowest
## precedence: unary "not", then "and", then "or".
searchExpr = infixNotation(
    searchTerm,
    [
        (not_, 1, opAssoc.RIGHT),
        (and_, 2, opAssoc.LEFT),
        (or_, 2, opAssoc.LEFT),
    ],
)

tests = """\
wood and blue or red
wood and (blue or red)
(steel or iron) and "lime green"
not steel or iron and "lime green"
not(steel or iron) and "lime green" """.splitlines()

for t in tests:
    print(t.strip())
    print(searchExpr.parseString(t)[0])
    print()
wood and blue or red
[['wood', 'and', 'blue'], 'or', 'red']
wood and (blue or red)
['wood', 'and', ['blue', 'or', 'red']]
(steel or iron) and "lime green"
[['steel', 'or', 'iron'], 'and', 'lime green']
not steel or iron and "lime green"
[['not', 'steel'], 'or', ['iron', 'and', 'lime green']]
not(steel or iron) and "lime green"
[['not', ['steel', 'or', 'iron']], 'and', 'lime green']