Mika Saari
2013-04-12 07:54:23 UTC
Hi,
I have a small application which uses pyparsing from multiple
threads. The pyparsing parser is a singleton and the actual parseString call is
inside Lock()s, so it should be thread safe. (Excerpts from the script are below.)
The problem is that after parseBlock has returned the ParseResults
to me and I have gone through the whole list, I cannot free all of the
ParseResults dictionaries. If parseBlock is called many times
with different scripts, I end up in a situation where I have a huge number
of dictionaries (len(objgraph.by_type('dict'))) and am losing memory bit by
bit. I have tried deleting the entries with del, but haven't fully figured
out the correct way of cleaning up the ParseResults. How should I
delete the returned ParseResults?
I have tested using both scanString and parseString for my case, but I
think parseString would be more suitable. Both increase the memory usage.
Thank you very much for any tips and huge thanks for pyparsing,
-Mika
@MySingleton
class MyScriptParser:
    """Singleton pyparsing-based parser for the script language.

    The grammar is built once in ``syntax()`` and stored in ``self.program``;
    ``parseBlock()`` runs it on a script while holding the module-level lock
    ``myglobalvariablehere``.
    """

    def __init__(self):
        self.syntax()

    def syntax(self):
        """Build the grammar and store the top-level parser in ``self.program``."""
        LPAR, RPAR, LBRACE, RBRACE, SEMI, COMMA, PROCENT, DOL = map(
            Suppress, "(){};,%$")

        # Types
        NAME = Word(alphas + "_", alphanums + "_")
        NUMBER = Word(nums)
        STRING = QuotedString('"')
        VARSTR = dblQuotedString
        CALL = Keyword("call")
        IF = Keyword("if")
        FOR = Keyword("for")
        FUNC = Suppress("function")
        PRINT = Keyword("print")
        ELSE = Keyword("else")

        # Collection types ('+' binds tighter than '|': ($NAME) or VARSTR)
        var = DOL + NAME | VARSTR

        # Arithmetic expression
        operand = NAME | var | NUMBER | STRING
        expr = Forward()
        expr << (operatorPrecedence(operand,
            [
                ("!", 1, opAssoc.LEFT),                       # unary '!', left-assoc
                (oneOf("+ -"), 1, opAssoc.RIGHT),             # leading sign
                (oneOf("++ --"), 1, opAssoc.RIGHT),           # unary ++ / --, right-assoc
                (oneOf("++ --"), 1, opAssoc.LEFT),            # unary ++ / --, left-assoc
                (oneOf("* / %"), 2, opAssoc.LEFT),            # multiply / divide / modulo
                (oneOf("+ -"), 2, opAssoc.LEFT),              # add / subtract
                (oneOf("< == > <= >= !="), 2, opAssoc.LEFT),  # comparison
                ("=", 2, opAssoc.LEFT),                       # assign
            ]) + Optional(LPAR + Group(Optional(delimitedList(expr))) + RPAR))
        expr.setParseAction(createTokenObject)

        # Initialize Statement (defined below, once all alternatives exist)
        stmt = Forward()

        # Body
        body = ZeroOrMore(stmt)

        # Function
        funcdecl = FUNC - Dict(Group(OneOrMore(
            STRING + LPAR + Group(Optional(Group(delimitedList(var)))) + RPAR
            + LBRACE + Group(body) + RBRACE)))
        #funcdecl.setName("funcdecl").setDebug()
        funcdecl.setName("funcdecl")
        funcdecl.setParseAction(createTokenObject)

        # Keyword statements
        ifstmt = OneOrMore(Group(
            IF + LPAR + expr + RPAR + Group(stmt)
            + Optional(Group(ELSE + Group(stmt)))))
        #ifstmt.setName("ifstmt").setDebug()
        ifstmt.setName("ifstmt")
        ifstmt.setParseAction(createTokenObject)

        # The trailing "+ SEMI" must be part of the same expression; left on a
        # line of its own it would be a no-op statement and the semicolon
        # would never be required.
        callstmt = (Group(CALL + LPAR + Group(Optional(delimitedList(var))) + RPAR)
                    + SEMI)
        # callstmt.setName("callstmt").setDebug()
        callstmt.setName("callstmt")
        callstmt.setParseAction(createTokenObject)

        forstmt = Group(
            FOR + LPAR
            + Group(Optional(expr) + SEMI + Optional(expr) + SEMI + Optional(expr))
            + RPAR + Group(stmt))
        #forstmt.setName("forstmt").setDebug()
        forstmt.setName("forstmt")
        forstmt.setParseAction(createTokenObject)

        printstmt = Group(
            PRINT + LPAR + Optional(delimitedList(var))
            + Optional(STRING + Optional(PROCENT + LPAR + delimitedList(var) + RPAR))
            + RPAR) + SEMI
        #printstmt.setName("printstmt").setDebug()
        printstmt.setName("printstmt")
        printstmt.setParseAction(createTokenObject)

        genericstmt = Group(
            NAME + LPAR + Group(Optional(delimitedList(var))) + RPAR) + SEMI
        # genericstmt.setName("genericstmt").setDebug()
        genericstmt.setName("genericstmt")
        genericstmt.setParseAction(createTokenObject)

        # Setup statement
        stmt << (callstmt | ifstmt | forstmt | printstmt | genericstmt
                 | expr + SEMI | LBRACE + ZeroOrMore(stmt) + RBRACE)

        # Main program
        self.program = ZeroOrMore(funcdecl)
        self.program.ignore(pythonStyleComment)

        # NOTE(review): enablePackrat() is a class-level switch on
        # ParserElement, not a per-grammar setting; pyparsing's documentation
        # recommends calling it right after importing pyparsing. Consider
        # moving it to module import time.
        ParserElement.enablePackrat()

    def parseBlock(self, script):
        """Parse ``script`` with the grammar and return the ParseResults.

        The parse runs while holding ``myglobalvariablehere``. The lock is
        released in a ``finally`` clause because ``parseString`` raises when
        the script does not match the grammar (``parseAll=True`` also requires
        the whole input to be consumed); without it a single bad script would
        leave the lock held and block every later caller.
        """
        myglobalvariablehere.acquire()
        try:
            # Parse the script and return the list
            # (alternative tried by the author: self.program.scanString(script))
            return self.program.parseString(script, parseAll=True)
        finally:
            myglobalvariablehere.release()
I have a small application which uses pyparsing from multiple
threads. The pyparsing parser is a singleton and the actual parseString call is
inside Lock()s, so it should be thread safe. (Excerpts from the script are below.)
The problem is that after parseBlock has returned the ParseResults
to me and I have gone through the whole list, I cannot free all of the
ParseResults dictionaries. If parseBlock is called many times
with different scripts, I end up in a situation where I have a huge number
of dictionaries (len(objgraph.by_type('dict'))) and am losing memory bit by
bit. I have tried deleting the entries with del, but haven't fully figured
out the correct way of cleaning up the ParseResults. How should I
delete the returned ParseResults?
I have tested using both scanString and parseString for my case, but I
think parseString would be more suitable. Both increase the memory usage.
Thank you very much for any tips and huge thanks for pyparsing,
-Mika
@MySingleton
class MyScriptParser:
    """Singleton pyparsing-based parser for the script language.

    The grammar is built once in ``syntax()`` and stored in ``self.program``;
    ``parseBlock()`` runs it on a script while holding the module-level lock
    ``myglobalvariablehere``.
    """

    def __init__(self):
        self.syntax()

    def syntax(self):
        """Build the grammar and store the top-level parser in ``self.program``."""
        LPAR, RPAR, LBRACE, RBRACE, SEMI, COMMA, PROCENT, DOL = map(
            Suppress, "(){};,%$")

        # Types
        NAME = Word(alphas + "_", alphanums + "_")
        NUMBER = Word(nums)
        STRING = QuotedString('"')
        VARSTR = dblQuotedString
        CALL = Keyword("call")
        IF = Keyword("if")
        FOR = Keyword("for")
        FUNC = Suppress("function")
        PRINT = Keyword("print")
        ELSE = Keyword("else")

        # Collection types ('+' binds tighter than '|': ($NAME) or VARSTR)
        var = DOL + NAME | VARSTR

        # Arithmetic expression
        operand = NAME | var | NUMBER | STRING
        expr = Forward()
        expr << (operatorPrecedence(operand,
            [
                ("!", 1, opAssoc.LEFT),                       # unary '!', left-assoc
                (oneOf("+ -"), 1, opAssoc.RIGHT),             # leading sign
                (oneOf("++ --"), 1, opAssoc.RIGHT),           # unary ++ / --, right-assoc
                (oneOf("++ --"), 1, opAssoc.LEFT),            # unary ++ / --, left-assoc
                (oneOf("* / %"), 2, opAssoc.LEFT),            # multiply / divide / modulo
                (oneOf("+ -"), 2, opAssoc.LEFT),              # add / subtract
                (oneOf("< == > <= >= !="), 2, opAssoc.LEFT),  # comparison
                ("=", 2, opAssoc.LEFT),                       # assign
            ]) + Optional(LPAR + Group(Optional(delimitedList(expr))) + RPAR))
        expr.setParseAction(createTokenObject)

        # Initialize Statement (defined below, once all alternatives exist)
        stmt = Forward()

        # Body
        body = ZeroOrMore(stmt)

        # Function
        funcdecl = FUNC - Dict(Group(OneOrMore(
            STRING + LPAR + Group(Optional(Group(delimitedList(var)))) + RPAR
            + LBRACE + Group(body) + RBRACE)))
        #funcdecl.setName("funcdecl").setDebug()
        funcdecl.setName("funcdecl")
        funcdecl.setParseAction(createTokenObject)

        # Keyword statements
        ifstmt = OneOrMore(Group(
            IF + LPAR + expr + RPAR + Group(stmt)
            + Optional(Group(ELSE + Group(stmt)))))
        #ifstmt.setName("ifstmt").setDebug()
        ifstmt.setName("ifstmt")
        ifstmt.setParseAction(createTokenObject)

        # The trailing "+ SEMI" must be part of the same expression; left on a
        # line of its own it would be a no-op statement and the semicolon
        # would never be required.
        callstmt = (Group(CALL + LPAR + Group(Optional(delimitedList(var))) + RPAR)
                    + SEMI)
        # callstmt.setName("callstmt").setDebug()
        callstmt.setName("callstmt")
        callstmt.setParseAction(createTokenObject)

        forstmt = Group(
            FOR + LPAR
            + Group(Optional(expr) + SEMI + Optional(expr) + SEMI + Optional(expr))
            + RPAR + Group(stmt))
        #forstmt.setName("forstmt").setDebug()
        forstmt.setName("forstmt")
        forstmt.setParseAction(createTokenObject)

        printstmt = Group(
            PRINT + LPAR + Optional(delimitedList(var))
            + Optional(STRING + Optional(PROCENT + LPAR + delimitedList(var) + RPAR))
            + RPAR) + SEMI
        #printstmt.setName("printstmt").setDebug()
        printstmt.setName("printstmt")
        printstmt.setParseAction(createTokenObject)

        genericstmt = Group(
            NAME + LPAR + Group(Optional(delimitedList(var))) + RPAR) + SEMI
        # genericstmt.setName("genericstmt").setDebug()
        genericstmt.setName("genericstmt")
        genericstmt.setParseAction(createTokenObject)

        # Setup statement
        stmt << (callstmt | ifstmt | forstmt | printstmt | genericstmt
                 | expr + SEMI | LBRACE + ZeroOrMore(stmt) + RBRACE)

        # Main program
        self.program = ZeroOrMore(funcdecl)
        self.program.ignore(pythonStyleComment)

        # NOTE(review): enablePackrat() is a class-level switch on
        # ParserElement, not a per-grammar setting; pyparsing's documentation
        # recommends calling it right after importing pyparsing. Consider
        # moving it to module import time.
        ParserElement.enablePackrat()

    def parseBlock(self, script):
        """Parse ``script`` with the grammar and return the ParseResults.

        The parse runs while holding ``myglobalvariablehere``. The lock is
        released in a ``finally`` clause because ``parseString`` raises when
        the script does not match the grammar (``parseAll=True`` also requires
        the whole input to be consumed); without it a single bad script would
        leave the lock held and block every later caller.
        """
        myglobalvariablehere.acquire()
        try:
            # Parse the script and return the list
            # (alternative tried by the author: self.program.scanString(script))
            return self.program.parseString(script, parseAll=True)
        finally:
            myglobalvariablehere.release()