[Mumps2Py:] [83] lots of parsing bug fixes, added parsing support for structured system variables.

[ Thread Index | Date Index | More lists.mumps2py.org/discuss Archives ]


Revision: 83
Author:   pgallot
Date:     2008-02-04 23:33:14 +0000 (Mon, 04 Feb 2008)

Log Message:
-----------
Lots of parsing bug fixes; added parsing support for structured system variables.

Modified Paths:
--------------
    trunk/mumps2py/mumps2tok.py


Modified: trunk/mumps2py/mumps2tok.py
===================================================================
--- trunk/mumps2py/mumps2tok.py	2008-02-04 23:28:05 UTC (rev 82)
+++ trunk/mumps2py/mumps2tok.py	2008-02-04 23:33:14 UTC (rev 83)
@@ -19,8 +19,6 @@
 import re, fileinput
 from tokens import *
 
-GLOBALDB = 0 # debugging variable.
-
 class ParseError(Exception):
     """ Exception class for errors relating to parsing Mumps code."""
     def __init__(self, the_module, line, dscr = "", pos = 0, lineno = -1):
@@ -45,15 +43,16 @@
         return errormsg
 
 MUMPS_RE_DICT = {
-    "var":re.compile(r"((?P<globalvar>\^{1})|(?P<pbr>[.]{1}))?(?P<var>[A-Za-z%][A-Za-z0-9]*)(?P<indexed>[(]{1})?"),
+    "var":re.compile(r"((?P<globalvar>\^{1}))?(?P<var>[A-Za-z%][A-Za-z0-9]*)(?P<indexed>[(]{1})?"),
     "indirection":re.compile(r"@"),
     "op":re.compile(r"[-_/+']|([']?(([*]{1,2})|([]]{1,2})|\\|[[=><#&!/]))"),
     "str":re.compile(r'"'),
     "num":re.compile(r"([0-9]*[.])?[0-9]+([eE][+-][0-9]+)?"),
-    "extr":re.compile(r"[$]{2}(?P<label>\w+)(\^(?P<routine>[%A-Za-z][A-Za-z0-9]*))?(?P<func>[(]{1})?"),
+    "extr":re.compile(r"[$]{2}"),
     "intr":re.compile(r"[$]{1}(?P<intrinsic>[A-Za-z0-9]+)(?P<func>[(]{1})?"),
+    "structsysvar":re.compile(r"\^[$]{1}(?P<var>[A-Za-z0-9]+)(?P<indexed>[(]{1})?"),
     "patmat":re.compile(r"[?]"),
-    "patAtom":re.compile(r"(((?P<mnrp>\d+)?[.](?P<mxrp>\d+)?)|(?P<rpcount>\d+)){1}((?P<altlist>[(])|(?P<patcode>[ACELNPUacelnpu]+)|(?P<str>([\"]([\"]{2}|.)*?[\"])))"),
+    "patAtom":re.compile(r"(((?P<mnrp>\d+)?[.](?P<mxrp>\d+)?)|(?P<rpcount>\d+)){1}((?P<altlist>[(])|(?P<patcode>[ACELNPUacelnpu]+)|(?P<str>([\"])))"),
     "nakedRef":re.compile(r"\^\("),
     "subexpr":re.compile(r"[(]"),
     "command":re.compile(r"(?P<indents>[ .]*)(?P<cmd>[A-Za-z]+)"),
@@ -68,7 +67,13 @@
     """ parses a list of parameters """
     actuallist = []
     while line[pos] != ")":
+        pass_by_ref = False
+        if line[pos] == '.' and not MUMPS_RE_DICT['num'].match(line, pos):
+            pass_by_ref = True
+            pos = pos + 1            
         exprtok = parse_expr(mumps_module, line, pos, r"([:,)])")
+        if pass_by_ref:
+            exprtok.pass_by_ref = True            
         actuallist.append(exprtok)
         pos = exprtok.end
         if line[pos] == ":":
@@ -82,16 +87,12 @@
 
 def consume_entry_ref(mumps_module, line, pos):
     """ parses a Mumps Entry Ref"""
-    #print "entry_ref:(%s)" % line[pos:]
-    #global GLOBALDB
-    #GLOBALDB=1
-
     token = Token(ENTRYREF, pos)
     if line[pos] == '@':
         if MUMPS_RE_DICT["nakedRef"].match(line, pos + 1):
             indirect = parse_expr(mumps_module, line, pos+1, r"([,+: ]|\s$)")
             token.indirect = indirect
-            pos = indirect.end + 1
+            pos = indirect.end
         else:
             indirect = parse_expr(mumps_module, line, pos, \
                                   r"(\^|[ +),:]|\s*$)")
@@ -128,12 +129,9 @@
             else:
                 token.routine = m_name.group()
                 pos = m_name.end()
-                if not MUMPS_RE_DICT["cmdEnd"].match(line, pos) and \
-                   line[pos] == '(':
-                    pos, token.params = \
-                         consume_actuallist(mumps_module, line, pos + 1)
-
-    #GLOBALDB=0
+        if not MUMPS_RE_DICT["cmdEnd"].match(line, pos) and line[pos] == '(':
+            pos, token.params = consume_actuallist(mumps_module, line, pos + 1)
+    
     token.end = pos
     return (token, pos)
 
@@ -176,8 +174,7 @@
 
 def parse_expr(mumps_module, line, startpos, terminator = r"([ ]|\s*$)"):
     """ parses a Mumps expression by recursive descent"""
-    if GLOBALDB > 0:
-        print "parse_expr %d [%s]" % (startpos, line[startpos:])
+    #if __debug__: print "parse_expr %d [%s]" % (startpos, line[startpos:])
     
     def consume_pattern(lineno, line, pos):
         """ parses a mumps-style pattern-matchine pattern"""
@@ -194,8 +191,9 @@
                 if atom.group("mxrp"):
                     sub_token.maxrep = int(atom.group("mxrp"))
             if atom.group("str"):
-                sub_token.match_str = atom.group("str")
-                pos = atom.end()
+                sub_token.match_str = parse_expr(\
+                    mumps_module, line, atom.start("str"), r'\w|[ .,:)(]|\s*$')
+                pos = sub_token.match_str.end
             elif atom.group("altlist"):
                 sub_token.alt_list = []
                 pos = atom.end()
@@ -249,11 +247,6 @@
         token = Token(OP_TOKEN_DICT[op_type], mobj.start())
         return (token, mobj.end())
 
-    def consume_unknown(mobj):
-        token = Token(UNKNOWNITEM, mobj.start())
-        token.item = mobj.group()
-        return (token, mobj.end())
-
     def consume_naked_ref(mobj):
         """ parses a Mumps naked reference"""
         token = Token(OPNAKEDREF, mobj.start())
@@ -265,7 +258,7 @@
         """ parses a Mumps indirection argument"""
         token = Token(INDIRECTION, mobj.start())
         expr = parse_expr(mumps_module, line, mobj.end(), \
-                         r"(\\|[-@:,+=><#&!*/ ')]|\s*$)")
+                         r"(\^|\\|[-@:,+=><#&!*/ ')]|\s*$)")
         token.expr = expr
         pos = expr.end
         if line[pos] == '@' and line[pos + 1] == '(':
@@ -279,30 +272,20 @@
             token = VarToken(GLOBALVAR, mobj.start())
         else:
             token = VarToken(LOCALVAR, mobj.start())
-        if mobj.group("pbr"):
-            token.pass_by_reference = True 
         token.varname = mobj.group("var")
         pos = mobj.end("var")
         if mobj.group("indexed"):
-            pos = pos + 1
-            pos, indices = consume_actuallist(mumps_module, line, pos)
+            pos, indices = consume_actuallist(mumps_module, line, pos + 1)
             token.indices = indices
         return (token, pos)
 
     def consume_extrinsic(mobj):
         """ parses a Mumps extrinsic function or variable"""
-        token = Token(USERFUNC, mobj.start())
         pos = mobj.end()
-        if mobj.group("label"):
-            token.label = mobj.group("label")
-        if mobj.group("routine"):
-            token.routine = mobj.group("routine")
-        if mobj.group("func"):
-            pos, params = consume_actuallist(mumps_module, line, pos)
-            token.params = params
-        else: # vars...
-            token.params = []
-        return (token, pos)
+        token = Token(USERFUNC, pos)
+        (entry_ref, end_pos) = consume_entry_ref(mumps_module, line, pos)
+        token.entry_ref = entry_ref
+        return (token, end_pos)
 
     def consume_intrinsic(mobj):
         """ parses a Mumps intrinsic function or variable"""
@@ -326,6 +309,19 @@
                 token.name = name
         return (token, pos)
 
+    def consume_structsysvar(mobj):
+        """ parses a Mumps Structured System variable"""
+        name = mobj.group("var").upper()
+        pos = mobj.end("var")
+        toktype = STRUCT_SYS_VARS.get(name, SSV_UNKNOWN)
+        token = Token(toktype, mobj.start())
+        if toktype == SSV_UNKNOWN:
+            token.name = name
+        if mobj.group("indexed"):
+            pos, indices = consume_actuallist(mumps_module, line, pos + 1)
+            token.indices = indices
+        return (token, pos)
+        
     def consume_pattmatch(mobj):
         """ parses mumps-style pattern-matchine patterns"""
         pos = mobj.end()
@@ -338,13 +334,13 @@
         (MUMPS_RE_DICT["indirection"], consume_indirection, "indirection"),
         (MUMPS_RE_DICT["var"], consume_var, "var"), 
         (MUMPS_RE_DICT["op"], consume_op, "op"),
+        (MUMPS_RE_DICT["patmat"], consume_pattmatch, "patmat"),
         (MUMPS_RE_DICT["str"], consume_str, "str"),
         (MUMPS_RE_DICT["num"], consume_num, "num"),
         (MUMPS_RE_DICT["extr"], consume_extrinsic, "extr"),
         (MUMPS_RE_DICT["intr"], consume_intrinsic, "intr"),
-        (MUMPS_RE_DICT["patmat"], consume_pattmatch, "patmat"),
         (MUMPS_RE_DICT["nakedRef"], consume_naked_ref, "nakedRef"),
-        (re.compile(r"(\^\$JOB)"), consume_unknown, "unknown"),
+        (MUMPS_RE_DICT["structsysvar"], consume_structsysvar, "structsysvar"),
         (MUMPS_RE_DICT["subexpr"], consume_sub_expr, "subexpr")]
 
     pos = startpos
@@ -385,7 +381,7 @@
         mterminated = MUMPS_RE_DICT["cmdEnd"]
         if not mterminated.match(line, pos) and \
            line[pos:pos+2] == '@(':
-            indirection = parse_expr(mumps_module, line, pos, r"([ ,]|\s*$)")
+            indirection = parse_expr(mumps_module, line, pos, r"([ ,:]|\s*$)")
 
             new_pos = indirection.end
             if mterminated.match(line, new_pos):
@@ -548,10 +544,14 @@
                     pos = pos + 1
                     keyword_list = []
                     while line[pos] != ')':
-                        keyword = parse_expr(mumps_module, line, pos, r"([:)])")
+                        keyword = parse_expr(mumps_module, line, pos, r"[,:=)]")
                         keyword_list.append(keyword)
                         pos = keyword.end
-                        if line[pos] == ':':
+                        if line[pos] == "=":
+                            kval = parse_expr(mumps_module, line, pos, r"[,:)]")
+                            keyword.keyval = kval
+                            pos = kval.end
+                        if line[pos] in (':', ','):
                             pos = pos + 1
                     device["keyword_list"] = keyword_list
                     pos = pos + 1
@@ -622,6 +622,7 @@
                     if line[pos] == ',':
                         pos = pos + 1
                 lock_item["nrefs"] = nref_list
+                pos = pos + 1 # eat the closing parenthesis
             else:
                 nref = parse_expr(mumps_module, line, pos, r"([ ,:]|\s*$)")
                 pos = nref.end
@@ -764,7 +765,6 @@
 
     def parse_goto(mumps_module, line, pos):
         """parse the Mumps Goto command"""
-        #NOTE: a goto seems very similar to a do, but with no arguments passed.
         entry_ref_list = []
         mterminated = MUMPS_RE_DICT["cmdEnd"]
         while not mterminated.match(line, pos):
@@ -904,8 +904,7 @@
     mumps_module.add_token(Token(toktype, startpos))
     if toktype == UNKNOWNCMD:
         mumps_module.last_token().name = cmd
-    if re_match.group("indents"):
-        # count the periods for the nesting level...
+    if re_match.group("indents"): # count the periods for the nesting level...
         mumps_module.indent_token( \
             len(re.findall("[.]", line[startpos:re_match.start("cmd")]))) 
     if line[re_match.end("cmd")] == ':':
@@ -922,7 +921,7 @@
 
     return mumps_module.last_token()
 
-def parseMumps(mumps_module):
+def parseMumps(mumps_module, open_fileinput= None):
     """ Parse all the code of given Mumps module."""
     mumps_module.empty_tokenlist()
     
@@ -932,8 +931,13 @@
         (MUMPS_RE_DICT["emptyline"], parse_emptyline, "emptyline"),
         (MUMPS_RE_DICT["cmd"], parse_command, "cmd")]
 
+    if open_fileinput:
+        input_file = open_fileinput
+    else:
+        input_file = fileinput.input(mumps_module.input_file)
+
     try:
-        for line in fileinput.input(mumps_module.input_file):
+        for line in input_file:
             if fileinput.lineno() < mumps_module.start:
                 continue
 
@@ -970,7 +974,8 @@
                 raise ParseError(mumps_module, line, "no Pattern match", pos, \
                                  fileinput.lineno())
     finally:
-        fileinput.close()
+        if not open_fileinput:
+            fileinput.close()
 
 if __name__ == '__main__':
     from mumps_module import parse_for_routines
@@ -983,7 +988,6 @@
                     parseMumps(the_module)
 
     try:
-        #mods = parse_for_routines("../testfiles/vista.m2p", "./out")
         mods = parse_for_routines("../testfiles/fm22.m2p", "./out")
         #parsemodule(mods,"ACKQUTL1")
         for the_module in mods:


Mail converted by MHonArc 2.6.19+ http://listengine.tuxfamily.org/