[Mumps2Py:] [70] implementation-specific command and intrinsics removal; now the unknown command/function/variable name is captured, but the toktype is (F_|V_)UNKNOWN(CMD).

[ Thread Index | Date Index | More lists.mumps2py.org/discuss Archives ]


Revision: 70
Author:   pgallot
Date:     2008-01-31 00:11:11 +0000 (Thu, 31 Jan 2008)

Log Message:
-----------
Removed implementation-specific commands and intrinsics; the name of an unknown command, function, or variable is now captured, and its toktype is set to UNKNOWNCMD, F_UNKNOWN, or V_UNKNOWN respectively.  Also fixed string literal handling.  Expanded comment handling to cover the empty-comment case and the case where the comment may be accessed via $TEXT().  Label handling has been simplified.

Modified Paths:
--------------
    trunk/mumps2py/mumps2tok.py


Modified: trunk/mumps2py/mumps2tok.py
===================================================================
--- trunk/mumps2py/mumps2tok.py	2008-01-30 23:47:15 UTC (rev 69)
+++ trunk/mumps2py/mumps2tok.py	2008-01-31 00:11:11 UTC (rev 70)
@@ -49,7 +49,7 @@
     "var":re.compile(r"((?P<globalvar>\^{1})|(?P<pbr>[.]{1}))?(?P<var>[A-Za-z%][A-Za-z0-9]*)(?P<indexed>[(]{1})?"),
     "indirection":re.compile(r"@"),
     "op":re.compile(r"[-_/+']|([']?(([*]{1,2})|([]]{1,2})|\\|[[=><#&!/]))"),
-    "str":re.compile(r"[\"](.|[\"]{2})*?[\"]"),
+    "str":re.compile(r"([\"]{3}|[\"]{1})(([\"]{2})*?|.)*([\"]{3}|[\"]{1})"),
     "num":re.compile(r"([0-9]*[.])?[0-9]+([eE][+-][0-9]+)?"),
     "extr":re.compile(r"[$]{2}(?P<label>\w+)(\^(?P<routine>[%A-Za-z][A-Za-z0-9]*))?(?P<func>[(]{1})?"),
     "intr":re.compile(r"[$]{1}(?P<intrinsic>[A-Za-z0-9]+)(?P<func>[(]{1})?"),
@@ -59,7 +59,7 @@
     "subexpr":re.compile(r"[(]"),
     "command":re.compile(r"(?P<indents>[ .]*)(?P<cmd>[A-Za-z]+)"),
     "label":re.compile(r"^(\^)?[%a-zA-Z0-9][A-Za-z0-9]*"),
-    "comment":re.compile(r"[ ]*?[ .]*?;(?P<comment>.*)"),
+    "comment":re.compile(r"[ ]*?[ .]*?(?P<comment>[;]+.*)"),
     "emptyline":re.compile(r"[ .]*\s*$"),
     "cmd":re.compile(r"[. ]*([A-Za-z]+\w*?(?:[ :]|\s*$))"),
     "cmdEnd":re.compile(r"[ ]|\s*$")
@@ -123,33 +123,32 @@
 def parse_label(mumps_module, line):
     """ parses a Mumps label"""
     token = Token(LABEL, 0)
-    pos = 0
-    if line[0] == "^": # NOTE: I'm not completely sure of the semantics here...
-        token.externally_visible = True
-        pos = 1
-    else:
-        token.externally_visible = False
-        
-    re_match = re.match(r"[%a-zA-Z0-9]+\w*", line[pos:])
-    token.val = line[pos:re_match.end() + pos]
-    pos = re_match.end() + pos
-
-    params = re.match(r"[(].*?[)]+?", line[pos:])
+    re_match = re.match(r"[%a-zA-Z0-9]+", line)
+    token.val = re_match.group()
+    pos = re_match.end()
+    
+    params = re.match(r"[(].*?[)]", line[pos:])
     if params:
-        token.params = re.split(r"\W+", line[pos + 1:pos + params.end() - 1])
+        token.params = re.split(r",", line[pos + 1:pos + params.end() - 1])
         pos = pos + params.end()
     else:
         token.params = None
     token.end = pos
     mumps_module.add_token(token)
     return token
-   
 
 def parse_comment(mumps_module, line, startpos = 0):
     """ parses a Mumps comment"""
     re_match = MUMPS_RE_DICT["comment"].match(line, startpos)
-    token = Token(COMMENT, startpos)
-    token.val = re_match.group("comment")
+    val = re_match.group("comment")
+    if re.match(r";\s*$", val):
+        token = Token(EMPTYLINE, startpos)
+    elif val[1]==';':
+        token = Token(TEXTVAL, startpos)
+        token.val = val
+    else:
+        token = Token(COMMENT, startpos)
+        token.val = val[1:]
     token.end = len(line)
     mumps_module.add_token(token)
     return token
@@ -308,22 +307,25 @@
 
     def consume_intrinsic(mobj):
         """ parses a Mumps intrinsic function or variable"""
-        name = re_match.group("intrinsic").upper()
+        name = mobj.group("intrinsic").upper()
         pos = mobj.end()
         if mobj.group("func"):
-            toktype = IntrinsicFuncDict[name]
+            toktype = INTRINSIC_FUNCS.get(name, F_UNKNOWN)
             token = Token(toktype, mobj.start())
             if toktype == F_TEXT:
                 (params, end_pos) = consume_entry_ref(mumps_module, line, pos)
                 token.params = params
                 pos = end_pos + 1
             else:
+                if toktype == F_UNKNOWN:
+                    token.name = name
                 token.params = consume_actuallist(mumps_module, line, pos)
                 pos = token.params[-1].end + 1
         else: # vars...
-            toktype = IntrinsicVarDict[name]
+            toktype = INTRINSIC_VARS.get(name, V_UNKNOWN)
             token = Token(toktype, mobj.start())
-
+            if toktype == V_UNKNOWN:
+                token.name = name
         return (token, pos)
 
     def consume_pattmatch(mobj):
@@ -549,7 +551,7 @@
             if not mterminated.match(line, pos) and line[pos] == ':':
                 pos = pos + 1
                 timeout = parse_expr(mumps_module, line, pos, r"[ ,]|\s*$")
-                device["time_out"] = timeout
+                device["Expr"].timeout = timeout
                 pos = timeout.end
             if not mterminated.match(line, pos) and line[pos] == ',':
                 pos = pos + 1
@@ -620,7 +622,8 @@
             if not mterminated.match(line, pos) and line[pos] == ':':
                 pos = pos + 1
                 timeout = parse_expr(mumps_module, line, pos, r"[ ,]|\s*$")
-                lock_item["time_out"] = timeout
+                for nref in lock_item["nrefs"]:
+                    nref.timeout = timeout
                 pos = timeout.end
             lock_list.append(lock_item)
             if line[pos] == ',':
@@ -662,8 +665,8 @@
             if not mterminated.match(line, pos) and line[pos] == ':':
                 pos = pos + 1
                 timeout = parse_expr(mumps_module, line, pos, r"[ ,]|\s*$")
-                pos = timeout.end
                 entry_ref.timeout = timeout
+                pos = timeout.end
             if not mterminated.match(line, pos) and line[pos] == ',':
                 pos = pos + 1
             entry_ref_list.append(entry_ref)
@@ -852,13 +855,15 @@
         mumps_module.last_token().Xecute = expr_list
         mumps_module.end_token(pos + 1)
 
-    def parse_zquit(mumps_module, line, startpos=0):
-        """parse the ZQuit command"""
-        #for lack of better info, treat this just like quit?
-        condtok = parse_expr(mumps_module, line, startpos)
-        mumps_module.last_token().Condition = condtok
-        mumps_module.end_token(condtok.end)
-        #raise ParseError(mumps_module, line, "unhandled command", pos)
+    def consume_unknowncommand(mumps_module, line, pos):
+        """eat the argument of an unknown command"""
+        val = ""
+        mterminated = MUMPS_RE_DICT["cmdEnd"]
+        while not mterminated.match(line, pos):
+            val = val + line[pos]
+            pos = pos + 1
+        mumps_module.last_token().val = val
+        mumps_module.end_token(pos)
         
 
     command_list = {BREAKCMD:parse_break,
@@ -881,9 +886,8 @@
                    VIEWCMD:parse_view,
                    WRITECMD:parse_write,
                    XECUTECMD:parse_xecute,
-                   ZQUITCMD:parse_zquit,
                    ZWRITECMD:parse_write,
-                   ZETRAPCMD:parse_goto, # NOTE: this is a guess...
+                   UNKNOWNCMD:consume_unknowncommand,
                    COMMENT:parse_comment}
 
 
@@ -892,26 +896,24 @@
     cmd = re_match.group("cmd").upper()
     pos = re_match.end("cmd") + 1
 
-    if cmd in CMD_TOKEN_DICT:
-        toktype = CMD_TOKEN_DICT[cmd]
-        mumps_module.add_token(Token(toktype, startpos))
-        
-        if re_match.group("indents"):
-            # count the periods for the nesting level...
-            mumps_module.indent_token( \
-                len(re.findall("[.]", line[startpos:re_match.start("cmd")]))) 
-        if line[re_match.end("cmd")] == ':':
-            condtok = parse_expr(mumps_module, line, pos)
-            mumps_module.post_condition(condtok)
-            pos = condtok.end + 1
-        
-        if toktype in command_list:
-            parse_func = command_list[toktype]
-            parse_func(mumps_module, line, pos)
-        else:
-            mumps_module.end_token(pos)
+    toktype = CMD_TOKEN_DICT.get(cmd, UNKNOWNCMD)
+    mumps_module.add_token(Token(toktype, startpos))
+    if toktype == UNKNOWNCMD:
+        mumps_module.last_token().name = cmd
+    if re_match.group("indents"):
+        # count the periods for the nesting level...
+        mumps_module.indent_token( \
+            len(re.findall("[.]", line[startpos:re_match.start("cmd")]))) 
+    if line[re_match.end("cmd")] == ':':
+        condtok = parse_expr(mumps_module, line, pos)
+        mumps_module.post_condition(condtok)
+        pos = condtok.end + 1
+    
+    if toktype in command_list:
+        parse_func = command_list[toktype]
+        parse_func(mumps_module, line, pos)
     else:
-        raise ParseError(mumps_module, line, "Unknown command", startpos)
+        mumps_module.end_token(pos)
 
     return mumps_module.last_token()
 
@@ -940,7 +942,6 @@
             for (pattern, parser, dscr) in pattern_list:
                 if pattern.match(line):
                     pattern_match = True
-                    #print "<%s>" % dscr
                     token = parser(mumps_module, line)
                     token.line_no(fileinput.lineno())
                     pos = token.end
@@ -951,7 +952,6 @@
                             # no need to scan for labels...
                             if pattern.match(line, pos):
                                 inside_pattern_match = True
-                                #print "<%s>" % dscr
                                 token = parser(mumps_module, line, pos)
                                 pos = token.end
                                 break
@@ -960,7 +960,6 @@
                             raise ParseError(mumps_module, line, \
                                              "no Matching pattern", pos, \
                                              fileinput.lineno())
-                           
                     break
 
             if not pattern_match:
@@ -976,8 +975,6 @@
     MumpsFile = "fm22.rsa"
     m2py_dir = ".\\out"
 
-    #Todo: test if dir exists, if not, create it...
-
     f = open(MumpsFile)
     s = f.readline()
     f.close()
@@ -989,7 +986,7 @@
                 parseMumps(module)
         except ParseError, e:
             print e.error_msg()
-    else:#todo: does this work ?
+    else:
         outputname = re.split(r"\..*$", MumpsFile)[0]
         
         if __debug__:


Mail converted by MHonArc 2.6.19+ http://listengine.tuxfamily.org/