[Mumps2Py:] [104] added support for translating Mumps-style pattern-matching to Python regular expressions.

[ Thread Index | Date Index | More lists.mumps2py.org/discuss Archives ]


Revision: 104
Author:   pgallot
Date:     2008-02-13 21:00:33 +0000 (Wed, 13 Feb 2008)

Log Message:
-----------
added support for translating Mumps-style pattern-matching to Python regular expressions.

Modified Paths:
--------------
    trunk/mumps2py/tok2python.py


Modified: trunk/mumps2py/tok2python.py
===================================================================
--- trunk/mumps2py/tok2python.py	2008-02-13 20:59:28 UTC (rev 103)
+++ trunk/mumps2py/tok2python.py	2008-02-13 21:00:33 UTC (rev 104)
@@ -16,7 +16,7 @@
 ##    along with Mumps2Py.  If not, see <http://www.gnu.org/licenses/>.
 """ translate the tokens extracted from the Mumps code into Python code."""
 import re
-import mumps_module, tokprepass
+import tokprepass
 from tokens import *
 
 class TranslationError(Exception):
@@ -331,7 +331,6 @@
                           (inputstr_str, subst_str, repl_str)
         return translation_str
         
-    
     def translate_local_var(translation, token):
         """ translates a local variable into a Python variable"""
         if not token.is_indexed() or \
@@ -345,54 +344,92 @@
     def translate_expr_list(translation, token):
         """ translates a sequence of literals, variables, operators, etc."""
 
-        def trans_unary_expr(left_token, right_token):
-            if left_token.toktype == OPSUB:
-                return "-"+translate_expr(translation, right_token)
-            elif left_token.is_not():
-                return "not "+translate_expr(translation, right_token)
+        def translate_atom(token):
+            """translates an individual pattern-matching atom to re-style"""
+            if token.repcount:
+                rep_str = "{%d}" % token.repcount
+            elif token.minrep or token.maxrep:
+                rep_str = "{%s,%s}" % (token.__dict__.get("minrep",""),
+                                       token.__dict__.get("maxrep",""))
             else:
-                raise TranslationError(translation, left_token,
-                                       "unknown expression pattern")
-        def trans_binary_op(token):
-            op_trans_dict = {OPADD:'+', OPSUB:'-', OPMULT:'*', OPEXP:'**',
-                             OPMODULO:'%', OPGT:'>', OPLT:'<', OPNGT:'<=',
-                             OPNLT:'>=', OPEQ:'==', OPNEQ:'!=', OPAND:'and',
-                             OPOR:'or', OPFRACDIV:'/', OPINTDIV:'/'}
+                rep_str = "*"
+                
+            if token.params:
+                match_str = ""
+                for alt_pattern in token.params:
+                    atom_str = ""
+                    for atom in alt_pattern:
+                        atom_str = atom_str + translate_atom(atom)
+                    match_str = "%s%s |" % (match_str, atom_str)
+                match_str = "(%)" % match_str[:-2]
+            elif token.match_str:
+                match_str = re.escape(token.match_str.val[1:-1])
+            else:
+                code_dict = {'A':r'\w',
+                             'C':r'[\0-\37\177]',
+                             'E':r'.',
+                             'L':r'[a-z]',
+                             'N':r'\d',
+                             'P':r'[\40-\57\72-\100\133-\140\173-\176]',
+                             'U':r'[A-Z]'}
+                match_str = code_dict[token.pat_code]
+            return match_str + rep_str
+        
+        def translate_pattern_atoms(token):
+            """translates a Mumps pattern-matching expression to re-style"""
+            atom_str = 'r\"'
+            for atom in token.params:
+                atom_str = atom_str + translate_atom(atom)
 
-            if op_trans_dict.has_key(token.toktype):
-                return op_trans_dict[token.toktype]
-            else:
-                raise TranslationError(translation, token,
-                                       "unknown expression pattern")
-            
+            return atom_str + '\"'
+
+        op_trans_dict = {OPADD:'+', OPSUB:'-', OPMULT:'*', OPEXP:'**',
+                         OPMODULO:'%', OPGT:'>', OPLT:'<', OPNGT:'<=',
+                         OPNLT:'>=', OPEQ:'==', OPNEQ:'!=', OPAND:'and',
+                         OPOR:'or', OPFRACDIV:'/', OPINTDIV:'/',
+                         OPFOLLOWS:'>', OPCONCAT:'+', OPNOT:'not '}
+        
         expr_list = token.expr_list[:]
         first_token = expr_list.pop(0)
-        if first_token.is_unaryop():
-            left_str = "%s" % (trans_unary_expr(first_token, expr_list.pop(0)))
+        if first_token.is_op() and first_token.is_unaryop():
+            op_str = op_trans_dict.get(first_token.toktype, None)
+            assert(op_str)
+            left_str = op_str + translate_expr(translation, expr_list.pop(0))
             if len(expr_list):
-                left_str = '(' + left_str + ')'
+                left_str = '(%s)' % left_str
         else:
             left_str = translate_expr(translation, first_token)
         while expr_list:
-            not_str = ""
+            b_not = False
             binop_token = expr_list.pop(0)
-            if binop_token.is_not():
-                not_str = "not "
+
+            if binop_token.is_op() and binop_token.is_not():
+                b_not = True
                 binop_token = expr_list.pop(0)
+            elif binop_token.is_op() and binop_token.is_patmatch():
+                translation.add_import("re")
+                left_str = "re.match(%s, %s)" % \
+                           (translate_pattern_atoms(binop_token), left_str)
+                continue
                 
             right_token = expr_list.pop(0)
-            if right_token.is_unaryop():
-                right_str = trans_unary_expr(right_token, expr_list.pop(0))
+            if right_token.is_op() and right_token.is_unaryop():
+                rop_str = op_trans_dict.get(right_token.toktype, None)
+                assert(rop_str)
+                right_str = rop_str + translate_expr(translation,
+                                                     expr_list.pop(0))
             else:
                 right_str = translate_expr(translation, right_token)
 
-            if len(expr_list) or len(not_str):
-                left_str = "%s(%s %s %s)" % (not_str, left_str,
-                                             trans_binary_op(binop_token),
-                                             right_str)
+            op_str = op_trans_dict.get(binop_token.toktype, None)
+            assert(op_str)
+
+            if b_not:
+                left_str = "not (%s %s %s)" % (left_str, op_str, right_str)
+            elif len(expr_list):
+                left_str = "(%s %s %s)" % (left_str, op_str, right_str)
             else:
-                left_str = "%s %s %s" % \
-                           (left_str, trans_binary_op(binop_token), right_str )
+                left_str = "%s %s %s" % (left_str, op_str, right_str )
         return left_str
 
     expr_transl_dict = {


Mail converted by MHonArc 2.6.19+ http://listengine.tuxfamily.org/