[fusil] [PATCH] python-ptrace: following a byte string across a program's execution

[ Thread Index | Date Index | More lists.tuxfamily.org/fusil Archives ]


Hello all,

Attached you will find a small patch that allows one to "follow" a sequence
of bytes (a regular string or a byte string) across the execution of a program.

The gdb.py "follow" command takes one argument, the term that the user
wishes to follow. The user can add multiple terms to the list of terms
to be "followed", as shown below:

(gdb) follow "dimitris"
(gdb) follow "\x01\x02\x03\x04"

The "showfollow" command shows the current list of "followed" terms:

(gdb) showfollow
['dimitris', '\x01\x02\x03\x04']

At any point during the debugging of the program, one can use the
"xray" command to inspect the memory of all debugged processes
for instances of the "followed" terms:

(gdb) xray
term['dimitris'] pid[2686] 0x093a0000-0x09c29000 => [heap] (rw-p) 0x09552aa0 pointers: 0x09552b88
term['dimitris'] pid[2686] 0x093a0000-0x09c29000 => [heap] (rw-p) 0x09552b40 pointers: 0x09552b20
term['dimitris'] pid[2686] 0x093a0000-0x09c29000 => [heap] (rw-p) 0x095532a0 pointers: 0x09553290
term['dimitris'] pid[2686] 0x093a0000-0x09c29000 => [heap] (rw-p) 0x095e9ce0 pointers: 0x095e82d8
term['dimitris'] pid[2686] 0x093a0000-0x09c29000 => [heap] (rw-p) 0x09a272a0 pointers: 0x09a2730c
term['dimitris'] pid[2686] 0x093a0000-0x09c29000 => [heap] (rw-p) 0x09bc3f9e pointers:
....

The "pointers:" clause shows the addresses of possible pointers to a "followed" term.

The command "resetfollow" resets the list of "followed" terms:
(gdb) resetfollow
(gdb) showfollow
[]

It seems that python-ptrace has some serious potential in the field
of taint analysis :-)

Hope you will find this useful!

cheers,

dimitris
--- gdb.py	2009-04-14 00:53:03.000000000 +0300
+++ /usr/local/bin/gdb.py	2009-04-15 01:20:29.000000000 +0300
@@ -12,7 +12,7 @@
 from ptrace.binding import HAS_PTRACE_SINGLESTEP
 from ptrace.disasm import HAS_DISASSEMBLER
 from ptrace.ctypes_tools import (truncateWord,
-    formatWordHex, formatAddress, formatAddressRange)
+    formatWordHex, formatAddress, formatAddressRange, word2bytes)
 from ptrace.process_tools import dumpProcessInfo
 from ptrace.tools import inverseDict
 from ptrace.func_call import FunctionCallOptions
@@ -22,6 +22,8 @@
 from errno import ESRCH
 from ptrace.cpu_info import CPU_POWERPC
 from ptrace.debugger import ChildError
+from ptrace.debugger.memory_mapping import readProcessMappings
+
 import re
 try:
     # Use readline for better raw_input()
@@ -66,12 +68,27 @@
     ("proclist", "list of traced processes"),
     ("switch", "switch active process (switch or switch <pid>)"),
 
+    ("follow", 'follow a term (eg. "follow \'\\0x12\\0x14\\0x27\\0x13\'")'),
+    ("showfollow", 'show all "followed" terms'),
+    ("resetfollow", 'reset all "followed" terms'),
+    ("xray", 'show addresses of (and pointers to) "followed" terms'),
+
     # other
     ("dbginfo", "informations about the debugger"),
     ("quit", "quit debugger"),
     ("help", "display this help"),
 )
 
+# finds possible pointer values in process memory space, 
+# pointing to address
+def getPointers(process, address):
+    retlist = []
+    procmaps = readProcessMappings(process)
+    for pm in procmaps:
+        for found in pm.search(word2bytes(address)):
+            retlist.append(found)
+    return retlist
+
 class Gdb(Application):
     def __init__(self):
         Application.__init__(self)
@@ -94,6 +111,8 @@
         # FIXME: Remove self.breaks!
         self.breaks = dict()
 
+        self.followterms = []
+
     def setupLog(self):
         self._setupLog(stdout)
 
@@ -205,6 +224,33 @@
             values.append(value)
         return values
 
+    def addfollowterm(self, term):
+        # Allow terms of the form 'string', "string", '\x04', "\x01\x14"
+        #
+        # fixme: this is not really safe, since the user can always
+        # input a string like 'bla\'
+        if ((term.startswith("'") and term.endswith("'")) or
+            (term.startswith('"') and term.endswith('"'))):
+            eval("self.followterms.append(%s)" % term)
+        else:
+            return 'Follow term must be enclosed in quotes!'
+
+    def showfollowterms(self):
+        print self.followterms
+
+    # displays the offsets of all terms found in the process memory mappings
+    # along with possible addresses of pointers pointing to these terms
+    def xray(self):
+        for term in self.followterms:
+            for process in self.debugger:
+                for procmap in readProcessMappings(process):
+                    for found in procmap.search(term):
+                        print "term[%s] pid[%i] %s %s pointers: %s" % (
+                            repr(term), process.pid, procmap, 
+                            formatAddress(found),
+                            " ".join([formatAddress(x) for x in 
+                                      getPointers(process, found)]))
+
     def execute(self, command):
         errmsg = None
         if command == "cont":
@@ -257,6 +303,14 @@
             errmsg = self.signal(command[7:])
         elif command.startswith("print "):
             errmsg = self.print_(command[6:])
+        elif command.startswith("follow "):
+            errmsg = self.addfollowterm(command[7:])
+        elif command == "showfollow":
+            self.showfollowterms()
+        elif command == "resetfollow":
+            self.followterms = []
+        elif command == "xray":
+            self.xray()
         else:
             errmsg = "Unknown command: %r" % command
         if errmsg:
--- ptrace/debugger/memory_mapping.py	2009-04-13 19:11:35.000000000 +0300
+++ /usr/local/lib/python2.5/site-packages/ptrace/debugger/memory_mapping.py	2009-04-15 01:08:04.000000000 +0300
@@ -31,12 +31,16 @@
      - major_device / minor_device (int): major / minor device number
      - inode (int)
      - pathname (str)
+     - pid (int)
 
     Operations:
      - "address in mapping" checks the address is in the mapping.
+     - "search(somestring)" returns the offsets of "somestring" in the mapping
      - "str(mapping)" create one string describing the mapping
+     - "repr(mapping)" create a string representation of the mapping,
+       useful in list contexts
     """
-    def __init__(self, start, end, permissions, offset, major_device, minor_device, inode, pathname):
+    def __init__(self, start, end, permissions, offset, major_device, minor_device, inode, pathname, pid):
         self.start = start
         self.end = end
         self.permissions = permissions
@@ -45,6 +49,7 @@
         self.minor_device = minor_device
         self.inode = inode
         self.pathname = pathname
+        self.pid = pid
 
     def __contains__(self, address):
         return self.start <= address < self.end
@@ -55,6 +60,29 @@
             text += " => %s" % self.pathname
         text += " (%s)" % self.permissions
         return text
+    
+    def search(self, bytestr):
+        retlist = []
+        bytestr_len = len(bytestr)
+        proc_mem = open("/proc/%i/mem" % self.pid, "r")
+        proc_mem.seek(self.start)
+        covered = self.start
+        data = proc_mem.read(self.end - self.start)
+        while (data != ""):
+            offset = data.find(bytestr)
+            if (offset == -1):
+                proc_mem.close()
+                return retlist
+            else:
+                retlist.append(offset + covered)
+                covered += offset + bytestr_len
+                proc_mem.seek(covered)
+                data = proc_mem.read(self.end - covered)
+        proc_mem.close()
+        return retlist
+
+    def __repr__(self):
+        return self.__str__()
 
 def readProcessMappings(process):
     """
@@ -86,7 +114,7 @@
                 int(match.group(5), 16),
                 int(match.group(6), 16),
                 int(match.group(7)),
-                match.group(8))
+                match.group(8), process.pid)
             maps.append(map)
     finally:
         mapsfile.close()


Mail converted by MHonArc 2.6.19+ http://listengine.tuxfamily.org/