[hatari-devel] Patch to enhance symbol names of Pure-C executables

[ Thread Index | Date Index | More lists.tuxfamily.org/hatari-devel Archives ]


Hi,

 

the attached patch tries to read the real names from the debug information of executables compiled by Pure-C (and, I think, also PurePascal, if someone uses that). That should get rid of the annoying fact that the linker truncates the symbol names in the DRI symbol table to 8 characters (it does not even support the GST extension to 22 chars).

 

A few notes:

 

- I've changed only the external tool (gst2ascii), but I think it can easily be adapted to src/debug/symbols.c

 

- I had to comment out one line that adds the section offset to the symbol's value. I'm not entirely sure whether that is a bug in the tool or in the Pure-C linker, but values in the symbol table generated by PLINK are all relative to the start of the text segment, not relative to the section they are defined in.

 

- It is a bit brute-force: it goes through all symbols from the DRI symbol table and then tries to locate each of them in the debug info. That could maybe be optimized, but maybe it does not matter much (I've tried this with ORCS, with debug information of ~1MB, and hardly noticed a difference)

 

- you may still get truncated symbol names from linked-in libraries (unless those libraries also had debug information)

 

- since the debug format works a lot with offsets, I'm reading the whole file into a buffer and working with that. Maybe the remaining routines should be changed to do the same before calling that function (currently they do a lot of seeking/single reads)

 

diff --git a/tools/debugger/gst2ascii.c b/tools/debugger/gst2ascii.c
index 8572a93f..880b7650 100644
--- a/tools/debugger/gst2ascii.c
+++ b/tools/debugger/gst2ascii.c
@@ -18,6 +18,7 @@
 #include <stdlib.h>
 #include <stdint.h>
 #include <stdbool.h>
+#include <errno.h>
 #if defined(__MINT__)	/* assume MiNT/lib is always big-endian */
 # define SDL_SwapBE16(x) x
 # define SDL_SwapBE32(x) x
@@ -276,6 +277,190 @@ static bool symbol_remove_obj(const char *name)
 }
 
 
+static uint32_t get_be32(const uint8_t *p)
+{
+	/* Assemble the big-endian value bytewise: p may be unaligned, and a uint32_t cast would break strict aliasing. */
+	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) | ((uint32_t)p[2] << 8) | (uint32_t)p[3];
+}
+
+
+/*
+ * functions to deal with Pure-C Debug informations
+ */
+
+struct pdb_h {	/* header of the Pure-C debug information, as filled in by read_pc_debug_header() */
+	uint32_t magic;	/* 0x51444231 ('QDB1') in executables */
+	uint32_t size_fileinfos;	/* the size_* fields are summed in this order to locate later sections */
+	uint32_t size_lineinfo;
+	uint32_t size_varinfo;	/* byte length of the varinfo records that are searched for symbol names */
+	uint32_t size_unknown;
+	uint32_t size_typeinfo;
+	uint32_t size_structinfo;
+	uint32_t size_stringtable;
+};
+#define SIZEOF_PDB_HEADER (8 * sizeof(uint32_t))	/* size of the header in the file: eight 32-bit fields */
+
+#define PDB_STORAGE_NONE     0 /* no storage; absolute value */
+#define PDB_STORAGE_TEXT     4 /* in text segment */
+#define PDB_STORAGE_DATA     5 /* in data segment */
+#define PDB_STORAGE_BSS      6 /* in bss segment */
+
+struct pdb_varinfo {	/* one varinfo record, as decoded by read_varinfo() */
+	int8_t type;	/* the name lookup accepts 7/8 for text symbols and 4/5/6 for data/bss symbols */
+	uint8_t storage;	/* compared against the PDB_STORAGE_* values */
+	uint32_t name_offset;	/* offset of the name, relative to the start of the string table */
+	uint32_t typeinfo_offset;	/* decoded but not used by the name lookup; presumably points into the typeinfo section */
+	uint32_t value;	/* compared against the symbol's address */
+};
+#define SIZEOF_VARINFO ((size_t)14)	/* record size in the file: 1 + 1 + 4 + 4 + 4 bytes */
+
+
+static void read_pc_debug_header(const uint8_t *ptr, struct pdb_h *header)
+{	/* Fill *header from the 32 bytes at ptr: eight consecutive big-endian 32-bit fields. The caller must ensure they are readable. */
+	header->magic = get_be32(ptr + 0);
+	header->size_fileinfos = get_be32(ptr + 4);
+	header->size_lineinfo = get_be32(ptr + 8);
+	header->size_varinfo = get_be32(ptr + 12);
+	header->size_unknown = get_be32(ptr + 16);
+	header->size_typeinfo = get_be32(ptr + 20);
+	header->size_structinfo = get_be32(ptr + 24);
+	header->size_stringtable = get_be32(ptr + 28);
+}
+
+
+static void read_varinfo(const uint8_t *ptr, struct pdb_varinfo *info)
+{	/* Decode the 14-byte record at ptr into *info; the caller must ensure that 14 bytes are readable. */
+	info->type = ptr[0];
+	info->storage = ptr[1];
+	info->name_offset = get_be32(ptr + 2);	/* the 32-bit fields start at offset 2, so they are not 4-byte aligned within the record */
+	info->typeinfo_offset = get_be32(ptr + 6);
+	info->value = get_be32(ptr + 10);
+}
+
+
+static int read_pc_debug_names(FILE *fp, symbol_list_t *list, uint32_t offset)
+{
+	/*
+	 * Replace possibly truncated names (8 or 22 chars) in 'list' with the full
+	 * names from the Pure-C debug info following the relocation table at file
+	 * position 'offset'. Returns 1 on success or if no debug info exists, else 0.
+	 */
+	uint8_t *buf;
+	const uint8_t *p, *end;
+	size_t filesize, pos, strtable_size, len;
+	uint64_t varinfo_offset, strtable_offset;
+	struct pdb_h pdb_h;
+	uint8_t storage;
+	long fsize;
+	int ret = 0, i;
+
+	if (fseek(fp, 0, SEEK_END) != 0 || (fsize = ftell(fp)) < 0 || fseek(fp, 0, SEEK_SET) != 0)
+	{
+		perror("ERROR: seeking failed");
+		return 0;
+	}
+	filesize = (size_t)fsize;
+	pos = offset;
+	/* the relocation table starts with a 32-bit value that must be readable */
+	if (pos > filesize || filesize - pos < 4)
+	{
+		fprintf(stderr, "no debug information present\n");
+		return 1;
+	}
+	buf = malloc(filesize);
+	if (buf == NULL)
+	{
+		perror("");
+		return 0;
+	}
+	if (fread(buf, 1, filesize, fp) != filesize)
+	{
+		perror("ERROR: reading failed");
+		goto done;
+	}
+
+	/* skip the TPA relocation table */
+	pos += 4;
+	if (get_be32(buf + pos - 4) != 0)
+	{
+		while (pos < filesize && buf[pos] != 0)
+			pos++;
+		pos++;
+	}
+	pos += pos & 1;
+	if (pos > filesize || filesize - pos <= SIZEOF_PDB_HEADER)
+	{
+		fprintf(stderr, "no debug information present\n");
+		/* this is not an error */
+		ret = 1;
+		goto done;
+	}
+	read_pc_debug_header(buf + pos, &pdb_h);
+	if (pdb_h.magic != 0x51444231UL) /* 'QDB1' (in executables) */
+	{
+		fprintf(stderr, "unknown debug format 0x%08lx\n", (unsigned long)pdb_h.magic);
+		goto done;
+	}
+	/* 64-bit sums, so that bogus section sizes cannot wrap around */
+	varinfo_offset = (uint64_t)pos + SIZEOF_PDB_HEADER + pdb_h.size_fileinfos + pdb_h.size_lineinfo;
+	strtable_offset = varinfo_offset + pdb_h.size_varinfo + pdb_h.size_unknown + pdb_h.size_typeinfo + pdb_h.size_structinfo;
+	if (strtable_offset > filesize)
+	{
+		fprintf(stderr, "ERROR: debug information is truncated\n");
+		goto done;
+	}
+	/* names must lie inside the string table and inside the file */
+	strtable_size = (size_t)(filesize - strtable_offset);
+	if (strtable_size > pdb_h.size_stringtable)
+		strtable_size = pdb_h.size_stringtable;
+	for (i = 0; i < list->count; i++)
+	{
+		if (list->names[i].type == SYMTYPE_TEXT)
+			storage = PDB_STORAGE_TEXT;
+		else if (list->names[i].type == SYMTYPE_DATA)
+			storage = PDB_STORAGE_DATA;
+		else if (list->names[i].type == SYMTYPE_BSS)
+			storage = PDB_STORAGE_BSS;
+		else
+			continue;
+		/* only need to care about possibly truncated names */
+		len = strlen(list->names[i].name);
+		if (len != 8 && len != 22)
+			continue;
+		/* Fixme: slurp the infos all in, and sort them so we can do a binary search */
+		p = buf + varinfo_offset;
+		end = p + pdb_h.size_varinfo;
+		for (; (size_t)(end - p) >= SIZEOF_VARINFO; p += SIZEOF_VARINFO)
+		{
+			struct pdb_varinfo info;
+			const char *name;
+			char *copy;
+			read_varinfo(p, &info);
+			if (info.storage != storage || info.value != list->names[i].address)
+				continue;
+			if (storage == PDB_STORAGE_TEXT ? (info.type != 7 && info.type != 8) : (info.type < 4 || info.type > 6))
+				continue;
+			/* ignore records whose name is not a terminated string inside the table */
+			if (info.name_offset >= strtable_size)
+				continue;
+			name = (const char *)buf + strtable_offset + info.name_offset;
+			if (memchr(name, 0, strtable_size - info.name_offset) == NULL)
+				continue;
+			if (strcmp(list->names[i].name, name) != 0 && (copy = strdup(name)) != NULL)
+			{
+				free(list->names[i].name);
+				list->names[i].name = copy;
+			}
+			break;
+		}
+	}
+	ret = 1;
+done:
+	free(buf);
+	return ret;
+}
+
+
 /**
  * Load symbols of given type and the symbol address addresses from
  * DRI/GST format symbol table, and add given offsets to the addresses:
@@ -371,7 +556,9 @@ static symbol_list_t* symbols_load_dri(FILE *fp, prg_section_t *sections, uint32
 			}
 		}
 		if (section) {
+			/* disabled; this seems to be wrong
 			address += section->offset;
+			*/
 			if (address > section->end) {
 				fprintf(stderr, "WARNING: ignoring symbol '%s' of type %c in slot %d with invalid offset 0x%x (>= 0x%x).\n",
 					name, symbol_char(symtype), i, address, section->end);
@@ -412,6 +599,12 @@ static symbol_list_t* symbols_load_dri(FILE *fp, prg_section_t *sections, uint32
 	}
 	list->symbols = symbols;
 	list->count = count;
+
+	/*
+	 * now try to read the real names from Pure-C debug info
+	 */
+	read_pc_debug_names(fp, list, 28 + sections[2].offset + tablesize);
+
 	return list;
 }
 


Mail converted by MHonArc 2.6.19+ http://listengine.tuxfamily.org/