[hatari-devel] Support for loading GNU-style symbols from a.out executables

[ Thread Index | Date Index | More lists.tuxfamily.org/hatari-devel Archives ]


Hi,

attached is a patch that should add support for loading the gcc-produced 
a.out symbol table from executables.

Have fun
# HG changeset patch
# User Thorsten Otto <admin@xxxxxxxxxxx>
# Date 1509972332 -3600
#      Mon Nov 06 13:45:32 2017 +0100
# Node ID 4b81a2a3a39f367ae968bff67200beea1865d7a0
# Parent  4c83b2af0915dad78ba45f3df35a3f49c6c709fe
Support loading GNU-style symbols

diff -r 4c83b2af0915 -r 4b81a2a3a39f src/debug/a.out.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/debug/a.out.h	Mon Nov 06 13:45:32 2017 +0100
@@ -0,0 +1,86 @@
+/* a.out.h - Definitions and declarations for GNU-style a.out
+   binaries.
+   Written by Guido Flohr (gufl0000@xxxxxxxxxxxxxx).
+
+   This file is in the public domain. */
+
+#ifndef __A_OUT_GNU_H__
+#define __A_OUT_GNU_H__ 1
+
+struct nlist {              /* symbol table entry; on-disk size is SIZEOF_STRUCT_NLIST */
+  union {
+    const char *n_name;     /* in memory address */
+    struct nlist *n_next;
+    size_t n_strx;          /* string table offset */
+  } n_un;
+  unsigned char n_type;     /* N_* type value below, N_EXT / N_STAB bits may be set */
+  char n_other;             /* not interpreted by the symbol loading in symbols.c */
+  short n_desc;             /* not interpreted by the symbol loading in symbols.c */
+  uint32_t n_value;         /* symbol value; symbols.c loads it as the symbol address */
+};
+
+/* sizeof(struct nlist) on disk */
+#define SIZEOF_STRUCT_NLIST 12
+
+
+#define N_UNDF   0x00       /* undefined */
+#define N_ABS    0x02       /* absolute */
+#define N_TEXT   0x04       /* text */
+#define N_DATA   0x06       /* data */
+#define N_BSS    0x08       /* bss */
+#define N_SIZE   0x0c       /* pseudo type, defines a symbol's size */
+#define N_FN     0x1f       /* File name of a .o file */
+#define N_COMM   0x12       /* common (internal to ld) */
+
+#define N_EXT    0x01       /* external bit, or'ed in */
+#define N_TYPE   0x1e       /* mask for all the type bits */
+#define N_STAB   0xe0       /* if any of these bits set, don't discard */
+
+/* The following type indicates the definition of a symbol as being
+   an indirect reference to another symbol.  The other symbol
+   appears as an undefined reference, immediately following this symbol.
+
+   Indirection is asymmetrical.  The other symbol's value will be used
+   to satisfy requests for the indirect symbol, but not vice versa.
+   If the other symbol does not have a definition, libraries will
+   be searched to find a definition. */
+#define N_INDR 0x0a
+
+/* The following symbols refer to set elements.
+   All the N_SET[ATDB] symbols with the same name form one set.
+   Space is allocated for the set in the text section, and each set
+   element's value is stored into one word of the space.
+   The first word of the space is the length of the set (number of elements).
+
+   The address of the set is made into an N_SETV symbol
+   whose name is the same as the name of the set.
+   This symbol acts like a N_DATA global symbol
+   in that it can satisfy undefined external references. */
+
+/* These appear as input to LD, in a .o file. */
+#define N_SETA  0x14        /* Absolute set element symbol */
+#define N_SETT  0x16        /* Text set element symbol */
+#define N_SETD  0x18        /* Data set element symbol */
+#define N_SETB  0x1A        /* Bss set element symbol */
+
+/* This is output from LD. */
+#define N_SETV  0x1C        /* Pointer to set vector in data area. */
+
+/* Warning symbol. The text gives a warning message, the next symbol
+   in the table will be undefined. When the symbol is referenced, the
+   message is printed. */
+
+#define N_WARNING 0x1e
+
+/* Weak symbols.  These are a GNU extension to the a.out format.  The
+   semantics are those of ELF weak symbols.  Weak symbols are always
+   externally visible.  The N_WEAK? values are squeezed into the
+   available slots.  The value of a N_WEAKU symbol is 0.  The values
+   of the other types are the definitions. */
+#define N_WEAKU 0x0d        /* Weak undefined symbol. */
+#define N_WEAKA 0x0e        /* Weak absolute symbol. */
+#define N_WEAKT 0x0f        /* Weak text symbol. */
+#define N_WEAKD 0x10        /* Weak data symbol. */
+#define N_WEAKB 0x11        /* Weak bss symbol. */
+
+#endif /* __A_OUT_GNU_H__ */
diff -r 4c83b2af0915 -r 4b81a2a3a39f src/debug/symbols.c
--- a/src/debug/symbols.c	Sun Nov 05 20:11:38 2017 +0200
+++ b/src/debug/symbols.c	Mon Nov 06 13:45:32 2017 +0100
@@ -34,6 +34,7 @@
 #include "debugInfo.h"
 #include "evaluate.h"
 #include "configuration.h"
+#include "a.out.h"
 
 typedef struct {
 	char *name;
@@ -46,6 +47,7 @@
 	int symbols;		/* initial symbol count */
 	symbol_t *addresses;	/* items sorted by address */
 	symbol_t *names;	/* items sorted by symbol name */
+	char *strtab;
 } symbol_list_t;
 
 typedef struct {
@@ -59,6 +61,8 @@
  */
 #define MAX_SYM_SIZE 32
 
+/* Magic used to denote symbols in GNU-style (a.out) format */
+#define SYMBOL_FORMAT_GNU 0x474E555f
 
 /* TODO: add symbol name/address file names to configuration? */
 static symbol_list_t *CpuSymbolsList;
@@ -295,6 +299,208 @@
 }
 
 
+/* Utility macros. Get a big endian 16- or 32-bit value from a pointer
+   to unsigned char. */
+#define get16be(c) (((c)[0] << 8) | ((c)[1]))
+#define get32be(c) (((uint32_t)((c)[0]) << 24) | ((uint32_t)((c)[1]) << 16) | ((uint32_t)((c)[2]) << 8) | ((uint32_t)((c)[3])))
+
+/**
+ * Load symbols of given type from an a.out format (GNU-style) symbol
+ * table, and add the TEXT section offset to the section symbol addresses.
+ *
+ * fp must be positioned at the first symbol table entry; 'tablesize'
+ * bytes of entries and 'strsize' bytes of string table are read from it
+ * in one go.  'stroff' is the string table offset within that data.
+ * 'sections' has TEXT, DATA and BSS section info, in that order, and
+ * 'gettype' is a mask of the symbol types to accept.
+ *
+ * Symbol names point into list->strtab instead of being allocated
+ * separately.  Return symbols list or NULL for failure.
+ */
+static symbol_list_t* symbols_load_gnu(FILE *fp, prg_section_t *sections, symtype_t gettype, Uint32 tablesize, Uint32 stroff, Uint32 strsize)
+{
+	size_t slots = tablesize / SIZEOF_STRUCT_NLIST;
+	size_t i, strx, total, len;
+	unsigned char *p;
+	symbol_t *sym;
+	symbol_list_t *list;
+	unsigned char n_type;
+	int dtypes, locals, ofiles, count;
+	static char invalid[] = "<invalid>";
+	static char empty[] = "";
+	prg_section_t *section;
+
+	/* Sizes come from the program headers.  If their sum wrapped around,
+	 * the buffer would be smaller than the symbol table parsed below.
+	 */
+	if (strsize >= 0xffffffffU - tablesize) {
+		fprintf(stderr, "ERROR: invalid symbol / string table size!\n");
+		return NULL;
+	}
+	total = (size_t)tablesize + strsize;
+
+	if (!(list = symbol_list_alloc(slots))) {
+		return NULL;
+	}
+
+	/* extra byte is for terminating the last string, see below */
+	list->strtab = malloc(total + 1);
+	if (list->strtab == NULL)
+	{
+		symbol_list_free(list);
+		return NULL;
+	}
+
+	if (fread(list->strtab, total, 1, fp) != 1)
+	{
+		perror("ERROR: reading symbols failed");
+		symbol_list_free(list);
+		return NULL;
+	}
+	/* file content isn't trusted to terminate the string table, and
+	 * names are accessed below with strlen() etc.
+	 */
+	list->strtab[total] = '\0';
+
+	p = (unsigned char *)list->strtab;
+	sym = list->names;
+
+	dtypes = ofiles = locals = count = 0;
+	for (i = 0; i < slots; i++)
+	{
+		/* on-disk entry: n_strx (4 bytes), n_type (1), n_other (1),
+		 * n_desc (2), n_value (4).  n_other & n_desc aren't needed.
+		 */
+		strx = get32be(p);
+		n_type = p[4];
+		sym->address = get32be(p + 8);
+		p += SIZEOF_STRUCT_NLIST;
+
+		if (strx)
+		{
+			if (strx >= strsize)
+			{
+				fprintf(stderr, "symbol name index %x out of range\n", (unsigned int)strx);
+				sym->name = invalid;
+			} else
+			{
+				sym->name = list->strtab + strx + stroff;
+			}
+		} else
+		{
+			sym->name = empty;
+		}
+		if (n_type & N_STAB)
+		{
+			dtypes++;
+			continue;
+		}
+		section = NULL;
+		switch (n_type & (N_TYPE|N_EXT))
+		{
+		case N_UNDF:
+		case N_UNDF|N_EXT:
+			/* shouldn't happen here */
+			locals++;
+			continue;
+		case N_ABS:
+		case N_ABS|N_EXT:
+			sym->type = SYMTYPE_ABS;
+			break;
+		case N_TEXT:
+			len = strlen(sym->name);
+			if (strchr(sym->name, '/') || (len > 2 && sym->name[len-2] == '.' && sym->name[len-1] == 'o')) {
+				ofiles++;
+				continue;
+			}
+			sym->type = SYMTYPE_TEXT;
+			section = &(sections[0]);
+			break;
+		case N_TEXT|N_EXT:
+			sym->type = SYMTYPE_TEXT;
+			section = &(sections[0]);
+			break;
+		case N_DATA:
+		case N_DATA|N_EXT:
+			sym->type = SYMTYPE_DATA;
+			section = &(sections[1]);
+			break;
+		case N_BSS:
+		case N_BSS|N_EXT:
+			sym->type = SYMTYPE_BSS;
+			section = &(sections[2]);
+			break;
+		case N_COMM:
+		case N_COMM|N_EXT:
+			/* the value of a common symbol is its size, not its
+			 * address.  If symbol sizes are ever needed, get
+			 * them here.
+			 */
+			fprintf(stderr, "WARNING: ignoring common symbol '%s' in slot %u.\n", sym->name, (unsigned int)i);
+			continue;
+		case N_FN:
+			ofiles++;
+			continue;
+		case N_SIZE:
+		case N_WARNING:
+		case N_SETA:
+		case N_SETT:
+		case N_SETD:
+		case N_SETB:
+		case N_SETV:
+			dtypes++;
+			continue;
+		case N_WEAKU:
+		case N_WEAKT:
+		case N_WEAKD:
+		case N_WEAKB:
+			locals++;
+			continue;
+		default:
+			fprintf(stderr, "WARNING: ignoring symbol '%s' in slot %u of unknown type 0x%x.\n", sym->name, (unsigned int)i, n_type);
+			continue;
+		}
+		if (!(gettype & sym->type)) {
+			continue;
+		}
+		if (sym->name[0] == '.' && sym->name[1] == 'L') {
+			locals++;
+			continue;
+		}
+		if (section) {
+			/* TEXT offset is added whatever the symbol's section is */
+			sym->address += sections[0].offset;
+			if (sym->address > (section->end + 1)) {
+				fprintf(stderr, "WARNING: ignoring symbol '%s' of %c type in slot %u with invalid offset 0x%x (>= 0x%x).\n",
+					sym->name, symbol_char(sym->type), (unsigned int)i, sym->address, section->end);
+				continue;
+			}
+		}
+		sym++;
+		count++;
+	}
+
+	if (dtypes) {
+		fprintf(stderr, "NOTE: ignored %d debugging symbols.\n", dtypes);
+	}
+	if (locals) {
+		fprintf(stderr, "NOTE: ignored %d unnamed / local symbols (= name starts with '.L').\n", locals);
+	}
+	if (ofiles) {
+		/* object file path names most likely get truncated and
+		 * as result cause unnecessary symbol name conflicts in
+		 * addition to object file addresses conflicting with
+		 * first symbol in the object file.
+		 */
+		fprintf(stderr, "NOTE: ignored %d object file names (= name has '/' or ends in '.o').\n", ofiles);
+	}
+
+	list->symbols = slots;
+	list->count = count;
+	return list;
+}
+
+
 /**
  * Print program header information.
  * Return false for unrecognized symbol table type.
@@ -318,6 +524,9 @@
 	case 0x4D694E54:	/* "MiNT" */
 		info = "GCC/MiNT executable, GST symbol table";
 		break;
+	case SYMBOL_FORMAT_GNU:	/* "GNU_" */
+		info = "GCC/MiNT executable, a.out symbol table";
+		break;
 	case 0x0:
 		info = "TOS executable, DRI / GST symbol table";
 		break;
@@ -355,6 +564,9 @@
 	int offset, reads = 0;
 	Uint16 relocflag;
 	symbol_list_t* symbols;
+	Uint32 symoff = 0;
+	Uint32 stroff = 0;
+	Uint32 strsize = 0;
 
 	/* get TEXT, DATA & BSS section sizes */
 	fseek(fp, 2, SEEK_SET);
@@ -381,6 +593,76 @@
 		fprintf(stderr, "ERROR: program header reading failed!\n");
 		return NULL;
 	}
+	/*
+	 * check for GNU-style symbol table in aexec header
+	 */
+	if (tabletype == 0x4D694E54) { /* MiNT */
+		Uint32 magic1, magic2;
+		Uint32 dummy;
+		Uint32 a_text, a_data, a_bss, a_syms, a_entry, a_trsize, a_drsize;
+		Uint32 g_tparel_pos, g_tparel_size, g_stkpos, g_symbol_format;
+
+		reads  = fread(&magic1, sizeof(magic1), 1, fp);
+		magic1 = SDL_SwapBE32(magic1);
+		reads += fread(&magic2, sizeof(magic2), 1, fp);
+		magic2 = SDL_SwapBE32(magic2);
+		if (reads == 2 &&
+			((magic1 == 0x283a001a && magic2 == 0x4efb48fa) || 	/* Original binutils: move.l 28(pc),d4; jmp 0(pc,d4.l) */
+			 (magic1 == 0x203a001a && magic2 == 0x4efb08fa))) {	/* binutils >= 2.18-mint-20080209: move.l 28(pc),d0; jmp 0(pc,d0.l) */
+			reads += fread(&dummy, sizeof(dummy), 1, fp);	/* skip a_info */
+			reads += fread(&a_text, sizeof(a_text), 1, fp);
+			a_text = SDL_SwapBE32(a_text);
+			reads += fread(&a_data, sizeof(a_data), 1, fp);
+			a_data = SDL_SwapBE32(a_data);
+			reads += fread(&a_bss, sizeof(a_bss), 1, fp);
+			a_bss = SDL_SwapBE32(a_bss);
+			reads += fread(&a_syms, sizeof(a_syms), 1, fp);
+			a_syms = SDL_SwapBE32(a_syms);
+			reads += fread(&a_entry, sizeof(a_entry), 1, fp);
+			a_entry = SDL_SwapBE32(a_entry);
+			reads += fread(&a_trsize, sizeof(a_trsize), 1, fp);
+			a_trsize = SDL_SwapBE32(a_trsize);
+			reads += fread(&a_drsize, sizeof(a_drsize), 1, fp);
+			a_drsize = SDL_SwapBE32(a_drsize);
+			reads += fread(&g_tparel_pos, sizeof(g_tparel_pos), 1, fp);
+			g_tparel_pos = SDL_SwapBE32(g_tparel_pos);
+			reads += fread(&g_tparel_size, sizeof(g_tparel_size), 1, fp);
+			g_tparel_size = SDL_SwapBE32(g_tparel_size);
+			reads += fread(&g_stkpos, sizeof(g_stkpos), 1, fp);
+			g_stkpos = SDL_SwapBE32(g_stkpos);
+			reads += fread(&g_symbol_format, sizeof(g_symbol_format), 1, fp);
+			g_symbol_format = SDL_SwapBE32(g_symbol_format);
+			if (g_symbol_format == 0)
+			{
+				tabletype = SYMBOL_FORMAT_GNU;
+			}
+			if ((a_text + (256 - 28)) != textlen)
+				fprintf(stderr, "warning: insonsistent text segment size %08x != %08x\n", textlen, a_text + (256 - 28));
+			if (a_data != datalen)
+				fprintf(stderr, "warning: insonsistent data segment size %08x != %08x\n", datalen, a_data);
+			if (a_bss != bsslen)
+				fprintf(stderr, "warning: insonsistent bss segment size %08x != %08x\n", bsslen, a_bss);
+			/*
+			 * the symbol table size in the GEMDOS header includes the string table,
+			 * the symbol table size in the exec header does not.
+			 */
+			if (tabletype == SYMBOL_FORMAT_GNU)
+			{
+				strsize = tablesize - a_syms;
+				tablesize = a_syms;
+				stroff = a_syms;
+			}
+
+			textlen = a_text + (256 - 28);
+			datalen = a_data;
+			bsslen = a_bss;
+			symoff = 0x100 + /* sizeof(extended exec header) */
+				a_text +
+				a_data +
+				a_trsize +
+				a_drsize;
+		}
+	}
 	if (!symbols_print_prg_info(tabletype, prgflags, relocflag)) {
 		return NULL;
 	}
@@ -410,26 +692,37 @@
 	sections[2].offset = start;
 	sections[2].end = start + bsslen - 1;
 
-	/* go to start of symbol table */
-	offset = 0x1C + textlen + datalen;
-	if (fseek(fp, offset, SEEK_SET) < 0) {
-		perror("ERROR: seeking to symbol table failed");
-		return NULL;
-	}
-	fprintf(stderr, "Trying to load symbol table at offset 0x%x...\n", offset);
-	symbols = symbols_load_dri(fp, sections, gettype, tablesize);
+	if (tabletype == SYMBOL_FORMAT_GNU) {
+		/* go to start of symbol table */
+		offset = symoff;
+		if (fseek(fp, offset, SEEK_SET) < 0) {
+			perror("ERROR: seeking to symbol table failed");
+			return NULL;
+		}
+		fprintf(stderr, "Trying to load symbol table at offset 0x%x...\n", offset);
+		symbols = symbols_load_gnu(fp, sections, gettype, tablesize, stroff, strsize);
+	} else {
+		/* go to start of symbol table */
+		offset = 0x1C + textlen + datalen;
+		if (fseek(fp, offset, SEEK_SET) < 0) {
+			perror("ERROR: seeking to symbol table failed");
+			return NULL;
+		}
+		fprintf(stderr, "Trying to load symbol table at offset 0x%x...\n", offset);
+		symbols = symbols_load_dri(fp, sections, gettype, tablesize);
 
-	if (symbols == INVALID_SYMBOL_OFFSETS && fseek(fp, offset, SEEK_SET) == 0) {
-		fprintf(stderr, "Re-trying with TEXT-relative BSS/DATA section offsets...\n");
-		start = DebugInfo_GetTEXT();
-		sections[1].offset = start;
-		sections[2].offset = start;
-		sections[1].end += textlen;
-		sections[2].end += (textlen + datalen);
-		symbols = symbols_load_dri(fp, sections, gettype, tablesize);
-	}
-	if (symbols == INVALID_SYMBOL_OFFSETS) {
-		return NULL;
+		if (symbols == INVALID_SYMBOL_OFFSETS && fseek(fp, offset, SEEK_SET) == 0) {
+			fprintf(stderr, "Re-trying with TEXT-relative BSS/DATA section offsets...\n");
+			start = DebugInfo_GetTEXT();
+			sections[1].offset = start;
+			sections[2].offset = start;
+			sections[1].end += textlen;
+			sections[2].end += (textlen + datalen);
+			symbols = symbols_load_dri(fp, sections, gettype, tablesize);
+		}
+		if (symbols == INVALID_SYMBOL_OFFSETS) {
+			return NULL;
+		}
 	}
 	return symbols;
 }
@@ -605,8 +898,13 @@
 		return;
 	}
 	assert(list->count);
-	for (i = 0; i < list->count; i++) {
-		free(list->names[i].name);
+	if (list->strtab) {
+		free(list->strtab);
+		list->strtab = NULL;
+	} else {
+		for (i = 0; i < list->count; i++) {
+			free(list->names[i].name);
+		}
 	}
 	free(list->addresses);
 	free(list->names);


Mail converted by MHonArc 2.6.19+ http://listengine.tuxfamily.org/