[hatari-devel] Support for loading GNU-style symbols from a.out executables |
[ Thread Index |
Date Index
| More lists.tuxfamily.org/hatari-devel Archives
]
Hi,
attached is a patch that adds support for loading the GNU-style (a.out) symbol
table that gcc produces from executables.
Have fun
# HG changeset patch
# User Thorsten Otto <admin@xxxxxxxxxxx>
# Date 1509972332 -3600
# Mon Nov 06 13:45:32 2017 +0100
# Node ID 4b81a2a3a39f367ae968bff67200beea1865d7a0
# Parent 4c83b2af0915dad78ba45f3df35a3f49c6c709fe
Support loading GNU-style symbols
diff -r 4c83b2af0915 -r 4b81a2a3a39f src/debug/a.out.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/debug/a.out.h Mon Nov 06 13:45:32 2017 +0100
@@ -0,0 +1,86 @@
+/* a.out.h - Definitions and declarations for GNU-style a.out
+ binaries.
+ Written by Guido Flohr (gufl0000@xxxxxxxxxxxxxx).
+
+ This file is in the public domain. */
+
+#ifndef __A_OUT_GNU_H__
+#define __A_OUT_GNU_H__ 1
+
+struct nlist { /* one a.out symbol table entry, in-memory form (the on-disk entry is SIZEOF_STRUCT_NLIST bytes) */
+ union {
+ const char *n_name; /* in memory address */
+ struct nlist *n_next; /* NOTE(review): not referenced anywhere in this patch */
+ size_t n_strx; /* string table offset */
+ } n_un;
+ unsigned char n_type; /* one of the N_* type values below, possibly or'ed with N_EXT */
+ char n_other; /* read but ignored by symbols_load_gnu() */
+ short n_desc; /* read but ignored by symbols_load_gnu() */
+ uint32_t n_value; /* symbol value; symbols_load_gnu() uses it as the symbol address */
+};
+
+/* sizeof(struct nlist) on disk: n_strx (4) + n_type (1) + n_other (1) + n_desc (2) + n_value (4) bytes */
+#define SIZEOF_STRUCT_NLIST 12
+
+/* Symbol type values for n_type; the even values leave bit 0 free for N_EXT. */
+#define N_UNDF 0x00 /* undefined */
+#define N_ABS 0x02 /* absolute */
+#define N_TEXT 0x04 /* text */
+#define N_DATA 0x06 /* data */
+#define N_BSS 0x08 /* bss */
+#define N_SIZE 0x0c /* pseudo type, defines a symbol's size */
+#define N_FN 0x1f /* File name of a .o file */
+#define N_COMM 0x12 /* common (internal to ld) */
+
+#define N_EXT 0x01 /* external bit, or'ed in */
+#define N_TYPE 0x1e /* mask for all the type bits */
+#define N_STAB 0xe0 /* if any of these bits set, don't discard */
+
+/* The following type indicates the definition of a symbol as being
+ an indirect reference to another symbol. The other symbol
+ appears as an undefined reference, immediately following this symbol.
+
+ Indirection is asymmetrical. The other symbol's value will be used
+ to satisfy requests for the indirect symbol, but not vice versa.
+ If the other symbol does not have a definition, libraries will
+ be searched to find a definition. */
+#define N_INDR 0x0a /* indirect reference to the following symbol */
+
+/* The following symbols refer to set elements.
+ All the N_SET[ATDB] symbols with the same name form one set.
+ Space is allocated for the set in the text section, and each set
+ element's value is stored into one word of the space.
+ The first word of the space is the length of the set (number of elements).
+
+ The address of the set is made into an N_SETV symbol
+ whose name is the same as the name of the set.
+ This symbol acts like a N_DATA global symbol
+ in that it can satisfy undefined external references. */
+
+/* These appear as input to LD, in a .o file. */
+#define N_SETA 0x14 /* Absolute set element symbol */
+#define N_SETT 0x16 /* Text set element symbol */
+#define N_SETD 0x18 /* Data set element symbol */
+#define N_SETB 0x1A /* Bss set element symbol */
+
+/* This is output from LD. */
+#define N_SETV 0x1C /* Pointer to set vector in data area. */
+
+/* Warning symbol. The text gives a warning message, the next symbol
+ in the table will be undefined. When the symbol is referenced, the
+ message is printed. */
+
+#define N_WARNING 0x1e /* NOTE: numerically the same value as the N_TYPE mask */
+
+/* Weak symbols. These are a GNU extension to the a.out format. The
+ semantics are those of ELF weak symbols. Weak symbols are always
+ externally visible. The N_WEAK? values are squeezed into the
+ available slots. The value of a N_WEAKU symbol is 0. The values
+ of the other types are the definitions. */
+#define N_WEAKU 0x0d /* Weak undefined symbol. */
+#define N_WEAKA 0x0e /* Weak absolute symbol. */
+#define N_WEAKT 0x0f /* Weak text symbol. */
+#define N_WEAKD 0x10 /* Weak data symbol. */
+#define N_WEAKB 0x11 /* Weak bss symbol. */
+
+#endif /* __A_OUT_GNU_H__ */
diff -r 4c83b2af0915 -r 4b81a2a3a39f src/debug/symbols.c
--- a/src/debug/symbols.c Sun Nov 05 20:11:38 2017 +0200
+++ b/src/debug/symbols.c Mon Nov 06 13:45:32 2017 +0100
@@ -34,6 +34,7 @@
#include "debugInfo.h"
#include "evaluate.h"
#include "configuration.h"
+#include "a.out.h"
typedef struct {
char *name;
@@ -46,6 +47,7 @@
int symbols; /* initial symbol count */
symbol_t *addresses; /* items sorted by address */
symbol_t *names; /* items sorted by symbol name */
+ char *strtab;
} symbol_list_t;
typedef struct {
@@ -59,6 +61,8 @@
*/
#define MAX_SYM_SIZE 32
+/* Magic used to denote symbols in GNU-style (a.out) format */
+#define SYMBOL_FORMAT_GNU 0x474E555f
/* TODO: add symbol name/address file names to configuration? */
static symbol_list_t *CpuSymbolsList;
@@ -295,6 +299,208 @@
}
+/* Utility macros. Get a big-endian (most significant byte first) 16- or 32 bit value from a pointer to
+ unsigned char. The argument is expanded once per byte read, so it must not have side effects. */
+#define get16be(c) (((c)[0] << 8) | ((c)[1]))
+#define get32be(c) (((uint32_t)((c)[0]) << 24) | ((uint32_t)((c)[1]) << 16) | ((uint32_t)((c)[2]) << 8) | ((uint32_t)((c)[3])))
+
+/**
+ * Load symbols of the given type(s) from a GNU-style (a.out) symbol
+ * table and relocate their addresses by the TEXT section offset.
+ *
+ * fp:        file positioned at the start of the symbol table
+ * sections:  TEXT, DATA and BSS section info, in that order
+ * gettype:   mask of symbol types to load
+ * tablesize: size of the nlist array in bytes
+ * stroff:    offset of the string table from the start of the nlist array
+ * strsize:   size of the string table in bytes
+ *
+ * The nlist array and the string table are read into a single buffer
+ * that is stored in list->strtab; the symbol names point into that
+ * buffer (or to static strings) and must not be freed individually.
+ *
+ * Return symbols list or NULL for failure.
+ */
+static symbol_list_t* symbols_load_gnu(FILE *fp, prg_section_t *sections, symtype_t gettype, Uint32 tablesize, Uint32 stroff, Uint32 strsize)
+{
+	static char invalid[] = "<invalid>";
+	static char empty[] = "";
+	const size_t slots = tablesize / SIZEOF_STRUCT_NLIST;
+	/* sum in size_t: adding the two Uint32 values directly wraps at 32 bits */
+	const size_t total = (size_t)tablesize + strsize;
+	int dtypes = 0, locals = 0, ofiles = 0, count = 0;
+	const unsigned char *p;
+	symbol_list_t *list;
+	symbol_t *sym;
+	size_t i;
+
+	if (total < tablesize) {
+		/* size_t is too narrow for the sizes given in the file */
+		fprintf(stderr, "ERROR: symbol table size overflow!\n");
+		return NULL;
+	}
+
+	if (!(list = symbol_list_alloc(slots))) {
+		return NULL;
+	}
+
+	list->strtab = malloc(total);
+	if (list->strtab == NULL) {
+		symbol_list_free(list);
+		return NULL;
+	}
+
+	if (fread(list->strtab, total, 1, fp) != 1) {
+		perror("ERROR: reading symbols failed");
+		/* symbol_list_free() is not part of this change; release the
+		 * buffer here so that it can neither leak nor be freed twice.
+		 */
+		free(list->strtab);
+		list->strtab = NULL;
+		symbol_list_free(list);
+		return NULL;
+	}
+
+	/* Names are used as C strings below. Force a terminator at the
+	 * end of the buffer so that a malformed file cannot make
+	 * string functions read past the allocation.
+	 */
+	if (strsize) {
+		list->strtab[total - 1] = '\0';
+	}
+
+	p = (const unsigned char *)list->strtab;
+	sym = list->names;
+
+	/* 'sym' advances only when a symbol is accepted, so a skipped
+	 * entry's slot gets overwritten by the next one.
+	 */
+	for (i = 0; i < slots; i++, p += SIZEOF_STRUCT_NLIST)
+	{
+		/* on-disk entry: n_strx (4 bytes), n_type (1), n_other (1),
+		 * n_desc (2), n_value (4); n_other and n_desc are not needed
+		 */
+		const size_t strx = get32be(p);
+		const unsigned char n_type = p[4];
+		prg_section_t *section = NULL;
+		size_t len;
+
+		sym->address = get32be(p + 8);
+
+		if (strx == 0) {
+			sym->name = empty;
+		} else if (strx >= strsize || stroff > total || strx >= total - stroff) {
+			fprintf(stderr, "symbol name index %x out of range\n", (unsigned int)strx);
+			sym->name = invalid;
+		} else {
+			sym->name = list->strtab + strx + stroff;
+		}
+
+		if (n_type & N_STAB) {
+			dtypes++;
+			continue;
+		}
+
+		switch (n_type & (N_TYPE|N_EXT))
+		{
+		case N_UNDF:
+		case N_UNDF|N_EXT:
+			/* shouldn't happen here */
+			locals++;
+			continue;
+		case N_ABS:
+		case N_ABS|N_EXT:
+			sym->type = SYMTYPE_ABS;
+			break;
+		case N_TEXT:
+			len = strlen(sym->name);
+			if (strchr(sym->name, '/') || (len > 2 && sym->name[len-2] == '.' && sym->name[len-1] == 'o')) {
+				ofiles++;
+				continue;
+			}
+			sym->type = SYMTYPE_TEXT;
+			section = &(sections[0]);
+			break;
+		case N_TEXT|N_EXT:
+			sym->type = SYMTYPE_TEXT;
+			section = &(sections[0]);
+			break;
+		case N_DATA:
+		case N_DATA|N_EXT:
+			sym->type = SYMTYPE_DATA;
+			section = &(sections[1]);
+			break;
+		case N_BSS:
+		case N_BSS|N_EXT:
+			sym->type = SYMTYPE_BSS;
+			section = &(sections[2]);
+			break;
+		case N_COMM:
+		case N_COMM|N_EXT:
+			/* the value of a common symbol is its size, not its address;
+			 * if we ever want to know a symbols size, get that here
+			 */
+			fprintf(stderr, "WARNING: ignoring common symbol '%s' in slot %u.\n", sym->name, (unsigned int)i);
+			continue;
+		case N_FN:
+			ofiles++;
+			continue;
+		case N_SIZE:
+		case N_WARNING:
+		case N_SETA:
+		case N_SETT:
+		case N_SETD:
+		case N_SETB:
+		case N_SETV:
+			dtypes++;
+			continue;
+		case N_WEAKU:
+		case N_WEAKA:
+		case N_WEAKT:
+		case N_WEAKD:
+		case N_WEAKB:
+			locals++;
+			continue;
+		default:
+			fprintf(stderr, "WARNING: ignoring symbol '%s' in slot %u of unknown type 0x%x.\n", sym->name, (unsigned int)i, n_type);
+			continue;
+		}
+
+		if (!(gettype & sym->type)) {
+			continue;
+		}
+		if (sym->name[0] == '.' && sym->name[1] == 'L') {
+			locals++;
+			continue;
+		}
+		if (section) {
+			/* the TEXT offset is applied to symbols of every section;
+			 * presumably a.out values are all relative to the TEXT
+			 * start -- TODO confirm
+			 */
+			sym->address += sections[0].offset;
+			if (sym->address > (section->end + 1)) {
+				fprintf(stderr, "WARNING: ignoring symbol '%s' of %c type in slot %u with invalid offset 0x%x (>= 0x%x).\n",
+					sym->name, symbol_char(sym->type), (unsigned int)i, sym->address, section->end);
+				continue;
+			}
+		}
+		sym++;
+		count++;
+	}
+
+	if (dtypes) {
+		fprintf(stderr, "NOTE: ignored %d debugging symbols.\n", dtypes);
+	}
+	if (locals) {
+		fprintf(stderr, "NOTE: ignored %d unnamed / local symbols (= name starts with '.L').\n", locals);
+	}
+	if (ofiles) {
+		/* object file path names most likely get truncated and
+		 * as result cause unnecessary symbol name conflicts in
+		 * addition to object file addresses conflicting with
+		 * first symbol in the object file.
+		 */
+		fprintf(stderr, "NOTE: ignored %d object file names (= name has '/' or ends in '.o').\n", ofiles);
+	}
+
+	list->symbols = slots;
+	list->count = count;
+	return list;
+}
+
+
/**
* Print program header information.
* Return false for unrecognized symbol table type.
@@ -318,6 +524,9 @@
case 0x4D694E54: /* "MiNT" */
info = "GCC/MiNT executable, GST symbol table";
break;
+ case SYMBOL_FORMAT_GNU: /* "GNU_" */
+ info = "GCC/MiNT executable, a.out symbol table";
+ break;
case 0x0:
info = "TOS executable, DRI / GST symbol table";
break;
@@ -355,6 +564,9 @@
int offset, reads = 0;
Uint16 relocflag;
symbol_list_t* symbols;
+ Uint32 symoff = 0;
+ Uint32 stroff = 0;
+ Uint32 strsize = 0;
/* get TEXT, DATA & BSS section sizes */
fseek(fp, 2, SEEK_SET);
@@ -381,6 +593,76 @@
fprintf(stderr, "ERROR: program header reading failed!\n");
return NULL;
}
+ /*
+ * check for GNU-style symbol table in aexec header
+ */
+ if (tabletype == 0x4D694E54) { /* MiNT */
+ Uint32 magic1, magic2;
+ Uint32 dummy;
+ Uint32 a_text, a_data, a_bss, a_syms, a_entry, a_trsize, a_drsize;
+ Uint32 g_tparel_pos, g_tparel_size, g_stkpos, g_symbol_format;
+
+ reads = fread(&magic1, sizeof(magic1), 1, fp);
+ magic1 = SDL_SwapBE32(magic1);
+ reads += fread(&magic2, sizeof(magic2), 1, fp);
+ magic2 = SDL_SwapBE32(magic2);
+ if (reads == 2 &&
+ ((magic1 == 0x283a001a && magic2 == 0x4efb48fa) || /* Original binutils: move.l 28(pc),d4; jmp 0(pc,d4.l) */
+ (magic1 == 0x203a001a && magic2 == 0x4efb08fa))) { /* binutils >= 2.18-mint-20080209: move.l 28(pc),d0; jmp 0(pc,d0.l) */
+ reads += fread(&dummy, sizeof(dummy), 1, fp); /* skip a_info */
+ reads += fread(&a_text, sizeof(a_text), 1, fp);
+ a_text = SDL_SwapBE32(a_text);
+ reads += fread(&a_data, sizeof(a_data), 1, fp);
+ a_data = SDL_SwapBE32(a_data);
+ reads += fread(&a_bss, sizeof(a_bss), 1, fp);
+ a_bss = SDL_SwapBE32(a_bss);
+ reads += fread(&a_syms, sizeof(a_syms), 1, fp);
+ a_syms = SDL_SwapBE32(a_syms);
+ reads += fread(&a_entry, sizeof(a_entry), 1, fp);
+ a_entry = SDL_SwapBE32(a_entry);
+ reads += fread(&a_trsize, sizeof(a_trsize), 1, fp);
+ a_trsize = SDL_SwapBE32(a_trsize);
+ reads += fread(&a_drsize, sizeof(a_drsize), 1, fp);
+ a_drsize = SDL_SwapBE32(a_drsize);
+ reads += fread(&g_tparel_pos, sizeof(g_tparel_pos), 1, fp);
+ g_tparel_pos = SDL_SwapBE32(g_tparel_pos);
+ reads += fread(&g_tparel_size, sizeof(g_tparel_size), 1, fp);
+ g_tparel_size = SDL_SwapBE32(g_tparel_size);
+ reads += fread(&g_stkpos, sizeof(g_stkpos), 1, fp);
+ g_stkpos = SDL_SwapBE32(g_stkpos);
+ reads += fread(&g_symbol_format, sizeof(g_symbol_format), 1, fp);
+ g_symbol_format = SDL_SwapBE32(g_symbol_format);
+ if (g_symbol_format == 0)
+ {
+ tabletype = SYMBOL_FORMAT_GNU;
+ }
+ if ((a_text + (256 - 28)) != textlen)
+ fprintf(stderr, "warning: insonsistent text segment size %08x != %08x\n", textlen, a_text + (256 - 28));
+ if (a_data != datalen)
+ fprintf(stderr, "warning: insonsistent data segment size %08x != %08x\n", datalen, a_data);
+ if (a_bss != bsslen)
+ fprintf(stderr, "warning: insonsistent bss segment size %08x != %08x\n", bsslen, a_bss);
+ /*
+ * the symbol table size in the GEMDOS header includes the string table,
+ * the symbol table size in the exec header does not.
+ */
+ if (tabletype == SYMBOL_FORMAT_GNU)
+ {
+ strsize = tablesize - a_syms;
+ tablesize = a_syms;
+ stroff = a_syms;
+ }
+
+ textlen = a_text + (256 - 28);
+ datalen = a_data;
+ bsslen = a_bss;
+ symoff = 0x100 + /* sizeof(extended exec header) */
+ a_text +
+ a_data +
+ a_trsize +
+ a_drsize;
+ }
+ }
if (!symbols_print_prg_info(tabletype, prgflags, relocflag)) {
return NULL;
}
@@ -410,26 +692,37 @@
sections[2].offset = start;
sections[2].end = start + bsslen - 1;
- /* go to start of symbol table */
- offset = 0x1C + textlen + datalen;
- if (fseek(fp, offset, SEEK_SET) < 0) {
- perror("ERROR: seeking to symbol table failed");
- return NULL;
- }
- fprintf(stderr, "Trying to load symbol table at offset 0x%x...\n", offset);
- symbols = symbols_load_dri(fp, sections, gettype, tablesize);
+ if (tabletype == SYMBOL_FORMAT_GNU) {
+ /* go to start of symbol table */
+ offset = symoff;
+ if (fseek(fp, offset, SEEK_SET) < 0) {
+ perror("ERROR: seeking to symbol table failed");
+ return NULL;
+ }
+ fprintf(stderr, "Trying to load symbol table at offset 0x%x...\n", offset);
+ symbols = symbols_load_gnu(fp, sections, gettype, tablesize, stroff, strsize);
+ } else {
+ /* go to start of symbol table */
+ offset = 0x1C + textlen + datalen;
+ if (fseek(fp, offset, SEEK_SET) < 0) {
+ perror("ERROR: seeking to symbol table failed");
+ return NULL;
+ }
+ fprintf(stderr, "Trying to load symbol table at offset 0x%x...\n", offset);
+ symbols = symbols_load_dri(fp, sections, gettype, tablesize);
- if (symbols == INVALID_SYMBOL_OFFSETS && fseek(fp, offset, SEEK_SET) == 0) {
- fprintf(stderr, "Re-trying with TEXT-relative BSS/DATA section offsets...\n");
- start = DebugInfo_GetTEXT();
- sections[1].offset = start;
- sections[2].offset = start;
- sections[1].end += textlen;
- sections[2].end += (textlen + datalen);
- symbols = symbols_load_dri(fp, sections, gettype, tablesize);
- }
- if (symbols == INVALID_SYMBOL_OFFSETS) {
- return NULL;
+ if (symbols == INVALID_SYMBOL_OFFSETS && fseek(fp, offset, SEEK_SET) == 0) {
+ fprintf(stderr, "Re-trying with TEXT-relative BSS/DATA section offsets...\n");
+ start = DebugInfo_GetTEXT();
+ sections[1].offset = start;
+ sections[2].offset = start;
+ sections[1].end += textlen;
+ sections[2].end += (textlen + datalen);
+ symbols = symbols_load_dri(fp, sections, gettype, tablesize);
+ }
+ if (symbols == INVALID_SYMBOL_OFFSETS) {
+ return NULL;
+ }
}
return symbols;
}
@@ -605,8 +898,13 @@
return;
}
assert(list->count);
- for (i = 0; i < list->count; i++) {
- free(list->names[i].name);
+ if (list->strtab) {
+ free(list->strtab);
+ list->strtab = NULL;
+ } else {
+ for (i = 0; i < list->count; i++) {
+ free(list->names[i].name);
+ }
}
free(list->addresses);
free(list->names);