CRiSP Blog

ELF - elfrewrite finished

After a hard battle, it looks like it works. I don't promise
this will work for everyone's executables, but it works for my
Linux 32- and 64-bit crisp executables.

I had to take a totally different direction for ELF32. For ELF32, we
don't have a gap between .text and the .data/.bss and other sections, so
we cannot shoehorn in the new .hash table.

Instead, we can overwrite the .gnu.hash with a .hash conforming table -
assuming our table is no larger than .gnu.hash. (.gnu.hash is supposed
to be smaller than .hash, but it looks like for ELF32 binaries this
is not the case).

Ideally, the next step is to package up the binary and/or source
so others can play with it.

I wrote my own ELF library, rather than rely on -lelf and -lgelf, since
I hit some horror stories in those libraries and didn't fancy debugging
or coping with older and newer libraries.

My elf library tries to hide some of the 32/64 bit ELF differences,
and is "not bad" - could do with more work to shield more differences,
but the design pattern of handling ELF32 + ELF64 is fine.

I also had to enhance my very old elfdump tool (like readelf and objdump),
since I was not happy with either of those: they work "mostly", but when
tracking down ELF brokenness, they can fall over or just ignore
the issue.

I attach the code below, which won't compile, because of the
need for the libraries, but, if there's enough interest, I will make
it available.


/**********************************************************************/
/* */
/* CRiSP - Programmable editor */
/* =========================== */
/* */
/* File: elfrewrite.c */
/* Author: P. D. Fox */
/* Created: 16 May 2010 */
/* */
/* Copyright (c) 2010, Foxtrot Systems Ltd */
/* All Rights Reserved. */
/* */
/*--------------------------------------------------------------------*/
/* Description: Tool to create portable Linux binaries */
/*--------------------------------------------------------------------*/
/* $Header: Last edited: 16-May-2010 1.1 $ */
/* */
/* ld --hash-style=both [sysv|gnu|both] */
/**********************************************************************/

/*
gcc -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o elfrewrite elfrewrite.c -lelf
make make_foxlib ; gcc -g -I. -DCR_LINUX_X86_64 -Iinclude -Ifoxlib/elf -o /tmp/rw ~/tmp/rw.c bin/foxlib.a -lelf && cp /tmp/x1 /tmp/x && /tmp/rw /tmp/x
*/

# include <machine.h>
# include <stdio.h>
# include <string.h>
# include <stdlib.h>
# include <unistd.h>
# include <libelf.h>
# include <elf.h>
# include <gelf.h>
# include <sys/types.h>
# include <sys/stat.h>
# include <elf/elflib.h>

# define TRUE 1
# define FALSE 0

/**********************************************************************/
/*   Command line switches and per-file state shared by the patch     */
/*   routines below.                                                  */
/**********************************************************************/

/* Set by the -d switch; enables extra diagnostic printf()s. */
static int debug;
/* Set by the -v switch; makes patch_hash() report its sizing. */
static int v_flag;

/* Path of the file currently being processed (set by main()). */
char *fname;
/* Set to non-zero by the patch routines when they change the image; */
/* do_file() only writes the file back when this is set. */
int patched = FALSE;
/* Set when patch_glibc_functions() rewrites a string table entry. */
/* Not read anywhere in this file. */
int patched_strtab = FALSE;
/* Address of the replacement .hash table; set by patch_hash() and */
/* stored into the .dynamic section by patch_dynamic(). main() resets */
/* it to NULL before each file. */
void *hash_addr;
/* Handle for the file being processed, as returned by elf_read(). */
elf_t *elf;

/**********************************************************************/
/* Prototypes. */
/**********************************************************************/
int get_hash_size(int n);
int do_file(void);
void patch_dynamic(void);
void patch_glibc_functions(void);
void patch_hash(void);

/**********************************************************************/
/*   Parse the leading "-x" style switches in argv.                   */
/*                                                                    */
/*   Each argument starting with '-' is treated as a bundle of        */
/*   single-letter switches: 'd' turns on debug output, 'v' turns on  */
/*   verbose output, anything else is ignored.  Parsing stops at the  */
/*   first argument that does not start with '-'.                     */
/*                                                                    */
/*   Returns the index of the first non-switch argument (argc if      */
/*   every argument was a switch).                                    */
/**********************************************************************/
int do_switches(int argc, char **argv)
{
	int	idx = 1;

	while (idx < argc) {
		const char *opt = argv[idx];

		/* First non-switch argument: the file list starts here. */
		if (opt[0] != '-')
			break;

		for (opt++; *opt != '\0'; opt++) {
			if (*opt == 'd')
				debug = TRUE;
			else if (*opt == 'v')
				v_flag = 1;
		}
		idx++;
	}
	return idx;
}
/**********************************************************************/
/*   Entry point: parse the switches, then process every remaining    */
/*   argument as an ELF file to rewrite in place.                     */
/*                                                                    */
/*   Exits with status 1 if no file was given or if do_file()         */
/*   reported a failure for any file, otherwise 0.                    */
/**********************************************************************/
int main(int argc, char **argv)
{
	int	i, arg_index;
	int	status = 0;

	arg_index = do_switches(argc, argv);

	if (arg_index >= argc) {
		printf("Usage: elfrewrite <a.out>\n");
		exit(1);
	}

	for (i = arg_index; i < argc; i++) {
		/***********************************************/
		/*   Reset the per-file state so one file's    */
		/*   results cannot leak into the next.        */
		/***********************************************/
		patched = FALSE;
		patched_strtab = FALSE;
		hash_addr = NULL;
		fname = argv[i];

		/* Keep going after a failure, but remember it. */
		if (do_file() < 0)
			status = 1;
	}
	exit(status);
}
/**********************************************************************/
/*   Process the file named by the global 'fname': read it, apply     */
/*   the three patch passes and, if any of them changed something     */
/*   ('patched' set), write the result back over the same file.       */
/*                                                                    */
/*   Returns 0 on success (including "nothing to patch") and -1 if    */
/*   the file could not be read or could not be written back.  When   */
/*   HAVE_LIBELF_H is not defined this is a stub that prints a        */
/*   message and returns 0.                                           */
/**********************************************************************/
int
do_file(void)
{
# if !defined(HAVE_LIBELF_H)
	printf("Sorry, libelf.h not on this system - so this is a dummy\n");
	return 0;
# else
	int	ret = 0;

	if ((elf = elf_read(fname)) == NULL) {
		char *cp;
		int err = elf_get_error(&cp);
		printf("ELF error: %d %s\n", err, cp);
		return -1;
	}

	/***********************************************/
	/*   Patch GLIBC functions.                    */
	/***********************************************/
	patch_glibc_functions();

	/***********************************************/
	/*   Patch the .gnu.hash or add a .hash        */
	/*   hiding in the ELF. This must run before   */
	/*   patch_dynamic(), which consumes the       */
	/*   hash_addr it computes.                    */
	/***********************************************/
	patch_hash();

	/***********************************************/
	/*   Patch the .dynamic entries, e.g. based    */
	/*   on the hash table updates.                */
	/***********************************************/
	patch_dynamic();

	/***********************************************/
	/*   Now save the results. A failed write is   */
	/*   reported to the caller rather than being  */
	/*   silently treated as success.              */
	/***********************************************/
	if (patched) {
		if (elf_write_file(elf, fname) < 0) {
			printf("Failed to write file\n");
			perror(fname);
			ret = -1;
		}
	}

	elf_free(elf);
	/* Do not leave the global pointing at freed memory. */
	elf = NULL;
	return ret;
#endif
}
/**********************************************************************/
/*   Patch the entries which cause versioning issues or .gnu.hash     */
/*   issues on older glibcs.                                          */
/*                                                                    */
/*   Walks the .dynamic section of the global 'elf' and:              */
/*     - overwrites the tag of every DT_VERNEED, DT_VERNEEDNUM and    */
/*       DT_VERSYM entry with a filler value, so the entry no longer  */
/*       reads as a versioning tag;                                   */
/*     - turns a DT_GNU_HASH entry into a DT_HASH entry pointing at   */
/*       'hash_addr', but only when patch_hash() actually produced    */
/*       a table (hash_addr != NULL).                                 */
/*   Sets 'patched' whenever an entry is changed. Does nothing if     */
/*   there is no .dynamic section.                                    */
/**********************************************************************/
void
patch_dynamic(void)
{
	/* Replacement d_tag for the versioning entries. */
	/* NOTE(review): presumably a tag value the loader ignores - confirm. */
	enum { FILLER_TAG = 0x8000000 };
	int	dynamic;
	char	*ip;
	char	*cpend;
	size_t	entsize;

	if ((dynamic = elf_get_section_by_name(elf, ".dynamic")) < 0)
		return;

	/***********************************************/
	/*   Use the ELF entry types, not host         */
	/*   int/long, so the stride is right          */
	/*   whatever the host word size is.           */
	/***********************************************/
	entsize = elf->is_64 ? sizeof(Elf64_Dyn) : sizeof(Elf32_Dyn);

	ip = elf->e_sections[dynamic].s_data;
	cpend = ip + elf->e_sections[dynamic].s_size;

	/* Only visit complete entries; ignore a short trailing fragment. */
	for (; ip < cpend && (size_t) (cpend - ip) >= entsize; ip += entsize) {
		Elf64_Dyn *d64 = (Elf64_Dyn *) ip;
		Elf32_Dyn *d32 = (Elf32_Dyn *) ip;
		const char *name = NULL;
		long long tag;
		unsigned long long val;

		if (elf->is_64) {
			tag = d64->d_tag;
			val = d64->d_un.d_val;
		}
		else {
			tag = d32->d_tag;
			val = d32->d_un.d_val;
		}

		switch (tag) {
		  case DT_VERNEED:
			name = "DT_VERNEED";
			break;

		  case DT_VERNEEDNUM:
			name = "DT_VERNEEDNUM";
			break;

		  case DT_VERSYM:
			name = "DT_VERSYM";
			break;

		  case DT_GNU_HASH:
			/***********************************************/
			/*   No replacement table was built (e.g.      */
			/*   the file already has a .hash), so         */
			/*   leave the entry alone rather than         */
			/*   pointing DT_HASH at address zero.         */
			/***********************************************/
			if (hash_addr == NULL)
				break;

			printf("DT_GNU_HASH: %08llx -> %08llx\n",
				val,
				(unsigned long long) (unsigned long) hash_addr);
			if (elf->is_64) {
				d64->d_tag = DT_HASH;
				d64->d_un.d_ptr = (Elf64_Addr) (unsigned long) hash_addr;
			}
			else {
				d32->d_tag = DT_HASH;
				d32->d_un.d_ptr = (Elf32_Addr) (unsigned long) hash_addr;
			}
			patched = 1;
			break;

		  default:
			break;
		}

		/***********************************************/
		/*   Versioning entry: keep the value but      */
		/*   replace the tag with the filler.          */
		/***********************************************/
		if (name != NULL) {
			printf("%s: %08llx patched\n", name, val);
			if (elf->is_64)
				d64->d_tag = FILLER_TAG;
			else
				d32->d_tag = FILLER_TAG;
			patched = 1;
		}
	}
}
/**********************************************************************/
/*   Patch the versionised glibc functions to be non-versionised.     */
/*                                                                    */
/*   Scans every SHT_STRTAB section of the global 'elf' string by     */
/*   string and overwrites, in place, any string that is exactly      */
/*   "__isoc99_sscanf@@GLIBC_2.7" or "__isoc99_sscanf" with           */
/*   "sscanf". Sets 'patched' and 'patched_strtab' when it does.      */
/**********************************************************************/
void
patch_glibc_functions(void)
{
	static const char *const versioned[] = {
		"__isoc99_sscanf@@GLIBC_2.7",
		"__isoc99_sscanf",
	};
	static const char replacement[] = "sscanf";
	int	i;

	for (i = 0; i < elf->e_shnum; i++) {
		char	*cp;
		char	*cpend;

		if (elf->e_sections[i].s_type != SHT_STRTAB)
			continue;

		cp = elf->e_sections[i].s_data;
		cpend = cp + elf->e_sections[i].s_size;
		if (debug)
			printf("Examining strings in %s\n", elf_section_name(elf, i));

		while (cp < cpend) {
			size_t	k;
			/***********************************************/
			/*   Find the terminator inside the section.   */
			/*   A final string with no NUL would send     */
			/*   strlen()/strcmp() off the end of the      */
			/*   buffer, so stop there instead.            */
			/***********************************************/
			char *nul = memchr(cp, '\0', (size_t) (cpend - cp));
			if (nul == NULL)
				break;

			if (debug)
				printf("str=%s\n", cp);

			for (k = 0; k < sizeof versioned / sizeof versioned[0]; k++) {
				if (strcmp(cp, versioned[k]) != 0)
					continue;
				/* The replacement is shorter than either match, so */
				/* it fits; the next string's offset is unchanged. */
				strcpy(cp, replacement);
				patched = TRUE;
				patched_strtab = TRUE;
				printf("%s: patched %s: %s -> %s\n",
					fname, elf->e_sections[i].s_name,
					versioned[k], replacement
					);
				break;
			}
			/* Step over the original terminator, not the new one. */
			cp = nul + 1;
		}
	}
}
/**********************************************************************/
/* Patch the hash table in for older glibc implementations. */
/**********************************************************************/
void
patch_hash()
{ int i;
int hash;
int gnu_hash;
char *cp;
char *cpend;
int dynsym;
int dynstr;
int nsyms;
int seg;

int nchain, nbuckets, w_size;
int first_chain;
Elf64_Word *bucket_array;
Elf64_Word *chain_array;
Elf64_Word *wp;

/***********************************************/
/* See if we have a .hash or .gnu_hash */
/***********************************************/
hash = elf_get_section_by_name(elf, ".hash");
gnu_hash = elf_get_section_by_name(elf, ".gnu.hash");
if (hash > 0)
return;

if (debug && hash < 0) {
if (gnu_hash < 0)
printf("Missing .hash and .gnu.hash sections\n");
else
printf("Missing .hash, but have .gnu.hash section\n");
}

dynstr = elf_get_section_by_name(elf, ".dynstr");
dynsym = elf_get_section_by_name(elf, ".dynsym");
nsyms = elf->e_sections[dynsym].s_size / sizeof(Elf64_Sym);

/***********************************************/
/* Create the hash table. */
/***********************************************/
first_chain = 1;
nchain = nsyms;
nbuckets = get_hash_size(nchain);
w_size = (1 + /* zero entry unused */
1 + /* nbuckets */
1 + /* nchain */
nbuckets +
nchain) * sizeof(*wp);
wp = calloc(w_size, 1);

wp[0] = nbuckets;
wp[1] = nchain;

if (v_flag)
printf("w_size=%d, buckets=%d, chains=%d\n", w_size, nbuckets, nchain);
bucket_array = wp + 2;
chain_array = bucket_array + nbuckets;

for (i = 1; i < nchain; i++) {
Elf64_Sym *symp = (Elf64_Sym *) elf->e_sections[dynsym].s_data + i;
char *name = elf->e_sections[dynstr].s_data + symp->st_name;
int h = elf_hash((unsigned char *) name) % nbuckets;
chain_array[first_chain] = bucket_array[h];
bucket_array[h] = first_chain;
first_chain++;
}
//printf("crc=%lx\n", crc32(wp, w_size));

/***********************************************/
/* Merge this into the executable code */
/* segment. We have two strategies. For */
/* ELF32, we can overwrite the .gnu.hash */
/* section, since our hash is smaller than */
/* the generated one. (This isnt */
/* guaranteed, but gets us out of a hole). */
/* The hole is that in ELF32, the data+bss */
/* immediately follows the .text area and */
/* theres not a big enough gap to slide our */
/* new hash table into the binary. */
/* */
/* For ELF64, we tack the hash chain onto */
/* the last data section - since the */
/* .gnu.hash section is typically too small */
/* for us. (We could probably */
/* compress/optimise the hash table, but it */
/* is touch and go). */
/* */
/* We want a unified approach, but */
/* debugging and handling sections moving */
/* can cause us to need to do a full */
/* relink, which is even more treacherous. */
/***********************************************/

/***********************************************/
/* Handle ELF32. */
/***********************************************/
if (elf->is_64 == FALSE) {
if (elf_section_size(elf, gnu_hash) < w_size) {
printf("Help! Need %d section, but only have %d (.gnu.hash)\n",
w_size, elf_section_size(elf, gnu_hash));
exit(1);
}
hash_addr = elf_section_addr(elf, gnu_hash);
memcpy(elf->e_sections[gnu_hash].s_data, wp, w_size);
return;
}

/***********************************************/
/* Handle ELF64. */
/***********************************************/

/***********************************************/
/* Round up size to a 4K boundary, else */
/* kernel will KILL the binary due to */
/* misalignment of the data LOAD Phdr. */
/***********************************************/
int w_size1 = (w_size | (0x1000 -1)) + 1;

/***********************************************/
/* Find first writable section - we want */
/* the one before that, so we can dump our */
/* payload in. */
/***********************************************/
for (i = 1; i < elf->e_shnum; i++) {
if (elf->e_sections[i].s_flags & SHF_WRITE)
break;
}
i--;

if (debug || v_flag)
printf("Extending seg#%d\n", i);

/***********************************************/
/* Modify the target segment to append our */
/* payload. */
/***********************************************/
cp = malloc(elf->e_sections[i].s_size + w_size);
memcpy(cp, elf->e_sections[i].s_data, elf->e_sections[i].s_size);
memcpy(cp + elf->e_sections[i].s_size, wp, w_size);
w_size = w_size1;
free(elf->e_sections[i].s_data);
elf->e_sections[i].s_data = cp;
elf->e_sections[i].s_size += w_size;

/***********************************************/
/* We need to update the .dynamic section */
/* to put our .hash "segment" in, so */
/* remember where we left it. */
/***********************************************/
if (elf->is_64)
hash_addr = (void *) (elf->e_shdr64[i].sh_addr + elf->e_shdr64[i].sh_size);
else
hash_addr = (void *) (elf->e_shdr32[i].sh_addr + elf->e_shdr32[i].sh_size);

/***********************************************/
/* This is horrible but necessary - destroy */
/* the ".eh_frame" name so that the */
/* debugger cannot find it, otherwise we */
/* will core dump gdb when hitting a */
/* breakpoint. */
/***********************************************/
if (elf->is_64)
elf->e_shdr64[i].sh_name++;
else
elf->e_shdr32[i].sh_name++;

/***********************************************/
/* Update subsequent sections because we */
/* moved them in memory (these only affect */
/* the loadable offset, not the p/v addr). */
/***********************************************/
if (elf->is_64) {
elf->e_shdr64[i].sh_size += w_size;
for (i++; i < elf->e_shnum; i++) {
elf->e_shdr64[i].sh_offset += w_size;
}
}
else {
elf->e_shdr32[i].sh_size += w_size;
for (i++; i < elf->e_shnum; i++) {
elf->e_shdr32[i].sh_offset += w_size;
}
}

/***********************************************/
/* Now update the program header so the */
/* kernel can load the executable with the */
/* updated file layout. */
/***********************************************/
seg = elf_phdr_find_by_type(elf, PT_LOAD);
if (elf->is_64) {
Elf64_Phdr *p = (Elf64_Phdr *) elf_phdr_ptr(elf, 0);
p[seg].p_memsz += w_size;
p[seg].p_filesz += w_size;
p[seg+1].p_offset += w_size;
}
else {
Elf32_Phdr *p = (Elf32_Phdr *) elf_phdr_ptr(elf, 0);
p[seg].p_memsz += w_size;
p[seg].p_filesz += w_size;
p[seg+1].p_offset += w_size;
}
}
/**********************************************************************/
/*   Compute .hash bucket size.                                       */
/*                                                                    */
/*   Walks a fixed table of bucket counts and returns the entry       */
/*   just before the first one that is >= n, so the bucket count is   */
/*   always below the symbol count for n > 3. When n is larger than   */
/*   every entry compared, a message is printed and NBKTS is          */
/*   returned.                                                        */
/**********************************************************************/
int
get_hash_size(int n)
{
	/***********************************************/
	/*   Table of bucket sizes depending on the    */
	/*   symbol table size. The last slot          */
	/*   (NBKTS) is never selected by the scan;    */
	/*   it is only the fallback value.            */
	/***********************************************/
# define NBKTS 2579
	static const int hashsize[] = { 3, 17, 31, 37, 67, 97, 131, 197, 263, 397,
		619, 1039, 1553, 1709, 1949, 2711, 4019, 7177, NBKTS };
	const int *cur = hashsize;
	const int *last = hashsize + (sizeof hashsize / sizeof hashsize[0]) - 1;

	while (cur < last) {
		if (cur[1] >= n)
			return cur[0];
		cur++;
	}

	printf("Too many items to hash, please extend the array: %d\n", n);
	return NBKTS;
}



Post created by CRiSP v10.0.2c-b5917


Read more http://crtags.blogspot.com/2010/12/elf-elfrewrite-finished.html

Testimonials

"CRiSP overall is a terrific text editor. It contains the changes to Brief which I would have liked...It is nice to be able to at last have a good BRIEF like editor for Linux machines. In my reports to the minerals exploration industry, I have extolled the virtues of CRiSP."

paypal_horiz