#! /opt/local/bin/perl -w # loadimm - a utility to examine or modify SPARC "load immediate" instructions # Dave Plonka, Dec 4 1999 =head1 NAME loadimm - a utility to examine or modify SPARC "load immediate" instructions in ELF 32-bit SPARC executables =head1 SYNOPSIS loadimm [-Vv] [-e] [-x] [-a] [ <-n new_value | -i inc_value> ] constant executable ... =head1 DESCRIPTION This utility is used to examine or modify the "load immediate" instructions in ELF 32-bit SPARC executables. These instructions are those which load "hard-coded" 32-bit constant values into registers. As-is, this utility will probably only be of use under Solaris on a 32-bit SPARC machine because (a) the "elfprint" utility it uses is based on the source found in Solaris' elf(3E) man page, (b) "elfprint" requires libelf(4) to link, and (c) it invokes adb(1), expecting it to work exactly like Solaris' adb(1) command. The options and arguments are: =over 4 =item -v be verbose with messages (mnemonic: 'v'erbose) =item -e just print the source to the "elfprint" utility to standard output. (This is for redirecting to "elfprint.c".) This utility is used to determine the attributes of the executables' text segments. ("elfprint" must be compiled and installed in the user's PATH before "loadimm" is fully functional.) mnemonic: 'e'lfprint =item -x show addresses in hex (mnemonic: he'x') =item -a invoke adb(1). If C<-n> or C<-i> is specified, adb(1) will be invoked in write mode (i.e. C<adb -w>) and will modify the executable(s). (mnemonic: invoke 'a'db) =item -n new_value change constant to this new value. This value can be specified in decimal or hexadecimal (with "0x" prefix). (mnemonic: 'n'ew value) =item -i inc_value increment constant by this value (mnemonic: 'i'ncrement) =item constant The constant value to be found or changed (if C<-n> or C<-i> is used). This value can be specified in decimal or hex (with "0x" prefix). =item executable The name of the 32-bit SPARC ELF executable to be scanned or modified (if C<-n> or C<-i> is used). 
=back On the SPARC, as with many (most, all?) RISC processors, it generally takes more than one instruction to load a long word constant into a register. This is most commonly done, for example, like this: sethi %hi(val), %r17 or %r17, %lo(val), %r17 or like this: sethi %hi(val), %r17 add %r17, %lo(val), %r17 So, if we can determine the file offset and size of the text segment within the executable, it is a simple matter of programming to modify those instructions to load a different value. This is useful in executables for which one doesn't have the source code but would like to change some hard-coded "magic numbers" within the executable. For instance, one can change a hard-coded TCP port number to which a given executable bind(2)s and listen(2)s. This utility works by: =over 4 =item 1 using an external command called "elfprint". The "elfprint" used here is a slightly modified version of the example.c in the Solaris 2.6 elf(3E) man page. It acts like the "elfprint" command demonstrated there, but also prints the file offset, size, and address of each segment. (Invoke this utility with the C<-e> option to generate the "elfprint" source, then make and install that executable in the PATH.) =item 2 scanning the text segment for what look like "load immediate" instructions, i.e. SETHI followed by OR or SETHI followed by ADD. (Precalculated bitmasks are used to identify these instructions, rather than trying to dis(1)assemble/decompile the executables which would be prohibitively time consuming. strtoul was *very* useful here.) =item 3 optionally invoking adb(1) to patch those instruction pairs within the executable. (If the C<-a> option is not specified, it just shows you the adb(1) commands that would perform the change.) =back =head1 CAVEATS This is a brute force approach to modifying the executable without trying to understand the context in which the instructions occur. 
As such it could do bad things - specifically, it could modify all the occurrences of instructions that load the given constant, rather than just the ones sufficient to effect the change that you intended. If in doubt, don't use C<-i> or C<-n> with C<-a>! Instead, just do, for example: $ loadimm 0xd00d my_executable and then invoke C<adb -w> manually. As you invoke the "suggested" C<adb> commands, carefully consider the context in which the target instructions (which are displayed with the C<adb> C<?i> commands) occur before issuing the C<?W> adb instructions (which actually perform the modifications.) Of course, always save a backup of your executable and the file systems containing the files/databases on which it operates in the event that something goes terribly wrong. Also, if you modify an executable authored by someone else (which is presumably what you're doing since you don't have source code), *don't* report a bug that has only been observed in the modified executable. (Once the executable has been modified - essentially all warranties/guarantees are void.) Go back to the original, and reproduce the bug there before calling for support. Before doing an update, it would be a good idea to scan your executable to be sure it doesn't already contain other references to your new constant's value, so that you can be sure that, in the future, it would be possible to locate the modified instructions in case you want to undo what's been done. =head1 EXAMPLE If you'd like an executable to bind port 0xbabe rather than 0xd00d, follow these steps: First, if you haven't already done so, build the "elfprint" utility (Note that this utility differs slightly from the "elfprint" that is shown in the Solaris elf(3E) man page): $ loadimm -e > elfprint.c $ chmod +x elfprint.c $ ./elfprint.c # yes, actually execute the source file (as a shell script)! 
gcc -o ./elfprint ./elfprint.c -lelf
$ # if necessary, install "elfprint" so that it is in your PATH

Check that a "load immediate 0xbabe" instruction doesn't already exist in
the executable:

   $ loadimm -v 0xbabe my_executable

If no occurrences are found, do the modification:

   $ loadimm -n 0xbabe 0xd00d my_executable

If ever necessary, revert to original:

   $ loadimm -n 0xd00d 0xbabe my_executable

=head1 SEE ALSO

L<adb(1)>, L<elf(3E)>, L<dis(1)>

=head1 COPYRIGHT

Copyright 1999-2000 Dave Plonka.

=head1 AUTHOR

Dave Plonka

=cut

use strict;
use warnings;

use Getopt::Std;
use POSIX;    # for SEEK_SET, strtoul
use Config;
use FindBin;

# The scanner decodes big-endian 32-bit SPARC instructions and drives the
# Solaris flavour of adb(1), so refuse to run anywhere else.
if ('sun4-solaris' ne $Config{archname}) {    # only works on SPARC (with ELF)
    die "Invalid architecture: $Config{archname}!\n";
}
die "32-bit long words are required!\n"
    unless 4 == length(pack('L', 0));         # must be 32-bit long words!

my $elfprint = 'elfprint';        # from PATH
my $adb      = '/usr/bin/adb';

# Command line options:
#   s - skip to text segment at this offset (overridden by "elfprint" output)
#   t - stop scanning after this many bytes (overridden by "elfprint" output)
#   v - verbose progress messages on STDERR
#   V - very verbose (show every 32-bit value in the scanned segment)
#   x - show addresses in hex rather than decimal
#   a - pipe the generated commands into adb(1)
#   n - new value for the constant;  i - increment for the constant
#   e - print the embedded "elfprint" C source and exit
my %opt = (s => 0, t => 0);
if (   !getopts('s:Vvt:xi:n:ae', \%opt)
    || (!$opt{e} && !@ARGV)
    || (defined $opt{i} && defined $opt{n}))
{
    die usage();
}

if ($opt{e}) {
    # The C source lives after __END__ and is readable through DATA.
    print <DATA>;
    exit 0;
}

my $addr_format = $opt{x} ? '%08x' : '%d';

# Everything is confined to 32 bits so that negative or overflowing values
# can never spill into the opcode bits of a patched instruction.
my $val = parse_number(shift(@ARGV), 'constant') & 0xffffffff;

# Options are tested with defined() so that "-n 0" is honoured like "-n 0x0".
my $updating = defined($opt{n}) || defined($opt{i});
my $new_val;
if (defined $opt{n}) {
    $new_val = parse_number($opt{n}, 'new value') & 0xffffffff;
}
elsif (defined $opt{i}) {
    $new_val = ($val + parse_number($opt{i}, 'increment')) & 0xffffffff;
}

if ($opt{V}) {
    printf("sethi %%hi(0x%x), %%o0\n", hi($val));
    printf("add %%o0, 0x%x, %%o1\n", lo($val));
    printf("or %%o0, 0x%x, %%o1\n", lo($val));
}

# Below, 'v' chars indicate the value (22 bits for sethi, 13 bits for add/or),
# 'x' is the destination register and 'y' the source register (both ignored):
# sethi: 00xxxxx100vvvvvvvvvvvvvvvvvvvvvv
my $sethi_mask  = strtoul('11000001111111111111111111111111', 2);
my $sethi_value = hi($val) | strtoul('00000001000000000000000000000000', 2);
# add: 10xxxxx0?0000yyyyy1vvvvvvvvvvvvv
my $add_mask  = strtoul('11000001011110000011111111111111', 2);
my $add_value = lo($val) | strtoul('10000000000000000010000000000000', 2);
# or: 10xxxxx000010yyyyy1vvvvvvvvvvvvv
my $or_mask  = strtoul('11000001111110000011111111111111', 2);
my $or_value = lo($val) | strtoul('10000000000100000010000000000000', 2);

# Bits of the original instructions that survive a patch (everything except
# the immediate field).
my $sethi_keep = strtoul('11111111110000000000000000000000', 2);
my $arith_keep = strtoul('11111111111111111110000000000000', 2);

# Instructions that may follow a matching sethi.  A word cannot match both
# entries because the "or" pattern requires a bit the "add" pattern forbids.
my @second_insns = (
    { label => 'add : ', mask => $add_mask, value => $add_value },
    { label => 'or : ',  mask => $or_mask,  value => $or_value },
);

foreach my $file (@ARGV) {
    my $in;
    if (!open($in, '<', $file)) {
        warn "$file - open: $!\n";
        next;
    }
    binmode($in);

    # adb commands go to a private pipe (or to STDOUT when -a is not given).
    # STDOUT itself is never re-opened, so diagnostic dumps cannot end up
    # being interpreted by adb and every file gets a fresh adb process.
    my $out = \*STDOUT;
    my $adb_pipe;
    if ($opt{a}) {
        die "\"./core\" could interfere with adb!\n" if (-f 'core');
        my @adb_cmd = $updating ? ($adb, '-w', $file) : ($adb, $file);
        # List form: no shell is involved, so any file name is safe.
        open($adb_pipe, '|-', @adb_cmd)
            or die "$file - cannot run $adb: $!\n";
        $out = $adb_pipe;
    }

    # Ask "elfprint" where the text section lives.  If several lines match,
    # the last one wins.
    my ($text_off, $text_size, $text_addr) = ($opt{s}, $opt{t}, 0);
    print(STDERR "$file - \"elfprint\"... ") if $opt{v};
    if (open(my $sections, '-|', $elfprint, $file)) {
        while (my $line = <$sections>) {
            if ($line =~ m/text\s+offset:\s+(\d+)\s+size:\s+(\d+)\s+address:\s+(\d+)/) {
                ($text_off, $text_size, $text_addr) = ($1, $2, $3);
            }
        }
        close($sections);
    }
    if (!$text_size || !$text_off || !$text_addr) {
        die "\n$file - Couldn't determine text offset ($text_off), size ($text_size), or address ($text_addr)!\n";
    }
    printf(STDERR "text offset %d, size %d, addr 0x%x\n",
           $text_off, $text_size, $text_addr) if $opt{v};

    # skip leading bytes
    die "$file - sysseek(,$text_off, SEEK_SET) != $text_off!\n"
        unless $text_off == sysseek($in, $text_off, SEEK_SET);

    print STDERR "adb -w $file\n" if $updating && $opt{v};

    my $buffer;
    my $sethi  = 0;    # the previous word if it was a matching sethi, else 0
    my $offset = 0;    # byte offset of the current word within the text
    SCAN: while (1) {
        my $got = sysread($in, $buffer, 8192);
        die "$file - sysread: $!\n" unless defined $got;
        last SCAN if 0 == $got;

        # SPARC instructions are big-endian 32-bit words.
        foreach my $word (unpack('N*', $buffer)) {
            last SCAN if $offset >= $text_size;

            printf("${addr_format} %08x\n", $text_off + $offset, $word)
                if $opt{V};

            if ($sethi) {    # look for "add" or "or"
                foreach my $insn (@second_insns) {
                    next unless $insn->{value} == ($word & $insn->{mask});

                    my $hi_addr = $text_addr + $offset - 4;
                    my $lo_addr = $text_addr + $offset;
                    if ($opt{a} || $updating) {
                        # show the pair, patch it, then show it again
                        printf {$out} "%x?i\n%x?i\n", $hi_addr, $lo_addr;
                        if ($updating) {
                            printf {$out} "%x?W0x%x\n", $hi_addr,
                                ($sethi & $sethi_keep) | hi($new_val);
                            printf {$out} "%x?W0x%x\n", $lo_addr,
                                ($word & $arith_keep) | lo($new_val);
                            printf {$out} "%x?i\n%x?i\n", $hi_addr, $lo_addr;
                        }
                    }
                    else {
                        printf("sethi: ${addr_format} %08x\n",
                               $text_off + $offset - 4, $sethi);
                        printf($insn->{label} . "${addr_format} %08x ! 0x%x\n",
                               $text_off + $offset, $word, $val);
                    }
                    last;
                }
            }

            # look for sethi
            $sethi = ($sethi_value == ($word & $sethi_mask)) ? $word : 0;
            $offset += 4;
        }
    }

    print {$out} "\$q\n" if $updating;
    close($in);

    if ($adb_pipe) {
        # wait for adb to exit(2)...
        close($adb_pipe)
            or warn "$file - $adb: "
                  . ($! ? "close: $!" : 'exit status ' . ($? >> 8)) . "\n";
    }
}

exit;

# Returns the usage message.
sub usage {
    return <<_EOF_;
usage: $FindBin::Script [-Vv] [-e] [-x] [-a] [ <-n new_value | -i inc_value> ] constant executable [...]

       -v - be verbose with messages (mnemonic: 'v'erbose)
       -e - just print the source to the "elfprint" utility to standard output.
            This utility is used to determine the attributes of the
            executables' text segments.
            ("elfprint" must be compiled installed in the user's PATH before
            \"$FindBin::Script\" is fully functional.)
            mnemonic: 'e'lfprint
       -x - show addresses in hex (mnemonic: he'x')
       -a - invoke adb(1). If "-i" or "-n" is also specified, adb(1) will
            modify the executable(s). (mnemonic: invoke 'a'db)
       -n new_value - change constant to this new value.
            This value can be specified in decimal or hexadecimal (with "0x"
            prefix). (mnemonic: 'n'ew value)
       -i inc_value
            increment constant by this value (mnemonic: 'i'ncrement)
       constant
            The constant value to be found or changed (if "-n" or "-i" is
            used). This value can be specified in decimal or hex (with "0x"
            prefix).
       executable
            The name of the 32-bit SPARC ELF executable to be scanned or
            modified (if "-n" or "-i" is used).
_EOF_
}

# Converts a command line number to a Perl number.
#   $text - decimal (optionally signed) or "0x"-prefixed hexadecimal text
#   $what - description of the argument, used in the error message
# Dies if $text is neither form.
sub parse_number {
    my ($text, $what) = @_;
    if ($text =~ m/\A0[xX]([0-9a-fA-F]+)\z/) {
        return hex($1);
    }
    if ($text =~ m/\A[-+]?\d+\z/) {
        return $text + 0;
    }
    die "Invalid $what \"$text\" (expected decimal, or hex with \"0x\" prefix)\n";
}

# These subroutines emulate the SPARC %lo and %hi macros: %lo is the low
# 10 bits and %hi the upper 22 bits of a 32-bit value.  hi() is masked so
# that wider or negative input cannot leak outside the immediate field.
sub lo { return $_[0] & 0x3ff }
sub hi { return ($_[0] >> 10) & 0x3fffff }

# What follows is the C source printed by "-e".  It doubles as a shell script:
# the shell sees a comment line followed by an "exec" of the compiler, while
# the C compiler sees those lines as one comment.
# NOTE(review): the header names, the "</dev/null" redirect and the closing
# "*/" were missing from the damaged source and are reconstructed from the
# Solaris elf(3E) example - confirm against a pristine copy.
__END__
# /*
exec xargs -t ${CC-gcc} $@ -o ${0%.c} $0 -lelf </dev/null
*/
#include <fcntl.h>
#include <stdio.h>
#include <libelf.h>
#include <stdlib.h>
#include <string.h>

static void failure(void);

int
main(int argc, char ** argv)
{
    Elf32_Shdr *    shdr;
    Elf32_Ehdr *    ehdr;
    Elf *           elf;
    Elf_Scn *       scn;
    Elf_Data *      data;
    int             fd;
    unsigned int    cnt;

    /* Open the input file */
    if ((fd = open(argv[1], O_RDONLY)) == -1)
        exit(1);

    /* Obtain the ELF descriptor */
    (void) elf_version(EV_CURRENT);
    if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL)
        failure();

    /* Obtain the .shstrtab data buffer */
    if (((ehdr = elf32_getehdr(elf)) == NULL) ||
        ((scn = elf_getscn(elf, ehdr->e_shstrndx)) == NULL) ||
        ((data = elf_getdata(scn, NULL)) == NULL))
        failure();

    /* Traverse input filename, printing each section */
    for (cnt = 1, scn = NULL; scn = elf_nextscn(elf, scn); cnt++) {
        if ((shdr = elf32_getshdr(scn)) == NULL)
            failure();
        (void) printf("[%d]%s offset: %d size: %d address: %d\n", cnt,
            (char *)data->d_buf + shdr->sh_name,
            shdr->sh_offset, shdr->sh_size, shdr->sh_addr);
    }
} /* end main */

static void
failure()
{
    (void) fprintf(stderr, "%s\n", elf_errmsg(elf_errno()));
    exit(1);
}