#! /usr/local/bin/perl

# ip2anonip - a filter to turn IP addresses into host names OR anonymous IPs
# $Id: ip2anonip,v 1.10 2003/01/08 21:24:43 plonka Exp $
# Dave Plonka

# This script uses portions of the C source code from tcpdpriv-1.1.10,
# by Greg Minshall.  See the copyright information under "__C__" below.
# tcpdpriv-1.1.10 was found here:
#    http://ita.ee.lbl.gov/html/contrib/tcpdpriv.html
#
# A patch to build tcpdpriv under Linux was found here:
#    http://www.ethereal.com/~gerald/tcpdpriv-1.1.10-gerald.patch
#
# This script sets the default options to those used by WIDE ("-A50", etc.):
#    http://tracer.csl.sony.co.jp/mawi/guideline.txt
#
# WIDE's modified tcpdpriv (including those defaults plus IPv6 support) can
# be found in their "tcpd-tools" package, here:
#    ftp://tracer.csl.sony.co.jp/pub/mawi/tools/tcpd-tools.tar.gz
#
# Lastly, consider this analysis of potential attacks against "-A50":
#    http://ita.ee.lbl.gov/html/contrib/attack50/attack50.html

use strict;
use FindBin;
use Socket;
use Getopt::Std;
use Inline C;

# Option variables filled in by Getopt::Std::getopts().
our ($opt_h, $opt_p, $opt_I, $opt_i, $opt_a, $opt_A, $opt_s, $opt_F);

# usage($status) - print the usage message on STDERR and exit with $status.
sub usage {
    my $status = shift;
    print STDERR <<_EOF_;
usage: $FindBin::Script [-h] [ -p|a printf_format ] [[-s seed] -A50] [ [-i extension] file [...] ]
       -h - help (shows this usage information)
            (mnemonic: 'h'elp)
       -p printf_format - use this printf format for IP address and hostname,
            respectively.
            The default format is '%.0s%s', which supresses the printing of
            the IP address (i.e. "%.0s" specifies printing a string with a
            maximum width of zero).
            To maintain column widths (since both the IP address and hostname
            vary in lenght), a format like this may be useful:
            '%-16.16s %-20s'
            (mnemonic: 'p'rintf format)
       -a printf_format - anonymize IP addresses by replacing them with
            invented hostnames.
            E.g. The format 'host%d.our.domain' would yield hostnames such as
            "host1.our.domain", "host2.our.domain", etc.
            (mnemonic: 'a'nonymize)
       -A50 - anonymize IP addresses using tcpdpriv(1)'s "-A50 -C4 -M0" mode
            (mnemonic: 'A'nonymize)
       -s seed - seed the random number generator with unsigned integer
            If this option is not used, but "-A" is used, it will be seeded
            automatically, using tcpdpriv's method.
            (mnemonic: 's'eed)
       -i extension - edit the files in place (rather than sending to
            standard output)
            This option requires file name(s) argument(s).
            The extension is added to the name of the old file to make a
            backup copy.
            If you don't wish to make a backup, use "-I".
            (mnemonic: edit 'i'n place)
       -I - like "-i" but no backup is made.
            (mnemonic: edit 'I'n place, trusting this script 'I'mplicitly. ;^)
       -F - read additional file names from standard input.
            This is useful when the argument list is too long and you can't
            use xargs because the filtering (such as "-A50") must run in
            only one invocation of $FindBin::Script so that it is consistent
            across all input files.
            ('F'ile names on stdin)
_EOF_
    exit $status
}

getopts('hp:Ii:a:A:s:F') || usage(2);
usage(0) if ($opt_h);
usage(1) if ($opt_A && 50 != $opt_A);

if ($opt_F) { # read additional file names from standard input
    usage(1) if @ARGV;
    while (my $file = <STDIN>) {
        chomp $file;
        push(@ARGV, $file);
    }
}

# An explicit seed of 0 is still an explicit seed, so test definedness.
if (defined $opt_s) {
    Initialize($opt_s)
}

$| = 1;

my $oldargv = '';
my %cache;      # IP address => name that replaces it (-a / hostname modes)
my $n = 1;      # next number handed to the "-a" printf format
my $regex = qr<(\d+)\.(\d+)\.(\d+)\.(\d+)>;

# translate_ip($ip, @octets) - return the text that replaces one match of
# $regex.  $ip is the whole matched string and @octets its four captures.
# Text whose "octets" are not all within 0..255 is returned unchanged.
sub translate_ip {
    my ($ip, @octets) = @_;

    return $ip unless 4 == grep { 0 <= $_ && $_ <= 255 } @octets;

    if ($opt_A) {
        return inet_ntoa(pack("N", AnonymizeIPAddr(unpack("N", inet_aton($ip)))));
    }

    # Cache the final replacement, including the "no name found" case, so
    # that a failed reverse lookup is not repeated for every occurrence.
    if (!exists $cache{$ip}) {
        my $name;
        if ($opt_a) {
            $name = sprintf($opt_a, $n++);
        } else {
            $name = gethostbyaddr(inet_aton($ip), AF_INET);
        }
        if (!defined($name) || '' eq $name) {
            $name = $ip
        }
        $cache{$ip} = $name;
    }

    my $name = $cache{$ip};
    return $opt_p ? sprintf($opt_p, $ip, $name) : $name;
}

while (<>) {
    # { this is basically straight from the "perlrun" man page (see "-i"):
    if ('-' ne $ARGV && ($opt_I || $opt_i) && $ARGV ne $oldargv) {
        if (!defined($opt_i) || '' eq $opt_i) {
            unlink($ARGV) or die "unlink \"$ARGV\": $!\n"
        } else {
            rename($ARGV, $ARGV . $opt_i) or die "rename \"$ARGV\": $!\n"
        }
        open(ARGVOUT, '>', $ARGV) or die "open \"$ARGV\": $!\n";
        select(ARGVOUT);
        $oldargv = $ARGV;
    }
    # }

    # based on ideas from NetAddr::IP::Find-0.02:
    s{$regex}{ translate_ip($&, $1, $2, $3, $4) }eg;
    print;
}

__END__

__C__

/*
 * Copyright (c) 1996
 *	Ipsilon Networks, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Ipsilon Networks, Inc.
 * 4. The name of Ipsilon Networks, Inc., may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY IPSILON NETWORKS, INC., ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
 * IN NO EVENT SHALL IPSILON NETWORKS, INC., BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * tcpdpriv - make a tcpdump file private (so it can be shared)
 *
 * TODO:
 *
 *	1.	PRIVACY FOR LINK-LEVEL HEADER??? XXX ??? XXX ??? XXX ???
 *		(One method would be to have -L0 imply "convert to
 *		DLT_NULL; unfortunately, libpcap doesn't support this.)
 *	2.	-P|-T|-U >= 2
 *	3.	Don't use tree for byte-wide counters; maybe not for 16-bit?
 *	4.	If can tell via link hdr that is broadcast or multicast,
 *		does that open up an attack on the destination net
 *		encoding?
 *	5.	Retain all zeros and all ones addresses?  (Actually, can
 *		you *safely* retain trailing 0s and trailing 1s?)
 *	6.	Use table to preserve classness.  (Actually, *without* this,
 *		non class-D addresses may get mapped to class-D addresses?)
 *	8.	Should we retain local subnet broadcast information?
 *	11.	PRIVACY for TCP sequence numbers???
* */ #include #include #include #include #if defined(linux) #define __FAVOR_BSD 1 #endif #if defined(SVR4) #include #endif /* defined(SVR4) */ #include #include #if !defined(SVR4) && !defined(linux) #include #endif /* !defined(SVR4) */ #include #include #if defined(sun) || defined(linux) #include #endif /* defined(sun) */ #include #include #include #if !defined(SVR4) && !defined(linux) #include #endif /* !defined(SVR4) */ #include #include #include #include #if !defined(sun) && !defined(linux) #include #if !defined(osf1) #include #endif /* !defined(osf1) */ #include #include #endif /* !defined(sun) */ #include #include #include #include #if defined(sun) #include #endif /* defined(sun) */ #include #define EXTRACT_BIT(value,bitno) (((value)>>(32-(bitno)))&1) /* * Support for TTLs */ /* TTLs are <= (so, <= 128 --> continent-local) */ #define MCAST_TTL_NODE_LOCAL 0 #define MCAST_TTL_LINK_LOCAL 1 #define MCAST_TTL_SITE_LOCAL 32 #define MCAST_TTL_CONTINENT_LOCAL 128 #define MCAST_OPT_NODE_LOCAL 90 #define MCAST_OPT_LINK_LOCAL 80 #define MCAST_OPT_SITE_LOCAL 70 #define MCAST_OPT_CONTINENT_LOCAL 20 #define MCAST_OPT_GLOBAL 10 #define optTOttlLOW(opt) (\ ((opt) == MCAST_OPT_NODE_LOCAL) ? 0 : \ (((opt) == MCAST_OPT_LINK_LOCAL) ? (MCAST_TTL_NODE_LOCAL+1) : \ (((opt) == MCAST_OPT_SITE_LOCAL) ? (MCAST_TTL_LINK_LOCAL+1) : \ (((opt) == MCAST_OPT_CONTINENT_LOCAL) ? (MCAST_TTL_SITE_LOCAL+1) : \ (MCAST_TTL_CONTINENT_LOCAL+1))))) /* * typedefs... 
*/ typedef struct node node_t, *node_p; /* type of a tree node */ struct node { u_long input, /* input value */ output; /* output value */ node_p down[2]; /* children */ }; typedef struct nodehdr nodehdr_t, *nodehdr_p; /* type of a tree */ struct nodehdr { u_long flags, /* see below */ addr_mask, /* mask of bits to copy from input */ counter, /* for NH_FL_COUNTER */ bump, /* amount by which to bump counter */ cur_input; /* what address is currently being masked */ node_p head; }; #define NH_FL_RANDOM_PROPAGATE 1 /* propagate random number down */ #define NH_FL_COUNTER 2 /* bump a counter */ /* * globally scoped variables */ /* * Trees for addressing. * * addr_propagate is for -A50. * * The 0x01000000 is to compensate for a bug in tcpdump (where * it has problems dealing with IP addresses that have zero (0) * in the high order byte). */ nodehdr_t addr_propagate = { NH_FL_RANDOM_PROPAGATE, 0xffffffff, 0x01000000 }, addr_whole = { NH_FL_COUNTER, 0xffffffff, 0x01000000 }, addr_upper = { NH_FL_COUNTER, 0xffff0000, 0x01000000 }, addr_lower = { NH_FL_COUNTER, 0x0000ffff, 0}, addr_byte_0 = { NH_FL_COUNTER, 0xff000000, 0 }, addr_byte_1 = { NH_FL_COUNTER, 0x00ff0000, 0 }, addr_byte_2 = { NH_FL_COUNTER, 0x0000ff00, 0 }, addr_byte_3 = { NH_FL_COUNTER, 0x000000ff, 0 }; /* trees for tcp ports */ nodehdr_t tcpport_whole, tcpport_byte_0, tcpport_byte_1; /* trees for udp ports */ nodehdr_t udpport_whole, udpport_byte_0, udpport_byte_1; /* options (from command line) */ /* set wide default values */ int opt_ipaddr = 50; int opt_mcastaddr = 0; int opt_tcpports = 99; int opt_udpports = 99; int opt_class = 4; int opt_tcpipopts = 50; int qflag = 0; /* -q */ /* * U T I L I Y R O U T I N E S */ /* * like ffs(3), but looking from the MSB. 
*/ int bi_ffs(u_long value) { int add = 0; static u_char bvals[] = { 0, 4, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1 }; if ((value&0xFFFF0000) == 0) { if (value == 0) { /* zero input ==> zero output */ return 0; } add += 16; } else { value >>= 16; } if ((value&0xFF00) == 0) { add += 8; } else { value >>= 8; } if ((value&0xF0) == 0) { add += 4; } else { value >>= 4; } return add+bvals[value&0xf]; } /* * R A N D O M */ /* * return 32-bits of random() * * (on most 32-bit machines, random() returns only 31 bits) */ static long rand32() { #if defined(SVR4) return ((lrand48()&0xffff)<<15)|(lrand48()&0xfff); #else /* defined(SVR4) */ return ((random()&0xffff)<<16)|(random()&0xffff); #endif /* defined(SVR4) */ } /* * run through an area, accumulating the values into a seed. */ static unsigned rand_accum(unsigned prev, unsigned *px, int ints) { /* now, sum it all, shifting all the time */ while (ints--) { prev ^= *px++; prev = (prev<<1)|(prev>>31); } return prev; } /* * at startup, generate a seed for the random number generator * * (it is somewhat amusing how driven i am to say "sum = 0" and * "memset(&x, 0, sizeof x)" below, given that i *want* random * bits...) */ static void rand_start(void) { struct { struct timeval tv; struct timezone tz; uid_t uid; pid_t pid; } x; unsigned sum = 0; int n, gotline; unsigned line[200/sizeof (unsigned)]; FILE *pfd; memset(&x, 0, sizeof x); if (gettimeofday(&x.tv, &x.tz) == -1) { perror("gettimeofday"); exit(1); } x.uid = getuid(); x.pid = getpid(); /* now, sum it all, shifting all the time */ sum = rand_accum(sum, (unsigned *)&x, sizeof x/sizeof(unsigned)); /* * we run through all the mounted file systems * (as reported by mount, anyway) doing a stat * on them. note that SVR4 uses "mountpoint on device", * whereas BSD uses "device on mountpoint". 
*/ pfd = popen("/bin/mount", "r"); if (pfd == NULL) { pfd = popen("mount", "r"); if (pfd == NULL) { fprintf(stderr, "unable to popen() /sbin/mount or mount"); perror(""); exit(1); } } gotline = 0; while (fgets((char *)line, sizeof line, pfd) != NULL) { #if !defined(SVR4) struct statfs stat; #else /* !defined(SVR4) */ struct statvfs stat; #endif /* !defined(SVR4) */ char first[sizeof line], second[sizeof line]; n = sscanf((char *)line, "%s on %s %*s\n", first, second); if (n != 2) { fprintf(stderr, "ill-formatted output from mount(1) command\n"); exit(1); } #if !defined(SVR4) n = statfs(second, &stat); #else /* !defined(SVR4) */ n = statvfs(first, &stat); #endif /* !defined(SVR4) */ if (n == -1) { perror("statfs"); exit(1); } sum = rand_accum(sum, (unsigned *)&stat, sizeof stat/sizeof (unsigned)); gotline = 1; } pclose(pfd); if (gotline == 0) { /* nothing in output from mount command... */ fprintf(stderr, "no output from mount(1) command\n"); exit(1); } /* * now, do the same as mount, but this time with "netstat -in" */ if (((pfd = popen("netstat -in", "r")) == NULL) && ((pfd = popen("/bin/netstat -in", "r")) == NULL) && ((pfd = popen("/usr/ucb/netstat -in", "r")) == NULL) && ((pfd = popen("/usr/sbin/netstat -in", "r")) == NULL) && ((pfd = popen("/usr/bin/netstat -in", "r")) == NULL)) { fprintf(stderr, "unable to popen {,/bin/,/usr/ucb/,/usr/sbin,/usr/bin/}netstat -in"); perror(""); exit(1); } gotline = 0; while (fgets((char *)line, sizeof line, pfd) != NULL) { sum = rand_accum(sum, line, strlen((char *)line)/sizeof (unsigned)); gotline = 1; } pclose(pfd); if (gotline == 0) { fprintf(stderr, "no output from 'netstat -in' command\n"); exit(1); } #if defined(SVR4) srand48(sum); #else /* defined(SVR4) */ srandom(sum); #endif /* defined(SVR4) */ } /* * T R E E R O U T I N E S */ static node_p newnode(void) { node_p node; node = (node_p) malloc(sizeof *node); if (node == 0) { fprintf(stderr, "malloc failed %s:%d\n", __FILE__, __LINE__); exit(2); } return node; } static 
void freetree(node_p node) { node_p next; while (node) { next = node->down[0]; if (node->down[1]){ freetree(node->down[1]); } free(node); node = next; } } /* * M A S K I N G */ /* * figure out what the output for a given input should be. * * value the old output * flip bit (MSB == 0) at which inputs differ * hdr the tree we are in * * note that only hide_addr() sets cur_input (and, that only the "addr" * trees set addr_mask). * * also, addr_mask is munged (by lookup_init()) to have at most * opt_class high order bits set as one. */ static inline u_long make_output(u_long value, int flip, nodehdr_p hdr) { if (hdr->flags&NH_FL_RANDOM_PROPAGATE) { /* * the output is: * bits 1-(flip-1): copied from value * bit flip: flip bit (XOR with 1) in value * bits (flip+1)-32: random */ if (flip == 32) { return value^1; } else { /* get left AND flipped bit */ return ((((value>>(32-flip))^1)<<(32-flip)) | ((rand32()&0x7fffffff)>>flip)); /* and get right part */ } } else if (hdr->flags&NH_FL_COUNTER) { hdr->counter += hdr->bump; /* now, do we need to copy any bits from head? */ if (hdr->addr_mask) { int n; u_long m; /* * retain consecutive high order ONE (1) bits from * cur_input. number of consecutive high order one * bits to retain is constrained by addr_mask. */ n = bi_ffs(~hdr->cur_input); /* n == first ZERO (0) bit */ if (n) { m = hdr->cur_input>>(32-n); return hdr->counter|((m<<(32-n))&hdr->addr_mask); } else { /* n == 0 ==> cur_input all ones */ return hdr->counter&hdr->addr_mask; } } return hdr->counter; } else { fprintf(stderr, "unknown flags field %s:%d\n", __FILE__, __LINE__); exit(2); } } /* * make a peer that corresponds to input. return input's node. */ static inline node_p make_peer(u_long input, node_p old, nodehdr_p hdr) { node_p down[2]; int swivel, bitvalue; /* * become a peer * algo: create two nodes, the two peers. leave orig node as * the parent of the two new ones. 
*/ down[0] = newnode(); down[1] = newnode(); swivel = bi_ffs(input^old->input); bitvalue = EXTRACT_BIT(input, swivel); down[bitvalue]->input = input; down[bitvalue]->output = make_output(old->output, swivel, hdr); down[bitvalue]->down[0] = down[bitvalue]->down[1] = 0; *down[1-bitvalue] = *old; /* copy orig node down one level */ old->input = down[1]->input; /* NB: 1s to the right (0s to the left) */ old->output = down[1]->output; old->down[0] = down[0]; /* point to children */ old->down[1] = down[1]; return down[bitvalue]; } /* * L O O K U P */ /* * initialize a lookup structure. * * addr_mask is non-zero if this is a header for IP addresses, * in which case it is a mask of the bits covered in the IP * address by this header. */ static void lookup_init(nodehdr_p hdr) { node_p node; if (hdr->head) { freetree(hdr->head); hdr->head = 0; } /* * this is all a bit cryptic, so here's the deal * * if addr_mask is zero, or doesn't cover any of the * classness bits preserved by -Cnn, then we create * exactly one node whose input value is zero, and whose * output value is random. * * on the other hand, if addr_mask covers some of the * classness bits, we create a node which performs the * identity map on those bits in addr_mask covered by * -Cnn and the rest of which is random. 
*/ hdr->head = newnode(); node = hdr->head; /* if this is high order address byte, prime classness if needed */ if (hdr->addr_mask) { /* compute bump as lsb of addr_mask */ hdr->bump = 1<<(ffs(hdr->addr_mask)-1); /* NOTE -- traditional ffs() */ if (hdr->flags == NH_FL_COUNTER) { node->output = hdr->bump; } else { /* whatever we do, don't pick up any bits outside of addr_mask */ /* zeros for high order opt_class bits */ node->output = rand32()>>opt_class; /* no bits outside of addr_mask */ node->output &= hdr->addr_mask; } if (opt_class) { /* extract bits in addr_mask covered by opt_class */ hdr->addr_mask = hdr->addr_mask>>(32-opt_class); hdr->addr_mask = hdr->addr_mask<<(32-opt_class); node->input = hdr->addr_mask; node->output |= hdr->addr_mask; } else { hdr->addr_mask = 0; node->input = 0; } } else { node->input = 0; /* * by using rand32(), we get bit 0 (MSB) randomized; * passing 0 wouldn't do at all... */ node->output = rand32(); hdr->bump = 1; } node->down[0] = node->down[1] = 0; } /* * EVERY NON-LEAF NODE HAS ***2*** CHILDREN!!! * (otherwise, the code below dies badly!) */ u_long lookup(u_long input, nodehdr_p hdr) { node_p node; int swivel; node = hdr->head; /* non-zero, 'cause lookup_init() already called */ if (hdr->head == 0) { /* (but...) */ fprintf(stderr, "unexpected zero head %s:%d\n", __FILE__, __LINE__); } while (node) { if (input == node->input) { /* we found our node! */ return node->output; } if (node->down[0] == 0) { /* need to descend, but can't */ node = make_peer(input, node, hdr); /* create a peer */ } else { /* swivel is the first bit the left and right children differ in */ swivel = bi_ffs(node->down[0]->input^node->down[1]->input); if (bi_ffs(input^node->input) < swivel) {/* input differs earlier */ node = make_peer(input, node, hdr); /* make a peer */ } else if (input&(1<<(32-swivel))) { node = node->down[1]; /* NB: 1s to the right */ } else { node = node->down[0]; /* NB: 0s to the left */ } } } /* ??? should not occur! 
*/ fprintf(stderr, "unexpected loop termination %s:%d\n", __FILE__, __LINE__); exit(1); } /* * H I D I N G */ u_long hide_addr(u_long addr, u_int ttl) { u_long answer; if (IN_CLASSD(addr) && (ttl >= optTOttlLOW(opt_mcastaddr))) { return addr; } switch (opt_ipaddr) { case 0: addr_whole.cur_input = addr; answer = lookup(addr, &addr_whole); break; case 1: addr_upper.cur_input = addr_lower.cur_input = addr; answer = lookup(addr&0xffff0000, &addr_upper) | lookup(addr&0xffff, &addr_lower); break; case 2: addr_byte_0.cur_input = addr_byte_1.cur_input = addr_byte_2.cur_input = addr_byte_3.cur_input = addr; /* if i had a hammer... */ answer = lookup(addr&0xff000000, &addr_byte_0) | lookup(addr&0x00ff0000, &addr_byte_1) | lookup(addr&0x0000ff00, &addr_byte_2) | lookup(addr&0x000000ff, &addr_byte_3); break; case 50: addr_propagate.cur_input = addr; answer = lookup(addr, &addr_propagate); break; case 99: answer = addr; break; default: fprintf(stderr, "unknown opt_ipaddr %s:%d\n", __FILE__, __LINE__); exit(1); } return answer; } static void verify_and_print_args(char *cmd) { lookup_init(&addr_propagate); lookup_init(&addr_whole); lookup_init(&addr_upper); lookup_init(&addr_lower); lookup_init(&addr_byte_0); lookup_init(&addr_byte_1); lookup_init(&addr_byte_2); lookup_init(&addr_byte_3); tcpport_whole.flags = NH_FL_COUNTER; lookup_init(&tcpport_whole); tcpport_byte_0.flags = tcpport_byte_1.flags = NH_FL_COUNTER; lookup_init(&tcpport_byte_0); lookup_init(&tcpport_byte_1); udpport_whole.flags = NH_FL_COUNTER; lookup_init(&udpport_whole); udpport_byte_0.flags = udpport_byte_1.flags = NH_FL_COUNTER; lookup_init(&udpport_byte_0); lookup_init(&udpport_byte_1); } static int initialized = 0; void Initialize(U32 seed) { #if defined(SVR4) srand48(seed); #else srandom(seed); #endif verify_and_print_args((char *)0); initialized = 1; } U32 AnonymizeIPAddr(U32 addr) { U32 value; if (!initialized) { rand_start(); verify_and_print_args((char *)0); initialized = 1; } value = hide_addr(addr, 0); 
/* second arg (ttl) affects mcast processing */ return value; }