#!/bin/sh
# script created 14-MAR-04 by William Baguhn, kc9asi
# This script is in the public domain.
# Comments or suggestions to kc9asi@arrl.net

# This will take a GNIS datapoint file (typically for a whole state, 8+ MB)
# and break it down into smaller chunks (typically one per county, 30-200 KB).
# It will also throw away the stupid trailing spaces and ^M's at EOL.

# My short experiment: the state of Wisconsin.
# Started with a 12.5 MB file;
# ended with 93 files, totaling 6.7 MB.
# And the data files run a whole lot faster, especially when zoomed in.

test -e "$1" || { echo "Try calling $0 with a file as an argument."; exit 1; }

# Field 4 isn't always a county, but it's an acceptable label,
# as it's usually a county.
cut -f4 -d, <"$1" >"$1.counties"

# Remove duplicates (sort | uniq).
# The cut here gets rid of any "quirks" caused by commas that came earlier
# than expected, as cut doesn't recognize quoting depths.
sort <"$1.counties" | uniq | cut -f2 -d\" >"$1.counties.uniq"

# Now we want to replace spaces with periods, so that counties with
# spaces in their names work appropriately both for grep (where "."
# matches any character, including the original space) and file naming.
tr " " . <"$1.counties.uniq" >"$1.counties"
rm "$1.counties.uniq"

# OK, now we should have a file with a list of the various divisions.
# SO, split each one apart.
# The regexp for grep assures that we just get "county" and not county;
# hopefully this will make more sensible breaks, as county names are
# sometimes found in other names as well
# (i.e. Grant county, and Grant Community Park).
# The ,\"county\", pattern should get the former and ignore the latter.
# The fromdos/sed call will drop any dead whitespace at the end of a line.
# The test/rm call will delete output files if they are zero length.
for foo in `cat "$1.counties"` ; do
    # Remove any stale output file first, since the grep below appends.
    rm -f "$1.$foo.gnis"
    echo "Extracting $foo"
    grep ",\"$foo\"," "$1" | fromdos | sed -e 's/[ ]*$//g' >>"$1.$foo.gnis"
    test -s "$1.$foo.gnis" || rm "$1.$foo.gnis"
done

# clean up after ourselves
rm "$1.counties"
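
# Usage sketch (the file name below is hypothetical; any comma-delimited,
# quoted GNIS state file should work the same way):
#
#   sh gnis-split.sh WI_deci.txt
#
# would leave one file per county-ish label in field 4, for example:
#
#   WI_deci.txt.Dane.gnis
#   WI_deci.txt.Grant.gnis
#   WI_deci.txt.St..Croix.gnis    (spaces in names become periods)
#
# The county names above are illustrative, not guaranteed output.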