# This program finds the distribution of leading nonzero digits in a set of
# numbers.  The program is useful for exploring the phenomenon known as
# Benford's Law.  It simply ignores tokens that don't have a nonzero digit in
# them, so you can feed it input files that have extra text like labels.
#
# The script runs unchanged on Python 2 and Python 3: every print call takes
# exactly one argument, so it produces the same text whether `print` is a
# statement or a function.

from string import *

# Python 2 spells the line-reading prompt `raw_input`; Python 3 renamed it to
# `input`.  Pick whichever one this interpreter provides.
try:
    _read_line = raw_input
except NameError:
    _read_line = input


def firstDigitOf(str):
    """Return the first nonzero digit of a string, or 0 if there is none.

    Characters are scanned left to right; anything that is not one of the
    characters "1" through "9" (signs, zeros, decimal points, letters) is
    skipped.

    str -- the token to scan (the name shadows the builtin but is kept so
           existing keyword callers keep working).
    """
    for ch in str:
        if "1" <= ch <= "9":
            return int(ch)
    return 0


def _tally_digits(lines):
    """Count leading nonzero digits over an iterable of text lines.

    Each line is split on whitespace and every token is classified with
    firstDigitOf.  Returns a list of 10 ints where index d (1-9) is the number
    of tokens whose first nonzero digit is d, and index 0 is the number of
    tokens that contain no nonzero digit at all.
    """
    count = [0] * 10
    for line in lines:
        for token in line.split():
            count[firstDigitOf(token)] += 1
    return count


def _table_lines(count):
    """Build the report table for a 10-element tally as a list of strings.

    Slot 0 of `count` (tokens without a nonzero digit) is left out of the
    total.  When the total is zero every percentage, including the one on the
    "Total" row, is reported as 0.00 rather than dividing by zero.
    """
    total = sum(count[1:])
    lines = ["Digit Count Percent"]
    for digit in range(1, 10):
        # Guard the division: an empty or digit-free input gives total == 0.
        pct = count[digit] * 100.0 / total if total else 0.0
        lines.append("%5d %5d %6.2f" % (digit, count[digit], pct))
    lines.append("Total %5d %6.2f" % (total, 100.0 if total else 0.0))
    return lines


def main():
    """Prompt for an input file name, tally its tokens and print the table."""
    # open input file
    print("Let's count those leading digits...")
    name = _read_line("input file name? ")
    # The with-block closes the file even if reading fails part-way.
    with open(name, 'r') as f:
        print("")

        # count occurrences of digits
        count = _tally_digits(f)

    # report how many tokens were skipped for having no nonzero digit
    if count[0] > 0:
        print("excluding %d tokens" % count[0])

    # print table
    for row in _table_lines(count):
        print(row)


if __name__ == "__main__":
    main()