def get_biggest(dict):
    """Return the largest magnitude bound among the values of a mapping.

    The result is the larger of ``max(values)`` and ``-min(values)``, i.e.
    the extent of the value range on whichever side of zero reaches
    furthest.  Presumably callers use it to normalise offsets onto a
    common scale -- the scaling itself does not happen here.

    Args:
        dict: Mapping whose values are numbers.  The parameter name
            shadows the builtin but is kept so existing keyword calls
            continue to work.

    Returns:
        ``max(max(values), -min(values))``.  For a mapping with a single
        value ``v`` this is ``abs(v)``.

    Raises:
        IndexError: If the mapping is empty.  The exception type matches
            what the original pop-based implementation raised.
    """
    values = list(dict.values())
    if not values:
        raise IndexError("get_biggest() requires a non-empty mapping")
    # min()/max() replace the original sort-and-pop-both-ends approach,
    # which crashed when the mapping held exactly one value because the
    # first pop emptied the list before the second pop ran.
    return max(max(values), -min(values))
def build_list(dataset):
    """Count how often each alphanumeric symbol occurs in *dataset*.

    Only ASCII digits and letters are tracked; whitespace, CR/LF,
    punctuation and every other item are ignored.  Every tracked symbol
    is present in the result, with a count of 0 when it never occurs.

    Args:
        dataset: A string or other iterable of single characters.

    Returns:
        A dict mapping each digit and ASCII letter to the number of times
        it appears in *dataset*.
    """
    # If you extend good_symbols, be wary of characters such as "\" which
    # are PostScript operators and must be dealt with, or the output will
    # not be correct.
    #
    # string.ascii_letters exists on both Python 2 and Python 3, unlike
    # string.letters (Python 2 only, and locale-dependent there).
    good_symbols = string.digits + string.ascii_letters

    # Start every tracked symbol at zero so absent symbols are reported.
    symbol_count = dict.fromkeys(good_symbols, 0)

    for symbol in dataset:
        # Exact key membership, not str.count(): a substring test would
        # accept items such as "ab" or "" that are not single symbols.
        if symbol in symbol_count:
            symbol_count[symbol] += 1
    return symbol_count