#!/usr/bin/env python
## Hide data in word list permutations
## Copyright (c) 2005, Josep Mones i Teixidor

## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA

import sys
from zlib import compress, decompress
from math import log, floor, factorial
from random import Random


def _as_bytes(data):
    """Return data as an immutable byte string.

    Byte strings and bytearrays are passed through; text is encoded as
    latin-1 so that every character maps to the byte with the same value.
    """
    if isinstance(data, (bytes, bytearray)):
        return bytes(data)
    return data.encode("latin-1")


def _clean_words(words):
    """Strip whitespace from each word and drop empty ones, keeping order."""
    stripped = (word.strip() for word in words)
    return [word for word in stripped if word]


def prepare_list(the_list):
    """Strip leading and trailing whitespace of each word and sort the list.

    Words that are empty after stripping are dropped.  A new list is
    returned; the_list itself is left untouched.
    """
    return sorted(_clean_words(the_list))


def calculate_space(the_list):
    """Calculate the number of bytes we can hide as permutations of the_list.

    This is the largest k such that 256**k <= len(the_list)!.  It is computed
    with exact integer arithmetic so that no floating point rounding can
    change the result near a byte boundary.
    """
    permutations = factorial(len(the_list))
    # bit_length() - 1 is floor(log2(permutations)).
    return (permutations.bit_length() - 1) // 8


def pad_data(data, final_length):
    """Pad data to final_length bytes.

    The layout is: data_length (2 bytes, little endian) | data | random_data

    Raises OverflowError if data is longer than 0xffff bytes or does not fit
    in final_length bytes together with its 2-byte length prefix.
    """
    payload = _as_bytes(data)
    length = len(payload)
    if length > 0xffff:
        raise OverflowError("Data too long")
    if final_length < length + 2:
        raise OverflowError(
            "Data too long for the number of elements in the_list "
            "(it's %d and it should be at most %d)"
            % (length, final_length - 2))
    ## we encode length in little endian
    padded = bytearray((length & 0xff, (length >> 8) & 0xff))
    padded += payload
    ## pad using random data
    rnd = Random()
    padded += bytearray(rnd.randrange(0, 256)
                        for _ in range(final_length - len(padded)))
    return bytes(padded)


def unpad_data(data):
    """Unpad data by retrieving its length and stripping the random padding.

    bigint_to_string() drops high-order zero bytes, so any byte missing from
    the end of data (including the length prefix) is treated as zero.
    """
    raw = bytearray(_as_bytes(data))
    if len(raw) < 2:
        raw += bytearray(2 - len(raw))
    length = raw[0] + (raw[1] << 8)
    payload = raw[2:2 + length]
    payload += bytearray(length - len(payload))
    return bytes(payload)


def encode_data(data, final_length):
    """Compress data and pad the result to final_length bytes."""
    return pad_data(compress(_as_bytes(data)), final_length)


def decode_data(data):
    """Strip the padding from data and decompress the payload."""
    return decompress(unpad_data(data))


def string_to_bigint(string):
    """Convert a byte string to a big int, reading it as little endian."""
    result = 0
    for byte in reversed(bytearray(_as_bytes(string))):
        result = (result << 8) | byte
    return result


def bigint_to_string(bigint):
    """Convert a big int to a little endian byte string.

    High-order zero bytes are not emitted, so 0 yields an empty string.
    """
    out = bytearray()
    while bigint > 0:
        out.append(bigint & 0xff)
        bigint >>= 8
    return bytes(out)


def bigint_to_permbase(bigint, final_length):
    """Convert a big integer to a permbase number.

    The result has final_length-1 digits, the digit with the greatest base
    (final_length) first and the base 2 digit last.

    Raises OverflowError if bigint needs more digits than that, i.e. if it
    is not smaller than final_length!.
    """
    max_digits = max(final_length - 1, 0)
    pos = 2
    permbase = []
    while bigint > 0:
        # Integer division: "/" would produce floats on Python 3.
        bigint, digit = divmod(bigint, pos)
        permbase.append(digit)
        pos += 1
    if len(permbase) > max_digits:
        raise OverflowError(
            "Number too big for a permutation of %d elements" % final_length)
    permbase.extend([0] * (max_digits - len(permbase)))
    permbase.reverse()
    return permbase


def permbase_to_bigint(permbase):
    """Convert a number in permutation base to a big integer.

    An empty digit list is the number 0.
    """
    if not permbase:
        return 0
    bigint = permbase[0]
    current = len(permbase)
    for digit in permbase[1:]:
        bigint = bigint * current + digit
        current -= 1
    return bigint


def permbase_to_listperm(permbase, the_list):
    """Convert a permutation base list of numbers to a permutation of the_list.

    Each digit selects (and removes) one of the items still available; the
    items left over at the end are appended.  the_list is not modified.
    """
    remaining = list(the_list)
    listperm = []
    for digit in permbase:
        listperm.append(remaining.pop(digit))
    return listperm + remaining


def listperm_to_permbase(listperm, the_list):
    """Convert a permutation of the_list to a permutation base list of numbers.

    the_list is not modified.  Items are located with list.index(), which
    returns the first match, so duplicated items cannot be told apart.
    Raises ValueError if an item of listperm is not in the_list.
    """
    remaining = list(the_list)
    permbase = []
    for codel in listperm:
        idx = remaining.index(codel)
        permbase.append(idx)
        del remaining[idx]
    # The last digit is always 0 (only one item is left), so it is dropped.
    return permbase[:-1]


def hide_data(the_list, data):
    """Hide data as a permutation of the_list and return that permutation.

    Raises OverflowError if the compressed data does not fit in the number
    of permutations of the_list.
    """
    the_list = prepare_list(the_list)
    final_length = calculate_space(the_list)
    data = encode_data(data, final_length)
    bigint = string_to_bigint(data)
    permbase = bigint_to_permbase(bigint, len(the_list))
    return permbase_to_listperm(permbase, the_list)


def recover_data(listperm):
    """Recover the data hidden in the order of the words of listperm."""
    # Clean the permutation exactly like the reference list is cleaned, so
    # stray whitespace or blank lines do not break the lookup.
    words = _clean_words(listperm)
    permbase = listperm_to_permbase(words, sorted(words))
    bigint = permbase_to_bigint(permbase)
    data = bigint_to_string(bigint)
    return decode_data(data)


def hide_data_function(word_list_file, data_file, encoded_word_list_file):
    """Hide the contents of data_file in the order of the words of
    word_list_file, writing the result to encoded_word_list_file."""
    with open(word_list_file) as word_fh:
        word_list = word_fh.read().split('\n')
    # The payload is arbitrary binary data: read it untranslated.
    with open(data_file, "rb") as data_fh:
        data = data_fh.read()
    listperm = hide_data(word_list, data)
    with open(encoded_word_list_file, "w") as out_fh:
        out_fh.write("\n".join(listperm))


def recover_data_function(encoded_word_list_file, recovered_data_file):
    """Recover the data hidden in encoded_word_list_file and write it to
    recovered_data_file."""
    with open(encoded_word_list_file) as word_fh:
        encoded_word_list = word_fh.read().split('\n')
    data = recover_data(encoded_word_list)
    # The payload is arbitrary binary data: write it untranslated.
    with open(recovered_data_file, "wb") as out_fh:
        out_fh.write(data)


def usage():
    """Print the command line synopsis."""
    print("stegotation hide [word_list] [data] [encoded_word_list]")
    print("stegotation recover [encoded_word_list] [recovered_data]")


def main(argv=None):
    """Run the command line interface and return the process exit status."""
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        usage()
        return 1
    command = argv[1]
    if command == 'hide' and len(argv) == 5:
        hide_data_function(argv[2], argv[3], argv[4])
        return 0
    if command == 'recover' and len(argv) == 4:
        recover_data_function(argv[2], argv[3])
        return 0
    usage()
    return 1


if __name__ == '__main__':
    sys.exit(main())