Commit my random junk
[sandbox] / cryptopals-python / cryptopals.py
index 8752786..24392b8 100644 (file)
@@ -2,7 +2,57 @@ import codecs
 import unittest
 
 def base64_from_hex(_hex):
+    """Decode the hex string `_hex` to bytes and return them as base64 text.
+
+    The bytes are encoded with the codecs 'base64' codec and the encoded
+    result is decoded as UTF-8 to give a str.
+    """
-    return codecs.encode(codecs.decode(_hex, 'hex'), 'base64').decode('utf-8')
+    return codecs.encode(bytes.fromhex(_hex), 'base64').decode('utf-8')
+
+def xor_bytes(bytes0, bytes1):
+    """XOR two byte sequences position by position and return `bytes`.
+
+    The inputs are paired with zip(), so the result is as long as the
+    shorter input; surplus bytes of the longer one are ignored.
+    """
+    xored = bytearray()
+    for left, right in zip(bytes0, bytes1):
+        xored.append(left ^ right)
+    return bytes(xored)
+
+def xor_hex(hex0, hex1):
+    """XOR two equal-length hex strings and return the result as a hex string.
+
+    Args:
+        hex0: First operand, as hexadecimal text.
+        hex1: Second operand, as hexadecimal text of the same length.
+
+    Returns:
+        The byte-wise XOR of the two decoded operands, hex-encoded as str.
+
+    Raises:
+        ValueError: If the strings differ in length (or, from
+            bytes.fromhex, if either is not valid hexadecimal).
+    """
+    # Raise explicitly rather than assert: assertions are removed under -O,
+    # which would let mismatched inputs be silently truncated by xor_bytes.
+    if len(hex0) != len(hex1):
+        raise ValueError('hex strings must have the same length')
+
+    bytes0 = bytes.fromhex(hex0)
+    bytes1 = bytes.fromhex(hex1)
+    return xor_bytes(bytes0, bytes1).hex()
+
+def get_character_frequencies(source):
+    """Count how many times each item occurs while iterating `source`.
+
+    Returns a dict mapping each distinct item to its number of occurrences,
+    with keys in order of first appearance.
+    """
+    counts = {}
+    for symbol in source:
+        if symbol in counts:
+            counts[symbol] += 1
+        else:
+            counts[symbol] = 1
+    return counts
+
+def compare_frequency_deviation(base_frequency, comparison_frequency):
+    """Return the mean absolute gap between two frequency tables.
+
+    Only the characters present in `base_frequency` are considered; one that
+    is missing from `comparison_frequency` counts as 0 there. The summed gaps
+    are divided by the number of entries in `base_frequency`, so an empty
+    `base_frequency` raises ZeroDivisionError.
+    """
+    gaps = []
+    for character, base_count in base_frequency.items():
+        compared_count = comparison_frequency.get(character, 0)
+        gaps.append(abs(base_count - compared_count))
+    return sum(gaps) / len(base_frequency)
+
+# Read the reference text once, at import time. The path is relative, so
+# importing this module requires 'sample.txt' in the working directory.
+with open('sample.txt','r') as sample_file:
+    sample_text = sample_file.read()
+
+# Character counts of the reference text; the single-byte-XOR tests score
+# each candidate plaintext against this table.
+SAMPLE_FREQUENCIES = get_character_frequencies(sample_text)
+
+def encrypt_with_repeating_xor(plaintext, key):
+    """Encrypt `plaintext` by XORing it with `key` repeated end to end.
+
+    Args:
+        plaintext: Text to encrypt; encoded as UTF-8 before XORing.
+        key: Text key; encoded as UTF-8 and repeated to cover the plaintext.
+
+    Returns:
+        The ciphertext as `bytes`, the same length as the encoded plaintext.
+
+    Raises:
+        ValueError: If `key` is empty.
+    """
+    from itertools import cycle
+
+    plaintext_bytes = plaintext.encode('utf-8')
+    key_bytes = key.encode('utf-8')
+
+    # cycle() over an empty key yields nothing, which would make zip() return
+    # an empty ciphertext without complaint; reject that case explicitly.
+    if not key_bytes:
+        raise ValueError('key must not be empty')
+
+    # zip() stops at the end of the plaintext, so the endlessly repeating key
+    # is consumed only as far as needed.
+    return bytes(
+        plaintext_byte ^ key_byte
+        for plaintext_byte, key_byte in zip(plaintext_bytes, cycle(key_bytes))
+    )
+
+def hamming_weight(_bytes):
+    """Return the total number of set bits across every value in `_bytes`."""
+    set_bits = 0
+    for value in _bytes:
+        # Peel off the lowest bit until nothing is left; values that are not
+        # positive contribute nothing.
+        while value > 0:
+            set_bits += value & 1
+            value >>= 1
+    return set_bits
+
+def hamming_distance(bytes0, bytes1):
+    """Count the bits that differ between two byte strings.
+
+    The inputs are XORed with xor_bytes and the set bits of the result are
+    counted with hamming_weight.
+    """
+    differing_bits = xor_bytes(bytes0, bytes1)
+    return hamming_weight(differing_bits)
 
 class Set1Challenge1Tests(unittest.TestCase):
     def test_converts_hex_to_base64(self):
@@ -10,5 +60,99 @@ class Set1Challenge1Tests(unittest.TestCase):
         actual = base64_from_hex('49276d206b696c6c696e6720796f757220627261696e206c696b65206120706f69736f6e6f7573206d757368726f6f6d')
         self.assertEqual(expected, actual)
 
+class Set1Challenge2Tests(unittest.TestCase):
+    """Set 1, challenge 2: XOR of two equal-length hex strings."""
+
+    def test_xors_hex_strings(self):
+        """xor_hex of the two fixed operands yields the known hex result."""
+        actual = xor_hex(
+            '1c0111001f010100061a024b53535009181c',
+            '686974207468652062756c6c277320657965',
+        )
+
+        self.assertEqual('746865206b696420646f6e277420706c6179', actual)
+
+class Set1Challenge3Tests(unittest.TestCase):
+    """Set 1, challenge 3: recover a message XORed with a single-byte key."""
+
+    def test_gets_message(self):
+        """The candidate deviating least from SAMPLE_FREQUENCIES is the message."""
+        xored_string = bytes.fromhex('1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736')
+
+        def deviation_from_sample(candidate):
+            # Score a candidate plaintext against the reference character counts.
+            return compare_frequency_deviation(
+                SAMPLE_FREQUENCIES,
+                get_character_frequencies(candidate),
+            )
+
+        # One candidate plaintext per key value 0..127, produced lazily.
+        candidates = (
+            xor_bytes(xored_string, bytes([i]) * len(xored_string)).decode('utf-8')
+            for i in range(128)
+        )
+
+        expected = "Cooking MC's like a pound of bacon"
+        # min() returns the first of equally scored candidates, the same
+        # winner a scan with a strict '<' comparison would keep.
+        actual = min(candidates, key=deviation_from_sample)
+
+        self.assertEqual(expected, actual)
+
+class Set1Challenge4Tests(unittest.TestCase):
+    """Set 1, challenge 4: find the single-byte-XOR line among many."""
+
+    def test_gets_message(self):
+        """Across all lines and keys, the best-scoring plaintext is the message."""
+        with open('set1challenge4.txt','r') as f:
+            lines = f.readlines()
+
+        lowest_frequency_deviation_string = None
+        lowest_frequency_deviation = None
+
+        for line in lines:
+            # readlines() keeps the trailing newline; bytes.fromhex skips
+            # ASCII whitespace, so it does not need stripping first.
+            line_bytes = bytes.fromhex(line)
+
+            for i in range(128):
+                key = bytes([i]) * len(line_bytes)
+
+                # A wrong key usually yields bytes that are not valid UTF-8.
+                # Skip only that case, so any other error still surfaces
+                # instead of being silently swallowed.
+                try:
+                    try_string = xor_bytes(line_bytes, key).decode('utf-8')
+                except UnicodeDecodeError:
+                    continue
+
+                try_string_frequency_deviation = compare_frequency_deviation(
+                    SAMPLE_FREQUENCIES,
+                    get_character_frequencies(try_string),
+                )
+
+                if lowest_frequency_deviation is None or try_string_frequency_deviation < lowest_frequency_deviation:
+                    lowest_frequency_deviation_string = try_string
+                    lowest_frequency_deviation = try_string_frequency_deviation
+
+        expected = 'Now that the party is jumping\n'
+        actual = lowest_frequency_deviation_string
+
+        self.assertEqual(expected, actual)
+
+class Set1Challenge5Tests(unittest.TestCase):
+    """Set 1, challenge 5: repeating-key XOR encryption."""
+
+    def test_encrypts_with_repeating_xor(self):
+        """Encrypting the two-line lyric with key 'ICE' gives the known hex."""
+        ciphertext = encrypt_with_repeating_xor(
+            "Burning 'em, if you ain't quick and nimble\nI go crazy when I hear a cymbal",
+            'ICE',
+        )
+
+        self.assertEqual(
+            '0b3637272a2b2e63622c2e69692a23693a2a3c6324202d623d63343c2a26226324272765272a282b2f20430a652e2c652a3124333a653e2b2027630c692b20283165286326302e27282f',
+            ciphertext.hex(),
+        )
+
+# Read the challenge 6 input once, at import time. The path is relative, so
+# importing this module requires 'set1challenge6.txt' in the working directory.
+with open('set1challenge6.txt','r') as f:
+    set1challenge6text = f.read()
+
+class Set1Challenge6Tests(unittest.TestCase):
+    """Set 1, challenge 6: building blocks for breaking repeating-key XOR."""
+
+    def test_hamming_distance(self):
+        """The two fixed 14-byte strings differ in exactly 37 bits."""
+        expected = 37
+        actual = hamming_distance(b'this is a test', b'wokka wokka!!!')
+        self.assertEqual(expected, actual)
+
+    def test_find_repeated_xor_keysize(self):
+        """Check the key size found for the challenge 6 text."""
+        # NOTE(review): find_repeated_xor_keysize is not defined in the part
+        # of the file visible here; unless it exists elsewhere this test
+        # fails with NameError. The expected value 0 looks like a
+        # placeholder -- confirm the real key size.
+        expected = 0
+        actual = find_repeated_xor_keysize(set1challenge6text)
+        self.assertEqual(expected, actual)
+
 if __name__ == '__main__':
+    # Run the unittest test cases when this file is executed as a script.
     unittest.main()