Set 1, challenge 3
author David Kerkeslager <kerkeslager@gmail.com>
Fri, 12 Feb 2021 19:08:46 +0000 (14:08 -0500)
committer David Kerkeslager <kerkeslager@gmail.com>
Fri, 12 Feb 2021 19:08:46 +0000 (14:08 -0500)
cryptopals-python/cryptopals.py

index d2eacf5..c5d2704 100644 (file)
@@ -4,12 +4,28 @@ import unittest
 def base64_from_hex(_hex):
     return codecs.encode(bytes.fromhex(_hex), 'base64').decode('utf-8')
 
-def hex_xor(hex0, hex1):
+def xor_bytes(bytes0, bytes1):
+    return bytes(b0 ^ b1 for b0, b1 in zip(bytes0, bytes1))
+
+def xor_hex(hex0, hex1):
     assert len(hex0) == len(hex1)
     bytes0 = bytes.fromhex(hex0)
     bytes1 = bytes.fromhex(hex1)
-    xored_bytes = bytes(byte0 ^ byte1 for byte0, byte1 in zip(bytes0, bytes1))
-    return codecs.encode(xored_bytes, 'hex').decode('utf-8')
+    return codecs.encode(xor_bytes(bytes0, bytes1), 'hex').decode('utf-8')
+
+def get_character_frequencies(source):
+    frequencies = {}
+
+    for source_character in source:
+        frequencies[source_character] = frequencies.get(source_character, 0) + 1
+
+    return frequencies
+
+def compare_frequency_deviation(base_frequency, comparison_frequency):
+    return sum(
+        abs(frequency - comparison_frequency.get(character, 0))
+        for character, frequency in base_frequency.items()
+    ) / len(base_frequency)
 
 class Set1Challenge1Tests(unittest.TestCase):
     def test_converts_hex_to_base64(self):
@@ -23,7 +39,38 @@ class Set1Challenge2Tests(unittest.TestCase):
         hex1 = '686974207468652062756c6c277320657965'
 
         expected = '746865206b696420646f6e277420706c6179'
-        actual = hex_xor(hex0, hex1)
+        actual = xor_hex(hex0, hex1)
+
+        self.assertEqual(expected, actual)
+
+class Set1Challenge3Tests(unittest.TestCase):
+    def test_gets_message(self):
+        with open('sample.txt','r') as sample_file:
+            sample_text = sample_file.read()
+
+        sample_frequencies = get_character_frequencies(sample_text)
+
+        xored_string = bytes.fromhex('1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736')
+
+        lowest_frequency_deviation_string = None
+        lowest_frequency_deviation = None
+
+        for i in range(128):
+            key_char = bytes([i]).decode('utf-8')
+            key = bytes([i]) * len(xored_string)
+            try_string = xor_bytes(xored_string, key).decode('utf-8')
+            try_string_frequency_deviation = compare_frequency_deviation(
+                sample_frequencies,
+                get_character_frequencies(try_string),
+            )
+
+            if lowest_frequency_deviation is None or try_string_frequency_deviation < lowest_frequency_deviation:
+                lowest_frequency_deviation_string = try_string
+                lowest_frequency_deviation = try_string_frequency_deviation
+
+
+        expected = "Cooking MC's like a pound of bacon"
+        actual = lowest_frequency_deviation_string
 
         self.assertEqual(expected, actual)