"""Frequency Finder
Analyzes frequency of letters in given message compared to the most common occurring
letters to determine if message is in the English language.
Attributes:
ETAOIN (str): String containing uppercase latin letters in order from most to least common.
LETTERS (str): String containing uppercase latin letters in alphabetical order.
Note:
* Compares six most and six least common letters in the English language.
* https://www.nostarch.com/crackingcodes/ (BSD Licensed)
"""
ETAOIN = 'ETAOINSHRDLCUMWFGYPBVKJXQZ'
LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
[docs]def getLetterCount(message: str) -> dict:
"""Get letter count
Counts the frequency of all latin letters in a given message.
Args:
message: String containing message to analyze letter frequency.
Returns:
Dictionary with keys of single letters and values of the count of how many
times they appear in the message parameter.
"""
letterCount = {'A': 0, 'B': 0, 'C': 0, 'D': 0, 'E': 0, 'F': 0,
'G': 0, 'H': 0, 'I': 0, 'J': 0, 'K': 0, 'L': 0, 'M': 0, 'N': 0,
'O': 0, 'P': 0, 'Q': 0, 'R': 0, 'S': 0, 'T': 0, 'U': 0, 'V': 0,
'W': 0, 'X': 0, 'Y': 0, 'Z': 0}
for letter in message.upper():
if letter in LETTERS:
letterCount[letter] += 1
return letterCount
[docs]def getItemAtIndexZero(items: tuple):
"""Get element at index zero
Helper function that returns the first element of a given tuple.
Args:
items: Tuple containing a latin letter and its frequency count.
Returns:
The first element of the given tuple: the latin letter.
"""
return items[0]
[docs]def getFrequencyOrder(message: str) -> str:
"""Get frequency order
Analyzes frequency of each letter in given message and returns string with each letter from most to least frequent.
Args:
message: String containing message to analyze frequency.
Returns:
String of the alphabet letters arranged in order of most frequently occurring in the message parameter.
"""
# First, get a dictionary of each letter and its frequency count:
letterToFreq = getLetterCount(message)
# Second, make a dictionary of each frequency count to the letter(s)
# with that frequency:
freqToLetter = {}
for letter in LETTERS:
if letterToFreq[letter] not in freqToLetter:
freqToLetter[letterToFreq[letter]] = [letter]
else:
freqToLetter[letterToFreq[letter]].append(letter)
# Third, put each list of letters in reverse "ETAOIN" order, and then
# convert it to a string:
for freq in freqToLetter:
freqToLetter[freq].sort(key=ETAOIN.find, reverse=True)
freqToLetter[freq] = ''.join(freqToLetter[freq])
# Fourth, convert the freqToLetter dictionary to a list of
# tuple pairs (key, value), and then sort them:
freqPairs = list(freqToLetter.items())
freqPairs.sort(key=getItemAtIndexZero, reverse=True)
# Fifth, now that the letters are ordered by frequency, extract all
# the letters for the final string:
freqOrder = []
for freqPair in freqPairs:
freqOrder.append(freqPair[1])
return ''.join(freqOrder)
[docs]def englishFreqMatchScore(message: str) -> int:
"""English Frequency Match Score
Calculates number of matches that the string in the message parameter has when its letter frequency is
compared to English letter frequency.
Args:
message: String containing message to calculate English match score.
Returns:
Number representing message's matches to English letter frequency.
Note:
* A "match" is how many of its six most frequent and six least frequent letters are among the six
most frequent and six least frequent letters for English.
* A "perfect score" is 12
"""
freqOrder = getFrequencyOrder(message)
matchScore = 0
# Find how many matches for the six most common letters there are:
for commonLetter in ETAOIN[:6]:
if commonLetter in freqOrder[:6]:
matchScore += 1
# Find how many matches for the six least common letters there are:
for uncommonLetter in ETAOIN[-6:]:
if uncommonLetter in freqOrder[-6:]:
matchScore += 1
return matchScore