Source code for src.ch03.p1_digram_counter

"""Counts the occurrence of all possible digrams of a word in a dictionary."""
from itertools import permutations
from collections import Counter
from src.ch01.practice.p2_poor_bar_chart import print_bar_chart
from src.ch02.p1_cleanup_dictionary import read_from_file
from src.ch02 import DICTIONARY_FILE_PATH
from src.ch03 import GET_DIGRAMS_ERROR, COUNT_DIGRAMS_ERROR


def get_digrams(word: str) -> set:
    """Get a set of digrams given a word.

    Generate every ordered pair of characters taken from two distinct
    positions of **word** (all length-2 permutations), not only the pairs
    that sit next to each other in the word.

    Args:
        word (str): String to get digrams of.

    Returns:
        :py:obj:`set` of all possible digrams of the given word. Empty if
        **word** has fewer than two characters.

    Raises:
        TypeError: If **word** isn't a string.

    """
    if not isinstance(word, str):
        raise TypeError(GET_DIGRAMS_ERROR)
    # permutations() works on positions, so repeated letters produce
    # duplicate pairs; the set comprehension collapses them.
    return {''.join(pair) for pair in permutations(word, 2)}
def count_digrams(digrams: set, dict_list: list) -> dict:
    """Count digrams in word dictionary.

    For each digram in the set, count how many words in the word dictionary
    list contain it. A word is counted once per digram even if the digram
    appears in it several times.

    Args:
        digrams (set): Set of digrams to count frequency of.
        dict_list (list): Word dictionary list.

    Returns:
        :py:class:`~collections.Counter` with digrams as keys and their
        counts as values. Digrams found in no word are kept with a count
        of ``0``.

    Raises:
        TypeError: If **digrams** isn't a set or if **dict_list** isn't a
            list.

    """
    if not (isinstance(digrams, set) and isinstance(dict_list, list)):
        raise TypeError(COUNT_DIGRAMS_ERROR)
    # Building the Counter from a mapping keeps every digram as an explicit
    # key, including those whose count is zero.
    return Counter({
        digram: sum(digram in word for word in dict_list)
        for digram in digrams
    })
def digram_counter(word: str, dict_file: str = DICTIONARY_FILE_PATH) -> dict:
    """Wrap get_digrams, count_digrams, and read_from_file.

    Build the digram set of **word** with :func:`get_digrams`, load the word
    list by passing **dict_file** to ``read_from_file``, and hand both to
    :func:`count_digrams`.

    Args:
        word (str): Word to get digrams of.
        dict_file (str): Path of dictionary file to get a frequency analysis
            of each digram. Defaults to
            :py:const:`~src.ch02.DICTIONARY_FILE_PATH`.

    Returns:
        :py:class:`~collections.Counter` with digrams as keys and their
        counts as values.

    """
    # Digrams are built first so an invalid word fails before the file is
    # read.
    digram_set = get_digrams(word)
    word_list = read_from_file(dict_file)
    return count_digrams(digram_set, word_list)
def main():
    """Demonstrate the digram counter.

    Print an introduction, count the digrams of a sample word against the
    default dictionary file, show the counts as a bar chart, and report the
    digram with the highest count.
    """
    print('I\'m a digram counter.\nIf you don\'t know what I can be used '
          'for, then you don\'t need me.\nSeriously, though, I can be used '
          'for cryptographic frequency analysis - which\nprobably makes even '
          'less sense...\n')
    word = 'volvo'
    print(f'Analyzing: {word}\n')
    print('In the Ubuntu default `american-english` dictionary, these are\n'
          'the digram counts for the above word:')
    digram_count = digram_counter(word, DICTIONARY_FILE_PATH)
    print_bar_chart(digram_count)
    # Pick the digram with the highest count. Sorting the keys and taking
    # the first one would give the alphabetically first digram instead.
    top_digram = max(digram_count, key=digram_count.get)
    print(f'\nThe "{top_digram}" digram occurs {digram_count[top_digram]} '
          f'times!')
if __name__ == '__main__':
    # Run the demonstration only when executed as a script, not on import.
    main()