# Source code for src.ch02.p1_cleanup_dictionary

"""Cleanup word dictionary.

Various functions for cleaning up a word dictionary.

Attributes:
    APPROVED_WORDS (list): Words that should always appear in a word
        dictionary.

"""
from string import ascii_lowercase
from src.ch01.challenge.c2_name_generator import read_from_file
from src.ch02 import DICTIONARY_FILE_PATH, CLEANUP_LIST_ERROR

#: One- and two-letter words that are added back into a word list by
#: :func:`cleanup_list_more`. Order is kept as originally declared.
APPROVED_WORDS = [
    # Single-letter words.
    'i', 'a',
    # Two-letter words.
    'me', 'an', 'qi', 'at', 'to', 'as', 'am', 'ad', 'be', 'by', 'go', 'he',
    'hi', 'if', 'in', 'is', 'it', 'my', 'no', 'of', 'oh', 'ox', 'so', 'up',
    'us', 'we',
]


def cleanup_list(word_list: list) -> list:
    """Drop every word shorter than two characters from a word list.

    Args:
        word_list (list): List with words as elements.

    Returns:
        New list holding, in their original order, only the elements of
        **word_list** whose length is greater than one.

    Raises:
        IndexError: If **word_list** is empty.

    """
    if word_list:
        # Keep only entries that are at least two characters long.
        return list(filter(lambda entry: len(entry) > 1, word_list))
    raise IndexError(CLEANUP_LIST_ERROR)
def cleanup_list_more(word_list: list) -> list:
    """Cleanup word list even more.

    First, remove words with apostrophes, two-letter words, duplicates, and
    words with letters not in :py:obj:`string.ascii_lowercase` from a
    :py:obj:`list` of words. Surviving words are lowercased. Then, add
    :py:const:`APPROVED_WORDS` back into the list. Finally, sort the list.

    Args:
        word_list (list): List with words as elements.

    Returns:
        Sorted list with lowercased words as elements excluding cleaned
        words and with :py:const:`APPROVED_WORDS` added.

    Raises:
        IndexError: If **word_list** is empty.

    """
    if not word_list:
        raise IndexError(CLEANUP_LIST_ERROR)
    allowed_letters = set(ascii_lowercase)
    # Collect into a set so duplicates collapse as words are added and no
    # linear membership scans are needed.
    clean_words = set()
    for word in word_list:
        # Skip words with apostrophes and two-letter words; the only
        # two-letter words in the result are those in APPROVED_WORDS.
        if "'" in word or len(word) == 2:
            continue
        lowered = word.lower()
        # Skip words with letters not in ascii_lowercase.
        if not allowed_letters.issuperset(lowered):
            continue
        clean_words.add(lowered)
    # Add approved words; the set ignores those already present.
    clean_words.update(APPROVED_WORDS)
    return sorted(clean_words)
def cleanup_dict(filepath: str) -> list:
    """Read a word dictionary file and strip its single letter words.

    Hands **filepath** to
    :func:`~src.ch01.challenge.c2_name_generator.read_from_file` and feeds
    whatever it returns to :func:`cleanup_list`.

    Args:
        filepath (str): String with path to word dictionary file.

    Returns:
        List with words as elements excluding single letter words.

    Raises:
        IndexError: Propagated from :func:`cleanup_list` if the file yields
            an empty word list.

    """
    raw_words = read_from_file(filepath)
    return cleanup_list(raw_words)
def main():
    """Demonstrate cleanup dictionary.

    Prints an introduction, then reports how many words the dictionary at
    ``DICTIONARY_FILE_PATH`` holds before and after :func:`cleanup_dict`.
    """
    intro = ('I\'m a word dictionary cleaner.\n'
             'I remove those annoying one letter words.\n')
    print(intro)
    # The dictionary file is read twice: once here for the raw count and
    # once more inside cleanup_dict.
    word_list_len = len(read_from_file(DICTIONARY_FILE_PATH))
    clean_word_list_len = len(cleanup_dict(DICTIONARY_FILE_PATH))
    report = (f'Original word list had {word_list_len} words.\n'
              f'Cleaned word list has {clean_word_list_len} words.\n'
              f'I cleaned up {word_list_len - clean_word_list_len} words! '
              f'Yay, me!')
    print(report)
# Run the demonstration only when executed as a script, not on import.
if __name__ == '__main__':
    main()