# Source code for src.ch02.p1_cleanup_dictionary
"""Cleanup word dictionary.
Various functions for cleaning up a word dictionary.
Attributes:
    APPROVED_WORDS (list): Words that should always appear in a word
        dictionary.
"""
from string import ascii_lowercase
from src.ch01.challenge.c2_name_generator import read_from_file
from src.ch02 import DICTIONARY_FILE_PATH, CLEANUP_LIST_ERROR
# Words that cleanup_list_more always adds to its result. Its filter skips
# every two letter word, so the two letter entries listed here would
# otherwise be lost.
APPROVED_WORDS = ['i', 'a', 'me', 'an', 'qi', 'at', 'to', 'as', 'am', 'ad',
                  'be', 'by', 'go', 'he', 'hi', 'if', 'in', 'is', 'it', 'my',
                  'no', 'of', 'oh', 'ox', 'so', 'up', 'us', 'we']
def cleanup_list(word_list: list) -> list:
    """Cleanup word list.

    Remove single letter words from a :py:obj:`list` of words.

    Args:
        word_list (list): List with words as elements.

    Returns:
        New list holding the elements of **word_list** that are longer than
        one character, in their original order. Empty strings are dropped
        as well, and duplicates are kept.

    Raises:
        IndexError: If **word_list** is empty.

    """
    if not word_list:
        raise IndexError(CLEANUP_LIST_ERROR)
    # Build a new list; the caller's list is left untouched.
    return [word for word in word_list if len(word) > 1]
def cleanup_list_more(word_list: list) -> list:
    """Cleanup word list even more.

    First, remove words with apostrophes, double letter words, duplicates,
    and words with letters not in :py:obj:`string.ascii_lowercase` from a
    :py:obj:`list` of words. Then, add :py:const:`APPROVED_WORDS` back into
    list. Finally, sort list.

    Words are lowercased before the letter check and before being stored, so
    the result only contains lowercase words. Single letter words are not
    filtered out by this function.

    Args:
        word_list (list): List with words as elements.

    Returns:
        Sorted list with words as elements excluding cleaned words and
        :py:const:`APPROVED_WORDS` added.

    Raises:
        IndexError: If **word_list** is empty.

    """
    if not word_list:
        raise IndexError(CLEANUP_LIST_ERROR)
    allowed_letters = set(ascii_lowercase)
    # A set removes duplicates as words are collected and makes merging the
    # approved words cheap (no linear scan per approved word).
    clean_words = set()
    for word in word_list:
        if "'" in word or len(word) == 2:
            # Skip words with apostrophes and double letter words.
            continue
        lowered = word.lower()
        if any(letter not in allowed_letters for letter in lowered):
            # Skip words with letters not in ascii_lowercase.
            continue
        clean_words.add(lowered)
    # Add approved words; the set ignores the ones already present.
    clean_words.update(APPROVED_WORDS)
    return sorted(clean_words)
def cleanup_dict(filepath: str) -> list:
    """Wrap read_from_file and cleanup_list.

    Passes given **filepath** through
    :func:`~src.ch01.challenge.c2_name_generator.read_from_file`
    to get a list of words, then :func:`cleanup_list` to remove single letter
    words.

    Args:
        filepath (str): String with path to word dictionary file.

    Returns:
        List with words as elements excluding single letter words.

    Raises:
        IndexError: Propagated from :func:`cleanup_list` if the word list
            read from **filepath** is empty.

    """
    return cleanup_list(read_from_file(filepath))
def main():
    """Demonstrate cleanup dictionary.

    Reads the word dictionary at ``DICTIONARY_FILE_PATH``, removes single
    letter words with :func:`cleanup_list`, and prints how many words were
    removed.
    """
    print('I\'m a word dictionary cleaner.\n'
          'I remove those annoying one letter words.\n')
    # Read the file once and clean that same list, so both counts describe
    # the same snapshot of the dictionary and the file is not read twice.
    word_list = read_from_file(DICTIONARY_FILE_PATH)
    word_list_len = len(word_list)
    clean_word_list = cleanup_list(word_list)
    clean_word_list_len = len(clean_word_list)
    print(f'Original word list had {word_list_len} words.\n'
          f'Cleaned word list has {clean_word_list_len} words.\n'
          f'I cleaned up {word_list_len - clean_word_list_len} words! '
          f'Yay, me!')
# Run the demonstration only when this module is executed as a script.
if __name__ == '__main__':
    main()