Source code for src.ch03.c1_anagram_generator

"""Generate phrase anagrams from a word or phrase."""
from collections import defaultdict, Counter
from os import cpu_count
from string import ascii_lowercase
from sys import setrecursionlimit
from threading import Thread
from src.ch02 import DICTIONARY_FILE_PATH
from src.ch02.p1_cleanup_dictionary import cleanup_dict, cleanup_list_more

setrecursionlimit(9000)  # Default is 1000


[docs]def get_primes(length: int = 26, min_prime: int = 2, max_prime: int = 101) -> list: """Get list of primes. Given a length, minimum, and maximum prime number, return a list of prime numbers. Args: length (int): Number of prime numbers to return. Defaults to ``26``. min_prime (int): Smallest prime number to return. Defaults to ``2``. max_prime (int): Largest prime number to return. Defaults to ``101``. Returns: :py:obj:`list` of **length** prime numbers with **min_prime** as the smallest prime number and **max_prime** as the largest prime number in the list. """ primes = [] if min_prime <= 3: # If min_prime includes ``2``, skip it during calculations. min_prime, primes = 3, [2] elif min_prime % 2 == 0: # If min_prime is even, make exclusive odd. min_prime += 1 while len(primes) < length: # Iterate over odd numbers. for num in range(min_prime, max_prime + 1, 2): # If num can't be divided by all odd numbers from min_prime to # sqrt(num), it's a prime. if all(num % i != 0 for i in range(3, int(num**.5) + 1, 2)): primes.append(num) if len(primes) == length: # Stop as soon as we have enough primes. break return primes
[docs]def get_id(word: str) -> int: """Get ID number of word. Assign a unique prime number to each letter in :py:obj:`~string.ascii_lowercase`. The product of each letter in **word** is its ID number. Args: word (str): Word to get ID of. Returns: :py:obj:`int` representing ID of **word**. """ # Assign each ASCII lowercase letter a prime number. primes = get_primes() letter_id = dict(zip(ascii_lowercase, primes)) # Find the product of each letter in the word. product = 1 for letter in word: product *= letter_id[letter] return product
[docs]def get_anagram_dict(word_list: list) -> dict: """Get an anagram dictionary from word_list. Get the ID of each word in **word list** and add it to a dictionary with the ID as the key. Args: word_list (list): List of words to make into anagram dictionary. Returns: :py:class:`~collections.defaultdict` of :py:obj:`list` with an ID (:py:obj:`int`) as the key and words whose product of letters equal that ID as values. """ anagram_dict = defaultdict(list) # Find the product of each letter for each word in a dictionary. for word in word_list: anagram_dict[get_id(word)].append(word) return anagram_dict
[docs]def split(a_list: list, parts: int) -> list: """Split a list into parts. Split given list into given number of parts. Args: a_list (list): List to split. parts (int): Number of parts to split list into. Returns: List of lists with **a_list** split into **parts**. Example: >>> import src.ch03.c1_anagram_generator.split as split >>> some_list = ['this', 'is', 'a', 'list'] >>> split_list = split(some_list, 2) >>> print(split_list) [['this', 'is'], ['a', 'list']] """ quotient, remainder = divmod(len(a_list), parts) return list((a_list[i * quotient + min(i, remainder):(i + 1) * quotient + min(i + 1, remainder)] for i in range(parts)))
[docs]def extend_anagram_dict(word_list: list, dictionary: dict): """Extend an anagram dictionary. Adds words from given word list to a given anagram dictionary. Args: word_list (list): List of words to add to anagram dictionary. dictionary (dict): Anagram dictionary to add words to. Returns: None. If words in **word_list** are in **dictionary** they are not added. Otherwise, they are added. """ new_anagram_dict = get_anagram_dict(word_list) for key, value in new_anagram_dict.items(): for element in value: if element not in dictionary[key]: dictionary[key].extend(value)
[docs]def multi_get_anagram_dict(word_list: list) -> dict: """Multithreaded get anagram dictionary. Uses :py:func:`os.cpu_count` and :py:class:`threading.Thread` to use all CPUs to make an anagram dictionary with the intent of being more efficient than :py:func:`~src.ch03.c1_anagram_generator.get_anagram_dict`. Args: word_list (list): List of words to make into anagram dictionary. Returns: :py:class:`~collections.defaultdict` of :py:obj:`list` with an ID (:py:obj:`int`) as the key and words whose product of letters equal that ID as values. Warning: Avoids race conditions by heavily relying on CPython's `Global Interpreter Lock`_. More info about `Thread Objects`_. .. _Global Interpreter Lock: https://docs.python.org/3/glossary.html#term-global-interpreter-lock .. _Thread Objects: https://docs.python.org/3/library/threading.html#thread-objects """ super_dict = defaultdict(list) divisions = split(word_list, cpu_count()) threads = [] for division in divisions: thread = Thread(target=extend_anagram_dict, args=(division, super_dict)) threads.append(thread) thread.start() for thread in threads: thread.join() return super_dict
[docs]def find_anagrams(word: str, anagram_dict: dict) -> list: """Find anagrams in word. Find all anagrams in a given word (or phrase) using anagram dictionary. Args: word (str): Word to find anagrams of. anagram_dict: Dictionary from :func:`get_anagram_dict`. Returns: :py:obj:`list` of :py:obj:`str` with all anagrams in **word**. """ if not word.islower(): word = word.lower() if ' ' in word: # If word is a phrase with spaces, remove spaces. word = ''.join(word.split()) anagrams = [] id_num = get_id(word) keys = list(anagram_dict.keys()) # Make keys indexable. Python3.6 only? # If an anagram is IN the word, the modulo of the anagram's ID and the # word's ID will be 0. for key in keys: if id_num % key == 0: anagrams.extend(anagram_dict[key]) return sorted(anagrams)
[docs]def remove_unusable_words(anagram_dict: dict, usable_letters: list) -> dict: """Remove unusable words from anagram dictionary. Creates new anagram dictionary by including only IDs that can be IN **usable_letters**. Args: anagram_dict (dict): Anagram dictionary to prune. usable_letters (list): List of letters that must be used. Returns: :py:class:`~collections.defaultdict` of :py:obj:`list` with an ID (:py:obj:`int`) as the key and words whose product of letters equal that ID as values. """ new_word = ''.join(usable_letters) new_anagram_dict = defaultdict(list) id_num = get_id(new_word) keys = list(anagram_dict.keys()) # Make keys indexable. Python3.6 only? # If anagram can be IN new_word, add to new_anagram_dict. for key in keys: if id_num % key == 0: new_anagram_dict[key] = anagram_dict[key] return new_anagram_dict
[docs]def find_anagram_phrases(phrases: list, word: str, anagram_dict: dict, phrase: list) -> None: """Find anagram phrases. Recursively finds anagram phrases of **word** by removing unusable words from the **anagram_dict**, finding remaining anagrams given the **phrase**, then adding any found anagram phrases to **phrases**. Args: phrases (list): List of anagram phrases. word (str): Current word to find anagram phrases of. anagram_dict (dict): Current anagram dictionary to find anagrams with. phrase (list): Current anagram phrase candidate. Returns: None. **phrases** is updated with any found anagram phrases. """ letters = Counter(word.replace(' ', '')) letters.subtract(''.join(phrase)) letters_left = list(letters.elements()) new_anagram_dict = remove_unusable_words(anagram_dict, letters_left) # Once the length is equal, we have an anagram phrase. if len(word.replace(' ', '')) == len(''.join(phrase)): if Counter(word.replace(' ', '')) == Counter(''.join(phrase)): # Add to phrases phrases.append(' '.join(phrase)) return None # Find new anagrams and recurse. anagrams = find_anagrams(''.join(letters_left), anagram_dict) for anagram in anagrams: new_phrase = phrase[:] new_phrase.append(anagram) find_anagram_phrases(phrases, word, new_anagram_dict, new_phrase) return None
[docs]def anagram_generator(word: str) -> list: """Generate phrase anagrams. Make phrase anagrams from a given word or phrase. Args: word (str): Word to get phrase anagrams of. Returns: :py:obj:`list` of phrase anagrams of **word**. """ phrases, phrase = [], [] dictionary = cleanup_list_more(cleanup_dict(DICTIONARY_FILE_PATH)) anagram_dict = multi_get_anagram_dict(dictionary) # Build anagram dictionary specific to word. new_anagram_dict = remove_unusable_words(anagram_dict, list(word.replace(' ', ''))) # Recursively find phrases using new_anagram_dict. find_anagram_phrases(phrases, word, new_anagram_dict, phrase) return phrases
[docs]def main(): """Demonstrate the Anagram Generator.""" print('I can find all phrase anagrams given a word or phrase.\n' 'I\'m fun at parties.') # Print first 500 results. word = 'see shells' print(f'\nAnalyzing: {word}\n') anagram_phrases = anagram_generator(word) for i, phrase in enumerate(anagram_phrases): if i > 500: break print(phrase)
if __name__ == '__main__': main()