"""Generate phrase anagrams from a word or phrase."""
from collections import defaultdict, Counter
from os import cpu_count
from string import ascii_lowercase
from sys import setrecursionlimit
from threading import Thread
from src.ch02 import DICTIONARY_FILE_PATH
from src.ch02.p1_cleanup_dictionary import cleanup_dict, cleanup_list_more
setrecursionlimit(9000) # Default is 1000
[docs]def get_primes(length: int = 26, min_prime: int = 2,
max_prime: int = 101) -> list:
"""Get list of primes.
Given a length, minimum, and maximum prime number, return a list of prime
numbers.
Args:
length (int): Number of prime numbers to return. Defaults to ``26``.
min_prime (int): Smallest prime number to return. Defaults to ``2``.
max_prime (int): Largest prime number to return. Defaults to ``101``.
Returns:
:py:obj:`list` of **length** prime numbers with **min_prime** as the
smallest prime number and **max_prime** as the largest prime number
in the list.
"""
primes = []
if min_prime <= 3:
# If min_prime includes ``2``, skip it during calculations.
min_prime, primes = 3, [2]
elif min_prime % 2 == 0:
# If min_prime is even, make exclusive odd.
min_prime += 1
while len(primes) < length:
# Iterate over odd numbers.
for num in range(min_prime, max_prime + 1, 2):
# If num can't be divided by all odd numbers from min_prime to
# sqrt(num), it's a prime.
if all(num % i != 0 for i in range(3, int(num**.5) + 1, 2)):
primes.append(num)
if len(primes) == length:
# Stop as soon as we have enough primes.
break
return primes
[docs]def get_id(word: str) -> int:
"""Get ID number of word.
Assign a unique prime number to each letter in
:py:obj:`~string.ascii_lowercase`. The product of each letter in **word**
is its ID number.
Args:
word (str): Word to get ID of.
Returns:
:py:obj:`int` representing ID of **word**.
"""
# Assign each ASCII lowercase letter a prime number.
primes = get_primes()
letter_id = dict(zip(ascii_lowercase, primes))
# Find the product of each letter in the word.
product = 1
for letter in word:
product *= letter_id[letter]
return product
[docs]def get_anagram_dict(word_list: list) -> dict:
"""Get an anagram dictionary from word_list.
Get the ID of each word in **word list** and add it to a dictionary with
the ID as the key.
Args:
word_list (list): List of words to make into anagram dictionary.
Returns:
:py:class:`~collections.defaultdict` of :py:obj:`list` with an ID
(:py:obj:`int`) as the key and words whose product of letters equal
that ID as values.
"""
anagram_dict = defaultdict(list)
# Find the product of each letter for each word in a dictionary.
for word in word_list:
anagram_dict[get_id(word)].append(word)
return anagram_dict
[docs]def split(a_list: list, parts: int) -> list:
"""Split a list into parts.
Split given list into given number of parts.
Args:
a_list (list): List to split.
parts (int): Number of parts to split list into.
Returns:
List of lists with **a_list** split into **parts**.
Example:
>>> import src.ch03.c1_anagram_generator.split as split
>>> some_list = ['this', 'is', 'a', 'list']
>>> split_list = split(some_list, 2)
>>> print(split_list)
[['this', 'is'], ['a', 'list']]
"""
quotient, remainder = divmod(len(a_list), parts)
return list((a_list[i * quotient + min(i, remainder):(i + 1) * quotient +
min(i + 1, remainder)] for i in range(parts)))
[docs]def extend_anagram_dict(word_list: list, dictionary: dict):
"""Extend an anagram dictionary.
Adds words from given word list to a given anagram dictionary.
Args:
word_list (list): List of words to add to anagram dictionary.
dictionary (dict): Anagram dictionary to add words to.
Returns:
None. If words in **word_list** are in **dictionary** they are not
added. Otherwise, they are added.
"""
new_anagram_dict = get_anagram_dict(word_list)
for key, value in new_anagram_dict.items():
for element in value:
if element not in dictionary[key]:
dictionary[key].extend(value)
[docs]def multi_get_anagram_dict(word_list: list) -> dict:
"""Multithreaded get anagram dictionary.
Uses :py:func:`os.cpu_count` and :py:class:`threading.Thread` to use
all CPUs to make an anagram dictionary with the intent of being more
efficient than :py:func:`~src.ch03.c1_anagram_generator.get_anagram_dict`.
Args:
word_list (list): List of words to make into anagram dictionary.
Returns:
:py:class:`~collections.defaultdict` of :py:obj:`list` with an ID
(:py:obj:`int`) as the key and words whose product of letters equal
that ID as values.
Warning:
Avoids race conditions by heavily relying on CPython's
`Global Interpreter Lock`_. More info about `Thread Objects`_.
.. _Global Interpreter Lock:
https://docs.python.org/3/glossary.html#term-global-interpreter-lock
.. _Thread Objects:
https://docs.python.org/3/library/threading.html#thread-objects
"""
super_dict = defaultdict(list)
divisions = split(word_list, cpu_count())
threads = []
for division in divisions:
thread = Thread(target=extend_anagram_dict,
args=(division, super_dict))
threads.append(thread)
thread.start()
for thread in threads:
thread.join()
return super_dict
[docs]def find_anagrams(word: str, anagram_dict: dict) -> list:
"""Find anagrams in word.
Find all anagrams in a given word (or phrase) using anagram dictionary.
Args:
word (str): Word to find anagrams of.
anagram_dict: Dictionary from :func:`get_anagram_dict`.
Returns:
:py:obj:`list` of :py:obj:`str` with all anagrams in **word**.
"""
if not word.islower():
word = word.lower()
if ' ' in word:
# If word is a phrase with spaces, remove spaces.
word = ''.join(word.split())
anagrams = []
id_num = get_id(word)
keys = list(anagram_dict.keys()) # Make keys indexable. Python3.6 only?
# If an anagram is IN the word, the modulo of the anagram's ID and the
# word's ID will be 0.
for key in keys:
if id_num % key == 0:
anagrams.extend(anagram_dict[key])
return sorted(anagrams)
[docs]def remove_unusable_words(anagram_dict: dict, usable_letters: list) -> dict:
"""Remove unusable words from anagram dictionary.
Creates new anagram dictionary by including only IDs that can be IN
**usable_letters**.
Args:
anagram_dict (dict): Anagram dictionary to prune.
usable_letters (list): List of letters that must be used.
Returns:
:py:class:`~collections.defaultdict` of :py:obj:`list` with an ID
(:py:obj:`int`) as the key and words whose product of letters equal
that ID as values.
"""
new_word = ''.join(usable_letters)
new_anagram_dict = defaultdict(list)
id_num = get_id(new_word)
keys = list(anagram_dict.keys()) # Make keys indexable. Python3.6 only?
# If anagram can be IN new_word, add to new_anagram_dict.
for key in keys:
if id_num % key == 0:
new_anagram_dict[key] = anagram_dict[key]
return new_anagram_dict
[docs]def find_anagram_phrases(phrases: list, word: str, anagram_dict: dict,
phrase: list) -> None:
"""Find anagram phrases.
Recursively finds anagram phrases of **word** by removing unusable words
from the **anagram_dict**, finding remaining anagrams given the
**phrase**, then adding any found anagram phrases to **phrases**.
Args:
phrases (list): List of anagram phrases.
word (str): Current word to find anagram phrases of.
anagram_dict (dict): Current anagram dictionary to find anagrams with.
phrase (list): Current anagram phrase candidate.
Returns:
None. **phrases** is updated with any found anagram phrases.
"""
letters = Counter(word.replace(' ', ''))
letters.subtract(''.join(phrase))
letters_left = list(letters.elements())
new_anagram_dict = remove_unusable_words(anagram_dict, letters_left)
# Once the length is equal, we have an anagram phrase.
if len(word.replace(' ', '')) == len(''.join(phrase)):
if Counter(word.replace(' ', '')) == Counter(''.join(phrase)):
# Add to phrases
phrases.append(' '.join(phrase))
return None
# Find new anagrams and recurse.
anagrams = find_anagrams(''.join(letters_left), anagram_dict)
for anagram in anagrams:
new_phrase = phrase[:]
new_phrase.append(anagram)
find_anagram_phrases(phrases, word, new_anagram_dict, new_phrase)
return None
[docs]def anagram_generator(word: str) -> list:
"""Generate phrase anagrams.
Make phrase anagrams from a given word or phrase.
Args:
word (str): Word to get phrase anagrams of.
Returns:
:py:obj:`list` of phrase anagrams of **word**.
"""
phrases, phrase = [], []
dictionary = cleanup_list_more(cleanup_dict(DICTIONARY_FILE_PATH))
anagram_dict = multi_get_anagram_dict(dictionary)
# Build anagram dictionary specific to word.
new_anagram_dict = remove_unusable_words(anagram_dict,
list(word.replace(' ', '')))
# Recursively find phrases using new_anagram_dict.
find_anagram_phrases(phrases, word, new_anagram_dict, phrase)
return phrases
[docs]def main():
"""Demonstrate the Anagram Generator."""
print('I can find all phrase anagrams given a word or phrase.\n'
'I\'m fun at parties.')
# Print first 500 results.
word = 'see shells'
print(f'\nAnalyzing: {word}\n')
anagram_phrases = anagram_generator(word)
for i, phrase in enumerate(anagram_phrases):
if i > 500:
break
print(phrase)
if __name__ == '__main__':
main()