
Commit

Separated the all_solution() method from words.py by creating words_offline.py

Purpose: with this commit, words.py returns a solution that requires
         Moby's thesaurus, gensim's glove-wiki-gigaword-100, and nltk's WordNet,
         whereas words_offline.py returns a solution that requires all-clues.bz2.
pncnmnp committed Dec 20, 2019
1 parent d8b8b5b commit 52176b6
Showing 3 changed files with 105 additions and 83 deletions.
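After the split, both modules expose the same fetch_words(clues) entry point; only their resources differ. A minimal driver sketch, mirroring the __main__ blocks of the two files below (nothing here goes beyond what the diff itself shows):

# Build the {clue: answer_length} dict from the shared schema.
from schema import CROSSWORD_GRID

clues = dict()
for clue in CROSSWORD_GRID:
    clues[clue] = CROSSWORD_GRID[clue]["length"]

# Online solver: needs Moby's thesaurus, gensim's glove-wiki-gigaword-100,
# and nltk's WordNet.
from words import Words
Words().fetch_words(clues)

# Offline solver: driven by the all-clues.bz2 wordlist instead.
from words_offline import Words_Offline
Words_Offline().fetch_words(clues)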
2 changes: 1 addition & 1 deletion clues.json
@@ -1 +1 @@
"{'__ of bad news': ['sitson', 'edible', 'better', 'imsory', 'yerout', 'ashame', 'lesser', 'goodor', 'severe', 'isgood', 'oopsie', 'noroom', 'rotten', 'pileup', 'nohits', 'odious', 'recall', 'itssad', 'bearer', 'rancid', 'decent', 'nosale', 'whoops', 'rialto', 'grimly', 'delays', 'illuse'], 'Posture problem': ['issue', 'tough', 'allok', 'stoop', 'asnap', 'poser', 'whats', 'minds', 'sorry', 'seems', 'cando', 'itsok'], 'Loads': ['rearms', 'plenty', 'oodles', 'cargos', 'oceans', 'adored', 'roadie', 'adores', 'onuses', 'washes', 'scores'], 'Laundry appliance': ['drier', 'dryer'], 'Lectured': ['spoke', 'pupil'], 'One who weeps': ['group', 'trust', 'sided', 'twoto', 'arent', 'oweme', 'these', 'cuber', 'digit', 'ahead', 'asoul', 'alike', 'cryer', 'lucky', 'akind', 'their', 'alone', 'longe', 'acter', 'rewed', 'equal', 'atime', 'ortwo', 'sixof', 'bogey', 'icare', 'alien', 'crier', 'wedto', 'shirt', 'again', 'admit', 'whine', 'nonot', 'oneto', 'which', 'oneby', 'ahalf', 'units', 'title', 'tryit', 'fiber', 'model', 'iwant', 'vowel', 'unite', 'among', 'idiom', 'riser', 'cries', 'along', 'agree', 'piece', 'grade', 'excon', 'groan', 'loser', 'tento', 'puzle', 'those', 'orthe', 'every', 'motto', 'owner', 'satan', 'sroot'], 'Grassy clump': ['sod', 'wad'], 'Pie chart portion': ['sector'], '\"Scary Movie,\" e.g.': ['teaser', 'scream', 'scarer', 'promos', 'rental', 'weeper', 'sequel', 'parody'], \"Maryland's state bird\": ['grouse', 'thrush', 'oriole'], 'Something worth saving': ['usable', 'assets', 'keeper'], '\"To __ is human\"': ['ist', 'aah', 'eve', 'ity', 'sin', 'cpa', 'err', 'men', 'ism', 'all', 'art', 'ape', 'oid', 'soc', 'spy', 'lap', 'man', 'jon', 'arm']}"
"{'__ of bad news': ['ashame', 'bearer', 'better', 'decent', 'delays', 'edible', 'goodor', 'grimly', 'illuse', 'imsory', 'isgood', 'itssad', 'lesser', 'nohits', 'noroom', 'nosale', 'odious', 'oopsie', 'pileup', 'rancid', 'recall', 'rialto', 'rotten', 'severe', 'sitson', 'whoops', 'yerout'], 'Posture problem': ['allok', 'asnap', 'cando', 'issue', 'itsok', 'minds', 'poser', 'seems', 'sorry', 'stoop', 'tough', 'whats'], 'Loads': ['adored', 'adores', 'cargos', 'oceans', 'onuses', 'oodles', 'plenty', 'rearms', 'roadie', 'scores', 'washes'], 'Laundry appliance': ['drier', 'dryer'], 'Lectured': ['pupil', 'spoke'], 'One who weeps': ['acter', 'admit', 'again', 'agree', 'ahalf', 'ahead', 'akind', 'alien', 'alike', 'alone', 'along', 'among', 'arent', 'asoul', 'atime', 'bogey', 'crier', 'cries', 'cryer', 'cuber', 'digit', 'equal', 'every', 'excon', 'fiber', 'grade', 'groan', 'group', 'icare', 'idiom', 'iwant', 'longe', 'loser', 'lucky', 'model', 'motto', 'nonot', 'oneby', 'oneto', 'orthe', 'ortwo', 'oweme', 'owner', 'piece', 'puzle', 'rewed', 'riser', 'satan', 'shirt', 'sided', 'sixof', 'sroot', 'tento', 'their', 'these', 'those', 'title', 'trust', 'tryit', 'twoto', 'unite', 'units', 'vowel', 'wedto', 'which', 'whine'], 'Grassy clump': ['sod', 'wad'], 'Pie chart portion': ['sector'], '\"Scary Movie,\" e.g.': ['parody', 'promos', 'rental', 'scarer', 'scream', 'sequel', 'teaser', 'weeper'], \"Maryland's state bird\": ['grouse', 'oriole', 'thrush'], 'Something worth saving': ['assets', 'keeper', 'usable'], '\"To __ is human\"': ['aah', 'all', 'ape', 'arm', 'art', 'cpa', 'err', 'eve', 'ism', 'ist', 'ity', 'jon', 'lap', 'man', 'men', 'oid', 'sin', 'soc', 'spy']}"
109 changes: 27 additions & 82 deletions words.py
@@ -2,16 +2,14 @@
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from ast import literal_eval
from collections import Counter
import gensim.downloader as api
from file_path import *
from schema import CROSSWORD_GRID
import pickle
import string
import requests
import inflect # Library used to check whether a word is singular or plural
import json
import math
import re

"""
TODO: >> Plural detection and conversion [done]
@@ -158,57 +156,6 @@ def sentence_solution(self, sentence_clues, clues):

return clue_mapping

def all_solution(self, clues):
stop = stopwords.words('english') + [""]

with open(ALL_CLUES, encoding="latin-1") as fp:
dict_guesses = fp.readlines()

clue_mapping = dict()
all_lengths = []
for clue in clues:
clue_mapping[clue] = list()
if clues[clue] not in all_lengths:
all_lengths.append(clues[clue])

clue_statements = list(clues.keys())
clue_vecs = dict()
for clue in clue_statements:
clue_vecs[clue] = [word for word in [word.strip(string.punctuation) for word in clue.lower().split()] if word not in stop]

print(">>> STARTING ALL CLUES FETCH (V.1).....")
for guess in dict_guesses:
if len(guess.split()[0]) not in all_lengths:
continue

guess_statement = " ".join(guess.split()[4:])
guess_vec = Counter([word for word in [word.strip(string.punctuation) for word in guess_statement.lower().split()] if word not in stop])

for clue in clue_statements:
if len(guess.split()[0]) == clues[clue]:
clue_vec = Counter(clue_vecs[clue])

# https://stackoverflow.com/questions/15173225/calculate-cosine-similarity-given-2-sentence-strings
intersection = set(guess_vec.keys()) & set(clue_vec.keys())
numerator = sum([guess_vec[x] * clue_vec[x] for x in intersection])

sum1 = sum([guess_vec[x]**2 for x in guess_vec.keys()])
sum2 = sum([clue_vec[x]**2 for x in clue_vec.keys()])
denominator = math.sqrt(sum1) * math.sqrt(sum2)

if not denominator:
sim = 0.0
else:
sim = float(numerator) / denominator

if sim > 0.65:
clue_mapping[clue] += [guess.split()[0].lower()]

for clue in clues:
clue_mapping[clue] = list(set(clue_mapping[clue]))

return clue_mapping

def one_word_solution(self, one_word_clues, clues):
fp = open(MOBY_PATH)
moby_lines = fp.readlines()
@@ -289,11 +236,9 @@ def store_words(self, one_word_solved, one_word_clues, sentence_solved, wikipedia_solved):
for key in list(sentence_solved.keys()):
try:
clues[key] += [word[0] for word in sentence_solved[key]]
clues[key] += wikipedia_solved[key]
except:
clues[key] = list()
clues[key] += [word[0] for word in sentence_solved[key]]
clues[key] += wikipedia_solved[key]

for key in list(wikipedia_solved.keys()):
try:
@@ -320,31 +265,31 @@ def fetch_words(self, clues):
Param: clues - dict
{clue_1: word_len_1, clue_2: word_len_2}
"""
# all_clues = list(clues.keys())
# one_word_clues = [(clue.lower(),clue) for clue in all_clues if len(clue.split(" ")) == 1]

# # converting words such as extra-large into large
# one_word_clues += [(clue.split("-")[-1].lower(),clue) for clue in all_clues
# if ("-" in clue) and (len(clue.split("-"))) == 2]
# one_word_solved = self.one_word_solution_alternate([clue[0] for clue in one_word_clues], clues)

# sentence_clues = list(set(all_clues).difference(set(one_word_clues)))
# sentence_solved = self.sentence_solution(sentence_clues, clues)

# wikipedia_clues = list()
# # Print top N results
# N = 40
# for clue in sentence_solved:
# sentence_solved[clue] = sentence_solved[clue][:N]

# wikipedia_solved = self.wikipedia_solution(sentence_clues, clues)
all_solved = self.all_solution(clues)
print(all_solved)
print(">>> STORED CLUES.....")
with open(CLUES_PATH, "w") as fp:
json.dump(str(all_solved), fp)
all_clues = list(clues.keys())
one_word_clues = [(clue.lower(),clue) for clue in all_clues if len(clue.split(" ")) == 1]

# converting words such as extra-large into large
one_word_clues += [(clue.split("-")[-1].lower(),clue) for clue in all_clues
if ("-" in clue) and (len(clue.split("-"))) == 2]
one_word_solved = self.one_word_solution_alternate([clue[0] for clue in one_word_clues], clues)

sentence_clues = list(set(all_clues).difference(set(one_word_clues)))
sentence_solved = self.sentence_solution(sentence_clues, clues)

wikipedia_clues = list()
# Print top N results
N = 40
for clue in sentence_solved:
sentence_solved[clue] = sentence_solved[clue][:N]

wikipedia_solved = self.wikipedia_solution(sentence_clues, clues)

self.store_words(one_word_solved, one_word_clues, sentence_solved, wikipedia_solved)

if __name__ == '__main__':
# THE WORDS INSERTED SHOULD HAVE THEIR STARTING LETTER CAPITALIZED
# Words().fetch_words({"A type of cheese": 4, "Indian Grandmaster": 5, "A small european singing bird": 5, "A plant of the cabbage species": 8, "Director of Raging Bull": 8, "Fireplace": 7, "A popular game character created by Shigeru Miyamoto": 5, "Author who created Sherlock Holmes": 5, "The science of life": 7, "Used for baking or roasting": 4})
Words().fetch_words({"__ of bad news": 6, "Posture problem": 5, "Loads": 6, "Laundry appliance": 5, "Lectured": 5, "One who weeps": 5, "Grassy clump": 3, "Pie chart portion": 6, "\"Scary Movie,\" e.g.": 6, "Maryland's state bird": 6, "Something worth saving": 6, "\"To __ is human\"": 3})
grid = CROSSWORD_GRID
clues = dict()
for clue in CROSSWORD_GRID:
clues[clue] = CROSSWORD_GRID[clue]["length"]

Words().fetch_words(clues)
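The new words_offline.py below is driven entirely by the ALL_CLUES file. Judging from how all_solution() slices each record (guess.split()[0] for the answer, guess.split()[4:] for the clue text), every line appears to hold a whitespace-separated answer, three metadata fields, and then the clue statement. A hedged sketch of that per-line parse; the sample line and field meanings are inferred, not taken from the actual file:

# Illustrative record only -- layout inferred from all_solution() below.
line = "dryer 1 23 1999 Laundry appliance"
fields = line.split()
answer = fields[0]                 # candidate answer word
clue_text = " ".join(fields[4:])   # clue statement used for matching
print(len(answer), clue_text)      # answer length drives the early filter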
77 changes: 77 additions & 0 deletions words_offline.py
@@ -0,0 +1,77 @@
from nltk.corpus import stopwords
from collections import Counter
from schema import CROSSWORD_GRID
from file_path import *
import string
import math
import re
import json

class Words_Offline():
def __init__(self):
pass

def all_solution(self, clues):
stop = stopwords.words('english') + [""]

with open(ALL_CLUES, encoding="latin-1") as fp:
dict_guesses = fp.readlines()

clue_mapping = dict()
all_lengths = []
for clue in clues:
clue_mapping[clue] = list()
if clues[clue] not in all_lengths:
all_lengths.append(clues[clue])

clue_statements = list(clues.keys())
clue_vecs = dict()
for clue in clue_statements:
clue_vecs[clue] = [word for word in [word.strip(string.punctuation) for word in clue.lower().split()] if word not in stop]

print(">>> STARTING ALL CLUES FETCH (V.1).....")
for guess in dict_guesses:
if len(guess.split()[0]) not in all_lengths:
continue

guess_statement = " ".join(guess.split()[4:])
guess_vec = Counter([word for word in [word.strip(string.punctuation) for word in guess_statement.lower().split()] if word not in stop])

for clue in clue_statements:
if len(guess.split()[0]) == clues[clue]:
clue_vec = Counter(clue_vecs[clue])

# https://stackoverflow.com/questions/15173225/calculate-cosine-similarity-given-2-sentence-strings
intersection = set(guess_vec.keys()) & set(clue_vec.keys())
numerator = sum([guess_vec[x] * clue_vec[x] for x in intersection])

sum1 = sum([guess_vec[x]**2 for x in guess_vec.keys()])
sum2 = sum([clue_vec[x]**2 for x in clue_vec.keys()])
denominator = math.sqrt(sum1) * math.sqrt(sum2)

if not denominator:
sim = 0.0
else:
sim = float(numerator) / denominator

if sim > 0.65:
clue_mapping[clue] += [guess.split()[0].lower()]

for clue in clues:
clue_mapping[clue] = list(set(clue_mapping[clue]))

return clue_mapping

def fetch_words(self, clues):
all_solved = self.all_solution(clues)
print(">>> STORED CLUES.....")
with open(CLUES_PATH, "w") as fp:
json.dump(str(all_solved), fp)

if __name__ == '__main__':
grid = CROSSWORD_GRID
clues = dict()
for clue in CROSSWORD_GRID:
clues[clue] = CROSSWORD_GRID[clue]["length"]

Words_Offline().fetch_words(clues)
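For reference, the scoring inside all_solution() is plain bag-of-words cosine similarity with a 0.65 cutoff. A self-contained sketch of that formula on a toy pair (the helper name is mine, not from the repo):

from collections import Counter
import math

def cosine_sim(a_words, b_words):
    # Dot product of term counts over the product of Euclidean norms,
    # the same computation done inline in all_solution().
    a, b = Counter(a_words), Counter(b_words)
    numerator = sum(a[x] * b[x] for x in set(a) & set(b))
    denominator = math.sqrt(sum(v ** 2 for v in a.values())) * \
                  math.sqrt(sum(v ** 2 for v in b.values()))
    return numerator / denominator if denominator else 0.0

# Clue tokens vs. a guess statement sharing both content words:
print(cosine_sim(["laundry", "appliance"], ["laundry", "appliance", "brand"]))
# -> about 0.816, above the 0.65 threshold, so the guess's answer is kept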
