Skip to content

Commit 52176b6

Browse files
committed
Separated the all_solution() method from words.py by creating words_offline.py
Purpose: after this commit, words.py produces a solution that requires Moby's thesaurus, gensim's glove-wiki-gigaword-100 model, and nltk's WordNet, whereas words_offline.py produces a solution that requires only all-clues.bz2
1 parent d8b8b5b commit 52176b6

File tree

3 files changed

+105
-83
lines changed

3 files changed

+105
-83
lines changed

clues.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
"{'__ of bad news': ['sitson', 'edible', 'better', 'imsory', 'yerout', 'ashame', 'lesser', 'goodor', 'severe', 'isgood', 'oopsie', 'noroom', 'rotten', 'pileup', 'nohits', 'odious', 'recall', 'itssad', 'bearer', 'rancid', 'decent', 'nosale', 'whoops', 'rialto', 'grimly', 'delays', 'illuse'], 'Posture problem': ['issue', 'tough', 'allok', 'stoop', 'asnap', 'poser', 'whats', 'minds', 'sorry', 'seems', 'cando', 'itsok'], 'Loads': ['rearms', 'plenty', 'oodles', 'cargos', 'oceans', 'adored', 'roadie', 'adores', 'onuses', 'washes', 'scores'], 'Laundry appliance': ['drier', 'dryer'], 'Lectured': ['spoke', 'pupil'], 'One who weeps': ['group', 'trust', 'sided', 'twoto', 'arent', 'oweme', 'these', 'cuber', 'digit', 'ahead', 'asoul', 'alike', 'cryer', 'lucky', 'akind', 'their', 'alone', 'longe', 'acter', 'rewed', 'equal', 'atime', 'ortwo', 'sixof', 'bogey', 'icare', 'alien', 'crier', 'wedto', 'shirt', 'again', 'admit', 'whine', 'nonot', 'oneto', 'which', 'oneby', 'ahalf', 'units', 'title', 'tryit', 'fiber', 'model', 'iwant', 'vowel', 'unite', 'among', 'idiom', 'riser', 'cries', 'along', 'agree', 'piece', 'grade', 'excon', 'groan', 'loser', 'tento', 'puzle', 'those', 'orthe', 'every', 'motto', 'owner', 'satan', 'sroot'], 'Grassy clump': ['sod', 'wad'], 'Pie chart portion': ['sector'], '\"Scary Movie,\" e.g.': ['teaser', 'scream', 'scarer', 'promos', 'rental', 'weeper', 'sequel', 'parody'], \"Maryland's state bird\": ['grouse', 'thrush', 'oriole'], 'Something worth saving': ['usable', 'assets', 'keeper'], '\"To __ is human\"': ['ist', 'aah', 'eve', 'ity', 'sin', 'cpa', 'err', 'men', 'ism', 'all', 'art', 'ape', 'oid', 'soc', 'spy', 'lap', 'man', 'jon', 'arm']}"
1+
"{'__ of bad news': ['ashame', 'bearer', 'better', 'decent', 'delays', 'edible', 'goodor', 'grimly', 'illuse', 'imsory', 'isgood', 'itssad', 'lesser', 'nohits', 'noroom', 'nosale', 'odious', 'oopsie', 'pileup', 'rancid', 'recall', 'rialto', 'rotten', 'severe', 'sitson', 'whoops', 'yerout'], 'Posture problem': ['allok', 'asnap', 'cando', 'issue', 'itsok', 'minds', 'poser', 'seems', 'sorry', 'stoop', 'tough', 'whats'], 'Loads': ['adored', 'adores', 'cargos', 'oceans', 'onuses', 'oodles', 'plenty', 'rearms', 'roadie', 'scores', 'washes'], 'Laundry appliance': ['drier', 'dryer'], 'Lectured': ['pupil', 'spoke'], 'One who weeps': ['acter', 'admit', 'again', 'agree', 'ahalf', 'ahead', 'akind', 'alien', 'alike', 'alone', 'along', 'among', 'arent', 'asoul', 'atime', 'bogey', 'crier', 'cries', 'cryer', 'cuber', 'digit', 'equal', 'every', 'excon', 'fiber', 'grade', 'groan', 'group', 'icare', 'idiom', 'iwant', 'longe', 'loser', 'lucky', 'model', 'motto', 'nonot', 'oneby', 'oneto', 'orthe', 'ortwo', 'oweme', 'owner', 'piece', 'puzle', 'rewed', 'riser', 'satan', 'shirt', 'sided', 'sixof', 'sroot', 'tento', 'their', 'these', 'those', 'title', 'trust', 'tryit', 'twoto', 'unite', 'units', 'vowel', 'wedto', 'which', 'whine'], 'Grassy clump': ['sod', 'wad'], 'Pie chart portion': ['sector'], '\"Scary Movie,\" e.g.': ['parody', 'promos', 'rental', 'scarer', 'scream', 'sequel', 'teaser', 'weeper'], \"Maryland's state bird\": ['grouse', 'oriole', 'thrush'], 'Something worth saving': ['assets', 'keeper', 'usable'], '\"To __ is human\"': ['aah', 'all', 'ape', 'arm', 'art', 'cpa', 'err', 'eve', 'ism', 'ist', 'ity', 'jon', 'lap', 'man', 'men', 'oid', 'sin', 'soc', 'spy']}"

words.py

Lines changed: 27 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,14 @@
22
from nltk.tokenize import word_tokenize
33
from nltk.corpus import stopwords
44
from ast import literal_eval
5-
from collections import Counter
65
import gensim.downloader as api
76
from file_path import *
7+
from schema import CROSSWORD_GRID
88
import pickle
99
import string
1010
import requests
1111
import inflect # Library used to check whether a sentence is singular or plural
1212
import json
13-
import math
14-
import re
1513

1614
"""
1715
TODO: >> Plural detection and conversion [done]
@@ -158,57 +156,6 @@ def sentence_solution(self, sentence_clues, clues):
158156

159157
return clue_mapping
160158

161-
def all_solution(self, clues):
162-
stop = stopwords.words('english') + [""]
163-
164-
with open(ALL_CLUES, encoding="latin-1") as fp:
165-
dict_guesses = fp.readlines()
166-
167-
clue_mapping = dict()
168-
all_lengths = []
169-
for clue in clues:
170-
clue_mapping[clue] = list()
171-
if clues[clue] not in all_lengths:
172-
all_lengths.append(clues[clue])
173-
174-
clue_statements = list(clues.keys())
175-
clue_vecs = dict()
176-
for clue in clue_statements:
177-
clue_vecs[clue] = [word for word in [word.strip(string.punctuation) for word in clue.lower().split()] if word not in stop]
178-
179-
print(">>> STARTING ALL CLUES FETCH (V.1).....")
180-
for guess in dict_guesses:
181-
if len(guess.split()[0]) not in all_lengths:
182-
continue
183-
184-
guess_statement = " ".join(guess.split()[4:])
185-
guess_vec = Counter([word for word in [word.strip(string.punctuation) for word in guess_statement.lower().split()] if word not in stop])
186-
187-
for clue in clue_statements:
188-
if len(guess.split()[0]) == clues[clue]:
189-
clue_vec = Counter(clue_vecs[clue])
190-
191-
# https://stackoverflow.com/questions/15173225/calculate-cosine-similarity-given-2-sentence-strings
192-
intersection = set(guess_vec.keys()) & set(clue_vec.keys())
193-
numerator = sum([guess_vec[x] * clue_vec[x] for x in intersection])
194-
195-
sum1 = sum([guess_vec[x]**2 for x in guess_vec.keys()])
196-
sum2 = sum([clue_vec[x]**2 for x in clue_vec.keys()])
197-
denominator = math.sqrt(sum1) * math.sqrt(sum2)
198-
199-
if not denominator:
200-
sim = 0.0
201-
else:
202-
sim = float(numerator) / denominator
203-
204-
if sim > 0.65:
205-
clue_mapping[clue] += [guess.split()[0].lower()]
206-
207-
for clue in clues:
208-
clue_mapping[clue] = list(set(clue_mapping[clue]))
209-
210-
return clue_mapping
211-
212159
def one_word_solution(self, one_word_clues, clues):
213160
fp = open(MOBY_PATH)
214161
moby_lines = fp.readlines()
@@ -289,11 +236,9 @@ def store_words(self, one_word_solved, one_word_clues, sentence_solved, wikipedi
289236
for key in list(sentence_solved.keys()):
290237
try:
291238
clues[key] += [word[0] for word in sentence_solved[key]]
292-
clues[key] += wikipedia_solved[key]
293239
except:
294240
clues[key] = list()
295241
clues[key] += [word[0] for word in sentence_solved[key]]
296-
clues[key] += wikipedia_solved[key]
297242

298243
for key in list(wikipedia_solved.keys()):
299244
try:
@@ -320,31 +265,31 @@ def fetch_words(self, clues):
320265
Param: clues - dict
321266
{clue_1: word_len_1, clue_2: word_len_2}
322267
"""
323-
# all_clues = list(clues.keys())
324-
# one_word_clues = [(clue.lower(),clue) for clue in all_clues if len(clue.split(" ")) == 1]
325-
326-
# # converting words such as extra-large into large
327-
# one_word_clues += [(clue.split("-")[-1].lower(),clue) for clue in all_clues
328-
# if ("-" in clue) and (len(clue.split("-"))) == 2]
329-
# one_word_solved = self.one_word_solution_alternate([clue[0] for clue in one_word_clues], clues)
330-
331-
# sentence_clues = list(set(all_clues).difference(set(one_word_clues)))
332-
# sentence_solved = self.sentence_solution(sentence_clues, clues)
333-
334-
# wikipedia_clues = list()
335-
# # Print top N results
336-
# N = 40
337-
# for clue in sentence_solved:
338-
# sentence_solved[clue] = sentence_solved[clue][:N]
339-
340-
# wikipedia_solved = self.wikipedia_solution(sentence_clues, clues)
341-
all_solved = self.all_solution(clues)
342-
print(all_solved)
343-
print(">>> STORED CLUES.....")
344-
with open(CLUES_PATH, "w") as fp:
345-
json.dump(str(all_solved), fp)
268+
all_clues = list(clues.keys())
269+
one_word_clues = [(clue.lower(),clue) for clue in all_clues if len(clue.split(" ")) == 1]
270+
271+
# converting words such as extra-large into large
272+
one_word_clues += [(clue.split("-")[-1].lower(),clue) for clue in all_clues
273+
if ("-" in clue) and (len(clue.split("-"))) == 2]
274+
one_word_solved = self.one_word_solution_alternate([clue[0] for clue in one_word_clues], clues)
275+
276+
sentence_clues = list(set(all_clues).difference(set(one_word_clues)))
277+
sentence_solved = self.sentence_solution(sentence_clues, clues)
278+
279+
wikipedia_clues = list()
280+
# Print top N results
281+
N = 40
282+
for clue in sentence_solved:
283+
sentence_solved[clue] = sentence_solved[clue][:N]
284+
285+
wikipedia_solved = self.wikipedia_solution(sentence_clues, clues)
286+
287+
self.store_words(one_word_solved, one_word_clues, sentence_solved, wikipedia_solved)
346288

347289
if __name__ == '__main__':
    # Build {clue_statement: answer_length} from the shared grid schema.
    # (The previous `grid = CROSSWORD_GRID` alias was never used.)
    clues = {clue: CROSSWORD_GRID[clue]["length"] for clue in CROSSWORD_GRID}

    Words().fetch_words(clues)

words_offline.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
from nltk.corpus import stopwords
2+
from collections import Counter
3+
from schema import CROSSWORD_GRID
4+
from file_path import *
5+
import string
6+
import math
7+
import re
8+
import json
9+
10+
class Words_Offline():
    """Offline crossword clue solver.

    Matches puzzle clues against a pre-built clue/answer corpus (the
    ALL_CLUES file) using cosine similarity over bag-of-words vectors,
    instead of the thesaurus/embedding approach used by words.py.
    """

    def all_solution(self, clues):
        """Return candidate answers for every clue using the offline corpus.

        Param: clues - dict
            {clue_1: word_len_1, clue_2: word_len_2}
        Returns: dict mapping each clue statement to a deduplicated list of
            lowercase candidate words whose corpus clue text has cosine
            similarity > 0.65 with the puzzle clue and whose length matches.
        """
        # Membership is tested per token, so use a set; the empty string is
        # treated as a stopword too (tokens stripped down to nothing).
        stop = set(stopwords.words('english')) | {""}

        # Each corpus line: answer word, three metadata fields, clue text.
        # NOTE(review): assumes ALL_CLUES follows that 5-column layout —
        # confirm against the all-clues.bz2 source.
        with open(ALL_CLUES, encoding="latin-1") as fp:
            dict_guesses = fp.readlines()

        clue_mapping = {clue: [] for clue in clues}
        all_lengths = set(clues.values())

        # Pre-compute each puzzle clue's bag-of-words Counter once, instead
        # of rebuilding it for every corpus line (it is loop-invariant).
        clue_vecs = {
            clue: Counter(
                word
                for word in (w.strip(string.punctuation) for w in clue.lower().split())
                if word not in stop
            )
            for clue in clues
        }

        print(">>> STARTING ALL CLUES FETCH (V.1).....")
        for guess in dict_guesses:
            parts = guess.split()  # hoisted: was re-split several times per line
            answer_len = len(parts[0])
            if answer_len not in all_lengths:
                continue

            guess_statement = " ".join(parts[4:])
            guess_vec = Counter(
                word
                for word in (w.strip(string.punctuation) for w in guess_statement.lower().split())
                if word not in stop
            )

            for clue, clue_vec in clue_vecs.items():
                if answer_len != clues[clue]:
                    continue
                # 0.65 is the similarity threshold inherited from words.py.
                if self._cosine_sim(guess_vec, clue_vec) > 0.65:
                    clue_mapping[clue].append(parts[0].lower())

        # Deduplicate candidates for each clue.
        for clue in clues:
            clue_mapping[clue] = list(set(clue_mapping[clue]))

        return clue_mapping

    @staticmethod
    def _cosine_sim(vec_a, vec_b):
        """Cosine similarity between two bag-of-words Counters.

        Returns 0.0 when either vector has zero norm.
        https://stackoverflow.com/questions/15173225/calculate-cosine-similarity-given-2-sentence-strings
        """
        intersection = set(vec_a) & set(vec_b)
        numerator = sum(vec_a[x] * vec_b[x] for x in intersection)
        denominator = (math.sqrt(sum(v ** 2 for v in vec_a.values()))
                       * math.sqrt(sum(v ** 2 for v in vec_b.values())))
        return float(numerator) / denominator if denominator else 0.0

    def fetch_words(self, clues):
        """Solve all clues offline and persist the result to CLUES_PATH.

        Param: clues - dict
            {clue_statement: answer_length}
        """
        all_solved = self.all_solution(clues)
        print(">>> STORED CLUES.....")
        with open(CLUES_PATH, "w") as fp:
            # Matches original behavior: stores the dict's str() repr as a
            # single JSON string, not structured JSON.
            json.dump(str(all_solved), fp)
70+
71+
if __name__ == '__main__':
    # Build {clue_statement: answer_length} from the shared grid schema.
    # (The previous `grid = CROSSWORD_GRID` alias was never used.)
    clues = {clue: CROSSWORD_GRID[clue]["length"] for clue in CROSSWORD_GRID}

    Words_Offline().fetch_words(clues)

0 commit comments

Comments
 (0)