-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Separated the all_solution() method from words.py by creating words_offline.py. Purpose: with this commit, words.py returns a solution which requires Moby's thesaurus, gensim's glove-wiki-gigaword-100 and nltk's WordNet, whereas words_offline.py returns a solution which requires all-clues.bz2.
- Loading branch information
Showing 3 changed files with 105 additions and 83 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
"{'__ of bad news': ['sitson', 'edible', 'better', 'imsory', 'yerout', 'ashame', 'lesser', 'goodor', 'severe', 'isgood', 'oopsie', 'noroom', 'rotten', 'pileup', 'nohits', 'odious', 'recall', 'itssad', 'bearer', 'rancid', 'decent', 'nosale', 'whoops', 'rialto', 'grimly', 'delays', 'illuse'], 'Posture problem': ['issue', 'tough', 'allok', 'stoop', 'asnap', 'poser', 'whats', 'minds', 'sorry', 'seems', 'cando', 'itsok'], 'Loads': ['rearms', 'plenty', 'oodles', 'cargos', 'oceans', 'adored', 'roadie', 'adores', 'onuses', 'washes', 'scores'], 'Laundry appliance': ['drier', 'dryer'], 'Lectured': ['spoke', 'pupil'], 'One who weeps': ['group', 'trust', 'sided', 'twoto', 'arent', 'oweme', 'these', 'cuber', 'digit', 'ahead', 'asoul', 'alike', 'cryer', 'lucky', 'akind', 'their', 'alone', 'longe', 'acter', 'rewed', 'equal', 'atime', 'ortwo', 'sixof', 'bogey', 'icare', 'alien', 'crier', 'wedto', 'shirt', 'again', 'admit', 'whine', 'nonot', 'oneto', 'which', 'oneby', 'ahalf', 'units', 'title', 'tryit', 'fiber', 'model', 'iwant', 'vowel', 'unite', 'among', 'idiom', 'riser', 'cries', 'along', 'agree', 'piece', 'grade', 'excon', 'groan', 'loser', 'tento', 'puzle', 'those', 'orthe', 'every', 'motto', 'owner', 'satan', 'sroot'], 'Grassy clump': ['sod', 'wad'], 'Pie chart portion': ['sector'], '\"Scary Movie,\" e.g.': ['teaser', 'scream', 'scarer', 'promos', 'rental', 'weeper', 'sequel', 'parody'], \"Maryland's state bird\": ['grouse', 'thrush', 'oriole'], 'Something worth saving': ['usable', 'assets', 'keeper'], '\"To __ is human\"': ['ist', 'aah', 'eve', 'ity', 'sin', 'cpa', 'err', 'men', 'ism', 'all', 'art', 'ape', 'oid', 'soc', 'spy', 'lap', 'man', 'jon', 'arm']}" | ||
"{'__ of bad news': ['ashame', 'bearer', 'better', 'decent', 'delays', 'edible', 'goodor', 'grimly', 'illuse', 'imsory', 'isgood', 'itssad', 'lesser', 'nohits', 'noroom', 'nosale', 'odious', 'oopsie', 'pileup', 'rancid', 'recall', 'rialto', 'rotten', 'severe', 'sitson', 'whoops', 'yerout'], 'Posture problem': ['allok', 'asnap', 'cando', 'issue', 'itsok', 'minds', 'poser', 'seems', 'sorry', 'stoop', 'tough', 'whats'], 'Loads': ['adored', 'adores', 'cargos', 'oceans', 'onuses', 'oodles', 'plenty', 'rearms', 'roadie', 'scores', 'washes'], 'Laundry appliance': ['drier', 'dryer'], 'Lectured': ['pupil', 'spoke'], 'One who weeps': ['acter', 'admit', 'again', 'agree', 'ahalf', 'ahead', 'akind', 'alien', 'alike', 'alone', 'along', 'among', 'arent', 'asoul', 'atime', 'bogey', 'crier', 'cries', 'cryer', 'cuber', 'digit', 'equal', 'every', 'excon', 'fiber', 'grade', 'groan', 'group', 'icare', 'idiom', 'iwant', 'longe', 'loser', 'lucky', 'model', 'motto', 'nonot', 'oneby', 'oneto', 'orthe', 'ortwo', 'oweme', 'owner', 'piece', 'puzle', 'rewed', 'riser', 'satan', 'shirt', 'sided', 'sixof', 'sroot', 'tento', 'their', 'these', 'those', 'title', 'trust', 'tryit', 'twoto', 'unite', 'units', 'vowel', 'wedto', 'which', 'whine'], 'Grassy clump': ['sod', 'wad'], 'Pie chart portion': ['sector'], '\"Scary Movie,\" e.g.': ['parody', 'promos', 'rental', 'scarer', 'scream', 'sequel', 'teaser', 'weeper'], \"Maryland's state bird\": ['grouse', 'oriole', 'thrush'], 'Something worth saving': ['assets', 'keeper', 'usable'], '\"To __ is human\"': ['aah', 'all', 'ape', 'arm', 'art', 'cpa', 'err', 'eve', 'ism', 'ist', 'ity', 'jon', 'lap', 'man', 'men', 'oid', 'sin', 'soc', 'spy']}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
from nltk.corpus import stopwords | ||
from collections import Counter | ||
from schema import CROSSWORD_GRID | ||
from file_path import * | ||
import string | ||
import math | ||
import re | ||
import json | ||
|
||
class Words_Offline():
    """Offline crossword-clue solver.

    Matches every clue against a pre-scraped clue/answer dump (the file at
    ALL_CLUES) by cosine similarity of bag-of-words vectors, collecting
    candidate answers of the required length for each clue.
    """

    def __init__(self):
        pass

    @staticmethod
    def _cosine_similarity(vec_a, vec_b):
        """Return the cosine similarity of two word-count Counters.

        Args:
            vec_a, vec_b: collections.Counter word -> count.

        Returns:
            float in [0, 1]; 0.0 when either vector is empty.
        """
        # https://stackoverflow.com/questions/15173225/calculate-cosine-similarity-given-2-sentence-strings
        shared = set(vec_a) & set(vec_b)
        numerator = sum(vec_a[w] * vec_b[w] for w in shared)
        denominator = (math.sqrt(sum(c ** 2 for c in vec_a.values()))
                       * math.sqrt(sum(c ** 2 for c in vec_b.values())))
        if not denominator:
            return 0.0
        return float(numerator) / denominator

    def all_solution(self, clues):
        """Find candidate answers for every clue using the offline dump.

        Args:
            clues: dict mapping clue statement -> required answer length.

        Returns:
            dict mapping each clue statement to a list of unique,
            lower-cased candidate answers of the required length.
        """
        # Set membership is O(1); the empty string catches tokens that are
        # pure punctuation after stripping.
        stop = set(stopwords.words('english')) | {""}

        # ALL_CLUES holds one record per line: the answer in column 0 and
        # the clue text starting at column 4 (columns 1-3 are metadata).
        with open(ALL_CLUES, encoding="latin-1") as fp:
            dict_guesses = fp.readlines()

        clue_mapping = {clue: [] for clue in clues}
        all_lengths = set(clues.values())

        def _tokenize(text):
            # Lower-case, strip surrounding punctuation, drop stopwords.
            return [w for w in (t.strip(string.punctuation)
                                for t in text.lower().split())
                    if w not in stop]

        # Pre-compute each clue's count vector once, not once per guess.
        clue_statements = list(clues.keys())
        clue_vecs = {clue: Counter(_tokenize(clue)) for clue in clue_statements}

        print(">>> STARTING ALL CLUES FETCH (V.1).....")
        for guess in dict_guesses:
            parts = guess.split()  # split once per record, not per clue
            answer = parts[0]
            if len(answer) not in all_lengths:
                continue

            guess_vec = Counter(_tokenize(" ".join(parts[4:])))

            for clue in clue_statements:
                if len(answer) != clues[clue]:
                    continue
                # 0.65 threshold keeps only strongly related clue texts.
                if self._cosine_similarity(guess_vec, clue_vecs[clue]) > 0.65:
                    clue_mapping[clue].append(answer.lower())

        # De-duplicate candidates per clue (order is not significant).
        for clue in clue_mapping:
            clue_mapping[clue] = list(set(clue_mapping[clue]))

        return clue_mapping

    def fetch_words(self, clues):
        """Solve all clues and persist the result to CLUES_PATH.

        NOTE(review): json.dump(str(...)) stores the dict's repr as a single
        JSON string, not a JSON object — kept as-is for compatibility with
        existing readers of CLUES_PATH; confirm before changing the format.
        """
        all_solved = self.all_solution(clues)
        print(">>> STORED CLUES.....")
        with open(CLUES_PATH, "w") as fp:
            json.dump(str(all_solved), fp)
|
||
if __name__ == '__main__':
    # Build {clue statement: required answer length} from the grid schema
    # and solve/persist everything. (The former unused `grid` alias of
    # CROSSWORD_GRID has been dropped.)
    clues = {clue: entry["length"] for clue, entry in CROSSWORD_GRID.items()}
    Words_Offline().fetch_words(clues)