Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,8 @@
# buildweek3-simpsons-says-ds
This is the Data Science subrepository.

The `.ipynb` files contain the notebooks where the original data processing was developed.

* `Simpions_says.ipynb` contains the search function that we used to match a user query to the most similar show quotes.
* `Simpsons_Writes_V4.ipynb` contains the recurrent neural network that was used to generate synthetic dialogue for particular characters.
* `app.py` contains the Flask web app that powers our website. Most of the remaining files are pickled parts of our NLP model that must be loaded into this file.
51 changes: 26 additions & 25 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
from flask import Flask , request
from flask import Flask , request, make_response
import pandas as pd
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
#nltk.download('punkt')
import pickle
import gensim

import os

dirpath = os.getcwd()
dirpath = dirpath +'/'

import random
import numpy as np
import json

APP = Flask(__name__)

Expand All @@ -19,24 +15,21 @@
s = pickle.load( open( "sims2.p", "rb" ) )
df = pickle.load( open( "df.p", "rb" ) )
df = df.rename(columns={'spoken_words_x':'spoken_words','raw_character_text_x':'raw_character_text'})
#df3 = pickle.load( open( "scripts.pkl", "rb" ) )
corpus = pickle.load( open( "c.p", "rb" ) )
#s = gensim.similarities.Similarity('/app/',t[corpus],num_features=len(d))
#pickle.dump(s,open('sims2.p','wb'))

print(df.head())
quote_dump = pickle.load(open("quote_dump.pkl", "rb" ))
## Uncomment and run once for local operation
# s = gensim.similarities.Similarity('/app/',t[corpus],num_features=len(d))
# pickle.dump(s,open('sims2.p','wb'))


@APP.route('/')
@APP.route('/api',methods=['POST'])
@APP.route('/api', methods=['POST'])
def hello_world():

user_input = "the goggles do nothing"
if request.method == 'POST':
user_input = request.values['quote']
print(user_input)
query_doc = [w.lower() for w in word_tokenize(user_input)]
print(query_doc)
query_doc_bow = d.doc2bow(query_doc)
query_doc_tf_idf = t[query_doc_bow]
v = s[query_doc_tf_idf]
Expand All @@ -46,30 +39,38 @@ def hello_world():
column = ['quote_id', 'raw_character_text', 'spoken_words','episode_title','season','number_in_season']
response = response[column]
response.to_json(orient='records')
print(response)


return response.to_json(orient='records')


@APP.route('/getquote',methods=['POST'])
@APP.route('/getquote')
def getquote():
inputs = '[1,2,3]'
if request.method=='POST':
inputs = request.values['input']
inputs2 = [int(x) for x in inputs.strip('[]').split(',')]

#l =[9560, 41110, 76160, 76216, 105073]
condition = (df.quote_id.isin(inputs2))
inputs = request.get_json(force=True)['input']

condition = (df.quote_id.isin(inputs))
response = df[condition]
print(response)
column = ['quote_id', 'raw_character_text', 'spoken_words','episode_title','season','number_in_season']
response = response[column]
response.to_json(orient='records')
print(response)


return response.to_json(orient='records')


@APP.route('/gen', methods=['POST'])
@APP.route('/gen')
def generator():
    """Return 10 randomly chosen synthetic quotes for one character.

    A POST supplies the character name in form/query field ``input``;
    a plain GET falls back to ``'homer'``. The response is a JSON array
    of ``{'charname': <name>, 'quote': <text>}`` objects.
    """
    # Acceptable inputs = ['homer', 'marge', 'bart', 'lisa', 'moe', 'grampa', 'skinner']
    name = 'homer'
    if request.method=='POST':
        name = request.values['input']

    # NOTE(review): random.choices samples WITH replacement, so the 10
    # returned quotes may contain duplicates. A name missing from
    # quote_dump raises KeyError (HTTP 500) — presumably callers only
    # send the names listed above; confirm against the front end.
    rand_quotes = random.choices(quote_dump[name], k=10)
    quotes2 = [{'charname':name, 'quote':x} for x in rand_quotes]
    return_list = json.dumps(quotes2)
    return return_list


Binary file added quote_dump.pkl
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.