Skip to content

Commit 9ecc90f

Browse files
authored
Merge pull request #5 from patham9/Evaluation
Evaluation
2 parents 43d30ca + 4ad89a4 commit 9ecc90f

14 files changed

+1396
-77
lines changed

All.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

Correct.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

ExportSentence.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
from NarsGPT import *
2+
import json
3+
4+
# Benchmark driver: reads CSV rows from stdin, feeds every input to NarsGPT and
# checkpoints all results collected so far to OUT.json after each row.
Line_Input_Output_ExpectedOutput = []
Line = 1
while True:
    try:
        line = input()
    except EOFError:
        exit(0)
    # Drop the first CSV column; what remains is "<input>,,,,,,<expected output>".
    parts = ",".join(line.split(",")[1:]).split(",,,,,,")
    if len(parts) != 2:
        # A blank or malformed row (e.g. a trailing empty line) would otherwise
        # abort the whole run with a ValueError on unpacking. Skip it, but keep
        # the counter aligned with the input line numbers.
        print("//Skipping malformed line " + str(Line) + ":", line)
        Line += 1
        continue
    Input, expectedOutput = (part.strip() for part in parts)
    # Rows that carry an expected output get a trailing "?" if they lack one.
    if expectedOutput != "" and not Input.endswith("?"):
        Input += "?"
    actualOutput = NarsGPT_AddInput(Input)
    Dic = {"Line": Line, "Input": Input, "actualOutput": actualOutput, "expectedOutput": expectedOutput}
    Line_Input_Output_ExpectedOutput.append(Dic)
    for k in Dic:
        print(k+":", Dic[k])
    print("\n")
    # Rewritten after every row so an interrupted run still leaves usable output.
    filename = "OUT.json"
    with open(filename, 'w') as f:
        # NOTE(review): currentTime presumably comes from the star-import of
        # NarsGPT; if NarsGPT rebinds its own global, this name would still hold
        # the value from import time - confirm against NarsGPT.py.
        json.dump((Line_Input_Output_ExpectedOutput, currentTime), f)
    Line += 1
29+
30+

INT_Inf_benchmarkTest.csv

Lines changed: 1158 additions & 0 deletions
Large diffs are not rendered by default.

Incorrect.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

Judge.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import json
2+
import openai
3+
import time
4+
import sys
5+
# Judge script: asks gpt-3.5-turbo whether each actual output recorded in
# OUT.json contains the information of the expected output, and writes the
# running verdicts to All.json, Correct.json, Incorrect.json and scores.json.

# The OpenAI key is passed as a command line argument of the form API_KEY=<key>.
for x in sys.argv:
    if x.startswith("API_KEY="):
        # Take everything after the prefix, even if the key repeats that text.
        openai.api_key = x[len("API_KEY="):]

with open("OUT.json") as json_file:
    ListOfDicts, _ = json.load(json_file)

# Entry layout: {"Line": Line, "Input": Input, "actualOutput": actualOutput, "expectedOutput": expectedOutput}
# Only entries with an expected output are judged.
Questions = [D for D in ListOfDicts if D["expectedOutput"] != ""]

with open("QUESTIONS.json", 'w') as f:
    json.dump(Questions, f)

PROMPT = """Does the actual output contain the asked information answered in the expected output?
The question: _QUESTION_
The actual output: _ACTUAL_OUTPUT_
The expected output: _EXPECTED_OUTPUT_
Please answer yes/no only!"""

All = []
Correct = []
Incorrect = []
for D in Questions:
    Input = D["Input"]
    actualOutput = D["actualOutput"]
    expectedOutput = D["expectedOutput"]
    send_prompt = PROMPT.replace("_QUESTION_", Input).replace("_ACTUAL_OUTPUT_",actualOutput).replace("_EXPECTED_OUTPUT_",expectedOutput)
    print(send_prompt)
    while True:
        try:
            response = openai.ChatCompletion.create(model='gpt-3.5-turbo', messages=[ {"role": "user", "content": send_prompt}], max_tokens=200, temperature=0)
            ret = response['choices'][0]['message']['content']
        except Exception:
            # Retry on ordinary failures only; a bare except would also swallow
            # KeyboardInterrupt/SystemExit and make this loop impossible to stop.
            print("Error: API call failed, will try repeating it in 10 seconds!")
            time.sleep(10) #wait 10 seconds
            continue
        break
    # Any reply containing "yes" (case-insensitive) counts as correct.
    YES = "yes" in ret.lower()
    D["Correct"] = YES
    print("Correct?", YES)
    if YES:
        Correct.append(D)
    else:
        Incorrect.append(D)
    All.append(D)
    # All holds at least the current entry here, so the division is safe.
    scores = {"Correct": len(Correct), "Incorrect": len(Incorrect), "Ratio" : float(len(Correct)) / float(len(All))}
    print("So far:", scores)
    # Result files are rewritten after every judged question.
    with open("All.json", 'w') as f:
        json.dump(All, f)
    with open("Correct.json", 'w') as f:
        json.dump(Correct, f)
    with open("Incorrect.json", 'w') as f:
        json.dump(Incorrect, f)
    with open("scores.json", 'w') as f:
        json.dump(scores, f)

Memory.py

Lines changed: 70 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
* THE SOFTWARE.
2323
* """
2424

25+
from openai.embeddings_utils import get_embedding, cosine_similarity
2526
from ast import literal_eval
2627
from os.path import exists
2728
import json
@@ -33,58 +34,82 @@
3334
import NAR
3435
os.chdir(cwd)
3536
from Truth import *
37+
import time
3638

37-
def RetrieveQuestionContent(memory, attention_buf, inp, max_LTM_retrievals=5):
39+
def get_embedding_robust(inp):
    """Return get_embedding(inp), retrying every 10 seconds until the call succeeds.

    Only ordinary errors (Exception subclasses) trigger a retry, so
    KeyboardInterrupt and SystemExit still terminate the otherwise endless loop.
    """
    while True:
        try:
            return get_embedding(inp)
        except Exception:
            print("//Failed get embedding, will retry API call in 10s")
            time.sleep(10)
49+
50+
def ProductPrettify(term):
51+
if " --> " in term and " * " in term.split(" --> ")[0]:
52+
arg1 = term.split(" * ")[0].strip()
53+
arg2 = term.split(" * ")[1].split(" --> ")[0].strip()
54+
relarg = term.split(" --> ")[1].strip()
55+
term = arg1 + " " + relarg + " " + arg2
56+
return term.replace("(","").replace(")","")
57+
58+
def Term_AsSentence(T):
    """Render a Narsese-style term string as a more sentence-like string.

    If T contains "<", its first and last character are dropped. Terms without
    "=/>" are passed through ProductPrettify. Terms containing " =/> " become
    "When '<preconditions>' then '<operation>' causes '<consequence>'", where
    the part before " =/> " is split on " &/ " and its last element is taken
    as the operation. Finally the remaining copulas and brackets are replaced
    by words ("isa", "hasproperty", "then", "causes").
    """
    def removeParentheses(u):
        # Replace copulas by words first (" --> " contains ">"), then drop brackets.
        u = u.replace(" --> [", " hasproperty ").replace(" --> ", " isa ").replace(" - ", " and not ")
        for bracket in "(<)>":
            u = u.replace(bracket, "")
        # Collapse the double spaces left behind by the removals; the previous
        # replace(" ", " ") replaced a space by itself and had no effect.
        return u.replace("  ", " ").strip()

    term = T[1:-1] if "<" in T else T
    if "=/>" not in term:
        term = ProductPrettify(term)
    elif " =/> " in term:
        implication_parts = term.split(" =/> ")
        prec_op = [ProductPrettify(p) for p in implication_parts[0].split(" &/ ")]
        precs = removeParentheses(" and when then ".join(prec_op[:-1]))
        op = prec_op[-1]
        if " --> " in op:
            # Put the predicate in front of the subject and drop the "{SELF} *" argument.
            op_parts = op.split(" --> ")
            op = removeParentheses(op_parts[1] + " " + op_parts[0]).replace("{SELF} *", "")
        term = "When '" + precs + "' then '" + removeParentheses(op) + "' causes '" + removeParentheses(implication_parts[1]) + "'"
    term = term.replace(" --> [", " hasproperty ").replace("]","").replace("[","").replace(" --> ", " isa ").replace(" &/ ", " then ").replace(" =/> ", " causes ")
    return term
74+
75+
def Term_Embedded(T):
76+
return get_embedding_robust(Term_AsSentence(T).replace("-"," ").replace("_"," "))
77+
78+
def RetrieveQuestionContent(memory, attention_buf, inp, max_LTM_retrievals=30):
    """Select the memory items most similar to the question text inp.

    Every (key, value) item of memory that is not already in attention_buf is
    scored by cosine similarity between the embedding of inp and value[3].
    Items are ranked by that score, ties broken by Truth_Expectation(value[2]),
    and the top max_LTM_retrievals are kept.

    Returns a list of (key, value) pairs ordered so the best match comes last.
    """
    question_embedding = get_embedding_robust(inp)
    candidates = [
        (item[0], (cosine_similarity(question_embedding, item[1][3]), item[1]))
        for item in memory.items()
        if item not in attention_buf
    ]
    # Rank by query match first, then by truth expectation (both descending).
    ranked = sorted(candidates, key=lambda c: (-c[1][0], -Truth_Expectation(c[1][1][2])))
    best = ranked[:max_LTM_retrievals]
    # Drop the score again and reverse so the strongest match ends up last.
    return [(key, scored[1]) for key, scored in best][::-1]
5992

6093
def Memory_attention_buffer(memory, attention_buffer_size, inpQuestion = None):
    """Build the attention buffer for the given question.

    The buffer consists solely of the items RetrieveQuestionContent returns
    for inpQuestion; without a question it is empty. attention_buffer_size is
    accepted but not used by this implementation.
    """
    if inpQuestion is None:
        return []
    # Nothing is pre-selected, so retrieval is run against an empty buffer.
    return RetrieveQuestionContent(memory, [], inpQuestion)
79112

80-
def ProductPrettify(term):
81-
if " --> " in term and " * " in term.split(" --> ")[0]:
82-
arg1 = term.split(" * ")[0].strip()
83-
arg2 = term.split(" * ")[1].split(" --> ")[0].strip()
84-
relarg = term.split(" --> ")[1].strip()
85-
term = arg1 + " " + relarg + " " + arg2
86-
return term.replace("(","").replace(")","")
87-
88113
def Memory_generate_prompt(currentTime, memory, prompt_start, prompt_end, attention_buffer_size, inpQuestion = None, TimeHandling = True):
89114
prompt_memory = ""
90115
buf = Memory_attention_buffer(memory, attention_buffer_size, inpQuestion)
@@ -110,20 +135,7 @@ def Memory_generate_prompt(currentTime, memory, prompt_start, prompt_end, attent
110135
flags.append("Contradictory")
111136
certainty = Truth_Expectation((f,c))
112137
truthtype = '"' + " ".join(flags) + '"'
113-
term = x[0][0][1:-1] if "<" in x[0][0] else x[0][0]
114-
if "=/>" not in term:
115-
term = ProductPrettify(term)
116-
else:
117-
if " =/> " in term:
118-
prec_op = [ProductPrettify(p) for p in term.split(" =/> ")[0].split(" &/ ")]
119-
removeParentheses = lambda u: u.replace(" --> ["," hasproperty ").replace(" --> "," isa ").replace(" - ", " and not ").replace("(",""). \
120-
replace("<","").replace(")","").replace(">","").replace(" "," ").strip()
121-
precs = removeParentheses(" and when then ".join(prec_op[:-1]))
122-
op = prec_op[-1]
123-
if " --> " in op:
124-
op = removeParentheses(prec_op[-1].split(" --> ")[1] + " " + prec_op[-1].split(" --> ")[0]).replace("{SELF} *", "")
125-
term = "When '" + precs + "' then '" + removeParentheses(op) + "' causes '" + removeParentheses(term.split(" =/> ")[1]) + "'"
126-
term = term.replace(" --> [", " hasproperty ").replace("]","").replace("[","").replace(" --> ", " isa ").replace(" &/ ", " then ").replace(" =/> ", " causes ")
138+
term = Term_AsSentence(x[0][0])
127139
prompt_memory += f"i={i}: {term}. {timeterm}truthtype={truthtype} certainty={certainty}\n"
128140
return buf, prompt_start + prompt_memory + prompt_end
129141

@@ -153,7 +165,7 @@ def query(currentTime, memory, term, time):
153165
return currentTime
154166
if (term, time) not in retrieved and (term, time) in memory:
155167
retrieved.add((term, time))
156-
(_, _, (f, c)) = memory[(term, time)]
168+
(_, _, (f, c), _) = memory[(term, time)]
157169
if time == "eternal":
158170
_, currentTime = ProcessInput(currentTime, memory, f"{term}. {{{f} {c}}}")
159171
if time == currentTime:
@@ -162,7 +174,7 @@ def query(currentTime, memory, term, time):
162174
parts = term.split("?1")
163175
bestTerm, bestTruth, bestTime = (None, (0.0, 0.5), "eternal")
164176
for (term2, time2) in memory:
165-
(_, _, (f2, c2)) = memory[(term2, time2)]
177+
(_, _, (f2, c2), _) = memory[(term2, time2)]
166178
if time2 == time and term2.startswith(parts[0]) and term2.endswith(parts[1]):
167179
if Truth_Expectation((f2, c2)) > Truth_Expectation((bestTruth[0], bestTruth[1])):
168180
bestTerm = term2
@@ -206,11 +218,11 @@ def ProcessInput(currentTime, memory, inputforNAR, backups = ["input", "answers"
206218
c2 = float(derivation["truth"]["confidence"])
207219
usefulnessAddition = 1000000 if "Priority" not in derivation or derivation["Priority"] == 1.0 else 1
208220
if (term, time) in memory:
209-
(t, usefulness, (f, c)) = memory[(term, time)]
221+
(t, usefulness, (f, c), embedding) = memory[(term, time)]
210222
if c2 > c:
211-
memory[(term, time)] = (currentTime, usefulness + usefulnessAddition, (f2, c2))
223+
memory[(term, time)] = (currentTime, usefulness + usefulnessAddition, (f2, c2), embedding)
212224
else:
213-
memory[(term, time)] = (currentTime, usefulnessAddition, (f2, c2))
225+
memory[(term, time)] = (currentTime, usefulnessAddition, (f2, c2), Term_Embedded(term))
214226
if ">." in inputforNAR or "! :|:" in inputforNAR:
215227
currentTime += 1
216228
if inputforNAR.isdigit():
@@ -220,7 +232,7 @@ def ProcessInput(currentTime, memory, inputforNAR, backups = ["input", "answers"
220232
relations = set(["isa", "are", "hasproperty"])
221233
def Relation(inp, currentTime, memory, s, v, p, punctuation_tv):
222234
global relations
223-
if s.replace("_", " ") not in inp or p.replace("_", " ") not in inp:
235+
if s.replace("_", " ") not in inp.replace(". "," ").replace("'","") or p.replace("_", " ") not in inp.replace(". "," ").replace("'",""):
224236
#print("//!!!! filtered out", s, v, p)
225237
return False, currentTime
226238
s = Lemmatize(s, wordnet.NOUN)
@@ -238,7 +250,7 @@ def Relation(inp, currentTime, memory, s, v, p, punctuation_tv):
238250
return True, currentTime
239251

240252
def Property(inp, currentTime, memory, s, p, punctuation_tv):
241-
if s.replace("_", " ") not in inp or p.replace("_", " ") not in inp:
253+
if s.replace("_", " ") not in inp.replace(". "," ").replace("'","") or p.replace("_", " ") not in inp.replace(". "," ").replace("'",""):
242254
#print("//!!!! filtered out", s, "hasproperty", p)
243255
return False, currentTime
244256
s = Lemmatize(s, wordnet.NOUN)
@@ -252,6 +264,7 @@ def Property(inp, currentTime, memory, s, p, punctuation_tv):
252264
hadRelation = set([])
253265
def Memory_digest_sentence(inp, currentTime, memory, sentence, truth, PrintMemoryUpdates, TimeHandling):
254266
global lastTime, hadRelation
267+
#print(">>>>", sentence)
255268
if currentTime != lastTime:
256269
hadRelation = set([])
257270
if sentence in hadRelation:
@@ -312,7 +325,7 @@ def Memory_Eternalize(currentTime, memory, eternalizationDistance = 3):
312325
belief = memory[(m, t)]
313326
if t != "eternal" and currentTime - t > eternalizationDistance:
314327
deletes.append((m, t))
315-
additions.append(((m, "eternal"), (belief[0], belief[1], Truth_Eternalize(belief[2]))))
328+
additions.append(((m, "eternal"), (belief[0], belief[1], Truth_Eternalize(belief[2]), belief[3])))
316329
for k in deletes:
317330
del memory[k]
318331
for (k, v) in additions:

0 commit comments

Comments
 (0)