Skip to content

Commit eb4532c

Browse files
committed
update data
1 parent 08e4d3d commit eb4532c

9 files changed

+44522
-44489
lines changed

.gitignore

+1-1
Original file line number | Diff line number | Diff line change
@@ -39,4 +39,4 @@ examples/aaai_dkt_improve_v1
3939
tabchen
4040
examples/aaai2023/
4141
examples/all_wandbs/
42-
data_new
42+
data_old

configs/data_config.json

+3
Original file line number | Diff line number | Diff line change
@@ -210,9 +210,11 @@
210210
"questions",
211211
"concepts"
212212
],
213+
"max_concepts": 5,
213214
"min_seq_len": 3,
214215
"maxlen": 200,
215216
"emb_path": "",
217+
"train_valid_original_file": "train_valid.csv",
216218
"train_valid_file": "train_valid_sequences.csv",
217219
"folds": [
218220
0,
@@ -221,6 +223,7 @@
221223
3,
222224
4
223225
],
226+
"test_original_file": "test.csv",
224227
"test_file": "test_sequences.csv",
225228
"test_window_file": "test_window_sequences.csv",
226229
"test_question_file": "test_question_sequences.csv",

data/assist2015/keyid2idx.json

+1-1
Large diffs are not rendered by default.

data/assist2015/test.csv

+3,818-3,818
Large diffs are not rendered by default.

data/assist2015/test_sequences.csv

+3,866-3,866
Large diffs are not rendered by default.

data/assist2015/test_window_sequences.csv

+6,102-6,102
Large diffs are not rendered by default.

data/assist2015/train_valid.csv

+15,275-15,275
Large diffs are not rendered by default.

data/assist2015/train_valid_sequences.csv

+15,426-15,426
Large diffs are not rendered by default.

tests/check_dataset.py

+30
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,30 @@
1+
import sys
2+
# sys.path.append("..")
3+
from pykt.preprocess.split_datasets import read_data
4+
5+
# Column names considered by the dataset comparison; only those that are
# actually present in the loaded data end up being compared.
cols = [
    'concepts',
    'timestamps',
    'usetimes',
    'questions',
    'responses',
    'uid',
]
6+
7+
def check_result(path1,path2,sort=True):
    """Print, for each comparable column, the fraction of rows that match.

    Both files are loaded with ``read_data(path, min_seq_len=-1)`` and the
    first element of each result is compared position by position.
    NOTE(review): ``min_seq_len=-1`` presumably disables length filtering in
    ``read_data`` -- confirm against its implementation.

    Args:
        path1: Path of the first data file.
        path2: Path of the second data file.
        sort: When true, both tables are ordered by ``uid`` before the
            comparison so the row order inside the files does not matter.

    Returns:
        None. For every name in ``cols`` that exists in the first table the
        column name is printed, followed by the mean of the element-wise
        equality (1.0 means every row is identical). A row-count mismatch or
        a column missing from the second table is reported instead of
        raising.
    """
    df_1 = read_data(path1,min_seq_len=-1)[0]
    df_2 = read_data(path2,min_seq_len=-1)[0]
    if sort:
        df_1 = df_1.sort_values('uid')
        df_2 = df_2.sort_values('uid')

    # The comparison below is positional, so it is only meaningful when both
    # tables have the same number of rows. NumPy's `==` does not produce a
    # per-row result for arrays of different lengths, which previously
    # surfaced as an obscure failure on `.mean()`.
    if len(df_1) != len(df_2):
        print(f"row count mismatch: {len(df_1)} vs {len(df_2)}")
        return

    for col in cols:
        if col not in df_1.columns:
            # Same filter as before: only columns of the first table count.
            continue
        print(col)
        if col not in df_2.columns:
            # Previously a KeyError; report it and keep checking the rest.
            print("column missing in second file")
            continue
        print((df_1[col].values==df_2[col].values).mean())
21+
22+
23+
if __name__ == "__main__":
    # Space-separated names of the datasets whose data.txt files are compared
    # between the current data directory and the data_old directory.
    dataset_str = "assist2009 algebra2005 nips_task34 statics2011 assist2015 poj bridge2algebra2006"
    rule = "-" * 40
    # split() without arguments already discards surrounding whitespace, so
    # every token is a clean dataset name.
    for dataset in dataset_str.split():
        print('+', rule, dataset, rule, '+')
        check_result(
            f'/share/tabchen/tal_project/pykt-toolkit/data/{dataset}/data.txt',
            f'/share/tabchen/tal_project/pykt-toolkit/data_old/{dataset}/data.txt',
            sort=False,
        )

0 commit comments

Comments
 (0)