Commit 1928f2a

USER authored and USER committed
Updated based on new feedback, 2020-12-18
1 parent d9e7c01 commit 1928f2a

5 files changed: +46 −42 lines

4.TEXT_CLASSIFICATION/4.1.4 Linear Regression with Word2Vec.ipynb

+13 −29

@@ -38,7 +38,6 @@
 "outputs": [],
 "source": [
 "DATA_IN_PATH = './data_in/'\n",
-"DATA_OUT_PATH = './data_out/'\n",
 "TRAIN_CLEAN_DATA = 'train_clean.csv'\n",
 "\n",
 "RANDOM_SEED = 42\n",
@@ -192,18 +191,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"predicted = lgs.predict(X_test)\n",
-"from sklearn import metrics\n",
-"\n",
-"fpr, tpr, _ = metrics.roc_curve(y_test, (lgs.predict_proba(X_test)[:, 1]))\n",
-"auc = metrics.auc(fpr, tpr)\n",
-"\n",
-"print(\"------------\")\n",
-"print(\"Accuracy: %f\" % lgs.score(X_test, y_test)) #checking the accuracy\n",
-"print(\"Precision: %f\" % metrics.precision_score(y_test, predicted))\n",
-"print(\"Recall: %f\" % metrics.recall_score(y_test, predicted))\n",
-"print(\"F1-Score: %f\" % metrics.f1_score(y_test, predicted))\n",
-"print(\"AUC: %f\" % auc)"
+"print(\"Accuracy: %f\" % lgs.score(X_test, y_test)) "
 ]
 },
 {
@@ -254,18 +242,16 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"test_predicted = lgs.predict(test_data_vecs)"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"ids = list(test_data['id'])\n",
+"DATA_OUT_PATH = './data_out/'\n",
+"\n",
+"test_predicted = lgs.predict(test_data_vecs)\n",
 "\n",
-"answer_dataset = pd.DataFrame({'id': ids, 'sentiment': test_predicted})"
+"if not os.path.exists(DATA_OUT_PATH):\n",
+"    os.makedirs(DATA_OUT_PATH)\n",
+"    \n",
+"ids = list(test_data['id'])\n",
+"answer_dataset = pd.DataFrame({'id': ids, 'sentiment': test_predicted})\n",
+"answer_dataset.to_csv(DATA_OUT_PATH + 'lgs_w2v_answer.csv', index=False, quoting=3)"
 ]
 },
 {
@@ -274,10 +260,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"if not os.path.exists(DATA_OUT_PATH):\n",
-"    os.makedirs(DATA_OUT_PATH)\n",
-"\n",
-"answer_dataset.to_csv(DATA_OUT_PATH + 'lgs_w2v_answer.csv', index=False, quoting=3)"
+"model_name = \"300features_40minwords_10context\"\n",
+"model.save(model_name)"
 ]
 }
 ],
@@ -297,7 +281,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.8"
+"version": "3.8.3"
 }
 },
 "nbformat": 4,

7.PRETRAIN_METHOD/7.4.1.gpt2_finetune_LM.ipynb

+9 −4

@@ -33,8 +33,13 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"!wget https://www.dropbox.com/s/nzfa9xpzm4edp6o/gpt_ckpt.zip -O gpt_ckpt.zip\n",
-"!unzip -o gpt_ckpt.zip"
+"import wget\n",
+"import zipfile\n",
+"\n",
+"wget.download('https://github.com/NLP-kr/tensorflow-ml-nlp-tf2/releases/download/v1.0/gpt_ckpt.zip')\n",
+"\n",
+"with zipfile.ZipFile('gpt_ckpt.zip') as z:\n",
+"    z.extractall()"
 ]
 },
 {
@@ -157,7 +162,7 @@
 "DATA_IN_PATH = './data_in/KOR/'\n",
 "TRAIN_DATA_FILE = 'finetune_data.txt'\n",
 "\n",
-"sents = [s[:-1] for s in open(DATA_IN_PATH + TRAIN_DATA_FILE).readlines()]"
+"sents = [s[:-1] for s in open(DATA_IN_PATH + TRAIN_DATA_FILE, encoding='utf-8').readlines()]"
 ]
 },
 {
@@ -286,7 +291,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.9"
+"version": "3.8.3"
 }
 },
 "nbformat": 4,

7.PRETRAIN_METHOD/7.4.2.gpt2_finetune_NSMC.ipynb

+8 −3

@@ -38,8 +38,13 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"!wget https://www.dropbox.com/s/nzfa9xpzm4edp6o/gpt_ckpt.zip -O gpt_ckpt.zip\n",
-"!unzip -o gpt_ckpt.zip"
+"import wget\n",
+"import zipfile\n",
+"\n",
+"wget.download('https://github.com/NLP-kr/tensorflow-ml-nlp-tf2/releases/download/v1.0/gpt_ckpt.zip')\n",
+"\n",
+"with zipfile.ZipFile('gpt_ckpt.zip') as z:\n",
+"    z.extractall()"
 ]
 },
 {
@@ -365,7 +370,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.7.1"
+"version": "3.8.3"
 }
 },
 "nbformat": 4,

7.PRETRAIN_METHOD/7.4.3.gpt2_finetune_KorNLI.ipynb

+8 −3

@@ -36,8 +36,13 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"!wget https://www.dropbox.com/s/nzfa9xpzm4edp6o/gpt_ckpt.zip -O gpt_ckpt.zip\n",
-"!unzip -o gpt_ckpt.zip"
+"import wget\n",
+"import zipfile\n",
+"\n",
+"wget.download('https://github.com/NLP-kr/tensorflow-ml-nlp-tf2/releases/download/v1.0/gpt_ckpt.zip')\n",
+"\n",
+"with zipfile.ZipFile('gpt_ckpt.zip') as z:\n",
+"    z.extractall()"
 ]
 },
 {
@@ -424,7 +429,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.7.6"
+"version": "3.8.3"
 }
 },
 "nbformat": 4,

7.PRETRAIN_METHOD/7.4.4.gpt2_finetune_KorSTS.ipynb

+8 −3

@@ -37,8 +37,13 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"!wget https://www.dropbox.com/s/nzfa9xpzm4edp6o/gpt_ckpt.zip -O gpt_ckpt.zip\n",
-"!unzip -o gpt_ckpt.zip"
+"import wget\n",
+"import zipfile\n",
+"\n",
+"wget.download('https://github.com/NLP-kr/tensorflow-ml-nlp-tf2/releases/download/v1.0/gpt_ckpt.zip')\n",
+"\n",
+"with zipfile.ZipFile('gpt_ckpt.zip') as z:\n",
+"    z.extractall()"
 ]
 },
 {
@@ -494,7 +499,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.7.1"
+"version": "3.8.3"
 }
 },
 "nbformat": 4,
