Skip to content

Commit 59d5bb6

Browse files
adetorcy authored and kamwoods committed
Add tf-idf notebook
move data files into their own directory
1 parent 35124a4 commit 59d5bb6

9 files changed

+458 -16 lines changed

data/edrm_subset.sqlite3.gz

12.9 MB
Binary file not shown.

entities.ipynb

+3-3
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,8 @@
7070
"metadata": {},
7171
"outputs": [],
7272
"source": [
73-
"src = Path(\"RevisedEDRMv1_Complete/albert_meyers/\") # for PST files\n",
74-
"# src = Path(\"httpd-users\") # for mbox files"
73+
"src = Path(\"data/RevisedEDRMv1_Complete/albert_meyers/\") # for PST files\n",
74+
"# src = Path(\"data/httpd-users\") # for mbox files"
7575
]
7676
},
7777
{
@@ -319,7 +319,7 @@
319319
"name": "python",
320320
"nbconvert_exporter": "python",
321321
"pygments_lexer": "ipython3",
322-
"version": "3.7.5"
322+
"version": "3.8.2"
323323
},
324324
"pycharm": {
325325
"stem_cell": {

export_eml.ipynb

+2-2
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
"metadata": {},
6262
"outputs": [],
6363
"source": [
64-
"location = Path('RevisedEDRMv1_Complete/andrea_ring/')"
64+
"location = Path('data/RevisedEDRMv1_Complete/andrea_ring/')"
6565
]
6666
},
6767
{
@@ -113,7 +113,7 @@
113113
"name": "python",
114114
"nbconvert_exporter": "python",
115115
"pygments_lexer": "ipython3",
116-
"version": "3.7.5"
116+
"version": "3.8.2"
117117
}
118118
},
119119
"nbformat": 4,

message_viewer.ipynb

+2-2
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555
"metadata": {},
5656
"outputs": [],
5757
"source": [
58-
"PST_FILE = Path(\"RevisedEDRMv1_Complete/andrea_ring/andrea_ring_000_1_1.pst\")"
58+
"PST_FILE = Path(\"data/RevisedEDRMv1_Complete/andrea_ring/andrea_ring_000_1_1.pst\")"
5959
]
6060
},
6161
{
@@ -1373,7 +1373,7 @@
13731373
"name": "python",
13741374
"nbconvert_exporter": "python",
13751375
"pygments_lexer": "ipython3",
1376-
"version": "3.7.5"
1376+
"version": "3.8.2"
13771377
},
13781378
"pycharm": {
13791379
"stem_cell": {

postBuild

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
#!/bin/bash
22

3-
PST_DATA_DIR="RevisedEDRMv1_Complete"
3+
PST_DATA_DIR="data/RevisedEDRMv1_Complete"
44
ZIPPED_PST_FILES=(albert_meyers.zip andrea_ring.zip)
55

6-
MBOX_DATA_DIR="httpd-users"
6+
MBOX_DATA_DIR="data/httpd-users"
77

88
mkdir -p $PST_DATA_DIR $MBOX_DATA_DIR
99

pst_walk.ipynb

+2-2
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
"outputs": [],
5454
"source": [
5555
"# Edit as appropriate\n",
56-
"CACHED_ENRON_DATA_DIR = Path(\"RevisedEDRMv1_Complete\")\n",
56+
"CACHED_ENRON_DATA_DIR = Path(\"data/RevisedEDRMv1_Complete\")\n",
5757
"\n",
5858
"# Generate the list of files to know how many there are\n",
5959
"files = sorted(CACHED_ENRON_DATA_DIR.glob('**/*.pst'))"
@@ -184,7 +184,7 @@
184184
"name": "python",
185185
"nbconvert_exporter": "python",
186186
"pygments_lexer": "ipython3",
187-
"version": "3.7.5"
187+
"version": "3.8.2"
188188
}
189189
},
190190
"nbformat": 4,

requirements.in

+1
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ jupyterlab
77
ipywidgets
88
matplotlib
99
sklearn
10+
terminado==0.8.3 # for mybinder.org

requirements.txt

+5-5
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ blessed==1.17.8 # via enlighten, libratom
1414
blis==0.4.1 # via libratom, spacy, thinc
1515
catalogue==1.0.0 # via libratom, spacy, thinc
1616
certifi==2020.6.20 # via libratom, matplotlib, requests
17-
cffi==1.14.2 # via argon2-cffi
17+
cffi==1.14.3 # via argon2-cffi
1818
chardet==3.0.4 # via libratom, requests
1919
click-log==0.3.2 # via libratom
2020
click==7.1.2 # via click-log, libratom
@@ -39,7 +39,7 @@ json5==0.9.5 # via jupyterlab-server
3939
jsonschema==3.2.0 # via jupyterlab-server, libratom, nbformat
4040
jupyter-client==6.1.7 # via ipykernel, nbclient, notebook
4141
jupyter-core==4.6.3 # via jupyter-client, nbconvert, nbformat, notebook
42-
jupyterlab-pygments==0.1.1 # via nbconvert
42+
jupyterlab-pygments==0.1.2 # via nbconvert
4343
jupyterlab-server==1.2.0 # via jupyterlab
4444
jupyterlab==2.2.8 # via -r requirements.in
4545
kiwisolver==1.2.0 # via matplotlib
@@ -50,9 +50,9 @@ matplotlib==3.3.2 # via -r requirements.in
5050
mistune==0.8.4 # via nbconvert
5151
murmurhash==1.0.2 # via libratom, preshed, spacy, thinc
5252
nbclient==0.5.0 # via nbconvert
53-
nbconvert==6.0.4 # via notebook
53+
nbconvert==6.0.6 # via notebook
5454
nbformat==5.0.7 # via ipywidgets, nbclient, nbconvert, notebook
55-
nest-asyncio==1.4.0 # via nbclient
55+
nest-asyncio==1.4.1 # via nbclient
5656
notebook==6.1.4 # via jupyterlab, jupyterlab-server, widgetsnbextension
5757
numpy==1.19.0 # via blis, libratom, matplotlib, pandas, scikit-learn, scipy, spacy, thinc
5858
packaging==20.4 # via bleach, libratom
@@ -88,7 +88,7 @@ sqlalchemy==1.3.18 # via -r requirements.in, libratom
8888
srsly==1.0.2 # via libratom, spacy, thinc
8989
striprtf==0.0.10 # via libratom
9090
tabulate==0.8.7 # via libratom
91-
terminado==0.9.1 # via notebook
91+
terminado==0.8.3 # via -r requirements.in, notebook
9292
testpath==0.4.4 # via nbconvert
9393
thinc==7.4.1 # via libratom, spacy
9494
threadpoolctl==2.1.0 # via scikit-learn

0 commit comments

Comments
 (0)