# -*- coding: utf-8 -*-
"""
Author: Scott Miller
PhD Candidate at Florida State University
Advisor: Andrew Rassweiler

This script was written for the NSF project "CNH-L: Multiscale Dynamics of Coral Reef
Fisheries: Feedbacks Between Fishing Practices, Livelihood Strategies, and Shifting
Dominance of Coral and Algae" (Award #1714704).

It checks the JSON exported by coralnet_api_deployer.py for errors. If no errors are
detected, it prints "No errors!" and finishes quickly. If errors are detected, it
attempts to fix them by sending new POST/GET requests to CoralNet and overwriting the
error entries with the returned data. If it fixes every error, it prints "A-OK!" and
the final script, json_parser.py, can be run. Otherwise it prints "STILL HAVE
ISSUES!!!!"; rerunning this script may clear the remaining errors, or they may point
to a different problem.
"""
# Required libraries
import json
import os
import random
import re
import time

import dropbox
import requests
# Variables locating the relevant files
site_to_use = ''  # Same name as in the previous script
local_path = ''  # Path to the folder where these files are saved on your local drive
image_extension = ''  # File extension (without the leading period) of your uploaded images; I use JPEGs, so mine is "JPG"
dropbox_token = ''  # Your Dropbox access token
dropbox_path = ''  # Path leading to the folder to search on Dropbox (e.g., my folder_to_use sits inside a folder called "CoralNet", so this is "/CoralNet/")
# Variables used to interface with CoralNet
classifier_url = ''  # URL of the CoralNet source you are using to annotate your images
coralnet_token = ''  # Your CoralNet authorization token
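# A purely illustrative example of filled-in values (hypothetical; yours will
# differ, and the classifier URL below only shows the general deploy-endpoint
# pattern rather than a real source):
#   site_to_use = 'site01'
#   local_path = '/Users/yourname/coralnet_project/'
#   image_extension = 'JPG'
#   dropbox_path = '/CoralNet/'
#   classifier_url = 'https://coralnet.ucsd.edu/api/classifier/1234/deploy/'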
# Request headers, in case we need to send more requests to CoralNet to fix errors
headers = {"Authorization": f"Token {coralnet_token}",
           "Content-type": "application/vnd.api+json"}
# Connects to our Dropbox account; users_get_current_account() fails immediately
# if the token is invalid
dbx = dropbox.Dropbox(dropbox_token)
dbx.users_get_current_account()
# Opens the JSON file generated by coralnet_api_deployer.py, converts it into a
# dictionary, and extracts the list stored under its only top-level key, 'data'
with open(f"{local_path}{site_to_use}_export.json") as f:
    dat = json.load(f)
d = dat['data']
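# For reference, each element of d is expected to follow the shape returned by the
# deploy API (a sketch; the URL and point values here are only illustrative):
#   {"type": "image",
#    "id": "https://dl.dropboxusercontent.com/.../IMG_0001.JPG",
#    "attributes": {"points": [{"row": 500, "column": 700, ...}, ...]}}
# with an "error" key inside "attributes" for any image that failed.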
# Checks for images that returned an error instead of point data
# If any are found, their filenames and indices are recorded so a new request
# JSON can be built for them below
error_images = []
error_index = []
error_dat = {"data": []}
for i in range(len(d)):
    attrib = d[i]['attributes']
    if 'error' in attrib.keys():  # The 'error' key marks a failed image
        # Extracts the image filename from the URL stored in the 'id' field
        error_images.append(re.search(rf'^.*/(.*\.{image_extension})', d[i]['id']).group(1))
        error_index.append(i)  # Saves the index so the entry can be removed later
if len(error_images) > 0:
    # Now that we know which entries are errors, remove them from the master JSON
    # (d and dat['data'] reference the same list object, so this updates both)
    # Deleting in reverse index order keeps the earlier indices valid
    for ele in sorted(error_index, reverse=True):
        del dat['data'][ele]
    # Builds a new request JSON for the images that threw errors
    # Each image gets 30 points: one random point in each cell of a stratified
    # grid of 5 row bands by 6 column bands
    row_bands = [(450, 870), (870, 1290), (1290, 1710), (1710, 2130), (2130, 2550)]
    col_bands = [(600, 1067), (1067, 1533), (1533, 2000),
                 (2000, 2467), (2467, 2933), (2933, 3400)]
    error_dat = {"data": []}
    for entry in error_images:
        pathway = f'{dropbox_path}{site_to_use}/' + entry
        tmp_name = dbx.sharing_create_shared_link(path=pathway, short_url=False, pending_upload=None)
        to_append = {"type": "image",
                     "attributes": {"url": tmp_name.url[:-1] + '1',  # See the note on this rewrite below
                                    "points": [{"row": random.randint(r_lo, r_hi),
                                                "column": random.randint(c_lo, c_hi)}
                                               for (r_lo, r_hi) in row_bands
                                               for (c_lo, c_hi) in col_bands]}}
        error_dat['data'].append(to_append)
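    # A note on the URL rewrite above: Dropbox share links end in "?dl=0", and
    # swapping the trailing "0" for a "1" turns them into direct-download links
    # that CoralNet can fetch. This assumes Dropbox's current link format; if that
    # format changes, the slicing (tmp_name.url[:-1] + '1') will need revisiting.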
    # Now that the error images have been removed from the master JSON and given
    # a new request JSON, queries CoralNet again to try to get real data for them,
    # in batches of up to 100 images per request
    k = 0
    dat_length = 100
    export = {"data": []}
    while dat_length == 100:
        error_data = {"data": error_dat['data'][k:k + 100]}
        dat_length = len(error_data['data'])
        if dat_length == 0:
            break
        # Writes this batch to a file so it can be used as the POST request body
        with open(f'{local_path}error_json.json', 'w') as outfile:
            json.dump(error_data, outfile)
        # Sends the POST request to CoralNet; opening the body with a context
        # manager ensures the file handle is closed once the request is sent
        with open(f'{local_path}error_json.json', 'rb') as payload:
            r = requests.post(url=classifier_url, data=payload, headers=headers)
        time.sleep(60)
        in_progress = True
        while in_progress:  # Polls CoralNet every 60 seconds to check the status of the job
            r_status = requests.get(url='https://coralnet.ucsd.edu' + r.headers['Location'],
                                    headers={"Authorization": f"Token {coralnet_token}"})
            curr_status = json.loads(r_status.content)  # Parses the status response
            if 'status' in curr_status['data'][0]['attributes'].keys():  # 'status' is present only while the job is still running
                print(curr_status['data'][0]['attributes']['successes'])
                time.sleep(60)  # Waits 60 s before the next status check
            else:  # No 'status' key means the job is complete, so save the returned data
                export['data'].extend(curr_status['data'])
                in_progress = False
        k += 100
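    # For reference, an in-progress status response looks roughly like this (a
    # sketch based on the keys checked above; the other fields are illustrative):
    #   {"data": [{"attributes": {"status": "...", "successes": 12, ...}}]}
    # When the job finishes, the 'status' key disappears and 'data' holds the
    # per-image results that get collected into export above.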
    # Checks the new data from CoralNet for fresh errors
    # If any are found, alerts the user that there are STILL issues
    # Otherwise, appends the new data to the master JSON so the full file can be
    # exported for json_parser.py
    exp_dat = export['data']
    error_images = []
    error_index = []
    error_dat = {"data": []}
    for i in range(len(exp_dat)):
        attrib = exp_dat[i]['attributes']
        if 'error' in attrib.keys():  # The 'error' key marks a failed image
            # Extracts the filename from the new entry's own 'id' field
            error_images.append(re.search(rf'^.*/(.*\.{image_extension})', exp_dat[i]['id']).group(1))
            error_index.append(i)
    if len(error_images) > 0:
        print('STILL HAVE ISSUES!!!!')
    else:
        dat['data'].extend(exp_dat)
        print('A-OK!')
    # Exports the error-checked file...
    with open(f'{local_path}{site_to_use}_export.json', 'w') as outfile:
        json.dump(dat, outfile)
    # ...and deletes the temporary error_json.json file (the request body was
    # already closed by its context manager above, so the file is free to remove)
    os.remove(f'{local_path}error_json.json')
else:
    print('No errors!')