-
Notifications
You must be signed in to change notification settings - Fork 14
Description
I have obtained a set of tweets in JSON format from the Twitter API. However, a UnicodeDecodeError is raised when I try to preprocess the JSON file into the database (.db) file. I'm running the code in Jupyter on Windows. Here are the code and the error.
CODE
%pip install coordination_network_toolkit
%pip install networkx
%pip install pandas
%pip install pyvis
import coordination_network_toolkit as coord_net_tk
import networkx as nx
import pandas as pd
from pyvis import network as net
from IPython.core.display import display, HTML
json_filename = "C:/Users/asus/Documents/Israel/Israel.json"
db_filename = "C:/Users/asus/Documents/Israel/Israel.db"
coord_net_tk.preprocess.preprocess_twitter_json_files(db_filename, [json_filename])
ERROR
UnicodeDecodeError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\coordination_network_toolkit\preprocess.py in preprocess_twitter_json_files(db_path, input_filenames)
124 # Try v2 format
--> 125 preprocess_twitter_v2_json_data(db_path, tweets)
126 except:
C:\ProgramData\Anaconda3\lib\site-packages\coordination_network_toolkit\preprocess.py in preprocess_twitter_v2_json_data(db_path, tweets)
227
--> 228 for page in tweets:
229
C:\ProgramData\Anaconda3\lib\encodings\cp1252.py in decode(self, input, final)
22 def decode(self, input, final=False):
---> 23 return codecs.charmap_decode(input,self.errors,decoding_table)[0]
24
UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 1299: character maps to <undefined>
During handling of the above exception, another exception occurred:
UnicodeDecodeError Traceback (most recent call last)
in
1 json_filename = "C:/Users/asus/Documents/Israel/Israel.json"
2 db_filename = "C:/Users/asus/Documents/Israel/Israel.db"
----> 3 coord_net_tk.preprocess.preprocess_twitter_json_files(db_filename, [json_filename])
C:\ProgramData\Anaconda3\lib\site-packages\coordination_network_toolkit\preprocess.py in preprocess_twitter_json_files(db_path, input_filenames)
126 except:
127 # Fallback to v1.1 format
--> 128 preprocess_twitter_json_data(db_path, tweets)
129
130 print(f"Done preprocessing {message_file} into {db_path}")
C:\ProgramData\Anaconda3\lib\site-packages\coordination_network_toolkit\preprocess.py in preprocess_twitter_json_data(db_path, tweets)
145 db.execute("begin")
146
--> 147 for raw_tweet in tweets:
148
149 tweet = json.loads(raw_tweet)
C:\ProgramData\Anaconda3\lib\encodings\cp1252.py in decode(self, input, final)
21 class IncrementalDecoder(codecs.IncrementalDecoder):
22 def decode(self, input, final=False):
---> 23 return codecs.charmap_decode(input,self.errors,decoding_table)[0]
24
25 class StreamWriter(Codec,codecs.StreamWriter):
UnicodeDecodeError: 'charmap' codec can't decode byte 0x8f in position 1339: character maps to <undefined>