Skip to content

Commit f7c702d

Browse files
fix: Nonetypes causing error in preprocessing
1 parent 6eac264 commit f7c702d

File tree

2 files changed

+25
-5
lines changed

2 files changed

+25
-5
lines changed

elysia/preprocessing/collection.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -188,7 +188,11 @@ async def _evaluate_field_statistics(
188188
# For text, we want to evaluate the length of the text in tokens (use spacy)
189189
lengths = []
190190
for obj in sample_objects:
191-
if property in obj and isinstance(obj[property], str):
191+
if (
192+
property in obj
193+
and isinstance(obj[property], str)
194+
and obj[property] is not None
195+
):
192196
lengths.append(len(nlp(obj[property])))
193197

194198
if len(lengths) == 0:
@@ -228,7 +232,11 @@ async def _evaluate_field_statistics(
228232

229233
# List (lengths)
230234
elif properties[property].endswith("[]"):
231-
lengths = [len(obj[property]) for obj in sample_objects]
235+
lengths = [
236+
len(obj[property])
237+
for obj in sample_objects
238+
if isinstance(obj[property], list)
239+
]
232240

233241
if len(lengths) == 0:
234242
out["range"] = None

elysia/util/client.py

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -101,8 +101,6 @@ def __init__(
101101
```
102102
"""
103103

104-
self.logger = logger
105-
106104
if client_timeout is None:
107105
self.client_timeout = datetime.timedelta(
108106
minutes=int(os.getenv("CLIENT_TIMEOUT", 3))
@@ -117,6 +115,11 @@ def __init__(
117115
else:
118116
self.settings = settings
119117

118+
if logger is None:
119+
self.logger = self.settings.logger
120+
else:
121+
self.logger = logger
122+
120123
# Set the weaviate url and api key
121124
if wcd_url is None:
122125
self.wcd_url = self.settings.WCD_URL
@@ -217,7 +220,16 @@ def __init__(
217220
return
218221

219222
# Start sync client
220-
self.client = self.get_client()
223+
try:
224+
self.client = self.get_client()
225+
except Exception as e:
226+
if self.logger:
227+
self.logger.error(
228+
"Error initialising Weaviate client. Please check your Weaviate configuration is set correctly (WCD_URL, WCD_API_KEY, WEAVIATE_IS_LOCAL, LOCAL_WEAVIATE_PORT, LOCAL_WEAVIATE_GRPC_PORT)."
229+
)
230+
self.logger.error(f"Full Weaviate connection error message: {e}")
231+
self.is_client = False
232+
return
221233
self.sync_restart_event.set()
222234

223235
def _get_local_host_and_port(self) -> tuple[str, int]:

0 commit comments

Comments
 (0)