@@ -164,7 +164,7 @@ def __init__(
164
164
else :
165
165
self .tokenizer = tokenizer
166
166
self .chunker = chunker
167
-
167
+
168
168
# Content type is unknown at initialization
169
169
self .document_type = None
170
170
@@ -253,11 +253,11 @@ def static_context(self):
253
253
def _compute_required_tokens (self ):
254
254
pass
255
255
256
- @bind (engine = "neurosymbolic" , property = "api_max_context_tokens" )( lambda : 0 )
256
+ @bind (engine = "neurosymbolic" , property = "max_context_tokens" )
257
257
def _max_context_tokens (_ ):
258
258
pass
259
259
260
- @bind (engine = "neurosymbolic" , property = "api_max_response_tokens" )( lambda : 0 )
260
+ @bind (engine = "neurosymbolic" , property = "max_response_tokens" )
261
261
def _max_response_tokens (_ ):
262
262
pass
263
263
@@ -311,7 +311,7 @@ async def summarize_chunk(chunk):
311
311
chunk ,
312
312
preview = False ,
313
313
response_format = {"type" : "json_object" },
314
- ** kwargs
314
+ ** kwargs ,
315
315
)
316
316
return await loop .run_in_executor (None , forward_fn )
317
317
@@ -419,7 +419,7 @@ def forward(self, **kwargs) -> Summary:
419
419
data = self .content
420
420
doc_type = None
421
421
422
- while summary_token_count > self .max_output_tokens :
422
+ while summary_token_count > self .max_output_tokens :
423
423
logger .debug ("Chunking content..." )
424
424
chunks = self .chunk_by_token_count (str (data ), chunk_size )
425
425
if doc_type is None :
@@ -444,8 +444,6 @@ def forward(self, **kwargs) -> Summary:
444
444
if hasattr (res , "type" ):
445
445
res .type = doc_type
446
446
447
- # collect and return results
448
- return res
449
447
else :
450
448
logger .debug ("Content is within token limit, processing in one go..." )
451
449
logger .debug ("Determining document type and language..." )
@@ -463,4 +461,9 @@ def forward(self, **kwargs) -> Summary:
463
461
)
464
462
res .type = doc_type
465
463
464
+ # log compression ratio
465
+ result_tokens = self .compute_required_tokens (res , count_context = False )
466
+ logger .debug (
467
+ f"Compression ratio: { total_tokens } -> { result_tokens } ({ result_tokens / total_tokens :.2f} )"
468
+ )
466
469
return res
0 commit comments