@@ -1517,6 +1517,34 @@ def transformed_sentences(
15171517 return transformed
15181518
15191519
def fallback_rename_fields(field_names: List[str]) -> Tuple[List[str], List[float]]:
    """
    Rename fields without the LLM by delegating each name to ``normalize_name``.

    Every field is passed to ``normalize_name`` together with its index, its
    relative position in the list (``index / len(field_names)``) and the
    previous *original* field name (``"null"`` for the first field). The first
    two arguments to ``normalize_name`` are always empty strings here.

    After normalization, any name that occurs more than once gets a
    ``__<k>`` suffix, where ``k`` is the 1-based occurrence number in input
    order (``name__1``, ``name__2``, ...). Names that occur once are left
    untouched.

    Args:
        field_names: The original field names, in document order.

    Returns:
        A ``(new_names, new_names_conf)`` tuple: the renamed (and
        de-duplicated) names, and the confidence value ``normalize_name``
        returned for each field, both in the same order as ``field_names``.
        An empty input yields ``([], [])``.
    """
    # Function-scope import so this block does not depend on the module's
    # top-of-file imports.
    from collections import Counter

    length = len(field_names)
    last = "null"
    new_names: List[str] = []
    new_names_conf: List[float] = []
    for i, field_name in enumerate(field_names):
        # `i / length` cannot divide by zero: the loop body only runs when
        # field_names is non-empty.
        # NOTE(review): no `tools_token` is forwarded to normalize_name here;
        # confirm its default behaviour is what the fallback path wants.
        new_name, new_confidence = normalize_name(
            "",
            "",
            i,
            i / length,
            last,
            field_name,
        )
        new_names.append(new_name)
        new_names_conf.append(new_confidence)
        # The *original* name (not the normalized one) is the context for the
        # next field.
        last = field_name

    # Number duplicates in one pass instead of calling list.count() per item.
    totals = Counter(new_names)
    seen: Counter = Counter()
    deduped: List[str] = []
    for name in new_names:
        if totals[name] > 1:
            seen[name] += 1
            deduped.append(name + "__" + str(seen[name]))
        else:
            deduped.append(name)
    return deduped, new_names_conf
1546+
1547+
15201548def parse_form (
15211549 in_file : str ,
15221550 title : Optional [str ] = None ,
@@ -1690,7 +1718,6 @@ def _looks_reasonable(candidate: str) -> bool:
16901718 title = fallback_title if fallback_title else "(Untitled)"
16911719 nsmi = spot (title + ". " + text , token = spot_token ) if spot_token else []
16921720 if normalize :
1693- # Use enhanced LLM-powered field renaming with PDF context
16941721 if (openai_creds or resolved_api_key ) and field_names :
16951722 try :
16961723 field_mappings = rename_pdf_fields_with_context (
@@ -1700,57 +1727,13 @@ def _looks_reasonable(candidate: str) -> bool:
17001727 api_key = resolved_api_key ,
17011728 )
17021729 new_names = [field_mappings .get (name , name ) or name for name in field_names ]
1703- # Set high confidence for LLM-generated names
17041730 new_names_conf = [0.8 if field_mappings .get (name ) else 0.1 for name in field_names ]
1705- llm_renamed_count = len ([n for n in new_names if n and not n .startswith ('*' )])
1706- print (f"Successfully renamed { llm_renamed_count } fields using LLM" )
17071731 except Exception as e :
17081732 print (f"LLM field renaming failed: { e } , falling back to traditional approach" )
17091733 # Fallback to traditional approach
1710- length = len (field_names )
1711- last = "null"
1712- new_names = []
1713- new_names_conf = []
1714- for i , field_name in enumerate (field_names ):
1715- new_name , new_confidence = normalize_name (
1716- jur or "" ,
1717- cat or "" ,
1718- i ,
1719- i / length ,
1720- last ,
1721- field_name ,
1722- tools_token = tools_token ,
1723- )
1724- new_names .append (new_name )
1725- new_names_conf .append (new_confidence )
1726- last = field_name
1727- new_names = [
1728- v + "__" + str (new_names [:i ].count (v ) + 1 ) if new_names .count (v ) > 1 else v
1729- for i , v in enumerate (new_names )
1730- ]
1734+ new_names , new_names_conf = fallback_rename_fields (field_names )
17311735 else :
1732- # Traditional approach when no OpenAI credentials available
1733- length = len (field_names )
1734- last = "null"
1735- new_names = []
1736- new_names_conf = []
1737- for i , field_name in enumerate (field_names ):
1738- new_name , new_confidence = normalize_name (
1739- jur or "" ,
1740- cat or "" ,
1741- i ,
1742- i / length ,
1743- last ,
1744- field_name ,
1745- tools_token = tools_token ,
1746- )
1747- new_names .append (new_name )
1748- new_names_conf .append (new_confidence )
1749- last = field_name
1750- new_names = [
1751- v + "__" + str (new_names [:i ].count (v ) + 1 ) if new_names .count (v ) > 1 else v
1752- for i , v in enumerate (new_names )
1753- ]
1736+ new_names , new_names_conf = fallback_rename_fields (field_names )
17541737 else :
17551738 new_names = field_names
17561739 new_names_conf = []
0 commit comments