Skip to content

Commit

Permalink
fix TokenBasedDocument: convert tokens to tuple if they are a list (#380)
Browse files Browse the repository at this point in the history
  • Loading branch information
ArneBinder authored Nov 17, 2023
1 parent 8f14412 commit 02b23ba
Showing 1 changed file with 12 additions and 1 deletion.
13 changes: 12 additions & 1 deletion src/pytorch_ie/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,18 @@ class TextBasedDocument(WithMetadata, WithText, Document):

@dataclasses.dataclass
class TokenBasedDocument(WithMetadata, WithTokens, Document):
    """Token-based document that normalizes its ``tokens`` attribute to a tuple."""

    def __post_init__(self) -> None:
        """Coerce list-valued ``tokens`` to a tuple, then run the parent hook.

        Raises:
            ValueError: If ``tokens`` is neither a list nor a tuple.
        """
        current = self.tokens

        # Guard clause: anything other than a list or a tuple is rejected.
        if not isinstance(current, (list, tuple)):
            raise ValueError("tokens must be a tuple.")

        # Workaround for dataset usage: the document is serialized to a
        # JSON-like structure that has no tuple type, so tuples come back as
        # lists. Convert them back automatically when the document is
        # re-created from a dataset.
        if isinstance(current, list):
            object.__setattr__(self, "tokens", tuple(current))

        # Hand over to the default document construction code.
        super().__post_init__()


# backwards compatibility
Expand Down

0 comments on commit 02b23ba

Please sign in to comment.