Skip to content

Commit

Permalink
Fix up encoding and avoid encode-decode waste. Resolves #82 (#86)
Browse files Browse the repository at this point in the history
* Fix up encoding and avoid encode-decode waste. Resolves #82

* Fix up type hint.
  • Loading branch information
alexaryn authored Oct 5, 2023
1 parent 51e2f67 commit fee43fb
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 3 deletions.
2 changes: 1 addition & 1 deletion sycamore/data/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def text_representation(self, value: str) -> None:
self.data["text_representation"] = value

@property
def binary_representation(self) -> Optional[str]:
def binary_representation(self) -> Optional[bytes]:
return self.data["binary_representation"]

@binary_representation.setter
Expand Down
11 changes: 9 additions & 2 deletions sycamore/transforms/partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,17 @@ def element_in_left_col(e: Element) -> bool:
class Partitioner(ABC):
@staticmethod
def to_element(dict: dict[str, Any]) -> Element:
text = dict.pop("text")
if isinstance(text, str):
binary = text.encode("utf-8")
else:
binary = text
text = str(binary, "utf-8")

element = Element()
element.type = dict.pop("type")
element.binary_representation = dict.pop("text")
element.text_representation = str(element.binary_representation)
element.binary_representation = binary
element.text_representation = text
element.properties.update(dict.pop("metadata"))
element.properties.update(dict)

Expand Down

0 comments on commit fee43fb

Please sign in to comment.