-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnon_printable.py
119 lines (95 loc) · 3.92 KB
/
non_printable.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
from typing import Optional, Tuple
import cbor2
# Offset added to every payload byte: byte ``b`` is stored as the character
# with code point ``UTF8_MASK + b``, so hidden data occupies U+E0000..U+E00FF.
UTF8_MASK = 0xE0000

HEADER = "\U000e0042\U000e0042\U000e0011\U000e0011"
"""
Unique header consisting of 4 non-printable Unicode characters.
It separates the visible text from the hidden payload that follows it.
"""

HEADER_LENGTH = len(HEADER)
"""
Cached header length in characters (code points, not bytes), computed once at import time.
"""
class NonPrintableEncoder:
    """
    Utility class to encode and decode hidden byte data within a string using non-printable Unicode characters.

    Every payload byte is mapped to the character with code point ``UTF8_MASK + byte``;
    the resulting characters are appended to the visible text right after ``HEADER``.

    Note:
        This encoding method is not secure and should not be used for any cryptographic purposes.
        It can be easily reversed and is intended only for simple obfuscation.
    """

    @staticmethod
    def encode_dict(text: str, data: dict) -> str:
        """
        Serializes a dict with CBOR and embeds it within a string, preserving the original text.

        Args:
            text (str): The text to embed the encoded data into.
            data (dict): The dict data to encode.

        Returns:
            str: The string with the encoded data embedded.
        """
        return NonPrintableEncoder.encode(text, cbor2.dumps(data))

    @staticmethod
    def decode_dict(encoded_string: str) -> Tuple[str, Optional[dict]]:
        """
        Extracts the hidden byte data from a string and decodes it as CBOR.

        Args:
            encoded_string (str): The string containing the hidden encoded data.

        Returns:
            Tuple[str, Optional[dict]]: The visible text and the decoded data on success.
            If the header is missing, the payload characters are invalid, or CBOR
            decoding fails, the input string is returned unchanged together with None.
        """
        try:
            text, payload = NonPrintableEncoder.decode(encoded_string)
            # NOTE(review): cbor2.loads returns whatever the payload encodes; it is a
            # dict only when the payload was produced from one (as encode_dict does).
            dict_data = cbor2.loads(payload)
        except (ValueError, cbor2.CBORDecodeError):
            # Best-effort contract: never raise, hand the input back untouched.
            return encoded_string, None
        return text, dict_data

    @staticmethod
    def encode(text: str, data: bytes) -> str:
        """
        Encodes byte data and embeds it within a string, preserving the original text.

        Args:
            text (str): The text to embed the encoded data into.
            data (bytes): The byte data to encode.

        Returns:
            str: The original text followed by the header and the encoded data.
        """
        # One hidden character per byte, shifted into the UTF8_MASK range.
        encoded_data = "".join(chr(UTF8_MASK + byte) for byte in data)
        return f"{text}{HEADER}{encoded_data}"

    @staticmethod
    def decode(encoded_string: str) -> Tuple[str, bytes]:
        """
        Extracts and decodes the hidden byte data from a string.

        The string is split at the first occurrence of the header; everything before it
        is the visible text and everything after it is treated as payload.

        Args:
            encoded_string (str): The string containing the hidden encoded data.

        Returns:
            Tuple[str, bytes]: A tuple containing the original text and the decoded byte data.

        Raises:
            ValueError: If the header is missing, or if a character after the header
                does not map back to a byte value (0-255).
        """
        text, header, encoded_body = encoded_string.partition(HEADER)
        if not header:
            raise ValueError(
                "Encoded string does not contain the expected header. Data may be corrupted or not encoded."
            )
        try:
            byte_data = bytes(ord(char) - UTF8_MASK for char in encoded_body)
        except ValueError as err:
            # bytes() rejects values outside range(256) with a generic message;
            # re-raise with context so callers can tell what went wrong.
            raise ValueError(
                "Encoded string contains characters after the header that are not hidden payload data. "
                "Data may be corrupted."
            ) from err
        return text, byte_data
# Round-trip self-check, executed only when the module is run as a script.
if __name__ == "__main__":
    sample_text = "This is a test"
    sample_payload = {"key1": "value1", "key2": 2}

    # Hide the payload behind the visible text and show the result.
    hidden = NonPrintableEncoder.encode_dict(sample_text, sample_payload)
    print(f"Encoded string: {hidden}")

    # Recover both the visible text and the payload.
    recovered_text, recovered_payload = NonPrintableEncoder.decode_dict(hidden)
    print(f"Decoded text: {recovered_text}")
    print(f"Decoded data: {recovered_payload}")

    # Both halves must survive the round trip unchanged.
    text_message = "The decoded text does not match the original text."
    data_message = "The decoded data does not match the original data."
    assert sample_text == recovered_text, text_message
    assert sample_payload == recovered_payload, data_message