Skip to content

Commit 7780ca8

Browse files
fix: long variables pattern matching (#428)
1 parent bca1752 commit 7780ca8

File tree

4 files changed

+184
-21
lines changed

4 files changed

+184
-21
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# 7.8.4 - 2026-02-09
2+
3+
fix: do not pattern match long values in code variables
4+
15
# 7.8.3 - 2026-02-06
26

37
fix: openAI input image sanitization

posthog/exception_utils.py

Lines changed: 39 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -43,26 +43,29 @@
4343
DEFAULT_MAX_VALUE_LENGTH = 1024
4444

4545
DEFAULT_CODE_VARIABLES_MASK_PATTERNS = [
46-
r"(?i).*password.*",
47-
r"(?i).*secret.*",
48-
r"(?i).*passwd.*",
49-
r"(?i).*pwd.*",
50-
r"(?i).*api_key.*",
51-
r"(?i).*apikey.*",
52-
r"(?i).*auth.*",
53-
r"(?i).*credentials.*",
54-
r"(?i).*privatekey.*",
55-
r"(?i).*private_key.*",
56-
r"(?i).*token.*",
57-
r"(?i).*aws_access_key_id.*",
58-
r"(?i).*_pass",
59-
r"(?i)sk_.*",
60-
r"(?i).*jwt.*",
46+
r"(?i)password",
47+
r"(?i)secret",
48+
r"(?i)passwd",
49+
r"(?i)pwd",
50+
r"(?i)api_key",
51+
r"(?i)apikey",
52+
r"(?i)auth",
53+
r"(?i)credentials",
54+
r"(?i)privatekey",
55+
r"(?i)private_key",
56+
r"(?i)token",
57+
r"(?i)aws_access_key_id",
58+
r"(?i)_pass",
59+
r"(?i)sk_",
60+
r"(?i)jwt",
6161
]
6262

6363
DEFAULT_CODE_VARIABLES_IGNORE_PATTERNS = [r"^__.*"]
6464

6565
CODE_VARIABLES_REDACTED_VALUE = "$$_posthog_redacted_based_on_masking_rules_$$"
66+
CODE_VARIABLES_TOO_LONG_VALUE = "$$_posthog_value_too_long_$$"
67+
68+
_MAX_VALUE_LENGTH_FOR_PATTERN_MATCH = 5_000
6669

6770
DEFAULT_TOTAL_VARIABLES_SIZE_LIMIT = 20 * 1024
6871

@@ -945,23 +948,37 @@ def _pattern_matches(name, patterns):
945948
return False
946949

947950

948-
def _mask_sensitive_data(value, compiled_mask):
951+
def _mask_sensitive_data(value, compiled_mask, _seen=None):
949952
if not compiled_mask:
950953
return value
951954

955+
if isinstance(value, (dict, list, tuple)):
956+
if _seen is None:
957+
_seen = set()
958+
obj_id = id(value)
959+
if obj_id in _seen:
960+
return "<circular ref>"
961+
_seen.add(obj_id)
962+
952963
if isinstance(value, dict):
953964
result = {}
954965
for k, v in value.items():
955966
key_str = str(k) if not isinstance(k, str) else k
956-
if _pattern_matches(key_str, compiled_mask):
967+
if len(key_str) > _MAX_VALUE_LENGTH_FOR_PATTERN_MATCH:
968+
result[k] = CODE_VARIABLES_TOO_LONG_VALUE
969+
elif _pattern_matches(key_str, compiled_mask):
957970
result[k] = CODE_VARIABLES_REDACTED_VALUE
958971
else:
959-
result[k] = _mask_sensitive_data(v, compiled_mask)
972+
result[k] = _mask_sensitive_data(v, compiled_mask, _seen)
960973
return result
961974
elif isinstance(value, (list, tuple)):
962-
masked_items = [_mask_sensitive_data(item, compiled_mask) for item in value]
975+
masked_items = [
976+
_mask_sensitive_data(item, compiled_mask, _seen) for item in value
977+
]
963978
return type(value)(masked_items)
964979
elif isinstance(value, str):
980+
if len(value) > _MAX_VALUE_LENGTH_FOR_PATTERN_MATCH:
981+
return CODE_VARIABLES_TOO_LONG_VALUE
965982
if _pattern_matches(value, compiled_mask):
966983
return CODE_VARIABLES_REDACTED_VALUE
967984
return value
@@ -982,7 +999,9 @@ def _serialize_variable_value(value, limiter, max_length=1024, compiled_mask=Non
982999
limiter.add(result_size)
9831000
return value
9841001
elif isinstance(value, str):
985-
if compiled_mask and _pattern_matches(value, compiled_mask):
1002+
if len(value) > _MAX_VALUE_LENGTH_FOR_PATTERN_MATCH:
1003+
result = CODE_VARIABLES_TOO_LONG_VALUE
1004+
elif compiled_mask and _pattern_matches(value, compiled_mask):
9861005
result = CODE_VARIABLES_REDACTED_VALUE
9871006
else:
9881007
result = value

posthog/test/test_exception_capture.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -450,3 +450,143 @@ def trigger_error():
450450
assert "<CustomReprClass: custom representation>" in output
451451
assert "<lambda>" in output
452452
assert "<function trigger_error at" in output
453+
454+
455+
def test_code_variables_too_long_string_value_replaced(tmpdir):
456+
app = tmpdir.join("app.py")
457+
app.write(
458+
dedent(
459+
"""
460+
import os
461+
from posthog import Posthog
462+
463+
posthog = Posthog(
464+
'phc_x',
465+
host='https://eu.i.posthog.com',
466+
debug=True,
467+
enable_exception_autocapture=True,
468+
capture_exception_code_variables=True,
469+
project_root=os.path.dirname(os.path.abspath(__file__))
470+
)
471+
472+
def trigger_error():
473+
short_value = "I am short"
474+
long_value = "x" * 20000
475+
long_blob = "password_" + "a" * 20000
476+
477+
1/0
478+
479+
trigger_error()
480+
"""
481+
)
482+
)
483+
484+
with pytest.raises(subprocess.CalledProcessError) as excinfo:
485+
subprocess.check_output([sys.executable, str(app)], stderr=subprocess.STDOUT)
486+
487+
output = excinfo.value.output.decode("utf-8")
488+
489+
assert "ZeroDivisionError" in output
490+
assert "code_variables" in output
491+
492+
assert "'short_value': 'I am short'" in output
493+
494+
assert "$$_posthog_value_too_long_$$" in output
495+
496+
assert "'long_blob': '$$_posthog_value_too_long_$$'" in output
497+
498+
499+
def test_code_variables_too_long_string_in_nested_dict(tmpdir):
500+
app = tmpdir.join("app.py")
501+
app.write(
502+
dedent(
503+
"""
504+
import os
505+
from posthog import Posthog
506+
507+
posthog = Posthog(
508+
'phc_x',
509+
host='https://eu.i.posthog.com',
510+
debug=True,
511+
enable_exception_autocapture=True,
512+
capture_exception_code_variables=True,
513+
project_root=os.path.dirname(os.path.abspath(__file__))
514+
)
515+
516+
def trigger_error():
517+
my_data = {
518+
"short_key": "short_val",
519+
"long_key": "y" * 20000,
520+
"nested": {
521+
"deep_long": "z" * 20000,
522+
"deep_short": "ok",
523+
},
524+
}
525+
526+
1/0
527+
528+
trigger_error()
529+
"""
530+
)
531+
)
532+
533+
with pytest.raises(subprocess.CalledProcessError) as excinfo:
534+
subprocess.check_output([sys.executable, str(app)], stderr=subprocess.STDOUT)
535+
536+
output = excinfo.value.output.decode("utf-8")
537+
538+
assert "ZeroDivisionError" in output
539+
assert "code_variables" in output
540+
541+
assert "short_val" in output
542+
assert "ok" in output
543+
544+
assert "$$_posthog_value_too_long_$$" in output
545+
assert "y" * 1000 not in output
546+
assert "z" * 1000 not in output
547+
548+
549+
def test_mask_sensitive_data_too_long_dict_key():
550+
from posthog.exception_utils import (
551+
CODE_VARIABLES_TOO_LONG_VALUE,
552+
_compile_patterns,
553+
_mask_sensitive_data,
554+
)
555+
556+
compiled_mask = _compile_patterns([r"(?i)password"])
557+
558+
result = _mask_sensitive_data(
559+
{
560+
"short": "visible",
561+
"k" * 20000: "hidden_val",
562+
"password": "secret",
563+
},
564+
compiled_mask,
565+
)
566+
567+
assert result["short"] == "visible"
568+
# The over-long key itself is kept here; it is presumably shortened later by the JSON truncation at 1024 chars, so that is fine.
569+
assert result["k" * 20000] == CODE_VARIABLES_TOO_LONG_VALUE
570+
assert result["password"] == "$$_posthog_redacted_based_on_masking_rules_$$"
571+
572+
573+
def test_mask_sensitive_data_circular_ref():
574+
from posthog.exception_utils import _compile_patterns, _mask_sensitive_data
575+
576+
compiled_mask = _compile_patterns([r"(?i)password"])
577+
578+
# Circular dict
579+
circular_dict = {"key": "value"}
580+
circular_dict["self"] = circular_dict
581+
582+
result = _mask_sensitive_data(circular_dict, compiled_mask)
583+
assert result["key"] == "value"
584+
assert result["self"] == "<circular ref>"
585+
586+
# Circular list
587+
circular_list = ["item"]
588+
circular_list.append(circular_list)
589+
590+
result = _mask_sensitive_data(circular_list, compiled_mask)
591+
assert result[0] == "item"
592+
assert result[1] == "<circular ref>"

posthog/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
VERSION = "7.8.3"
1+
VERSION = "7.8.4"
22

33
if __name__ == "__main__":
44
print(VERSION, end="") # noqa: T201

0 commit comments

Comments
 (0)