Skip to content

Commit 48ad916

Browse files
Commit message: refactor: recursively extract text
1 parent: a3e2087 · commit: 48ad916

File tree

2 files changed: +20 −9 lines changed

openedx/core/djangoapps/content/search/plain_text_math.py

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99

1010
class InvalidMathEquation(Exception):
1111
"""Raised when converting mathjax equations to plain text fails"""
12-
pass
1312

1413

1514
class PlainTextMath:
@@ -36,7 +35,6 @@ class PlainTextMath:
3635
)
3736
regex_replacements = (
3837
# Makes text bold, so not required in plain text.
39-
(re.compile(r'\\mathbf{(.*?)}'), r"\1"),
4038
(re.compile(r'{\\bf (.*?)}'), r"\1"),
4139
)
4240
extract_inner_texts = (
@@ -52,7 +50,7 @@ def _nested_bracket_matcher(equation: str, opening_pattern: str) -> str:
5250
5351
Args:
5452
equation: string
55-
opening_pattern: for example, \mathbf{
53+
opening_pattern: for example, `\\mathbf{`
5654
5755
Returns:
5856
String inside the eqn brackets
@@ -108,18 +106,27 @@ def _fraction_handler(self, equation: str) -> str:
108106
equation = equation[:n_start] + f"({numerator}/{denominator})" + equation[n_end + d_end:]
109107
return equation
110108

109+
def _nested_text_extractor(self, equation: str, pattern: str) -> str:
110+
"""
111+
Recursively extracts text from equation for given pattern
112+
"""
113+
try:
114+
start, inner_start, inner_end, end = self._nested_bracket_matcher(equation, pattern)
115+
inner_text = equation[inner_start:inner_end]
116+
inner_text = self._nested_text_extractor(inner_text, pattern)
117+
equation = equation[:start] + inner_text + equation[end:]
118+
except InvalidMathEquation:
119+
pass
120+
return equation
121+
111122
def _handle_replacements(self, equation: str) -> str:
112123
"""
113124
Makes a bunch of replacements in equation string.
114125
"""
115126
for q, replacement in self.eqn_replacements:
116127
equation = equation.replace(q, replacement)
117128
for pattern in self.extract_inner_texts:
118-
try:
119-
start, inner_start, inner_end, end = self._nested_bracket_matcher(equation, pattern)
120-
equation = equation[:start] + equation[inner_start:inner_end] + equation[end:]
121-
except InvalidMathEquation:
122-
continue
129+
equation = self._nested_text_extractor(equation, pattern)
123130
for pattern, replacement in self.regex_replacements:
124131
equation = re.sub(pattern, replacement, equation)
125132
return equation

openedx/core/djangoapps/content/search/tests/test_documents.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -553,7 +553,11 @@ def test_mathjax_plain_text_conversion_for_search(self):
553553
'Bold text: a ⋅ b = |a| |b| cos(θ)',
554554
),
555555
('Bold text: \\( \\frac{\\sqrt{\\mathbf{2}+3}}{\\sqrt{4}} \\)', 'Bold text: (√{2+3}/√{4})'),
556-
('Nested Bold text: \\( \\mathbf{ \\frac{1}{2} } \\)', 'Nested Bold text: (1/2)'),
556+
('Nested Bold text 1: \\( \\mathbf{ \\frac{1}{2} } \\)', 'Nested Bold text 1: (1/2)'),
557+
(
558+
'Nested Bold text 2: \\( \\mathbf{a \\cdot (a \\mathbf{\\times} b)} \\)',
559+
'Nested Bold text 2: a ⋅ (a × b)'
560+
),
557561
('Sqrt test 1: \\(\\sqrt\\)', 'Sqrt test 1: √'),
558562
('Sqrt test 2: \\(x^2 + \\sqrt(y)\\)', 'Sqrt test 2: x² + √(y)'),
559563
('Sqrt test 3: [mathjaxinline]x^2 + \\sqrt(y)[/mathjaxinline]', 'Sqrt test 3: x² + √(y)'),

0 commit comments

Comments (0)