Skip to content

Commit

Permalink
refactor: recursively extract text
Browse files Browse the repository at this point in the history
  • Loading branch information
navinkarkera committed Jan 9, 2025
1 parent a3e2087 commit 48ad916
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 9 deletions.
23 changes: 15 additions & 8 deletions openedx/core/djangoapps/content/search/plain_text_math.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

class InvalidMathEquation(Exception):
    """Raised when converting mathjax equations to plain text fails."""


class PlainTextMath:
Expand All @@ -36,7 +35,6 @@ class PlainTextMath:
)
regex_replacements = (
# Makes text bold, so not required in plain text.
(re.compile(r'\\mathbf{(.*?)}'), r"\1"),
(re.compile(r'{\\bf (.*?)}'), r"\1"),
)
extract_inner_texts = (
Expand All @@ -52,7 +50,7 @@ def _nested_bracket_matcher(equation: str, opening_pattern: str) -> str:
Args:
equation: string
opening_pattern: for example, \mathbf{
opening_pattern: for example, `\\mathbf{`
Returns:
String inside the eqn brackets
Expand Down Expand Up @@ -108,18 +106,27 @@ def _fraction_handler(self, equation: str) -> str:
equation = equation[:n_start] + f"({numerator}/{denominator})" + equation[n_end + d_end:]
return equation

def _nested_text_extractor(self, equation: str, pattern: str) -> str:
    """
    Recursively replaces every `pattern...}` group in equation with its inner text.

    Both groups nested inside a matched group and sibling groups that appear
    after its closing bracket are unwrapped.

    Args:
        equation: equation text to process
        pattern: opening pattern of the group to unwrap, for example `\\mathbf{`

    Returns:
        The equation with each matched group replaced by its inner text. Text
        for which `_nested_bracket_matcher` raises InvalidMathEquation is
        returned unchanged.
    """
    try:
        start, inner_start, inner_end, end = self._nested_bracket_matcher(equation, pattern)
    except InvalidMathEquation:
        # The matcher could not find a group to unwrap here; keep the text as it is.
        return equation
    # Unwrap groups nested inside the matched group.
    inner_text = self._nested_text_extractor(equation[inner_start:inner_end], pattern)
    # Keep scanning after the closing bracket so sibling groups are unwrapped too.
    remainder = self._nested_text_extractor(equation[end:], pattern)
    return equation[:start] + inner_text + remainder

def _handle_replacements(self, equation: str) -> str:
    """
    Makes a bunch of replacements in equation string.

    The replacements are applied in this order:
      1. plain substring replacements from `self.eqn_replacements`
      2. unwrapping of the groups opened by each pattern in
         `self.extract_inner_texts`, keeping only their inner text
      3. regular expression substitutions from `self.regex_replacements`

    Args:
        equation: equation text to process

    Returns:
        The equation after all replacements have been applied.
    """
    for q, replacement in self.eqn_replacements:
        equation = equation.replace(q, replacement)
    for pattern in self.extract_inner_texts:
        # The helper returns the equation unchanged when it cannot match the
        # pattern, so no error handling is needed here.
        equation = self._nested_text_extractor(equation, pattern)
    for pattern, replacement in self.regex_replacements:
        equation = re.sub(pattern, replacement, equation)
    return equation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,11 @@ def test_mathjax_plain_text_conversion_for_search(self):
'Bold text: a ⋅ b = |a| |b| cos(θ)',
),
('Bold text: \\( \\frac{\\sqrt{\\mathbf{2}+3}}{\\sqrt{4}} \\)', 'Bold text: (√{2+3}/√{4})'),
('Nested Bold text: \\( \\mathbf{ \\frac{1}{2} } \\)', 'Nested Bold text: (1/2)'),
('Nested Bold text 1: \\( \\mathbf{ \\frac{1}{2} } \\)', 'Nested Bold text 1: (1/2)'),
(
'Nested Bold text 2: \\( \\mathbf{a \\cdot (a \\mathbf{\\times} b)} \\)',
'Nested Bold text 2: a ⋅ (a × b)'
),
('Sqrt test 1: \\(\\sqrt\\)', 'Sqrt test 1: √'),
('Sqrt test 2: \\(x^2 + \\sqrt(y)\\)', 'Sqrt test 2: x² + √(y)'),
('Sqrt test 3: [mathjaxinline]x^2 + \\sqrt(y)[/mathjaxinline]', 'Sqrt test 3: x² + √(y)'),
Expand Down

0 comments on commit 48ad916

Please sign in to comment.