diff --git a/openedx/core/djangoapps/content/search/plain_text_math.py b/openedx/core/djangoapps/content/search/plain_text_math.py index dde7633326a..c42a7316ee1 100644 --- a/openedx/core/djangoapps/content/search/plain_text_math.py +++ b/openedx/core/djangoapps/content/search/plain_text_math.py @@ -9,7 +9,6 @@ class InvalidMathEquation(Exception): """Raised when converting mathjax equations to plain text fails""" - pass class PlainTextMath: @@ -36,7 +35,6 @@ class PlainTextMath: ) regex_replacements = ( # Makes text bold, so not required in plain text. - (re.compile(r'\\mathbf{(.*?)}'), r"\1"), (re.compile(r'{\\bf (.*?)}'), r"\1"), ) extract_inner_texts = ( @@ -52,7 +50,7 @@ def _nested_bracket_matcher(equation: str, opening_pattern: str) -> str: Args: equation: string - opening_pattern: for example, \mathbf{ + opening_pattern: for example, `\\mathbf{` Returns: String inside the eqn brackets @@ -108,6 +106,19 @@ def _fraction_handler(self, equation: str) -> str: equation = equation[:n_start] + f"({numerator}/{denominator})" + equation[n_end + d_end:] return equation + def _nested_text_extractor(self, equation: str, pattern: str) -> str: + """ + Recursively extracts text from equation for given pattern + """ + try: + start, inner_start, inner_end, end = self._nested_bracket_matcher(equation, pattern) + inner_text = equation[inner_start:inner_end] + inner_text = self._nested_text_extractor(inner_text, pattern) + equation = equation[:start] + inner_text + equation[end:] + except InvalidMathEquation: + pass + return equation + def _handle_replacements(self, equation: str) -> str: """ Makes a bunch of replacements in equation string. @@ -115,11 +126,7 @@ def _handle_replacements(self, equation: str) -> str: for q, replacement in self.eqn_replacements: equation = equation.replace(q, replacement) for pattern in self.extract_inner_texts: - try: - start, inner_start, inner_end, end = self._nested_bracket_matcher(equation, pattern) - equation = equation[:start] + equation[inner_start:inner_end] + equation[end:] - except InvalidMathEquation: - continue + equation = self._nested_text_extractor(equation, pattern) for pattern, replacement in self.regex_replacements: equation = re.sub(pattern, replacement, equation) return equation diff --git a/openedx/core/djangoapps/content/search/tests/test_documents.py b/openedx/core/djangoapps/content/search/tests/test_documents.py index 1822d8b0122..5e50d855d3c 100644 --- a/openedx/core/djangoapps/content/search/tests/test_documents.py +++ b/openedx/core/djangoapps/content/search/tests/test_documents.py @@ -553,7 +553,11 @@ def test_mathjax_plain_text_conversion_for_search(self): 'Bold text: a ⋅ b = |a| |b| cos(θ)', ), ('Bold text: \\( \\frac{\\sqrt{\\mathbf{2}+3}}{\\sqrt{4}} \\)', 'Bold text: (√{2+3}/√{4})'), - ('Nested Bold text: \\( \\mathbf{ \\frac{1}{2} } \\)', 'Nested Bold text: (1/2)'), + ('Nested Bold text 1: \\( \\mathbf{ \\frac{1}{2} } \\)', 'Nested Bold text 1: (1/2)'), + ( + 'Nested Bold text 2: \\( \\mathbf{a \\cdot (a \\mathbf{\\times} b)} \\)', + 'Nested Bold text 2: a ⋅ (a × b)' + ), ('Sqrt test 1: \\(\\sqrt\\)', 'Sqrt test 1: √'), ('Sqrt test 2: \\(x^2 + \\sqrt(y)\\)', 'Sqrt test 2: x² + √(y)'), ('Sqrt test 3: [mathjaxinline]x^2 + \\sqrt(y)[/mathjaxinline]', 'Sqrt test 3: x² + √(y)'),