From 2e141f8cda7d48d61af578381c53c5008b05d0cf Mon Sep 17 00:00:00 2001 From: bygu4 Date: Sat, 28 Sep 2024 21:02:08 +0300 Subject: [PATCH 01/42] add finite automaton word generation --- .../finite_automaton/finite_automaton.py | 24 +++++++++++++++++++ .../nondeterministic_transition_function.py | 7 ++++++ .../finite_automaton/transition_function.py | 6 +++++ 3 files changed, 37 insertions(+) diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index 6a719b4..785203e 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -594,6 +594,30 @@ def is_equivalent_to(self, other): self_dfa = self.to_deterministic() return self_dfa.is_equivalent_to(other) + def get_accepted_words(self): + """ Gets words accepted by the finite automaton """ + for start_state in self.start_states: + for word in self.get_words_accepted_from_state(start_state): + yield word + + def get_words_accepted_from_state(self, initial_state: State): + """ + Gets words that are accepted \ + starting from the given state. + """ + queue = [(initial_state, [])] + while len(queue) > 0: + (current_state, current_word) = queue.pop(0) + transitions = self._transition_function.get_transitions_from( + current_state) + for symbol, next_state in transitions: + temp_word = current_word.copy() + if symbol != Epsilon(): + temp_word.append(symbol) + if self.is_final_state(next_state): + yield temp_word + queue.append((next_state, temp_word)) + def to_deterministic(self): """ Turns the automaton into a deterministic one""" raise NotImplementedError diff --git a/pyformlang/finite_automaton/nondeterministic_transition_function.py b/pyformlang/finite_automaton/nondeterministic_transition_function.py index 71a6287..a97e9c7 100644 --- a/pyformlang/finite_automaton/nondeterministic_transition_function.py +++ b/pyformlang/finite_automaton/nondeterministic_transition_function.py @@ -201,3 +201,10 @@ def to_dict(self): The transitions as a dictionary. """ return copy.deepcopy(self._transitions) + + def get_transitions_from(self, state_from: State): + """ Gets transitions from the given state """ + if state_from in self._transitions: + for symb_by, states_to in self._transitions[state_from].items(): + for state_to in states_to: + yield (symb_by, state_to) diff --git a/pyformlang/finite_automaton/transition_function.py b/pyformlang/finite_automaton/transition_function.py index 0534674..75c75e8 100644 --- a/pyformlang/finite_automaton/transition_function.py +++ b/pyformlang/finite_automaton/transition_function.py @@ -199,6 +199,12 @@ def to_dict(self): """ return copy.deepcopy(self._transitions) + def get_transitions_from(self, state_from: State): + """ Gets transitions from the given state """ + if state_from in self._transitions: + for symb_by, state_to in self._transitions[state_from].items(): + yield (symb_by, state_to) + class DuplicateTransitionError(Exception): """ Signals a duplicated transition From 5a7119e6c29c6b37929ee728dc40ff085157da2f Mon Sep 17 00:00:00 2001 From: bygu4 Date: Sun, 29 Sep 2024 00:03:54 +0300 Subject: [PATCH 02/42] add final path existance checks --- .../finite_automaton/finite_automaton.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index 785203e..47cc74f 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -606,6 +606,8 @@ def get_words_accepted_from_state(self, initial_state: State): starting from the given state. """ queue = [(initial_state, [])] + transitive_closure = nx.transitive_closure( + self.to_networkx()) while len(queue) > 0: (current_state, current_word) = queue.pop(0) transitions = self._transition_function.get_transitions_from( @@ -616,7 +618,10 @@ def get_words_accepted_from_state(self, initial_state: State): temp_word.append(symbol) if self.is_final_state(next_state): yield temp_word - queue.append((next_state, temp_word)) + if exists_any_final_path(transitive_closure, + next_state, + self.final_states): + queue.append((next_state, temp_word)) def to_deterministic(self): """ Turns the automaton into a deterministic one""" @@ -701,3 +706,15 @@ def add_start_state_to_graph(graph, state): width=.0) graph.add_edge("starting_" + str(state.value), state.value) + +def exists_any_final_path(transitive_closure, source, final_nodes): + """ + Checks if there are any paths from \ + given node to one of the final nodes. + """ + return any(node_is_reachable(transitive_closure, source, final) + for final in final_nodes) + +def node_is_reachable(transitive_closure, source, target): + """ Checks if the target node can be reached from the source node """ + return target in transitive_closure[source].keys() From 2ddf4ad4e5fc3ca19174871c55c64dc592709d5a Mon Sep 17 00:00:00 2001 From: bygu4 Date: Sun, 29 Sep 2024 16:03:17 +0300 Subject: [PATCH 03/42] set up CI for all branches, minor style changes --- .github/workflows/python-package.yml | 8 +++++--- pyformlang/finite_automaton/finite_automaton.py | 6 ++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 26f47de..6e73c20 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -5,9 +5,11 @@ name: Python package on: push: - branches: [ master ] pull_request: - branches: [ master ] + types: + - opened + - reopened + - closed jobs: build: @@ -51,7 +53,7 @@ jobs: junitxml-path: ./pytest.xml default-branch: master - name: Create coverage Badge - if: ${{ matrix.python-version == '3.8'}} + if: ${{ github.ref_name == 'master' && matrix.python-version == '3.8'}} uses: schneegans/dynamic-badges-action@v1.0.0 with: auth: ${{ secrets.GIST_SECRET }} diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index 47cc74f..314a47c 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -602,8 +602,8 @@ def get_accepted_words(self): def get_words_accepted_from_state(self, initial_state: State): """ - Gets words that are accepted \ - starting from the given state. + Gets words that are accepted by finite \ + automaton starting from the given state. """ queue = [(initial_state, [])] transitive_closure = nx.transitive_closure( @@ -707,6 +707,7 @@ def add_start_state_to_graph(graph, state): graph.add_edge("starting_" + str(state.value), state.value) + def exists_any_final_path(transitive_closure, source, final_nodes): """ Checks if there are any paths from \ @@ -715,6 +716,7 @@ def exists_any_final_path(transitive_closure, source, final_nodes): return any(node_is_reachable(transitive_closure, source, final) for final in final_nodes) + def node_is_reachable(transitive_closure, source, target): """ Checks if the target node can be reached from the source node """ return target in transitive_closure[source].keys() From 001d7a9abec01d5ac8a539441d6e5217223f4ad3 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Sun, 29 Sep 2024 17:49:58 +0300 Subject: [PATCH 04/42] add tests for word generation, debug --- .../finite_automaton/finite_automaton.py | 7 +-- .../test_deterministic_finite_automaton.py | 29 ++++++++++++ .../tests/test_epsilon_nfa.py | 42 ++++++++++++++++++ .../test_nondeterministic_finite_automaton.py | 44 +++++++++++++++++++ 4 files changed, 119 insertions(+), 3 deletions(-) diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index 314a47c..900f275 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -616,12 +616,12 @@ def get_words_accepted_from_state(self, initial_state: State): temp_word = current_word.copy() if symbol != Epsilon(): temp_word.append(symbol) - if self.is_final_state(next_state): - yield temp_word if exists_any_final_path(transitive_closure, next_state, self.final_states): queue.append((next_state, temp_word)) + if self.is_final_state(current_state): + yield current_word def to_deterministic(self): """ Turns the automaton into a deterministic one""" @@ -719,4 +719,5 @@ def exists_any_final_path(transitive_closure, source, final_nodes): def node_is_reachable(transitive_closure, source, target): """ Checks if the target node can be reached from the source node """ - return target in transitive_closure[source].keys() + return target == source or \ + target in transitive_closure[source].keys() diff --git a/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py b/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py index b49d7fe..6629639 100644 --- a/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py @@ -282,6 +282,14 @@ def test_regex_dfa(self): dfa_regex = dfa1.to_regex().to_epsilon_nfa() self.assertEqual(dfa1, dfa_regex) + def test_word_generation(self): + dfa = get_dfa_example_for_word_generation() + accepted_words = list(dfa.get_accepted_words()) + self.assertTrue([] in accepted_words) + self.assertTrue([Symbol("b"), Symbol("c")] in accepted_words) + self.assertTrue([Symbol("b"), Symbol("d")] in accepted_words) + self.assertEqual(len(accepted_words), 3) + def get_example0(): """ Gives a dfa """ @@ -328,3 +336,24 @@ def get_dfa_example(): dfa1.add_start_state(State("A")) dfa1.add_final_state(State("D")) return dfa1 + + +def get_dfa_example_for_word_generation(): + """ DFA example for the word generation test """ + dfa = DeterministicFiniteAutomaton() + states = [State(x) for x in range(0, 4)] + symbol_a = Symbol("a") + symbol_b = Symbol("b") + symbol_c = Symbol("c") + symbol_d = Symbol("d") + dfa.add_transitions([ + (states[0], symbol_a, states[1]), + (states[0], symbol_b, states[2]), + (states[1], symbol_a, states[1]), + (states[2], symbol_c, states[3]), + (states[2], symbol_d, states[3]), + ]) + dfa.add_start_state(states[0]) + dfa.add_final_state(states[0]) + dfa.add_final_state(states[3]) + return dfa diff --git a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py index 75a6112..c21f561 100644 --- a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py +++ b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py @@ -622,6 +622,17 @@ def test_remove_epsilon_transitions(self): self.assertEqual(nfa.get_number_transitions(), 3) self.assertTrue(nfa.is_equivalent_to(enfa)) + def test_word_generation(self): + enfa = get_enfa_example_for_word_generation() + accepted_words = list(enfa.get_accepted_words()) + self.assertTrue([] in accepted_words) + self.assertTrue([Symbol("b")] in accepted_words) + self.assertTrue([Symbol("c")] in accepted_words) + self.assertTrue([Symbol("d"), Symbol("e")] in accepted_words) + self.assertTrue( + [Symbol("d"), Symbol("e"), Symbol("f")] in accepted_words) + self.assertEqual(len(accepted_words), 5) + def get_digits_enfa(): """ An epsilon NFA to recognize digits """ @@ -730,3 +741,34 @@ def get_example_non_minimal(): enfa0.add_transition(state5, symb_b, state3) enfa0.add_transition(state6, symb_b, state4) return enfa0 + + +def get_enfa_example_for_word_generation(): + """ ENFA example for the word generation test """ + enfa = EpsilonNFA() + states = [State(x) for x in range(0, 9)] + symbol_a = Symbol("a") + symbol_b = Symbol("b") + symbol_c = Symbol("c") + symbol_d = Symbol("d") + symbol_e = Symbol("e") + symbol_f = Symbol("f") + epsilon = Epsilon() + enfa.add_transitions([ + (states[0], symbol_a, states[1]), + (states[0], epsilon, states[2]), + (states[1], symbol_a, states[1]), + (states[2], symbol_b, states[3]), + (states[2], symbol_c, states[3]), + (states[4], symbol_d, states[5]), + (states[5], symbol_e, states[6]), + (states[5], symbol_e, states[7]), + (states[7], symbol_f, states[8]), + ]) + enfa.add_start_state(states[0]) + enfa.add_start_state(states[4]) + enfa.add_final_state(states[3]) + enfa.add_final_state(states[4]) + enfa.add_final_state(states[6]) + enfa.add_final_state(states[8]) + return enfa diff --git a/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py index b7acb48..7417e7f 100644 --- a/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py @@ -117,3 +117,47 @@ def test_epsilon_refused(self): state1 = State(1) with self.assertRaises(InvalidEpsilonTransition): dfa.add_transition(state0, Epsilon(), state1) + + def test_word_generation(self): + nfa = get_nfa_example_for_word_generation() + accepted_words = list(nfa.get_accepted_words()) + self.assertTrue([] in accepted_words) + self.assertTrue([Symbol("a"), Symbol("b")] in accepted_words) + self.assertTrue([Symbol("a"), Symbol("c")] in accepted_words) + self.assertTrue([Symbol("d"), Symbol("e")] in accepted_words) + self.assertTrue( + [Symbol("d"), Symbol("e"), Symbol("f")] in accepted_words) + self.assertEqual(len(accepted_words), 5) + + +def get_nfa_example_for_word_generation(): + """ + Gets Nondeterministic Finite Automaton \ + example for the word generation test. + """ + nfa = NondeterministicFiniteAutomaton() + states = [State(x) for x in range(0, 9)] + symbol_a = Symbol("a") + symbol_b = Symbol("b") + symbol_c = Symbol("c") + symbol_d = Symbol("d") + symbol_e = Symbol("e") + symbol_f = Symbol("f") + nfa.add_transitions([ + (states[0], symbol_a, states[1]), + (states[0], symbol_a, states[2]), + (states[1], symbol_a, states[1]), + (states[2], symbol_b, states[3]), + (states[2], symbol_c, states[3]), + (states[4], symbol_d, states[5]), + (states[5], symbol_e, states[6]), + (states[5], symbol_e, states[7]), + (states[7], symbol_f, states[8]), + ]) + nfa.add_start_state(states[0]) + nfa.add_start_state(states[4]) + nfa.add_final_state(states[3]) + nfa.add_final_state(states[4]) + nfa.add_final_state(states[6]) + nfa.add_final_state(states[8]) + return nfa From a97e69fe283f1a688d8d818a2104aa8be62c1f12 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Sun, 29 Sep 2024 18:38:07 +0300 Subject: [PATCH 05/42] refactor --- pyformlang/finite_automaton/finite_automaton.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index 900f275..39a37b0 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -597,8 +597,7 @@ def is_equivalent_to(self, other): def get_accepted_words(self): """ Gets words accepted by the finite automaton """ for start_state in self.start_states: - for word in self.get_words_accepted_from_state(start_state): - yield word + yield from self.get_words_accepted_from_state(start_state) def get_words_accepted_from_state(self, initial_state: State): """ @@ -613,12 +612,12 @@ def get_words_accepted_from_state(self, initial_state: State): transitions = self._transition_function.get_transitions_from( current_state) for symbol, next_state in transitions: - temp_word = current_word.copy() - if symbol != Epsilon(): - temp_word.append(symbol) if exists_any_final_path(transitive_closure, next_state, self.final_states): + temp_word = current_word.copy() + if symbol != Epsilon(): + temp_word.append(symbol) queue.append((next_state, temp_word)) if self.is_final_state(current_state): yield current_word From 660b8e9f72a62052da860e7b8b4d83cb2f52251a Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 30 Sep 2024 19:13:34 +0300 Subject: [PATCH 06/42] make path check methods private, refactor, add type signatures --- .../finite_automaton/finite_automaton.py | 51 ++++++++++--------- .../nondeterministic_transition_function.py | 5 +- .../finite_automaton/transition_function.py | 8 +-- 3 files changed, 35 insertions(+), 29 deletions(-) diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index 39a37b0..7998293 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -1,6 +1,6 @@ """ A general finite automaton representation """ -from typing import List, Any +from typing import List, Iterable, Any import networkx as nx from networkx.drawing.nx_pydot import write_dot @@ -42,6 +42,7 @@ def __init__(self): self._transition_function = None self._start_state = set() self._final_states = set() + self.__transitive_closure = None def add_transition(self, s_from: State, symb_by: Symbol, s_to: State) -> int: @@ -594,27 +595,25 @@ def is_equivalent_to(self, other): self_dfa = self.to_deterministic() return self_dfa.is_equivalent_to(other) - def get_accepted_words(self): + def get_accepted_words(self) -> Iterable[List[Symbol]]: """ Gets words accepted by the finite automaton """ for start_state in self.start_states: yield from self.get_words_accepted_from_state(start_state) - def get_words_accepted_from_state(self, initial_state: State): + def get_words_accepted_from_state(self, initial_state: State) \ + -> Iterable[List[Symbol]]: """ Gets words that are accepted by finite \ automaton starting from the given state. """ queue = [(initial_state, [])] - transitive_closure = nx.transitive_closure( - self.to_networkx()) + self.__set_transitive_closure() while len(queue) > 0: (current_state, current_word) = queue.pop(0) transitions = self._transition_function.get_transitions_from( current_state) for symbol, next_state in transitions: - if exists_any_final_path(transitive_closure, - next_state, - self.final_states): + if self.__exists_any_final_path_from(next_state): temp_word = current_word.copy() if symbol != Epsilon(): temp_word.append(symbol) @@ -622,6 +621,27 @@ def get_words_accepted_from_state(self, initial_state: State): if self.is_final_state(current_state): yield current_word + def __set_transitive_closure(self): + """ + Bulds MultiDiGraph transitive closure \ + of FA and sets it to the private field. + """ + self.__transitive_closure = nx.transitive_closure( + self.to_networkx()) + + def __exists_any_final_path_from(self, source: State) -> bool: + """ + Checks if there are any paths from \ + given state to one of the final states. + """ + return any(self.__exists_path(source, final) + for final in self.final_states) + + def __exists_path(self, source: State, target: State) -> bool: + """ Checks if the target state can be reached from the source state """ + return target == source or \ + target in self.__transitive_closure[source].keys() + def to_deterministic(self): """ Turns the automaton into a deterministic one""" raise NotImplementedError @@ -705,18 +725,3 @@ def add_start_state_to_graph(graph, state): width=.0) graph.add_edge("starting_" + str(state.value), state.value) - - -def exists_any_final_path(transitive_closure, source, final_nodes): - """ - Checks if there are any paths from \ - given node to one of the final nodes. - """ - return any(node_is_reachable(transitive_closure, source, final) - for final in final_nodes) - - -def node_is_reachable(transitive_closure, source, target): - """ Checks if the target node can be reached from the source node """ - return target == source or \ - target in transitive_closure[source].keys() diff --git a/pyformlang/finite_automaton/nondeterministic_transition_function.py b/pyformlang/finite_automaton/nondeterministic_transition_function.py index a97e9c7..357065b 100644 --- a/pyformlang/finite_automaton/nondeterministic_transition_function.py +++ b/pyformlang/finite_automaton/nondeterministic_transition_function.py @@ -2,7 +2,7 @@ A nondeterministic transition function """ import copy -from typing import Set +from typing import Set, Iterable from .state import State from .symbol import Symbol @@ -202,7 +202,8 @@ def to_dict(self): """ return copy.deepcopy(self._transitions) - def get_transitions_from(self, state_from: State): + def get_transitions_from(self, state_from: State) \ + -> Iterable[tuple[Symbol, State]]: """ Gets transitions from the given state """ if state_from in self._transitions: for symb_by, states_to in self._transitions[state_from].items(): diff --git a/pyformlang/finite_automaton/transition_function.py b/pyformlang/finite_automaton/transition_function.py index 75c75e8..91a6b2b 100644 --- a/pyformlang/finite_automaton/transition_function.py +++ b/pyformlang/finite_automaton/transition_function.py @@ -2,7 +2,7 @@ Representation of a transition function """ import copy -from typing import List +from typing import List, Iterable from pyformlang.finite_automaton.epsilon import Epsilon @@ -199,11 +199,11 @@ def to_dict(self): """ return copy.deepcopy(self._transitions) - def get_transitions_from(self, state_from: State): + def get_transitions_from(self, state_from: State) \ + -> Iterable[tuple[Symbol, State]]: """ Gets transitions from the given state """ if state_from in self._transitions: - for symb_by, state_to in self._transitions[state_from].items(): - yield (symb_by, state_to) + yield from self._transitions[state_from].items() class DuplicateTransitionError(Exception): From 7ca040895a68c40c99770eb2470b86536cfc0362 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 30 Sep 2024 19:25:14 +0300 Subject: [PATCH 07/42] import Tuple to debug --- .../finite_automaton/nondeterministic_transition_function.py | 4 ++-- pyformlang/finite_automaton/transition_function.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyformlang/finite_automaton/nondeterministic_transition_function.py b/pyformlang/finite_automaton/nondeterministic_transition_function.py index 357065b..71facbc 100644 --- a/pyformlang/finite_automaton/nondeterministic_transition_function.py +++ b/pyformlang/finite_automaton/nondeterministic_transition_function.py @@ -2,7 +2,7 @@ A nondeterministic transition function """ import copy -from typing import Set, Iterable +from typing import Set, Iterable, Tuple from .state import State from .symbol import Symbol @@ -203,7 +203,7 @@ def to_dict(self): return copy.deepcopy(self._transitions) def get_transitions_from(self, state_from: State) \ - -> Iterable[tuple[Symbol, State]]: + -> Iterable[Tuple[Symbol, State]]: """ Gets transitions from the given state """ if state_from in self._transitions: for symb_by, states_to in self._transitions[state_from].items(): diff --git a/pyformlang/finite_automaton/transition_function.py b/pyformlang/finite_automaton/transition_function.py index 91a6b2b..72390fa 100644 --- a/pyformlang/finite_automaton/transition_function.py +++ b/pyformlang/finite_automaton/transition_function.py @@ -2,7 +2,7 @@ Representation of a transition function """ import copy -from typing import List, Iterable +from typing import List, Iterable, Tuple from pyformlang.finite_automaton.epsilon import Epsilon @@ -200,7 +200,7 @@ def to_dict(self): return copy.deepcopy(self._transitions) def get_transitions_from(self, state_from: State) \ - -> Iterable[tuple[Symbol, State]]: + -> Iterable[Tuple[Symbol, State]]: """ Gets transitions from the given state """ if state_from in self._transitions: yield from self._transitions[state_from].items() From d4e7a2ff825ec201ed483a80a7d78ed68e78be74 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Wed, 2 Oct 2024 23:40:02 +0300 Subject: [PATCH 08/42] add tests for transitions iteration --- ...st_nondeterministic_transition_function.py | 22 ++++++++++++++++++- .../tests/test_transition_function.py | 18 +++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/pyformlang/finite_automaton/tests/test_nondeterministic_transition_function.py b/pyformlang/finite_automaton/tests/test_nondeterministic_transition_function.py index ad6ee54..bf168d0 100644 --- a/pyformlang/finite_automaton/tests/test_nondeterministic_transition_function.py +++ b/pyformlang/finite_automaton/tests/test_nondeterministic_transition_function.py @@ -6,7 +6,7 @@ import unittest from pyformlang.finite_automaton import State, Symbol, \ - NondeterministicTransitionFunction + NondeterministicTransitionFunction, Epsilon class TestNondeterministicTransitionFunction(unittest.TestCase): @@ -91,3 +91,23 @@ def test_call(self): self.assertEqual(len(transition_function(s_to, symb_by)), 0) transition_function.add_transition(s_from, symb_by, s_from) self.assertEqual(transition_function(s_from, symb_by), {s_to, s_from}) + + def test_get_transitions_from(self): + """ Tests iteration of transitions from specified state """ + transition_function = NondeterministicTransitionFunction() + states = [State(x) for x in range(0, 5)] + symbol_a = Symbol("a") + symbol_b = Symbol("b") + symbol_c = Symbol("c") + epsilon = Epsilon() + transition_function.add_transition(states[0], symbol_a, states[1]) + transition_function.add_transition(states[1], symbol_b, states[2]) + transition_function.add_transition(states[1], symbol_c, states[2]) + transition_function.add_transition(states[1], symbol_c, states[3]) + transition_function.add_transition(states[1], epsilon, states[4]) + transitions = list(transition_function.get_transitions_from(states[1])) + self.assertTrue((symbol_b, states[2]) in transitions) + self.assertTrue((symbol_c, states[2]) in transitions) + self.assertTrue((symbol_c, states[3]) in transitions) + self.assertTrue((epsilon, states[4]) in transitions) + self.assertEqual(len(transitions), 4) diff --git a/pyformlang/finite_automaton/tests/test_transition_function.py b/pyformlang/finite_automaton/tests/test_transition_function.py index d1c4b01..59bc3b0 100644 --- a/pyformlang/finite_automaton/tests/test_transition_function.py +++ b/pyformlang/finite_automaton/tests/test_transition_function.py @@ -90,3 +90,21 @@ def test_invalid_epsilon(self): transition_function = TransitionFunction() with self.assertRaises(InvalidEpsilonTransition): transition_function.add_transition("1", Epsilon(), "2") + + def test_get_transitions_from(self): + """ Tests iteration of transitions from specified state """ + transition_function = TransitionFunction() + states = [State(x) for x in range(0, 4)] + symbol_a = Symbol("a") + symbol_b = Symbol("b") + symbol_c = Symbol("c") + symbol_d = Symbol("d") + transition_function.add_transition(states[0], symbol_a, states[1]) + transition_function.add_transition(states[1], symbol_b, states[2]) + transition_function.add_transition(states[1], symbol_c, states[2]) + transition_function.add_transition(states[1], symbol_d, states[3]) + transitions = list(transition_function.get_transitions_from(states[1])) + self.assertTrue((symbol_b, states[2]) in transitions) + self.assertTrue((symbol_c, states[2]) in transitions) + self.assertTrue((symbol_d, states[3]) in transitions) + self.assertEqual(len(transitions), 3) From 76d5b75de508aedc8b3b8bfa1dd697c5b2bdbb35 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Fri, 4 Oct 2024 15:26:13 +0300 Subject: [PATCH 09/42] update CI --- .github/workflows/python-package.yml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 6e73c20..caf70a6 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -3,13 +3,7 @@ name: Python package -on: - push: - pull_request: - types: - - opened - - reopened - - closed +on: [push, pull_request] jobs: build: From d72eff86a5f29c1cd798aeb26b1f2ab3c1eec245 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Wed, 16 Oct 2024 14:39:30 +0300 Subject: [PATCH 10/42] add max word length specification, refactor, add cyclic enfa test --- .../finite_automaton/finite_automaton.py | 21 ++++++------- .../nondeterministic_transition_function.py | 2 +- .../test_deterministic_finite_automaton.py | 2 +- .../tests/test_epsilon_nfa.py | 31 ++++++++++++++++++- .../test_nondeterministic_finite_automaton.py | 2 +- 5 files changed, 42 insertions(+), 16 deletions(-) diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index 7998293..7a04ece 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -595,21 +595,18 @@ def is_equivalent_to(self, other): self_dfa = self.to_deterministic() return self_dfa.is_equivalent_to(other) - def get_accepted_words(self) -> Iterable[List[Symbol]]: - """ Gets words accepted by the finite automaton """ - for start_state in self.start_states: - yield from self.get_words_accepted_from_state(start_state) - - def get_words_accepted_from_state(self, initial_state: State) \ + def get_accepted_words(self, max_length: int = -1) \ -> Iterable[List[Symbol]]: """ - Gets words that are accepted by finite \ - automaton starting from the given state. + Gets words accepted by the finite automaton. """ - queue = [(initial_state, [])] + states_to_visit = [(start_state, []) + for start_state in self.start_states] self.__set_transitive_closure() - while len(queue) > 0: - (current_state, current_word) = queue.pop(0) + while states_to_visit: + current_state, current_word = states_to_visit.pop(0) + if len(current_word) > max_length and max_length != -1: + continue transitions = self._transition_function.get_transitions_from( current_state) for symbol, next_state in transitions: @@ -617,7 +614,7 @@ def get_words_accepted_from_state(self, initial_state: State) \ temp_word = current_word.copy() if symbol != Epsilon(): temp_word.append(symbol) - queue.append((next_state, temp_word)) + states_to_visit.append((next_state, temp_word)) if self.is_final_state(current_state): yield current_word diff --git a/pyformlang/finite_automaton/nondeterministic_transition_function.py b/pyformlang/finite_automaton/nondeterministic_transition_function.py index 71facbc..50c253a 100644 --- a/pyformlang/finite_automaton/nondeterministic_transition_function.py +++ b/pyformlang/finite_automaton/nondeterministic_transition_function.py @@ -208,4 +208,4 @@ def get_transitions_from(self, state_from: State) \ if state_from in self._transitions: for symb_by, states_to in self._transitions[state_from].items(): for state_to in states_to: - yield (symb_by, state_to) + yield symb_by, state_to diff --git a/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py b/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py index 6629639..13ec5d6 100644 --- a/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py @@ -341,7 +341,7 @@ def get_dfa_example(): def get_dfa_example_for_word_generation(): """ DFA example for the word generation test """ dfa = DeterministicFiniteAutomaton() - states = [State(x) for x in range(0, 4)] + states = [State(x) for x in range(4)] symbol_a = Symbol("a") symbol_b = Symbol("b") symbol_c = Symbol("c") diff --git a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py index c21f561..4fb6af1 100644 --- a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py +++ b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py @@ -633,6 +633,16 @@ def test_word_generation(self): [Symbol("d"), Symbol("e"), Symbol("f")] in accepted_words) self.assertEqual(len(accepted_words), 5) + def test_cyclic_word_generation(self): + enfa = get_cyclic_enfa_example() + max_length = 10 + accepted_words = [[Symbol("a")] + + [Symbol("b")] * x + + [Symbol("c")] + for x in range(1, max_length - 1)] + actual_accepted_words = list(enfa.get_accepted_words(max_length)) + self.assertEqual(accepted_words, actual_accepted_words) + def get_digits_enfa(): """ An epsilon NFA to recognize digits """ @@ -746,7 +756,7 @@ def get_example_non_minimal(): def get_enfa_example_for_word_generation(): """ ENFA example for the word generation test """ enfa = EpsilonNFA() - states = [State(x) for x in range(0, 9)] + states = [State(x) for x in range(9)] symbol_a = Symbol("a") symbol_b = Symbol("b") symbol_c = Symbol("c") @@ -772,3 +782,22 @@ def get_enfa_example_for_word_generation(): enfa.add_final_state(states[6]) enfa.add_final_state(states[8]) return enfa + + +def get_cyclic_enfa_example(): + """ ENFA example with a cycle on the path to the final state """ + enfa = EpsilonNFA() + states = [State(x) for x in range(4)] + symbol_a = Symbol("a") + symbol_b = Symbol("b") + symbol_c = Symbol("c") + epsilon = Epsilon() + enfa.add_transitions([ + (states[0], symbol_a, states[1]), + (states[1], symbol_b, states[2]), + (states[2], epsilon, states[1]), + (states[2], symbol_c, states[3]), + ]) + enfa.add_start_state(states[0]) + enfa.add_final_state(states[3]) + return enfa diff --git a/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py index 7417e7f..618b3d6 100644 --- a/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py @@ -136,7 +136,7 @@ def get_nfa_example_for_word_generation(): example for the word generation test. """ nfa = NondeterministicFiniteAutomaton() - states = [State(x) for x in range(0, 9)] + states = [State(x) for x in range(9)] symbol_a = Symbol("a") symbol_b = Symbol("b") symbol_c = Symbol("c") From f49ce75a3a633779862b1aeb516c84332cdfa77b Mon Sep 17 00:00:00 2001 From: bygu4 Date: Thu, 17 Oct 2024 00:30:30 +0300 Subject: [PATCH 11/42] rewrite the generator without networkx --- .../finite_automaton/finite_automaton.py | 56 +++++++++++-------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index 7a04ece..95c2e00 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -1,6 +1,6 @@ """ A general finite automaton representation """ -from typing import List, Iterable, Any +from typing import List, Iterable, Set, Any import networkx as nx from networkx.drawing.nx_pydot import write_dot @@ -42,7 +42,6 @@ def __init__(self): self._transition_function = None self._start_state = set() self._final_states = set() - self.__transitive_closure = None def add_transition(self, s_from: State, symb_by: Symbol, s_to: State) -> int: @@ -602,7 +601,7 @@ def get_accepted_words(self, max_length: int = -1) \ """ states_to_visit = [(start_state, []) for start_state in self.start_states] - self.__set_transitive_closure() + states_leading_to_final = self._get_states_leading_to_final() while states_to_visit: current_state, current_word = states_to_visit.pop(0) if len(current_word) > max_length and max_length != -1: @@ -610,7 +609,9 @@ def get_accepted_words(self, max_length: int = -1) \ transitions = self._transition_function.get_transitions_from( current_state) for symbol, next_state in transitions: - if self.__exists_any_final_path_from(next_state): + if symbol == Epsilon() and next_state == current_state: + continue + if next_state in states_leading_to_final: temp_word = current_word.copy() if symbol != Epsilon(): temp_word.append(symbol) @@ -618,26 +619,37 @@ def get_accepted_words(self, max_length: int = -1) \ if self.is_final_state(current_state): yield current_word - def __set_transitive_closure(self): + def _get_states_leading_to_final(self) -> Set[State]: """ - Bulds MultiDiGraph transitive closure \ - of FA and sets it to the private field. + Gets a set of states from which one + of the final states can be reached. """ - self.__transitive_closure = nx.transitive_closure( - self.to_networkx()) - - def __exists_any_final_path_from(self, source: State) -> bool: - """ - Checks if there are any paths from \ - given state to one of the final states. - """ - return any(self.__exists_path(source, final) - for final in self.final_states) - - def __exists_path(self, source: State, target: State) -> bool: - """ Checks if the target state can be reached from the source state """ - return target == source or \ - target in self.__transitive_closure[source].keys() + leading_to_final = self.final_states.copy() + visited = set() + states_to_process = [(None, start_state) + for start_state in self.start_states] + while states_to_process: + previous_state, current_state = states_to_process.pop() + if previous_state and current_state in leading_to_final: + leading_to_final.add(previous_state) + continue + if current_state in visited: + continue + visited.add(current_state) + next_states = self._get_next_states_from(current_state) + if next_states: + states_to_process.append((previous_state, current_state)) + for next_state in next_states: + states_to_process.append((current_state, next_state)) + return leading_to_final + + def _get_next_states_from(self, state_from: State) -> Set[State]: + """ Gets a set of states that are next to the given one """ + next_states = set() + for _, next_state in \ + self._transition_function.get_transitions_from(state_from): + next_states.add(next_state) + return next_states def to_deterministic(self): """ Turns the automaton into a deterministic one""" From 5406e6f86b57a6a81e3b6a2c93680114328b57fb Mon Sep 17 00:00:00 2001 From: bygu4 Date: Sat, 19 Oct 2024 18:57:24 +0300 Subject: [PATCH 12/42] use deque collection --- pyformlang/finite_automaton/finite_automaton.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index 95c2e00..24fc2a6 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -1,6 +1,7 @@ """ A general finite automaton representation """ from typing import List, Iterable, Set, Any +from collections import deque import networkx as nx from networkx.drawing.nx_pydot import write_dot @@ -599,11 +600,11 @@ def get_accepted_words(self, max_length: int = -1) \ """ Gets words accepted by the finite automaton. """ - states_to_visit = [(start_state, []) - for start_state in self.start_states] + states_to_visit = deque((start_state, []) + for start_state in self.start_states) states_leading_to_final = self._get_states_leading_to_final() while states_to_visit: - current_state, current_word = states_to_visit.pop(0) + current_state, current_word = states_to_visit.popleft() if len(current_word) > max_length and max_length != -1: continue transitions = self._transition_function.get_transitions_from( @@ -626,8 +627,8 @@ def _get_states_leading_to_final(self) -> Set[State]: """ leading_to_final = self.final_states.copy() visited = set() - states_to_process = [(None, start_state) - for start_state in self.start_states] + states_to_process = deque((None, start_state) + for start_state in self.start_states) while states_to_process: previous_state, current_state = states_to_process.pop() if previous_state and current_state in leading_to_final: From ed3e418daad02a325bf77e50166fba3f26a542d5 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 21 Oct 2024 12:52:52 +0300 Subject: [PATCH 13/42] generalize _get_reachable_states, correct max length specification --- .../deterministic_finite_automaton.py | 17 ---------------- .../finite_automaton/finite_automaton.py | 20 ++++++++++++++++--- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/pyformlang/finite_automaton/deterministic_finite_automaton.py b/pyformlang/finite_automaton/deterministic_finite_automaton.py index ce58a5b..c3117dc 100644 --- a/pyformlang/finite_automaton/deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/deterministic_finite_automaton.py @@ -300,23 +300,6 @@ def _get_previous_transitions(self): previous_transitions.add(None, symbol, None) return previous_transitions - def _get_reachable_states(self) -> AbstractSet[State]: - """ Get all states which are reachable """ - to_process = [] - processed = set() - for state in self._start_state: - to_process.append(state) - processed.add(state) - while to_process: - current = to_process.pop() - for symbol in self._input_symbols: - next_state = self._transition_function(current, symbol) - if not next_state or next_state[0] in processed: - continue - to_process.append(next_state[0]) - processed.add(next_state[0]) - return processed - def minimize(self) -> "DeterministicFiniteAutomaton": """ Minimize the current DFA diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index 24fc2a6..cdf2c0e 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -1,6 +1,6 @@ """ A general finite automaton representation """ -from typing import List, Iterable, Set, Any +from typing import List, Iterable, Set, Optional, Any from collections import deque import networkx as nx @@ -595,17 +595,19 @@ def is_equivalent_to(self, other): self_dfa = self.to_deterministic() return self_dfa.is_equivalent_to(other) - def get_accepted_words(self, max_length: int = -1) \ + def get_accepted_words(self, max_length: Optional[int] = None) \ -> Iterable[List[Symbol]]: """ Gets words accepted by the finite automaton. """ + if max_length and max_length < 0: + return [] states_to_visit = deque((start_state, []) for start_state in self.start_states) states_leading_to_final = self._get_states_leading_to_final() while states_to_visit: current_state, current_word = states_to_visit.popleft() - if len(current_word) > max_length and max_length != -1: + if max_length and len(current_word) > max_length: continue transitions = self._transition_function.get_transitions_from( current_state) @@ -644,6 +646,18 @@ def _get_states_leading_to_final(self) -> Set[State]: states_to_process.append((current_state, next_state)) return leading_to_final + def _get_reachable_states(self) -> Set[State]: + """ Get all states which are reachable """ + visited = set() + states_to_process = deque(self.start_states) + while states_to_process: + current_state = states_to_process.pop() + visited.add(current_state) + for next_state in self._get_next_states_from(current_state): + if next_state not in visited: + states_to_process.append(next_state) + return visited + def _get_next_states_from(self, state_from: State) -> Set[State]: """ Gets a set of states that are next to the given one """ next_states = set() From 0fe3d88357cdc0a73480597e5ac093739daa0f1e Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 21 Oct 2024 16:55:02 +0300 Subject: [PATCH 14/42] avoid epsilon cycles, add tests with epsilon cycles, add tests with max_length zero, debug --- .../finite_automaton/finite_automaton.py | 15 +++--- .../tests/test_epsilon_nfa.py | 47 ++++++++++++++++++- 2 files changed, 53 insertions(+), 9 deletions(-) diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index cdf2c0e..54492bf 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -600,25 +600,26 @@ def get_accepted_words(self, max_length: Optional[int] = None) \ """ Gets words accepted by the finite automaton. """ - if max_length and max_length < 0: + if max_length is not None and max_length < 0: return [] - states_to_visit = deque((start_state, []) + states_to_visit = deque((start_state, start_state, []) for start_state in self.start_states) states_leading_to_final = self._get_states_leading_to_final() while states_to_visit: - current_state, current_word = states_to_visit.popleft() - if max_length and len(current_word) > max_length: + last_state, current_state, current_word = states_to_visit.popleft() + if max_length is not None and len(current_word) > max_length: continue transitions = self._transition_function.get_transitions_from( current_state) for symbol, next_state in transitions: - if symbol == Epsilon() and next_state == current_state: - continue + if symbol == Epsilon() and next_state == last_state: + continue # avoiding epsilon cycles if next_state in states_leading_to_final: temp_word = current_word.copy() if symbol != Epsilon(): temp_word.append(symbol) - states_to_visit.append((next_state, temp_word)) + last_state = next_state + states_to_visit.append((last_state, next_state, temp_word)) if self.is_final_state(current_state): yield current_word diff --git a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py index 4fb6af1..9fa2a94 100644 --- a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py +++ b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py @@ -637,12 +637,34 @@ def test_cyclic_word_generation(self): enfa = get_cyclic_enfa_example() max_length = 10 accepted_words = [[Symbol("a")] + - [Symbol("b")] * x + + [Symbol("b")] * (i + 1) + [Symbol("c")] - for x in range(1, max_length - 1)] + for i in range(max_length - 2)] actual_accepted_words = list(enfa.get_accepted_words(max_length)) self.assertEqual(accepted_words, actual_accepted_words) + def test_epsilon_cycle_word_generation(self): + enfa = get_epsilon_cycle_enfa_example() + max_length = 4 + accepted_words = list(enfa.get_accepted_words(max_length)) + self.assertTrue([] in accepted_words) + self.assertTrue([Symbol("a"), Symbol("c")] in accepted_words) + self.assertTrue([Symbol("a"), + Symbol("b"),Symbol("c")] in accepted_words) + self.assertTrue([Symbol("a"), Symbol("b"), + Symbol("b"), Symbol("c")] in accepted_words) + self.assertEqual(len(accepted_words), 4) + + def test_max_length_zero_accepting_empty_string(self): + enfa = get_enfa_example_for_word_generation() + accepted_words = list(enfa.get_accepted_words(0)) + self.assertEqual(accepted_words, [[]]) + + def test_max_length_zero_not_accepting_empty_string(self): + enfa = get_cyclic_enfa_example() + accepted_words = list(enfa.get_accepted_words(0)) + self.assertEqual(accepted_words, []) + def get_digits_enfa(): """ An epsilon NFA to recognize digits """ @@ -801,3 +823,24 @@ def get_cyclic_enfa_example(): enfa.add_start_state(states[0]) enfa.add_final_state(states[3]) return enfa + +def get_epsilon_cycle_enfa_example(): + """ ENFA example with an epsilon cycle """ + enfa = EpsilonNFA() + states = [State(x) for x in range(4)] + symbol_a = Symbol("a") + symbol_b = Symbol("b") + symbol_c = Symbol("c") + epsilon = Epsilon() + enfa.add_transitions([ + (states[0], epsilon, states[0]), + (states[0], symbol_a, states[1]), + (states[1], symbol_b, states[1]), + (states[1], epsilon, states[2]), + (states[2], epsilon, states[1]), + (states[1], symbol_c, states[3]), + ]) + enfa.add_start_state(states[0]) + enfa.add_final_state(states[0]) + enfa.add_final_state(states[3]) + return enfa From 2ae69a304ee73e1aeef9fb8b63bc31f34edc37a2 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 21 Oct 2024 17:01:05 +0300 Subject: [PATCH 15/42] minor style changes --- pyformlang/finite_automaton/tests/test_epsilon_nfa.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py index 9fa2a94..7d63a90 100644 --- a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py +++ b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py @@ -824,6 +824,7 @@ def get_cyclic_enfa_example(): enfa.add_final_state(states[3]) return enfa + def get_epsilon_cycle_enfa_example(): """ ENFA example with an epsilon cycle """ enfa = EpsilonNFA() From 82202dadfd687b45f62d9102dbae065a6804dd69 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Tue, 22 Oct 2024 10:48:45 +0300 Subject: [PATCH 16/42] correct cycle checks, use BFS in _get_reachable_states --- pyformlang/finite_automaton/finite_automaton.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index 54492bf..a930a86 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -606,20 +606,23 @@ def get_accepted_words(self, max_length: Optional[int] = None) \ for start_state in self.start_states) states_leading_to_final = self._get_states_leading_to_final() while states_to_visit: - last_state, current_state, current_word = states_to_visit.popleft() + last_state_before_epsilon, current_state, current_word = \ + states_to_visit.popleft() if max_length is not None and len(current_word) > max_length: continue transitions = self._transition_function.get_transitions_from( current_state) for symbol, next_state in transitions: - if symbol == Epsilon() and next_state == last_state: + if symbol == Epsilon() \ + and next_state == last_state_before_epsilon: continue # avoiding epsilon cycles if next_state in states_leading_to_final: + temp_state = last_state_before_epsilon temp_word = current_word.copy() if symbol != Epsilon(): + temp_state = next_state temp_word.append(symbol) - last_state = next_state - states_to_visit.append((last_state, next_state, temp_word)) + states_to_visit.append((temp_state, next_state, temp_word)) if self.is_final_state(current_state): yield current_word @@ -652,7 +655,7 @@ def _get_reachable_states(self) -> Set[State]: visited = set() states_to_process = deque(self.start_states) while states_to_process: - current_state = states_to_process.pop() + current_state = states_to_process.popleft() visited.add(current_state) for next_state in self._get_next_states_from(current_state): if next_state not in visited: From deed14323cdd4289b5beb4094079e6a7dbeeca50 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Tue, 22 Oct 2024 16:32:43 +0300 Subject: [PATCH 17/42] update tests for word generator --- .../test_deterministic_finite_automaton.py | 8 ++--- .../tests/test_epsilon_nfa.py | 32 +++++++++---------- .../test_nondeterministic_finite_automaton.py | 13 ++++---- .../tests/test_transition_function.py | 8 ++--- 4 files changed, 29 insertions(+), 32 deletions(-) diff --git a/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py b/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py index 16b1481..eb1dc03 100644 --- a/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py @@ -283,10 +283,10 @@ def test_regex_dfa(self): def test_word_generation(self): dfa = get_dfa_example_for_word_generation() accepted_words = list(dfa.get_accepted_words()) - self.assertTrue([] in accepted_words) - self.assertTrue([Symbol("b"), Symbol("c")] in accepted_words) - self.assertTrue([Symbol("b"), Symbol("d")] in accepted_words) - self.assertEqual(len(accepted_words), 3) + assert [] in accepted_words + assert [Symbol("b"), Symbol("c")] in accepted_words + assert [Symbol("b"), Symbol("d")] in accepted_words + assert len(accepted_words) == 3 def get_example0(): diff --git a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py index 43db732..8b52127 100644 --- a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py +++ b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py @@ -625,13 +625,12 @@ def test_remove_epsilon_transitions(self): def test_word_generation(self): enfa = get_enfa_example_for_word_generation() accepted_words = list(enfa.get_accepted_words()) - self.assertTrue([] in accepted_words) - self.assertTrue([Symbol("b")] in accepted_words) - self.assertTrue([Symbol("c")] in accepted_words) - self.assertTrue([Symbol("d"), Symbol("e")] in accepted_words) - self.assertTrue( - [Symbol("d"), Symbol("e"), Symbol("f")] in accepted_words) - self.assertEqual(len(accepted_words), 5) + assert [] in accepted_words + assert [Symbol("b")] in accepted_words + assert [Symbol("c")] in accepted_words + assert [Symbol("d"), Symbol("e")] in accepted_words + assert [Symbol("d"), Symbol("e"), Symbol("f")] in accepted_words + assert len(accepted_words) == 5 def test_cyclic_word_generation(self): enfa = get_cyclic_enfa_example() @@ -641,29 +640,28 @@ def test_cyclic_word_generation(self): [Symbol("c")] for i in range(max_length - 2)] actual_accepted_words = list(enfa.get_accepted_words(max_length)) - self.assertEqual(accepted_words, actual_accepted_words) + assert accepted_words == actual_accepted_words def test_epsilon_cycle_word_generation(self): enfa = get_epsilon_cycle_enfa_example() max_length = 4 accepted_words = list(enfa.get_accepted_words(max_length)) - self.assertTrue([] in accepted_words) - self.assertTrue([Symbol("a"), Symbol("c")] in accepted_words) - self.assertTrue([Symbol("a"), - Symbol("b"),Symbol("c")] in accepted_words) - self.assertTrue([Symbol("a"), Symbol("b"), - Symbol("b"), Symbol("c")] in accepted_words) - self.assertEqual(len(accepted_words), 4) + assert [] in accepted_words + assert [Symbol("a"), Symbol("c")] in accepted_words + assert [Symbol("a"), Symbol("b"),Symbol("c")] in accepted_words + assert [Symbol("a"), Symbol("b"), + Symbol("b"), Symbol("c")] in accepted_words + assert len(accepted_words) == 4 def test_max_length_zero_accepting_empty_string(self): enfa = get_enfa_example_for_word_generation() accepted_words = list(enfa.get_accepted_words(0)) - self.assertEqual(accepted_words, [[]]) + assert accepted_words == [[]] def test_max_length_zero_not_accepting_empty_string(self): enfa = get_cyclic_enfa_example() accepted_words = list(enfa.get_accepted_words(0)) - self.assertEqual(accepted_words, []) + assert accepted_words == [] def get_digits_enfa(): diff --git a/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py index 94c4157..f4eec26 100644 --- a/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py @@ -119,13 +119,12 @@ def test_epsilon_refused(self): def test_word_generation(self): nfa = get_nfa_example_for_word_generation() accepted_words = list(nfa.get_accepted_words()) - self.assertTrue([] in accepted_words) - self.assertTrue([Symbol("a"), Symbol("b")] in accepted_words) - self.assertTrue([Symbol("a"), Symbol("c")] in accepted_words) - self.assertTrue([Symbol("d"), Symbol("e")] in accepted_words) - self.assertTrue( - [Symbol("d"), Symbol("e"), Symbol("f")] in accepted_words) - self.assertEqual(len(accepted_words), 5) + assert [] in accepted_words + assert [Symbol("a"), Symbol("b")] in accepted_words + assert [Symbol("a"), Symbol("c")] in accepted_words + assert [Symbol("d"), Symbol("e")] in accepted_words + assert [Symbol("d"), Symbol("e"), Symbol("f")] in accepted_words + assert len(accepted_words) == 5 def get_nfa_example_for_word_generation(): diff --git a/pyformlang/finite_automaton/tests/test_transition_function.py b/pyformlang/finite_automaton/tests/test_transition_function.py index 132e2b1..85e1972 100644 --- a/pyformlang/finite_automaton/tests/test_transition_function.py +++ b/pyformlang/finite_automaton/tests/test_transition_function.py @@ -102,7 +102,7 @@ def test_get_transitions_from(self): transition_function.add_transition(states[1], symbol_c, states[2]) transition_function.add_transition(states[1], symbol_d, states[3]) transitions = list(transition_function.get_transitions_from(states[1])) - self.assertTrue((symbol_b, states[2]) in transitions) - self.assertTrue((symbol_c, states[2]) in transitions) - self.assertTrue((symbol_d, states[3]) in transitions) - self.assertEqual(len(transitions), 3) + assert (symbol_b, states[2]) in transitions + assert (symbol_c, states[2]) in transitions + assert (symbol_d, states[3]) in transitions + assert len(transitions) == 3 From bd3dabaee826fd2f2925c50e794ce900fa3a8ba4 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Thu, 24 Oct 2024 22:45:05 +0300 Subject: [PATCH 18/42] add checks for any duplicates, add test for duplicate generation --- .../finite_automaton/finite_automaton.py | 30 +++++++++++------ .../test_deterministic_finite_automaton.py | 20 +++++++++++ .../tests/test_epsilon_nfa.py | 2 +- .../test_nondeterministic_finite_automaton.py | 33 +++++++++++++++++++ 4 files changed, 74 insertions(+), 11 deletions(-) diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index 2e18b54..d33ba84 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -602,29 +602,29 @@ def get_accepted_words(self, max_length: Optional[int] = None) \ """ if max_length is not None and max_length < 0: return [] - states_to_visit = deque((start_state, start_state, []) + states_to_visit = deque((start_state, []) for start_state in self.start_states) states_leading_to_final = self._get_states_leading_to_final() + words_by_state = {state: set() for state in self.states} + yielded_words = set() while states_to_visit: - last_state_before_epsilon, current_state, current_word = \ - states_to_visit.popleft() + current_state, current_word = states_to_visit.popleft() if max_length is not None and len(current_word) > max_length: continue + word_to_add = tuple(current_word) + if not self.__try_add(words_by_state[current_state], word_to_add): + continue transitions = self._transition_function.get_transitions_from( current_state) for symbol, next_state in transitions: - if symbol == Epsilon() \ - and next_state == last_state_before_epsilon: - continue # avoiding epsilon cycles if next_state in states_leading_to_final: - temp_state = last_state_before_epsilon temp_word = current_word.copy() if symbol != Epsilon(): - temp_state = next_state temp_word.append(symbol) - states_to_visit.append((temp_state, next_state, temp_word)) + states_to_visit.append((next_state, temp_word)) if self.is_final_state(current_state): - yield current_word + if self.__try_add(yielded_words, word_to_add): + yield current_word def _get_states_leading_to_final(self) -> Set[State]: """ @@ -713,6 +713,16 @@ def to_dict(self): """ return self._transition_function.to_dict() + @staticmethod + def __try_add(set_to_add_to: Set[Any], element_to_add: Any) -> bool: + """ + Tries to add a given element to the given set. + Returns True if element was added, otherwise False. + """ + initial_length = len(set_to_add_to) + set_to_add_to.add(element_to_add) + return len(set_to_add_to) != initial_length + def to_state(given: Any) -> Union[State, None]: """ Transforms the input into a state diff --git a/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py b/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py index eb1dc03..b0c0981 100644 --- a/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py @@ -288,6 +288,11 @@ def test_word_generation(self): assert [Symbol("b"), Symbol("d")] in accepted_words assert len(accepted_words) == 3 + def test_dfa_generating_no_words(self): + dfa = get_dfa_example_without_accepted_words() + accepted_words = list(dfa.get_accepted_words()) + assert not accepted_words + def get_example0(): """ Gives a dfa """ @@ -355,3 +360,18 @@ def get_dfa_example_for_word_generation(): dfa.add_final_state(states[0]) dfa.add_final_state(states[3]) return dfa + + +def get_dfa_example_without_accepted_words(): + """ DFA example accepting no words """ + dfa = DeterministicFiniteAutomaton() + states = [State(x) for x in range(4)] + symbol_a = Symbol("a") + symbol_b = Symbol("b") + dfa.add_transitions([ + (states[0], symbol_a, states[1]), + (states[2], symbol_b, states[3]), + ]) + dfa.add_start_state(states[0]) + dfa.add_final_state(states[3]) + return dfa diff --git a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py index 8b52127..f273e35 100644 --- a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py +++ b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py @@ -661,7 +661,7 @@ def test_max_length_zero_accepting_empty_string(self): def test_max_length_zero_not_accepting_empty_string(self): enfa = get_cyclic_enfa_example() accepted_words = list(enfa.get_accepted_words(0)) - assert accepted_words == [] + assert not accepted_words def get_digits_enfa(): diff --git a/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py index f4eec26..fe7819c 100644 --- a/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py @@ -126,6 +126,13 @@ def test_word_generation(self): assert [Symbol("d"), Symbol("e"), Symbol("f")] in accepted_words assert len(accepted_words) == 5 + def test_for_duplicate_generation(self): + nfa = get_nfa_example_with_duplicates() + accepted_words = list(nfa.get_accepted_words()) + assert [Symbol("a"), Symbol("c")] in accepted_words + assert [Symbol("b"), Symbol("c")] in accepted_words + assert len(accepted_words) == 2 + def get_nfa_example_for_word_generation(): """ @@ -158,3 +165,29 @@ def get_nfa_example_for_word_generation(): nfa.add_final_state(states[6]) nfa.add_final_state(states[8]) return nfa + + +def get_nfa_example_with_duplicates(): + """ Gets NFA example with duplicate word chains """ + nfa = NondeterministicFiniteAutomaton() + states = [State(x) for x in range(9)] + symbol_a = Symbol("a") + symbol_b = Symbol("b") + symbol_c = Symbol("c") + nfa.add_transitions([ + (states[0], symbol_a, states[2]), + (states[1], symbol_a, states[2]), + (states[2], symbol_c, states[3]), + (states[2], symbol_c, states[4]), + (states[5], symbol_a, states[7]), + (states[6], symbol_b, states[7]), + (states[7], symbol_c, states[8]), + ]) + nfa.add_start_state(states[0]) + nfa.add_start_state(states[1]) + nfa.add_start_state(states[5]) + nfa.add_start_state(states[6]) + nfa.add_final_state(states[3]) + nfa.add_final_state(states[4]) + nfa.add_final_state(states[8]) + return nfa From ebf710157bf7e01dd540df2ffdbcf508ccdd5c16 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Sun, 13 Oct 2024 23:15:15 +0300 Subject: [PATCH 19/42] add type annotations for finite_automaton module, refactor --- .../deterministic_finite_automaton.py | 94 ++++++++------- .../finite_automaton/doubly_linked_list.py | 26 +++-- .../finite_automaton/doubly_linked_node.py | 29 ++--- pyformlang/finite_automaton/epsilon.py | 7 +- pyformlang/finite_automaton/epsilon_nfa.py | 79 +++++++------ .../finite_automaton/finite_automaton.py | 109 +++++++++++------- .../finite_automaton_object.py | 2 +- .../hopcroft_processing_list.py | 19 +-- .../nondeterministic_finite_automaton.py | 9 +- .../nondeterministic_transition_function.py | 103 +++++++++-------- pyformlang/finite_automaton/partition.py | 21 ++-- pyformlang/finite_automaton/regexable.py | 16 ++- pyformlang/finite_automaton/state.py | 2 +- .../finite_automaton/transition_function.py | 77 ++++++++----- pyrightconfig.json | 23 ++++ 15 files changed, 351 insertions(+), 265 deletions(-) create mode 100644 pyrightconfig.json diff --git a/pyformlang/finite_automaton/deterministic_finite_automaton.py b/pyformlang/finite_automaton/deterministic_finite_automaton.py index 7ed51aa..f58e649 100644 --- a/pyformlang/finite_automaton/deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/deterministic_finite_automaton.py @@ -2,6 +2,14 @@ Representation of a deterministic finite automaton """ +# pylint: disable=too-many-arguments + +from typing import \ + Dict, List, Iterable, Set, \ + AbstractSet, Optional, Any + +from numpy import empty + from typing import AbstractSet, Iterable, Any import numpy as np @@ -10,7 +18,7 @@ from .epsilon_nfa import to_single_state from .finite_automaton import to_state, to_symbol from .hopcroft_processing_list import HopcroftProcessingList -# pylint: disable=cyclic-import +from .finite_automaton import FiniteAutomaton from .nondeterministic_finite_automaton import NondeterministicFiniteAutomaton from .partition import Partition from .state import State @@ -21,18 +29,23 @@ class PreviousTransitions: """For internal usage""" - def __init__(self, states, symbols): - self._to_index_state = {} + def __init__(self, + states: AbstractSet[Any], + symbols: AbstractSet[Any]) -> None: + self._to_index_state: Dict[Optional[State], int] = {} self._to_index_state[None] = 0 for i, state in enumerate(states): self._to_index_state[state] = i + 1 - self._to_index_symbol = {} + self._to_index_symbol: Dict[Optional[Symbol], int] = {} for i, symbol in enumerate(symbols): self._to_index_symbol[symbol] = i - self._conversion = np.empty((len(states) + 1, len(symbols)), + self._conversion = empty((len(states) + 1, len(symbols)), dtype=object) - def add(self, next0, symbol, state): + def add(self, + next0: Optional[State], + symbol: Symbol, + state: Optional[State]) -> None: """ Internal """ i_next0 = self._to_index_state[next0] i_symbol = self._to_index_symbol[symbol] @@ -41,7 +54,7 @@ def add(self, next0, symbol, state): else: self._conversion[i_next0, i_symbol].append(state) - def get(self, next0, symbol): + def get(self, next0: State, symbol: Symbol) -> List[object]: """ Internal """ i_next0 = self._to_index_state[next0] i_symbol = self._to_index_symbol[symbol] @@ -101,23 +114,23 @@ class DeterministicFiniteAutomaton(NondeterministicFiniteAutomaton): """ - # pylint: disable=too-many-arguments def __init__(self, - states: AbstractSet[State] = None, - input_symbols: AbstractSet[Symbol] = None, + states: AbstractSet[Any] = None, + input_symbols: AbstractSet[Any] = None, transition_function: TransitionFunction = None, - start_state: State = None, - final_states: AbstractSet[State] = None): + start_state: Any = None, + final_states: AbstractSet[Any] = None) -> None: super().__init__(states, input_symbols, None, None, final_states) - start_state = to_state(start_state) - self._transition_function = transition_function or TransitionFunction() + self._transition_function: TransitionFunction = \ + transition_function or TransitionFunction() if start_state is not None: + start_state = to_state(start_state) self._start_state = {start_state} - else: - self._start_state = {} - if start_state is not None: self._states.add(start_state) + else: + self._start_state = set() + def add_start_state(self, state: Any) -> int: def add_start_state(self, state: Any) -> int: """ Set an initial state @@ -143,6 +156,7 @@ def add_start_state(self, state: Any) -> int: self._states.add(state) return 1 + def remove_start_state(self, state: Any) -> int: def remove_start_state(self, state: Any) -> int: """ remove an initial state @@ -165,11 +179,12 @@ def remove_start_state(self, state: Any) -> int: """ state = to_state(state) - if {state} == self._start_state: - self._start_state = {} + if self._start_state == {state}: + self._start_state.remove(state) return 1 return 0 + def accepts(self, word: Iterable[Any]) -> bool: def accepts(self, word: Iterable[Any]) -> bool: """ Checks whether the dfa accepts a given word @@ -201,11 +216,8 @@ def accepts(self, word: Iterable[Any]) -> bool: for symbol in word: if current_state is None: return False - current_state = self._transition_function(current_state, symbol) - if current_state: - current_state = current_state[0] - else: - current_state = None + current_state = self._transition_function.get_state(current_state, + symbol) return current_state is not None and self.is_final_state(current_state) def is_deterministic(self) -> bool: @@ -275,25 +287,17 @@ def copy(self) -> "DeterministicFiniteAutomaton": dfa.add_final_state(final) for state in self._states: for symbol in self._input_symbols: - state_to = self._transition_function(state, symbol) - if state_to: - state_to = state_to[0] - else: - state_to = None + state_to = self._transition_function.get_state(state, symbol) if state_to is not None: dfa.add_transition(state, symbol, state_to) return dfa - def _get_previous_transitions(self): + def _get_previous_transitions(self) -> PreviousTransitions: previous_transitions = PreviousTransitions(self._states, self._input_symbols) for state in self._states: for symbol in self._input_symbols: - next0 = self._transition_function(state, symbol) - if next0: - next0 = next0[0] - else: - next0 = None + next0 = self._transition_function.get_state(state, symbol) previous_transitions.add(next0, symbol, state) for symbol in self._input_symbols: previous_transitions.add(None, symbol, None) @@ -346,15 +350,15 @@ def minimize(self) -> "DeterministicFiniteAutomaton": done = set() new_state = to_new_states[state] for symbol in self._input_symbols: - for next_node in self._transition_function(state, symbol): - if next_node in states: - next_node = to_new_states[next_node] - if (next_node, symbol) not in done: - dfa.add_transition(new_state, symbol, next_node) - done.add((next_node, symbol)) + next_node = self._transition_function.get_state(state, symbol) + if next_node and next_node in states: + next_node = to_new_states[next_node] + if (next_node, symbol) not in done: + dfa.add_transition(new_state, symbol, next_node) + done.add((next_node, symbol)) return dfa - def _get_partition(self): + def _get_partition(self) -> Partition: previous_transitions = self._get_previous_transitions() finals = [] non_finals = [] @@ -395,7 +399,7 @@ def _get_partition(self): processing_list.insert(new_class, symbol) return partition - def is_equivalent_to(self, other): + def is_equivalent_to(self, other: FiniteAutomaton) -> bool: """ Check whether two automata are equivalent Parameters @@ -434,7 +438,9 @@ def start_state(self) -> State: return list(self._start_state)[0] @staticmethod - def _is_equivalent_to_minimal(self_minimal, other_minimal): + def _is_equivalent_to_minimal( + self_minimal: "DeterministicFiniteAutomaton", + other_minimal: "DeterministicFiniteAutomaton") -> bool: to_process = [(self_minimal.start_state, other_minimal.start_state)] matches = {self_minimal.start_state: other_minimal.start_state} diff --git a/pyformlang/finite_automaton/doubly_linked_list.py b/pyformlang/finite_automaton/doubly_linked_list.py index d81ce46..d1345e9 100644 --- a/pyformlang/finite_automaton/doubly_linked_list.py +++ b/pyformlang/finite_automaton/doubly_linked_list.py @@ -1,29 +1,31 @@ """A doubly linked list""" +from typing import Iterable, Optional, Any + from .doubly_linked_node import DoublyLinkedNode -class DoublyLinkedList: +class DoublyLinkedList(Iterable[DoublyLinkedNode]): """ A doubly linked list """ - def __init__(self): - self.first = None - self.last = None + def __init__(self) -> None: + self.first: Optional[DoublyLinkedNode] = None + self.last: Optional[DoublyLinkedNode] = None self.size = 0 - self._current_node = None + self._current_node: Optional[DoublyLinkedNode] = None - def append(self, value): + def append(self, value: Any) -> DoublyLinkedNode: """ Appends an element """ if self.last is not None: self.last = self.last.append(value) else: - node = DoublyLinkedNode(self, value=value) + node = DoublyLinkedNode(value=value) self.first = node self.last = node - self.size += 1 + self.size += 1 return self.last - def delete(self, node): + def delete(self, node: DoublyLinkedNode) -> None: """ Delete an element """ if node.next_node is not None: node.next_node.previous_node = node.previous_node @@ -35,14 +37,14 @@ def delete(self, node): self.first = node.next_node self.size -= 1 - def __len__(self): + def __len__(self) -> int: return self.size - def __iter__(self): + def __iter__(self) -> "DoublyLinkedList": self._current_node = self.first return self - def __next__(self): + def __next__(self) -> DoublyLinkedNode: if self._current_node is None: raise StopIteration res = self._current_node diff --git a/pyformlang/finite_automaton/doubly_linked_node.py b/pyformlang/finite_automaton/doubly_linked_node.py index f776386..7621fff 100644 --- a/pyformlang/finite_automaton/doubly_linked_node.py +++ b/pyformlang/finite_automaton/doubly_linked_node.py @@ -1,24 +1,20 @@ """Linked nodes in both direction""" +from typing import Optional, Any + class DoublyLinkedNode: """Represents doubly linked list of nodes from a doubly linked list""" def __init__(self, - list_in, - next_node=None, - previous_node=None, - value=None): - self.next_node = next_node - self.previous_node = previous_node - self.value = value - self.list_in = list_in - - def delete(self): - """Delete the current node""" - self.list_in.delete(self) - - def append(self, value): + next_node: "DoublyLinkedNode" = None, + previous_node: "DoublyLinkedNode" = None, + value: Any = None) -> None: + self.next_node: Optional[DoublyLinkedNode] = next_node + self.previous_node: Optional[DoublyLinkedNode] = previous_node + self.value: Any = value + + def append(self, value: Any) -> "DoublyLinkedNode": """ Append a new node with the given value @@ -33,9 +29,6 @@ def append(self, value): The created node """ - next_node = DoublyLinkedNode(self.list_in, self.next_node, self, value) - if self.next_node is None: - self.list_in.last = next_node + next_node = DoublyLinkedNode(self.next_node, self, value) self.next_node = next_node - self.list_in.size += 1 return next_node diff --git a/pyformlang/finite_automaton/epsilon.py b/pyformlang/finite_automaton/epsilon.py index 75c81ee..958bc34 100644 --- a/pyformlang/finite_automaton/epsilon.py +++ b/pyformlang/finite_automaton/epsilon.py @@ -2,6 +2,7 @@ Represents an epsilon transition """ +from typing import Any from .symbol import Symbol @@ -15,13 +16,13 @@ class Epsilon(Symbol): # pylint: disable=too-few-public-methods """ - def __init__(self): + def __init__(self) -> None: super().__init__("epsilon") - def __hash__(self): + def __hash__(self) -> int: return hash("EPSILON TRANSITION") - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: if isinstance(other, Epsilon): return True return False diff --git a/pyformlang/finite_automaton/epsilon_nfa.py b/pyformlang/finite_automaton/epsilon_nfa.py index 9d74542..35cd736 100644 --- a/pyformlang/finite_automaton/epsilon_nfa.py +++ b/pyformlang/finite_automaton/epsilon_nfa.py @@ -2,16 +2,20 @@ Nondeterministic Automaton with epsilon transitions """ -from typing import Set, Iterable, AbstractSet, Any +# pylint: disable=too-many-arguments -# pylint: disable=cyclic-import -from pyformlang import finite_automaton +from typing import Iterable, Set, AbstractSet, Tuple, Any + +from pyformlang.finite_automaton import NondeterministicFiniteAutomaton +from pyformlang.finite_automaton import DeterministicFiniteAutomaton +from pyformlang.regular_expression import Regex from .epsilon import Epsilon from .state import State from .symbol import Symbol from .nondeterministic_transition_function import \ NondeterministicTransitionFunction + from .regexable import Regexable from .finite_automaton import FiniteAutomaton from .finite_automaton import to_state, to_symbol @@ -20,7 +24,6 @@ class EpsilonNFA(Regexable, FiniteAutomaton): """ Represents an epsilon NFA - Parameters ---------- states : set of :class:`~pyformlang.finite_automaton.State`, optional @@ -63,14 +66,13 @@ class EpsilonNFA(Regexable, FiniteAutomaton): """ - # pylint: disable=too-many-arguments def __init__( self, - states: AbstractSet[State] = None, - input_symbols: AbstractSet[Symbol] = None, + states: AbstractSet[Any] = None, + input_symbols: AbstractSet[Any] = None, transition_function: NondeterministicTransitionFunction = None, - start_state: AbstractSet[State] = None, - final_states: AbstractSet[State] = None): + start_state: AbstractSet[Any] = None, + final_states: AbstractSet[Any] = None) -> None: super().__init__() if states is not None: states = {to_state(x) for x in states} @@ -78,8 +80,8 @@ def __init__( if input_symbols is not None: input_symbols = {to_symbol(x) for x in input_symbols} self._input_symbols = input_symbols or set() - self._transition_function = \ - transition_function or NondeterministicTransitionFunction() + if transition_function is not None: + self._transition_function = transition_function if start_state is not None: start_state = {to_state(x) for x in start_state} self._start_state = start_state or set() @@ -248,20 +250,20 @@ def is_deterministic(self) -> bool: """ return len(self._start_state) <= 1 \ - and self._transition_function.is_deterministic()\ + and self._transition_function.is_deterministic() \ and all({x} == self.eclose(x) for x in self._states) - def remove_epsilon_transitions(self) -> "NondeterministicFiniteAutomaton": + def remove_epsilon_transitions(self) \ + -> NondeterministicFiniteAutomaton: """ Removes the epsilon transitions from the automaton Returns ---------- - dfa : :class:`~pyformlang.finite_automaton.\ -NondeterministicFiniteAutomaton` + dfa : :class:`~pyformlang.finite_automaton. \ + NondeterministicFiniteAutomaton` A non-deterministic finite automaton equivalent to the current \ -nfa, with no epsilon transition + nfa, with no epsilon transition """ - from pyformlang.finite_automaton import NondeterministicFiniteAutomaton nfa = NondeterministicFiniteAutomaton() for state in self._start_state: nfa.add_start_state(state) @@ -282,7 +284,7 @@ def remove_epsilon_transitions(self) -> "NondeterministicFiniteAutomaton": def _to_deterministic_internal(self, eclose: bool) \ - -> "DeterministicFiniteAutomaton": + -> DeterministicFiniteAutomaton: """ Transforms the epsilon-nfa into a dfa Parameters @@ -296,7 +298,7 @@ def _to_deterministic_internal(self, .DeterministicFiniteAutomaton` A dfa equivalent to the current nfa """ - dfa = finite_automaton.DeterministicFiniteAutomaton() + dfa = DeterministicFiniteAutomaton() # Add Eclose if eclose: start_eclose = self.eclose_iterable(self._start_state) @@ -330,7 +332,8 @@ def _to_deterministic_internal(self, dfa.add_final_state(s_from) return dfa - def to_deterministic(self) -> "DeterministicFiniteAutomaton": + def to_deterministic(self) \ + -> DeterministicFiniteAutomaton: """ Transforms the epsilon-nfa into a dfa Returns @@ -393,10 +396,10 @@ def copy(self) -> "EpsilonNFA": enfa.add_transition(state, Epsilon(), state_to) return enfa - def __copy__(self): + def __copy__(self) -> "EpsilonNFA": return self.copy() - def to_regex(self) -> "Regex": + def to_regex(self) -> Regex: """ Transforms the EpsilonNFA to a regular expression Returns @@ -417,7 +420,7 @@ def to_regex(self) -> "Regex": True """ - from pyformlang.regular_expression import Regex + # pylint: disable=protected-access enfas = [self.copy() for _ in self._final_states] final_states = list(self._final_states) for i in range(len(self._final_states)): @@ -426,9 +429,7 @@ def to_regex(self) -> "Regex": enfas[j].remove_final_state(final_states[i]) regex_l = [] for enfa in enfas: - # pylint: disable=protected-access enfa._remove_all_basic_states() - # pylint: disable=protected-access regex_sub = enfa._get_regex_simple() if regex_sub: regex_l.append(regex_sub) @@ -465,7 +466,7 @@ def _get_regex_simple(self) -> str: end_to_start, end_to_end) - def _get_bi_transitions(self) -> (str, str, str, str): + def _get_bi_transitions(self) -> Tuple[str, str, str, str]: """ Internal method to compute the transition in the case of a \ simple automaton @@ -550,7 +551,7 @@ def get_complement(self) -> "EpsilonNFA": enfa.add_transition(trash, symbol, trash) return enfa - def __neg__(self): + def __neg__(self) -> "EpsilonNFA": """ Get the complement of the current Epsilon NFA Returns @@ -622,7 +623,7 @@ def get_intersection(self, other: "EpsilonNFA") -> "EpsilonNFA": to_process.append((new_s0, new_s1)) return enfa - def __and__(self, other): + def __and__(self, other: "EpsilonNFA") -> "EpsilonNFA": """ Computes the intersection of two Epsilon NFAs Parameters @@ -637,8 +638,7 @@ def __and__(self, other): """ return self.get_intersection(other) - def get_difference(self, other: "EpsilonNFA") \ - -> "EpsilonNFA": + def get_difference(self, other: "EpsilonNFA") -> "EpsilonNFA": """ Compute the difference with another Epsilon NFA Equivalent to: @@ -680,7 +680,7 @@ def get_difference(self, other: "EpsilonNFA") \ other.add_symbol(symbol) return self.get_intersection(other.get_complement()) - def __sub__(self, other): + def __sub__(self, other: "EpsilonNFA") -> "EpsilonNFA": """ Compute the difference with another Epsilon NFA Equivalent to: @@ -734,7 +734,7 @@ def reverse(self) -> "EpsilonNFA": enfa.add_start_state(final) return enfa - def __invert__(self): + def __invert__(self) -> "EpsilonNFA": """ Compute the reversed EpsilonNFA Returns @@ -784,10 +784,9 @@ def is_empty(self) -> bool: processed.add(state) return True - def _remove_all_basic_states(self): + def _remove_all_basic_states(self) -> None: """ Remove all states which are not the start state or a final state - CAREFUL: This method modifies the current automaton, for internal usage only! @@ -801,7 +800,7 @@ def _remove_all_basic_states(self): and state not in self._final_states): self._remove_state(state) - def _remove_state(self, state: State): + def _remove_state(self, state: State) -> None: """ Removes a given state from the epsilon NFA CAREFUL: This method modifies the current automaton, for internal usage @@ -846,7 +845,7 @@ def _remove_state(self, state: State): # We make sure the automaton has the good structure self._create_or_transitions() - def minimize(self) -> "DeterministicFiniteAutomaton": + def minimize(self) -> DeterministicFiniteAutomaton: """ Minimize the current epsilon NFA Returns @@ -870,7 +869,7 @@ def minimize(self) -> "DeterministicFiniteAutomaton": """ return self.to_deterministic().minimize() - def _create_or_transitions(self): + def _create_or_transitions(self) -> None: """ Creates a OR transition instead of several connections CAREFUL: This method modifies the automaton and is designed for \ @@ -898,12 +897,12 @@ def _create_or_transitions(self): next_symb, out_state) - def __bool__(self): + def __bool__(self) -> bool: return not self.is_empty() def get_temp(start_to_end: str, end_to_start: str, end_to_end: str) \ - -> (str, str): + -> Tuple[str, str]: """ Gets a temp values in the computation of the simple automaton regex """ temp = "epsilon" if (start_to_end != "epsilon" @@ -972,6 +971,6 @@ def to_single_state(l_states: Iterable[State]) -> State: return State(";".join(values)) -def combine_state_pair(state0, state1): +def combine_state_pair(state0: State, state1: State) -> State: """ Combine two states """ return State(str(state0.value) + "; " + str(state1.value)) diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index d33ba84..9272fe1 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -1,24 +1,30 @@ """ A general finite automaton representation """ -from typing import List, Iterable, Set, Optional, Union, Any -from collections import deque +# pylint: disable=function-redefined -import networkx as nx +from typing import \ + Dict, List, Set, Tuple, \ + Iterable, Union, Optional, Any + +from fastcore.dispatch import typedispatch +from networkx import MultiDiGraph, transitive_closure from networkx.drawing.nx_pydot import write_dot +from pyformlang.finite_automaton import EpsilonNFA +from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.fst import FST -# pylint: disable=cyclic-import -from pyformlang import finite_automaton from .epsilon import Epsilon from .state import State from .symbol import Symbol +from .nondeterministic_transition_function import \ + NondeterministicTransitionFunction +from .transition_function import TransitionFunction class FiniteAutomaton: """ Represents a general finite automaton - Attributes ---------- _states : set of :class:`~pyformlang.finite_automaton.State`, optional @@ -37,12 +43,15 @@ class FiniteAutomaton: A set of final or accepting states. It is a subset of states. """ - def __init__(self): - self._states = set() - self._input_symbols = set() - self._transition_function = None - self._start_state = set() - self._final_states = set() + def __init__(self) -> None: + self._states: Set[State] = set() + self._input_symbols: Set[Symbol] = set() + self._transition_function: Union[NondeterministicTransitionFunction, + TransitionFunction] \ + = NondeterministicTransitionFunction() + self._start_state: Set[State] = set() + self._final_states: Set[State] = set() + self.__transitive_closure: Optional[MultiDiGraph] = None def add_transition(self, s_from: Any, symb_by: Any, s_to: Any) -> int: @@ -85,7 +94,8 @@ def add_transition(self, s_from: Any, symb_by: Any, self._input_symbols.add(symb_by) return temp - def add_transitions(self, transitions_list): + def add_transitions(self, \ + transitions_list: Iterable[Tuple[Any, Any, Any]]) -> int: """ Adds several transitions to the automaton @@ -119,8 +129,8 @@ def add_transitions(self, transitions_list): temp = self.add_transition(s_from, symb_by, s_to) return temp - def remove_transition(self, s_from: State, symb_by: Symbol, - s_to: State) -> int: + def remove_transition(self, s_from: Any, symb_by: Any, + s_to: Any) -> int: """ Remove a transition of the nfa Parameters @@ -154,7 +164,7 @@ def remove_transition(self, s_from: State, symb_by: Symbol, s_to) @property - def states(self): + def states(self) -> Set[State]: """ Gives the states Returns @@ -185,12 +195,12 @@ def get_number_transitions(self) -> int: return self._transition_function.get_number_transitions() @property - def symbols(self): + def symbols(self) -> Set[Symbol]: """The symbols""" return self._input_symbols @property - def final_states(self): + def final_states(self) -> Set[State]: """The final states""" return self._final_states @@ -221,7 +231,7 @@ def add_start_state(self, state: Any) -> int: self._states.add(state) return 1 - def remove_start_state(self, state: State) -> int: + def remove_start_state(self, state: Any) -> int: """ remove an initial state Parameters @@ -278,7 +288,7 @@ def add_final_state(self, state: Any) -> int: self._states.add(state) return 1 - def remove_final_state(self, state: State) -> int: + def remove_final_state(self, state: Any) -> int: """ Remove a final state Parameters @@ -307,7 +317,19 @@ def remove_final_state(self, state: State) -> int: return 1 return 0 - def __call__(self, state: Any, symbol: Any = None) -> List[State]: + @typedispatch + def __call__(self, state: Any) \ + -> Iterable[Tuple[Symbol, Set[State]]]: + """ + Gives FA transitions from given state. + Calls the transition function + """ + state = to_state(state) + return self._transition_function(state) + + @typedispatch + def __call__(self, state: Any, symbol: Any) \ + -> Set[State]: """ Gives the states obtained after calling a symbol on a state Calls the transition function @@ -333,13 +355,11 @@ def __call__(self, state: Any, symbol: Any = None) -> List[State]: [1] """ - # pylint: disable=not-callable state = to_state(state) - if symbol is not None: - symbol = to_symbol(symbol) + symbol = to_symbol(symbol) return self._transition_function(state, symbol) - def is_final_state(self, state: State) -> bool: + def is_final_state(self, state: Any) -> bool: """ Checks if a state is final Parameters @@ -368,11 +388,11 @@ def is_final_state(self, state: State) -> bool: return state in self._final_states @property - def start_states(self): + def start_states(self) -> Set[State]: """The start states""" return self._start_state - def add_symbol(self, symbol: Symbol): + def add_symbol(self, symbol: Any) -> None: """ Add a symbol Parameters @@ -390,7 +410,7 @@ def add_symbol(self, symbol: Symbol): symbol = to_symbol(symbol) self._input_symbols.add(symbol) - def to_fst(self) -> "FST": + def to_fst(self) -> FST: """ Turns the finite automaton into a finite state transducer The transducers accepts only the words in the language of the \ @@ -459,7 +479,7 @@ def is_acyclic(self) -> bool: to_process.append((state, visited.copy())) return True - def to_networkx(self) -> nx.MultiDiGraph: + def to_networkx(self) -> MultiDiGraph: """ Transform the current automaton into a networkx graph @@ -479,7 +499,7 @@ def to_networkx(self) -> nx.MultiDiGraph: >>> graph = enfa.to_networkx() """ - graph = nx.MultiDiGraph() + graph = MultiDiGraph() for state in self._states: graph.add_node(state.value, is_start=state in self.start_states, @@ -496,7 +516,8 @@ def to_networkx(self) -> nx.MultiDiGraph: return graph @classmethod - def from_networkx(cls, graph): + def from_networkx(cls, graph: MultiDiGraph) \ + -> EpsilonNFA: """ Import a networkx graph into an finite state automaton. \ The imported graph requires to have the good format, i.e. to come \ @@ -529,7 +550,7 @@ def from_networkx(cls, graph): >>> enfa_from_nx = EpsilonNFA.from_networkx(graph) """ - enfa = finite_automaton.EpsilonNFA() + enfa = EpsilonNFA() for s_from in graph: for s_to in graph[s_from]: for transition in graph[s_from][s_to].values(): @@ -544,7 +565,7 @@ def from_networkx(cls, graph): enfa.add_final_state(node) return enfa - def write_as_dot(self, filename): + def write_as_dot(self, filename: str) -> None: """ Write the automaton in dot format into a file @@ -565,7 +586,7 @@ def write_as_dot(self, filename): """ write_dot(self.to_networkx(), filename) - def is_equivalent_to(self, other): + def is_equivalent_to(self, other: "FiniteAutomaton") -> bool: """ Checks if the current automaton is equivalent to a given one. @@ -670,25 +691,27 @@ def _get_next_states_from(self, state_from: State) -> Set[State]: next_states.add(next_state) return next_states - def to_deterministic(self): + def to_deterministic(self) -> DeterministicFiniteAutomaton: """ Turns the automaton into a deterministic one""" raise NotImplementedError - def is_deterministic(self): + def is_deterministic(self) -> bool: """ Checks if the automaton is deterministic """ raise NotImplementedError - def __eq__(self, other): - return self.is_equivalent_to(other) + def __eq__(self, other: Any) -> bool: + if isinstance(other, FiniteAutomaton): + return self.is_equivalent_to(other) + return False - def __len__(self): + def __len__(self) -> int: """Number of transitions""" return len(self._transition_function) - def __iter__(self): + def __iter__(self) -> Iterable[Tuple[State, Symbol, State]]: yield from self._transition_function.__iter__() - def to_dict(self): + def to_dict(self) -> Dict[State, Dict[Symbol, Set[State]]]: """ Get the dictionary representation of the transition function. The \ keys of the dictionary are the source nodes. The items are \ @@ -732,8 +755,6 @@ def to_state(given: Any) -> Union[State, None]: given : any What we want to transform """ - if given is None: - return None if isinstance(given, State): return given return State(given) @@ -754,7 +775,7 @@ def to_symbol(given: Any) -> Symbol: return Symbol(given) -def add_start_state_to_graph(graph, state): +def add_start_state_to_graph(graph: MultiDiGraph, state: State) -> None: """ Adds a starting node to a given graph """ graph.add_node("starting_" + str(state.value), label="", diff --git a/pyformlang/finite_automaton/finite_automaton_object.py b/pyformlang/finite_automaton/finite_automaton_object.py index b182e0c..67ab262 100644 --- a/pyformlang/finite_automaton/finite_automaton_object.py +++ b/pyformlang/finite_automaton/finite_automaton_object.py @@ -14,7 +14,7 @@ class FiniteAutomatonObject: # pylint: disable=too-few-public-methods The value of the object """ - def __init__(self, value: Any): + def __init__(self, value: Any) -> None: self._value = value self._hash = None diff --git a/pyformlang/finite_automaton/hopcroft_processing_list.py b/pyformlang/finite_automaton/hopcroft_processing_list.py index 5b62220..ff91b32 100644 --- a/pyformlang/finite_automaton/hopcroft_processing_list.py +++ b/pyformlang/finite_automaton/hopcroft_processing_list.py @@ -2,7 +2,8 @@ For internal usage """ -import numpy as np +from typing import Dict, List, Set, Tuple, Any +from numpy import zeros class HopcroftProcessingList: @@ -10,29 +11,29 @@ class HopcroftProcessingList: For internal usage """ - def __init__(self, n_states, symbols): - self._reverse_symbols = {} + def __init__(self, n_states: int, symbols: Set[Any]) -> None: + self._reverse_symbols: Dict[Any, int] = {} for i, symbol in enumerate(symbols): self._reverse_symbols[symbol] = i - self._inclusion = np.zeros((n_states, len(symbols)), dtype=bool) - self._elements = [] + self._inclusion = zeros((n_states, len(symbols)), dtype=bool) + self._elements: List[Tuple[int, Any]] = [] - def is_empty(self): + def is_empty(self) -> bool: """Check if empty""" return len(self._elements) == 0 - def contains(self, class_name, symbol): + def contains(self, class_name: int, symbol: Any) -> bool: """ Check containment """ i_symbol = self._reverse_symbols[symbol] return self._inclusion[class_name, i_symbol] - def insert(self, class_name, symbol): + def insert(self, class_name: int, symbol: Any) -> None: """ Insert new element """ i_symbol = self._reverse_symbols[symbol] self._inclusion[class_name, i_symbol] = True self._elements.append((class_name, symbol)) - def pop(self): + def pop(self) -> Tuple[int, Any]: """ Pop an element """ res = self._elements.pop() i_symbol = self._reverse_symbols[res[1]] diff --git a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py index 210c91b..f7ac681 100644 --- a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py @@ -4,8 +4,9 @@ from typing import Iterable, Any -# pylint: disable=cyclic-import -from pyformlang.finite_automaton import epsilon +from pyformlang.finite_automaton import \ + Epsilon, DeterministicFiniteAutomaton + from .epsilon_nfa import EpsilonNFA from .finite_automaton import to_symbol from .transition_function import InvalidEpsilonTransition @@ -113,7 +114,7 @@ def is_deterministic(self) -> bool: return len(self._start_state) <= 1 and \ self._transition_function.is_deterministic() - def to_deterministic(self) -> "DeterministicFiniteAutomaton": + def to_deterministic(self) -> DeterministicFiniteAutomaton: """ Transforms the nfa into a dfa Returns @@ -140,6 +141,6 @@ def add_transition(self, s_from: Any, symb_by: Any, s_to: Any) -> int: - if symb_by == epsilon.Epsilon(): + if symb_by == Epsilon(): raise InvalidEpsilonTransition return super().add_transition(s_from, symb_by, s_to) diff --git a/pyformlang/finite_automaton/nondeterministic_transition_function.py b/pyformlang/finite_automaton/nondeterministic_transition_function.py index 50c253a..7479a0a 100644 --- a/pyformlang/finite_automaton/nondeterministic_transition_function.py +++ b/pyformlang/finite_automaton/nondeterministic_transition_function.py @@ -1,8 +1,12 @@ """ A nondeterministic transition function """ + +# pylint: disable=function-redefined + import copy -from typing import Set, Iterable, Tuple +from typing import Dict, Set, Iterable, Tuple +from fastcore.dispatch import typedispatch from .state import State from .symbol import Symbol @@ -25,7 +29,7 @@ class NondeterministicTransitionFunction: """ def __init__(self): - self._transitions = {} + self._transitions: Dict[State, Dict[Symbol, Set[State]]] = {} def add_transition(self, s_from: State, symb_by: Symbol, s_to: State) -> int: @@ -120,10 +124,19 @@ def get_number_transitions(self) -> int: counter += len(s_to) return counter - def __len__(self): + def __len__(self) -> int: return self.get_number_transitions() - def __call__(self, s_from: State, symb_by: Symbol = None) -> Set[State]: + @typedispatch + def __call__(self, s_from: State) \ + -> Iterable[Tuple[Symbol, Set[State]]]: + """ Calls the transition function as a real function """ + if s_from in self._transitions: + yield from self._transitions[s_from].items() + + @typedispatch + def __call__(self, s_from: State, symb_by: Symbol) \ + -> Set[State]: """ Calls the transition function as a real function Parameters @@ -135,42 +148,24 @@ def __call__(self, s_from: State, symb_by: Symbol = None) -> Set[State]: Returns ---------- - s_from : :class:`~pyformlang.finite_automaton.State` or None - The destination state or None if it does not exists + s_from : set :class:`~pyformlang.finite_automaton.State` + Set of destination states """ if s_from in self._transitions: - if symb_by is not None: - if symb_by in self._transitions[s_from]: - return self._transitions[s_from][symb_by] - else: - return self._transitions[s_from].items() + if symb_by in self._transitions[s_from]: + return self._transitions[s_from][symb_by] return set() - def is_deterministic(self): - """ Whether the transition function is deterministic - - Returns - ---------- - is_deterministic : bool - Whether the function is deterministic - - Examples - -------- - - >>> transition = NondeterministicTransitionFunction() - >>> transition.add_transition(State(0), Symbol("a"), State(1)) - >>> transition.is_deterministic() - True - - """ - for transitions in self._transitions.values(): - for s_to in transitions.values(): - if len(s_to) > 1: - return False - return True + def get_transitions_from(self, state_from: State) \ + -> Iterable[Tuple[Symbol, State]]: + """ Gets transitions from the given state """ + if state_from in self._transitions: + for symb_by, states_to in self._transitions[state_from].items(): + for state_to in states_to: + yield symb_by, state_to - def get_edges(self): + def get_edges(self) -> Iterable[Tuple[State, Symbol, State]]: """ Gets the edges Returns @@ -180,15 +175,14 @@ def get_edges(self): :class:`~pyformlang.finite_automaton.State`) A generator of edges """ - for state, transitions in self._transitions.items(): - for symbol, next_states in transitions.items(): - for next_state in next_states: - yield state, symbol, next_state + for s_from in self._transitions: + for (symb_by, s_to) in self.get_transitions_from(s_from): + yield (s_from, symb_by, s_to) - def __iter__(self): + def __iter__(self) -> Iterable[Tuple[State, Symbol, State]]: yield from self.get_edges() - def to_dict(self): + def to_dict(self) -> Dict[State, Dict[Symbol, Set[State]]]: """ Get the dictionary representation of the transition function. The keys of the dictionary are the source nodes. The items are dictionaries @@ -202,10 +196,25 @@ def to_dict(self): """ return copy.deepcopy(self._transitions) - def get_transitions_from(self, state_from: State) \ - -> Iterable[Tuple[Symbol, State]]: - """ Gets transitions from the given state """ - if state_from in self._transitions: - for symb_by, states_to in self._transitions[state_from].items(): - for state_to in states_to: - yield symb_by, state_to + def is_deterministic(self) -> bool: + """ Whether the transition function is deterministic + + Returns + ---------- + is_deterministic : bool + Whether the function is deterministic + + Examples + -------- + + >>> transition = NondeterministicTransitionFunction() + >>> transition.add_transition(State(0), Symbol("a"), State(1)) + >>> transition.is_deterministic() + True + + """ + for transitions in self._transitions.values(): + for s_to in transitions.values(): + if len(s_to) > 1: + return False + return True diff --git a/pyformlang/finite_automaton/partition.py b/pyformlang/finite_automaton/partition.py index 3e7aa66..419e940 100644 --- a/pyformlang/finite_automaton/partition.py +++ b/pyformlang/finite_automaton/partition.py @@ -2,20 +2,23 @@ For internal usage. """ +from typing import Dict, List, Iterable, Any + from .doubly_linked_list import DoublyLinkedList class Partition: """Class to manage partitions used in Hopcroft minimization algorithm""" - def __init__(self, n_states): - self._class_names = {} # States to class index + def __init__(self, n_states: int) -> None: + self._class_names: Dict[Any, int] = {} # States to class index # Class idx to states - self.part = [DoublyLinkedList() for _ in range(n_states)] - self._place = {} # state to position in list + self.part: List[DoublyLinkedList] = \ + [DoublyLinkedList() for _ in range(n_states)] + self._place: Dict[Any, Any] = {} # state to position in list self._counter = 0 # Number of classes - def add_class(self, new_class): + def add_class(self, new_class: Iterable[Any]) -> None: """Adds a new class""" index = self._counter self._counter += 1 @@ -24,14 +27,14 @@ def add_class(self, new_class): node = self.part[index].append(element) self._place[element] = node - def move_to_new_class(self, elements_to_move): + def move_to_new_class(self, elements_to_move: Iterable[Any]) -> None: """Move elements to a new class""" for element in elements_to_move: place = self._place[element] place.delete() self.add_class(elements_to_move) - def get_valid_sets(self, inverse): + def get_valid_sets(self, inverse: Iterable[Any]) -> List[int]: """Get the valid sets""" class_names = [0] * self._counter for element in inverse: @@ -39,7 +42,7 @@ def get_valid_sets(self, inverse): return [i for i, value in enumerate(class_names) if value != 0 and value != len(self.part[i])] - def split(self, to_split, splitter): + def split(self, to_split: Any, splitter: Iterable[Any]) -> int: """ Splits """ elements_to_move = [] for element in splitter: @@ -48,7 +51,7 @@ def split(self, to_split, splitter): self.move_to_new_class(elements_to_move) return self._counter - 1 - def get_groups(self): + def get_groups(self) -> List[Any]: """ Get the groups """ res = [] for i in range(self._counter): diff --git a/pyformlang/finite_automaton/regexable.py b/pyformlang/finite_automaton/regexable.py index 395d104..52caf49 100644 --- a/pyformlang/finite_automaton/regexable.py +++ b/pyformlang/finite_automaton/regexable.py @@ -1,14 +1,18 @@ -""" An abstract class to represent something which are be transformed into -a regex +""" +An abstract class to represent something +which are be transformed into a regex """ +from pyformlang.finite_automaton import EpsilonNFA +from pyformlang.regular_expression import Regex + class Regexable: """ An abstract class to represent something which are be transformed into a regex """ - def to_regex(self) -> "Regex": + def to_regex(self) -> Regex: """ Tranforms the EpsilonNFA to a regular expression Returns @@ -18,7 +22,7 @@ def to_regex(self) -> "Regex": """ raise NotImplementedError() - def union(self, other: "Regexable") -> "EpsilonNFA": + def union(self, other: "Regexable") -> EpsilonNFA: """ Makes the union of two regexable objects Parameters @@ -36,7 +40,7 @@ def union(self, other: "Regexable") -> "EpsilonNFA": regex = regex0 | regex1 return regex.to_epsilon_nfa() - def concatenate(self, other: "Regexable") -> "EpsilonNFA": + def concatenate(self, other: "Regexable") -> EpsilonNFA: """ Makes the concatenation of two regexable objects Parameters @@ -54,7 +58,7 @@ def concatenate(self, other: "Regexable") -> "EpsilonNFA": regex = regex0 + regex1 return regex.to_epsilon_nfa() - def kleene_star(self) -> "EpsilonNFA": + def kleene_star(self) -> EpsilonNFA: """ Makes the kleene star of the current regexable object Returns diff --git a/pyformlang/finite_automaton/state.py b/pyformlang/finite_automaton/state.py index c8f9f5b..90e37cb 100644 --- a/pyformlang/finite_automaton/state.py +++ b/pyformlang/finite_automaton/state.py @@ -22,7 +22,7 @@ class State(FiniteAutomatonObject): # pylint: disable=too-few-public-methods """ - def __init__(self, value): + def __init__(self, value: Any) -> None: super().__init__(value) self.index = None self.index_cfg_converter = None diff --git a/pyformlang/finite_automaton/transition_function.py b/pyformlang/finite_automaton/transition_function.py index 617986d..8ff1744 100644 --- a/pyformlang/finite_automaton/transition_function.py +++ b/pyformlang/finite_automaton/transition_function.py @@ -1,8 +1,12 @@ """ Representation of a transition function """ + +# pylint: disable=function-redefined + import copy -from typing import List, Iterable, Tuple, Any +from typing import Dict, Set, Iterable, Tuple, Optional +from fastcore.dispatch import typedispatch from pyformlang.finite_automaton.epsilon import Epsilon @@ -35,8 +39,8 @@ class TransitionFunction: """ - def __init__(self): - self._transitions = {} + def __init__(self) -> None: + self._transitions: Dict[State, Dict[Symbol, State]] = {} def add_transition(self, s_from: Any, symb_by: Any, s_to: Any) -> int: @@ -86,7 +90,6 @@ def add_transition(self, s_from: Any, symb_by: Any, self._transitions[s_from][symb_by] = s_to return 1 - # pylint: disable=duplicate-code def remove_transition(self, s_from: State, symb_by: Symbol, s_to: State) -> int: """ Removes a transition to the function @@ -121,7 +124,16 @@ def remove_transition(self, s_from: State, symb_by: Symbol, return 1 return 0 - def __call__(self, s_from: State, symb_by: Symbol = None) -> List[State]: + @typedispatch + def __call__(self, s_from: State) \ + -> Iterable[Tuple[Symbol, Set[State]]]: + """ Calls the transition function as a real function """ + for (symb_by, s_to) in self.get_transitions_from(s_from): + yield (symb_by, {s_to}) + + @typedispatch + def __call__(self, s_from: State, symb_by: Symbol) \ + -> Set[State]: """ Calls the transition function as a real function Parameters @@ -133,17 +145,26 @@ def __call__(self, s_from: State, symb_by: Symbol = None) -> List[State]: Returns ---------- - s_from : list of :class:`~pyformlang.finite_automaton.State` - The destination state, in a list + s_from : set of :class:`~pyformlang.finite_automaton.State` + The destination state, in a set """ + state = self.get_state(s_from, symb_by) + return {state} if state else set() + + def get_transitions_from(self, state_from: State) \ + -> Iterable[Tuple[Symbol, State]]: + """ Gets transitions from the given state """ + if state_from in self._transitions: + yield from self._transitions[state_from].items() + + def get_state(self, s_from: State, symb_by: Symbol) \ + -> Optional[State]: + """ Calls the transition function and with given arguments """ if s_from in self._transitions: - if symb_by is not None: - if symb_by in self._transitions[s_from]: - return [self._transitions[s_from][symb_by]] - else: - return self._transitions[s_from].items() - return [] + if symb_by in self._transitions[s_from]: + return self._transitions[s_from][symb_by] + return None def get_number_transitions(self) -> int: """ Gives the number of transitions describe by the deterministic \ @@ -165,7 +186,7 @@ def get_number_transitions(self) -> int: """ return sum(len(x) for x in self._transitions.values()) - def get_edges(self): + def get_edges(self) -> Iterable[Tuple[State, Symbol, State]]: """ Gets the edges Returns @@ -175,17 +196,17 @@ def get_edges(self): :class:`~pyformlang.finite_automaton.State`) A generator of edges """ - for state, transitions in self._transitions.items(): - for symbol, next_states in transitions.items(): - yield state, symbol, next_states + for s_from in self._transitions: + for (symb_by, s_to) in self.get_transitions_from(s_from): + yield (s_from, symb_by, s_to) - def __len__(self): + def __len__(self) -> int: return self.get_number_transitions() - def __iter__(self): + def __iter__(self) -> Iterable[Tuple[State, Symbol, State]]: yield from self.get_edges() - def to_dict(self): + def to_dict(self) -> Dict[State, Dict[Symbol, Set[State]]]: """ Get the dictionary representation of the transition function. The \ keys of the dictionary are the source nodes. The items are \ @@ -197,13 +218,15 @@ def to_dict(self): transition_dict : dict The transitions as a dictionary. """ - return copy.deepcopy(self._transitions) + result: Dict = copy.deepcopy(self._transitions) + for transitions in result.values(): + for (symb_by, s_to) in transitions.items(): + transitions[symb_by] = {s_to} + return result - def get_transitions_from(self, state_from: State) \ - -> Iterable[Tuple[Symbol, State]]: - """ Gets transitions from the given state """ - if state_from in self._transitions: - yield from self._transitions[state_from].items() + def is_deterministic(self) -> bool: + """ Whether the transition function is deterministic """ + return True class DuplicateTransitionError(Exception): @@ -226,7 +249,7 @@ def __init__(self, s_from: State, symb_by: Symbol, s_to: State, - s_to_old: State): + s_to_old: State) -> None: super().__init__("Transition from " + str(s_from) + " by " + str(symb_by) + " goes to " + str(s_to_old) + " not " + str(s_to)) diff --git a/pyrightconfig.json b/pyrightconfig.json new file mode 100644 index 0000000..e281b53 --- /dev/null +++ b/pyrightconfig.json @@ -0,0 +1,23 @@ +{ + "include": [ + "pyformlang" + ], + + "exclude": [ + "**/node_modules", + "**/__pycache__", + "**/.*", + "**/tests" + ], + + "pythonVersion": "3.8", + "pythonPlatform": "Linux", + + "strictListInference": true, + "strictSetInference": true, + "strictDictionaryInference": true, + "strictParameterNoneValue": false, + + "reportMissingParameterType": "warning", + "reportRedeclaration": "none", + } From 9c0512b0f47873676e2ede0291f098985eb4d4fc Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 14 Oct 2024 18:29:10 +0300 Subject: [PATCH 20/42] rename _start_state, correct start_state property --- .../deterministic_finite_automaton.py | 26 +++++++------- pyformlang/finite_automaton/epsilon_nfa.py | 34 +++++++++---------- .../finite_automaton/finite_automaton.py | 14 ++++---- .../nondeterministic_finite_automaton.py | 4 +-- 4 files changed, 39 insertions(+), 39 deletions(-) diff --git a/pyformlang/finite_automaton/deterministic_finite_automaton.py b/pyformlang/finite_automaton/deterministic_finite_automaton.py index f58e649..cff03e9 100644 --- a/pyformlang/finite_automaton/deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/deterministic_finite_automaton.py @@ -125,10 +125,10 @@ def __init__(self, transition_function or TransitionFunction() if start_state is not None: start_state = to_state(start_state) - self._start_state = {start_state} + self._start_states = {start_state} self._states.add(start_state) else: - self._start_state = set() + self._start_states = set() def add_start_state(self, state: Any) -> int: def add_start_state(self, state: Any) -> int: @@ -152,7 +152,7 @@ def add_start_state(self, state: Any) -> int: """ state = to_state(state) - self._start_state = {state} + self._start_states = {state} self._states.add(state) return 1 @@ -179,8 +179,8 @@ def remove_start_state(self, state: Any) -> int: """ state = to_state(state) - if self._start_state == {state}: - self._start_state.remove(state) + if self._start_states == {state}: + self._start_states.remove(state) return 1 return 0 @@ -211,8 +211,8 @@ def accepts(self, word: Iterable[Any]) -> bool: """ word = [to_symbol(x) for x in word] current_state = None - if self._start_state: - current_state = list(self._start_state)[0] + if self._start_states: + current_state = list(self._start_states)[0] for symbol in word: if current_state is None: return False @@ -281,8 +281,8 @@ def copy(self) -> "DeterministicFiniteAutomaton": """ dfa = DeterministicFiniteAutomaton() - if self._start_state: - dfa.add_start_state(list(self._start_state)[0]) + if self._start_states: + dfa.add_start_state(list(self._start_states)[0]) for final in self._final_states: dfa.add_final_state(final) for state in self._states: @@ -324,7 +324,7 @@ def minimize(self) -> "DeterministicFiniteAutomaton": True """ - if not self._start_state or not self._final_states: + if not self._start_states or not self._final_states: res = DeterministicFiniteAutomaton() res.add_start_state(State("Empty")) return res @@ -342,7 +342,7 @@ def minimize(self) -> "DeterministicFiniteAutomaton": to_new_states[state] = new_state # Build the DFA dfa = DeterministicFiniteAutomaton() - for state in self._start_state: + for state in self._start_states: dfa.add_start_state(to_new_states[state]) for state in states: if state in self._final_states: @@ -433,9 +433,9 @@ def is_equivalent_to(self, other: FiniteAutomaton) -> bool: return self._is_equivalent_to_minimal(self_minimal, other_minimal) @property - def start_state(self) -> State: + def start_state(self) -> Optional[State]: """ The start state """ - return list(self._start_state)[0] + return list(self._start_states)[0] if self._start_states else None @staticmethod def _is_equivalent_to_minimal( diff --git a/pyformlang/finite_automaton/epsilon_nfa.py b/pyformlang/finite_automaton/epsilon_nfa.py index 35cd736..b2e69b3 100644 --- a/pyformlang/finite_automaton/epsilon_nfa.py +++ b/pyformlang/finite_automaton/epsilon_nfa.py @@ -84,14 +84,14 @@ def __init__( self._transition_function = transition_function if start_state is not None: start_state = {to_state(x) for x in start_state} - self._start_state = start_state or set() + self._start_states = start_state or set() if final_states is not None: final_states = {to_state(x) for x in final_states} self._final_states = final_states or set() for state in self._final_states: if state is not None and state not in self._states: self._states.add(state) - for state in self._start_state: + for state in self._start_states: if state is not None and state not in self._states: self._states.add(state) @@ -150,7 +150,7 @@ def accepts(self, word: Iterable[Any]) -> bool: """ word = [to_symbol(x) for x in word] - current_states = self.eclose_iterable(self._start_state) + current_states = self.eclose_iterable(self._start_states) for symbol in word: if symbol == Epsilon(): continue @@ -249,7 +249,7 @@ def is_deterministic(self) -> bool: False """ - return len(self._start_state) <= 1 \ + return len(self._start_states) <= 1 \ and self._transition_function.is_deterministic() \ and all({x} == self.eclose(x) for x in self._states) @@ -265,11 +265,11 @@ def remove_epsilon_transitions(self) \ nfa, with no epsilon transition """ nfa = NondeterministicFiniteAutomaton() - for state in self._start_state: + for state in self._start_states: nfa.add_start_state(state) for state in self._final_states: nfa.add_final_state(state) - start_eclose = self.eclose_iterable(self._start_state) + start_eclose = self.eclose_iterable(self._start_states) for state in start_eclose: nfa.add_start_state(state) for state in self._states: @@ -301,9 +301,9 @@ def _to_deterministic_internal(self, dfa = DeterministicFiniteAutomaton() # Add Eclose if eclose: - start_eclose = self.eclose_iterable(self._start_state) + start_eclose = self.eclose_iterable(self._start_states) else: - start_eclose = self._start_state + start_eclose = self._start_states start_state = to_single_state(start_eclose) dfa.add_start_state(start_state) to_process = [start_eclose] @@ -382,7 +382,7 @@ def copy(self) -> "EpsilonNFA": """ enfa = EpsilonNFA() - for start in self._start_state: + for start in self._start_states: enfa.add_start_state(start) for final in self._final_states: enfa.add_final_state(final) @@ -447,15 +447,15 @@ def _get_regex_simple(self) -> str: regex : str A regex representing the automaton """ - if not self._final_states or not self._start_state: + if not self._final_states or not self._start_states: return "" - if len(self._final_states) != 1 or len(self._start_state) != 1: + if len(self._final_states) != 1 or len(self._start_states) != 1: raise ValueError("The automaton is not simple enough!") - if self._start_state == self._final_states: + if self._start_states == self._final_states: # We are suppose to have only one good symbol for symbol in self._input_symbols: out_states = self._transition_function( - list(self._start_state)[0], symbol) + list(self._start_states)[0], symbol) if out_states: return "(" + str(symbol.value) + ")*" return "epsilon" @@ -481,7 +481,7 @@ def _get_bi_transitions(self) -> Tuple[str, str, str, str]: The transition from the end state to the end state ---------- """ - start = list(self._start_state)[0] + start = list(self._start_states)[0] end = list(self._final_states)[0] start_to_start = "epsilon" start_to_end = "" @@ -728,7 +728,7 @@ def reverse(self) -> "EpsilonNFA": enfa.add_transition(state1, symbol, state0) for state1 in self._transition_function(state0, Epsilon()): enfa.add_transition(state1, Epsilon(), state0) - for start in self._start_state: + for start in self._start_states: enfa.add_final_state(start) for final in self._final_states: enfa.add_start_state(final) @@ -766,7 +766,7 @@ def is_empty(self) -> bool: """ to_process = [] processed = set() - for start in self._start_state: + for start in self._start_states: to_process.append(start) processed.add(start) while to_process: @@ -796,7 +796,7 @@ def _remove_all_basic_states(self) -> None: self._create_or_transitions() states = self._states.copy() for state in states: - if (state not in self._start_state + if (state not in self._start_states and state not in self._final_states): self._remove_state(state) diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index 9272fe1..6a4b02d 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -49,7 +49,7 @@ def __init__(self) -> None: self._transition_function: Union[NondeterministicTransitionFunction, TransitionFunction] \ = NondeterministicTransitionFunction() - self._start_state: Set[State] = set() + self._start_states: Set[State] = set() self._final_states: Set[State] = set() self.__transitive_closure: Optional[MultiDiGraph] = None @@ -227,7 +227,7 @@ def add_start_state(self, state: Any) -> int: """ state = to_state(state) - self._start_state.add(state) + self._start_states.add(state) self._states.add(state) return 1 @@ -255,8 +255,8 @@ def remove_start_state(self, state: Any) -> int: """ state = to_state(state) - if state in self._start_state: - self._start_state.remove(state) + if state in self._start_states: + self._start_states.remove(state) return 1 return 0 @@ -390,7 +390,7 @@ def is_final_state(self, state: Any) -> bool: @property def start_states(self) -> Set[State]: """The start states""" - return self._start_state + return self._start_states def add_symbol(self, symbol: Any) -> None: """ Add a symbol @@ -431,7 +431,7 @@ def to_fst(self) -> FST: """ fst = FST() - for start_state in self._start_state: + for start_state in self._start_states: fst.add_start_state(start_state.value) for final_state in self._final_states: fst.add_final_state(final_state.value) @@ -464,7 +464,7 @@ def is_acyclic(self) -> bool: """ to_process = [] - for state in self._start_state: + for state in self._start_states: to_process.append((state, set())) while to_process: current, visited = to_process.pop() diff --git a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py index f7ac681..590b1be 100644 --- a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py @@ -86,7 +86,7 @@ def accepts(self, word: Iterable[Any]) -> bool: """ word = [to_symbol(x) for x in word] - current_states = self._start_state + current_states = self._start_states for symbol in word: current_states = self._get_next_states_iterable(current_states, symbol) @@ -111,7 +111,7 @@ def is_deterministic(self) -> bool: False """ - return len(self._start_state) <= 1 and \ + return len(self._start_states) <= 1 and \ self._transition_function.is_deterministic() def to_deterministic(self) -> DeterministicFiniteAutomaton: From 95a93fd30bf4320b5caaf15f59f1af445f22ae72 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 11 Nov 2024 16:49:40 +0300 Subject: [PATCH 21/42] rework transition function --- pyformlang/finite_automaton/__init__.py | 9 +- .../deterministic_finite_automaton.py | 69 ++--- .../deterministic_transition_function.py | 120 ++++++++ pyformlang/finite_automaton/epsilon_nfa.py | 28 +- .../finite_automaton/finite_automaton.py | 75 ++--- .../nondeterministic_finite_automaton.py | 7 +- .../nondeterministic_transition_function.py | 41 ++- ...test_deterministic_transition_function.py} | 49 +-- .../finite_automaton/transition_function.py | 281 ++++-------------- 9 files changed, 308 insertions(+), 371 deletions(-) create mode 100644 pyformlang/finite_automaton/deterministic_transition_function.py rename pyformlang/finite_automaton/tests/{test_transition_function.py => test_deterministic_transition_function.py} (68%) diff --git a/pyformlang/finite_automaton/__init__.py b/pyformlang/finite_automaton/__init__.py index 145ab3e..e6b7db8 100644 --- a/pyformlang/finite_automaton/__init__.py +++ b/pyformlang/finite_automaton/__init__.py @@ -41,9 +41,10 @@ from .state import State from .symbol import Symbol from .epsilon import Epsilon -from .transition_function import (TransitionFunction, - DuplicateTransitionError, - InvalidEpsilonTransition) +from .deterministic_transition_function import \ + (DeterministicTransitionFunction, + DuplicateTransitionError, + InvalidEpsilonTransition) from .nondeterministic_transition_function import \ NondeterministicTransitionFunction @@ -54,7 +55,7 @@ "State", "Symbol", "Epsilon", - "TransitionFunction", + "DeterministicTransitionFunction", "NondeterministicTransitionFunction", "DuplicateTransitionError", "InvalidEpsilonTransition"] diff --git a/pyformlang/finite_automaton/deterministic_finite_automaton.py b/pyformlang/finite_automaton/deterministic_finite_automaton.py index cff03e9..3d0d28d 100644 --- a/pyformlang/finite_automaton/deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/deterministic_finite_automaton.py @@ -2,28 +2,18 @@ Representation of a deterministic finite automaton """ -# pylint: disable=too-many-arguments - -from typing import \ - Dict, List, Iterable, Set, \ - AbstractSet, Optional, Any +from typing import Dict, List, Iterable, AbstractSet, Optional, Any from numpy import empty -from typing import AbstractSet, Iterable, Any - -import numpy as np - -# pylint: disable=cyclic-import +from .state import State +from .symbol import Symbol +from .deterministic_transition_function import DeterministicTransitionFunction +from .finite_automaton import FiniteAutomaton, to_state, to_symbol from .epsilon_nfa import to_single_state -from .finite_automaton import to_state, to_symbol -from .hopcroft_processing_list import HopcroftProcessingList -from .finite_automaton import FiniteAutomaton from .nondeterministic_finite_automaton import NondeterministicFiniteAutomaton +from .hopcroft_processing_list import HopcroftProcessingList from .partition import Partition -from .state import State -from .symbol import Symbol -from .transition_function import TransitionFunction class PreviousTransitions: @@ -40,7 +30,7 @@ def __init__(self, for i, symbol in enumerate(symbols): self._to_index_symbol[symbol] = i self._conversion = empty((len(states) + 1, len(symbols)), - dtype=object) + dtype=object) def add(self, next0: Optional[State], @@ -117,12 +107,12 @@ class DeterministicFiniteAutomaton(NondeterministicFiniteAutomaton): def __init__(self, states: AbstractSet[Any] = None, input_symbols: AbstractSet[Any] = None, - transition_function: TransitionFunction = None, + transition_function: DeterministicTransitionFunction = None, start_state: Any = None, final_states: AbstractSet[Any] = None) -> None: super().__init__(states, input_symbols, None, None, final_states) - self._transition_function: TransitionFunction = \ - transition_function or TransitionFunction() + self._transition_function = transition_function \ + or DeterministicTransitionFunction() if start_state is not None: start_state = to_state(start_state) self._start_states = {start_state} @@ -130,7 +120,11 @@ def __init__(self, else: self._start_states = set() - def add_start_state(self, state: Any) -> int: + @property + def start_state(self) -> Optional[State]: + """ Gets the start state """ + return list(self._start_states)[0] if self._start_states else None + def add_start_state(self, state: Any) -> int: """ Set an initial state @@ -156,7 +150,6 @@ def add_start_state(self, state: Any) -> int: self._states.add(state) return 1 - def remove_start_state(self, state: Any) -> int: def remove_start_state(self, state: Any) -> int: """ remove an initial state @@ -180,11 +173,10 @@ def remove_start_state(self, state: Any) -> int: """ state = to_state(state) if self._start_states == {state}: - self._start_states.remove(state) + self._start_states = set() return 1 return 0 - def accepts(self, word: Iterable[Any]) -> bool: def accepts(self, word: Iterable[Any]) -> bool: """ Checks whether the dfa accepts a given word @@ -210,14 +202,12 @@ def accepts(self, word: Iterable[Any]) -> bool: """ word = [to_symbol(x) for x in word] - current_state = None - if self._start_states: - current_state = list(self._start_states)[0] + current_state = self.start_state for symbol in word: if current_state is None: return False - current_state = self._transition_function.get_state(current_state, - symbol) + current_state = self._transition_function.get_next_state( + current_state, symbol) return current_state is not None and self.is_final_state(current_state) def is_deterministic(self) -> bool: @@ -281,23 +271,28 @@ def copy(self) -> "DeterministicFiniteAutomaton": """ dfa = DeterministicFiniteAutomaton() - if self._start_states: - dfa.add_start_state(list(self._start_states)[0]) + if self.start_state: + dfa.add_start_state(self.start_state) for final in self._final_states: dfa.add_final_state(final) for state in self._states: for symbol in self._input_symbols: - state_to = self._transition_function.get_state(state, symbol) + state_to = self._transition_function.get_next_state( + state, symbol) if state_to is not None: dfa.add_transition(state, symbol, state_to) return dfa + def get_next_state(self, s_from: State, symb_by: Symbol) -> Optional[State]: + """ Make a call of deterministic transition function """ + return self._transition_function.get_next_state(s_from, symb_by) + def _get_previous_transitions(self) -> PreviousTransitions: previous_transitions = PreviousTransitions(self._states, self._input_symbols) for state in self._states: for symbol in self._input_symbols: - next0 = self._transition_function.get_state(state, symbol) + next0 = self._transition_function.get_next_state(state, symbol) previous_transitions.add(next0, symbol, state) for symbol in self._input_symbols: previous_transitions.add(None, symbol, None) @@ -350,7 +345,8 @@ def minimize(self) -> "DeterministicFiniteAutomaton": done = set() new_state = to_new_states[state] for symbol in self._input_symbols: - next_node = self._transition_function.get_state(state, symbol) + next_node = self._transition_function.get_next_state( + state, symbol) if next_node and next_node in states: next_node = to_new_states[next_node] if (next_node, symbol) not in done: @@ -432,11 +428,6 @@ def is_equivalent_to(self, other: FiniteAutomaton) -> bool: other_minimal = other.minimize() return self._is_equivalent_to_minimal(self_minimal, other_minimal) - @property - def start_state(self) -> Optional[State]: - """ The start state """ - return list(self._start_states)[0] if self._start_states else None - @staticmethod def _is_equivalent_to_minimal( self_minimal: "DeterministicFiniteAutomaton", diff --git a/pyformlang/finite_automaton/deterministic_transition_function.py b/pyformlang/finite_automaton/deterministic_transition_function.py new file mode 100644 index 0000000..4a03c18 --- /dev/null +++ b/pyformlang/finite_automaton/deterministic_transition_function.py @@ -0,0 +1,120 @@ +""" +A deterministic transition function +""" + +# pylint: disable=function-redefined + +from typing import Optional + +from .state import State +from .symbol import Symbol +from .epsilon import Epsilon +from .nondeterministic_transition_function import \ + NondeterministicTransitionFunction + +class DeterministicTransitionFunction(NondeterministicTransitionFunction): + """A deterministic transition function in a finite automaton + + This is a deterministic transition function. + + Attributes + ---------- + _transitions : dict + A dictionary which contains the transitions of a finite automaton + + Examples + -------- + + >>> transition = TransitionFunction() + >>> transition.add_transition(State(0), Symbol("a"), State(1)) + + Creates a transition function and adds a transition. + + """ + + def add_transition(self, + s_from: State, + symb_by: Symbol, + s_to: State) -> int: + """ Adds a new transition to the function + + Parameters + ---------- + s_from : :class:`~pyformlang.finite_automaton.State` + The source state + symb_by : :class:`~pyformlang.finite_automaton.Symbol` + The transition symbol + s_to : :class:`~pyformlang.finite_automaton.State` + The destination state + + + Returns + -------- + done : int + Always 1 + + Raises + -------- + DuplicateTransitionError + If the transition already exists + + Examples + -------- + + >>> transition = TransitionFunction() + >>> transition.add_transition(State(0), Symbol("a"), State(1)) + + """ + if symb_by == Epsilon(): + raise InvalidEpsilonTransition() + s_to_old = self.get_next_state(s_from, symb_by) + if s_to_old is not None and s_to_old != s_to: + raise DuplicateTransitionError(s_from, + symb_by, + s_to, + s_to_old) + return super().add_transition(s_from, symb_by, s_to) + + def get_next_state(self, s_from: State, symb_by: Symbol) -> Optional[State]: + """ Make a call of deterministic transition function """ + next_state = self(s_from, symb_by) + return list(next_state)[0] if next_state else None + + def is_deterministic(self) -> bool: + """ Whether the transition function is deterministic """ + return True + + +class InvalidEpsilonTransition(Exception): + """Exception raised when an epsilon transition is created in + deterministic automaton""" + + +class DuplicateTransitionError(Exception): + """ Signals a duplicated transition + + Parameters + ---------- + s_from : :class:`~pyformlang.finite_automaton.State` + The source state + symb_by : :class:`~pyformlang.finite_automaton.Symbol` + The transition symbol + s_to : :class:`~pyformlang.finite_automaton.State` + The wanted new destination state + s_to_old : :class:`~pyformlang.finite_automaton.State` + The old destination state + + """ + + def __init__(self, + s_from: State, + symb_by: Symbol, + s_to: State, + s_to_old: State) -> None: + super().__init__("Transition from " + str(s_from) + + " by " + str(symb_by) + + " goes to " + str(s_to_old) + " not " + str(s_to)) + self.s_from = s_from + self.symb_by = symb_by + self.s_to = s_to + self.s_to_old = s_to_old diff --git a/pyformlang/finite_automaton/epsilon_nfa.py b/pyformlang/finite_automaton/epsilon_nfa.py index b2e69b3..9d0fdfb 100644 --- a/pyformlang/finite_automaton/epsilon_nfa.py +++ b/pyformlang/finite_automaton/epsilon_nfa.py @@ -2,17 +2,15 @@ Nondeterministic Automaton with epsilon transitions """ -# pylint: disable=too-many-arguments - from typing import Iterable, Set, AbstractSet, Tuple, Any from pyformlang.finite_automaton import NondeterministicFiniteAutomaton from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.regular_expression import Regex -from .epsilon import Epsilon from .state import State from .symbol import Symbol +from .epsilon import Epsilon from .nondeterministic_transition_function import \ NondeterministicTransitionFunction @@ -71,7 +69,7 @@ def __init__( states: AbstractSet[Any] = None, input_symbols: AbstractSet[Any] = None, transition_function: NondeterministicTransitionFunction = None, - start_state: AbstractSet[Any] = None, + start_states: AbstractSet[Any] = None, final_states: AbstractSet[Any] = None) -> None: super().__init__() if states is not None: @@ -80,20 +78,18 @@ def __init__( if input_symbols is not None: input_symbols = {to_symbol(x) for x in input_symbols} self._input_symbols = input_symbols or set() - if transition_function is not None: - self._transition_function = transition_function - if start_state is not None: - start_state = {to_state(x) for x in start_state} - self._start_states = start_state or set() + self._transition_function = transition_function \ + or NondeterministicTransitionFunction() + if start_states is not None: + start_states = {to_state(x) for x in start_states} + self._start_states = start_states or set() if final_states is not None: final_states = {to_state(x) for x in final_states} self._final_states = final_states or set() for state in self._final_states: - if state is not None and state not in self._states: - self._states.add(state) + self._states.add(state) for state in self._start_states: - if state is not None and state not in self._states: - self._states.add(state) + self._states.add(state) def _get_next_states_iterable(self, current_states: Iterable[State], @@ -253,8 +249,7 @@ def is_deterministic(self) -> bool: and self._transition_function.is_deterministic() \ and all({x} == self.eclose(x) for x in self._states) - def remove_epsilon_transitions(self) \ - -> NondeterministicFiniteAutomaton: + def remove_epsilon_transitions(self) -> NondeterministicFiniteAutomaton: """ Removes the epsilon transitions from the automaton Returns @@ -332,8 +327,7 @@ def _to_deterministic_internal(self, dfa.add_final_state(s_from) return dfa - def to_deterministic(self) \ - -> DeterministicFiniteAutomaton: + def to_deterministic(self) -> DeterministicFiniteAutomaton: """ Transforms the epsilon-nfa into a dfa Returns diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index 6a4b02d..a0f050b 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -2,23 +2,19 @@ # pylint: disable=function-redefined -from typing import \ - Dict, List, Set, Tuple, \ - Iterable, Union, Optional, Any - -from fastcore.dispatch import typedispatch -from networkx import MultiDiGraph, transitive_closure +from typing import Dict, List, Set, Tuple, Iterable, Optional, Any +from collections import deque +from networkx import MultiDiGraph from networkx.drawing.nx_pydot import write_dot +from fastcore.dispatch import typedispatch from pyformlang.finite_automaton import EpsilonNFA from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.fst import FST -from .epsilon import Epsilon from .state import State from .symbol import Symbol -from .nondeterministic_transition_function import \ - NondeterministicTransitionFunction +from .epsilon import Epsilon from .transition_function import TransitionFunction @@ -46,15 +42,11 @@ class FiniteAutomaton: def __init__(self) -> None: self._states: Set[State] = set() self._input_symbols: Set[Symbol] = set() - self._transition_function: Union[NondeterministicTransitionFunction, - TransitionFunction] \ - = NondeterministicTransitionFunction() + self._transition_function = TransitionFunction() self._start_states: Set[State] = set() self._final_states: Set[State] = set() - self.__transitive_closure: Optional[MultiDiGraph] = None - def add_transition(self, s_from: Any, symb_by: Any, - s_to: Any) -> int: + def add_transition(self, s_from: Any, symb_by: Any, s_to: Any) -> int: """ Adds a transition to the nfa Parameters @@ -129,8 +121,7 @@ def add_transitions(self, \ temp = self.add_transition(s_from, symb_by, s_to) return temp - def remove_transition(self, s_from: Any, symb_by: Any, - s_to: Any) -> int: + def remove_transition(self, s_from: Any, symb_by: Any, s_to: Any) -> int: """ Remove a transition of the nfa Parameters @@ -318,18 +309,16 @@ def remove_final_state(self, state: Any) -> int: return 0 @typedispatch - def __call__(self, state: Any) \ - -> Iterable[Tuple[Symbol, Set[State]]]: + def __call__(self, s_from: Any) -> Iterable[Tuple[Symbol, Set[State]]]: """ Gives FA transitions from given state. Calls the transition function """ - state = to_state(state) - return self._transition_function(state) + s_from = to_state(s_from) + return self._transition_function(s_from) @typedispatch - def __call__(self, state: Any, symbol: Any) \ - -> Set[State]: + def __call__(self, s_from: Any, symb_by: Any) -> Set[State]: """ Gives the states obtained after calling a symbol on a state Calls the transition function @@ -355,9 +344,19 @@ def __call__(self, state: Any, symbol: Any) \ [1] """ - state = to_state(state) - symbol = to_symbol(symbol) - return self._transition_function(state, symbol) + s_from = to_state(s_from) + symb_by = to_symbol(symb_by) + return self._transition_function(s_from, symb_by) + + def get_transitions_from(self, s_from: State) \ + -> Iterable[Tuple[Symbol, State]]: + """ Gets transitions from the given state """ + return self._transition_function.get_transitions_from(s_from) + + def get_next_states_from(self, s_from: Any) -> Set[State]: + """ Gets a set of states that are next to the given one """ + s_from = to_state(s_from) + return self._transition_function.get_next_states_from(s_from) def is_final_state(self, state: Any) -> bool: """ Checks if a state is final @@ -654,8 +653,8 @@ def _get_states_leading_to_final(self) -> Set[State]: """ leading_to_final = self.final_states.copy() visited = set() - states_to_process = deque((None, start_state) - for start_state in self.start_states) + states_to_process: deque[Any] = \ + deque((None, start_state) for start_state in self.start_states) while states_to_process: previous_state, current_state = states_to_process.pop() if previous_state and current_state in leading_to_final: @@ -664,7 +663,7 @@ def _get_states_leading_to_final(self) -> Set[State]: if current_state in visited: continue visited.add(current_state) - next_states = self._get_next_states_from(current_state) + next_states = self.get_next_states_from(current_state) if next_states: states_to_process.append((previous_state, current_state)) for next_state in next_states: @@ -678,19 +677,11 @@ def _get_reachable_states(self) -> Set[State]: while states_to_process: current_state = states_to_process.popleft() visited.add(current_state) - for next_state in self._get_next_states_from(current_state): + for next_state in self.get_next_states_from(current_state): if next_state not in visited: states_to_process.append(next_state) return visited - def _get_next_states_from(self, state_from: State) -> Set[State]: - """ Gets a set of states that are next to the given one """ - next_states = set() - for _, next_state in \ - self._transition_function.get_transitions_from(state_from): - next_states.add(next_state) - return next_states - def to_deterministic(self) -> DeterministicFiniteAutomaton: """ Turns the automaton into a deterministic one""" raise NotImplementedError @@ -700,9 +691,9 @@ def is_deterministic(self) -> bool: raise NotImplementedError def __eq__(self, other: Any) -> bool: - if isinstance(other, FiniteAutomaton): - return self.is_equivalent_to(other) - return False + if not isinstance(other, FiniteAutomaton): + return False + return self.is_equivalent_to(other) def __len__(self) -> int: """Number of transitions""" @@ -747,7 +738,7 @@ def __try_add(set_to_add_to: Set[Any], element_to_add: Any) -> bool: return len(set_to_add_to) != initial_length -def to_state(given: Any) -> Union[State, None]: +def to_state(given: Any) -> State: """ Transforms the input into a state Parameters diff --git a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py index 590b1be..12e966c 100644 --- a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py @@ -4,12 +4,11 @@ from typing import Iterable, Any -from pyformlang.finite_automaton import \ - Epsilon, DeterministicFiniteAutomaton - +from .epsilon import Epsilon from .epsilon_nfa import EpsilonNFA from .finite_automaton import to_symbol -from .transition_function import InvalidEpsilonTransition +from .deterministic_transition_function import InvalidEpsilonTransition +from .deterministic_finite_automaton import DeterministicFiniteAutomaton class NondeterministicFiniteAutomaton(EpsilonNFA): diff --git a/pyformlang/finite_automaton/nondeterministic_transition_function.py b/pyformlang/finite_automaton/nondeterministic_transition_function.py index 7479a0a..b941bb6 100644 --- a/pyformlang/finite_automaton/nondeterministic_transition_function.py +++ b/pyformlang/finite_automaton/nondeterministic_transition_function.py @@ -4,15 +4,16 @@ # pylint: disable=function-redefined -import copy from typing import Dict, Set, Iterable, Tuple +from copy import deepcopy from fastcore.dispatch import typedispatch from .state import State from .symbol import Symbol +from .transition_function import TransitionFunction -class NondeterministicTransitionFunction: +class NondeterministicTransitionFunction(TransitionFunction): """ A nondeterministic transition function in a finite automaton. The difference with a deterministic transition is that the return value is @@ -28,10 +29,12 @@ class NondeterministicTransitionFunction: """ - def __init__(self): + def __init__(self) -> None: self._transitions: Dict[State, Dict[Symbol, Set[State]]] = {} - def add_transition(self, s_from: State, symb_by: Symbol, + def add_transition(self, + s_from: State, + symb_by: Symbol, s_to: State) -> int: """ Adds a new transition to the function @@ -67,9 +70,11 @@ def add_transition(self, s_from: State, symb_by: Symbol, self._transitions[s_from][symb_by] = {s_to} return 1 - def remove_transition(self, s_from: State, symb_by: Symbol, + def remove_transition(self, + s_from: State, + symb_by: Symbol, s_to: State) -> int: - """ Removes a transition to the function + """ Removes a transition from the function Parameters ---------- @@ -124,19 +129,14 @@ def get_number_transitions(self) -> int: counter += len(s_to) return counter - def __len__(self) -> int: - return self.get_number_transitions() - @typedispatch - def __call__(self, s_from: State) \ - -> Iterable[Tuple[Symbol, Set[State]]]: + def __call__(self, s_from: State) -> Iterable[Tuple[Symbol, Set[State]]]: """ Calls the transition function as a real function """ if s_from in self._transitions: yield from self._transitions[s_from].items() @typedispatch - def __call__(self, s_from: State, symb_by: Symbol) \ - -> Set[State]: + def __call__(self, s_from: State, symb_by: Symbol) -> Set[State]: """ Calls the transition function as a real function Parameters @@ -157,11 +157,11 @@ def __call__(self, s_from: State, symb_by: Symbol) \ return self._transitions[s_from][symb_by] return set() - def get_transitions_from(self, state_from: State) \ + def get_transitions_from(self, s_from: State) \ -> Iterable[Tuple[Symbol, State]]: """ Gets transitions from the given state """ - if state_from in self._transitions: - for symb_by, states_to in self._transitions[state_from].items(): + if s_from in self._transitions: + for symb_by, states_to in self._transitions[s_from].items(): for state_to in states_to: yield symb_by, state_to @@ -176,11 +176,8 @@ def get_edges(self) -> Iterable[Tuple[State, Symbol, State]]: A generator of edges """ for s_from in self._transitions: - for (symb_by, s_to) in self.get_transitions_from(s_from): - yield (s_from, symb_by, s_to) - - def __iter__(self) -> Iterable[Tuple[State, Symbol, State]]: - yield from self.get_edges() + for symb_by, s_to in self.get_transitions_from(s_from): + yield s_from, symb_by, s_to def to_dict(self) -> Dict[State, Dict[Symbol, Set[State]]]: """ @@ -194,7 +191,7 @@ def to_dict(self) -> Dict[State, Dict[Symbol, Set[State]]]: transition_dict : dict The transitions as a dictionary. """ - return copy.deepcopy(self._transitions) + return deepcopy(self._transitions) def is_deterministic(self) -> bool: """ Whether the transition function is deterministic diff --git a/pyformlang/finite_automaton/tests/test_transition_function.py b/pyformlang/finite_automaton/tests/test_deterministic_transition_function.py similarity index 68% rename from pyformlang/finite_automaton/tests/test_transition_function.py rename to pyformlang/finite_automaton/tests/test_deterministic_transition_function.py index 85e1972..734b4b8 100644 --- a/pyformlang/finite_automaton/tests/test_transition_function.py +++ b/pyformlang/finite_automaton/tests/test_deterministic_transition_function.py @@ -1,10 +1,13 @@ """ Test the transition functions """ -from pyformlang.finite_automaton import State, Symbol, TransitionFunction, \ - DuplicateTransitionError, InvalidEpsilonTransition, Epsilon + import pytest +from pyformlang.finite_automaton import \ + State, Symbol, Epsilon, DeterministicTransitionFunction, \ + DuplicateTransitionError, InvalidEpsilonTransition + class TestTransitionFunction: """ Tests the transitions functions @@ -13,14 +16,13 @@ class TestTransitionFunction: def test_creation(self): """ Tests the creation of transition functions """ - transition_function = TransitionFunction() + transition_function = DeterministicTransitionFunction() assert transition_function is not None - # pylint: disable=protected-access def test_add_transitions(self): """ Tests the addition of transitions """ - transition_function = TransitionFunction() + transition_function = DeterministicTransitionFunction() s_from = State(10) s_to = State(11) s_to_bis = State(2) @@ -38,7 +40,7 @@ def test_add_transitions(self): def test_number_transitions(self): """ Tests the number of transitions """ - transition_function = TransitionFunction() + transition_function = DeterministicTransitionFunction() assert transition_function.get_number_transitions() == 0 s_from = State(110) s_to = State(12) @@ -57,41 +59,54 @@ def test_number_transitions(self): def test_remove_transitions(self): """ Tests the removal of transitions """ - transition_function = TransitionFunction() + transition_function = DeterministicTransitionFunction() s_from = State(10) s_to = State(11) symb_by = Symbol("abc") transition_function.add_transition(s_from, symb_by, s_to) assert transition_function.remove_transition(s_from, - symb_by, - s_to) == 1 + symb_by, + s_to) == 1 assert transition_function.get_number_transitions() == 0 assert transition_function(s_to, symb_by) == [] assert transition_function(s_from, symb_by) == [] assert transition_function.remove_transition(s_from, - symb_by, - s_to) == 0 + symb_by, + s_to) == 0 def test_call(self): """ Tests the call of a transition function """ - transition_function = TransitionFunction() + transition_function = DeterministicTransitionFunction() s_from = State(0) s_to = State(1) symb_by = Symbol("a") transition_function.add_transition(s_from, symb_by, s_to) - assert transition_function(s_from, symb_by) == [s_to] - assert transition_function(s_to, symb_by) == [] + assert transition_function(s_from, symb_by) == {s_to} + assert transition_function(s_to, symb_by) == set() + + def test_get_next_state(self): + """ Tests the transition function call to get a single state """ + transition_function = DeterministicTransitionFunction() + s_from = State(0) + s_to = State(1) + symb_by = Symbol("a") + transition_function.add_transition(s_from, symb_by, s_to) + assert transition_function.get_next_state(s_from, symb_by) == s_to + assert transition_function.get_next_state(s_to, symb_by) is None def test_invalid_epsilon(self): """ Tests invalid transition """ - transition_function = TransitionFunction() + transition_function = DeterministicTransitionFunction() + s_from = State(0) + s_to = State(1) + epsilon = Epsilon() with pytest.raises(InvalidEpsilonTransition): - transition_function.add_transition("1", Epsilon(), "2") + transition_function.add_transition(s_from, epsilon, s_to) def test_get_transitions_from(self): """ Tests iteration of transitions from specified state """ - transition_function = TransitionFunction() + transition_function = DeterministicTransitionFunction() states = [State(x) for x in range(0, 4)] symbol_a = Symbol("a") symbol_b = Symbol("b") diff --git a/pyformlang/finite_automaton/transition_function.py b/pyformlang/finite_automaton/transition_function.py index 8ff1744..8e1795a 100644 --- a/pyformlang/finite_automaton/transition_function.py +++ b/pyformlang/finite_automaton/transition_function.py @@ -1,259 +1,88 @@ """ -Representation of a transition function +General transition function representation """ # pylint: disable=function-redefined -import copy -from typing import Dict, Set, Iterable, Tuple, Optional +from typing import Dict, Set, Tuple, Iterable +from abc import abstractmethod from fastcore.dispatch import typedispatch -from pyformlang.finite_automaton.epsilon import Epsilon - from .state import State from .symbol import Symbol - -class InvalidEpsilonTransition(Exception): - """Exception raised when an epsilon transition is created in - deterministic automaton""" - - class TransitionFunction: - """ A transition function in a finite automaton. - - This is a deterministic transition function. - - Attributes - ---------- - _transitions : dict - A dictionary which contains the transitions of a finite automaton - - Examples - -------- - - >>> transition = TransitionFunction() - >>> transition.add_transition(State(0), Symbol("a"), State(1)) - - Creates a transition function and adds a transition. - - """ - - def __init__(self) -> None: - self._transitions: Dict[State, Dict[Symbol, State]] = {} - - def add_transition(self, s_from: Any, symb_by: Any, - s_to: Any) -> int: - """ Adds a new transition to the function - - Parameters - ---------- - s_from : :class:`~pyformlang.finite_automaton.State` - The source state - symb_by : :class:`~pyformlang.finite_automaton.Symbol` - The transition symbol - s_to : :class:`~pyformlang.finite_automaton.State` - The destination state - - - Returns - -------- - done : int - Always 1 - - Raises - -------- - DuplicateTransitionError - If the transition already exists - - Examples - -------- - - >>> transition = TransitionFunction() - >>> transition.add_transition(State(0), Symbol("a"), State(1)) - - """ - if symb_by == Epsilon(): - raise InvalidEpsilonTransition() - if s_from in self._transitions: - if symb_by in self._transitions[s_from]: - if self._transitions[s_from][symb_by] != s_to: - raise DuplicateTransitionError(s_from, - symb_by, - s_to, - self._transitions[s_from][ - symb_by]) - else: - self._transitions[s_from][symb_by] = s_to - else: - self._transitions[s_from] = {} - self._transitions[s_from][symb_by] = s_to - return 1 - - def remove_transition(self, s_from: State, symb_by: Symbol, + """ General transition function representation """ + + @abstractmethod + def add_transition(self, + s_from: State, + symb_by: Symbol, + s_to: State) -> int: + """ Adds a new transition to the function """ + raise NotImplementedError + + @abstractmethod + def remove_transition(self, + s_from: State, + symb_by: Symbol, s_to: State) -> int: - """ Removes a transition to the function + """ Removes a transition from the function """ + raise NotImplementedError - Parameters - ---------- - s_from : :class:`~pyformlang.finite_automaton.State` - The source state - symb_by : :class:`~pyformlang.finite_automaton.Symbol` - The transition symbol - s_to : :class:`~pyformlang.finite_automaton.State` - The destination state - - - Returns - -------- - done : int - 1 is the transition was found, 0 otherwise - - Examples - -------- - - >>> transition = TransitionFunction() - >>> transition.add_transition(State(0), Symbol("a"), State(1)) - >>> transition.remove_transition(State(0), Symbol("a"), State(1)) + @abstractmethod + def get_number_transitions(self) -> int: + """ Gives the number of transitions described by the function """ + raise NotImplementedError - """ - if s_from in self._transitions and \ - symb_by in self._transitions[s_from] and \ - s_to == self._transitions[s_from][symb_by]: - del self._transitions[s_from][symb_by] - return 1 - return 0 + def __len__(self) -> int: + return self.get_number_transitions() @typedispatch - def __call__(self, s_from: State) \ - -> Iterable[Tuple[Symbol, Set[State]]]: - """ Calls the transition function as a real function """ - for (symb_by, s_to) in self.get_transitions_from(s_from): - yield (symb_by, {s_to}) + @abstractmethod + def __call__(self, s_from: State) -> Iterable[Tuple[Symbol, Set[State]]]: + """ + Calls the transition function + as a real function for given state. + """ + raise NotImplementedError @typedispatch - def __call__(self, s_from: State, symb_by: Symbol) \ - -> Set[State]: - """ Calls the transition function as a real function - - Parameters - ---------- - s_from : :class:`~pyformlang.finite_automaton.State` - The source state - symb_by : :class:`~pyformlang.finite_automaton.Symbol` - The transition symbol - - Returns - ---------- - s_from : set of :class:`~pyformlang.finite_automaton.State` - The destination state, in a set - + @abstractmethod + def __call__(self, s_from: State, symb_by: Symbol) -> Set[State]: + """ + Calls the transition function + as a real function for given state and symbol. """ - state = self.get_state(s_from, symb_by) - return {state} if state else set() + raise NotImplementedError - def get_transitions_from(self, state_from: State) \ + @abstractmethod + def get_transitions_from(self, s_from: State) \ -> Iterable[Tuple[Symbol, State]]: """ Gets transitions from the given state """ - if state_from in self._transitions: - yield from self._transitions[state_from].items() + raise NotImplementedError - def get_state(self, s_from: State, symb_by: Symbol) \ - -> Optional[State]: - """ Calls the transition function and with given arguments """ - if s_from in self._transitions: - if symb_by in self._transitions[s_from]: - return self._transitions[s_from][symb_by] - return None - - def get_number_transitions(self) -> int: - """ Gives the number of transitions describe by the deterministic \ - function - - Returns - ---------- - n_transitions : int - The number of deterministic transitions - - Examples - -------- - - >>> transition = TransitionFunction() - >>> transition.add_transition(State(0), Symbol("a"), State(1)) - >>> transition.get_number_transitions() - 1 - - """ - return sum(len(x) for x in self._transitions.values()) + def get_next_states_from(self, s_from: State) -> Set[State]: + """ Gets a set of states that are next to the given one """ + next_states = set() + for _, next_state in self.get_transitions_from(s_from): + next_states.add(next_state) + return next_states + @abstractmethod def get_edges(self) -> Iterable[Tuple[State, Symbol, State]]: - """ Gets the edges - - Returns - ---------- - edges : generator of (:class:`~pyformlang.finite_automaton.State`, \ - :class:`~pyformlang.finite_automaton.Symbol`,\ - :class:`~pyformlang.finite_automaton.State`) - A generator of edges - """ - for s_from in self._transitions: - for (symb_by, s_to) in self.get_transitions_from(s_from): - yield (s_from, symb_by, s_to) - - def __len__(self) -> int: - return self.get_number_transitions() + """ Gets the edges """ + raise NotImplementedError def __iter__(self) -> Iterable[Tuple[State, Symbol, State]]: yield from self.get_edges() + @abstractmethod def to_dict(self) -> Dict[State, Dict[Symbol, Set[State]]]: - """ - Get the dictionary representation of the transition function. The \ - keys of the dictionary are the source nodes. The items are \ - dictionaries where the keys are the symbols of the transitions and \ - the items are the set of target nodes. - - Returns - ------- - transition_dict : dict - The transitions as a dictionary. - """ - result: Dict = copy.deepcopy(self._transitions) - for transitions in result.values(): - for (symb_by, s_to) in transitions.items(): - transitions[symb_by] = {s_to} - return result + """ Gets the dictionary representation of the transition function """ + raise NotImplementedError + @abstractmethod def is_deterministic(self) -> bool: """ Whether the transition function is deterministic """ - return True - - -class DuplicateTransitionError(Exception): - """ Signals a duplicated transition - - Parameters - ---------- - s_from : :class:`~pyformlang.finite_automaton.State` - The source state - symb_by : :class:`~pyformlang.finite_automaton.Symbol` - The transition symbol - s_to : :class:`~pyformlang.finite_automaton.State` - The wanted new destination state - s_to_old : :class:`~pyformlang.finite_automaton.State` - The old destination state - - """ - - def __init__(self, - s_from: State, - symb_by: Symbol, - s_to: State, - s_to_old: State) -> None: - super().__init__("Transition from " + str(s_from) + - " by " + str(symb_by) + - " goes to " + str(s_to_old) + " not " + str(s_to)) - self.s_from = s_from - self.symb_by = symb_by - self.s_to = s_to - self.s_to_old = s_to_old + raise NotImplementedError From 7e542087a402dc4f20e0835ef62df4c4cc033542 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 11 Nov 2024 18:26:45 +0300 Subject: [PATCH 22/42] change input types to Hashable, add utils.py --- .../deterministic_finite_automaton.py | 43 ++-- .../finite_automaton/doubly_linked_list.py | 1 - pyformlang/finite_automaton/epsilon.py | 4 +- pyformlang/finite_automaton/epsilon_nfa.py | 219 ++++++++++-------- .../finite_automaton/finite_automaton.py | 139 +++-------- .../finite_automaton_object.py | 11 +- .../nondeterministic_finite_automaton.py | 12 +- pyformlang/finite_automaton/partition.py | 1 - pyformlang/finite_automaton/state.py | 8 +- pyformlang/finite_automaton/symbol.py | 8 +- pyformlang/finite_automaton/utils.py | 58 +++++ 11 files changed, 259 insertions(+), 245 deletions(-) create mode 100644 pyformlang/finite_automaton/utils.py diff --git a/pyformlang/finite_automaton/deterministic_finite_automaton.py b/pyformlang/finite_automaton/deterministic_finite_automaton.py index 3d0d28d..3ccb667 100644 --- a/pyformlang/finite_automaton/deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/deterministic_finite_automaton.py @@ -2,15 +2,14 @@ Representation of a deterministic finite automaton """ -from typing import Dict, List, Iterable, AbstractSet, Optional, Any - +from typing import Dict, List, Iterable, AbstractSet, Optional, Hashable from numpy import empty from .state import State from .symbol import Symbol from .deterministic_transition_function import DeterministicTransitionFunction -from .finite_automaton import FiniteAutomaton, to_state, to_symbol -from .epsilon_nfa import to_single_state +from .finite_automaton import FiniteAutomaton +from .utils import to_state, to_symbol, to_single_state from .nondeterministic_finite_automaton import NondeterministicFiniteAutomaton from .hopcroft_processing_list import HopcroftProcessingList from .partition import Partition @@ -20,8 +19,8 @@ class PreviousTransitions: """For internal usage""" def __init__(self, - states: AbstractSet[Any], - symbols: AbstractSet[Any]) -> None: + states: AbstractSet[State], + symbols: AbstractSet[Symbol]) -> None: self._to_index_state: Dict[Optional[State], int] = {} self._to_index_state[None] = 0 for i, state in enumerate(states): @@ -105,27 +104,26 @@ class DeterministicFiniteAutomaton(NondeterministicFiniteAutomaton): """ def __init__(self, - states: AbstractSet[Any] = None, - input_symbols: AbstractSet[Any] = None, + states: AbstractSet[Hashable] = None, + input_symbols: AbstractSet[Hashable] = None, transition_function: DeterministicTransitionFunction = None, - start_state: Any = None, - final_states: AbstractSet[Any] = None) -> None: - super().__init__(states, input_symbols, None, None, final_states) + start_state: Hashable = None, + final_states: AbstractSet[Hashable] = None) -> None: + start_states = {start_state} if start_state is not None else None + super().__init__(states, + input_symbols, + None, + start_states, + final_states) self._transition_function = transition_function \ or DeterministicTransitionFunction() - if start_state is not None: - start_state = to_state(start_state) - self._start_states = {start_state} - self._states.add(start_state) - else: - self._start_states = set() @property def start_state(self) -> Optional[State]: """ Gets the start state """ return list(self._start_states)[0] if self._start_states else None - def add_start_state(self, state: Any) -> int: + def add_start_state(self, state: Hashable) -> int: """ Set an initial state Parameters @@ -150,7 +148,7 @@ def add_start_state(self, state: Any) -> int: self._states.add(state) return 1 - def remove_start_state(self, state: Any) -> int: + def remove_start_state(self, state: Hashable) -> int: """ remove an initial state Parameters @@ -177,7 +175,7 @@ def remove_start_state(self, state: Any) -> int: return 1 return 0 - def accepts(self, word: Iterable[Any]) -> bool: + def accepts(self, word: Iterable[Hashable]) -> bool: """ Checks whether the dfa accepts a given word Parameters @@ -283,8 +281,11 @@ def copy(self) -> "DeterministicFiniteAutomaton": dfa.add_transition(state, symbol, state_to) return dfa - def get_next_state(self, s_from: State, symb_by: Symbol) -> Optional[State]: + def get_next_state(self, s_from: Hashable, symb_by: Hashable) \ + -> Optional[State]: """ Make a call of deterministic transition function """ + s_from = to_state(s_from) + symb_by = to_symbol(symb_by) return self._transition_function.get_next_state(s_from, symb_by) def _get_previous_transitions(self) -> PreviousTransitions: diff --git a/pyformlang/finite_automaton/doubly_linked_list.py b/pyformlang/finite_automaton/doubly_linked_list.py index d1345e9..ce09237 100644 --- a/pyformlang/finite_automaton/doubly_linked_list.py +++ b/pyformlang/finite_automaton/doubly_linked_list.py @@ -1,7 +1,6 @@ """A doubly linked list""" from typing import Iterable, Optional, Any - from .doubly_linked_node import DoublyLinkedNode diff --git a/pyformlang/finite_automaton/epsilon.py b/pyformlang/finite_automaton/epsilon.py index 958bc34..431a98c 100644 --- a/pyformlang/finite_automaton/epsilon.py +++ b/pyformlang/finite_automaton/epsilon.py @@ -23,6 +23,4 @@ def __hash__(self) -> int: return hash("EPSILON TRANSITION") def __eq__(self, other: Any) -> bool: - if isinstance(other, Epsilon): - return True - return False + return isinstance(other, Epsilon) diff --git a/pyformlang/finite_automaton/epsilon_nfa.py b/pyformlang/finite_automaton/epsilon_nfa.py index 9d0fdfb..a79acf9 100644 --- a/pyformlang/finite_automaton/epsilon_nfa.py +++ b/pyformlang/finite_automaton/epsilon_nfa.py @@ -2,7 +2,8 @@ Nondeterministic Automaton with epsilon transitions """ -from typing import Iterable, Set, AbstractSet, Tuple, Any +from typing import Iterable, Set, AbstractSet, Tuple, Hashable +from networkx import MultiDiGraph from pyformlang.finite_automaton import NondeterministicFiniteAutomaton from pyformlang.finite_automaton import DeterministicFiniteAutomaton @@ -13,10 +14,9 @@ from .epsilon import Epsilon from .nondeterministic_transition_function import \ NondeterministicTransitionFunction - -from .regexable import Regexable from .finite_automaton import FiniteAutomaton -from .finite_automaton import to_state, to_symbol +from .utils import to_state, to_symbol, to_single_state +from .regexable import Regexable class EpsilonNFA(Regexable, FiniteAutomaton): @@ -66,11 +66,11 @@ class EpsilonNFA(Regexable, FiniteAutomaton): def __init__( self, - states: AbstractSet[Any] = None, - input_symbols: AbstractSet[Any] = None, + states: AbstractSet[Hashable] = None, + input_symbols: AbstractSet[Hashable] = None, transition_function: NondeterministicTransitionFunction = None, - start_states: AbstractSet[Any] = None, - final_states: AbstractSet[Any] = None) -> None: + start_states: AbstractSet[Hashable] = None, + final_states: AbstractSet[Hashable] = None) -> None: super().__init__() if states is not None: states = {to_state(x) for x in states} @@ -117,7 +117,7 @@ def _get_next_states_iterable(self, next_states = next_states.union(next_states_temp) return next_states - def accepts(self, word: Iterable[Any]) -> bool: + def accepts(self, word: Iterable[Hashable]) -> bool: """ Checks whether the epsilon nfa accepts a given word Parameters @@ -155,7 +155,7 @@ def accepts(self, word: Iterable[Any]) -> bool: current_states = self.eclose_iterable(next_states) return any(self.is_final_state(x) for x in current_states) - def eclose_iterable(self, states: Iterable[Any]) -> Set[State]: + def eclose_iterable(self, states: Iterable[Hashable]) -> Set[State]: """ Compute the epsilon closure of a collection of states Parameters @@ -185,7 +185,7 @@ def eclose_iterable(self, states: Iterable[Any]) -> Set[State]: res = res.union(self.eclose(state)) return res - def eclose(self, state: Any) -> Set[State]: + def eclose(self, state: Hashable) -> Set[State]: """ Compute the epsilon closure of a state Parameters @@ -277,8 +277,7 @@ def remove_epsilon_transitions(self) -> NondeterministicFiniteAutomaton: nfa.add_transition(state, symb, next_state) return nfa - def _to_deterministic_internal(self, - eclose: bool) \ + def _to_deterministic_internal(self, eclose: bool) \ -> DeterministicFiniteAutomaton: """ Transforms the epsilon-nfa into a dfa @@ -393,6 +392,55 @@ def copy(self) -> "EpsilonNFA": def __copy__(self) -> "EpsilonNFA": return self.copy() + @classmethod + def from_networkx(cls, graph: MultiDiGraph) -> "EpsilonNFA": + """ + Import a networkx graph into an finite state automaton. \ + The imported graph requires to have the good format, i.e. to come \ + from the function to_networkx + + Parameters + ---------- + graph : + The graph representation of the automaton + + Returns + ------- + enfa : + A epsilon nondeterministic finite automaton read from the graph + + TODO + ------- + * We lose the type of the node value if going through a dot file + * Explain the format + + Examples + -------- + + >>> enfa = EpsilonNFA() + >>> enfa.add_transitions([(0, "abc", 1), (0, "d", 1), \ + (0, "epsilon", 2)]) + >>> enfa.add_start_state(0) + >>> enfa.add_final_state(1) + >>> graph = enfa.to_networkx() + >>> enfa_from_nx = EpsilonNFA.from_networkx(graph) + + """ + enfa = EpsilonNFA() + for s_from in graph: + for s_to in graph[s_from]: + for transition in graph[s_from][s_to].values(): + if "label" in transition: + enfa.add_transition(s_from, + transition["label"], + s_to) + for node in graph.nodes: + if graph.nodes[node].get("is_start", False): + enfa.add_start_state(node) + if graph.nodes[node].get("is_final", False): + enfa.add_final_state(node) + return enfa + def to_regex(self) -> Regex: """ Transforms the EpsilonNFA to a regular expression @@ -455,10 +503,10 @@ def _get_regex_simple(self) -> str: return "epsilon" start_to_start, start_to_end, end_to_start, end_to_end = \ self._get_bi_transitions() - return get_regex_sub(start_to_start, - start_to_end, - end_to_start, - end_to_end) + return self.__get_regex_sub(start_to_start, + start_to_end, + end_to_start, + end_to_end) def _get_bi_transitions(self) -> Tuple[str, str, str, str]: """ Internal method to compute the transition in the case of a \ @@ -598,19 +646,19 @@ def get_intersection(self, other: "EpsilonNFA") -> "EpsilonNFA": processed = set() for st0 in self.eclose_iterable(self.start_states): for st1 in other.eclose_iterable(other.start_states): - enfa.add_start_state(combine_state_pair(st0, st1)) + enfa.add_start_state(self.__combine_state_pair(st0, st1)) to_process.append((st0, st1)) processed.add((st0, st1)) for st0 in self.final_states: for st1 in other.final_states: - enfa.add_final_state(combine_state_pair(st0, st1)) + enfa.add_final_state(self.__combine_state_pair(st0, st1)) while to_process: st0, st1 = to_process.pop() - current_state = combine_state_pair(st0, st1) + current_state = self.__combine_state_pair(st0, st1) for symb in symbols: for new_s0 in self.eclose_iterable(self(st0, symb)): for new_s1 in other.eclose_iterable(other(st1, symb)): - state = combine_state_pair(new_s0, new_s1) + state = self.__combine_state_pair(new_s0, new_s1) enfa.add_transition(current_state, symb, state) if (new_s0, new_s1) not in processed: processed.add((new_s0, new_s1)) @@ -894,77 +942,58 @@ def _create_or_transitions(self) -> None: def __bool__(self) -> bool: return not self.is_empty() - -def get_temp(start_to_end: str, end_to_start: str, end_to_end: str) \ - -> Tuple[str, str]: - """ Gets a temp values in the computation of the simple automaton regex """ - temp = "epsilon" - if (start_to_end != "epsilon" - or end_to_end != "epsilon" - or end_to_start != "epsilon"): - temp = "" - if start_to_end != "epsilon": - temp = start_to_end - if end_to_end != "epsilon": - if temp: - temp += "." + end_to_end + "*" - else: - temp = end_to_end + "*" - part1 = temp - if not part1: - part1 = "epsilon" - if end_to_start != "epsilon": - if temp: - temp += "." + end_to_start - else: - temp = end_to_start - if not end_to_start: - temp = "" - return (temp, part1) - - -def get_regex_sub(start_to_start: str, - start_to_end: str, - end_to_start: str, - end_to_end: str) -> str: - """ Combines the transitions in the regex simple function """ - if not start_to_end: - return "" - temp, part1 = get_temp(start_to_end, end_to_start, end_to_end) - part0 = "epsilon" - if start_to_start != "epsilon": - if temp: - part0 = "(" + start_to_start + "+" + temp + ")*" - else: - part0 = "(" + start_to_start + ")*" - elif temp != "epsilon" and temp: - part0 = "(" + temp + ")*" - return "(" + part0 + "." + part1 + ")" - - -def to_single_state(l_states: Iterable[State]) -> State: - """ Merge a list of states - - Parameters - ---------- - l_states : list of :class:`~pyformlang.finite_automaton.State` - A list of states - - Returns - ---------- - state : :class:`~pyformlang.finite_automaton.State` - The merged state - """ - values = [] - for state in l_states: - if state is not None: - values.append(str(state.value)) - else: - values.append("TRASH") - values = sorted(values) - return State(";".join(values)) - - -def combine_state_pair(state0: State, state1: State) -> State: - """ Combine two states """ - return State(str(state0.value) + "; " + str(state1.value)) + def __get_regex_sub(self, + start_to_start: str, + start_to_end: str, + end_to_start: str, + end_to_end: str) -> str: + """ Combines the transitions in the regex simple function """ + if not start_to_end: + return "" + temp, part1 = self.__get_temp(start_to_end, end_to_start, end_to_end) + part0 = "epsilon" + if start_to_start != "epsilon": + if temp: + part0 = "(" + start_to_start + "+" + temp + ")*" + else: + part0 = "(" + start_to_start + ")*" + elif temp != "epsilon" and temp: + part0 = "(" + temp + ")*" + return "(" + part0 + "." + part1 + ")" + + @staticmethod + def __get_temp(start_to_end: str, + end_to_start: str, + end_to_end: str) -> Tuple[str, str]: + """ + Gets a temp values in the computation + of the simple automaton regex. + """ + temp = "epsilon" + if (start_to_end != "epsilon" + or end_to_end != "epsilon" + or end_to_start != "epsilon"): + temp = "" + if start_to_end != "epsilon": + temp = start_to_end + if end_to_end != "epsilon": + if temp: + temp += "." + end_to_end + "*" + else: + temp = end_to_end + "*" + part1 = temp + if not part1: + part1 = "epsilon" + if end_to_start != "epsilon": + if temp: + temp += "." + end_to_start + else: + temp = end_to_start + if not end_to_start: + temp = "" + return (temp, part1) + + @staticmethod + def __combine_state_pair(state0: State, state1: State) -> State: + """ Combine two states """ + return State(str(state0.value) + "; " + str(state1.value)) diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index a0f050b..01165ea 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -2,13 +2,12 @@ # pylint: disable=function-redefined -from typing import Dict, List, Set, Tuple, Iterable, Optional, Any +from typing import Dict, List, Set, Tuple, Iterable, Optional, Hashable, Any from collections import deque from networkx import MultiDiGraph from networkx.drawing.nx_pydot import write_dot from fastcore.dispatch import typedispatch -from pyformlang.finite_automaton import EpsilonNFA from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.fst import FST @@ -16,6 +15,7 @@ from .symbol import Symbol from .epsilon import Epsilon from .transition_function import TransitionFunction +from .utils import to_state, to_symbol class FiniteAutomaton: @@ -46,7 +46,10 @@ def __init__(self) -> None: self._start_states: Set[State] = set() self._final_states: Set[State] = set() - def add_transition(self, s_from: Any, symb_by: Any, s_to: Any) -> int: + def add_transition(self, + s_from: Hashable, + symb_by: Hashable, + s_to: Hashable) -> int: """ Adds a transition to the nfa Parameters @@ -86,8 +89,8 @@ def add_transition(self, s_from: Any, symb_by: Any, s_to: Any) -> int: self._input_symbols.add(symb_by) return temp - def add_transitions(self, \ - transitions_list: Iterable[Tuple[Any, Any, Any]]) -> int: + def add_transitions(self, transitions_list: \ + Iterable[Tuple[Hashable, Hashable, Hashable]]) -> int: """ Adds several transitions to the automaton @@ -121,7 +124,10 @@ def add_transitions(self, \ temp = self.add_transition(s_from, symb_by, s_to) return temp - def remove_transition(self, s_from: Any, symb_by: Any, s_to: Any) -> int: + def remove_transition(self, + s_from: Hashable, + symb_by: Hashable, + s_to: Hashable) -> int: """ Remove a transition of the nfa Parameters @@ -195,7 +201,7 @@ def final_states(self) -> Set[State]: """The final states""" return self._final_states - def add_start_state(self, state: Any) -> int: + def add_start_state(self, state: Hashable) -> int: """ Set an initial state Parameters @@ -222,7 +228,7 @@ def add_start_state(self, state: Any) -> int: self._states.add(state) return 1 - def remove_start_state(self, state: Any) -> int: + def remove_start_state(self, state: Hashable) -> int: """ remove an initial state Parameters @@ -251,7 +257,7 @@ def remove_start_state(self, state: Any) -> int: return 1 return 0 - def add_final_state(self, state: Any) -> int: + def add_final_state(self, state: Hashable) -> int: """ Adds a new final state Parameters @@ -279,7 +285,7 @@ def add_final_state(self, state: Any) -> int: self._states.add(state) return 1 - def remove_final_state(self, state: Any) -> int: + def remove_final_state(self, state: Hashable) -> int: """ Remove a final state Parameters @@ -309,7 +315,7 @@ def remove_final_state(self, state: Any) -> int: return 0 @typedispatch - def __call__(self, s_from: Any) -> Iterable[Tuple[Symbol, Set[State]]]: + def __call__(self, s_from: Hashable) -> Iterable[Tuple[Symbol, Set[State]]]: """ Gives FA transitions from given state. Calls the transition function @@ -318,7 +324,7 @@ def __call__(self, s_from: Any) -> Iterable[Tuple[Symbol, Set[State]]]: return self._transition_function(s_from) @typedispatch - def __call__(self, s_from: Any, symb_by: Any) -> Set[State]: + def __call__(self, s_from: Hashable, symb_by: Hashable) -> Set[State]: """ Gives the states obtained after calling a symbol on a state Calls the transition function @@ -348,17 +354,18 @@ def __call__(self, s_from: Any, symb_by: Any) -> Set[State]: symb_by = to_symbol(symb_by) return self._transition_function(s_from, symb_by) - def get_transitions_from(self, s_from: State) \ + def get_transitions_from(self, s_from: Hashable) \ -> Iterable[Tuple[Symbol, State]]: """ Gets transitions from the given state """ + s_from = to_state(s_from) return self._transition_function.get_transitions_from(s_from) - def get_next_states_from(self, s_from: Any) -> Set[State]: + def get_next_states_from(self, s_from: Hashable) -> Set[State]: """ Gets a set of states that are next to the given one """ s_from = to_state(s_from) return self._transition_function.get_next_states_from(s_from) - def is_final_state(self, state: Any) -> bool: + def is_final_state(self, state: Hashable) -> bool: """ Checks if a state is final Parameters @@ -391,7 +398,7 @@ def start_states(self) -> Set[State]: """The start states""" return self._start_states - def add_symbol(self, symbol: Any) -> None: + def add_symbol(self, symbol: Hashable) -> None: """ Add a symbol Parameters @@ -506,7 +513,7 @@ def to_networkx(self) -> MultiDiGraph: peripheries=2 if state in self.final_states else 1, label=state.value) if state in self.start_states: - add_start_state_to_graph(graph, state) + self.__add_start_state_to_graph(graph, state) for s_from, symbol, s_to in self._transition_function.get_edges(): label_ = symbol.value if label_ == 'epsilon': @@ -514,56 +521,6 @@ def to_networkx(self) -> MultiDiGraph: graph.add_edge(s_from.value, s_to.value, label=label_) return graph - @classmethod - def from_networkx(cls, graph: MultiDiGraph) \ - -> EpsilonNFA: - """ - Import a networkx graph into an finite state automaton. \ - The imported graph requires to have the good format, i.e. to come \ - from the function to_networkx - - Parameters - ---------- - graph : - The graph representation of the automaton - - Returns - ------- - enfa : - A epsilon nondeterministic finite automaton read from the graph - - TODO - ------- - * We lose the type of the node value if going through a dot file - * Explain the format - - Examples - -------- - - >>> enfa = EpsilonNFA() - >>> enfa.add_transitions([(0, "abc", 1), (0, "d", 1), \ - (0, "epsilon", 2)]) - >>> enfa.add_start_state(0) - >>> enfa.add_final_state(1) - >>> graph = enfa.to_networkx() - >>> enfa_from_nx = EpsilonNFA.from_networkx(graph) - - """ - enfa = EpsilonNFA() - for s_from in graph: - for s_to in graph[s_from]: - for transition in graph[s_from][s_to].values(): - if "label" in transition: - enfa.add_transition(s_from, - transition["label"], - s_to) - for node in graph.nodes: - if graph.nodes[node].get("is_start", False): - enfa.add_start_state(node) - if graph.nodes[node].get("is_final", False): - enfa.add_final_state(node) - return enfa - def write_as_dot(self, filename: str) -> None: """ Write the automaton in dot format into a file @@ -737,41 +694,13 @@ def __try_add(set_to_add_to: Set[Any], element_to_add: Any) -> bool: set_to_add_to.add(element_to_add) return len(set_to_add_to) != initial_length - -def to_state(given: Any) -> State: - """ Transforms the input into a state - - Parameters - ---------- - given : any - What we want to transform - """ - if isinstance(given, State): - return given - return State(given) - - -def to_symbol(given: Any) -> Symbol: - """ Transforms the input into a symbol - - Parameters - ---------- - given : any - What we want to transform - """ - if isinstance(given, Symbol): - return given - if given in ("epsilon", "É›"): - return Epsilon() - return Symbol(given) - - -def add_start_state_to_graph(graph: MultiDiGraph, state: State) -> None: - """ Adds a starting node to a given graph """ - graph.add_node("starting_" + str(state.value), - label="", - shape=None, - height=.0, - width=.0) - graph.add_edge("starting_" + str(state.value), - state.value) + @staticmethod + def __add_start_state_to_graph(graph: MultiDiGraph, state: State) -> None: + """ Adds a starting node to a given graph """ + graph.add_node("starting_" + str(state.value), + label="", + shape=None, + height=.0, + width=.0) + graph.add_edge("starting_" + str(state.value), + state.value) diff --git a/pyformlang/finite_automaton/finite_automaton_object.py b/pyformlang/finite_automaton/finite_automaton_object.py index 67ab262..ed80609 100644 --- a/pyformlang/finite_automaton/finite_automaton_object.py +++ b/pyformlang/finite_automaton/finite_automaton_object.py @@ -2,7 +2,7 @@ Represents an object of a finite state automaton """ -from typing import Any +from typing import Hashable class FiniteAutomatonObject: # pylint: disable=too-few-public-methods @@ -14,15 +14,20 @@ class FiniteAutomatonObject: # pylint: disable=too-few-public-methods The value of the object """ - def __init__(self, value: Any) -> None: + def __init__(self, value: Hashable) -> None: self._value = value self._hash = None + def __hash__(self) -> int: + if self._hash is None: + self._hash = hash(self._value) + return self._hash + def __repr__(self) -> str: return str(self._value) @property - def value(self) -> Any: + def value(self) -> Hashable: """ Gets the value of the object Returns diff --git a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py index 12e966c..a309617 100644 --- a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py @@ -2,13 +2,13 @@ Representation of a nondeterministic finite automaton """ -from typing import Iterable, Any +from typing import Iterable, Hashable from .epsilon import Epsilon from .epsilon_nfa import EpsilonNFA -from .finite_automaton import to_symbol from .deterministic_transition_function import InvalidEpsilonTransition from .deterministic_finite_automaton import DeterministicFiniteAutomaton +from .utils import to_symbol class NondeterministicFiniteAutomaton(EpsilonNFA): @@ -60,7 +60,7 @@ class NondeterministicFiniteAutomaton(EpsilonNFA): """ - def accepts(self, word: Iterable[Any]) -> bool: + def accepts(self, word: Iterable[Hashable]) -> bool: """ Checks whether the nfa accepts a given word Parameters @@ -137,9 +137,9 @@ def to_deterministic(self) -> DeterministicFiniteAutomaton: return self._to_deterministic_internal(False) def add_transition(self, - s_from: Any, - symb_by: Any, - s_to: Any) -> int: + s_from: Hashable, + symb_by: Hashable, + s_to: Hashable) -> int: if symb_by == Epsilon(): raise InvalidEpsilonTransition return super().add_transition(s_from, symb_by, s_to) diff --git a/pyformlang/finite_automaton/partition.py b/pyformlang/finite_automaton/partition.py index 419e940..dbb4cdc 100644 --- a/pyformlang/finite_automaton/partition.py +++ b/pyformlang/finite_automaton/partition.py @@ -3,7 +3,6 @@ """ from typing import Dict, List, Iterable, Any - from .doubly_linked_list import DoublyLinkedList diff --git a/pyformlang/finite_automaton/state.py b/pyformlang/finite_automaton/state.py index 90e37cb..53ed6d9 100644 --- a/pyformlang/finite_automaton/state.py +++ b/pyformlang/finite_automaton/state.py @@ -2,7 +2,7 @@ Representation of a state in a finite state automaton """ -from typing import Any +from typing import Hashable, Any from .finite_automaton_object import FiniteAutomatonObject @@ -22,15 +22,13 @@ class State(FiniteAutomatonObject): # pylint: disable=too-few-public-methods """ - def __init__(self, value: Any) -> None: + def __init__(self, value: Hashable) -> None: super().__init__(value) self.index = None self.index_cfg_converter = None def __hash__(self) -> int: - if self._hash is None: - self._hash = hash(self._value) - return self._hash + return super().__hash__() def __eq__(self, other: Any) -> bool: if isinstance(other, State): diff --git a/pyformlang/finite_automaton/symbol.py b/pyformlang/finite_automaton/symbol.py index 7be694b..8599108 100644 --- a/pyformlang/finite_automaton/symbol.py +++ b/pyformlang/finite_automaton/symbol.py @@ -21,12 +21,10 @@ class Symbol(FiniteAutomatonObject): # pylint: disable=too-few-public-methods A """ + def __hash__(self) -> int: + return super().__hash__() + def __eq__(self, other: Any) -> bool: if isinstance(other, Symbol): return self._value == other.value return self._value == other - - def __hash__(self) -> int: - if self._hash is None: - self._hash = hash(self._value) - return self._hash diff --git a/pyformlang/finite_automaton/utils.py b/pyformlang/finite_automaton/utils.py new file mode 100644 index 0000000..f8eddca --- /dev/null +++ b/pyformlang/finite_automaton/utils.py @@ -0,0 +1,58 @@ +""" Utility for finite automata """ + +from typing import Iterable, Hashable + +from .state import State +from .symbol import Symbol +from .epsilon import Epsilon + + +def to_state(given: Hashable) -> State: + """ Transforms the input into a state + + Parameters + ---------- + given : any + What we want to transform + """ + if isinstance(given, State): + return given + return State(given) + + +def to_symbol(given: Hashable) -> Symbol: + """ Transforms the input into a symbol + + Parameters + ---------- + given : any + What we want to transform + """ + if isinstance(given, Symbol): + return given + if given in ("epsilon", "É›"): + return Epsilon() + return Symbol(given) + + +def to_single_state(l_states: Iterable[State]) -> State: + """ Merge a list of states + + Parameters + ---------- + l_states : list of :class:`~pyformlang.finite_automaton.State` + A list of states + + Returns + ---------- + state : :class:`~pyformlang.finite_automaton.State` + The merged state + """ + values = [] + for state in l_states: + if state is not None: + values.append(str(state.value)) + else: + values.append("TRASH") + values = sorted(values) + return State(";".join(values)) From 85a06d1338afa56d8fe7e5b04d6f25c7825f799a Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 11 Nov 2024 21:23:37 +0300 Subject: [PATCH 23/42] remove to_deterministic, refactor --- .../deterministic_finite_automaton.py | 97 +++++++++---- pyformlang/finite_automaton/epsilon_nfa.py | 132 +----------------- .../finite_automaton/finite_automaton.py | 94 ++++--------- .../nondeterministic_finite_automaton.py | 54 +++---- pyformlang/pda/pda.py | 14 +- pyformlang/rsa/box.py | 21 +-- pyformlang/rsa/recursive_automaton.py | 19 ++- 7 files changed, 168 insertions(+), 263 deletions(-) diff --git a/pyformlang/finite_automaton/deterministic_finite_automaton.py b/pyformlang/finite_automaton/deterministic_finite_automaton.py index 3ccb667..a8174a3 100644 --- a/pyformlang/finite_automaton/deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/deterministic_finite_automaton.py @@ -2,17 +2,17 @@ Representation of a deterministic finite automaton """ -from typing import Dict, List, Iterable, AbstractSet, Optional, Hashable +from typing import Dict, List, Iterable, AbstractSet, Optional, Hashable, Any from numpy import empty from .state import State from .symbol import Symbol from .deterministic_transition_function import DeterministicTransitionFunction -from .finite_automaton import FiniteAutomaton -from .utils import to_state, to_symbol, to_single_state +from .epsilon_nfa import EpsilonNFA from .nondeterministic_finite_automaton import NondeterministicFiniteAutomaton from .hopcroft_processing_list import HopcroftProcessingList from .partition import Partition +from .utils import to_state, to_symbol, to_single_state class PreviousTransitions: @@ -226,27 +226,6 @@ def is_deterministic(self) -> bool: """ return True - def to_deterministic(self) -> "DeterministicFiniteAutomaton": - """ Transforms the current automaton into a dfa. Does nothing if the \ - automaton is already deterministic. - - Returns - ---------- - dfa : :class:`~pyformlang.deterministic_finite_automaton\ - .DeterministicFiniteAutomaton` - A dfa equivalent to the current nfa - - Examples - -------- - - >>> dfa0 = DeterministicFiniteAutomaton() - >>> dfa1 = dfa0.to_deterministic() - >>> dfa0.is_equivalent_to(dfa1) - True - - """ - return self - def copy(self) -> "DeterministicFiniteAutomaton": """ Copies the current DFA @@ -355,6 +334,67 @@ def minimize(self) -> "DeterministicFiniteAutomaton": done.add((next_node, symbol)) return dfa + @classmethod + def from_epsilon_nfa(cls, enfa: EpsilonNFA) \ + -> "DeterministicFiniteAutomaton": + """ Builds dfa equivalent to the given enfa """ + return cls._from_epsilon_nfa_internal(enfa, True) + + @classmethod + def from_nfa(cls, nfa: NondeterministicFiniteAutomaton) \ + -> "DeterministicFiniteAutomaton": + """ Builds dfa equivalent to the given nfa """ + return cls._from_epsilon_nfa_internal(nfa, False) + + @classmethod + def _from_epsilon_nfa_internal(cls, enfa: EpsilonNFA, eclose: bool) \ + -> "DeterministicFiniteAutomaton": + """ Builds dfa equivalent to the given automaton + + Parameters + ---------- + eclose : bool + Whether to use the epsilon closure or not + + Returns + ---------- + dfa : :class:`~pyformlang.finite_automaton\ + .DeterministicFiniteAutomaton` + A dfa equivalent to the current nfa + """ + dfa = DeterministicFiniteAutomaton() + # Add Eclose + if eclose: + start_eclose = enfa.eclose_iterable(enfa.start_states) + else: + start_eclose = enfa.start_states + start_state = to_single_state(start_eclose) + dfa.add_start_state(start_state) + to_process = [start_eclose] + processed = {start_state} + while to_process: + current = to_process.pop() + s_from = to_single_state(current) + for symbol in enfa.symbols: + all_trans = [enfa(x, symbol) for x in current] + state = set() + for trans in all_trans: + state = state.union(trans) + if not state: + continue + # Eclose added + if eclose: + state = enfa.eclose_iterable(state) + state_merged = to_single_state(state) + dfa.add_transition(s_from, symbol, state_merged) + if state_merged not in processed: + processed.add(state_merged) + to_process.append(state) + for state in current: + if state in enfa.final_states: + dfa.add_final_state(s_from) + return dfa + def _get_partition(self) -> Partition: previous_transitions = self._get_previous_transitions() finals = [] @@ -396,7 +436,12 @@ def _get_partition(self) -> Partition: processing_list.insert(new_class, symbol) return partition - def is_equivalent_to(self, other: FiniteAutomaton) -> bool: + def __eq__(self, other: Any) -> bool: + if not isinstance(other, EpsilonNFA): + return False + return self.is_equivalent_to(other) + + def is_equivalent_to(self, other: EpsilonNFA) -> bool: """ Check whether two automata are equivalent Parameters @@ -423,7 +468,7 @@ def is_equivalent_to(self, other: FiniteAutomaton) -> bool: """ if not isinstance(other, DeterministicFiniteAutomaton): - other_dfa = other.to_deterministic() + other_dfa = DeterministicFiniteAutomaton.from_epsilon_nfa(other) return self.is_equivalent_to(other_dfa) self_minimal = self.minimize() other_minimal = other.minimize() diff --git a/pyformlang/finite_automaton/epsilon_nfa.py b/pyformlang/finite_automaton/epsilon_nfa.py index a79acf9..909b66d 100644 --- a/pyformlang/finite_automaton/epsilon_nfa.py +++ b/pyformlang/finite_automaton/epsilon_nfa.py @@ -5,8 +5,6 @@ from typing import Iterable, Set, AbstractSet, Tuple, Hashable from networkx import MultiDiGraph -from pyformlang.finite_automaton import NondeterministicFiniteAutomaton -from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.regular_expression import Regex from .state import State @@ -15,7 +13,7 @@ from .nondeterministic_transition_function import \ NondeterministicTransitionFunction from .finite_automaton import FiniteAutomaton -from .utils import to_state, to_symbol, to_single_state +from .utils import to_state, to_symbol from .regexable import Regexable @@ -249,110 +247,6 @@ def is_deterministic(self) -> bool: and self._transition_function.is_deterministic() \ and all({x} == self.eclose(x) for x in self._states) - def remove_epsilon_transitions(self) -> NondeterministicFiniteAutomaton: - """ Removes the epsilon transitions from the automaton - - Returns - ---------- - dfa : :class:`~pyformlang.finite_automaton. \ - NondeterministicFiniteAutomaton` - A non-deterministic finite automaton equivalent to the current \ - nfa, with no epsilon transition - """ - nfa = NondeterministicFiniteAutomaton() - for state in self._start_states: - nfa.add_start_state(state) - for state in self._final_states: - nfa.add_final_state(state) - start_eclose = self.eclose_iterable(self._start_states) - for state in start_eclose: - nfa.add_start_state(state) - for state in self._states: - eclose = self.eclose(state) - for e_state in eclose: - if e_state in self._final_states: - nfa.add_final_state(state) - for symb in self._input_symbols: - for next_state in self._transition_function(e_state, symb): - nfa.add_transition(state, symb, next_state) - return nfa - - def _to_deterministic_internal(self, eclose: bool) \ - -> DeterministicFiniteAutomaton: - """ Transforms the epsilon-nfa into a dfa - - Parameters - ---------- - eclose : bool - Whether to use the epsilon closure or not - - Returns - ---------- - dfa : :class:`~pyformlang.finite_automaton\ - .DeterministicFiniteAutomaton` - A dfa equivalent to the current nfa - """ - dfa = DeterministicFiniteAutomaton() - # Add Eclose - if eclose: - start_eclose = self.eclose_iterable(self._start_states) - else: - start_eclose = self._start_states - start_state = to_single_state(start_eclose) - dfa.add_start_state(start_state) - to_process = [start_eclose] - processed = {start_state} - while to_process: - current = to_process.pop() - s_from = to_single_state(current) - for symb in self._input_symbols: - all_trans = [self._transition_function(x, symb) - for x in current] - state = set() - for trans in all_trans: - state = state.union(trans) - if not state: - continue - # Eclose added - if eclose: - state = self.eclose_iterable(state) - state_merged = to_single_state(state) - dfa.add_transition(s_from, symb, state_merged) - if state_merged not in processed: - processed.add(state_merged) - to_process.append(state) - for state in current: - if state in self._final_states: - dfa.add_final_state(s_from) - return dfa - - def to_deterministic(self) -> DeterministicFiniteAutomaton: - """ Transforms the epsilon-nfa into a dfa - - Returns - ---------- - dfa : :class:`~pyformlang.finite_automaton\ - .DeterministicFiniteAutomaton` - A dfa equivalent to the current nfa - - Examples - -------- - - >>> enfa = EpsilonNFA() - >>> enfa.add_transitions([(0, "abc", 1), (0, "d", 1), \ - (0, "epsilon", 2)]) - >>> enfa.add_start_state(0) - >>> enfa.add_final_state(1) - >>> dfa = enfa.to_deterministic() - >>> dfa.is_deterministic() - True - - >>> enfa.is_equivalent_to(dfa) - True - - """ - return self._to_deterministic_internal(True) - def copy(self) -> "EpsilonNFA": """ Copies the current Epsilon NFA @@ -887,30 +781,6 @@ def _remove_state(self, state: State) -> None: # We make sure the automaton has the good structure self._create_or_transitions() - def minimize(self) -> DeterministicFiniteAutomaton: - """ Minimize the current epsilon NFA - - Returns - ---------- - dfa : :class:`~pyformlang.deterministic_finite_automaton\ - .DeterministicFiniteAutomaton` - The minimal DFA - - Examples - -------- - - >>> enfa = EpsilonNFA() - >>> enfa.add_transitions([(0, "abc", 1), (0, "d", 1), \ - (0, "epsilon", 2)]) - >>> enfa.add_start_state(0) - >>> enfa.add_final_state(1) - >>> dfa_minimal = enfa.minimize() - >>> dfa_minimal.is_equivalent(enfa) - True - - """ - return self.to_deterministic().minimize() - def _create_or_transitions(self) -> None: """ Creates a OR transition instead of several connections diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index 01165ea..d605d81 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -3,12 +3,12 @@ # pylint: disable=function-redefined from typing import Dict, List, Set, Tuple, Iterable, Optional, Hashable, Any +from abc import abstractmethod from collections import deque from networkx import MultiDiGraph from networkx.drawing.nx_pydot import write_dot from fastcore.dispatch import typedispatch -from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.fst import FST from .state import State @@ -46,6 +46,32 @@ def __init__(self) -> None: self._start_states: Set[State] = set() self._final_states: Set[State] = set() + @property + def states(self) -> Set[State]: + """ Gives the states + + Returns + ---------- + states : set of :class:`~pyformlang.finite_automaton.State` + The states + """ + return self._states + + @property + def symbols(self) -> Set[Symbol]: + """The symbols""" + return self._input_symbols + + @property + def start_states(self) -> Set[State]: + """The start states""" + return self._start_states + + @property + def final_states(self) -> Set[State]: + """The final states""" + return self._final_states + def add_transition(self, s_from: Hashable, symb_by: Hashable, @@ -160,17 +186,6 @@ def remove_transition(self, symb_by, s_to) - @property - def states(self) -> Set[State]: - """ Gives the states - - Returns - ---------- - states : set of :class:`~pyformlang.finite_automaton.State` - The states - """ - return self._states - def get_number_transitions(self) -> int: """ Gives the number of transitions @@ -191,16 +206,6 @@ def get_number_transitions(self) -> int: """ return self._transition_function.get_number_transitions() - @property - def symbols(self) -> Set[Symbol]: - """The symbols""" - return self._input_symbols - - @property - def final_states(self) -> Set[State]: - """The final states""" - return self._final_states - def add_start_state(self, state: Hashable) -> int: """ Set an initial state @@ -393,11 +398,6 @@ def is_final_state(self, state: Hashable) -> bool: state = to_state(state) return state in self._final_states - @property - def start_states(self) -> Set[State]: - """The start states""" - return self._start_states - def add_symbol(self, symbol: Hashable) -> None: """ Add a symbol @@ -542,36 +542,6 @@ def write_as_dot(self, filename: str) -> None: """ write_dot(self.to_networkx(), filename) - def is_equivalent_to(self, other: "FiniteAutomaton") -> bool: - """ - Checks if the current automaton is equivalent to a given one. - - Parameters - ---------- - other : - An other finite state automaton - - Returns - ------- - is_equivalent : bool - Whether the two automata are equivalent or not - - Examples - -------- - - >>> enfa = EpsilonNFA() - >>> enfa.add_transitions([(0, "abc", 1), (0, "d", 1), \ - (0, "epsilon", 2)]) - >>> enfa.add_start_state(0) - >>> enfa.add_final_state(1) - >>> dfa = enfa.to_deterministic() - >>> dfa.is_deterministic() - True - - """ - self_dfa = self.to_deterministic() - return self_dfa.is_equivalent_to(other) - def get_accepted_words(self, max_length: Optional[int] = None) \ -> Iterable[List[Symbol]]: """ @@ -639,19 +609,11 @@ def _get_reachable_states(self) -> Set[State]: states_to_process.append(next_state) return visited - def to_deterministic(self) -> DeterministicFiniteAutomaton: - """ Turns the automaton into a deterministic one""" - raise NotImplementedError - + @abstractmethod def is_deterministic(self) -> bool: """ Checks if the automaton is deterministic """ raise NotImplementedError - def __eq__(self, other: Any) -> bool: - if not isinstance(other, FiniteAutomaton): - return False - return self.is_equivalent_to(other) - def __len__(self) -> int: """Number of transitions""" return len(self._transition_function) diff --git a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py index a309617..2d0d60f 100644 --- a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py @@ -7,7 +7,6 @@ from .epsilon import Epsilon from .epsilon_nfa import EpsilonNFA from .deterministic_transition_function import InvalidEpsilonTransition -from .deterministic_finite_automaton import DeterministicFiniteAutomaton from .utils import to_symbol @@ -113,29 +112,6 @@ def is_deterministic(self) -> bool: return len(self._start_states) <= 1 and \ self._transition_function.is_deterministic() - def to_deterministic(self) -> DeterministicFiniteAutomaton: - """ Transforms the nfa into a dfa - - Returns - ---------- - dfa : :class:`~pyformlang.deterministic_finite_automaton\ - .DeterministicFiniteAutomaton` - A dfa equivalent to the current nfa - - Examples - -------- - - >>> nfa = NondeterministicFiniteAutomaton() - >>> nfa.add_transitions([(0, "a", 1), (0, "a", 2)]) - >>> nfa.add_start_state(0) - >>> nfa.add_final_state(1) - >>> dfa = nfa.to_deterministic() - >>> nfa.is_equivalent_to(dfa) - True - - """ - return self._to_deterministic_internal(False) - def add_transition(self, s_from: Hashable, symb_by: Hashable, @@ -143,3 +119,33 @@ def add_transition(self, if symb_by == Epsilon(): raise InvalidEpsilonTransition return super().add_transition(s_from, symb_by, s_to) + + @classmethod + def from_epsilon_nfa(cls, enfa: EpsilonNFA) \ + -> "NondeterministicFiniteAutomaton": + """ Builds nfa equivalent to the given enfa + + Returns + ---------- + dfa : :class:`~pyformlang.finite_automaton. \ + NondeterministicFiniteAutomaton` + A non-deterministic finite automaton equivalent to the current \ + nfa, with no epsilon transition + """ + nfa = NondeterministicFiniteAutomaton() + for state in enfa.start_states: + nfa.add_start_state(state) + for state in enfa.final_states: + nfa.add_final_state(state) + start_eclose = enfa.eclose_iterable(enfa.start_states) + for state in start_eclose: + nfa.add_start_state(state) + for state in enfa.states: + eclose = enfa.eclose(state) + for e_state in eclose: + if e_state in enfa.final_states: + nfa.add_final_state(state) + for symb in enfa.symbols: + for next_state in enfa(e_state, symb): + nfa.add_transition(state, symb, next_state) + return nfa diff --git a/pyformlang/pda/pda.py b/pyformlang/pda/pda.py index 82c3b31..04642d9 100644 --- a/pyformlang/pda/pda.py +++ b/pyformlang/pda/pda.py @@ -17,7 +17,6 @@ from .transition_function import TransitionFunction from .utils import PDAObjectCreator from ..finite_automaton import FiniteAutomaton -from ..finite_automaton.finite_automaton import add_start_state_to_graph INPUT_SYMBOL = 1 @@ -582,7 +581,7 @@ def to_networkx(self) -> nx.MultiDiGraph: peripheries=2 if state in self.final_states else 1, label=state.value) if state == self._start_state: - add_start_state_to_graph(graph, state) + self.__add_start_state_to_graph(graph, state) if self._start_stack_symbol is not None: graph.add_node("INITIAL_STACK_HIDDEN", label=json.dumps(self._start_stack_symbol.value), @@ -661,6 +660,17 @@ def write_as_dot(self, filename): """ write_dot(self.to_networkx(), filename) + @staticmethod + def __add_start_state_to_graph(graph: nx.MultiDiGraph, state: State) -> None: + """ Adds a starting node to a given graph """ + graph.add_node("starting_" + str(state.value), + label="", + shape=None, + height=.0, + width=.0) + graph.add_edge("starting_" + str(state.value), + state.value) + def _prepend_input_symbol_to_the_bodies(bodies, transition): to_prepend = cfg.Terminal(transition[INPUT][INPUT_SYMBOL].value) diff --git a/pyformlang/rsa/box.py b/pyformlang/rsa/box.py index 0baafe5..d9ba91f 100644 --- a/pyformlang/rsa/box.py +++ b/pyformlang/rsa/box.py @@ -1,11 +1,11 @@ """ Representation of a box for recursive automaton """ -from typing import Union +from typing import Union, Any -from pyformlang.finite_automaton.epsilon_nfa import EpsilonNFA -from pyformlang.finite_automaton.finite_automaton import to_symbol +from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.finite_automaton.symbol import Symbol +from pyformlang.finite_automaton.utils import to_symbol class Box: @@ -22,8 +22,10 @@ class Box: """ - def __init__(self, enfa: EpsilonNFA, nonterminal: Union[Symbol, str]): - self._dfa = enfa + def __init__(self, + dfa: DeterministicFiniteAutomaton, + nonterminal: Union[Symbol, str]): + self._dfa = dfa nonterminal = to_symbol(nonterminal) self._nonterminal = nonterminal @@ -77,7 +79,7 @@ def final_states(self): """ The final states """ return self._dfa.final_states - def is_equivalent_to(self, other): + def is_equivalent_to(self, other: "Box") -> bool: """ Check whether two boxes are equivalent Parameters @@ -90,13 +92,12 @@ def is_equivalent_to(self, other): are_equivalent : bool Whether the two boxes are equivalent or not """ + return self._dfa.is_equivalent_to(other.dfa) \ + and self.nonterminal == other.nonterminal + def __eq__(self, other: Any) -> bool: if not isinstance(other, Box): return False - - return self._dfa.is_equivalent_to(other.dfa) and self.nonterminal == other.nonterminal - - def __eq__(self, other): return self.is_equivalent_to(other) def __hash__(self): diff --git a/pyformlang/rsa/recursive_automaton.py b/pyformlang/rsa/recursive_automaton.py index 1d89f36..c12d652 100644 --- a/pyformlang/rsa/recursive_automaton.py +++ b/pyformlang/rsa/recursive_automaton.py @@ -4,8 +4,9 @@ from typing import AbstractSet, Union -from pyformlang.finite_automaton.finite_automaton import to_symbol +from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.finite_automaton.symbol import Symbol +from pyformlang.finite_automaton.utils import to_symbol from pyformlang.regular_expression import Regex from pyformlang.cfg import Epsilon @@ -111,7 +112,7 @@ def from_regex(cls, regex: Regex, start_nonterminal: Union[Symbol, str]): The new recursive automaton built from regular expression """ start_nonterminal = to_symbol(start_nonterminal) - box = Box(regex.to_epsilon_nfa().minimize(), start_nonterminal) + box = Box(cls.__regex_to_minimal_dfa(regex), start_nonterminal) return RecursiveAutomaton(box, {box}) @classmethod @@ -153,9 +154,11 @@ def from_ebnf(cls, text, start_nonterminal: Union[Symbol, str] = Symbol("S")): productions[head] = body for head, body in productions.items(): - boxes.add(Box(Regex(body).to_epsilon_nfa().minimize(), + boxes.add(Box(cls.__regex_to_minimal_dfa(Regex(body)), to_symbol(head))) - start_box = Box(Regex(productions[start_nonterminal.value]).to_epsilon_nfa().minimize(), start_nonterminal) + start_box_dfa = cls.__regex_to_minimal_dfa( + Regex(productions[start_nonterminal.value])) + start_box = Box(start_box_dfa, start_nonterminal) return RecursiveAutomaton(start_box, boxes) def is_equals_to(self, other): @@ -179,3 +182,11 @@ def is_equals_to(self, other): def __eq__(self, other): return self.is_equals_to(other) + + @classmethod + def __regex_to_minimal_dfa(cls, regex: Regex) \ + -> DeterministicFiniteAutomaton: + """ Build minimal dfa from given regex """ + enfa = regex.to_epsilon_nfa() + dfa = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa) + return dfa.minimize() From 5ee276dff7148b1ddf738ef64c63d5dd14be9375 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 14 Oct 2024 23:11:21 +0300 Subject: [PATCH 24/42] add type annotations for regex --- pyformlang/regular_expression/python_regex.py | 27 ++-- pyformlang/regular_expression/regex.py | 117 +++++++++--------- .../regular_expression/regex_objects.py | 105 ++++++++-------- pyformlang/regular_expression/regex_reader.py | 78 ++++++------ 4 files changed, 168 insertions(+), 159 deletions(-) diff --git a/pyformlang/regular_expression/python_regex.py b/pyformlang/regular_expression/python_regex.py index fd63b5c..05df264 100644 --- a/pyformlang/regular_expression/python_regex.py +++ b/pyformlang/regular_expression/python_regex.py @@ -2,16 +2,17 @@ A class to read Python format regex """ -import re -import string -import unicodedata +from typing import Union +from re import compile as comp, Pattern +from string import printable +from unicodedata import lookup -# pylint: disable=cyclic-import -from pyformlang.regular_expression import regex, MisformedRegexError +from pyformlang.regular_expression import MisformedRegexError +from pyformlang.regular_expression.regex import Regex from pyformlang.regular_expression.regex_reader import \ WRONG_PARENTHESIS_MESSAGE -PRINTABLES = list(string.printable) +PRINTABLES = list(printable) TRANSFORMATIONS = { "|": "\\|", @@ -55,7 +56,7 @@ ESCAPED_OCTAL = ["\\0", "\\1", "\\2", "\\3", "\\4", "\\5", "\\6", "\\7"] -class PythonRegex(regex.Regex): +class PythonRegex(Regex): """ Represents a regular expression as used in Python. It adds the following features to the basic regex: @@ -98,11 +99,11 @@ class PythonRegex(regex.Regex): """ - def __init__(self, python_regex): - if not isinstance(python_regex, str): - python_regex = python_regex.pattern + def __init__(self, python_regex: Union[str, Pattern[str]]) -> None: + if isinstance(python_regex, str): + comp(python_regex) # Check if it is valid else: - re.compile(python_regex) # Check if it is valid + python_regex = python_regex.pattern self._python_regex = python_regex self._replace_shortcuts() @@ -114,7 +115,7 @@ def __init__(self, python_regex): self._python_regex = self._python_regex.lstrip('\b') super().__init__(self._python_regex) - def _separate(self): + def _separate(self) -> None: regex_temp = [] for symbol in self._python_regex: if self._should_escape_next_symbol(regex_temp): @@ -193,7 +194,7 @@ def _recombine(regex_to_recombine): while regex_to_recombine[idx_end] != "}": idx_end += 1 name = "".join(regex_to_recombine[idx + 2: idx_end]) - name = unicodedata.lookup(name) + name = lookup(name) temp.append(TRANSFORMATIONS.get(name, name)) idx = idx_end + 1 elif regex_to_recombine[idx] == "\\u": diff --git a/pyformlang/regular_expression/regex.py b/pyformlang/regular_expression/regex.py index c8155b3..46a55ab 100644 --- a/pyformlang/regular_expression/regex.py +++ b/pyformlang/regular_expression/regex.py @@ -1,16 +1,17 @@ """ Representation of a regular expression """ -from typing import Iterable - -from pyformlang import finite_automaton -# pylint: disable=cyclic-import -import pyformlang.regular_expression.regex_objects -from pyformlang import cfg -from pyformlang.finite_automaton import State -# pylint: disable=cyclic-import + +from typing import List, Iterable, Tuple, Any + +from pyformlang.finite_automaton import Epsilon as FAEpsilon +from pyformlang.finite_automaton import EpsilonNFA, State, Symbol +from pyformlang.cfg.cfg import CFG, Production +from pyformlang.cfg.utils import to_variable from pyformlang.regular_expression.regex_reader import RegexReader -from pyformlang import regular_expression +from pyformlang.regular_expression.python_regex import PythonRegex +from pyformlang.regular_expression.regex_objects import \ + Epsilon as RegexEpsilon, Empty, Concatenation, Union, KleeneStar class Regex(RegexReader): @@ -85,16 +86,11 @@ class Regex(RegexReader): """ - def __init__(self, regex): - self.head = None - self.sons = None + def __init__(self, regex: str) -> None: super().__init__(regex) + self.sons: List[Regex] = [] self._counter = 0 - self._initialize_enfa() - self._enfa = None - - def _initialize_enfa(self): - self._enfa = finite_automaton.EpsilonNFA() + self._enfa = EpsilonNFA() def get_number_symbols(self) -> int: """ Gives the number of symbols in the regex @@ -139,7 +135,7 @@ def get_number_operators(self) -> int: return 1 + sum(son.get_number_operators() for son in self.sons) return 0 - def to_epsilon_nfa(self): + def to_epsilon_nfa(self) -> EpsilonNFA: """ Transforms the regular expression into an epsilon NFA Returns @@ -154,28 +150,28 @@ def to_epsilon_nfa(self): >>> regex.to_epsilon_nfa() """ - self._initialize_enfa() + self._enfa = EpsilonNFA() s_initial = self._set_and_get_initial_state_in_enfa() s_final = self._set_and_get_final_state_in_enfa() self._process_to_enfa(s_initial, s_final) return self._enfa - def _set_and_get_final_state_in_enfa(self): + def _set_and_get_final_state_in_enfa(self) -> State: s_final = self._get_next_state_enfa() self._enfa.add_final_state(s_final) return s_final - def _get_next_state_enfa(self): - s_final = finite_automaton.State(self._counter) + def _get_next_state_enfa(self) -> State: + s_final = State(self._counter) self._counter += 1 return s_final - def _set_and_get_initial_state_in_enfa(self): + def _set_and_get_initial_state_in_enfa(self) -> State: s_initial = self._get_next_state_enfa() self._enfa.add_start_state(s_initial) return s_initial - def _process_to_enfa(self, s_from: State, s_to: State): + def _process_to_enfa(self, s_from: State, s_to: State) -> None: """ Internal function to add a regex to a given epsilon NFA Parameters @@ -190,29 +186,24 @@ def _process_to_enfa(self, s_from: State, s_to: State): else: self._process_to_enfa_when_no_son(s_from, s_to) - def _process_to_enfa_when_no_son(self, s_from, s_to): - if isinstance(self.head, - pyformlang.regular_expression.regex_objects.Epsilon): + def _process_to_enfa_when_no_son(self, s_from: State, s_to: State) -> None: + if isinstance(self.head, RegexEpsilon): self._add_epsilon_transition_in_enfa_between(s_from, s_to) - elif not isinstance(self.head, - pyformlang.regular_expression.regex_objects.Empty): - symbol = finite_automaton.Symbol(self.head.value) + elif not isinstance(self.head, Empty): + symbol = Symbol(self.head.value) self._enfa.add_transition(s_from, symbol, s_to) - def _process_to_enfa_when_sons(self, s_from, s_to): + def _process_to_enfa_when_sons(self, s_from: State, s_to: State) -> None: if isinstance( - self.head, - pyformlang.regular_expression.regex_objects.Concatenation): + self.head, Concatenation): self._process_to_enfa_concatenation(s_from, s_to) - elif isinstance(self.head, - pyformlang.regular_expression.regex_objects.Union): + elif isinstance(self.head, Union): self._process_to_enfa_union(s_from, s_to) elif isinstance( - self.head, - pyformlang.regular_expression.regex_objects.KleeneStar): + self.head, KleeneStar): self._process_to_enfa_kleene_star(s_from, s_to) - def _process_to_enfa_kleene_star(self, s_from, s_to): + def _process_to_enfa_kleene_star(self, s_from: State, s_to: State) -> None: # pylint: disable=protected-access state_first = self._get_next_state_enfa() state_second = self._get_next_state_enfa() @@ -222,30 +213,40 @@ def _process_to_enfa_kleene_star(self, s_from, s_to): self._add_epsilon_transition_in_enfa_between(state_second, s_to) self._process_to_enfa_son(state_first, state_second, 0) - def _process_to_enfa_union(self, s_from, s_to): + def _process_to_enfa_union(self, s_from: State, s_to: State) -> None: son_number = 0 self._create_union_branch_in_enfa(s_from, s_to, son_number) son_number = 1 self._create_union_branch_in_enfa(s_from, s_to, son_number) - def _create_union_branch_in_enfa(self, s_from, s_to, son_number): + def _create_union_branch_in_enfa(self, + s_from: State, + s_to: State, + son_number: int) -> None: state0 = self._get_next_state_enfa() state2 = self._get_next_state_enfa() self._add_epsilon_transition_in_enfa_between(s_from, state0) self._add_epsilon_transition_in_enfa_between(state2, s_to) self._process_to_enfa_son(state0, state2, son_number) - def _process_to_enfa_concatenation(self, s_from, s_to): + def _process_to_enfa_concatenation(self, + s_from: State, + s_to: State) -> None: state0 = self._get_next_state_enfa() state1 = self._get_next_state_enfa() self._add_epsilon_transition_in_enfa_between(state0, state1) self._process_to_enfa_son(s_from, state0, 0) self._process_to_enfa_son(state1, s_to, 1) - def _add_epsilon_transition_in_enfa_between(self, state0, state1): - self._enfa.add_transition(state0, finite_automaton.Epsilon(), state1) + def _add_epsilon_transition_in_enfa_between(self, + state0: State, + state1: State) -> None: + self._enfa.add_transition(state0, FAEpsilon(), state1) - def _process_to_enfa_son(self, s_from, s_to, index_son): + def _process_to_enfa_son(self, + s_from: State, + s_to: State, + index_son: int) -> None: # pylint: disable=protected-access self.sons[index_son]._counter = self._counter self.sons[index_son]._enfa = self._enfa @@ -280,7 +281,7 @@ def get_tree_str(self, depth: int = 0) -> str: temp += son.get_tree_str(depth + 1) return temp - def to_cfg(self, starting_symbol="S") -> "CFG": + def to_cfg(self, starting_symbol: str = "S") -> CFG: """ Turns the regex into a context-free grammar @@ -304,11 +305,12 @@ def to_cfg(self, starting_symbol="S") -> "CFG": """ productions, _ = self._get_production(starting_symbol) - cfg_res = cfg.CFG(start_symbol=cfg.utils.to_variable(starting_symbol), + cfg_res = CFG(start_symbol=to_variable(starting_symbol), productions=set(productions)) return cfg_res - def _get_production(self, current_symbol, count=0): + def _get_production(self, current_symbol: Any, count: int = 0) \ + -> Tuple[List[Production], int]: next_symbols = [] next_productions = [] for son in self.sons: @@ -322,7 +324,7 @@ def _get_production(self, current_symbol, count=0): next_productions += new_prods return next_productions, count - def __repr__(self): + def __repr__(self) -> str: return self.head.get_str_repr([str(son) for son in self.sons]) def union(self, other: "Regex") -> "Regex": @@ -357,11 +359,11 @@ def union(self, other: "Regex") -> "Regex": """ regex = Regex("") - regex.head = pyformlang.regular_expression.regex_objects.Union() + regex.head = Union() regex.sons = [self, other] return regex - def __or__(self, other): + def __or__(self, other: "Regex") -> "Regex": """ Makes the union with another regex Parameters @@ -427,12 +429,11 @@ def concatenate(self, other: "Regex") -> "Regex": True """ regex = Regex("") - regex.head = \ - pyformlang.regular_expression.regex_objects.Concatenation() + regex.head = Concatenation() regex.sons = [self, other] return regex - def __add__(self, other): + def __add__(self, other: "Regex") -> "Regex": """ Concatenates a regular expression with an other one Parameters @@ -485,11 +486,11 @@ def kleene_star(self) -> "Regex": """ regex = Regex("") - regex.head = pyformlang.regular_expression.regex_objects.KleeneStar() + regex.head = KleeneStar() regex.sons = [self] return regex - def from_string(self, regex_str: str): + def from_string(self, regex_str: str) -> "Regex": """ Construct a regex from a string. For internal usage. Equivalent to the constructor of Regex @@ -515,7 +516,7 @@ def from_string(self, regex_str: str): """ return Regex(regex_str) - def accepts(self, word: Iterable[str]) -> bool: + def accepts(self, word: Iterable[Any]) -> bool: """ Check if a word matches (completely) the regex @@ -545,7 +546,7 @@ def accepts(self, word: Iterable[str]) -> bool: return self._enfa.accepts(word) @classmethod - def from_python_regex(cls, regex): + def from_python_regex(cls, regex: str) -> PythonRegex: """ Creates a regex from a string using the python way to write it. @@ -570,4 +571,4 @@ def from_python_regex(cls, regex): >>> Regex.from_python_regex("a+[cd]") """ - return regular_expression.PythonRegex(regex) + return PythonRegex(regex) diff --git a/pyformlang/regular_expression/regex_objects.py b/pyformlang/regular_expression/regex_objects.py index 053f9b4..73c4913 100644 --- a/pyformlang/regular_expression/regex_objects.py +++ b/pyformlang/regular_expression/regex_objects.py @@ -1,7 +1,11 @@ """ Representation of some objects used in regex. """ -import pyformlang + +from typing import List, Iterable, Any + +from pyformlang.cfg import Production +from pyformlang.cfg.utils import to_variable, to_terminal class Node: # pylint: disable=too-few-public-methods @@ -13,11 +17,11 @@ class Node: # pylint: disable=too-few-public-methods The value of the node """ - def __init__(self, value): + def __init__(self, value: Any) -> None: self._value = value @property - def value(self): + def value(self) -> Any: """ Give the value of the node Returns @@ -27,7 +31,7 @@ def value(self): """ return self._value - def get_str_repr(self, sons_repr): + def get_str_repr(self, sons_repr: Iterable[str]) -> str: """ The string representation of the node @@ -44,7 +48,8 @@ def get_str_repr(self, sons_repr): """ raise NotImplementedError - def get_cfg_rules(self, current_symbol, sons): + def get_cfg_rules(self, current_symbol: Any, sons: Iterable[Any]) \ + -> List[Production]: """ Gets the rules for a context-free grammar to represent the \ operator""" raise NotImplementedError @@ -91,14 +96,15 @@ class Operator(Node): # pylint: disable=too-few-public-methods The value of the operator """ - def __repr__(self): + def __repr__(self) -> str: return "Operator(" + str(self._value) + ")" - def get_str_repr(self, sons_repr): + def get_str_repr(self, sons_repr: Iterable[str]) -> str: """ Get the string representation """ raise NotImplementedError - def get_cfg_rules(self, current_symbol, sons): + def get_cfg_rules(self, current_symbol: Any, sons: Iterable[Any]) \ + -> List[Production]: """ Gets the rules for a context-free grammar to represent the \ operator""" raise NotImplementedError @@ -113,17 +119,18 @@ class Symbol(Node): # pylint: disable=too-few-public-methods The value of the symbol """ - def get_str_repr(self, sons_repr): + def get_str_repr(self, sons_repr: Iterable[str]) -> str: return str(self.value) - def get_cfg_rules(self, current_symbol, sons): + def get_cfg_rules(self, current_symbol: Any, sons: Iterable[Any]) \ + -> List[Production]: """ Gets the rules for a context-free grammar to represent the \ operator""" - return [pyformlang.cfg.Production( - pyformlang.cfg.utils.to_variable(current_symbol), - [pyformlang.cfg.utils.to_terminal(self.value)])] + return [Production( + to_variable(current_symbol), + [to_terminal(self.value)])] - def __repr__(self): + def __repr__(self) -> str: return "Symbol(" + str(self._value) + ")" @@ -131,15 +138,16 @@ class Concatenation(Operator): # pylint: disable=too-few-public-methods """ Represents a concatenation """ - def get_str_repr(self, sons_repr): + def get_str_repr(self, sons_repr: Iterable[str]) -> str: return "(" + ".".join(sons_repr) + ")" - def get_cfg_rules(self, current_symbol, sons): - return [pyformlang.cfg.Production( - pyformlang.cfg.utils.to_variable(current_symbol), - [pyformlang.cfg.utils.to_variable(son) for son in sons])] + def get_cfg_rules(self, current_symbol: Any, sons: Iterable[Any]) \ + -> List[Production]: + return [Production( + to_variable(current_symbol), + [to_variable(son) for son in sons])] - def __init__(self): + def __init__(self) -> None: super().__init__("Concatenation") @@ -147,16 +155,16 @@ class Union(Operator): # pylint: disable=too-few-public-methods """ Represents a union """ - def get_str_repr(self, sons_repr): + def get_str_repr(self, sons_repr: Iterable[str]) -> str: return "(" + "|".join(sons_repr) + ")" - def get_cfg_rules(self, current_symbol, sons): - return [pyformlang.cfg.Production( - pyformlang.cfg.utils.to_variable(current_symbol), - [pyformlang.cfg.utils.to_variable(son)]) - for son in sons] + def get_cfg_rules(self, current_symbol: Any, sons: Iterable[Any]) \ + -> List[Production]: + return [Production( + to_variable(current_symbol), + [to_variable(son)]) for son in sons] - def __init__(self): + def __init__(self) -> None: super().__init__("Union") @@ -164,24 +172,23 @@ class KleeneStar(Operator): # pylint: disable=too-few-public-methods """ Represents an epsilon symbol """ - def get_str_repr(self, sons_repr): + def get_str_repr(self, sons_repr: Iterable[str]) -> str: return "(" + ".".join(sons_repr) + ")*" - def get_cfg_rules(self, current_symbol, sons): + def get_cfg_rules(self, current_symbol: Any, sons: Iterable[Any]) \ + -> List[Production]: return [ - pyformlang.cfg.Production( - pyformlang.cfg.utils.to_variable(current_symbol), - []), - pyformlang.cfg.Production( - pyformlang.cfg.utils.to_variable(current_symbol), - [pyformlang.cfg.utils.to_variable(current_symbol), - pyformlang.cfg.utils.to_variable(current_symbol)]), - pyformlang.cfg.Production( - pyformlang.cfg.utils.to_variable(current_symbol), - [pyformlang.cfg.utils.to_variable(son) for son in sons]) + Production( + to_variable(current_symbol), []), + Production( + to_variable(current_symbol), + [to_variable(current_symbol), to_variable(current_symbol)]), + Production( + to_variable(current_symbol), + [to_variable(son) for son in sons]) ] - def __init__(self): + def __init__(self) -> None: super().__init__("Kleene Star") @@ -189,15 +196,14 @@ class Epsilon(Symbol): # pylint: disable=too-few-public-methods """ Represents an epsilon symbol """ - def get_str_repr(self, sons_repr): + def get_str_repr(self, sons_repr: Iterable[str]) -> str: return "$" - def get_cfg_rules(self, current_symbol, sons): - return [pyformlang.cfg.Production( - pyformlang.cfg.utils.to_variable(current_symbol), - [])] + def get_cfg_rules(self, current_symbol: Any, sons: Iterable[str]) \ + -> List[Production]: + return [Production(to_variable(current_symbol), [])] - def __init__(self): + def __init__(self) -> None: super().__init__("Epsilon") @@ -205,16 +211,17 @@ class Empty(Symbol): # pylint: disable=too-few-public-methods """ Represents an empty symbol """ - def __init__(self): + def __init__(self) -> None: super().__init__("Empty") - def get_cfg_rules(self, current_symbol, sons): + def get_cfg_rules(self, current_symbol: Any, sons: Iterable[Any]) \ + -> List[Production]: return [] class MisformedRegexError(Exception): """ Error for misformed regex """ - def __init__(self, message: str, regex: str): + def __init__(self, message: str, regex: str) -> None: super().__init__(message + " Regex: " + regex) self._regex = regex diff --git a/pyformlang/regular_expression/regex_reader.py b/pyformlang/regular_expression/regex_reader.py index 1bebd57..915e8b3 100644 --- a/pyformlang/regular_expression/regex_reader.py +++ b/pyformlang/regular_expression/regex_reader.py @@ -2,9 +2,11 @@ A class to read regex """ -import re +from typing import List, Optional, Any +from re import sub -from pyformlang.regular_expression.regex_objects import to_node, Operator, \ +from pyformlang.regular_expression.regex_objects import \ + to_node, Node, Operator, \ Symbol, Concatenation, Union, \ KleeneStar, MisformedRegexError, SPECIAL_SYMBOLS @@ -19,40 +21,39 @@ class RegexReader: """ # pylint: disable=too-few-public-methods - def __init__(self, regex: str): - self._current_node = None - self.head = None - self.sons = None - self._end_current_group: int = 0 + def __init__(self, regex: str) -> None: + self._current_node: Optional[Node] = None + self.head: Optional[Node] = None + self.sons: List[RegexReader] = [] + self._end_current_group = 0 regex = _pre_process_regex(regex) self._regex = regex self._components = _get_regex_componants(regex) self._pre_process_input_regex_componants() - self._setup_sons() self._setup_from_regex_componants() - def _remove_useless_extreme_parenthesis_from_components(self): + def _remove_useless_extreme_parenthesis_from_components(self) -> None: if self._begins_with_parenthesis_components(): self._remove_useless_extreme_parenthesis_from_componants() - def _pre_process_input_regex_componants(self): + def _pre_process_input_regex_componants(self) -> None: self._remove_useless_extreme_parenthesis_from_components() self._compute_precedence() self._remove_useless_extreme_parenthesis_from_components() def _remove_useless_extreme_parenthesis_from_componants( - self): + self) -> None: if self._is_surrounded_by_parenthesis(): self._components = self._components[1:-1] self._remove_useless_extreme_parenthesis_from_components() - def _is_surrounded_by_parenthesis(self): + def _is_surrounded_by_parenthesis(self) -> bool: parenthesis_depths = self._get_parenthesis_depths() first_complete_closing = _find_first_complete_closing_if_possible( parenthesis_depths) return first_complete_closing == len(self._components) - 1 - def _get_parenthesis_depths(self): + def _get_parenthesis_depths(self) -> List[int]: depths = [0] for component in self._components: depths.append(depths[-1] + _get_parenthesis_value(component)) @@ -61,7 +62,7 @@ def _get_parenthesis_depths(self): def _begins_with_parenthesis_components(self): return self._components[0] == "(" - def _setup_precedence_when_not_trivial(self): + def _setup_precedence_when_not_trivial(self) -> None: self._set_end_first_group_in_components() if self._end_current_group == len(self._components): self._current_node = None @@ -69,30 +70,30 @@ def _setup_precedence_when_not_trivial(self): self._current_node = to_node( self._components[self._end_current_group]) - def _setup_precedence(self): + def _setup_precedence(self) -> None: if len(self._components) <= 1: self._current_node = None else: self._setup_precedence_when_not_trivial() - def _found_no_union(self, next_node): + def _found_no_union(self, next_node: Optional[Node]) -> bool: return self._end_current_group < len( self._components) and not isinstance(next_node, Union) - def _add_parenthesis_around_part_of_componants(self, index_opening, - index_closing): + def _add_parenthesis_around_part_of_componants( + self, index_opening: int, index_closing: int) -> None: self._components.insert(index_opening, "(") # Add 1 as something was added before self._components.insert(index_closing + 1, ")") - def _compute_precedent_when_not_kleene_nor_union(self): + def _compute_precedent_when_not_kleene_nor_union(self) -> None: while self._found_no_union(self._current_node): self._set_next_end_group_and_node() if isinstance(self._current_node, Union): self._add_parenthesis_around_part_of_componants( 0, self._end_current_group) - def _compute_precedence(self): + def _compute_precedence(self) -> None: """ Add parenthesis for the first group in indicate precedence """ self._setup_precedence() if isinstance(self._current_node, KleeneStar): @@ -102,7 +103,7 @@ def _compute_precedence(self): elif not isinstance(self._current_node, Union): self._compute_precedent_when_not_kleene_nor_union() - def _set_next_end_group_and_node(self): + def _set_next_end_group_and_node(self) -> None: if isinstance(self._current_node, Operator) and not isinstance( self._current_node, KleeneStar): self._end_current_group += 1 @@ -111,7 +112,7 @@ def _set_next_end_group_and_node(self): self._current_node = to_node( self._components[self._end_current_group]) - def _set_end_first_group_in_components(self, idx_from=0): + def _set_end_first_group_in_components(self, idx_from: int = 0) -> None: """ Gives the end of the first group """ if idx_from >= len(self._components): self._end_current_group = idx_from @@ -130,7 +131,7 @@ def _set_end_first_group_in_components(self, idx_from=0): else: self._end_current_group = 1 + idx_from - def _setup_non_trivial_regex(self): + def _setup_non_trivial_regex(self) -> None: self._set_end_first_group_in_components() next_node = to_node(self._components[self._end_current_group]) if isinstance(next_node, KleeneStar): @@ -149,15 +150,15 @@ def _setup_non_trivial_regex(self): self.sons.append(self._process_sub_regex(begin_second_group, len(self._components))) - def _setup_empty_regex(self): + def _setup_empty_regex(self) -> None: self.head = to_node("") - def _setup_one_symbol_regex(self): + def _setup_one_symbol_regex(self) -> None: first_symbol = to_node(self._components[0]) self._check_is_valid_single_first_symbol(first_symbol) self.head = first_symbol - def _setup_from_regex_componants(self): + def _setup_from_regex_componants(self) -> None: if not self._components: self._setup_empty_regex() elif len(self._components) == 1: @@ -165,18 +166,15 @@ def _setup_from_regex_componants(self): else: self._setup_non_trivial_regex() - def _process_sub_regex(self, idx_from, idx_to): + def _process_sub_regex(self, idx_from: int, idx_to: int) -> "RegexReader": sub_regex = " ".join(self._components[idx_from:idx_to]) return self.from_string(sub_regex) - def _check_is_valid_single_first_symbol(self, first_symbol): + def _check_is_valid_single_first_symbol(self, first_symbol: Any) -> None: if not isinstance(first_symbol, Symbol): raise MisformedRegexError(MISFORMED_MESSAGE, self._regex) - def _setup_sons(self): - self.sons = [] - - def from_string(self, regex_str: str): + def from_string(self, regex_str: str) -> "RegexReader": """ Read a regex from a string Parameters @@ -192,7 +190,9 @@ def from_string(self, regex_str: str): return RegexReader(regex_str) -def _find_first_complete_closing_if_possible(parenthesis_depths, index_from=0): +def _find_first_complete_closing_if_possible( + parenthesis_depths: List[int], + index_from: int = 0) -> int: try: first_complete_closing = parenthesis_depths.index(0, index_from) except ValueError: @@ -200,7 +200,7 @@ def _find_first_complete_closing_if_possible(parenthesis_depths, index_from=0): return first_complete_closing -def _get_parenthesis_value(component): +def _get_parenthesis_value(component: str) -> int: if component == "(": return 1 if component == ")": @@ -212,8 +212,8 @@ def _pre_process_regex(regex: str) -> str: regex = regex.strip(" ") if regex.endswith("\\") and not regex.endswith("\\\\"): regex += " " - regex = re.sub(r" +", " ", regex) - regex = re.sub(r"\\ ", "\\ ", regex) + regex = sub(r" +", " ", regex) + regex = sub(r"\\ ", "\\ ", regex) if regex.endswith(" "): regex = regex[:-1] res = [] @@ -232,10 +232,10 @@ def _pre_process_regex(regex: str) -> str: return "".join(res) -def _get_regex_componants(regex): +def _get_regex_componants(regex: str) -> List[str]: temp = regex.split(" ") - for i, sub in enumerate(temp): - if sub.endswith("\\") and not sub.endswith("\\\\"): + for i, component in enumerate(temp): + if component.endswith("\\") and not component.endswith("\\\\"): temp[i] += " " if len(temp) > 1 and not temp[-1]: del temp[-1] From 5fdb201389d37b7a8e454d9e501b29f1af38471f Mon Sep 17 00:00:00 2001 From: bygu4 Date: Tue, 15 Oct 2024 15:17:52 +0300 Subject: [PATCH 25/42] add annotations for python_regex --- pyformlang/regular_expression/python_regex.py | 88 +++++++++++-------- pyformlang/regular_expression/regex.py | 2 +- 2 files changed, 51 insertions(+), 39 deletions(-) diff --git a/pyformlang/regular_expression/python_regex.py b/pyformlang/regular_expression/python_regex.py index 05df264..5f9e8bf 100644 --- a/pyformlang/regular_expression/python_regex.py +++ b/pyformlang/regular_expression/python_regex.py @@ -2,7 +2,7 @@ A class to read Python format regex """ -from typing import Union +from typing import List, Tuple, Union from re import compile as comp, Pattern from string import printable from unicodedata import lookup @@ -116,7 +116,7 @@ def __init__(self, python_regex: Union[str, Pattern[str]]) -> None: super().__init__(self._python_regex) def _separate(self) -> None: - regex_temp = [] + regex_temp: List[str] = [] for symbol in self._python_regex: if self._should_escape_next_symbol(regex_temp): regex_temp[-1] += symbol @@ -131,16 +131,19 @@ def _separate(self) -> None: regex_temp_dot.append(symbol) self._python_regex = " ".join(regex_temp_dot) - def _preprocess_brackets(self): - regex_temp = [] + def _preprocess_brackets(self) -> None: + regex_temp: List[str] = [] in_brackets = 0 - in_brackets_temp = [] + in_brackets_temp: List[List[str]] = [] for symbol in self._python_regex: - if symbol == "[" and not self._should_escape_next_symbol(regex_temp) and \ - (in_brackets == 0 or not self._should_escape_next_symbol(in_brackets_temp[-1])): + if symbol == "[" and \ + not self._should_escape_next_symbol(regex_temp) and \ + (in_brackets == 0 or \ + not self._should_escape_next_symbol(in_brackets_temp[-1])): in_brackets += 1 in_brackets_temp.append([]) - elif symbol == "]" and in_brackets >= 1 and not self._should_escape_next_symbol(in_brackets_temp[-1]): + elif symbol == "]" and in_brackets >= 1 and \ + not self._should_escape_next_symbol(in_brackets_temp[-1]): if len(in_brackets_temp) == 1: regex_temp.append("(") regex_temp += self._preprocess_brackets_content( @@ -170,11 +173,12 @@ def _preprocess_brackets(self): self._python_regex = "".join(regex_temp) @staticmethod - def _recombine(regex_to_recombine): - temp = [] + def _recombine(regex_to_recombine: List[str]) -> List[str]: + temp: List[str] = [] idx = 0 while idx < len(regex_to_recombine): - if regex_to_recombine[idx] == "\\x" and idx < len(regex_to_recombine) - 2 \ + if regex_to_recombine[idx] == "\\x" \ + and idx < len(regex_to_recombine) - 2 \ and regex_to_recombine[idx + 1] in HEXASTRING \ and regex_to_recombine[idx + 2] in HEXASTRING: next_str = "".join(regex_to_recombine[idx + 1:idx + 3]) @@ -218,30 +222,35 @@ def _recombine(regex_to_recombine): res.append(x) return res - def _preprocess_brackets_content(self, bracket_content): - bracket_content_temp = [] + def _preprocess_brackets_content(self, bracket_content: List[str]) \ + -> List[str]: + bracket_content_temp: List[str] = [] previous_is_valid_for_range = False for i, symbol in enumerate(bracket_content): # We have a range - if symbol == "-" and not self._should_escape_next_symbol(bracket_content_temp): - if not previous_is_valid_for_range or i == len(bracket_content) - 1: + if symbol == "-" and \ + not self._should_escape_next_symbol(bracket_content_temp): + if not previous_is_valid_for_range or \ + i == len(bracket_content) - 1: # False alarm, no range bracket_content_temp.append("-") previous_is_valid_for_range = True else: # We insert all the characters in the range - bracket_content[i - 1] = self._recombine(bracket_content[i - 1]) + recombined = self._recombine(bracket_content[i - 1].split()) + bracket_content[i - 1] = "".join(recombined) for j in range(ord(bracket_content[i - 1][-1]) + 1, ord(bracket_content[i + 1][-1])): next_char = chr(j) if next_char in TRANSFORMATIONS: - bracket_content_temp.append(TRANSFORMATIONS[next_char]) + bracket_content_temp.append( + TRANSFORMATIONS[next_char]) else: bracket_content_temp.append(next_char) previous_is_valid_for_range = False else: if self._should_escape_next_symbol(bracket_content_temp): - bracket_content_temp[-1] += symbol + bracket_content_temp[-1] += (symbol) else: bracket_content_temp.append(symbol) if (i != 0 and bracket_content[i - 1] == "-" @@ -255,15 +264,15 @@ def _preprocess_brackets_content(self, bracket_content): return bracket_content_temp @staticmethod - def _preprocess_negation(bracket_content): + def _preprocess_negation(bracket_content: List[str]) -> List[str]: if not bracket_content or bracket_content[0] != "^": return bracket_content # We inverse everything return [x for x in ESCAPED_PRINTABLES if x not in bracket_content] @staticmethod - def _insert_or(l_to_modify): - res = [] + def _insert_or(l_to_modify: List[str]) -> List[str]: + res: List[str] = [] for x in l_to_modify: res.append(x) res.append("|") @@ -271,7 +280,8 @@ def _insert_or(l_to_modify): return res[:-1] return res - def _find_previous_opening_parenthesis(self, split_sequence): + def _find_previous_opening_parenthesis(self, + split_sequence: List[str]) -> int: counter = 0 for i in range(len(split_sequence) - 1, -1, -1): temp = split_sequence[i] @@ -284,8 +294,8 @@ def _find_previous_opening_parenthesis(self, split_sequence): raise MisformedRegexError(WRONG_PARENTHESIS_MESSAGE, self._python_regex) - def _preprocess_positive_closure(self): - regex_temp = [] + def _preprocess_positive_closure(self) -> None: + regex_temp: List[str] = [] for symbol in self._python_regex: if symbol != "+" or (self._should_escape_next_symbol(regex_temp)): if self._should_escape_next_symbol(regex_temp): @@ -305,7 +315,8 @@ def _preprocess_positive_closure(self): self._python_regex = "".join(regex_temp) @staticmethod - def _is_repetition(regex_list, idx): + def _is_repetition(regex_list: List[str], idx: int) \ + -> Union[Tuple[int, int, int], Tuple[int, int], None]: if regex_list[idx] == "{": end = idx for i in range(idx + 1, len(regex_list)): @@ -315,7 +326,8 @@ def _is_repetition(regex_list, idx): inner = "".join(regex_list[idx + 1:end]) if "," in inner: split = inner.split(",") - if len(split) != 2 or not split[0].isdigit() or not split[1].isdigit(): + if len(split) != 2 or not split[0].isdigit() or \ + not split[1].isdigit(): return None return int(split[0]), int(split[1]), end if inner.isdigit(): @@ -323,10 +335,10 @@ def _is_repetition(regex_list, idx): return None @staticmethod - def _find_repeated_sequence(regex_list): + def _find_repeated_sequence(regex_list: List[str]) -> List[str]: if regex_list[-1] != ")": return [regex_list[-1]] - res = [")"] + res: List[str] = [")"] counter = -1 for i in range(len(regex_list) - 2, -1, -1): if regex_list[i] == "(": @@ -341,8 +353,8 @@ def _find_repeated_sequence(regex_list): res.append(regex_list[i]) return [] - def _add_repetition(self, regex_list): - res = [] + def _add_repetition(self, regex_list: List[str]) -> List[str]: + res: List[str] = [] idx = 0 while idx < len(regex_list): rep = self._is_repetition(regex_list, idx) @@ -350,7 +362,7 @@ def _add_repetition(self, regex_list): res.append(regex_list[idx]) idx += 1 elif len(rep) == 2: - n_rep, end = rep + n_rep, end = rep[0], rep[1] repeated = self._find_repeated_sequence(res) for _ in range(n_rep - 1): res.extend(repeated) @@ -366,8 +378,8 @@ def _add_repetition(self, regex_list): idx = end + 1 return res - def _preprocess_optional(self): - regex_temp = [] + def _preprocess_optional(self) -> None: + regex_temp: List[str] = [] for symbol in self._python_regex: if symbol == "?": if regex_temp[-1] == ")": @@ -384,11 +396,11 @@ def _preprocess_optional(self): self._python_regex = "".join(regex_temp) @staticmethod - def _should_escape_next_symbol(regex_temp): - return regex_temp and regex_temp[-1] == "\\" + def _should_escape_next_symbol(regex_temp: List[str]) -> bool: + return bool(regex_temp) and regex_temp[-1] == "\\" - def _escape_in_brackets(self): - regex_temp = [] + def _escape_in_brackets(self) -> None: + regex_temp: List[str] = [] in_brackets = False for symbol in self._python_regex: if (symbol == "[" @@ -407,7 +419,7 @@ def _escape_in_brackets(self): regex_temp.append(symbol) self._python_regex = "".join(regex_temp) - def _replace_shortcuts(self): + def _replace_shortcuts(self) -> None: for to_replace, replacement in SHORTCUTS.items(): self._python_regex = self._python_regex.replace(to_replace, replacement) diff --git a/pyformlang/regular_expression/regex.py b/pyformlang/regular_expression/regex.py index 46a55ab..da4cefd 100644 --- a/pyformlang/regular_expression/regex.py +++ b/pyformlang/regular_expression/regex.py @@ -88,7 +88,7 @@ class Regex(RegexReader): def __init__(self, regex: str) -> None: super().__init__(regex) - self.sons: List[Regex] = [] + self.sons: List[Regex] = [] # type: ignore self._counter = 0 self._enfa = EpsilonNFA() From 422e4b57000c7ad291f7bedfece65989024bad4b Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 11 Nov 2024 22:53:01 +0300 Subject: [PATCH 26/42] refactor regex --- pyformlang/regular_expression/python_regex.py | 7 +- pyformlang/regular_expression/regex.py | 172 +++++++++--------- pyformlang/regular_expression/regex_reader.py | 6 +- 3 files changed, 87 insertions(+), 98 deletions(-) diff --git a/pyformlang/regular_expression/python_regex.py b/pyformlang/regular_expression/python_regex.py index 5f9e8bf..8713c9c 100644 --- a/pyformlang/regular_expression/python_regex.py +++ b/pyformlang/regular_expression/python_regex.py @@ -7,10 +7,9 @@ from string import printable from unicodedata import lookup -from pyformlang.regular_expression import MisformedRegexError -from pyformlang.regular_expression.regex import Regex -from pyformlang.regular_expression.regex_reader import \ - WRONG_PARENTHESIS_MESSAGE +from .regex_objects import MisformedRegexError +from .regex_reader import WRONG_PARENTHESIS_MESSAGE +from .regex import Regex PRINTABLES = list(printable) diff --git a/pyformlang/regular_expression/regex.py b/pyformlang/regular_expression/regex.py index da4cefd..819126c 100644 --- a/pyformlang/regular_expression/regex.py +++ b/pyformlang/regular_expression/regex.py @@ -2,16 +2,16 @@ Representation of a regular expression """ -from typing import List, Iterable, Tuple, Any +from typing import List, Iterable, Tuple, Optional, Any from pyformlang.finite_automaton import Epsilon as FAEpsilon from pyformlang.finite_automaton import EpsilonNFA, State, Symbol from pyformlang.cfg.cfg import CFG, Production from pyformlang.cfg.utils import to_variable -from pyformlang.regular_expression.regex_reader import RegexReader -from pyformlang.regular_expression.python_regex import PythonRegex -from pyformlang.regular_expression.regex_objects import \ - Epsilon as RegexEpsilon, Empty, Concatenation, Union, KleeneStar + +from .regex_reader import RegexReader +from .regex_objects import Epsilon as RegexEpsilon, Node, \ + Empty, Concatenation, Union, KleeneStar class Regex(RegexReader): @@ -88,9 +88,10 @@ class Regex(RegexReader): def __init__(self, regex: str) -> None: super().__init__(regex) + self.head: Node = Empty() # type: ignore self.sons: List[Regex] = [] # type: ignore self._counter = 0 - self._enfa = EpsilonNFA() + self._enfa: Optional[EpsilonNFA] = None def get_number_symbols(self) -> int: """ Gives the number of symbols in the regex @@ -150,28 +151,35 @@ def to_epsilon_nfa(self) -> EpsilonNFA: >>> regex.to_epsilon_nfa() """ + return self._to_epsilon_nfa_internal(True) + + def _to_epsilon_nfa_internal(self, copy: bool) -> EpsilonNFA: + """ + Transforms the regular expression into an epsilon NFA. + Copy enfa in case of external usage. + """ + if self._enfa is not None: + return self._enfa.copy() if copy else self._enfa self._enfa = EpsilonNFA() - s_initial = self._set_and_get_initial_state_in_enfa() - s_final = self._set_and_get_final_state_in_enfa() - self._process_to_enfa(s_initial, s_final) - return self._enfa + s_initial = self._set_and_get_initial_state_in_enfa(self._enfa) + s_final = self._set_and_get_final_state_in_enfa(self._enfa) + self._process_to_enfa(self._enfa, s_initial, s_final) + return self._to_epsilon_nfa_internal(copy) - def _set_and_get_final_state_in_enfa(self) -> State: + def _set_and_get_final_state_in_enfa(self, enfa: EpsilonNFA) -> State: s_final = self._get_next_state_enfa() - self._enfa.add_final_state(s_final) + enfa.add_final_state(s_final) return s_final - def _get_next_state_enfa(self) -> State: - s_final = State(self._counter) - self._counter += 1 - return s_final - - def _set_and_get_initial_state_in_enfa(self) -> State: + def _set_and_get_initial_state_in_enfa(self, enfa: EpsilonNFA) -> State: s_initial = self._get_next_state_enfa() - self._enfa.add_start_state(s_initial) + enfa.add_start_state(s_initial) return s_initial - def _process_to_enfa(self, s_from: State, s_to: State) -> None: + def _process_to_enfa(self, + enfa: EpsilonNFA, + s_from: State, + s_to: State) -> None: """ Internal function to add a regex to a given epsilon NFA Parameters @@ -182,77 +190,90 @@ def _process_to_enfa(self, s_from: State, s_to: State) -> None: The destination state """ if self.sons: - self._process_to_enfa_when_sons(s_from, s_to) + self._process_to_enfa_when_sons(enfa, s_from, s_to) else: - self._process_to_enfa_when_no_son(s_from, s_to) + self._process_to_enfa_when_no_son(enfa, s_from, s_to) + + def _process_to_enfa_when_sons(self, + enfa: EpsilonNFA, + s_from: State, + s_to: State) -> None: + if isinstance(self.head, Concatenation): + self._process_to_enfa_concatenation(enfa, s_from, s_to) + elif isinstance(self.head, Union): + self._process_to_enfa_union(enfa, s_from, s_to) + elif isinstance(self.head, KleeneStar): + self._process_to_enfa_kleene_star(enfa, s_from, s_to) - def _process_to_enfa_when_no_son(self, s_from: State, s_to: State) -> None: + def _process_to_enfa_when_no_son(self, + enfa: EpsilonNFA, + s_from: State, + s_to: State) -> None: if isinstance(self.head, RegexEpsilon): - self._add_epsilon_transition_in_enfa_between(s_from, s_to) + enfa.add_transition(s_from, FAEpsilon(), s_to) elif not isinstance(self.head, Empty): symbol = Symbol(self.head.value) - self._enfa.add_transition(s_from, symbol, s_to) + enfa.add_transition(s_from, symbol, s_to) - def _process_to_enfa_when_sons(self, s_from: State, s_to: State) -> None: - if isinstance( - self.head, Concatenation): - self._process_to_enfa_concatenation(s_from, s_to) - elif isinstance(self.head, Union): - self._process_to_enfa_union(s_from, s_to) - elif isinstance( - self.head, KleeneStar): - self._process_to_enfa_kleene_star(s_from, s_to) + def _process_to_enfa_union(self, + enfa: EpsilonNFA, + s_from: State, + s_to: State) -> None: + son_number = 0 + self._create_union_branch_in_enfa(enfa, s_from, s_to, son_number) + son_number = 1 + self._create_union_branch_in_enfa(enfa, s_from, s_to, son_number) - def _process_to_enfa_kleene_star(self, s_from: State, s_to: State) -> None: + def _process_to_enfa_kleene_star(self, + enfa: EpsilonNFA, + s_from: State, + s_to: State) -> None: # pylint: disable=protected-access state_first = self._get_next_state_enfa() state_second = self._get_next_state_enfa() - self._add_epsilon_transition_in_enfa_between(state_second, state_first) - self._add_epsilon_transition_in_enfa_between(s_from, s_to) - self._add_epsilon_transition_in_enfa_between(s_from, state_first) - self._add_epsilon_transition_in_enfa_between(state_second, s_to) - self._process_to_enfa_son(state_first, state_second, 0) - - def _process_to_enfa_union(self, s_from: State, s_to: State) -> None: - son_number = 0 - self._create_union_branch_in_enfa(s_from, s_to, son_number) - son_number = 1 - self._create_union_branch_in_enfa(s_from, s_to, son_number) + enfa.add_transition(state_second, FAEpsilon(), state_first) + enfa.add_transition(s_from, FAEpsilon(), s_to) + enfa.add_transition(s_from, FAEpsilon(), state_first) + enfa.add_transition(state_second, FAEpsilon(), s_to) + self._process_to_enfa_son(enfa, state_first, state_second, 0) def _create_union_branch_in_enfa(self, + enfa: EpsilonNFA, s_from: State, s_to: State, son_number: int) -> None: state0 = self._get_next_state_enfa() state2 = self._get_next_state_enfa() - self._add_epsilon_transition_in_enfa_between(s_from, state0) - self._add_epsilon_transition_in_enfa_between(state2, s_to) - self._process_to_enfa_son(state0, state2, son_number) + enfa.add_transition(s_from, FAEpsilon(), state0) + enfa.add_transition(state2, FAEpsilon(), s_to) + self._process_to_enfa_son(enfa, state0, state2, son_number) def _process_to_enfa_concatenation(self, + enfa: EpsilonNFA, s_from: State, s_to: State) -> None: state0 = self._get_next_state_enfa() state1 = self._get_next_state_enfa() - self._add_epsilon_transition_in_enfa_between(state0, state1) - self._process_to_enfa_son(s_from, state0, 0) - self._process_to_enfa_son(state1, s_to, 1) - - def _add_epsilon_transition_in_enfa_between(self, - state0: State, - state1: State) -> None: - self._enfa.add_transition(state0, FAEpsilon(), state1) + enfa.add_transition(state0, FAEpsilon(), state1) + self._process_to_enfa_son(enfa, s_from, state0, 0) + self._process_to_enfa_son(enfa, state1, s_to, 1) def _process_to_enfa_son(self, + enfa: EpsilonNFA, s_from: State, s_to: State, index_son: int) -> None: # pylint: disable=protected-access self.sons[index_son]._counter = self._counter - self.sons[index_son]._enfa = self._enfa - self.sons[index_son]._process_to_enfa(s_from, s_to) + self.sons[index_son]._enfa = enfa + self.sons[index_son]._process_to_enfa(enfa, s_from, s_to) self._counter = self.sons[index_son]._counter + def _get_next_state_enfa(self) -> State: + s_final = State(self._counter) + self._counter += 1 + return s_final + def get_tree_str(self, depth: int = 0) -> str: """ Get a string representation of the tree behind the regex @@ -516,7 +537,7 @@ def from_string(self, regex_str: str) -> "Regex": """ return Regex(regex_str) - def accepts(self, word: Iterable[Any]) -> bool: + def accepts(self, word: Iterable[str]) -> bool: """ Check if a word matches (completely) the regex @@ -541,34 +562,5 @@ def accepts(self, word: Iterable[Any]) -> bool: True """ - if self._enfa is None: - self._enfa = self.to_epsilon_nfa() + self._enfa = self._to_epsilon_nfa_internal(False) return self._enfa.accepts(word) - - @classmethod - def from_python_regex(cls, regex: str) -> PythonRegex: - """ - Creates a regex from a string using the python way to write it. - - Careful: - Not everything is implemented, check PythonRegex class \ - documentation for more details. - - It is equivalent to calling PythonRegex constructor directly. - - Parameters - ---------- - regex : str - The regex given as a string or compile regex - - Returns - ------- - python_regex : :class:`~pyformlang.regular_expression.PythonRegex` - The regex - - Examples - -------- - >>> Regex.from_python_regex("a+[cd]") - - """ - return PythonRegex(regex) diff --git a/pyformlang/regular_expression/regex_reader.py b/pyformlang/regular_expression/regex_reader.py index 915e8b3..ac0a443 100644 --- a/pyformlang/regular_expression/regex_reader.py +++ b/pyformlang/regular_expression/regex_reader.py @@ -5,10 +5,8 @@ from typing import List, Optional, Any from re import sub -from pyformlang.regular_expression.regex_objects import \ - to_node, Node, Operator, \ - Symbol, Concatenation, Union, \ - KleeneStar, MisformedRegexError, SPECIAL_SYMBOLS +from .regex_objects import to_node, Node, Operator, Symbol, \ + Concatenation, Union, KleeneStar, MisformedRegexError, SPECIAL_SYMBOLS MISFORMED_MESSAGE = "The regex is misformed here." From 61acee70e0964dfac39a8f794efd5e908127bfb4 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Tue, 12 Nov 2024 00:26:03 +0300 Subject: [PATCH 27/42] refactor to_regex, remove regexable --- .../deterministic_finite_automaton.py | 16 +- pyformlang/finite_automaton/epsilon_nfa.py | 266 +----------------- .../finite_automaton/finite_automaton.py | 23 ++ .../nondeterministic_finite_automaton.py | 7 + pyformlang/finite_automaton/regexable.py | 71 ----- pyformlang/regular_expression/regex.py | 255 ++++++++++++++++- requirements.txt | 1 + 7 files changed, 291 insertions(+), 348 deletions(-) delete mode 100644 pyformlang/finite_automaton/regexable.py diff --git a/pyformlang/finite_automaton/deterministic_finite_automaton.py b/pyformlang/finite_automaton/deterministic_finite_automaton.py index a8174a3..0d9a657 100644 --- a/pyformlang/finite_automaton/deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/deterministic_finite_automaton.py @@ -247,18 +247,10 @@ def copy(self) -> "DeterministicFiniteAutomaton": True """ - dfa = DeterministicFiniteAutomaton() - if self.start_state: - dfa.add_start_state(self.start_state) - for final in self._final_states: - dfa.add_final_state(final) - for state in self._states: - for symbol in self._input_symbols: - state_to = self._transition_function.get_next_state( - state, symbol) - if state_to is not None: - dfa.add_transition(state, symbol, state_to) - return dfa + return self._copy_to(DeterministicFiniteAutomaton()) # type: ignore + + def __copy__(self) -> "DeterministicFiniteAutomaton": + return self.copy() def get_next_state(self, s_from: Hashable, symb_by: Hashable) \ -> Optional[State]: diff --git a/pyformlang/finite_automaton/epsilon_nfa.py b/pyformlang/finite_automaton/epsilon_nfa.py index 909b66d..97402e0 100644 --- a/pyformlang/finite_automaton/epsilon_nfa.py +++ b/pyformlang/finite_automaton/epsilon_nfa.py @@ -2,11 +2,9 @@ Nondeterministic Automaton with epsilon transitions """ -from typing import Iterable, Set, AbstractSet, Tuple, Hashable +from typing import Iterable, Set, AbstractSet, Hashable from networkx import MultiDiGraph -from pyformlang.regular_expression import Regex - from .state import State from .symbol import Symbol from .epsilon import Epsilon @@ -14,10 +12,9 @@ NondeterministicTransitionFunction from .finite_automaton import FiniteAutomaton from .utils import to_state, to_symbol -from .regexable import Regexable -class EpsilonNFA(Regexable, FiniteAutomaton): +class EpsilonNFA(FiniteAutomaton): """ Represents an epsilon NFA Parameters @@ -268,20 +265,7 @@ def copy(self) -> "EpsilonNFA": True """ - enfa = EpsilonNFA() - for start in self._start_states: - enfa.add_start_state(start) - for final in self._final_states: - enfa.add_final_state(final) - for state in self._states: - for symbol in self._input_symbols: - states = self._transition_function(state, symbol) - for state_to in states: - enfa.add_transition(state, symbol, state_to) - states = self._transition_function(state, Epsilon()) - for state_to in states: - enfa.add_transition(state, Epsilon(), state_to) - return enfa + return self._copy_to(EpsilonNFA()) # type: ignore def __copy__(self) -> "EpsilonNFA": return self.copy() @@ -335,110 +319,6 @@ def from_networkx(cls, graph: MultiDiGraph) -> "EpsilonNFA": enfa.add_final_state(node) return enfa - def to_regex(self) -> Regex: - """ Transforms the EpsilonNFA to a regular expression - - Returns - ---------- - regex : :class:`~pyformlang.regular_expression.Regex` - A regular expression equivalent to the current Epsilon NFA - - Examples - -------- - - >>> enfa = EpsilonNFA() - >>> enfa.add_transitions([(0, "abc", 1), (0, "d", 1), \ - (0, "epsilon", 2)]) - >>> enfa.add_start_state(0) - >>> enfa.add_final_state(1) - >>> regex = enfa.to_regex() - >>> regex.accepts(["abc"]) - True - - """ - # pylint: disable=protected-access - enfas = [self.copy() for _ in self._final_states] - final_states = list(self._final_states) - for i in range(len(self._final_states)): - for j in range(len(self._final_states)): - if i != j: - enfas[j].remove_final_state(final_states[i]) - regex_l = [] - for enfa in enfas: - enfa._remove_all_basic_states() - regex_sub = enfa._get_regex_simple() - if regex_sub: - regex_l.append(regex_sub) - res = "+".join(regex_l) - return Regex(res) - - def _get_regex_simple(self) -> str: - """ Get the regex of an automaton when it only composed of a start and - a final state - - CAUTION: For internal use only! - - Returns - ---------- - regex : str - A regex representing the automaton - """ - if not self._final_states or not self._start_states: - return "" - if len(self._final_states) != 1 or len(self._start_states) != 1: - raise ValueError("The automaton is not simple enough!") - if self._start_states == self._final_states: - # We are suppose to have only one good symbol - for symbol in self._input_symbols: - out_states = self._transition_function( - list(self._start_states)[0], symbol) - if out_states: - return "(" + str(symbol.value) + ")*" - return "epsilon" - start_to_start, start_to_end, end_to_start, end_to_end = \ - self._get_bi_transitions() - return self.__get_regex_sub(start_to_start, - start_to_end, - end_to_start, - end_to_end) - - def _get_bi_transitions(self) -> Tuple[str, str, str, str]: - """ Internal method to compute the transition in the case of a \ - simple automaton - - Returns - start_to_start : str - The transition from the start state to the start state - start_to_end : str - The transition from the start state to the end state - end_to_start : str - The transition from the end state to the start state - end_to_end : str - The transition from the end state to the end state - ---------- - """ - start = list(self._start_states)[0] - end = list(self._final_states)[0] - start_to_start = "epsilon" - start_to_end = "" - end_to_end = "epsilon" - end_to_start = "" - for state in self._states: - for symbol in self._input_symbols.union({Epsilon()}): - for out_state in self._transition_function(state, symbol): - symbol_str = str(symbol.value) - if not symbol_str.isalnum(): - symbol_str = "(" + symbol_str + ")" - if state == start and out_state == start: - start_to_start = symbol_str - elif state == start and out_state == end: - start_to_end = symbol_str - elif state == end and out_state == start: - end_to_start = symbol_str - elif state == end and out_state == end: - end_to_end = symbol_str - return start_to_start, start_to_end, end_to_start, end_to_end - def get_complement(self) -> "EpsilonNFA": """ Get the complement of the current Epsilon NFA @@ -720,149 +600,9 @@ def is_empty(self) -> bool: processed.add(state) return True - def _remove_all_basic_states(self) -> None: - """ Remove all states which are not the start state or a final state - - CAREFUL: This method modifies the current automaton, for internal usage - only! - - The function _create_or_transitions is supposed to be called before - calling this function - """ - self._create_or_transitions() - states = self._states.copy() - for state in states: - if (state not in self._start_states - and state not in self._final_states): - self._remove_state(state) - - def _remove_state(self, state: State) -> None: - """ Removes a given state from the epsilon NFA - - CAREFUL: This method modifies the current automaton, for internal usage - only! - - The function _create_or_transitions is supposed to be called before - calling this function - - Parameters - ---------- - state : :class:`~pyformlang.finite_automaton.State` - The state to remove - - """ - # First compute all endings - out_transitions = {} - for symbol in self._input_symbols.union({Epsilon()}): - out_states = self._transition_function(state, symbol).copy() - for out_state in out_states: - out_transitions[out_state] = str(symbol.value) - self.remove_transition(state, symbol, out_state) - if state in out_transitions: - to_itself = "(" + out_transitions[state] + ")*" - del out_transitions[state] - for out_state in list(out_transitions.keys()): - out_transitions[out_state] = to_itself + "." + \ - out_transitions[out_state] - input_symbols = self._input_symbols.copy().union({Epsilon()}) - for in_state in self._states: - if in_state == state: - continue - for symbol in input_symbols: - out_states = self._transition_function(in_state, symbol) - if state not in out_states: - continue - symbol_str = "(" + str(symbol.value) + ")" - self.remove_transition(in_state, symbol, state) - for out_state, next_symb in out_transitions.items(): - new_symbol = Symbol(symbol_str + "." + next_symb) - self.add_transition(in_state, new_symbol, out_state) - self._states.remove(state) - # We make sure the automaton has the good structure - self._create_or_transitions() - - def _create_or_transitions(self) -> None: - """ Creates a OR transition instead of several connections - - CAREFUL: This method modifies the automaton and is designed for \ - internal use only! - """ - for state in self._states: - new_transitions = {} - input_symbols = self._input_symbols.copy().union({Epsilon()}) - for symbol in input_symbols: - out_states = self._transition_function(state, symbol) - out_states = out_states.copy() - symbol_str = str(symbol.value) - for out_state in out_states: - self.remove_transition(state, symbol, out_state) - base = new_transitions.setdefault(out_state, "") - if "+" in symbol_str: - symbol_str = "(" + symbol_str + ")" - if base: - new_transitions[out_state] = "((" + base + ")+(" + \ - symbol_str + "))" - else: - new_transitions[out_state] = symbol_str - for out_state, next_symb in new_transitions.items(): - self.add_transition(state, - next_symb, - out_state) - def __bool__(self) -> bool: return not self.is_empty() - def __get_regex_sub(self, - start_to_start: str, - start_to_end: str, - end_to_start: str, - end_to_end: str) -> str: - """ Combines the transitions in the regex simple function """ - if not start_to_end: - return "" - temp, part1 = self.__get_temp(start_to_end, end_to_start, end_to_end) - part0 = "epsilon" - if start_to_start != "epsilon": - if temp: - part0 = "(" + start_to_start + "+" + temp + ")*" - else: - part0 = "(" + start_to_start + ")*" - elif temp != "epsilon" and temp: - part0 = "(" + temp + ")*" - return "(" + part0 + "." + part1 + ")" - - @staticmethod - def __get_temp(start_to_end: str, - end_to_start: str, - end_to_end: str) -> Tuple[str, str]: - """ - Gets a temp values in the computation - of the simple automaton regex. - """ - temp = "epsilon" - if (start_to_end != "epsilon" - or end_to_end != "epsilon" - or end_to_start != "epsilon"): - temp = "" - if start_to_end != "epsilon": - temp = start_to_end - if end_to_end != "epsilon": - if temp: - temp += "." + end_to_end + "*" - else: - temp = end_to_end + "*" - part1 = temp - if not part1: - part1 = "epsilon" - if end_to_start != "epsilon": - if temp: - temp += "." + end_to_start - else: - temp = end_to_start - if not end_to_start: - temp = "" - return (temp, part1) - @staticmethod def __combine_state_pair(state0: State, state1: State) -> State: """ Combine two states """ diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index d605d81..b91d38a 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -646,6 +646,29 @@ def to_dict(self) -> Dict[State, Dict[Symbol, Set[State]]]: """ return self._transition_function.to_dict() + def __copy__(self) -> "FiniteAutomaton": + return self.copy() + + def copy(self) -> "FiniteAutomaton": + """ Copies the current Finite Automaton instance """ + return self._copy_to(FiniteAutomaton()) + + def _copy_to(self, fa_to_copy_to: "FiniteAutomaton") -> "FiniteAutomaton": + """ Copies current automaton properties to the given one """ + for start in self._start_states: + fa_to_copy_to.add_start_state(start) + for final in self._final_states: + fa_to_copy_to.add_final_state(final) + for state in self._states: + for symbol in self._input_symbols: + states = self._transition_function(state, symbol) + for state_to in states: + fa_to_copy_to.add_transition(state, symbol, state_to) + states = self._transition_function(state, Epsilon()) + for state_to in states: + fa_to_copy_to.add_transition(state, Epsilon(), state_to) + return fa_to_copy_to + @staticmethod def __try_add(set_to_add_to: Set[Any], element_to_add: Any) -> bool: """ diff --git a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py index 2d0d60f..5068740 100644 --- a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py @@ -120,6 +120,13 @@ def add_transition(self, raise InvalidEpsilonTransition return super().add_transition(s_from, symb_by, s_to) + def copy(self) -> "NondeterministicFiniteAutomaton": + """ Copies the current NFA instance """ + return self._copy_to(NondeterministicFiniteAutomaton()) # type: ignore + + def __copy__(self) -> "NondeterministicFiniteAutomaton": + return self.copy() + @classmethod def from_epsilon_nfa(cls, enfa: EpsilonNFA) \ -> "NondeterministicFiniteAutomaton": diff --git a/pyformlang/finite_automaton/regexable.py b/pyformlang/finite_automaton/regexable.py deleted file mode 100644 index 52caf49..0000000 --- a/pyformlang/finite_automaton/regexable.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -An abstract class to represent something -which are be transformed into a regex -""" - -from pyformlang.finite_automaton import EpsilonNFA -from pyformlang.regular_expression import Regex - - -class Regexable: - """ An abstract class to represent something which are be transformed into - a regex - """ - - def to_regex(self) -> Regex: - """ Tranforms the EpsilonNFA to a regular expression - - Returns - ---------- - regex : :class:`~pyformlang.regular_expression.Regex` - A regular expression equivalent to the current Epsilon NFA - """ - raise NotImplementedError() - - def union(self, other: "Regexable") -> EpsilonNFA: - """ Makes the union of two regexable objects - - Parameters - ---------- - other : :class:`~pyformlang.finite_automaton.Regexable` - The other regexable object - - Returns - ---------- - enfa : :class:`~pyformlang.finite_automaton.EpsilonNFA` - The union of the two regexable objects - """ - regex0 = self.to_regex() - regex1 = other.to_regex() - regex = regex0 | regex1 - return regex.to_epsilon_nfa() - - def concatenate(self, other: "Regexable") -> EpsilonNFA: - """ Makes the concatenation of two regexable objects - - Parameters - ---------- - other : :class:`~pyformlang.finite_automaton.Regexable` - The other regexable object - - Returns - ---------- - enfa : :class:`~pyformlang.finite_automaton.EpsilonNFA` - The concatenation of the two regexable objects - """ - regex0 = self.to_regex() - regex1 = other.to_regex() - regex = regex0 + regex1 - return regex.to_epsilon_nfa() - - def kleene_star(self) -> EpsilonNFA: - """ Makes the kleene star of the current regexable object - - Returns - ---------- - enfa : :class:`~pyformlang.finite_automaton.EpsilonNFA` - The kleene star of the regexable object - """ - regex0 = self.to_regex() - regex = regex0.kleene_star() - return regex.to_epsilon_nfa() diff --git a/pyformlang/regular_expression/regex.py b/pyformlang/regular_expression/regex.py index 819126c..1823ff5 100644 --- a/pyformlang/regular_expression/regex.py +++ b/pyformlang/regular_expression/regex.py @@ -4,8 +4,8 @@ from typing import List, Iterable, Tuple, Optional, Any -from pyformlang.finite_automaton import Epsilon as FAEpsilon -from pyformlang.finite_automaton import EpsilonNFA, State, Symbol +from pyformlang.finite_automaton import FiniteAutomaton, EpsilonNFA +from pyformlang.finite_automaton import State, Symbol, Epsilon as FAEpsilon from pyformlang.cfg.cfg import CFG, Production from pyformlang.cfg.utils import to_variable @@ -564,3 +564,254 @@ def accepts(self, word: Iterable[str]) -> bool: """ self._enfa = self._to_epsilon_nfa_internal(False) return self._enfa.accepts(word) + + @classmethod + def from_finite_automaton(cls, automaton: FiniteAutomaton) -> "Regex": + """ Creates a regular expression from given finite automaton + + Returns + ---------- + regex : :class:`~pyformlang.regular_expression.Regex` + A regular expression equivalent to the current Epsilon NFA + + Examples + -------- + + >>> enfa = EpsilonNFA() + >>> enfa.add_transitions([(0, "abc", 1), (0, "d", 1), \ + (0, "epsilon", 2)]) + >>> enfa.add_start_state(0) + >>> enfa.add_final_state(1) + >>> regex = enfa.to_regex() + >>> regex.accepts(["abc"]) + True + + """ + copies = [automaton.copy() for _ in automaton.final_states] + final_states = list(automaton.final_states) + for i in range(len(automaton.final_states)): + for j in range(len(automaton.final_states)): + if i != j: + copies[j].remove_final_state(final_states[i]) + regex_l = [] + for copy in copies: + cls._remove_all_basic_states(copy) + regex_sub = cls._get_regex_simple(copy) + if regex_sub: + regex_l.append(regex_sub) + res = "+".join(regex_l) + return Regex(res) + + @classmethod + def _get_regex_simple(cls, automaton: FiniteAutomaton) -> str: + """ Get the regex of an automaton when it only composed of a start and + a final state + + CAUTION: For internal use only! + + Returns + ---------- + regex : str + A regex representing the automaton + """ + if not automaton.final_states or not automaton.start_states: + return "" + if len(automaton.final_states) != 1 or len(automaton.start_states) != 1: + raise ValueError("The automaton is not simple enough!") + if automaton.start_states == automaton.final_states: + # We are suppose to have only one good symbol + for symbol in automaton.symbols: + out_states = automaton(list(automaton.start_states)[0], symbol) + if out_states: + return "(" + str(symbol.value) + ")*" + return "epsilon" + start_to_start, start_to_end, end_to_start, end_to_end = \ + cls._get_bi_transitions(automaton) + return cls.__get_regex_sub(start_to_start, + start_to_end, + end_to_start, + end_to_end) + + @classmethod + def _get_bi_transitions(cls, automaton: FiniteAutomaton) \ + -> Tuple[str, str, str, str]: + """ Internal method to compute the transition in the case of a \ + simple automaton + + Returns + start_to_start : str + The transition from the start state to the start state + start_to_end : str + The transition from the start state to the end state + end_to_start : str + The transition from the end state to the start state + end_to_end : str + The transition from the end state to the end state + ---------- + """ + start = list(automaton.start_states)[0] + end = list(automaton.final_states)[0] + start_to_start = "epsilon" + start_to_end = "" + end_to_end = "epsilon" + end_to_start = "" + for state in automaton.states: + for symbol in automaton.symbols.union({FAEpsilon()}): + for out_state in automaton(state, symbol): + symbol_str = str(symbol.value) + if not symbol_str.isalnum(): + symbol_str = "(" + symbol_str + ")" + if state == start and out_state == start: + start_to_start = symbol_str + elif state == start and out_state == end: + start_to_end = symbol_str + elif state == end and out_state == start: + end_to_start = symbol_str + elif state == end and out_state == end: + end_to_end = symbol_str + return start_to_start, start_to_end, end_to_start, end_to_end + + @classmethod + def _remove_all_basic_states(cls, automaton: FiniteAutomaton) -> None: + """ Remove all states which are not the start state or a final state + + CAREFUL: This method modifies the current automaton, for internal usage + only! + + The function _create_or_transitions is supposed to be called before + calling this function + """ + cls._create_or_transitions(automaton) + states = automaton.states.copy() + for state in states: + if (state not in automaton.start_states \ + and state not in automaton.final_states): + cls._remove_state(automaton, state) + + @classmethod + def _remove_state(cls, automaton: FiniteAutomaton, state: State) -> None: + """ Removes a given state from the epsilon NFA + + CAREFUL: This method modifies the current automaton, for internal usage + only! + + The function _create_or_transitions is supposed to be called before + calling this function + + Parameters + ---------- + state : :class:`~pyformlang.finite_automaton.State` + The state to remove + + """ + # First compute all endings + out_transitions = {} + input_symbols = automaton.symbols.union({FAEpsilon()}) + for symbol in input_symbols: + out_states = automaton(state, symbol).copy() + for out_state in out_states: + out_transitions[out_state] = str(symbol.value) + automaton.remove_transition(state, symbol, out_state) + if state in out_transitions: + to_itself = "(" + out_transitions[state] + ")*" + del out_transitions[state] + for out_state in list(out_transitions.keys()): + out_transitions[out_state] = to_itself + "." + \ + out_transitions[out_state] + for in_state in automaton.states: + if in_state == state: + continue + for symbol in input_symbols: + out_states = automaton(in_state, symbol) + if state not in out_states: + continue + symbol_str = "(" + str(symbol.value) + ")" + automaton.remove_transition(in_state, symbol, state) + for out_state, next_symb in out_transitions.items(): + new_symbol = Symbol(symbol_str + "." + next_symb) + automaton.add_transition(in_state, new_symbol, out_state) + automaton.states.remove(state) + # We make sure the automaton has the good structure + cls._create_or_transitions(automaton) + + @classmethod + def _create_or_transitions(cls, automaton: FiniteAutomaton) -> None: + """ Creates a OR transition instead of several connections + + CAREFUL: This method modifies the automaton and is designed for \ + internal use only! + """ + for state in automaton.states: + new_transitions = {} + input_symbols = automaton.symbols.union({FAEpsilon()}) + for symbol in input_symbols: + out_states = automaton(state, symbol) + out_states = out_states.copy() + symbol_str = str(symbol.value) + for out_state in out_states: + automaton.remove_transition(state, symbol, out_state) + base = new_transitions.setdefault(out_state, "") + if "+" in symbol_str: + symbol_str = "(" + symbol_str + ")" + if base: + new_transitions[out_state] = "((" + base + ")+(" + \ + symbol_str + "))" + else: + new_transitions[out_state] = symbol_str + for out_state, next_symb in new_transitions.items(): + automaton.add_transition(state, + next_symb, + out_state) + + @classmethod + def __get_regex_sub(cls, + start_to_start: str, + start_to_end: str, + end_to_start: str, + end_to_end: str) -> str: + """ Combines the transitions in the regex simple function """ + if not start_to_end: + return "" + temp, part1 = cls.__get_temp(start_to_end, end_to_start, end_to_end) + part0 = "epsilon" + if start_to_start != "epsilon": + if temp: + part0 = "(" + start_to_start + "+" + temp + ")*" + else: + part0 = "(" + start_to_start + ")*" + elif temp != "epsilon" and temp: + part0 = "(" + temp + ")*" + return "(" + part0 + "." + part1 + ")" + + @classmethod + def __get_temp(cls, + start_to_end: str, + end_to_start: str, + end_to_end: str) -> Tuple[str, str]: + """ + Gets a temp values in the computation + of the simple automaton regex. + """ + temp = "epsilon" + if (start_to_end != "epsilon" + or end_to_end != "epsilon" + or end_to_start != "epsilon"): + temp = "" + if start_to_end != "epsilon": + temp = start_to_end + if end_to_end != "epsilon": + if temp: + temp += "." + end_to_end + "*" + else: + temp = end_to_end + "*" + part1 = temp + if not part1: + part1 = "epsilon" + if end_to_start != "epsilon": + if temp: + temp += "." + end_to_start + else: + temp = end_to_start + if not end_to_start: + temp = "" + return temp, part1 diff --git a/requirements.txt b/requirements.txt index 3179fd5..75d858d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,6 +11,7 @@ numpy pylint pycodestyle pydot +fastcore pygments>=2.7.4 # not directly required, pinned by Snyk to avoid a vulnerability pylint>=2.7.0 # not directly required, pinned by Snyk to avoid a vulnerability sphinx>=3.0.4 # not directly required, pinned by Snyk to avoid a vulnerability From c1d8f4689252a8f8ef1e0f6d4f64e20a9118b1a8 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Tue, 12 Nov 2024 19:33:26 +0300 Subject: [PATCH 28/42] make finite_automaton.copy generic, move regex to minimal dfa to Regex, refactor --- .../deterministic_finite_automaton.py | 16 ++---- pyformlang/finite_automaton/epsilon_nfa.py | 5 +- .../finite_automaton/finite_automaton.py | 55 ++++++++++++------- .../nondeterministic_finite_automaton.py | 5 +- .../test_deterministic_finite_automaton.py | 14 +++-- .../tests/test_epsilon_nfa.py | 32 +++++++---- .../test_nondeterministic_finite_automaton.py | 19 ++++--- .../finite_automaton/transition_function.py | 11 +++- pyformlang/regular_expression/regex.py | 7 +++ .../tests/test_python_regex.py | 6 ++ .../regular_expression/tests/test_regex.py | 17 +----- pyformlang/rsa/recursive_automaton.py | 17 ++---- 12 files changed, 106 insertions(+), 98 deletions(-) diff --git a/pyformlang/finite_automaton/deterministic_finite_automaton.py b/pyformlang/finite_automaton/deterministic_finite_automaton.py index 0d9a657..c2c73b4 100644 --- a/pyformlang/finite_automaton/deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/deterministic_finite_automaton.py @@ -115,8 +115,8 @@ def __init__(self, None, start_states, final_states) - self._transition_function = transition_function \ - or DeterministicTransitionFunction() + self._transition_function: DeterministicTransitionFunction = \ + transition_function or DeterministicTransitionFunction() @property def start_state(self) -> Optional[State]: @@ -247,10 +247,7 @@ def copy(self) -> "DeterministicFiniteAutomaton": True """ - return self._copy_to(DeterministicFiniteAutomaton()) # type: ignore - - def __copy__(self) -> "DeterministicFiniteAutomaton": - return self.copy() + return self._copy_to(DeterministicFiniteAutomaton()) def get_next_state(self, s_from: Hashable, symb_by: Hashable) \ -> Optional[State]: @@ -429,11 +426,11 @@ def _get_partition(self) -> Partition: return partition def __eq__(self, other: Any) -> bool: - if not isinstance(other, EpsilonNFA): + if not isinstance(other, DeterministicFiniteAutomaton): return False return self.is_equivalent_to(other) - def is_equivalent_to(self, other: EpsilonNFA) -> bool: + def is_equivalent_to(self, other: "DeterministicFiniteAutomaton") -> bool: """ Check whether two automata are equivalent Parameters @@ -459,9 +456,6 @@ def is_equivalent_to(self, other: EpsilonNFA) -> bool: True """ - if not isinstance(other, DeterministicFiniteAutomaton): - other_dfa = DeterministicFiniteAutomaton.from_epsilon_nfa(other) - return self.is_equivalent_to(other_dfa) self_minimal = self.minimize() other_minimal = other.minimize() return self._is_equivalent_to_minimal(self_minimal, other_minimal) diff --git a/pyformlang/finite_automaton/epsilon_nfa.py b/pyformlang/finite_automaton/epsilon_nfa.py index 97402e0..2ec5957 100644 --- a/pyformlang/finite_automaton/epsilon_nfa.py +++ b/pyformlang/finite_automaton/epsilon_nfa.py @@ -265,10 +265,7 @@ def copy(self) -> "EpsilonNFA": True """ - return self._copy_to(EpsilonNFA()) # type: ignore - - def __copy__(self) -> "EpsilonNFA": - return self.copy() + return self._copy_to(EpsilonNFA()) @classmethod def from_networkx(cls, graph: MultiDiGraph) -> "EpsilonNFA": diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index b91d38a..0ec187f 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -2,7 +2,8 @@ # pylint: disable=function-redefined -from typing import Dict, List, Set, Tuple, Iterable, Optional, Hashable, Any +from typing import Dict, List, Set, Tuple, \ + Iterable, Iterator, Optional, Hashable, Any, TypeVar from abc import abstractmethod from collections import deque from networkx import MultiDiGraph @@ -17,8 +18,10 @@ from .transition_function import TransitionFunction from .utils import to_state, to_symbol +fa_type = TypeVar("fa_type", bound="FiniteAutomaton") -class FiniteAutomaton: + +class FiniteAutomaton(Iterable[Tuple[State, Symbol, State]]): """ Represents a general finite automaton Attributes @@ -40,11 +43,11 @@ class FiniteAutomaton: """ def __init__(self) -> None: - self._states: Set[State] = set() - self._input_symbols: Set[Symbol] = set() - self._transition_function = TransitionFunction() - self._start_states: Set[State] = set() - self._final_states: Set[State] = set() + self._states: Set[State] + self._input_symbols: Set[Symbol] + self._transition_function: TransitionFunction + self._start_states: Set[State] + self._final_states: Set[State] @property def states(self) -> Set[State]: @@ -359,6 +362,15 @@ def __call__(self, s_from: Hashable, symb_by: Hashable) -> Set[State]: symb_by = to_symbol(symb_by) return self._transition_function(s_from, symb_by) + def __contains__(self, + transition: Tuple[Hashable, Hashable, Hashable]) -> bool: + """ Whether the given transition is present in finite automaton """ + s_from, symb_by, s_to = transition + s_from = to_state(s_from) + symb_by = to_symbol(symb_by) + s_to = to_state(s_to) + return (s_from, symb_by, s_to) in self._transition_function + def get_transitions_from(self, s_from: Hashable) \ -> Iterable[Tuple[Symbol, State]]: """ Gets transitions from the given state """ @@ -441,7 +453,7 @@ def to_fst(self) -> FST: fst.add_start_state(start_state.value) for final_state in self._final_states: fst.add_final_state(final_state.value) - for s_from, symb_by, s_to in self._transition_function.get_edges(): + for s_from, symb_by, s_to in self._transition_function: fst.add_transition(s_from.value, symb_by.value, s_to.value, @@ -609,17 +621,12 @@ def _get_reachable_states(self) -> Set[State]: states_to_process.append(next_state) return visited - @abstractmethod - def is_deterministic(self) -> bool: - """ Checks if the automaton is deterministic """ - raise NotImplementedError - def __len__(self) -> int: """Number of transitions""" return len(self._transition_function) - def __iter__(self) -> Iterable[Tuple[State, Symbol, State]]: - yield from self._transition_function.__iter__() + def __iter__(self) -> Iterator[Tuple[State, Symbol, State]]: + yield from self._transition_function def to_dict(self) -> Dict[State, Dict[Symbol, Set[State]]]: """ @@ -646,14 +653,15 @@ def to_dict(self) -> Dict[State, Dict[Symbol, Set[State]]]: """ return self._transition_function.to_dict() - def __copy__(self) -> "FiniteAutomaton": - return self.copy() - - def copy(self) -> "FiniteAutomaton": + @abstractmethod + def copy(self: fa_type) -> fa_type: """ Copies the current Finite Automaton instance """ - return self._copy_to(FiniteAutomaton()) + raise NotImplementedError + + def __copy__(self: fa_type) -> fa_type: + return self.copy() - def _copy_to(self, fa_to_copy_to: "FiniteAutomaton") -> "FiniteAutomaton": + def _copy_to(self, fa_to_copy_to: fa_type) -> fa_type: """ Copies current automaton properties to the given one """ for start in self._start_states: fa_to_copy_to.add_start_state(start) @@ -669,6 +677,11 @@ def _copy_to(self, fa_to_copy_to: "FiniteAutomaton") -> "FiniteAutomaton": fa_to_copy_to.add_transition(state, Epsilon(), state_to) return fa_to_copy_to + @abstractmethod + def is_deterministic(self) -> bool: + """ Checks if the automaton is deterministic """ + raise NotImplementedError + @staticmethod def __try_add(set_to_add_to: Set[Any], element_to_add: Any) -> bool: """ diff --git a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py index 5068740..a0155f2 100644 --- a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py @@ -122,10 +122,7 @@ def add_transition(self, def copy(self) -> "NondeterministicFiniteAutomaton": """ Copies the current NFA instance """ - return self._copy_to(NondeterministicFiniteAutomaton()) # type: ignore - - def __copy__(self) -> "NondeterministicFiniteAutomaton": - return self.copy() + return self._copy_to(NondeterministicFiniteAutomaton()) @classmethod def from_epsilon_nfa(cls, enfa: EpsilonNFA) \ diff --git a/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py b/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py index b0c0981..7618a7b 100644 --- a/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py @@ -1,13 +1,16 @@ """ Tests for the deterministic finite automata """ -from pyformlang.finite_automaton import DeterministicFiniteAutomaton, Epsilon + +import pytest + +from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.finite_automaton import State from pyformlang.finite_automaton import Symbol -from pyformlang.finite_automaton import TransitionFunction -from pyformlang.finite_automaton.transition_function import \ +from pyformlang.finite_automaton import Epsilon +from pyformlang.finite_automaton import DeterministicTransitionFunction +from pyformlang.finite_automaton.deterministic_transition_function import \ InvalidEpsilonTransition -import pytest class TestDeterministicFiniteAutomaton: @@ -24,7 +27,7 @@ def test_can_create(self): symb0 = Symbol("a") states = {state0, state1} input_symbols = {symb0} - transition_function = TransitionFunction() + transition_function = DeterministicTransitionFunction() transition_function.add_transition(state0, symb0, state1) start_state = state0 final_states = {state1} @@ -41,7 +44,6 @@ def test_can_create(self): dfa = DeterministicFiniteAutomaton(start_state=state1, final_states={state0, state1}) assert dfa is not None - assert dfa is dfa.to_deterministic() def test_add_transition(self): """ Tests the addition of transitions diff --git a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py index f273e35..f9f840d 100644 --- a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py +++ b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py @@ -1,13 +1,15 @@ """ Tests for epsilon NFA """ -import copy +import pytest +import copy import networkx -from pyformlang.finite_automaton import EpsilonNFA, State, Symbol, Epsilon -from ..regexable import Regexable -import pytest +from pyformlang.finite_automaton import EpsilonNFA +from pyformlang.finite_automaton import NondeterministicFiniteAutomaton +from pyformlang.finite_automaton import DeterministicFiniteAutomaton +from pyformlang.finite_automaton import State, Symbol, Epsilon class TestEpsilonNFA: @@ -73,7 +75,7 @@ def _perform_tests_digits(self, should_copy=False): def test_deterministic(self): """ Tests the transformation to a dfa""" enfa, digits, _, plus, minus, point = get_digits_enfa() - dfa = enfa.to_deterministic() + dfa = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa) assert dfa.is_deterministic() assert len(dfa.states) == 6 assert dfa.get_number_transitions() == 65 @@ -382,7 +384,7 @@ def test_minimization(self): enfa = get_enfa_example0_bis() symb_a = Symbol("a") symb_b = Symbol("b") - enfa = enfa.minimize() + enfa = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa).minimize() assert enfa.is_deterministic() assert len(enfa.states) == 2 assert enfa.accepts([symb_a, symb_b]) @@ -390,7 +392,7 @@ def test_minimization(self): assert enfa.accepts([symb_b]) assert not enfa.accepts([symb_a]) enfa = get_example_non_minimal() - enfa = enfa.minimize() + enfa = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa).minimize() assert enfa.is_deterministic() assert len(enfa.states) == 3 assert enfa.accepts([symb_a, symb_b]) @@ -398,7 +400,7 @@ def test_minimization(self): assert not enfa.accepts([symb_b]) assert not enfa.accepts([symb_a]) enfa = EpsilonNFA() - enfa = enfa.minimize() + enfa = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa).minimize() assert enfa.is_deterministic() assert len(enfa.states) == 1 assert not enfa.accepts([]) @@ -546,7 +548,9 @@ def test_equivalent(self): enfa1.add_final_state(state1) enfa1.add_transition(state0, symb_a, state1) enfa1.add_transition(state1, symb_a, state1) - assert enfa0.is_equivalent_to(enfa1) + dfa0 = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa0) + dfa1 = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa1) + assert dfa0.is_equivalent_to(dfa1) def test_non_equivalent(self): enfa0 = EpsilonNFA() @@ -562,7 +566,9 @@ def test_non_equivalent(self): enfa1.add_final_state(state1) enfa1.add_transition(state0, symb_a, state1) enfa1.add_transition(state1, symb_a, state0) - assert not enfa0.is_equivalent_to(enfa1) + dfa0 = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa0) + dfa1 = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa1) + assert not dfa0.is_equivalent_to(dfa1) def test_get_as_dict(self): enfa0 = EpsilonNFA() @@ -616,11 +622,13 @@ def test_remove_epsilon_transitions(self): enfa.add_start_state("a") enfa.add_final_state("b") assert len(enfa.start_states) == 1 - nfa = enfa.remove_epsilon_transitions() + nfa = NondeterministicFiniteAutomaton.from_epsilon_nfa(enfa) assert len(nfa.start_states) == 3 assert len(nfa.final_states) == 2 assert nfa.get_number_transitions() == 3 - assert nfa.is_equivalent_to(enfa) + dfa0 = DeterministicFiniteAutomaton.from_nfa(nfa) + dfa1 = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa) + assert dfa0.is_equivalent_to(dfa1) def test_word_generation(self): enfa = get_enfa_example_for_word_generation() diff --git a/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py index fe7819c..bd15e5f 100644 --- a/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py @@ -1,14 +1,15 @@ """ Tests for nondeterministic finite automata """ -from pyformlang.finite_automaton import NondeterministicFiniteAutomaton,\ - Epsilon -from pyformlang.finite_automaton import State -from pyformlang.finite_automaton import Symbol -from pyformlang.finite_automaton.transition_function import \ - InvalidEpsilonTransition + import pytest +from pyformlang.finite_automaton import NondeterministicFiniteAutomaton +from pyformlang.finite_automaton import DeterministicFiniteAutomaton +from pyformlang.finite_automaton import State, Symbol, Epsilon +from pyformlang.finite_automaton.deterministic_transition_function import \ + InvalidEpsilonTransition + class TestNondeterministicFiniteAutomaton: """ @@ -23,7 +24,7 @@ def test_creation(self): nfa = NondeterministicFiniteAutomaton() assert nfa is not None states = [State(x) for x in range(10)] - nfa = NondeterministicFiniteAutomaton(start_state=set(states)) + nfa = NondeterministicFiniteAutomaton(start_states=set(states)) assert nfa is not None def test_remove_initial(self): @@ -79,7 +80,7 @@ def test_accepts(self): assert not nfa.is_deterministic() assert nfa.accepts([symb_c]) nfa.remove_start_state(state1) - dfa = nfa.to_deterministic() + dfa = DeterministicFiniteAutomaton.from_nfa(nfa) assert dfa.is_deterministic() assert dfa.accepts([symb_a, symb_b, symb_c]) assert dfa.accepts([symb_a, symb_b, symb_b, symb_b, symb_c]) @@ -105,7 +106,7 @@ def test_deterministic(self): nfa.add_transition(state0, symb0, state1) nfa.add_transition(state0, symb1, state0) nfa.add_transition(state1, symb1, state2) - dfa = nfa.to_deterministic() + dfa = DeterministicFiniteAutomaton.from_nfa(nfa) assert len(dfa.states) == 3 assert dfa.get_number_transitions() == 6 diff --git a/pyformlang/finite_automaton/transition_function.py b/pyformlang/finite_automaton/transition_function.py index 8e1795a..b796272 100644 --- a/pyformlang/finite_automaton/transition_function.py +++ b/pyformlang/finite_automaton/transition_function.py @@ -4,14 +4,14 @@ # pylint: disable=function-redefined -from typing import Dict, Set, Tuple, Iterable +from typing import Dict, Set, Tuple, Iterable, Iterator from abc import abstractmethod from fastcore.dispatch import typedispatch from .state import State from .symbol import Symbol -class TransitionFunction: +class TransitionFunction(Iterable[Tuple[State, Symbol, State]]): """ General transition function representation """ @abstractmethod @@ -56,6 +56,11 @@ def __call__(self, s_from: State, symb_by: Symbol) -> Set[State]: """ raise NotImplementedError + def __contains__(self, transition: Tuple[State, Symbol, State]) -> bool: + """ Whether the given transition is present in the function """ + s_from, symb_by, s_to = transition + return s_to in self(s_from, symb_by) + @abstractmethod def get_transitions_from(self, s_from: State) \ -> Iterable[Tuple[Symbol, State]]: @@ -74,7 +79,7 @@ def get_edges(self) -> Iterable[Tuple[State, Symbol, State]]: """ Gets the edges """ raise NotImplementedError - def __iter__(self) -> Iterable[Tuple[State, Symbol, State]]: + def __iter__(self) -> Iterator[Tuple[State, Symbol, State]]: yield from self.get_edges() @abstractmethod diff --git a/pyformlang/regular_expression/regex.py b/pyformlang/regular_expression/regex.py index 1823ff5..1e67840 100644 --- a/pyformlang/regular_expression/regex.py +++ b/pyformlang/regular_expression/regex.py @@ -5,6 +5,7 @@ from typing import List, Iterable, Tuple, Optional, Any from pyformlang.finite_automaton import FiniteAutomaton, EpsilonNFA +from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.finite_automaton import State, Symbol, Epsilon as FAEpsilon from pyformlang.cfg.cfg import CFG, Production from pyformlang.cfg.utils import to_variable @@ -136,6 +137,12 @@ def get_number_operators(self) -> int: return 1 + sum(son.get_number_operators() for son in self.sons) return 0 + def to_minimal_dfa(self) -> "DeterministicFiniteAutomaton": + """ Builds minimal dfa from current regex """ + enfa = self.to_epsilon_nfa() + dfa = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa) + return dfa.minimize() + def to_epsilon_nfa(self) -> EpsilonNFA: """ Transforms the regular expression into an epsilon NFA diff --git a/pyformlang/regular_expression/tests/test_python_regex.py b/pyformlang/regular_expression/tests/test_python_regex.py index db095ae..bd88e6b 100644 --- a/pyformlang/regular_expression/tests/test_python_regex.py +++ b/pyformlang/regular_expression/tests/test_python_regex.py @@ -11,6 +11,12 @@ class TestPythonRegex: # pylint: disable=missing-function-docstring, too-many-public-methods + def test_simple(self): + regex = PythonRegex("abc") + assert regex.accepts(["a", "b", "c"]) + assert not regex.accepts(["a", "b", "b"]) + assert not regex.accepts(["a", "b"]) + def test_with_brackets(self): regex = PythonRegex("a[bc]") assert regex.accepts(["a", "b"]) diff --git a/pyformlang/regular_expression/tests/test_regex.py b/pyformlang/regular_expression/tests/test_regex.py index 6bbb821..d9bc20a 100644 --- a/pyformlang/regular_expression/tests/test_regex.py +++ b/pyformlang/regular_expression/tests/test_regex.py @@ -163,8 +163,8 @@ def test_get_repr(self): regex0 = Regex("a*.(b|c)epsilon") regex_str = str(regex0) regex1 = Regex(regex_str) - dfa0 = regex0.to_epsilon_nfa().to_deterministic().minimize() - dfa1 = regex1.to_epsilon_nfa().to_deterministic().minimize() + dfa0 = regex0.to_minimal_dfa() + dfa1 = regex1.to_minimal_dfa() assert dfa0 == dfa1 def test_accepts(self): @@ -172,19 +172,6 @@ def test_accepts(self): assert regex.accepts(["a"]) assert not regex.accepts(["a", "b"]) - def test_from_python_simple(self): - regex = Regex.from_python_regex("abc") - assert regex.accepts(["a", "b", "c"]) - assert not regex.accepts(["a", "b", "b"]) - assert not regex.accepts(["a", "b"]) - - def test_from_python_brackets(self): - regex = Regex.from_python_regex("a[bc]") - assert regex.accepts(["a", "b"]) - assert regex.accepts(["a", "c"]) - assert not regex.accepts(["a", "b", "c"]) - assert not regex.accepts(["a", "a"]) - def test_space(self): regex = Regex("\\ ") assert regex.accepts([" "]) diff --git a/pyformlang/rsa/recursive_automaton.py b/pyformlang/rsa/recursive_automaton.py index c12d652..158b13b 100644 --- a/pyformlang/rsa/recursive_automaton.py +++ b/pyformlang/rsa/recursive_automaton.py @@ -112,7 +112,7 @@ def from_regex(cls, regex: Regex, start_nonterminal: Union[Symbol, str]): The new recursive automaton built from regular expression """ start_nonterminal = to_symbol(start_nonterminal) - box = Box(cls.__regex_to_minimal_dfa(regex), start_nonterminal) + box = Box(regex.to_minimal_dfa(), start_nonterminal) return RecursiveAutomaton(box, {box}) @classmethod @@ -154,10 +154,9 @@ def from_ebnf(cls, text, start_nonterminal: Union[Symbol, str] = Symbol("S")): productions[head] = body for head, body in productions.items(): - boxes.add(Box(cls.__regex_to_minimal_dfa(Regex(body)), - to_symbol(head))) - start_box_dfa = cls.__regex_to_minimal_dfa( - Regex(productions[start_nonterminal.value])) + boxes.add(Box(Regex(body).to_minimal_dfa(), to_symbol(head))) + start_box_dfa = Regex(productions[start_nonterminal.value]) \ + .to_minimal_dfa() start_box = Box(start_box_dfa, start_nonterminal) return RecursiveAutomaton(start_box, boxes) @@ -182,11 +181,3 @@ def is_equals_to(self, other): def __eq__(self, other): return self.is_equals_to(other) - - @classmethod - def __regex_to_minimal_dfa(cls, regex: Regex) \ - -> DeterministicFiniteAutomaton: - """ Build minimal dfa from given regex """ - enfa = regex.to_epsilon_nfa() - dfa = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa) - return dfa.minimize() From f0b00309c8794749e04a47b0a8dcc0cff9e8665a Mon Sep 17 00:00:00 2001 From: bygu4 Date: Tue, 12 Nov 2024 21:45:24 +0300 Subject: [PATCH 29/42] reimplement regexable methods for epsilon_nfa --- pyformlang/finite_automaton/epsilon_nfa.py | 62 +++++++++++++++++++++- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/pyformlang/finite_automaton/epsilon_nfa.py b/pyformlang/finite_automaton/epsilon_nfa.py index 2ec5957..d680bbe 100644 --- a/pyformlang/finite_automaton/epsilon_nfa.py +++ b/pyformlang/finite_automaton/epsilon_nfa.py @@ -345,7 +345,7 @@ def get_complement(self) -> "EpsilonNFA": """ enfa = self.copy() - trash = State("TrashNode") + trash = self.__get_new_state("Trash") enfa.add_final_state(trash) for state in self._states: if state in self._final_states: @@ -451,8 +451,44 @@ def __and__(self, other: "EpsilonNFA") -> "EpsilonNFA": """ return self.get_intersection(other) + def get_union(self, other: "EpsilonNFA") -> "EpsilonNFA": + """ Computes the union with given Epsilon NFA """ + union = other.copy() + self._copy_to(union) + return union + + def __or__(self, other: "EpsilonNFA") -> "EpsilonNFA": + """ Computes the union with given Epsilon NFA """ + return self.get_union(other) + + def concatenate(self, other: "EpsilonNFA") -> "EpsilonNFA": + """ Computes the concatenation of two Epsilon NFAs """ + concatenation = EpsilonNFA() + for s_from, symb_by, s_to in self: + concatenation.add_transition((0, s_from.value), + symb_by, + (0, s_to.value)) + if s_from in self.start_states: + concatenation.add_start_state((0, s_from.value)) + for s_from, symb_by, s_to in other: + concatenation.add_transition((1, s_from.value), + symb_by, + (1, s_to.value)) + if other.is_final_state(s_to): + concatenation.add_final_state((1, s_to.value)) + for self_final in self.final_states: + for other_start in other.start_states: + concatenation.add_transition((0, self_final.value), + Epsilon(), + (1, other_start.value)) + return concatenation + + def __add__(self, other: "EpsilonNFA") -> "EpsilonNFA": + """ Computes the concatenation of two Epsilon NFAs """ + return self.concatenate(other) + def get_difference(self, other: "EpsilonNFA") -> "EpsilonNFA": - """ Compute the difference with another Epsilon NFA + """ Computes the difference with another Epsilon NFA Equivalent to: @@ -557,6 +593,18 @@ def __invert__(self) -> "EpsilonNFA": """ return self.reverse() + def kleene_star(self) -> "EpsilonNFA": + """ Compute the kleene closure of current EpsilonNFA""" + kleene_closure = self.copy() + new_start = self.__get_new_state("Start") + for old_start in self.start_states: + kleene_closure.add_transition(new_start, Epsilon(), old_start) + kleene_closure.start_states.clear() + kleene_closure.add_start_state(new_start) + for final_state in self.final_states: + kleene_closure.add_transition(final_state, Epsilon(), new_start) + return kleene_closure + def is_empty(self) -> bool: """ Checks if the language represented by the FSM is empty or not @@ -600,6 +648,16 @@ def is_empty(self) -> bool: def __bool__(self) -> bool: return not self.is_empty() + def __get_new_state(self, prefix: str) -> State: + """ + Get a state that wasn't previously in automaton + starting with given string. + """ + existing_values = set(state.value for state in self.states) + while prefix in existing_values: + prefix += '`' + return State(prefix) + @staticmethod def __combine_state_pair(state0: State, state1: State) -> State: """ Combine two states """ From fab3a37ef49f9ae77682fb4e6160b83e4dcd7eb7 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Tue, 12 Nov 2024 22:42:00 +0300 Subject: [PATCH 30/42] refactor tests for fa to regex transitions --- .../test_deterministic_finite_automaton.py | 63 +++-- .../finite_automaton/tests/test_epsilon.py | 1 + .../tests/test_epsilon_nfa.py | 184 +-------------- .../test_nondeterministic_finite_automaton.py | 11 + ...st_nondeterministic_transition_function.py | 17 +- .../finite_automaton/tests/test_state.py | 1 + .../finite_automaton/tests/test_symbol.py | 1 + .../tests/test_python_regex.py | 1 + .../regular_expression/tests/test_regex.py | 215 +++++++++++++++++- 9 files changed, 261 insertions(+), 233 deletions(-) diff --git a/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py b/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py index 7618a7b..27fc511 100644 --- a/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py @@ -4,10 +4,9 @@ import pytest +from pyformlang.finite_automaton import EpsilonNFA from pyformlang.finite_automaton import DeterministicFiniteAutomaton -from pyformlang.finite_automaton import State -from pyformlang.finite_automaton import Symbol -from pyformlang.finite_automaton import Epsilon +from pyformlang.finite_automaton import State, Symbol, Epsilon from pyformlang.finite_automaton import DeterministicTransitionFunction from pyformlang.finite_automaton.deterministic_transition_function import \ InvalidEpsilonTransition @@ -79,29 +78,9 @@ def test_accepts(self): """ Tests the acceptance of dfa """ dfa = get_example0() - self._perform_tests_example0(dfa) + perform_tests_example0(dfa) dfa = get_example0_bis() - self._perform_tests_example0(dfa) - - def _perform_tests_example0(self, dfa): - """ Tests for DFA from example 0 """ - symb_a = Symbol("a") - symb_b = Symbol("b") - symb_c = Symbol("c") - symb_d = Symbol("d") - state0 = State(0) - state1 = State(1) - assert dfa.accepts([symb_a, symb_b, symb_c]) - assert dfa.accepts([symb_a, symb_b, symb_b, symb_b, symb_c]) - assert dfa.accepts([symb_a, symb_b, symb_d]) - assert dfa.accepts([symb_a, symb_d]) - assert not dfa.accepts([symb_a, symb_c, symb_d]) - assert not dfa.accepts([symb_d, symb_c, symb_d]) - assert not dfa.accepts([]) - assert dfa.remove_start_state(state1) == 0 - assert dfa.accepts([symb_a, symb_b, symb_c]) - assert dfa.remove_start_state(state0) == 1 - assert not dfa.accepts([symb_a, symb_b, symb_c]) + perform_tests_example0(dfa) dfa.add_start_state(0) assert dfa.accepts(["a", "b", "c"]) @@ -119,13 +98,7 @@ def _perform_tests_example0(self, dfa): def test_copy(self): """ Test the copy of a DFA """ dfa = get_example0().copy() - self._perform_tests_example0(dfa) - - def test_regex(self): - """ Tests the regex transformation """ - dfa = get_example0() - dfa = dfa.to_regex().to_epsilon_nfa() - self._perform_tests_example0(dfa) + perform_tests_example0(dfa) def test_complement(self): """ Tests the complement operation """ @@ -277,11 +250,6 @@ def test_equivalent(self): dfa2.add_final_state(State("D")) assert dfa2 != dfa1 - def test_regex_dfa(self): - dfa1 = get_dfa_example() - dfa_regex = dfa1.to_regex().to_epsilon_nfa() - assert dfa1 == dfa_regex - def test_word_generation(self): dfa = get_dfa_example_for_word_generation() accepted_words = list(dfa.get_accepted_words()) @@ -377,3 +345,24 @@ def get_dfa_example_without_accepted_words(): dfa.add_start_state(states[0]) dfa.add_final_state(states[3]) return dfa + + +def perform_tests_example0(enfa: EpsilonNFA): + """ Tests for DFA from example 0 """ + symb_a = Symbol("a") + symb_b = Symbol("b") + symb_c = Symbol("c") + symb_d = Symbol("d") + state0 = State(0) + state1 = State(1) + assert enfa.accepts([symb_a, symb_b, symb_c]) + assert enfa.accepts([symb_a, symb_b, symb_b, symb_b, symb_c]) + assert enfa.accepts([symb_a, symb_b, symb_d]) + assert enfa.accepts([symb_a, symb_d]) + assert not enfa.accepts([symb_a, symb_c, symb_d]) + assert not enfa.accepts([symb_d, symb_c, symb_d]) + assert not enfa.accepts([]) + assert enfa.remove_start_state(state1) == 0 + assert enfa.accepts([symb_a, symb_b, symb_c]) + assert enfa.remove_start_state(state0) == 1 + assert not enfa.accepts([symb_a, symb_b, symb_c]) diff --git a/pyformlang/finite_automaton/tests/test_epsilon.py b/pyformlang/finite_automaton/tests/test_epsilon.py index 16b57ed..955ac6f 100644 --- a/pyformlang/finite_automaton/tests/test_epsilon.py +++ b/pyformlang/finite_automaton/tests/test_epsilon.py @@ -1,6 +1,7 @@ """ Tests for epsilon transitions """ + from pyformlang.finite_automaton import Epsilon from pyformlang.finite_automaton import Symbol diff --git a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py index f9f840d..d3ca14d 100644 --- a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py +++ b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py @@ -2,7 +2,6 @@ Tests for epsilon NFA """ -import pytest import copy import networkx @@ -15,7 +14,7 @@ class TestEpsilonNFA: """ Tests epsilon NFA """ - # pylint: disable=missing-function-docstring, protected-access + # pylint: disable=missing-function-docstring # pylint: disable=too-many-statements, too-many-public-methods def test_eclose(self): @@ -36,9 +35,8 @@ def test_eclose(self): assert len(enfa.eclose(states[2])) == 3 assert len(enfa.eclose(states[5])) == 2 assert len(enfa.eclose(states[6])) == 1 - assert len(list(enfa._transition_function.get_edges())) == 7 - assert enfa.remove_transition(states[1], epsilon, states[4]) == \ - 1 + assert len(list(iter(enfa))) == 7 + assert enfa.remove_transition(states[1], epsilon, states[4]) == 1 assert not enfa.is_deterministic() def test_accept(self): @@ -49,7 +47,7 @@ def test_copy(self): """ Tests the copy of enda """ self._perform_tests_digits(True) - def _perform_tests_digits(self, should_copy=False): + def _perform_tests_digits(self, should_copy: bool = False): enfa, digits, epsilon, plus, minus, point = get_digits_enfa() if should_copy: enfa = copy.copy(enfa) @@ -89,168 +87,14 @@ def test_deterministic(self): assert not dfa.accepts([point]) assert not dfa.accepts([plus]) - def test_remove_state(self): - " Tests the remove of state """ - enfa = EpsilonNFA() - state0 = State(0) - state1 = State(1) - state2 = State(2) - symb02 = Symbol("a+b") - symb01 = Symbol("c*") - symb11 = Symbol("b+(c.d)") - symb12 = Symbol("a.b.c") - enfa.add_start_state(state0) - enfa.add_final_state(state2) - enfa.add_transition(state0, symb01, state1) - enfa.add_transition(state0, symb02, state2) - enfa.add_transition(state1, symb11, state1) - enfa.add_transition(state1, symb12, state2) - enfa._remove_all_basic_states() - assert enfa.get_number_transitions() == 1 - assert len(enfa.states) == 2 - - def test_to_regex(self): - """ Tests the transformation to regex """ - enfa = EpsilonNFA() - state0 = State(0) - state1 = State(1) - state2 = State(2) - symb_e = Symbol("e") - symb_f = Symbol("f") - symb_g = Symbol("g") - enfa.add_start_state(state0) - enfa.add_final_state(state2) - enfa.add_transition(state0, symb_e, state1) - enfa.add_transition(state1, symb_f, state2) - enfa.add_transition(state0, symb_g, state2) - regex = enfa.to_regex() - enfa2 = regex.to_epsilon_nfa() - assert enfa2.accepts([symb_e, symb_f]) - assert enfa2.accepts([symb_g]) - assert not enfa2.accepts([]) - assert not enfa2.accepts([symb_e]) - assert not enfa2.accepts([symb_f]) - enfa.add_final_state(state0) - with pytest.raises(ValueError) as _: - enfa._get_regex_simple() - regex = enfa.to_regex() - enfa3 = regex.to_epsilon_nfa() - assert enfa3.accepts([symb_e, symb_f]) - assert enfa3.accepts([symb_g]) - assert enfa3.accepts([]) - assert not enfa3.accepts([symb_e]) - assert not enfa3.accepts([symb_f]) - enfa.remove_start_state(state0) - regex = enfa.to_regex() - enfa3 = regex.to_epsilon_nfa() - assert not enfa3.accepts([symb_e, symb_f]) - assert not enfa3.accepts([symb_g]) - assert not enfa3.accepts([]) - assert not enfa3.accepts([symb_e]) - assert not enfa3.accepts([symb_f]) - enfa.add_start_state(state0) - enfa.add_transition(state0, symb_f, state0) - regex = enfa.to_regex() - enfa3 = regex.to_epsilon_nfa() - assert enfa3.accepts([symb_e, symb_f]) - assert enfa3.accepts([symb_f, symb_e, symb_f]) - assert enfa3.accepts([symb_g]) - assert enfa3.accepts([symb_f, symb_f, symb_g]) - assert enfa3.accepts([]) - assert not enfa3.accepts([symb_e]) - assert enfa3.accepts([symb_f]) - - def test_to_regex2(self): - """ Tests the transformation to regex """ - enfa = EpsilonNFA() - state0 = State(0) - state1 = State(1) - symb_a = Symbol("0") - symb_b = Symbol("1") - enfa.add_start_state(state0) - enfa.add_final_state(state1) - enfa.add_transition(state0, symb_a, state0) - enfa.add_transition(state0, symb_a, state1) - enfa.add_transition(state1, symb_b, state0) - enfa.add_transition(state1, symb_b, state1) - regex = enfa.to_regex() - enfa2 = regex.to_epsilon_nfa() - assert enfa2.accepts([symb_a]) - assert enfa2.accepts([symb_a, symb_a]) - assert enfa2.accepts([symb_a, symb_a, symb_b]) - assert enfa2.accepts([symb_a, symb_a, symb_b, symb_b]) - assert enfa2.accepts([symb_a, symb_a, - symb_b, symb_b, symb_a]) - assert enfa2.accepts([symb_a, symb_a, symb_b, - symb_b, symb_a, symb_b]) - assert not enfa2.accepts([symb_b]) - - def test_to_regex3(self): - """ Tests the transformation to regex """ - enfa = EpsilonNFA() - state0 = State(0) - state1 = State(1) - symb_a = Symbol("0") - symb_b = Symbol("1") - enfa.add_start_state(state0) - enfa.add_final_state(state1) - enfa.add_transition(state0, symb_a, state0) - enfa.add_transition(state1, symb_b, state0) - enfa.add_transition(state1, symb_b, state1) - regex = enfa.to_regex() - enfa2 = regex.to_epsilon_nfa() - assert not enfa2.accepts([symb_a]) - assert not enfa2.accepts([symb_a, symb_a]) - assert not enfa2.accepts([symb_a, symb_a, symb_b]) - assert not enfa2.accepts([symb_a, symb_a, - symb_b, symb_b, symb_a]) - assert not enfa2.accepts([symb_a, symb_a, symb_b, - symb_b, symb_a, symb_b]) - assert not enfa2.accepts([symb_b]) - epsilon = Epsilon() - enfa.add_transition(state0, epsilon, state1) - regex = enfa.to_regex() - enfa2 = regex.to_epsilon_nfa() - assert enfa.accepts([]) - assert enfa.accepts([symb_a]) - assert enfa2.accepts([symb_a]) - assert enfa2.accepts([symb_a, symb_a]) - assert enfa2.accepts([symb_a, symb_a, symb_b, symb_b]) - assert enfa2.accepts([symb_a, symb_a, symb_b, symb_b, - symb_a, symb_b]) - assert enfa2.accepts([symb_b]) - assert enfa2.accepts([]) - enfa.remove_transition(state0, symb_a, state0) - regex = enfa.to_regex() - enfa2 = regex.to_epsilon_nfa() - assert not enfa2.accepts([symb_a]) - assert not enfa2.accepts([symb_a, symb_a]) - assert not enfa2.accepts([symb_a, symb_a, symb_b]) - assert not enfa2.accepts([symb_a, symb_a, symb_b, - symb_b, symb_a]) - assert not enfa2.accepts([symb_a, symb_a, symb_b, symb_b, - symb_a, symb_b]) - assert enfa2.accepts([symb_b]) - assert enfa2.accepts([]) - enfa.remove_transition(state1, symb_b, state1) - regex = enfa.to_regex() - enfa2 = regex.to_epsilon_nfa() - assert enfa2.accepts([symb_b, symb_b]) - enfa.add_transition(state0, symb_a, state0) - regex = enfa.to_regex() - enfa2 = regex.to_epsilon_nfa() - assert enfa2.accepts([symb_a, symb_b]) - def test_union(self): """ Tests the union of two epsilon NFA """ - with pytest.raises(NotImplementedError) as _: - Regexable().to_regex() enfa0 = get_enfa_example0() enfa1 = get_enfa_example1() symb_a = Symbol("a") symb_b = Symbol("b") symb_c = Symbol("c") - enfa = enfa0.union(enfa1) + enfa = enfa0.get_union(enfa1) assert enfa.accepts([symb_b]) assert enfa.accepts([symb_a, symb_b]) assert enfa.accepts([symb_c]) @@ -592,24 +436,6 @@ def test_call(self): enfa = get_enfa_example1() assert len(enfa(2)) == 1 - def test_example_doc(self): - enfa = EpsilonNFA() - state0 = State(0) - state1 = State(1) - symb_a = Symbol("0") - symb_b = Symbol("1") - enfa.add_start_state(state0) - enfa.add_final_state(state1) - enfa.add_transition(state0, symb_a, state0) - enfa.add_transition(state1, symb_b, state0) - enfa.add_transition(state1, symb_b, state1) - - # Turn a finite automaton into a regex... - regex = enfa.to_regex() - # And turn it back into an epsilon non deterministic automaton - enfa2 = regex.to_epsilon_nfa() - assert enfa == enfa2 - def test_remove_epsilon_transitions(self): enfa = EpsilonNFA() enfa.add_transitions([ diff --git a/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py index bd15e5f..e52e383 100644 --- a/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py @@ -134,6 +134,17 @@ def test_for_duplicate_generation(self): assert [Symbol("b"), Symbol("c")] in accepted_words assert len(accepted_words) == 2 + def test_copy(self): + nfa = get_nfa_example_with_duplicates().copy() + assert len(nfa.states) == 9 + assert len(nfa.symbols) == 3 + assert len(nfa.start_states) == 4 + assert len(nfa.final_states) == 3 + assert nfa.get_number_transitions() == 9 + assert nfa.accepts([Symbol("a"), Symbol("b")]) + assert nfa.accepts([Symbol("b"), Symbol("c")]) + assert not nfa.is_deterministic() + def get_nfa_example_for_word_generation(): """ diff --git a/pyformlang/finite_automaton/tests/test_nondeterministic_transition_function.py b/pyformlang/finite_automaton/tests/test_nondeterministic_transition_function.py index ae42701..b74440a 100644 --- a/pyformlang/finite_automaton/tests/test_nondeterministic_transition_function.py +++ b/pyformlang/finite_automaton/tests/test_nondeterministic_transition_function.py @@ -1,8 +1,9 @@ """ Test the nondeterministic transition functions """ -from pyformlang.finite_automaton import State, Symbol, \ - NondeterministicTransitionFunction, Epsilon + +from pyformlang.finite_automaton import NondeterministicTransitionFunction +from pyformlang.finite_automaton import State, Symbol, Epsilon class TestNondeterministicTransitionFunction: @@ -59,19 +60,19 @@ def test_remove_transitions(self): symb_by = Symbol("a") transition_function.add_transition(s_from, symb_by, s_to) assert transition_function.remove_transition(s_from, - symb_by, - s_to) == 1 + symb_by, + s_to) == 1 assert len(transition_function(s_to, symb_by)) == 0 assert transition_function.get_number_transitions() == 0 assert len(transition_function(s_from, symb_by)) == 0 assert transition_function.remove_transition(s_from, - symb_by, - s_to) == 0 + symb_by, + s_to) == 0 transition_function.add_transition(s_from, symb_by, s_to) transition_function.add_transition(s_from, symb_by, s_from) assert transition_function.remove_transition(s_from, - symb_by, - s_to) == 1 + symb_by, + s_to) == 1 assert transition_function.get_number_transitions() == 1 assert len(transition_function(s_from, symb_by)) == 1 diff --git a/pyformlang/finite_automaton/tests/test_state.py b/pyformlang/finite_automaton/tests/test_state.py index 3083a79..0d3d150 100644 --- a/pyformlang/finite_automaton/tests/test_state.py +++ b/pyformlang/finite_automaton/tests/test_state.py @@ -1,6 +1,7 @@ """ Tests the states """ + from pyformlang.finite_automaton import State diff --git a/pyformlang/finite_automaton/tests/test_symbol.py b/pyformlang/finite_automaton/tests/test_symbol.py index 74c560f..5d7be9a 100644 --- a/pyformlang/finite_automaton/tests/test_symbol.py +++ b/pyformlang/finite_automaton/tests/test_symbol.py @@ -1,6 +1,7 @@ """ Tests for the symbols """ + from pyformlang.finite_automaton import Symbol diff --git a/pyformlang/regular_expression/tests/test_python_regex.py b/pyformlang/regular_expression/tests/test_python_regex.py index bd88e6b..c0743c0 100644 --- a/pyformlang/regular_expression/tests/test_python_regex.py +++ b/pyformlang/regular_expression/tests/test_python_regex.py @@ -1,6 +1,7 @@ """ Testing python regex parsing """ + import re from pyformlang.regular_expression.python_regex import PythonRegex diff --git a/pyformlang/regular_expression/tests/test_regex.py b/pyformlang/regular_expression/tests/test_regex.py index d9bc20a..c875544 100644 --- a/pyformlang/regular_expression/tests/test_regex.py +++ b/pyformlang/regular_expression/tests/test_regex.py @@ -1,15 +1,22 @@ """ Tests for regular expressions """ -from pyformlang.regular_expression import Regex, MisformedRegexError -from pyformlang import finite_automaton + import pytest +from pyformlang.regular_expression import Regex, MisformedRegexError +from pyformlang.finite_automaton import EpsilonNFA +from pyformlang.finite_automaton import DeterministicFiniteAutomaton +from pyformlang.finite_automaton import State, Symbol, Epsilon +from pyformlang.finite_automaton.tests.test_deterministic_finite_automaton \ + import get_example0, get_dfa_example, perform_tests_example0 + class TestRegex: """ Tests for regex """ # pylint: disable=missing-function-docstring,too-many-public-methods + # pylint: disable=protected-access def test_creation(self): """ Try to create regex """ @@ -65,10 +72,10 @@ def test_creation(self): def test_to_enfa0(self): """ Tests the transformation to a regex """ - symb_a = finite_automaton.Symbol("a") - symb_b = finite_automaton.Symbol("b") - symb_c = finite_automaton.Symbol("c") - epsilon = finite_automaton.Epsilon() + symb_a = Symbol("a") + symb_b = Symbol("b") + symb_c = Symbol("c") + epsilon = Epsilon() regex = Regex("a|b") enfa = regex.to_epsilon_nfa() assert enfa.accepts([symb_a]) @@ -107,9 +114,9 @@ def test_to_enfa0(self): def test_to_enfa1(self): """ Tests the transformation to a regex """ - symb_a = finite_automaton.Symbol("a") - symb_b = finite_automaton.Symbol("b") - symb_c = finite_automaton.Symbol("c") + symb_a = Symbol("a") + symb_b = Symbol("b") + symb_c = Symbol("c") regex = Regex("a**") enfa = regex.to_epsilon_nfa() assert enfa.accepts([symb_a]) @@ -279,3 +286,193 @@ def test_backslash_b(self): def test_backslash(self): assert Regex("(\\\\|])").accepts("\\") assert Regex("(\\\\|])").accepts("]") + + def test_remove_state(self): + " Tests the remove of state """ + enfa = EpsilonNFA() + state0 = State(0) + state1 = State(1) + state2 = State(2) + symb02 = Symbol("a+b") + symb01 = Symbol("c*") + symb11 = Symbol("b+(c.d)") + symb12 = Symbol("a.b.c") + enfa.add_start_state(state0) + enfa.add_final_state(state2) + enfa.add_transition(state0, symb01, state1) + enfa.add_transition(state0, symb02, state2) + enfa.add_transition(state1, symb11, state1) + enfa.add_transition(state1, symb12, state2) + Regex._remove_all_basic_states(enfa) + assert enfa.get_number_transitions() == 1 + assert len(enfa.states) == 2 + + def test_from_enfa1(self): + """ Tests the transformation to regex """ + enfa = EpsilonNFA() + state0 = State(0) + state1 = State(1) + state2 = State(2) + symb_e = Symbol("e") + symb_f = Symbol("f") + symb_g = Symbol("g") + enfa.add_start_state(state0) + enfa.add_final_state(state2) + enfa.add_transition(state0, symb_e, state1) + enfa.add_transition(state1, symb_f, state2) + enfa.add_transition(state0, symb_g, state2) + regex = Regex.from_finite_automaton(enfa) + enfa2 = regex.to_epsilon_nfa() + assert enfa2.accepts([symb_e, symb_f]) + assert enfa2.accepts([symb_g]) + assert not enfa2.accepts([]) + assert not enfa2.accepts([symb_e]) + assert not enfa2.accepts([symb_f]) + enfa.add_final_state(state0) + with pytest.raises(ValueError) as _: + Regex._get_regex_simple(enfa) + regex = Regex.from_finite_automaton(enfa) + enfa3 = regex.to_epsilon_nfa() + assert enfa3.accepts([symb_e, symb_f]) + assert enfa3.accepts([symb_g]) + assert enfa3.accepts([]) + assert not enfa3.accepts([symb_e]) + assert not enfa3.accepts([symb_f]) + enfa.remove_start_state(state0) + regex = Regex.from_finite_automaton(enfa) + enfa3 = regex.to_epsilon_nfa() + assert not enfa3.accepts([symb_e, symb_f]) + assert not enfa3.accepts([symb_g]) + assert not enfa3.accepts([]) + assert not enfa3.accepts([symb_e]) + assert not enfa3.accepts([symb_f]) + enfa.add_start_state(state0) + enfa.add_transition(state0, symb_f, state0) + regex = Regex.from_finite_automaton(enfa) + enfa3 = regex.to_epsilon_nfa() + assert enfa3.accepts([symb_e, symb_f]) + assert enfa3.accepts([symb_f, symb_e, symb_f]) + assert enfa3.accepts([symb_g]) + assert enfa3.accepts([symb_f, symb_f, symb_g]) + assert enfa3.accepts([]) + assert not enfa3.accepts([symb_e]) + assert enfa3.accepts([symb_f]) + + def test_from_enfa2(self): + """ Tests the transformation to regex """ + enfa = EpsilonNFA() + state0 = State(0) + state1 = State(1) + symb_a = Symbol("0") + symb_b = Symbol("1") + enfa.add_start_state(state0) + enfa.add_final_state(state1) + enfa.add_transition(state0, symb_a, state0) + enfa.add_transition(state0, symb_a, state1) + enfa.add_transition(state1, symb_b, state0) + enfa.add_transition(state1, symb_b, state1) + regex = Regex.from_finite_automaton(enfa) + enfa2 = regex.to_epsilon_nfa() + assert enfa2.accepts([symb_a]) + assert enfa2.accepts([symb_a, symb_a]) + assert enfa2.accepts([symb_a, symb_a, symb_b]) + assert enfa2.accepts([symb_a, symb_a, symb_b, symb_b]) + assert enfa2.accepts([symb_a, symb_a, + symb_b, symb_b, symb_a]) + assert enfa2.accepts([symb_a, symb_a, symb_b, + symb_b, symb_a, symb_b]) + assert not enfa2.accepts([symb_b]) + + def test_from_enfa3(self): + """ Tests the transformation to regex """ + enfa = EpsilonNFA() + state0 = State(0) + state1 = State(1) + symb_a = Symbol("0") + symb_b = Symbol("1") + enfa.add_start_state(state0) + enfa.add_final_state(state1) + enfa.add_transition(state0, symb_a, state0) + enfa.add_transition(state1, symb_b, state0) + enfa.add_transition(state1, symb_b, state1) + regex = Regex.from_finite_automaton(enfa) + enfa2 = regex.to_epsilon_nfa() + assert not enfa2.accepts([symb_a]) + assert not enfa2.accepts([symb_a, symb_a]) + assert not enfa2.accepts([symb_a, symb_a, symb_b]) + assert not enfa2.accepts([symb_a, symb_a, + symb_b, symb_b, symb_a]) + assert not enfa2.accepts([symb_a, symb_a, symb_b, + symb_b, symb_a, symb_b]) + assert not enfa2.accepts([symb_b]) + epsilon = Epsilon() + enfa.add_transition(state0, epsilon, state1) + regex = Regex.from_finite_automaton(enfa) + enfa2 = regex.to_epsilon_nfa() + assert enfa.accepts([]) + assert enfa.accepts([symb_a]) + assert enfa2.accepts([symb_a]) + assert enfa2.accepts([symb_a, symb_a]) + assert enfa2.accepts([symb_a, symb_a, symb_b, symb_b]) + assert enfa2.accepts([symb_a, symb_a, symb_b, symb_b, + symb_a, symb_b]) + assert enfa2.accepts([symb_b]) + assert enfa2.accepts([]) + enfa.remove_transition(state0, symb_a, state0) + regex = Regex.from_finite_automaton(enfa) + enfa2 = regex.to_epsilon_nfa() + assert not enfa2.accepts([symb_a]) + assert not enfa2.accepts([symb_a, symb_a]) + assert not enfa2.accepts([symb_a, symb_a, symb_b]) + assert not enfa2.accepts([symb_a, symb_a, symb_b, + symb_b, symb_a]) + assert not enfa2.accepts([symb_a, symb_a, symb_b, symb_b, + symb_a, symb_b]) + assert enfa2.accepts([symb_b]) + assert enfa2.accepts([]) + enfa.remove_transition(state1, symb_b, state1) + regex = Regex.from_finite_automaton(enfa) + enfa2 = regex.to_epsilon_nfa() + assert enfa2.accepts([symb_b, symb_b]) + enfa.add_transition(state0, symb_a, state0) + regex = Regex.from_finite_automaton(enfa) + enfa2 = regex.to_epsilon_nfa() + assert enfa2.accepts([symb_a, symb_b]) + + def test_example_doc(self): + enfa0 = EpsilonNFA() + state0 = State(0) + state1 = State(1) + symb_a = Symbol("0") + symb_b = Symbol("1") + enfa0.add_start_state(state0) + enfa0.add_final_state(state1) + enfa0.add_transition(state0, symb_a, state0) + enfa0.add_transition(state1, symb_b, state0) + enfa0.add_transition(state1, symb_b, state1) + + # Turn a finite automaton into a regex... + regex = Regex.from_finite_automaton(enfa0) + # And turn it back into an epsilon non deterministic automaton + enfa1 = regex.to_epsilon_nfa() + dfa0 = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa0) + dfa1 = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa1) + assert dfa0.is_equivalent_to(dfa1) + + def test_from_dfa0(self): + """ Tests the regex transformation """ + dfa0 = get_example0() + enfa = Regex.from_finite_automaton(dfa0).to_epsilon_nfa() + perform_tests_example0(enfa) + + def test_from_dfa1(self): + dfa1 = get_dfa_example() + enfa = Regex.from_finite_automaton(dfa1).to_epsilon_nfa() + dfa2 = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa) + assert dfa1.is_equivalent_to(dfa2) + + def test_to_minimal_dfa(self): + dfa0 = get_example0() + dfa_regex = Regex.from_finite_automaton(dfa0) + dfa1 = dfa_regex.to_minimal_dfa() + assert dfa0 == dfa1 From f17cf8533cf10893da98778fe259a270f082abce Mon Sep 17 00:00:00 2001 From: bygu4 Date: Wed, 13 Nov 2024 14:29:46 +0300 Subject: [PATCH 31/42] correct union of enfas, refactor --- pyformlang/cfg/cfg.py | 14 +---- .../deterministic_transition_function.py | 6 +- pyformlang/finite_automaton/epsilon_nfa.py | 57 ++++++++++++------- .../nondeterministic_finite_automaton.py | 7 ++- .../test_deterministic_finite_automaton.py | 3 +- .../test_deterministic_transition_function.py | 3 +- .../test_nondeterministic_finite_automaton.py | 3 +- pyformlang/pda/pda.py | 15 +---- 8 files changed, 53 insertions(+), 55 deletions(-) diff --git a/pyformlang/cfg/cfg.py b/pyformlang/cfg/cfg.py index 4a07efd..ea83e23 100644 --- a/pyformlang/cfg/cfg.py +++ b/pyformlang/cfg/cfg.py @@ -7,10 +7,9 @@ # pylint: disable=cyclic-import from pyformlang import pda -from pyformlang.finite_automaton import FiniteAutomaton +from pyformlang.finite_automaton import DeterministicFiniteAutomaton # pylint: disable=cyclic-import from pyformlang.pda import cfg_variable_converter as cvc -from pyformlang import regular_expression from .cfg_object import CFGObject # pylint: disable=cyclic-import from .cyk_table import CYKTable, DerivationDoesNotExist @@ -788,7 +787,7 @@ def to_pda(self) -> "pda.PDA": state, []) return new_pda - def intersection(self, other: Any) -> "CFG": + def intersection(self, other: DeterministicFiniteAutomaton) -> "CFG": """ Gives the intersection of the current CFG with an other object Equivalent to: @@ -810,13 +809,6 @@ def intersection(self, other: Any) -> "CFG": When trying to intersect with something else than a regex or a finite automaton """ - if isinstance(other, regular_expression.Regex): - other = other.to_epsilon_nfa().to_deterministic() - elif isinstance(other, FiniteAutomaton): - if not other.is_deterministic(): - other = other.to_deterministic() - else: - raise NotImplementedError if other.is_empty(): return CFG() generate_empty = self.contains([]) and other.accepts([]) @@ -904,7 +896,7 @@ def _get_all_bodies(production, state_p, state_r, states, cv_converter): state_r)] for state_q in states] - def __and__(self, other): + def __and__(self, other: DeterministicFiniteAutomaton) -> "CFG": """ Gives the intersection of the current CFG with an other object Parameters diff --git a/pyformlang/finite_automaton/deterministic_transition_function.py b/pyformlang/finite_automaton/deterministic_transition_function.py index 4a03c18..1426ea5 100644 --- a/pyformlang/finite_automaton/deterministic_transition_function.py +++ b/pyformlang/finite_automaton/deterministic_transition_function.py @@ -11,6 +11,7 @@ from .epsilon import Epsilon from .nondeterministic_transition_function import \ NondeterministicTransitionFunction +from .nondeterministic_finite_automaton import InvalidEpsilonTransition class DeterministicTransitionFunction(NondeterministicTransitionFunction): """A deterministic transition function in a finite automaton @@ -85,11 +86,6 @@ def is_deterministic(self) -> bool: return True -class InvalidEpsilonTransition(Exception): - """Exception raised when an epsilon transition is created in - deterministic automaton""" - - class DuplicateTransitionError(Exception): """ Signals a duplicated transition diff --git a/pyformlang/finite_automaton/epsilon_nfa.py b/pyformlang/finite_automaton/epsilon_nfa.py index d680bbe..f26f461 100644 --- a/pyformlang/finite_automaton/epsilon_nfa.py +++ b/pyformlang/finite_automaton/epsilon_nfa.py @@ -86,10 +86,10 @@ def __init__( for state in self._start_states: self._states.add(state) - def _get_next_states_iterable(self, - current_states: Iterable[State], - symbol: Symbol) \ - -> Set[State]: + def _get_next_states_iterable( + self, + current_states: Iterable[State], + symbol: Symbol) -> Set[State]: """ Gives the set of next states, starting from a set of states Parameters @@ -453,8 +453,19 @@ def __and__(self, other: "EpsilonNFA") -> "EpsilonNFA": def get_union(self, other: "EpsilonNFA") -> "EpsilonNFA": """ Computes the union with given Epsilon NFA """ - union = other.copy() - self._copy_to(union) + union = EpsilonNFA() + self.__copy_transitions_marked(self, union, 0) + self.__copy_transitions_marked(other, union, 1) + new_start = State("Start") + union.add_start_state(new_start) + for self_start in self.start_states: + union.add_transition(new_start, Epsilon(), (0, self_start.value)) + for other_start in other.start_states: + union.add_transition(new_start, Epsilon(), (1, other_start.value)) + for self_final in self.final_states: + union.add_final_state((0, self_final.value)) + for other_final in other.final_states: + union.add_final_state((1, other_final.value)) return union def __or__(self, other: "EpsilonNFA") -> "EpsilonNFA": @@ -464,18 +475,12 @@ def __or__(self, other: "EpsilonNFA") -> "EpsilonNFA": def concatenate(self, other: "EpsilonNFA") -> "EpsilonNFA": """ Computes the concatenation of two Epsilon NFAs """ concatenation = EpsilonNFA() - for s_from, symb_by, s_to in self: - concatenation.add_transition((0, s_from.value), - symb_by, - (0, s_to.value)) - if s_from in self.start_states: - concatenation.add_start_state((0, s_from.value)) - for s_from, symb_by, s_to in other: - concatenation.add_transition((1, s_from.value), - symb_by, - (1, s_to.value)) - if other.is_final_state(s_to): - concatenation.add_final_state((1, s_to.value)) + self.__copy_transitions_marked(self, concatenation, 0) + self.__copy_transitions_marked(other, concatenation, 1) + for self_start in self.start_states: + concatenation.add_start_state((0, self_start.value)) + for other_final in other.final_states: + concatenation.add_final_state((1, other_final.value)) for self_final in self.final_states: for other_start in other.start_states: concatenation.add_transition((0, self_final.value), @@ -595,12 +600,12 @@ def __invert__(self) -> "EpsilonNFA": def kleene_star(self) -> "EpsilonNFA": """ Compute the kleene closure of current EpsilonNFA""" - kleene_closure = self.copy() new_start = self.__get_new_state("Start") + kleene_closure = EpsilonNFA(start_states={new_start}, + final_states={new_start}) + kleene_closure.add_transitions(iter(self)) for old_start in self.start_states: kleene_closure.add_transition(new_start, Epsilon(), old_start) - kleene_closure.start_states.clear() - kleene_closure.add_start_state(new_start) for final_state in self.final_states: kleene_closure.add_transition(final_state, Epsilon(), new_start) return kleene_closure @@ -658,6 +663,16 @@ def __get_new_state(self, prefix: str) -> State: prefix += '`' return State(prefix) + @staticmethod + def __copy_transitions_marked(fa_to_add_from: FiniteAutomaton, + fa_to_add_to: FiniteAutomaton, + mark: int) -> None: + """ Copy transitions from one FA to another with each state marked """ + for s_from, symb_by, s_to in fa_to_add_from: + fa_to_add_to.add_transition((mark, s_from.value), + symb_by, + (mark, s_to.value)) + @staticmethod def __combine_state_pair(state0: State, state1: State) -> State: """ Combine two states """ diff --git a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py index a0155f2..dfcec6d 100644 --- a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py @@ -6,7 +6,6 @@ from .epsilon import Epsilon from .epsilon_nfa import EpsilonNFA -from .deterministic_transition_function import InvalidEpsilonTransition from .utils import to_symbol @@ -116,6 +115,7 @@ def add_transition(self, s_from: Hashable, symb_by: Hashable, s_to: Hashable) -> int: + symb_by = to_symbol(symb_by) if symb_by == Epsilon(): raise InvalidEpsilonTransition return super().add_transition(s_from, symb_by, s_to) @@ -153,3 +153,8 @@ def from_epsilon_nfa(cls, enfa: EpsilonNFA) \ for next_state in enfa(e_state, symb): nfa.add_transition(state, symb, next_state) return nfa + + +class InvalidEpsilonTransition(Exception): + """Exception raised when an epsilon transition is created in + nondeterministic automaton""" diff --git a/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py b/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py index 27fc511..31a7b3b 100644 --- a/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py @@ -8,8 +8,7 @@ from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.finite_automaton import State, Symbol, Epsilon from pyformlang.finite_automaton import DeterministicTransitionFunction -from pyformlang.finite_automaton.deterministic_transition_function import \ - InvalidEpsilonTransition +from pyformlang.finite_automaton import InvalidEpsilonTransition class TestDeterministicFiniteAutomaton: diff --git a/pyformlang/finite_automaton/tests/test_deterministic_transition_function.py b/pyformlang/finite_automaton/tests/test_deterministic_transition_function.py index 734b4b8..940dfbb 100644 --- a/pyformlang/finite_automaton/tests/test_deterministic_transition_function.py +++ b/pyformlang/finite_automaton/tests/test_deterministic_transition_function.py @@ -4,8 +4,9 @@ import pytest +from pyformlang.finite_automaton import DeterministicTransitionFunction +from pyformlang.finite_automaton import State, Symbol, Epsilon from pyformlang.finite_automaton import \ - State, Symbol, Epsilon, DeterministicTransitionFunction, \ DuplicateTransitionError, InvalidEpsilonTransition diff --git a/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py index e52e383..4b8b1a9 100644 --- a/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py @@ -7,8 +7,7 @@ from pyformlang.finite_automaton import NondeterministicFiniteAutomaton from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.finite_automaton import State, Symbol, Epsilon -from pyformlang.finite_automaton.deterministic_transition_function import \ - InvalidEpsilonTransition +from pyformlang.finite_automaton import InvalidEpsilonTransition class TestNondeterministicFiniteAutomaton: diff --git a/pyformlang/pda/pda.py b/pyformlang/pda/pda.py index 04642d9..bda531b 100644 --- a/pyformlang/pda/pda.py +++ b/pyformlang/pda/pda.py @@ -9,7 +9,7 @@ from pyformlang import cfg from pyformlang import finite_automaton -from pyformlang import regular_expression +from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.pda.cfg_variable_converter import CFGVariableConverter from .epsilon import Epsilon from .stack_symbol import StackSymbol @@ -439,7 +439,7 @@ def _initialize_production_from_start_in_to_cfg(self, start): state)])) return productions - def intersection(self, other: Any) -> "PDA": + def intersection(self, other: DeterministicFiniteAutomaton) -> "PDA": """ Gets the intersection of the language L generated by the \ current PDA when accepting by final state with something else @@ -467,15 +467,6 @@ def intersection(self, other: Any) -> "PDA": When intersecting with something else than a regex or a finite automaton """ - if isinstance(other, regular_expression.Regex): - enfa = other.to_epsilon_nfa() - other = enfa.to_deterministic() - elif isinstance(other, FiniteAutomaton): - is_deterministic = other.is_deterministic() - if not is_deterministic: - other = other.to_deterministic() - else: - raise NotImplementedError start_state_other = other.start_states if len(start_state_other) == 0: return PDA() @@ -529,7 +520,7 @@ def intersection(self, other: Any) -> "PDA": processed.add((next_state, next_state_dfa)) return pda - def __and__(self, other): + def __and__(self, other: DeterministicFiniteAutomaton) -> "PDA": """ Gets the intersection of the current PDA with something else Equivalent to: From 2a7aaac485ab32142a736ca4f080d7bf67cc742b Mon Sep 17 00:00:00 2001 From: bygu4 Date: Wed, 13 Nov 2024 20:36:57 +0300 Subject: [PATCH 32/42] correct partition, debug regex --- .../hopcroft_processing_list.py | 16 ++++++------ pyformlang/finite_automaton/partition.py | 23 ++++++++++------- .../test_deterministic_transition_function.py | 6 ++--- .../tests/test_epsilon_nfa.py | 2 +- .../test_nondeterministic_finite_automaton.py | 4 +-- pyformlang/regular_expression/python_regex.py | 4 +-- pyformlang/regular_expression/regex.py | 9 +++---- .../regular_expression/regex_objects.py | 25 +++++++++++-------- pyformlang/regular_expression/regex_reader.py | 15 ++++++----- 9 files changed, 56 insertions(+), 48 deletions(-) diff --git a/pyformlang/finite_automaton/hopcroft_processing_list.py b/pyformlang/finite_automaton/hopcroft_processing_list.py index ff91b32..2bc02f4 100644 --- a/pyformlang/finite_automaton/hopcroft_processing_list.py +++ b/pyformlang/finite_automaton/hopcroft_processing_list.py @@ -2,38 +2,40 @@ For internal usage """ -from typing import Dict, List, Set, Tuple, Any +from typing import Dict, List, Set, Tuple from numpy import zeros +from .symbol import Symbol + class HopcroftProcessingList: """ A representation for Hopcroft minimization algorithm For internal usage """ - def __init__(self, n_states: int, symbols: Set[Any]) -> None: - self._reverse_symbols: Dict[Any, int] = {} + def __init__(self, n_states: int, symbols: Set[Symbol]) -> None: + self._reverse_symbols: Dict[Symbol, int] = {} for i, symbol in enumerate(symbols): self._reverse_symbols[symbol] = i self._inclusion = zeros((n_states, len(symbols)), dtype=bool) - self._elements: List[Tuple[int, Any]] = [] + self._elements: List[Tuple[int, Symbol]] = [] def is_empty(self) -> bool: """Check if empty""" return len(self._elements) == 0 - def contains(self, class_name: int, symbol: Any) -> bool: + def contains(self, class_name: int, symbol: Symbol) -> bool: """ Check containment """ i_symbol = self._reverse_symbols[symbol] return self._inclusion[class_name, i_symbol] - def insert(self, class_name: int, symbol: Any) -> None: + def insert(self, class_name: int, symbol: Symbol) -> None: """ Insert new element """ i_symbol = self._reverse_symbols[symbol] self._inclusion[class_name, i_symbol] = True self._elements.append((class_name, symbol)) - def pop(self) -> Tuple[int, Any]: + def pop(self) -> Tuple[int, Symbol]: """ Pop an element """ res = self._elements.pop() i_symbol = self._reverse_symbols[res[1]] diff --git a/pyformlang/finite_automaton/partition.py b/pyformlang/finite_automaton/partition.py index dbb4cdc..e292900 100644 --- a/pyformlang/finite_automaton/partition.py +++ b/pyformlang/finite_automaton/partition.py @@ -2,22 +2,26 @@ For internal usage. """ -from typing import Dict, List, Iterable, Any +from typing import Dict, List, Iterable + from .doubly_linked_list import DoublyLinkedList +from .doubly_linked_node import DoublyLinkedNode +from .state import State class Partition: """Class to manage partitions used in Hopcroft minimization algorithm""" def __init__(self, n_states: int) -> None: - self._class_names: Dict[Any, int] = {} # States to class index + self._class_names: Dict[State, int] = {} # States to class index # Class idx to states self.part: List[DoublyLinkedList] = \ [DoublyLinkedList() for _ in range(n_states)] - self._place: Dict[Any, Any] = {} # state to position in list + self._place: Dict[State, DoublyLinkedNode] = {} + # state to position in list self._counter = 0 # Number of classes - def add_class(self, new_class: Iterable[Any]) -> None: + def add_class(self, new_class: Iterable[State]) -> None: """Adds a new class""" index = self._counter self._counter += 1 @@ -26,14 +30,15 @@ def add_class(self, new_class: Iterable[Any]) -> None: node = self.part[index].append(element) self._place[element] = node - def move_to_new_class(self, elements_to_move: Iterable[Any]) -> None: + def move_to_new_class(self, elements_to_move: Iterable[State]) -> None: """Move elements to a new class""" for element in elements_to_move: place = self._place[element] - place.delete() + class_name = self._class_names[element] + self.part[class_name].delete(place) self.add_class(elements_to_move) - def get_valid_sets(self, inverse: Iterable[Any]) -> List[int]: + def get_valid_sets(self, inverse: Iterable[State]) -> List[int]: """Get the valid sets""" class_names = [0] * self._counter for element in inverse: @@ -41,7 +46,7 @@ def get_valid_sets(self, inverse: Iterable[Any]) -> List[int]: return [i for i, value in enumerate(class_names) if value != 0 and value != len(self.part[i])] - def split(self, to_split: Any, splitter: Iterable[Any]) -> int: + def split(self, to_split: int, splitter: Iterable[State]) -> int: """ Splits """ elements_to_move = [] for element in splitter: @@ -50,7 +55,7 @@ def split(self, to_split: Any, splitter: Iterable[Any]) -> int: self.move_to_new_class(elements_to_move) return self._counter - 1 - def get_groups(self) -> List[Any]: + def get_groups(self) -> List[List[State]]: """ Get the groups """ res = [] for i in range(self._counter): diff --git a/pyformlang/finite_automaton/tests/test_deterministic_transition_function.py b/pyformlang/finite_automaton/tests/test_deterministic_transition_function.py index 940dfbb..b04e631 100644 --- a/pyformlang/finite_automaton/tests/test_deterministic_transition_function.py +++ b/pyformlang/finite_automaton/tests/test_deterministic_transition_function.py @@ -10,7 +10,7 @@ DuplicateTransitionError, InvalidEpsilonTransition -class TestTransitionFunction: +class TestDeterministicTransitionFunction: """ Tests the transitions functions """ @@ -69,8 +69,8 @@ def test_remove_transitions(self): symb_by, s_to) == 1 assert transition_function.get_number_transitions() == 0 - assert transition_function(s_to, symb_by) == [] - assert transition_function(s_from, symb_by) == [] + assert transition_function(s_to, symb_by) == set() + assert transition_function(s_from, symb_by) == set() assert transition_function.remove_transition(s_from, symb_by, s_to) == 0 diff --git a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py index d3ca14d..9d1179e 100644 --- a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py +++ b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py @@ -434,7 +434,7 @@ def test_len(self): def test_call(self): enfa = get_enfa_example1() - assert len(enfa(2)) == 1 + assert len(list(enfa(2))) == 1 def test_remove_epsilon_transitions(self): enfa = EpsilonNFA() diff --git a/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py index 4b8b1a9..a2f3459 100644 --- a/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py @@ -139,8 +139,8 @@ def test_copy(self): assert len(nfa.symbols) == 3 assert len(nfa.start_states) == 4 assert len(nfa.final_states) == 3 - assert nfa.get_number_transitions() == 9 - assert nfa.accepts([Symbol("a"), Symbol("b")]) + assert nfa.get_number_transitions() == 7 + assert nfa.accepts([Symbol("a"), Symbol("c")]) assert nfa.accepts([Symbol("b"), Symbol("c")]) assert not nfa.is_deterministic() diff --git a/pyformlang/regular_expression/python_regex.py b/pyformlang/regular_expression/python_regex.py index 8713c9c..e8631ff 100644 --- a/pyformlang/regular_expression/python_regex.py +++ b/pyformlang/regular_expression/python_regex.py @@ -3,7 +3,7 @@ """ from typing import List, Tuple, Union -from re import compile as comp, Pattern +from re import compile as compile_regex, Pattern from string import printable from unicodedata import lookup @@ -100,7 +100,7 @@ class PythonRegex(Regex): def __init__(self, python_regex: Union[str, Pattern[str]]) -> None: if isinstance(python_regex, str): - comp(python_regex) # Check if it is valid + compile_regex(python_regex) # Check if it is valid else: python_regex = python_regex.pattern diff --git a/pyformlang/regular_expression/regex.py b/pyformlang/regular_expression/regex.py index 1e67840..15cde3b 100644 --- a/pyformlang/regular_expression/regex.py +++ b/pyformlang/regular_expression/regex.py @@ -2,7 +2,7 @@ Representation of a regular expression """ -from typing import List, Iterable, Tuple, Optional, Any +from typing import List, Iterable, Tuple, Optional from pyformlang.finite_automaton import FiniteAutomaton, EpsilonNFA from pyformlang.finite_automaton import DeterministicFiniteAutomaton @@ -11,7 +11,7 @@ from pyformlang.cfg.utils import to_variable from .regex_reader import RegexReader -from .regex_objects import Epsilon as RegexEpsilon, Node, \ +from .regex_objects import Epsilon as RegexEpsilon, \ Empty, Concatenation, Union, KleeneStar @@ -88,9 +88,8 @@ class Regex(RegexReader): """ def __init__(self, regex: str) -> None: - super().__init__(regex) - self.head: Node = Empty() # type: ignore self.sons: List[Regex] = [] # type: ignore + super().__init__(regex) self._counter = 0 self._enfa: Optional[EpsilonNFA] = None @@ -337,7 +336,7 @@ def to_cfg(self, starting_symbol: str = "S") -> CFG: productions=set(productions)) return cfg_res - def _get_production(self, current_symbol: Any, count: int = 0) \ + def _get_production(self, current_symbol: str, count: int = 0) \ -> Tuple[List[Production], int]: next_symbols = [] next_productions = [] diff --git a/pyformlang/regular_expression/regex_objects.py b/pyformlang/regular_expression/regex_objects.py index 73c4913..65fc5b1 100644 --- a/pyformlang/regular_expression/regex_objects.py +++ b/pyformlang/regular_expression/regex_objects.py @@ -2,7 +2,8 @@ Representation of some objects used in regex. """ -from typing import List, Iterable, Any +from typing import List, Iterable +from abc import abstractmethod from pyformlang.cfg import Production from pyformlang.cfg.utils import to_variable, to_terminal @@ -17,11 +18,11 @@ class Node: # pylint: disable=too-few-public-methods The value of the node """ - def __init__(self, value: Any) -> None: + def __init__(self, value: str) -> None: self._value = value @property - def value(self) -> Any: + def value(self) -> str: """ Give the value of the node Returns @@ -31,6 +32,7 @@ def value(self) -> Any: """ return self._value + @abstractmethod def get_str_repr(self, sons_repr: Iterable[str]) -> str: """ The string representation of the node @@ -48,7 +50,8 @@ def get_str_repr(self, sons_repr: Iterable[str]) -> str: """ raise NotImplementedError - def get_cfg_rules(self, current_symbol: Any, sons: Iterable[Any]) \ + @abstractmethod + def get_cfg_rules(self, current_symbol: str, sons: Iterable[str]) \ -> List[Production]: """ Gets the rules for a context-free grammar to represent the \ operator""" @@ -103,7 +106,7 @@ def get_str_repr(self, sons_repr: Iterable[str]) -> str: """ Get the string representation """ raise NotImplementedError - def get_cfg_rules(self, current_symbol: Any, sons: Iterable[Any]) \ + def get_cfg_rules(self, current_symbol: str, sons: Iterable[str]) \ -> List[Production]: """ Gets the rules for a context-free grammar to represent the \ operator""" @@ -122,7 +125,7 @@ class Symbol(Node): # pylint: disable=too-few-public-methods def get_str_repr(self, sons_repr: Iterable[str]) -> str: return str(self.value) - def get_cfg_rules(self, current_symbol: Any, sons: Iterable[Any]) \ + def get_cfg_rules(self, current_symbol: str, sons: Iterable[str]) \ -> List[Production]: """ Gets the rules for a context-free grammar to represent the \ operator""" @@ -141,7 +144,7 @@ class Concatenation(Operator): # pylint: disable=too-few-public-methods def get_str_repr(self, sons_repr: Iterable[str]) -> str: return "(" + ".".join(sons_repr) + ")" - def get_cfg_rules(self, current_symbol: Any, sons: Iterable[Any]) \ + def get_cfg_rules(self, current_symbol: str, sons: Iterable[str]) \ -> List[Production]: return [Production( to_variable(current_symbol), @@ -158,7 +161,7 @@ class Union(Operator): # pylint: disable=too-few-public-methods def get_str_repr(self, sons_repr: Iterable[str]) -> str: return "(" + "|".join(sons_repr) + ")" - def get_cfg_rules(self, current_symbol: Any, sons: Iterable[Any]) \ + def get_cfg_rules(self, current_symbol: str, sons: Iterable[str]) \ -> List[Production]: return [Production( to_variable(current_symbol), @@ -175,7 +178,7 @@ class KleeneStar(Operator): # pylint: disable=too-few-public-methods def get_str_repr(self, sons_repr: Iterable[str]) -> str: return "(" + ".".join(sons_repr) + ")*" - def get_cfg_rules(self, current_symbol: Any, sons: Iterable[Any]) \ + def get_cfg_rules(self, current_symbol: str, sons: Iterable[str]) \ -> List[Production]: return [ Production( @@ -199,7 +202,7 @@ class Epsilon(Symbol): # pylint: disable=too-few-public-methods def get_str_repr(self, sons_repr: Iterable[str]) -> str: return "$" - def get_cfg_rules(self, current_symbol: Any, sons: Iterable[str]) \ + def get_cfg_rules(self, current_symbol: str, sons: Iterable[str]) \ -> List[Production]: return [Production(to_variable(current_symbol), [])] @@ -214,7 +217,7 @@ class Empty(Symbol): # pylint: disable=too-few-public-methods def __init__(self) -> None: super().__init__("Empty") - def get_cfg_rules(self, current_symbol: Any, sons: Iterable[Any]) \ + def get_cfg_rules(self, current_symbol: str, sons: Iterable[str]) \ -> List[Production]: return [] diff --git a/pyformlang/regular_expression/regex_reader.py b/pyformlang/regular_expression/regex_reader.py index ac0a443..11d86fa 100644 --- a/pyformlang/regular_expression/regex_reader.py +++ b/pyformlang/regular_expression/regex_reader.py @@ -2,10 +2,10 @@ A class to read regex """ -from typing import List, Optional, Any +from typing import List, Optional from re import sub -from .regex_objects import to_node, Node, Operator, Symbol, \ +from .regex_objects import to_node, Node, Operator, Symbol, Empty, \ Concatenation, Union, KleeneStar, MisformedRegexError, SPECIAL_SYMBOLS MISFORMED_MESSAGE = "The regex is misformed here." @@ -21,7 +21,7 @@ class RegexReader: def __init__(self, regex: str) -> None: self._current_node: Optional[Node] = None - self.head: Optional[Node] = None + self.head: Node = Empty() self.sons: List[RegexReader] = [] self._end_current_group = 0 regex = _pre_process_regex(regex) @@ -39,8 +39,7 @@ def _pre_process_input_regex_componants(self) -> None: self._compute_precedence() self._remove_useless_extreme_parenthesis_from_components() - def _remove_useless_extreme_parenthesis_from_componants( - self) -> None: + def _remove_useless_extreme_parenthesis_from_componants(self) -> None: if self._is_surrounded_by_parenthesis(): self._components = self._components[1:-1] self._remove_useless_extreme_parenthesis_from_components() @@ -102,8 +101,8 @@ def _compute_precedence(self) -> None: self._compute_precedent_when_not_kleene_nor_union() def _set_next_end_group_and_node(self) -> None: - if isinstance(self._current_node, Operator) and not isinstance( - self._current_node, KleeneStar): + if isinstance(self._current_node, Operator) and \ + not isinstance(self._current_node, KleeneStar): self._end_current_group += 1 self._set_end_first_group_in_components(self._end_current_group) if self._end_current_group < len(self._components): @@ -168,7 +167,7 @@ def _process_sub_regex(self, idx_from: int, idx_to: int) -> "RegexReader": sub_regex = " ".join(self._components[idx_from:idx_to]) return self.from_string(sub_regex) - def _check_is_valid_single_first_symbol(self, first_symbol: Any) -> None: + def _check_is_valid_single_first_symbol(self, first_symbol: Node) -> None: if not isinstance(first_symbol, Symbol): raise MisformedRegexError(MISFORMED_MESSAGE, self._regex) From cf3ec20c83e2fda4208f5f188fac677ac764026f Mon Sep 17 00:00:00 2001 From: bygu4 Date: Wed, 13 Nov 2024 21:39:26 +0300 Subject: [PATCH 33/42] debug, refactor tests --- pyformlang/cfg/cfg.py | 16 +++++++++------ pyformlang/cfg/tests/test_cfg.py | 7 ++++--- pyformlang/cfg/tests/test_llone_parser.py | 2 +- .../deterministic_finite_automaton.py | 12 +++++------ pyformlang/pda/tests/test_pda.py | 20 +++++++++---------- pyformlang/rsa/tests/test_rsa.py | 10 ++++------ 6 files changed, 35 insertions(+), 32 deletions(-) diff --git a/pyformlang/cfg/cfg.py b/pyformlang/cfg/cfg.py index ea83e23..50af3fc 100644 --- a/pyformlang/cfg/cfg.py +++ b/pyformlang/cfg/cfg.py @@ -837,10 +837,12 @@ def intersection(self, other: DeterministicFiniteAutomaton) -> "CFG": return res_cfg @staticmethod - def _intersection_starting_rules(cfg, other, cv_converter): + def _intersection_starting_rules(cfg: "CFG", + other: DeterministicFiniteAutomaton, + cv_converter): start = Variable("Start") productions_temp = [] - start_other = list(other.start_states)[0] # it is deterministic + start_other = other.start_state for final_state in other.final_states: new_body = [ cv_converter.to_cfg_combined_variable( @@ -852,15 +854,17 @@ def _intersection_starting_rules(cfg, other, cv_converter): return productions_temp @staticmethod - def _intersection_when_terminal(other_fst, production, + def _intersection_when_terminal(other: DeterministicFiniteAutomaton, + production, cv_converter, states): productions_temp = [] for state_p in states: - next_states = other_fst(state_p, production.body[0].value) - if next_states: + next_state = other.get_next_state( + state_p, production.body[0].value) + if next_state: new_head = \ cv_converter.to_cfg_combined_variable( - state_p, production.head, next_states[0]) + state_p, production.head, next_state) productions_temp.append( Production(new_head, [production.body[0]], diff --git a/pyformlang/cfg/tests/test_cfg.py b/pyformlang/cfg/tests/test_cfg.py index cb6afde..75c480a 100644 --- a/pyformlang/cfg/tests/test_cfg.py +++ b/pyformlang/cfg/tests/test_cfg.py @@ -516,7 +516,8 @@ def test_finite(self): def test_intersection(self): """ Tests the intersection with a regex """ regex = Regex("a*b*") - dfa = regex.to_epsilon_nfa() + enfa = regex.to_epsilon_nfa() + dfa = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa) symb_a = Symbol("a") symb_b = Symbol("b") assert dfa.accepts([symb_a, symb_a, symb_b, symb_b]) @@ -530,7 +531,7 @@ def test_intersection(self): cfg = CFG(productions=productions, start_symbol=var_s) assert cfg.contains([ter_a, ter_a, ter_b, ter_b]) assert not cfg.contains([ter_a, ter_a, ter_b]) - cfg_i = cfg.intersection(regex) + cfg_i = cfg.intersection(regex.to_minimal_dfa()) assert cfg_i.contains([ter_a, ter_a, ter_b, ter_b]) assert not cfg_i.contains([ter_a, ter_a, ter_b]) assert cfg_i.contains([]) @@ -548,7 +549,7 @@ def test_intersection_empty(self): Production(var_s, [ter_b, var_s, ter_a]), Production(var_s, [])} cfg = CFG(productions=productions, start_symbol=var_s) - cfg_i = cfg & regex + cfg_i = cfg & regex.to_minimal_dfa() assert not cfg_i def test_intersection_dfa(self): diff --git a/pyformlang/cfg/tests/test_llone_parser.py b/pyformlang/cfg/tests/test_llone_parser.py index bfe0a9c..a4a843a 100644 --- a/pyformlang/cfg/tests/test_llone_parser.py +++ b/pyformlang/cfg/tests/test_llone_parser.py @@ -250,7 +250,7 @@ def test_sentence_cfg(self): N -> gorilla | sky | carrots """) regex = Regex("georges touches (a|an) (sky|gorilla) !") - cfg_inter = cfg.intersection(regex) + cfg_inter = cfg.intersection(regex.to_minimal_dfa()) assert not cfg_inter.is_empty() assert cfg_inter.is_finite() assert not cfg_inter.contains(["georges", "sees", "a", "gorilla", "."]) diff --git a/pyformlang/finite_automaton/deterministic_finite_automaton.py b/pyformlang/finite_automaton/deterministic_finite_automaton.py index c2c73b4..8c27f06 100644 --- a/pyformlang/finite_automaton/deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/deterministic_finite_automaton.py @@ -474,16 +474,16 @@ def _is_equivalent_to_minimal( (not self_minimal.is_final_state(current_self) and other_minimal.is_final_state(current_other)): return False - next_self = self_minimal(current_self) - next_other = other_minimal(current_other) + next_self = list(self_minimal.get_transitions_from(current_self)) + next_other = list(other_minimal.get_transitions_from(current_other)) if len(next_self) != len(next_other): return False if len(next_self) == 0: continue - for next_temp, other_temp in zip(sorted(list(next_self), - key=lambda x: x[0].value), - sorted(list(next_other), - key=lambda x: x[0].value)): + for next_temp, other_temp in zip(sorted(next_self, + key=lambda x: hash(x[0])), + sorted(next_other, + key=lambda x: hash(x[0]))): next_symbol_self, next_state_self = next_temp next_symbol_other, next_state_other = other_temp if next_symbol_other != next_symbol_self: diff --git a/pyformlang/pda/tests/test_pda.py b/pyformlang/pda/tests/test_pda.py index 043f63b..569415e 100644 --- a/pyformlang/pda/tests/test_pda.py +++ b/pyformlang/pda/tests/test_pda.py @@ -3,7 +3,8 @@ from pyformlang.pda import PDA, State, StackSymbol, Symbol, Epsilon from pyformlang.cfg import Terminal -from pyformlang import finite_automaton +from pyformlang.finite_automaton import DeterministicFiniteAutomaton +from pyformlang.finite_automaton import State as FAState, Symbol as FASymbol from pyformlang.pda.utils import PDAObjectCreator from pyformlang.regular_expression import Regex @@ -285,11 +286,11 @@ def test_intersection_regex(self): pda.add_transition(state_q, state_e, state_z, state_q, []) pda.add_transition(state_q, Epsilon(), state_x0, state_r, []) - state_s = finite_automaton.State("s") - state_t = finite_automaton.State("t") - i_dfa = finite_automaton.Symbol("i") - e_dfa = finite_automaton.Symbol("e") - dfa = finite_automaton.DeterministicFiniteAutomaton( + state_s = FAState("s") + state_t = FAState("t") + i_dfa = FASymbol("i") + e_dfa = FASymbol("e") + dfa = DeterministicFiniteAutomaton( states={state_s, state_t}, input_symbols={i_dfa, e_dfa}, start_state=state_s, @@ -312,16 +313,15 @@ def test_intersection_regex(self): assert cfg.contains([i_cfg, i_cfg, e_cfg, e_cfg, e_cfg]) - new_pda = pda.intersection( - finite_automaton.DeterministicFiniteAutomaton()) + new_pda = pda.intersection(DeterministicFiniteAutomaton()) assert new_pda.get_number_transitions() == 0 - new_pda = pda.intersection(Regex("")) + new_pda = pda.intersection(Regex("").to_minimal_dfa()) pda_es = new_pda.to_empty_stack() cfg = pda_es.to_cfg() assert not cfg - new_pda = pda & Regex("z|y").to_epsilon_nfa() + new_pda = pda & Regex("z|y").to_minimal_dfa() pda_es = new_pda.to_empty_stack() cfg = pda_es.to_cfg() assert not cfg diff --git a/pyformlang/rsa/tests/test_rsa.py b/pyformlang/rsa/tests/test_rsa.py index a24dc13..1fa3162 100644 --- a/pyformlang/rsa/tests/test_rsa.py +++ b/pyformlang/rsa/tests/test_rsa.py @@ -12,8 +12,7 @@ def test_creation(self): """ Test the creation of an RSA """ # S -> a S b | a b regex = Regex("a S b | a b") - enfa = regex.to_epsilon_nfa() - dfa = enfa.minimize() + dfa = regex.to_minimal_dfa() box = Box(dfa, "S") rsa_1 = RecursiveAutomaton(box, set()) @@ -31,8 +30,7 @@ def test_from_regex(self): # S -> a* rsa_2 = RecursiveAutomaton.from_regex(Regex("a*"), "S") - enfa = Regex("a*").to_epsilon_nfa() - dfa = enfa.minimize() + dfa = Regex("a*").to_minimal_dfa() box = Box(dfa, "S") rsa_1 = RecursiveAutomaton(box, set()) @@ -65,8 +63,8 @@ def test_from_ebnf(self): assert rsa1_g2.get_number_boxes() == 2 assert rsa1_g2.nonterminals == {Symbol("S"), Symbol("V")} - dfa_s = Regex("a V b").to_epsilon_nfa().minimize() + dfa_s = Regex("a V b").to_minimal_dfa() assert rsa1_g2.get_box_by_nonterminal("S") == Box(dfa_s, "S") - dfa_v = Regex("c S d | c d").to_epsilon_nfa().minimize() + dfa_v = Regex("c S d | c d").to_minimal_dfa() assert rsa1_g2.get_box_by_nonterminal("V") == Box(dfa_v, "V") From fafd489aaa1cda53156d05c74fa3151bfd9d930f Mon Sep 17 00:00:00 2001 From: bygu4 Date: Wed, 13 Nov 2024 22:03:19 +0300 Subject: [PATCH 34/42] try to fix typing issue in python_regex --- pyformlang/regular_expression/python_regex.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyformlang/regular_expression/python_regex.py b/pyformlang/regular_expression/python_regex.py index e8631ff..3f6f48f 100644 --- a/pyformlang/regular_expression/python_regex.py +++ b/pyformlang/regular_expression/python_regex.py @@ -2,8 +2,8 @@ A class to read Python format regex """ -from typing import List, Tuple, Union -from re import compile as compile_regex, Pattern +from typing import List, Tuple, Union, Pattern +from re import compile as compile_regex from string import printable from unicodedata import lookup From e4bf660b14264685ea23f3fb4c0e85420b27351d Mon Sep 17 00:00:00 2001 From: bygu4 Date: Wed, 13 Nov 2024 23:42:06 +0300 Subject: [PATCH 35/42] correct pda intersection, add more tests for union, concatenation and kleene star --- .../tests/test_epsilon_nfa.py | 74 ++++++++++++++++++- pyformlang/pda/pda.py | 44 +++++------ 2 files changed, 90 insertions(+), 28 deletions(-) diff --git a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py index 9d1179e..1b34fb8 100644 --- a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py +++ b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py @@ -87,7 +87,7 @@ def test_deterministic(self): assert not dfa.accepts([point]) assert not dfa.accepts([plus]) - def test_union(self): + def test_union0(self): """ Tests the union of two epsilon NFA """ enfa0 = get_enfa_example0() enfa1 = get_enfa_example1() @@ -101,7 +101,25 @@ def test_union(self): assert not enfa.accepts([symb_a]) assert not enfa.accepts([]) - def test_concatenate(self): + def test_union1(self): + """ + Tests the union of three ENFAs. + Union is (a*b)|(ab+)|c + """ + enfa0 = get_enfa_example0() + enfa1 = get_enfa_example1() + enfa2 = get_enfa_example2() + enfa = enfa0 | enfa2 + enfa |= enfa1 + accepted_words = list(enfa.get_accepted_words(3)) + assert ["b"] in accepted_words + assert ["a", "b"] in accepted_words + assert ["a", "a", "b"] in accepted_words + assert ["a", "b", "b"] in accepted_words + assert ["c"] in accepted_words + assert len(accepted_words) == 5 + + def test_concatenate0(self): """ Tests the concatenation of two epsilon NFA """ enfa0 = get_enfa_example0() enfa1 = get_enfa_example1() @@ -116,7 +134,23 @@ def test_concatenate(self): assert not enfa.accepts([symb_b]) assert not enfa.accepts([]) - def test_kleene(self): + def test_concatenate1(self): + """ + Tests the concatenation of three ENFAs. + Concatenation is a*bc((ab+)|c) + """ + enfa0 = get_enfa_example0() + enfa1 = get_enfa_example1() + enfa2 = get_enfa_example2() + enfa = enfa0 + enfa1 + enfa += enfa2 + accepted_words = list(enfa.get_accepted_words(4)) + assert ["b", "c", "c"] in accepted_words + assert ["a", "b", "c", "c"] in accepted_words + assert ["b", "c", "a", "b"] in accepted_words + assert len(accepted_words) == 3 + + def test_kleene0(self): """ Tests the kleene star of an epsilon NFA """ enfa0 = get_enfa_example0() symb_a = Symbol("a") @@ -130,6 +164,23 @@ def test_kleene(self): assert not enfa.accepts([symb_a]) assert not enfa.accepts([symb_a, symb_b, symb_a]) + def test_kleene1(self): + """ + Tests the kleene star of an ENFA. + Expression is ((ab+)|c)* + """ + enfa = get_enfa_example2() + enfa = enfa.kleene_star() + accepted_words = list(enfa.get_accepted_words(3)) + assert [] in accepted_words + assert ["a", "b"] in accepted_words + assert ["a", "b", "b"] in accepted_words + assert ["a", "b", "c"] in accepted_words + assert ["c", "a", "b"] in accepted_words + for i in range(3): + assert ["c"] * (i + 1) in accepted_words + assert len(accepted_words) == 8 + def test_complement(self): """ Tests the complement operation """ enfa = EpsilonNFA() @@ -544,7 +595,7 @@ def get_enfa_example0(): def get_enfa_example1(): - """ Gives and example ENFA + """ Gives an example ENFA Accepts c """ enfa1 = EpsilonNFA() @@ -557,6 +608,21 @@ def get_enfa_example1(): return enfa1 +def get_enfa_example2(): + """ Gives an example ENFA + Accepts (ab+)|c + """ + enfa = EpsilonNFA(start_states={0, 3}, + final_states={2, 4}) + enfa.add_transitions([ + (0, "a", 1), + (1, "b", 2), + (2, "b", 2), + (3, "c", 4), + ]) + return enfa + + def get_enfa_example0_bis(): """ A non minimal NFA, equivalent to example0 """ enfa0 = EpsilonNFA() diff --git a/pyformlang/pda/pda.py b/pyformlang/pda/pda.py index bda531b..1deb75d 100644 --- a/pyformlang/pda/pda.py +++ b/pyformlang/pda/pda.py @@ -467,12 +467,11 @@ def intersection(self, other: DeterministicFiniteAutomaton) -> "PDA": When intersecting with something else than a regex or a finite automaton """ - start_state_other = other.start_states - if len(start_state_other) == 0: + start_state_other = other.start_state + if not start_state_other: return PDA() pda_state_converter = _PDAStateConverter(self._states, other.states) - start_state_other = list(start_state_other)[0] - final_state_other = other.final_states + final_states_other = other.final_states start = pda_state_converter.to_pda_combined_state(self._start_state, start_state_other) pda = PDA(start_state=start, @@ -484,40 +483,37 @@ def intersection(self, other: DeterministicFiniteAutomaton) -> "PDA": while to_process: state_in, state_dfa = to_process.pop() if (state_in in self._final_states and state_dfa in - final_state_other): + final_states_other): pda.add_final_state( pda_state_converter.to_pda_combined_state(state_in, state_dfa)) for symbol in symbols: if symbol == Epsilon(): symbol_dfa = finite_automaton.Epsilon() + next_state_dfa = state_dfa else: symbol_dfa = finite_automaton.Symbol(symbol.value) - if symbol == Epsilon(): - next_states_dfa = [state_dfa] - else: - next_states_dfa = other(state_dfa, symbol_dfa) - if len(next_states_dfa) == 0: + next_state_dfa = other.get_next_state(state_dfa, symbol_dfa) + if not next_state_dfa: continue for stack_symbol in self._stack_alphabet: next_states_self = self._transition_function(state_in, symbol, stack_symbol) for next_state, next_stack in next_states_self: - for next_state_dfa in next_states_dfa: - pda.add_transition( - pda_state_converter.to_pda_combined_state( - state_in, - state_dfa), - symbol, - stack_symbol, - pda_state_converter.to_pda_combined_state( - next_state, - next_state_dfa), - next_stack) - if (next_state, next_state_dfa) not in processed: - to_process.append((next_state, next_state_dfa)) - processed.add((next_state, next_state_dfa)) + pda.add_transition( + pda_state_converter.to_pda_combined_state( + state_in, + state_dfa), + symbol, + stack_symbol, + pda_state_converter.to_pda_combined_state( + next_state, + next_state_dfa), + next_stack) + if (next_state, next_state_dfa) not in processed: + to_process.append((next_state, next_state_dfa)) + processed.add((next_state, next_state_dfa)) return pda def __and__(self, other: DeterministicFiniteAutomaton) -> "PDA": From 33d0895c985cf23f2bb33bc694f3bb13218520a9 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Thu, 14 Nov 2024 20:07:03 +0300 Subject: [PATCH 36/42] add from_networkx abstract method to fa, use update instead of union in enfa --- .../deterministic_finite_automaton.py | 2 +- pyformlang/finite_automaton/epsilon_nfa.py | 8 +++----- pyformlang/finite_automaton/finite_automaton.py | 15 ++++++++++++--- .../nondeterministic_finite_automaton.py | 2 +- pyformlang/regular_expression/regex.py | 15 +++++++-------- 5 files changed, 24 insertions(+), 18 deletions(-) diff --git a/pyformlang/finite_automaton/deterministic_finite_automaton.py b/pyformlang/finite_automaton/deterministic_finite_automaton.py index 8c27f06..eebbd00 100644 --- a/pyformlang/finite_automaton/deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/deterministic_finite_automaton.py @@ -368,7 +368,7 @@ def _from_epsilon_nfa_internal(cls, enfa: EpsilonNFA, eclose: bool) \ all_trans = [enfa(x, symbol) for x in current] state = set() for trans in all_trans: - state = state.union(trans) + state.update(trans) if not state: continue # Eclose added diff --git a/pyformlang/finite_automaton/epsilon_nfa.py b/pyformlang/finite_automaton/epsilon_nfa.py index f26f461..626019d 100644 --- a/pyformlang/finite_automaton/epsilon_nfa.py +++ b/pyformlang/finite_automaton/epsilon_nfa.py @@ -107,9 +107,7 @@ def _get_next_states_iterable( """ next_states = set() for current_state in current_states: - next_states_temp = self._transition_function(current_state, - symbol) - next_states = next_states.union(next_states_temp) + next_states.update(self(current_state, symbol)) return next_states def accepts(self, word: Iterable[Hashable]) -> bool: @@ -177,7 +175,7 @@ def eclose_iterable(self, states: Iterable[Hashable]) -> Set[State]: states = [to_state(x) for x in states] res = set() for state in states: - res = res.union(self.eclose(state)) + res.update(self.eclose(state)) return res def eclose(self, state: Hashable) -> Set[State]: @@ -599,7 +597,7 @@ def __invert__(self) -> "EpsilonNFA": return self.reverse() def kleene_star(self) -> "EpsilonNFA": - """ Compute the kleene closure of current EpsilonNFA""" + """ Compute the kleene closure of current EpsilonNFA """ new_start = self.__get_new_state("Start") kleene_closure = EpsilonNFA(start_states={new_start}, final_states={new_start}) diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index 0ec187f..058ba2a 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -533,6 +533,16 @@ def to_networkx(self) -> MultiDiGraph: graph.add_edge(s_from.value, s_to.value, label=label_) return graph + @classmethod + @abstractmethod + def from_networkx(cls, graph: MultiDiGraph) -> "FiniteAutomaton": + """ + Import a networkx graph into an finite state automaton. \ + The imported graph requires to have the good format, i.e. to come \ + from the function to_networkx + """ + raise NotImplementedError + def write_as_dot(self, filename: str) -> None: """ Write the automaton in dot format into a file @@ -573,8 +583,7 @@ def get_accepted_words(self, max_length: Optional[int] = None) \ word_to_add = tuple(current_word) if not self.__try_add(words_by_state[current_state], word_to_add): continue - transitions = self._transition_function.get_transitions_from( - current_state) + transitions = self.get_transitions_from(current_state) for symbol, next_state in transitions: if next_state in states_leading_to_final: temp_word = current_word.copy() @@ -593,7 +602,7 @@ def _get_states_leading_to_final(self) -> Set[State]: leading_to_final = self.final_states.copy() visited = set() states_to_process: deque[Any] = \ - deque((None, start_state) for start_state in self.start_states) + deque((None, start_state) for start_state in self.start_states) while states_to_process: previous_state, current_state = states_to_process.pop() if previous_state and current_state in leading_to_final: diff --git a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py index dfcec6d..5d32776 100644 --- a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py @@ -157,4 +157,4 @@ def from_epsilon_nfa(cls, enfa: EpsilonNFA) \ class InvalidEpsilonTransition(Exception): """Exception raised when an epsilon transition is created in - nondeterministic automaton""" + non-epsilon NFA""" diff --git a/pyformlang/regular_expression/regex.py b/pyformlang/regular_expression/regex.py index 15cde3b..6987977 100644 --- a/pyformlang/regular_expression/regex.py +++ b/pyformlang/regular_expression/regex.py @@ -136,7 +136,7 @@ def get_number_operators(self) -> int: return 1 + sum(son.get_number_operators() for son in self.sons) return 0 - def to_minimal_dfa(self) -> "DeterministicFiniteAutomaton": + def to_minimal_dfa(self) -> DeterministicFiniteAutomaton: """ Builds minimal dfa from current regex """ enfa = self.to_epsilon_nfa() dfa = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa) @@ -164,13 +164,12 @@ def _to_epsilon_nfa_internal(self, copy: bool) -> EpsilonNFA: Transforms the regular expression into an epsilon NFA. Copy enfa in case of external usage. """ - if self._enfa is not None: - return self._enfa.copy() if copy else self._enfa - self._enfa = EpsilonNFA() - s_initial = self._set_and_get_initial_state_in_enfa(self._enfa) - s_final = self._set_and_get_final_state_in_enfa(self._enfa) - self._process_to_enfa(self._enfa, s_initial, s_final) - return self._to_epsilon_nfa_internal(copy) + if self._enfa is None: + self._enfa = EpsilonNFA() + s_initial = self._set_and_get_initial_state_in_enfa(self._enfa) + s_final = self._set_and_get_final_state_in_enfa(self._enfa) + self._process_to_enfa(self._enfa, s_initial, s_final) + return self._enfa.copy() if copy else self._enfa def _set_and_get_final_state_in_enfa(self, enfa: EpsilonNFA) -> State: s_final = self._get_next_state_enfa() From 38880e7d4f0e20f08d3fa4e5a95d5e5d3da077a7 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Tue, 22 Oct 2024 22:14:17 +0300 Subject: [PATCH 37/42] add recursive automaton annotations --- pyformlang/rsa/box.py | 83 ++++++++++++++------------- pyformlang/rsa/recursive_automaton.py | 79 ++++++++++++------------- 2 files changed, 79 insertions(+), 83 deletions(-) diff --git a/pyformlang/rsa/box.py b/pyformlang/rsa/box.py index d9ba91f..4f792b9 100644 --- a/pyformlang/rsa/box.py +++ b/pyformlang/rsa/box.py @@ -1,10 +1,11 @@ """ Representation of a box for recursive automaton """ -from typing import Union, Any + +from typing import Set, Hashable, Any from pyformlang.finite_automaton import DeterministicFiniteAutomaton -from pyformlang.finite_automaton.symbol import Symbol +from pyformlang.finite_automaton import State, Symbol from pyformlang.finite_automaton.utils import to_symbol @@ -24,58 +25,27 @@ class Box: def __init__(self, dfa: DeterministicFiniteAutomaton, - nonterminal: Union[Symbol, str]): + nonterminal: Hashable) -> None: self._dfa = dfa - - nonterminal = to_symbol(nonterminal) - self._nonterminal = nonterminal - - def to_subgraph_dot(self): - """Creates a named subgraph representing a box""" - graph = self._dfa.to_networkx() - strange_nodes = [] - nonterminal = self.nonterminal.value.replace('"', '').replace("'", "").replace(".", "") - dot_string = (f'subgraph cluster_{nonterminal}\n{{ label="{nonterminal}"\n' - f'fontname="Helvetica,Arial,sans-serif"\n' - f'node [fontname="Helvetica,Arial,sans-serif"]\n' - f'edge [fontname="Helvetica,Arial,sans-serif"]\nrankdir=LR;\n' - f'node [shape = circle style=filled fillcolor=white]') - for node, data in graph.nodes(data=True): - node = node.replace('"', '').replace("'", "") - if 'is_start' not in data.keys() or 'is_final' not in data.keys(): - strange_nodes.append(node) - continue - if data['is_start']: - dot_string += f'\n"{node}" [fillcolor = green];' - if data['is_final']: - dot_string += f'\n"{node}" [shape = doublecircle];' - for strange_node in strange_nodes: - graph.remove_node(strange_node) - for node_from, node_to, data in graph.edges(data=True): - node_from = node_from.replace('"', '').replace("'", "") - node_to = node_to.replace('"', '').replace("'", "") - label = data['label'].replace('"', '').replace("'", "") - dot_string += f'\n"{node_from}" -> "{node_to}" [label = "{label}"];' - dot_string += "\n}" - return dot_string + self._nonterminal = to_symbol(nonterminal) @property - def dfa(self): + def dfa(self) -> DeterministicFiniteAutomaton: """ Box's dfa """ return self._dfa @property - def nonterminal(self): + def nonterminal(self) -> Symbol: """ Box's nonterminal """ return self._nonterminal @property - def start_states(self): + def start_states(self) -> Set[State]: """ The start states """ return self._dfa.start_states @property - def final_states(self): + def final_states(self) -> Set[State]: """ The final states """ return self._dfa.final_states @@ -100,5 +70,36 @@ def __eq__(self, other: Any) -> bool: return False return self.is_equivalent_to(other) - def __hash__(self): - return self._nonterminal.__hash__() + def __hash__(self) -> int: + return hash(self.nonterminal) + + def to_subgraph_dot(self) -> str: + """Creates a named subgraph representing a box""" + graph = self._dfa.to_networkx() + strange_nodes = [] + nonterminal = str(self.nonterminal) \ + .replace('"', '').replace("'", "").replace(".", "") + dot_string = \ + (f'subgraph cluster_{nonterminal}\n{{ label="{nonterminal}"\n' + f'fontname="Helvetica,Arial,sans-serif"\n' + f'node [fontname="Helvetica,Arial,sans-serif"]\n' + f'edge [fontname="Helvetica,Arial,sans-serif"]\nrankdir=LR;\n' + f'node [shape = circle style=filled fillcolor=white]') + for node, data in graph.nodes(data=True): + node = node.replace('"', '').replace("'", "") + if 'is_start' not in data.keys() or 'is_final' not in data.keys(): + strange_nodes.append(node) + continue + if data['is_start']: + dot_string += f'\n"{node}" [fillcolor = green];' + if data['is_final']: + dot_string += f'\n"{node}" [shape = doublecircle];' + for strange_node in strange_nodes: + graph.remove_node(strange_node) + for node_from, node_to, data in graph.edges(data=True): + node_from = node_from.replace('"', '').replace("'", "") + node_to = node_to.replace('"', '').replace("'", "") + label = data['label'].replace('"', '').replace("'", "") + dot_string += f'\n"{node_from}" -> "{node_to}" [label = "{label}"];' + dot_string += "\n}" + return dot_string diff --git a/pyformlang/rsa/recursive_automaton.py b/pyformlang/rsa/recursive_automaton.py index 158b13b..f37f876 100644 --- a/pyformlang/rsa/recursive_automaton.py +++ b/pyformlang/rsa/recursive_automaton.py @@ -2,15 +2,14 @@ Representation of a recursive automaton """ -from typing import AbstractSet, Union +from typing import Dict, Set, AbstractSet, Optional, Hashable, Any -from pyformlang.finite_automaton import DeterministicFiniteAutomaton -from pyformlang.finite_automaton.symbol import Symbol +from pyformlang.finite_automaton import Symbol from pyformlang.finite_automaton.utils import to_symbol from pyformlang.regular_expression import Regex from pyformlang.cfg import Epsilon -from pyformlang.rsa.box import Box +from .box import Box class RecursiveAutomaton: @@ -29,15 +28,15 @@ class RecursiveAutomaton: def __init__(self, start_box: Box, - boxes: AbstractSet[Box]): - self._nonterminal_to_box = {} + boxes: AbstractSet[Box]) -> None: + self._nonterminal_to_box: Dict[Symbol, Box] = {} + self._start_nonterminal = start_box.nonterminal if start_box not in boxes: - self._nonterminal_to_box[to_symbol(start_box.nonterminal)] = start_box - self._start_nonterminal = to_symbol(start_box.nonterminal) + self._nonterminal_to_box[start_box.nonterminal] = start_box for box in boxes: - self._nonterminal_to_box[to_symbol(box.nonterminal)] = box + self._nonterminal_to_box[box.nonterminal] = box - def get_box_by_nonterminal(self, nonterminal: Union[Symbol, str]): + def get_box_by_nonterminal(self, nonterminal: Hashable) -> Optional[Box]: """ Box by nonterminal @@ -53,50 +52,35 @@ def get_box_by_nonterminal(self, nonterminal: Union[Symbol, str]): """ nonterminal = to_symbol(nonterminal) - if nonterminal in self._nonterminal_to_box: - return self._nonterminal_to_box[nonterminal] + return self._nonterminal_to_box.get(nonterminal, None) - return None - - def get_number_boxes(self): + def get_number_boxes(self) -> int: """ Size of set of boxes """ - return len(self._nonterminal_to_box) - def to_dot(self): - """ Create dot representation of recursive automaton """ - dot_string = 'digraph "" {' - for box in self._nonterminal_to_box.values(): - dot_string += f'\n{box.to_subgraph_dot()}' - dot_string += "\n}" - return dot_string - @property - def nonterminals(self) -> set: + def nonterminals(self) -> Set[Symbol]: """ The set of nonterminals """ - return set(self._nonterminal_to_box.keys()) @property - def boxes(self) -> dict: + def boxes(self) -> Set[Box]: """ The set of boxes """ - - return self._nonterminal_to_box + return set(self._nonterminal_to_box.values()) @property def start_nonterminal(self) -> Symbol: """ The start nonterminal """ - return self._start_nonterminal @property - def start_box(self): + def start_box(self) -> Box: """ The start box """ - - return self.boxes[self.start_nonterminal] + return self._nonterminal_to_box[self.start_nonterminal] @classmethod - def from_regex(cls, regex: Regex, start_nonterminal: Union[Symbol, str]): + def from_regex(cls, regex: Regex, start_nonterminal: Hashable) \ + -> "RecursiveAutomaton": """ Create a recursive automaton from regular expression Parameters @@ -116,14 +100,17 @@ def from_regex(cls, regex: Regex, start_nonterminal: Union[Symbol, str]): return RecursiveAutomaton(box, {box}) @classmethod - def from_ebnf(cls, text, start_nonterminal: Union[Symbol, str] = Symbol("S")): - """ Create a recursive automaton from ebnf (ebnf = Extended Backus-Naur Form) + def from_ebnf(cls, text: str, start_nonterminal: Hashable = Symbol("S")) \ + -> "RecursiveAutomaton": + """ Create a recursive automaton from ebnf \ + (ebnf = Extended Backus-Naur Form) Parameters ----------- text : str The text of transform - start_nonterminal : :class:`~pyformlang.finite_automaton.Symbol` | str, optional + start_nonterminal : \ + :class:`~pyformlang.finite_automaton.Symbol` | str, optional The start nonterminal, S by default Returns @@ -132,7 +119,7 @@ def from_ebnf(cls, text, start_nonterminal: Union[Symbol, str] = Symbol("S")): The new recursive automaton built from context-free grammar """ start_nonterminal = to_symbol(start_nonterminal) - productions = {} + productions: Dict[Hashable, str] = {} boxes = set() nonterminals = set() for production in text.splitlines(): @@ -160,7 +147,7 @@ def from_ebnf(cls, text, start_nonterminal: Union[Symbol, str] = Symbol("S")): start_box = Box(start_box_dfa, start_nonterminal) return RecursiveAutomaton(start_box, boxes) - def is_equals_to(self, other): + def is_equal_to(self, other: "RecursiveAutomaton") -> bool: """ Check whether two recursive automata are equals by boxes. Not equivalency in terms of formal languages theory, just mapping boxes @@ -175,9 +162,17 @@ def is_equals_to(self, other): are_equivalent : bool Whether the two recursive automata are equals or not """ + return self.boxes == other.boxes + + def __eq__(self, other: Any) -> bool: if not isinstance(other, RecursiveAutomaton): return False - return self.boxes == other.boxes + return self.is_equal_to(other) - def __eq__(self, other): - return self.is_equals_to(other) + def to_dot(self) -> str: + """ Create dot representation of recursive automaton """ + dot_string = 'digraph "" {' + for box in self._nonterminal_to_box.values(): + dot_string += f'\n{box.to_subgraph_dot()}' + dot_string += "\n}" + return dot_string From 30d00d9f7977c73589d7e515312fe403bff14d2e Mon Sep 17 00:00:00 2001 From: bygu4 Date: Fri, 15 Nov 2024 15:59:08 +0300 Subject: [PATCH 38/42] minor style changes in automata --- .../deterministic_transition_function.py | 1 + .../finite_automaton/transition_function.py | 1 + pyformlang/rsa/recursive_automaton.py | 40 +++++++++---------- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/pyformlang/finite_automaton/deterministic_transition_function.py b/pyformlang/finite_automaton/deterministic_transition_function.py index 1426ea5..d739342 100644 --- a/pyformlang/finite_automaton/deterministic_transition_function.py +++ b/pyformlang/finite_automaton/deterministic_transition_function.py @@ -13,6 +13,7 @@ NondeterministicTransitionFunction from .nondeterministic_finite_automaton import InvalidEpsilonTransition + class DeterministicTransitionFunction(NondeterministicTransitionFunction): """A deterministic transition function in a finite automaton diff --git a/pyformlang/finite_automaton/transition_function.py b/pyformlang/finite_automaton/transition_function.py index b796272..310a5a4 100644 --- a/pyformlang/finite_automaton/transition_function.py +++ b/pyformlang/finite_automaton/transition_function.py @@ -11,6 +11,7 @@ from .state import State from .symbol import Symbol + class TransitionFunction(Iterable[Tuple[State, Symbol, State]]): """ General transition function representation """ diff --git a/pyformlang/rsa/recursive_automaton.py b/pyformlang/rsa/recursive_automaton.py index f37f876..d823dc8 100644 --- a/pyformlang/rsa/recursive_automaton.py +++ b/pyformlang/rsa/recursive_automaton.py @@ -36,6 +36,26 @@ def __init__(self, for box in boxes: self._nonterminal_to_box[box.nonterminal] = box + @property + def nonterminals(self) -> Set[Symbol]: + """ The set of nonterminals """ + return set(self._nonterminal_to_box.keys()) + + @property + def boxes(self) -> Set[Box]: + """ The set of boxes """ + return set(self._nonterminal_to_box.values()) + + @property + def start_nonterminal(self) -> Symbol: + """ The start nonterminal """ + return self._start_nonterminal + + @property + def start_box(self) -> Box: + """ The start box """ + return self._nonterminal_to_box[self.start_nonterminal] + def get_box_by_nonterminal(self, nonterminal: Hashable) -> Optional[Box]: """ Box by nonterminal @@ -58,26 +78,6 @@ def get_number_boxes(self) -> int: """ Size of set of boxes """ return len(self._nonterminal_to_box) - @property - def nonterminals(self) -> Set[Symbol]: - """ The set of nonterminals """ - return set(self._nonterminal_to_box.keys()) - - @property - def boxes(self) -> Set[Box]: - """ The set of boxes """ - return set(self._nonterminal_to_box.values()) - - @property - def start_nonterminal(self) -> Symbol: - """ The start nonterminal """ - return self._start_nonterminal - - @property - def start_box(self) -> Box: - """ The start box """ - return self._nonterminal_to_box[self.start_nonterminal] - @classmethod def from_regex(cls, regex: Regex, start_nonterminal: Hashable) \ -> "RecursiveAutomaton": From 72cd938f94f5da355d5e9b3dcbdfa016ac8327f9 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Sat, 16 Nov 2024 15:38:15 +0300 Subject: [PATCH 39/42] refactor PreviousTransitions --- .../deterministic_finite_automaton.py | 43 +------------------ pyformlang/finite_automaton/utils.py | 40 ++++++++++++++++- 2 files changed, 41 insertions(+), 42 deletions(-) diff --git a/pyformlang/finite_automaton/deterministic_finite_automaton.py b/pyformlang/finite_automaton/deterministic_finite_automaton.py index eebbd00..98d562e 100644 --- a/pyformlang/finite_automaton/deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/deterministic_finite_automaton.py @@ -2,52 +2,15 @@ Representation of a deterministic finite automaton """ -from typing import Dict, List, Iterable, AbstractSet, Optional, Hashable, Any -from numpy import empty +from typing import Iterable, AbstractSet, Optional, Hashable, Any from .state import State -from .symbol import Symbol from .deterministic_transition_function import DeterministicTransitionFunction from .epsilon_nfa import EpsilonNFA from .nondeterministic_finite_automaton import NondeterministicFiniteAutomaton from .hopcroft_processing_list import HopcroftProcessingList from .partition import Partition -from .utils import to_state, to_symbol, to_single_state - - -class PreviousTransitions: - """For internal usage""" - - def __init__(self, - states: AbstractSet[State], - symbols: AbstractSet[Symbol]) -> None: - self._to_index_state: Dict[Optional[State], int] = {} - self._to_index_state[None] = 0 - for i, state in enumerate(states): - self._to_index_state[state] = i + 1 - self._to_index_symbol: Dict[Optional[Symbol], int] = {} - for i, symbol in enumerate(symbols): - self._to_index_symbol[symbol] = i - self._conversion = empty((len(states) + 1, len(symbols)), - dtype=object) - - def add(self, - next0: Optional[State], - symbol: Symbol, - state: Optional[State]) -> None: - """ Internal """ - i_next0 = self._to_index_state[next0] - i_symbol = self._to_index_symbol[symbol] - if self._conversion[i_next0, i_symbol] is None: - self._conversion[i_next0, i_symbol] = [state] - else: - self._conversion[i_next0, i_symbol].append(state) - - def get(self, next0: State, symbol: Symbol) -> List[object]: - """ Internal """ - i_next0 = self._to_index_state[next0] - i_symbol = self._to_index_symbol[symbol] - return self._conversion[i_next0, i_symbol] or [] +from .utils import to_state, to_symbol, to_single_state, PreviousTransitions class DeterministicFiniteAutomaton(NondeterministicFiniteAutomaton): @@ -263,8 +226,6 @@ def _get_previous_transitions(self) -> PreviousTransitions: for symbol in self._input_symbols: next0 = self._transition_function.get_next_state(state, symbol) previous_transitions.add(next0, symbol, state) - for symbol in self._input_symbols: - previous_transitions.add(None, symbol, None) return previous_transitions def minimize(self) -> "DeterministicFiniteAutomaton": diff --git a/pyformlang/finite_automaton/utils.py b/pyformlang/finite_automaton/utils.py index f8eddca..ed7e4a2 100644 --- a/pyformlang/finite_automaton/utils.py +++ b/pyformlang/finite_automaton/utils.py @@ -1,6 +1,7 @@ """ Utility for finite automata """ -from typing import Iterable, Hashable +from typing import Dict, List, AbstractSet, Iterable, Optional, Hashable +from numpy import empty from .state import State from .symbol import Symbol @@ -56,3 +57,40 @@ def to_single_state(l_states: Iterable[State]) -> State: values.append("TRASH") values = sorted(values) return State(";".join(values)) + + +class PreviousTransitions: + """ + Previous transitions for deterministic automata + minimization algorithm. + """ + + def __init__(self, + states: AbstractSet[State], + symbols: AbstractSet[Symbol]) -> None: + self._to_index_state: Dict[State, int] = {} + for i, state in enumerate(states): + self._to_index_state[state] = i + 1 + self._to_index_symbol: Dict[Symbol, int] = {} + for i, symbol in enumerate(symbols): + self._to_index_symbol[symbol] = i + self._conversion = empty((len(states) + 1, len(symbols)), + dtype=State) + + def add(self, + next0: Optional[State], + symbol: Symbol, + state: State) -> None: + """ Internal """ + i_next0 = self._to_index_state[next0] if next0 else 0 + i_symbol = self._to_index_symbol[symbol] + if self._conversion[i_next0, i_symbol] is None: + self._conversion[i_next0, i_symbol] = [state] + else: + self._conversion[i_next0, i_symbol].append(state) + + def get(self, next0: Optional[State], symbol: Symbol) -> List[State]: + """ Internal """ + i_next0 = self._to_index_state[next0] if next0 else 0 + i_symbol = self._to_index_symbol[symbol] + return self._conversion[i_next0, i_symbol] or [] From ac97aece625f30e1ca54a54e2005ae5c342d0baf Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 25 Nov 2024 23:23:49 +0300 Subject: [PATCH 40/42] handle cycles in get_states_leading_to_final, add more tests --- .../finite_automaton/finite_automaton.py | 5 + .../test_deterministic_finite_automaton.py | 24 ++++ .../tests/test_epsilon_nfa.py | 2 +- .../test_nondeterministic_finite_automaton.py | 133 ++++++++++++------ 4 files changed, 121 insertions(+), 43 deletions(-) diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index d33ba84..a93f247 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -635,12 +635,14 @@ def _get_states_leading_to_final(self) -> Set[State]: visited = set() states_to_process = deque((None, start_state) for start_state in self.start_states) + delayed_states = deque() while states_to_process: previous_state, current_state = states_to_process.pop() if previous_state and current_state in leading_to_final: leading_to_final.add(previous_state) continue if current_state in visited: + delayed_states.append((previous_state, current_state)) continue visited.add(current_state) next_states = self._get_next_states_from(current_state) @@ -648,6 +650,9 @@ def _get_states_leading_to_final(self) -> Set[State]: states_to_process.append((previous_state, current_state)) for next_state in next_states: states_to_process.append((current_state, next_state)) + for previous_state, current_state in delayed_states: + if previous_state and current_state in leading_to_final: + leading_to_final.add(previous_state) return leading_to_final def _get_reachable_states(self) -> Set[State]: diff --git a/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py b/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py index b0c0981..792cccf 100644 --- a/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py @@ -288,6 +288,15 @@ def test_word_generation(self): assert [Symbol("b"), Symbol("d")] in accepted_words assert len(accepted_words) == 3 + def test_cyclic_word_generation(self): + dfa = get_cyclic_dfa_example() + accepted_words = list(dfa.get_accepted_words(5)) + assert ["a", "f"] in accepted_words + assert ["a", "b", "e", "f"] in accepted_words + assert ["a", "b", "c", "e", "f"] in accepted_words + assert ["a", "b", "d", "a", "f"] in accepted_words + assert len(accepted_words) == 4 + def test_dfa_generating_no_words(self): dfa = get_dfa_example_without_accepted_words() accepted_words = list(dfa.get_accepted_words()) @@ -362,6 +371,21 @@ def get_dfa_example_for_word_generation(): return dfa +def get_cyclic_dfa_example(): + """ Gets DFA example with several cycles on path to final """ + dfa = DeterministicFiniteAutomaton(start_state=0, + final_states={3}) + dfa.add_transitions([ + (0, "a", 1), + (1, "b", 2), + (2, "c", 2), + (2, "d", 0), + (2, "e", 1), + (1, "f", 3), + ]) + return dfa + + def get_dfa_example_without_accepted_words(): """ DFA example accepting no words """ dfa = DeterministicFiniteAutomaton() diff --git a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py index f273e35..742b0c9 100644 --- a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py +++ b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py @@ -648,7 +648,7 @@ def test_epsilon_cycle_word_generation(self): accepted_words = list(enfa.get_accepted_words(max_length)) assert [] in accepted_words assert [Symbol("a"), Symbol("c")] in accepted_words - assert [Symbol("a"), Symbol("b"),Symbol("c")] in accepted_words + assert [Symbol("a"), Symbol("b"), Symbol("c")] in accepted_words assert [Symbol("a"), Symbol("b"), Symbol("b"), Symbol("c")] in accepted_words assert len(accepted_words) == 4 diff --git a/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py index fe7819c..f0d7143 100644 --- a/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py @@ -133,61 +133,110 @@ def test_for_duplicate_generation(self): assert [Symbol("b"), Symbol("c")] in accepted_words assert len(accepted_words) == 2 + def test_cyclic_word_generation(self): + nfa = get_cyclic_nfa_example() + accepted_words = list(nfa.get_accepted_words(5)) + assert ["a", "d", "g"] in accepted_words + assert ["a", "b", "c", "d", "g"] in accepted_words + assert ["a", "d", "e", "f", "g"] in accepted_words + assert ["b", "f", "g"] in accepted_words + assert ["b", "f", "e", "f", "g"] in accepted_words + assert len(accepted_words) == 5 + + def test_final_state_at_start_generation(self): + nfa = get_nfa_example_with_final_state_at_start() + accepted_words = list(nfa.get_accepted_words()) + assert accepted_words == [[]] + + def test_start_state_at_the_end_generation(self): + nfa = get_nfa_example_with_start_state_at_the_end() + accepted_words = list(nfa.get_accepted_words(5)) + assert [] in accepted_words + assert ["a", "b", "c"] in accepted_words + assert ["a", "b", "e", "b", "c"] in accepted_words + assert ["d", "b", "c"] in accepted_words + assert ["d", "b", "e", "b", "c"] in accepted_words + assert len(accepted_words) == 5 + def get_nfa_example_for_word_generation(): """ Gets Nondeterministic Finite Automaton \ example for the word generation test. """ - nfa = NondeterministicFiniteAutomaton() - states = [State(x) for x in range(9)] - symbol_a = Symbol("a") - symbol_b = Symbol("b") - symbol_c = Symbol("c") - symbol_d = Symbol("d") - symbol_e = Symbol("e") - symbol_f = Symbol("f") + nfa = NondeterministicFiniteAutomaton(start_state={0, 4}, + final_states={3, 4, 6, 8}) nfa.add_transitions([ - (states[0], symbol_a, states[1]), - (states[0], symbol_a, states[2]), - (states[1], symbol_a, states[1]), - (states[2], symbol_b, states[3]), - (states[2], symbol_c, states[3]), - (states[4], symbol_d, states[5]), - (states[5], symbol_e, states[6]), - (states[5], symbol_e, states[7]), - (states[7], symbol_f, states[8]), + (0, "a", 1), + (0, "a", 2), + (1, "a", 1), + (2, "b", 3), + (2, "c", 3), + (4, "d", 5), + (5, "e", 6), + (5, "e", 7), + (7, "f", 8), ]) - nfa.add_start_state(states[0]) - nfa.add_start_state(states[4]) - nfa.add_final_state(states[3]) - nfa.add_final_state(states[4]) - nfa.add_final_state(states[6]) - nfa.add_final_state(states[8]) return nfa def get_nfa_example_with_duplicates(): """ Gets NFA example with duplicate word chains """ - nfa = NondeterministicFiniteAutomaton() - states = [State(x) for x in range(9)] - symbol_a = Symbol("a") - symbol_b = Symbol("b") - symbol_c = Symbol("c") + nfa = NondeterministicFiniteAutomaton(start_state={0, 1, 5, 6}, + final_states={3, 4, 8}) + nfa.add_transitions([ + (0, "a", 2), + (1, "a", 2), + (2, "c", 3), + (2, "c", 4), + (5, "a", 7), + (6, "b", 7), + (7, "c", 8), + ]) + return nfa + + +def get_cyclic_nfa_example(): + """ Gets NFA example with several cycles on path to final """ + nfa = NondeterministicFiniteAutomaton(start_state={0, 5}, + final_states={4}) + nfa.add_transitions([ + (0, "a", 1), + (1, "b", 2), + (2, "c", 1), + (1, "d", 3), + (3, "e", 6), + (6, "f", 3), + (3, "g", 4), + (5, "b", 6), + ]) + return nfa + + +def get_nfa_example_with_final_state_at_start(): + """ Gets NFA example with final state at start """ + nfa = NondeterministicFiniteAutomaton(start_state={0, 5}, + final_states={0}) + nfa.add_transitions([ + (0, "a", 1), + (1, "b", 2), + (2, "c", 3), + (2, "d", 4), + (5, "e", 1), + (5, "e", 2), + ]) + return nfa + + +def get_nfa_example_with_start_state_at_the_end(): + """ Gets NFA example with start state at the end """ + nfa = NondeterministicFiniteAutomaton(start_state={0, 3, 4}, + final_states={3}) nfa.add_transitions([ - (states[0], symbol_a, states[2]), - (states[1], symbol_a, states[2]), - (states[2], symbol_c, states[3]), - (states[2], symbol_c, states[4]), - (states[5], symbol_a, states[7]), - (states[6], symbol_b, states[7]), - (states[7], symbol_c, states[8]), + (0, "a", 1), + (1, "b", 2), + (2, "e", 1), + (2, "c", 3), + (4, "d", 1), ]) - nfa.add_start_state(states[0]) - nfa.add_start_state(states[1]) - nfa.add_start_state(states[5]) - nfa.add_start_state(states[6]) - nfa.add_final_state(states[3]) - nfa.add_final_state(states[4]) - nfa.add_final_state(states[8]) return nfa From b5a07355d25c8b1eb82592f108f0c78f84315ca7 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 9 Dec 2024 13:06:43 +0300 Subject: [PATCH 41/42] remove fastcore import, update pyright config --- .../deterministic_finite_automaton.py | 28 ++++++++----------- .../finite_automaton/finite_automaton.py | 11 -------- .../nondeterministic_transition_function.py | 8 ------ .../tests/test_epsilon_nfa.py | 12 +++++++- .../finite_automaton/transition_function.py | 11 -------- pyrightconfig.json | 1 - requirements.txt | 1 - 7 files changed, 23 insertions(+), 49 deletions(-) diff --git a/pyformlang/finite_automaton/deterministic_finite_automaton.py b/pyformlang/finite_automaton/deterministic_finite_automaton.py index 98d562e..61b4acf 100644 --- a/pyformlang/finite_automaton/deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/deterministic_finite_automaton.py @@ -138,6 +138,13 @@ def remove_start_state(self, state: Hashable) -> int: return 1 return 0 + def get_next_state(self, s_from: Hashable, symb_by: Hashable) \ + -> Optional[State]: + """ Make a call of deterministic transition function """ + s_from = to_state(s_from) + symb_by = to_symbol(symb_by) + return self._transition_function.get_next_state(s_from, symb_by) + def accepts(self, word: Iterable[Hashable]) -> bool: """ Checks whether the dfa accepts a given word @@ -167,8 +174,7 @@ def accepts(self, word: Iterable[Hashable]) -> bool: for symbol in word: if current_state is None: return False - current_state = self._transition_function.get_next_state( - current_state, symbol) + current_state = self.get_next_state(current_state, symbol) return current_state is not None and self.is_final_state(current_state) def is_deterministic(self) -> bool: @@ -212,19 +218,12 @@ def copy(self) -> "DeterministicFiniteAutomaton": """ return self._copy_to(DeterministicFiniteAutomaton()) - def get_next_state(self, s_from: Hashable, symb_by: Hashable) \ - -> Optional[State]: - """ Make a call of deterministic transition function """ - s_from = to_state(s_from) - symb_by = to_symbol(symb_by) - return self._transition_function.get_next_state(s_from, symb_by) - def _get_previous_transitions(self) -> PreviousTransitions: previous_transitions = PreviousTransitions(self._states, self._input_symbols) for state in self._states: for symbol in self._input_symbols: - next0 = self._transition_function.get_next_state(state, symbol) + next0 = self.get_next_state(state, symbol) previous_transitions.add(next0, symbol, state) return previous_transitions @@ -275,8 +274,7 @@ def minimize(self) -> "DeterministicFiniteAutomaton": done = set() new_state = to_new_states[state] for symbol in self._input_symbols: - next_node = self._transition_function.get_next_state( - state, symbol) + next_node = self.get_next_state(state, symbol) if next_node and next_node in states: next_node = to_new_states[next_node] if (next_node, symbol) not in done: @@ -430,10 +428,8 @@ def _is_equivalent_to_minimal( matches = {self_minimal.start_state: other_minimal.start_state} while to_process: current_self, current_other = to_process.pop() - if (self_minimal.is_final_state(current_self) - and not other_minimal.is_final_state(current_other)) or \ - (not self_minimal.is_final_state(current_self) - and other_minimal.is_final_state(current_other)): + if self_minimal.is_final_state(current_self) != \ + other_minimal.is_final_state(current_other): return False next_self = list(self_minimal.get_transitions_from(current_self)) next_other = list(other_minimal.get_transitions_from(current_other)) diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index 919e385..d2b9a8a 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -8,7 +8,6 @@ from collections import deque from networkx import MultiDiGraph from networkx.drawing.nx_pydot import write_dot -from fastcore.dispatch import typedispatch from pyformlang.fst import FST @@ -322,16 +321,6 @@ def remove_final_state(self, state: Hashable) -> int: return 1 return 0 - @typedispatch - def __call__(self, s_from: Hashable) -> Iterable[Tuple[Symbol, Set[State]]]: - """ - Gives FA transitions from given state. - Calls the transition function - """ - s_from = to_state(s_from) - return self._transition_function(s_from) - - @typedispatch def __call__(self, s_from: Hashable, symb_by: Hashable) -> Set[State]: """ Gives the states obtained after calling a symbol on a state Calls the transition function diff --git a/pyformlang/finite_automaton/nondeterministic_transition_function.py b/pyformlang/finite_automaton/nondeterministic_transition_function.py index b941bb6..8162f84 100644 --- a/pyformlang/finite_automaton/nondeterministic_transition_function.py +++ b/pyformlang/finite_automaton/nondeterministic_transition_function.py @@ -6,7 +6,6 @@ from typing import Dict, Set, Iterable, Tuple from copy import deepcopy -from fastcore.dispatch import typedispatch from .state import State from .symbol import Symbol @@ -129,13 +128,6 @@ def get_number_transitions(self) -> int: counter += len(s_to) return counter - @typedispatch - def __call__(self, s_from: State) -> Iterable[Tuple[Symbol, Set[State]]]: - """ Calls the transition function as a real function """ - if s_from in self._transitions: - yield from self._transitions[s_from].items() - - @typedispatch def __call__(self, s_from: State, symb_by: Symbol) -> Set[State]: """ Calls the transition function as a real function diff --git a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py index 362b05e..b498b8c 100644 --- a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py +++ b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py @@ -484,8 +484,18 @@ def test_len(self): assert len(enfa) == 1 def test_call(self): + """ Tests the call of the transition function of the ENFA """ enfa = get_enfa_example1() - assert len(list(enfa(2))) == 1 + assert enfa(2, "c") == {3} + assert not enfa(3, "a") + assert not enfa(2313, "qwe") + + def test_get_transitions_from(self): + """ Tests the transition obtaining from the given state """ + enfa = get_enfa_example1() + assert list(enfa.get_transitions_from(2)) == [("c", 3)] + assert not list(enfa.get_transitions_from(3)) + assert not list(enfa.get_transitions_from(4210)) def test_remove_epsilon_transitions(self): enfa = EpsilonNFA() diff --git a/pyformlang/finite_automaton/transition_function.py b/pyformlang/finite_automaton/transition_function.py index 310a5a4..7ff90a9 100644 --- a/pyformlang/finite_automaton/transition_function.py +++ b/pyformlang/finite_automaton/transition_function.py @@ -6,7 +6,6 @@ from typing import Dict, Set, Tuple, Iterable, Iterator from abc import abstractmethod -from fastcore.dispatch import typedispatch from .state import State from .symbol import Symbol @@ -39,16 +38,6 @@ def get_number_transitions(self) -> int: def __len__(self) -> int: return self.get_number_transitions() - @typedispatch - @abstractmethod - def __call__(self, s_from: State) -> Iterable[Tuple[Symbol, Set[State]]]: - """ - Calls the transition function - as a real function for given state. - """ - raise NotImplementedError - - @typedispatch @abstractmethod def __call__(self, s_from: State, symb_by: Symbol) -> Set[State]: """ diff --git a/pyrightconfig.json b/pyrightconfig.json index e281b53..7fd2a49 100644 --- a/pyrightconfig.json +++ b/pyrightconfig.json @@ -19,5 +19,4 @@ "strictParameterNoneValue": false, "reportMissingParameterType": "warning", - "reportRedeclaration": "none", } diff --git a/requirements.txt b/requirements.txt index 75d858d..3179fd5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,7 +11,6 @@ numpy pylint pycodestyle pydot -fastcore pygments>=2.7.4 # not directly required, pinned by Snyk to avoid a vulnerability pylint>=2.7.0 # not directly required, pinned by Snyk to avoid a vulnerability sphinx>=3.0.4 # not directly required, pinned by Snyk to avoid a vulnerability From 94af59ab2602e9618d83e33179175ac7bd61a37b Mon Sep 17 00:00:00 2001 From: bygu4 Date: Sun, 29 Dec 2024 21:03:15 +0300 Subject: [PATCH 42/42] correct generic type naming, simplify _to_epsilon_nfa_internal method --- .../deterministic_transition_function.py | 2 -- .../finite_automaton/finite_automaton.py | 18 +++++++++++------- .../nondeterministic_transition_function.py | 2 -- .../finite_automaton/transition_function.py | 2 -- pyformlang/regular_expression/regex.py | 18 +++++++----------- 5 files changed, 18 insertions(+), 24 deletions(-) diff --git a/pyformlang/finite_automaton/deterministic_transition_function.py b/pyformlang/finite_automaton/deterministic_transition_function.py index d739342..39d0a8f 100644 --- a/pyformlang/finite_automaton/deterministic_transition_function.py +++ b/pyformlang/finite_automaton/deterministic_transition_function.py @@ -2,8 +2,6 @@ A deterministic transition function """ -# pylint: disable=function-redefined - from typing import Optional from .state import State diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index d2b9a8a..efc7392 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -1,7 +1,5 @@ """ A general finite automaton representation """ -# pylint: disable=function-redefined - from typing import Dict, List, Set, Tuple, \ Iterable, Iterator, Optional, Hashable, Any, TypeVar from abc import abstractmethod @@ -17,7 +15,7 @@ from .transition_function import TransitionFunction from .utils import to_state, to_symbol -fa_type = TypeVar("fa_type", bound="FiniteAutomaton") +AutomatonT = TypeVar("AutomatonT", bound="FiniteAutomaton") class FiniteAutomaton(Iterable[Tuple[State, Symbol, State]]): @@ -41,6 +39,7 @@ class FiniteAutomaton(Iterable[Tuple[State, Symbol, State]]): A set of final or accepting states. It is a subset of states. """ + @abstractmethod def __init__(self) -> None: self._states: Set[State] self._input_symbols: Set[Symbol] @@ -553,13 +552,18 @@ def write_as_dot(self, filename: str) -> None: """ write_dot(self.to_networkx(), filename) + @abstractmethod + def accepts(self, word: Iterable[Hashable]) -> bool: + """ Checks whether the finite automaton accepts a given word """ + raise NotImplementedError + def get_accepted_words(self, max_length: Optional[int] = None) \ -> Iterable[List[Symbol]]: """ Gets words accepted by the finite automaton. """ if max_length is not None and max_length < 0: - return [] + return states_to_visit = deque((start_state, []) for start_state in self.start_states) states_leading_to_final = self._get_states_leading_to_final() @@ -657,14 +661,14 @@ def to_dict(self) -> Dict[State, Dict[Symbol, Set[State]]]: return self._transition_function.to_dict() @abstractmethod - def copy(self: fa_type) -> fa_type: + def copy(self: AutomatonT) -> AutomatonT: """ Copies the current Finite Automaton instance """ raise NotImplementedError - def __copy__(self: fa_type) -> fa_type: + def __copy__(self: AutomatonT) -> AutomatonT: return self.copy() - def _copy_to(self, fa_to_copy_to: fa_type) -> fa_type: + def _copy_to(self, fa_to_copy_to: AutomatonT) -> AutomatonT: """ Copies current automaton properties to the given one """ for start in self._start_states: fa_to_copy_to.add_start_state(start) diff --git a/pyformlang/finite_automaton/nondeterministic_transition_function.py b/pyformlang/finite_automaton/nondeterministic_transition_function.py index 8162f84..030a605 100644 --- a/pyformlang/finite_automaton/nondeterministic_transition_function.py +++ b/pyformlang/finite_automaton/nondeterministic_transition_function.py @@ -2,8 +2,6 @@ A nondeterministic transition function """ -# pylint: disable=function-redefined - from typing import Dict, Set, Iterable, Tuple from copy import deepcopy diff --git a/pyformlang/finite_automaton/transition_function.py b/pyformlang/finite_automaton/transition_function.py index 7ff90a9..4682e89 100644 --- a/pyformlang/finite_automaton/transition_function.py +++ b/pyformlang/finite_automaton/transition_function.py @@ -2,8 +2,6 @@ General transition function representation """ -# pylint: disable=function-redefined - from typing import Dict, Set, Tuple, Iterable, Iterator from abc import abstractmethod diff --git a/pyformlang/regular_expression/regex.py b/pyformlang/regular_expression/regex.py index 6987977..80274b0 100644 --- a/pyformlang/regular_expression/regex.py +++ b/pyformlang/regular_expression/regex.py @@ -88,8 +88,8 @@ class Regex(RegexReader): """ def __init__(self, regex: str) -> None: - self.sons: List[Regex] = [] # type: ignore super().__init__(regex) + self.sons: List[Regex] # type: ignore self._counter = 0 self._enfa: Optional[EpsilonNFA] = None @@ -138,7 +138,7 @@ def get_number_operators(self) -> int: def to_minimal_dfa(self) -> DeterministicFiniteAutomaton: """ Builds minimal dfa from current regex """ - enfa = self.to_epsilon_nfa() + enfa = self._to_epsilon_nfa_internal() dfa = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa) return dfa.minimize() @@ -157,19 +157,16 @@ def to_epsilon_nfa(self) -> EpsilonNFA: >>> regex.to_epsilon_nfa() """ - return self._to_epsilon_nfa_internal(True) + return self._to_epsilon_nfa_internal().copy() - def _to_epsilon_nfa_internal(self, copy: bool) -> EpsilonNFA: - """ - Transforms the regular expression into an epsilon NFA. - Copy enfa in case of external usage. - """ + def _to_epsilon_nfa_internal(self) -> EpsilonNFA: + """ Transforms the regular expression into an epsilon NFA """ if self._enfa is None: self._enfa = EpsilonNFA() s_initial = self._set_and_get_initial_state_in_enfa(self._enfa) s_final = self._set_and_get_final_state_in_enfa(self._enfa) self._process_to_enfa(self._enfa, s_initial, s_final) - return self._enfa.copy() if copy else self._enfa + return self._enfa def _set_and_get_final_state_in_enfa(self, enfa: EpsilonNFA) -> State: s_final = self._get_next_state_enfa() @@ -567,8 +564,7 @@ def accepts(self, word: Iterable[str]) -> bool: True """ - self._enfa = self._to_epsilon_nfa_internal(False) - return self._enfa.accepts(word) + return self._to_epsilon_nfa_internal().accepts(word) @classmethod def from_finite_automaton(cls, automaton: FiniteAutomaton) -> "Regex":