diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 26f47de..caf70a6 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -3,11 +3,7 @@ name: Python package -on: - push: - branches: [ master ] - pull_request: - branches: [ master ] +on: [push, pull_request] jobs: build: @@ -51,7 +47,7 @@ jobs: junitxml-path: ./pytest.xml default-branch: master - name: Create coverage Badge - if: ${{ matrix.python-version == '3.8'}} + if: ${{ github.ref_name == 'master' && matrix.python-version == '3.8'}} uses: schneegans/dynamic-badges-action@v1.0.0 with: auth: ${{ secrets.GIST_SECRET }} diff --git a/pyformlang/cfg/cfg.py b/pyformlang/cfg/cfg.py index 4a07efd..50af3fc 100644 --- a/pyformlang/cfg/cfg.py +++ b/pyformlang/cfg/cfg.py @@ -7,10 +7,9 @@ # pylint: disable=cyclic-import from pyformlang import pda -from pyformlang.finite_automaton import FiniteAutomaton +from pyformlang.finite_automaton import DeterministicFiniteAutomaton # pylint: disable=cyclic-import from pyformlang.pda import cfg_variable_converter as cvc -from pyformlang import regular_expression from .cfg_object import CFGObject # pylint: disable=cyclic-import from .cyk_table import CYKTable, DerivationDoesNotExist @@ -788,7 +787,7 @@ def to_pda(self) -> "pda.PDA": state, []) return new_pda - def intersection(self, other: Any) -> "CFG": + def intersection(self, other: DeterministicFiniteAutomaton) -> "CFG": """ Gives the intersection of the current CFG with an other object Equivalent to: @@ -810,13 +809,6 @@ def intersection(self, other: Any) -> "CFG": When trying to intersect with something else than a regex or a finite automaton """ - if isinstance(other, regular_expression.Regex): - other = other.to_epsilon_nfa().to_deterministic() - elif isinstance(other, FiniteAutomaton): - if not other.is_deterministic(): - other = other.to_deterministic() - else: - raise NotImplementedError if other.is_empty(): return CFG() generate_empty = 
self.contains([]) and other.accepts([]) @@ -845,10 +837,12 @@ def intersection(self, other: Any) -> "CFG": return res_cfg @staticmethod - def _intersection_starting_rules(cfg, other, cv_converter): + def _intersection_starting_rules(cfg: "CFG", + other: DeterministicFiniteAutomaton, + cv_converter): start = Variable("Start") productions_temp = [] - start_other = list(other.start_states)[0] # it is deterministic + start_other = other.start_state for final_state in other.final_states: new_body = [ cv_converter.to_cfg_combined_variable( @@ -860,15 +854,17 @@ def _intersection_starting_rules(cfg, other, cv_converter): return productions_temp @staticmethod - def _intersection_when_terminal(other_fst, production, + def _intersection_when_terminal(other: DeterministicFiniteAutomaton, + production, cv_converter, states): productions_temp = [] for state_p in states: - next_states = other_fst(state_p, production.body[0].value) - if next_states: + next_state = other.get_next_state( + state_p, production.body[0].value) + if next_state: new_head = \ cv_converter.to_cfg_combined_variable( - state_p, production.head, next_states[0]) + state_p, production.head, next_state) productions_temp.append( Production(new_head, [production.body[0]], @@ -904,7 +900,7 @@ def _get_all_bodies(production, state_p, state_r, states, cv_converter): state_r)] for state_q in states] - def __and__(self, other): + def __and__(self, other: DeterministicFiniteAutomaton) -> "CFG": """ Gives the intersection of the current CFG with an other object Parameters diff --git a/pyformlang/cfg/tests/test_cfg.py b/pyformlang/cfg/tests/test_cfg.py index cb6afde..75c480a 100644 --- a/pyformlang/cfg/tests/test_cfg.py +++ b/pyformlang/cfg/tests/test_cfg.py @@ -516,7 +516,8 @@ def test_finite(self): def test_intersection(self): """ Tests the intersection with a regex """ regex = Regex("a*b*") - dfa = regex.to_epsilon_nfa() + enfa = regex.to_epsilon_nfa() + dfa = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa) symb_a 
= Symbol("a") symb_b = Symbol("b") assert dfa.accepts([symb_a, symb_a, symb_b, symb_b]) @@ -530,7 +531,7 @@ def test_intersection(self): cfg = CFG(productions=productions, start_symbol=var_s) assert cfg.contains([ter_a, ter_a, ter_b, ter_b]) assert not cfg.contains([ter_a, ter_a, ter_b]) - cfg_i = cfg.intersection(regex) + cfg_i = cfg.intersection(regex.to_minimal_dfa()) assert cfg_i.contains([ter_a, ter_a, ter_b, ter_b]) assert not cfg_i.contains([ter_a, ter_a, ter_b]) assert cfg_i.contains([]) @@ -548,7 +549,7 @@ def test_intersection_empty(self): Production(var_s, [ter_b, var_s, ter_a]), Production(var_s, [])} cfg = CFG(productions=productions, start_symbol=var_s) - cfg_i = cfg & regex + cfg_i = cfg & regex.to_minimal_dfa() assert not cfg_i def test_intersection_dfa(self): diff --git a/pyformlang/cfg/tests/test_llone_parser.py b/pyformlang/cfg/tests/test_llone_parser.py index bfe0a9c..a4a843a 100644 --- a/pyformlang/cfg/tests/test_llone_parser.py +++ b/pyformlang/cfg/tests/test_llone_parser.py @@ -250,7 +250,7 @@ def test_sentence_cfg(self): N -> gorilla | sky | carrots """) regex = Regex("georges touches (a|an) (sky|gorilla) !") - cfg_inter = cfg.intersection(regex) + cfg_inter = cfg.intersection(regex.to_minimal_dfa()) assert not cfg_inter.is_empty() assert cfg_inter.is_finite() assert not cfg_inter.contains(["georges", "sees", "a", "gorilla", "."]) diff --git a/pyformlang/finite_automaton/__init__.py b/pyformlang/finite_automaton/__init__.py index 145ab3e..e6b7db8 100644 --- a/pyformlang/finite_automaton/__init__.py +++ b/pyformlang/finite_automaton/__init__.py @@ -41,9 +41,10 @@ from .state import State from .symbol import Symbol from .epsilon import Epsilon -from .transition_function import (TransitionFunction, - DuplicateTransitionError, - InvalidEpsilonTransition) +from .deterministic_transition_function import \ + (DeterministicTransitionFunction, + DuplicateTransitionError, + InvalidEpsilonTransition) from .nondeterministic_transition_function import \ 
NondeterministicTransitionFunction @@ -54,7 +55,7 @@ "State", "Symbol", "Epsilon", - "TransitionFunction", + "DeterministicTransitionFunction", "NondeterministicTransitionFunction", "DuplicateTransitionError", "InvalidEpsilonTransition"] diff --git a/pyformlang/finite_automaton/deterministic_finite_automaton.py b/pyformlang/finite_automaton/deterministic_finite_automaton.py index 02ec309..61b4acf 100644 --- a/pyformlang/finite_automaton/deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/deterministic_finite_automaton.py @@ -2,50 +2,15 @@ Representation of a deterministic finite automaton """ -from typing import AbstractSet, Iterable, Any +from typing import Iterable, AbstractSet, Optional, Hashable, Any -import numpy as np - -# pylint: disable=cyclic-import -from .epsilon_nfa import to_single_state -from .finite_automaton import to_state, to_symbol -from .hopcroft_processing_list import HopcroftProcessingList -# pylint: disable=cyclic-import +from .state import State +from .deterministic_transition_function import DeterministicTransitionFunction +from .epsilon_nfa import EpsilonNFA from .nondeterministic_finite_automaton import NondeterministicFiniteAutomaton +from .hopcroft_processing_list import HopcroftProcessingList from .partition import Partition -from .state import State -from .symbol import Symbol -from .transition_function import TransitionFunction - - -class PreviousTransitions: - """For internal usage""" - - def __init__(self, states, symbols): - self._to_index_state = {} - self._to_index_state[None] = 0 - for i, state in enumerate(states): - self._to_index_state[state] = i + 1 - self._to_index_symbol = {} - for i, symbol in enumerate(symbols): - self._to_index_symbol[symbol] = i - self._conversion = np.empty((len(states) + 1, len(symbols)), - dtype=object) - - def add(self, next0, symbol, state): - """ Internal """ - i_next0 = self._to_index_state[next0] - i_symbol = self._to_index_symbol[symbol] - if self._conversion[i_next0, i_symbol] 
is None: - self._conversion[i_next0, i_symbol] = [state] - else: - self._conversion[i_next0, i_symbol].append(state) - - def get(self, next0, symbol): - """ Internal """ - i_next0 = self._to_index_state[next0] - i_symbol = self._to_index_symbol[symbol] - return self._conversion[i_next0, i_symbol] or [] +from .utils import to_state, to_symbol, to_single_state, PreviousTransitions class DeterministicFiniteAutomaton(NondeterministicFiniteAutomaton): @@ -101,24 +66,27 @@ class DeterministicFiniteAutomaton(NondeterministicFiniteAutomaton): """ - # pylint: disable=too-many-arguments def __init__(self, - states: AbstractSet[State] = None, - input_symbols: AbstractSet[Symbol] = None, - transition_function: TransitionFunction = None, - start_state: State = None, - final_states: AbstractSet[State] = None): - super().__init__(states, input_symbols, None, None, final_states) - start_state = to_state(start_state) - self._transition_function = transition_function or TransitionFunction() - if start_state is not None: - self._start_state = {start_state} - else: - self._start_state = {} - if start_state is not None: - self._states.add(start_state) + states: AbstractSet[Hashable] = None, + input_symbols: AbstractSet[Hashable] = None, + transition_function: DeterministicTransitionFunction = None, + start_state: Hashable = None, + final_states: AbstractSet[Hashable] = None) -> None: + start_states = {start_state} if start_state is not None else None + super().__init__(states, + input_symbols, + None, + start_states, + final_states) + self._transition_function: DeterministicTransitionFunction = \ + transition_function or DeterministicTransitionFunction() - def add_start_state(self, state: Any) -> int: + @property + def start_state(self) -> Optional[State]: + """ Gets the start state """ + return list(self._start_states)[0] if self._start_states else None + + def add_start_state(self, state: Hashable) -> int: """ Set an initial state Parameters @@ -139,11 +107,11 @@ def 
add_start_state(self, state: Any) -> int: """ state = to_state(state) - self._start_state = {state} + self._start_states = {state} self._states.add(state) return 1 - def remove_start_state(self, state: Any) -> int: + def remove_start_state(self, state: Hashable) -> int: """ remove an initial state Parameters @@ -165,12 +133,19 @@ def remove_start_state(self, state: Any) -> int: """ state = to_state(state) - if {state} == self._start_state: - self._start_state = {} + if self._start_states == {state}: + self._start_states = set() return 1 return 0 - def accepts(self, word: Iterable[Any]) -> bool: + def get_next_state(self, s_from: Hashable, symb_by: Hashable) \ + -> Optional[State]: + """ Make a call of deterministic transition function """ + s_from = to_state(s_from) + symb_by = to_symbol(symb_by) + return self._transition_function.get_next_state(s_from, symb_by) + + def accepts(self, word: Iterable[Hashable]) -> bool: """ Checks whether the dfa accepts a given word Parameters @@ -195,17 +170,11 @@ def accepts(self, word: Iterable[Any]) -> bool: """ word = [to_symbol(x) for x in word] - current_state = None - if self._start_state: - current_state = list(self._start_state)[0] + current_state = self.start_state for symbol in word: if current_state is None: return False - current_state = self._transition_function(current_state, symbol) - if current_state: - current_state = current_state[0] - else: - current_state = None + current_state = self.get_next_state(current_state, symbol) return current_state is not None and self.is_final_state(current_state) def is_deterministic(self) -> bool: @@ -226,27 +195,6 @@ def is_deterministic(self) -> bool: """ return True - def to_deterministic(self) -> "DeterministicFiniteAutomaton": - """ Transforms the current automaton into a dfa. Does nothing if the \ - automaton is already deterministic. 
- - Returns - ---------- - dfa : :class:`~pyformlang.deterministic_finite_automaton\ - .DeterministicFiniteAutomaton` - A dfa equivalent to the current nfa - - Examples - -------- - - >>> dfa0 = DeterministicFiniteAutomaton() - >>> dfa1 = dfa0.to_deterministic() - >>> dfa0.is_equivalent_to(dfa1) - True - - """ - return self - def copy(self) -> "DeterministicFiniteAutomaton": """ Copies the current DFA @@ -268,54 +216,17 @@ def copy(self) -> "DeterministicFiniteAutomaton": True """ - dfa = DeterministicFiniteAutomaton() - if self._start_state: - dfa.add_start_state(list(self._start_state)[0]) - for final in self._final_states: - dfa.add_final_state(final) - for state in self._states: - for symbol in self._input_symbols: - state_to = self._transition_function(state, symbol) - if state_to: - state_to = state_to[0] - else: - state_to = None - if state_to is not None: - dfa.add_transition(state, symbol, state_to) - return dfa + return self._copy_to(DeterministicFiniteAutomaton()) - def _get_previous_transitions(self): + def _get_previous_transitions(self) -> PreviousTransitions: previous_transitions = PreviousTransitions(self._states, self._input_symbols) for state in self._states: for symbol in self._input_symbols: - next0 = self._transition_function(state, symbol) - if next0: - next0 = next0[0] - else: - next0 = None + next0 = self.get_next_state(state, symbol) previous_transitions.add(next0, symbol, state) - for symbol in self._input_symbols: - previous_transitions.add(None, symbol, None) return previous_transitions - def _get_reachable_states(self) -> AbstractSet[State]: - """ Get all states which are reachable """ - to_process = [] - processed = set() - for state in self._start_state: - to_process.append(state) - processed.add(state) - while to_process: - current = to_process.pop() - for symbol in self._input_symbols: - next_state = self._transition_function(current, symbol) - if not next_state or next_state[0] in processed: - continue - 
to_process.append(next_state[0]) - processed.add(next_state[0]) - return processed - def minimize(self) -> "DeterministicFiniteAutomaton": """ Minimize the current DFA @@ -337,7 +248,7 @@ def minimize(self) -> "DeterministicFiniteAutomaton": True """ - if not self._start_state or not self._final_states: + if not self._start_states or not self._final_states: res = DeterministicFiniteAutomaton() res.add_start_state(State("Empty")) return res @@ -355,7 +266,7 @@ def minimize(self) -> "DeterministicFiniteAutomaton": to_new_states[state] = new_state # Build the DFA dfa = DeterministicFiniteAutomaton() - for state in self._start_state: + for state in self._start_states: dfa.add_start_state(to_new_states[state]) for state in states: if state in self._final_states: @@ -363,15 +274,76 @@ def minimize(self) -> "DeterministicFiniteAutomaton": done = set() new_state = to_new_states[state] for symbol in self._input_symbols: - for next_node in self._transition_function(state, symbol): - if next_node in states: - next_node = to_new_states[next_node] - if (next_node, symbol) not in done: - dfa.add_transition(new_state, symbol, next_node) - done.add((next_node, symbol)) + next_node = self.get_next_state(state, symbol) + if next_node and next_node in states: + next_node = to_new_states[next_node] + if (next_node, symbol) not in done: + dfa.add_transition(new_state, symbol, next_node) + done.add((next_node, symbol)) + return dfa + + @classmethod + def from_epsilon_nfa(cls, enfa: EpsilonNFA) \ + -> "DeterministicFiniteAutomaton": + """ Builds dfa equivalent to the given enfa """ + return cls._from_epsilon_nfa_internal(enfa, True) + + @classmethod + def from_nfa(cls, nfa: NondeterministicFiniteAutomaton) \ + -> "DeterministicFiniteAutomaton": + """ Builds dfa equivalent to the given nfa """ + return cls._from_epsilon_nfa_internal(nfa, False) + + @classmethod + def _from_epsilon_nfa_internal(cls, enfa: EpsilonNFA, eclose: bool) \ + -> "DeterministicFiniteAutomaton": + """ Builds dfa 
equivalent to the given automaton + + Parameters + ---------- + eclose : bool + Whether to use the epsilon closure or not + + Returns + ---------- + dfa : :class:`~pyformlang.finite_automaton\ + .DeterministicFiniteAutomaton` + A dfa equivalent to the current nfa + """ + dfa = DeterministicFiniteAutomaton() + # Add Eclose + if eclose: + start_eclose = enfa.eclose_iterable(enfa.start_states) + else: + start_eclose = enfa.start_states + start_state = to_single_state(start_eclose) + dfa.add_start_state(start_state) + to_process = [start_eclose] + processed = {start_state} + while to_process: + current = to_process.pop() + s_from = to_single_state(current) + for symbol in enfa.symbols: + all_trans = [enfa(x, symbol) for x in current] + state = set() + for trans in all_trans: + state.update(trans) + if not state: + continue + # Eclose added + if eclose: + state = enfa.eclose_iterable(state) + state_merged = to_single_state(state) + dfa.add_transition(s_from, symbol, state_merged) + if state_merged not in processed: + processed.add(state_merged) + to_process.append(state) + for state in current: + if state in enfa.final_states: + dfa.add_final_state(s_from) return dfa - def _get_partition(self): + def _get_partition(self) -> Partition: previous_transitions = self._get_previous_transitions() finals = [] non_finals = [] @@ -412,7 +384,12 @@ def _get_partition(self): processing_list.insert(new_class, symbol) return partition - def is_equivalent_to(self, other): + def __eq__(self, other: Any) -> bool: + if not isinstance(other, DeterministicFiniteAutomaton): + return False + return self.is_equivalent_to(other) + + def is_equivalent_to(self, other: "DeterministicFiniteAutomaton") -> bool: """ Check whether two automata are equivalent Parameters @@ -438,40 +415,32 @@ def is_equivalent_to(self, other): True """ - if not isinstance(other, DeterministicFiniteAutomaton): - other_dfa = other.to_deterministic() - return self.is_equivalent_to(other_dfa) self_minimal = self.minimize() 
other_minimal = other.minimize() return self._is_equivalent_to_minimal(self_minimal, other_minimal) - @property - def start_state(self) -> State: - """ The start state """ - return list(self._start_state)[0] - @staticmethod - def _is_equivalent_to_minimal(self_minimal, other_minimal): + def _is_equivalent_to_minimal( + self_minimal: "DeterministicFiniteAutomaton", + other_minimal: "DeterministicFiniteAutomaton") -> bool: to_process = [(self_minimal.start_state, other_minimal.start_state)] matches = {self_minimal.start_state: other_minimal.start_state} while to_process: current_self, current_other = to_process.pop() - if (self_minimal.is_final_state(current_self) - and not other_minimal.is_final_state(current_other)) or \ - (not self_minimal.is_final_state(current_self) - and other_minimal.is_final_state(current_other)): + if self_minimal.is_final_state(current_self) != \ + other_minimal.is_final_state(current_other): return False - next_self = self_minimal(current_self) - next_other = other_minimal(current_other) + next_self = list(self_minimal.get_transitions_from(current_self)) + next_other = list(other_minimal.get_transitions_from(current_other)) if len(next_self) != len(next_other): return False if len(next_self) == 0: continue - for next_temp, other_temp in zip(sorted(list(next_self), - key=lambda x: x[0].value), - sorted(list(next_other), - key=lambda x: x[0].value)): + for next_temp, other_temp in zip(sorted(next_self, + key=lambda x: hash(x[0])), + sorted(next_other, + key=lambda x: hash(x[0]))): next_symbol_self, next_state_self = next_temp next_symbol_other, next_state_other = other_temp if next_symbol_other != next_symbol_self: diff --git a/pyformlang/finite_automaton/deterministic_transition_function.py b/pyformlang/finite_automaton/deterministic_transition_function.py new file mode 100644 index 0000000..39d0a8f --- /dev/null +++ b/pyformlang/finite_automaton/deterministic_transition_function.py @@ -0,0 +1,115 @@ +""" +A deterministic transition 
function +""" + +from typing import Optional + +from .state import State +from .symbol import Symbol +from .epsilon import Epsilon +from .nondeterministic_transition_function import \ + NondeterministicTransitionFunction +from .nondeterministic_finite_automaton import InvalidEpsilonTransition + + +class DeterministicTransitionFunction(NondeterministicTransitionFunction): + """A deterministic transition function in a finite automaton + + This is a deterministic transition function. + + Attributes + ---------- + _transitions : dict + A dictionary which contains the transitions of a finite automaton + + Examples + -------- + + >>> transition = TransitionFunction() + >>> transition.add_transition(State(0), Symbol("a"), State(1)) + + Creates a transition function and adds a transition. + + """ + + def add_transition(self, + s_from: State, + symb_by: Symbol, + s_to: State) -> int: + """ Adds a new transition to the function + + Parameters + ---------- + s_from : :class:`~pyformlang.finite_automaton.State` + The source state + symb_by : :class:`~pyformlang.finite_automaton.Symbol` + The transition symbol + s_to : :class:`~pyformlang.finite_automaton.State` + The destination state + + + Returns + -------- + done : int + Always 1 + + Raises + -------- + DuplicateTransitionError + If the transition already exists + + Examples + -------- + + >>> transition = TransitionFunction() + >>> transition.add_transition(State(0), Symbol("a"), State(1)) + + """ + if symb_by == Epsilon(): + raise InvalidEpsilonTransition() + s_to_old = self.get_next_state(s_from, symb_by) + if s_to_old is not None and s_to_old != s_to: + raise DuplicateTransitionError(s_from, + symb_by, + s_to, + s_to_old) + return super().add_transition(s_from, symb_by, s_to) + + def get_next_state(self, s_from: State, symb_by: Symbol) -> Optional[State]: + """ Make a call of deterministic transition function """ + next_state = self(s_from, symb_by) + return list(next_state)[0] if next_state else None + + def 
is_deterministic(self) -> bool: + """ Whether the transition function is deterministic """ + return True + + +class DuplicateTransitionError(Exception): + """ Signals a duplicated transition + + Parameters + ---------- + s_from : :class:`~pyformlang.finite_automaton.State` + The source state + symb_by : :class:`~pyformlang.finite_automaton.Symbol` + The transition symbol + s_to : :class:`~pyformlang.finite_automaton.State` + The wanted new destination state + s_to_old : :class:`~pyformlang.finite_automaton.State` + The old destination state + + """ + + def __init__(self, + s_from: State, + symb_by: Symbol, + s_to: State, + s_to_old: State) -> None: + super().__init__("Transition from " + str(s_from) + + " by " + str(symb_by) + + " goes to " + str(s_to_old) + " not " + str(s_to)) + self.s_from = s_from + self.symb_by = symb_by + self.s_to = s_to + self.s_to_old = s_to_old diff --git a/pyformlang/finite_automaton/doubly_linked_list.py b/pyformlang/finite_automaton/doubly_linked_list.py index d81ce46..ce09237 100644 --- a/pyformlang/finite_automaton/doubly_linked_list.py +++ b/pyformlang/finite_automaton/doubly_linked_list.py @@ -1,29 +1,30 @@ """A doubly linked list""" +from typing import Iterable, Optional, Any from .doubly_linked_node import DoublyLinkedNode -class DoublyLinkedList: +class DoublyLinkedList(Iterable[DoublyLinkedNode]): """ A doubly linked list """ - def __init__(self): - self.first = None - self.last = None + def __init__(self) -> None: + self.first: Optional[DoublyLinkedNode] = None + self.last: Optional[DoublyLinkedNode] = None self.size = 0 - self._current_node = None + self._current_node: Optional[DoublyLinkedNode] = None - def append(self, value): + def append(self, value: Any) -> DoublyLinkedNode: """ Appends an element """ if self.last is not None: self.last = self.last.append(value) else: - node = DoublyLinkedNode(self, value=value) + node = DoublyLinkedNode(value=value) self.first = node self.last = node - self.size += 1 + self.size += 1 
return self.last - def delete(self, node): + def delete(self, node: DoublyLinkedNode) -> None: """ Delete an element """ if node.next_node is not None: node.next_node.previous_node = node.previous_node @@ -35,14 +36,14 @@ def delete(self, node): self.first = node.next_node self.size -= 1 - def __len__(self): + def __len__(self) -> int: return self.size - def __iter__(self): + def __iter__(self) -> "DoublyLinkedList": self._current_node = self.first return self - def __next__(self): + def __next__(self) -> DoublyLinkedNode: if self._current_node is None: raise StopIteration res = self._current_node diff --git a/pyformlang/finite_automaton/doubly_linked_node.py b/pyformlang/finite_automaton/doubly_linked_node.py index f776386..7621fff 100644 --- a/pyformlang/finite_automaton/doubly_linked_node.py +++ b/pyformlang/finite_automaton/doubly_linked_node.py @@ -1,24 +1,20 @@ """Linked nodes in both direction""" +from typing import Optional, Any + class DoublyLinkedNode: """Represents doubly linked list of nodes from a doubly linked list""" def __init__(self, - list_in, - next_node=None, - previous_node=None, - value=None): - self.next_node = next_node - self.previous_node = previous_node - self.value = value - self.list_in = list_in - - def delete(self): - """Delete the current node""" - self.list_in.delete(self) - - def append(self, value): + next_node: "DoublyLinkedNode" = None, + previous_node: "DoublyLinkedNode" = None, + value: Any = None) -> None: + self.next_node: Optional[DoublyLinkedNode] = next_node + self.previous_node: Optional[DoublyLinkedNode] = previous_node + self.value: Any = value + + def append(self, value: Any) -> "DoublyLinkedNode": """ Append a new node with the given value @@ -33,9 +29,6 @@ def append(self, value): The created node """ - next_node = DoublyLinkedNode(self.list_in, self.next_node, self, value) - if self.next_node is None: - self.list_in.last = next_node + next_node = DoublyLinkedNode(self.next_node, self, value) self.next_node = 
next_node - self.list_in.size += 1 return next_node diff --git a/pyformlang/finite_automaton/epsilon.py b/pyformlang/finite_automaton/epsilon.py index 75c81ee..431a98c 100644 --- a/pyformlang/finite_automaton/epsilon.py +++ b/pyformlang/finite_automaton/epsilon.py @@ -2,6 +2,7 @@ Represents an epsilon transition """ +from typing import Any from .symbol import Symbol @@ -15,13 +16,11 @@ class Epsilon(Symbol): # pylint: disable=too-few-public-methods """ - def __init__(self): + def __init__(self) -> None: super().__init__("epsilon") - def __hash__(self): + def __hash__(self) -> int: return hash("EPSILON TRANSITION") - def __eq__(self, other): - if isinstance(other, Epsilon): - return True - return False + def __eq__(self, other: Any) -> bool: + return isinstance(other, Epsilon) diff --git a/pyformlang/finite_automaton/epsilon_nfa.py b/pyformlang/finite_automaton/epsilon_nfa.py index 9d74542..626019d 100644 --- a/pyformlang/finite_automaton/epsilon_nfa.py +++ b/pyformlang/finite_automaton/epsilon_nfa.py @@ -2,25 +2,21 @@ Nondeterministic Automaton with epsilon transitions """ -from typing import Set, Iterable, AbstractSet, Any +from typing import Iterable, Set, AbstractSet, Hashable +from networkx import MultiDiGraph -# pylint: disable=cyclic-import -from pyformlang import finite_automaton - -from .epsilon import Epsilon from .state import State from .symbol import Symbol +from .epsilon import Epsilon from .nondeterministic_transition_function import \ NondeterministicTransitionFunction -from .regexable import Regexable from .finite_automaton import FiniteAutomaton -from .finite_automaton import to_state, to_symbol +from .utils import to_state, to_symbol -class EpsilonNFA(Regexable, FiniteAutomaton): +class EpsilonNFA(FiniteAutomaton): """ Represents an epsilon NFA - Parameters ---------- states : set of :class:`~pyformlang.finite_automaton.State`, optional @@ -63,14 +59,13 @@ class EpsilonNFA(Regexable, FiniteAutomaton): """ - # pylint: disable=too-many-arguments def 
__init__( self, - states: AbstractSet[State] = None, - input_symbols: AbstractSet[Symbol] = None, + states: AbstractSet[Hashable] = None, + input_symbols: AbstractSet[Hashable] = None, transition_function: NondeterministicTransitionFunction = None, - start_state: AbstractSet[State] = None, - final_states: AbstractSet[State] = None): + start_states: AbstractSet[Hashable] = None, + final_states: AbstractSet[Hashable] = None) -> None: super().__init__() if states is not None: states = {to_state(x) for x in states} @@ -78,25 +73,23 @@ def __init__( if input_symbols is not None: input_symbols = {to_symbol(x) for x in input_symbols} self._input_symbols = input_symbols or set() - self._transition_function = \ - transition_function or NondeterministicTransitionFunction() - if start_state is not None: - start_state = {to_state(x) for x in start_state} - self._start_state = start_state or set() + self._transition_function = transition_function \ + or NondeterministicTransitionFunction() + if start_states is not None: + start_states = {to_state(x) for x in start_states} + self._start_states = start_states or set() if final_states is not None: final_states = {to_state(x) for x in final_states} self._final_states = final_states or set() for state in self._final_states: - if state is not None and state not in self._states: - self._states.add(state) - for state in self._start_state: - if state is not None and state not in self._states: - self._states.add(state) - - def _get_next_states_iterable(self, - current_states: Iterable[State], - symbol: Symbol) \ - -> Set[State]: + self._states.add(state) + for state in self._start_states: + self._states.add(state) + + def _get_next_states_iterable( + self, + current_states: Iterable[State], + symbol: Symbol) -> Set[State]: """ Gives the set of next states, starting from a set of states Parameters @@ -114,12 +107,10 @@ def _get_next_states_iterable(self, """ next_states = set() for current_state in current_states: - next_states_temp = 
self._transition_function(current_state, - symbol) - next_states = next_states.union(next_states_temp) + next_states.update(self(current_state, symbol)) return next_states - def accepts(self, word: Iterable[Any]) -> bool: + def accepts(self, word: Iterable[Hashable]) -> bool: """ Checks whether the epsilon nfa accepts a given word Parameters @@ -148,7 +139,7 @@ def accepts(self, word: Iterable[Any]) -> bool: """ word = [to_symbol(x) for x in word] - current_states = self.eclose_iterable(self._start_state) + current_states = self.eclose_iterable(self._start_states) for symbol in word: if symbol == Epsilon(): continue @@ -157,7 +148,7 @@ def accepts(self, word: Iterable[Any]) -> bool: current_states = self.eclose_iterable(next_states) return any(self.is_final_state(x) for x in current_states) - def eclose_iterable(self, states: Iterable[Any]) -> Set[State]: + def eclose_iterable(self, states: Iterable[Hashable]) -> Set[State]: """ Compute the epsilon closure of a collection of states Parameters @@ -184,10 +175,10 @@ def eclose_iterable(self, states: Iterable[Any]) -> Set[State]: states = [to_state(x) for x in states] res = set() for state in states: - res = res.union(self.eclose(state)) + res.update(self.eclose(state)) return res - def eclose(self, state: Any) -> Set[State]: + def eclose(self, state: Hashable) -> Set[State]: """ Compute the epsilon closure of a state Parameters @@ -247,116 +238,10 @@ def is_deterministic(self) -> bool: False """ - return len(self._start_state) <= 1 \ - and self._transition_function.is_deterministic()\ + return len(self._start_states) <= 1 \ + and self._transition_function.is_deterministic() \ and all({x} == self.eclose(x) for x in self._states) - def remove_epsilon_transitions(self) -> "NondeterministicFiniteAutomaton": - """ Removes the epsilon transitions from the automaton - - Returns - ---------- - dfa : :class:`~pyformlang.finite_automaton.\ -NondeterministicFiniteAutomaton` - A non-deterministic finite automaton equivalent to 
the current \ -nfa, with no epsilon transition - """ - from pyformlang.finite_automaton import NondeterministicFiniteAutomaton - nfa = NondeterministicFiniteAutomaton() - for state in self._start_state: - nfa.add_start_state(state) - for state in self._final_states: - nfa.add_final_state(state) - start_eclose = self.eclose_iterable(self._start_state) - for state in start_eclose: - nfa.add_start_state(state) - for state in self._states: - eclose = self.eclose(state) - for e_state in eclose: - if e_state in self._final_states: - nfa.add_final_state(state) - for symb in self._input_symbols: - for next_state in self._transition_function(e_state, symb): - nfa.add_transition(state, symb, next_state) - return nfa - - def _to_deterministic_internal(self, - eclose: bool) \ - -> "DeterministicFiniteAutomaton": - """ Transforms the epsilon-nfa into a dfa - - Parameters - ---------- - eclose : bool - Whether to use the epsilon closure or not - - Returns - ---------- - dfa : :class:`~pyformlang.finite_automaton\ - .DeterministicFiniteAutomaton` - A dfa equivalent to the current nfa - """ - dfa = finite_automaton.DeterministicFiniteAutomaton() - # Add Eclose - if eclose: - start_eclose = self.eclose_iterable(self._start_state) - else: - start_eclose = self._start_state - start_state = to_single_state(start_eclose) - dfa.add_start_state(start_state) - to_process = [start_eclose] - processed = {start_state} - while to_process: - current = to_process.pop() - s_from = to_single_state(current) - for symb in self._input_symbols: - all_trans = [self._transition_function(x, symb) - for x in current] - state = set() - for trans in all_trans: - state = state.union(trans) - if not state: - continue - # Eclose added - if eclose: - state = self.eclose_iterable(state) - state_merged = to_single_state(state) - dfa.add_transition(s_from, symb, state_merged) - if state_merged not in processed: - processed.add(state_merged) - to_process.append(state) - for state in current: - if state in 
self._final_states: - dfa.add_final_state(s_from) - return dfa - - def to_deterministic(self) -> "DeterministicFiniteAutomaton": - """ Transforms the epsilon-nfa into a dfa - - Returns - ---------- - dfa : :class:`~pyformlang.finite_automaton\ - .DeterministicFiniteAutomaton` - A dfa equivalent to the current nfa - - Examples - -------- - - >>> enfa = EpsilonNFA() - >>> enfa.add_transitions([(0, "abc", 1), (0, "d", 1), \ - (0, "epsilon", 2)]) - >>> enfa.add_start_state(0) - >>> enfa.add_final_state(1) - >>> dfa = enfa.to_deterministic() - >>> dfa.is_deterministic() - True - - >>> enfa.is_equivalent_to(dfa) - True - - """ - return self._to_deterministic_internal(True) - def copy(self) -> "EpsilonNFA": """ Copies the current Epsilon NFA @@ -378,31 +263,29 @@ def copy(self) -> "EpsilonNFA": True """ - enfa = EpsilonNFA() - for start in self._start_state: - enfa.add_start_state(start) - for final in self._final_states: - enfa.add_final_state(final) - for state in self._states: - for symbol in self._input_symbols: - states = self._transition_function(state, symbol) - for state_to in states: - enfa.add_transition(state, symbol, state_to) - states = self._transition_function(state, Epsilon()) - for state_to in states: - enfa.add_transition(state, Epsilon(), state_to) - return enfa + return self._copy_to(EpsilonNFA()) - def __copy__(self): - return self.copy() + @classmethod + def from_networkx(cls, graph: MultiDiGraph) -> "EpsilonNFA": + """ + Import a networkx graph into an finite state automaton. \ + The imported graph requires to have the good format, i.e. 
to come \ + from the function to_networkx - def to_regex(self) -> "Regex": - """ Transforms the EpsilonNFA to a regular expression + Parameters + ---------- + graph : + The graph representation of the automaton Returns - ---------- - regex : :class:`~pyformlang.regular_expression.Regex` - A regular expression equivalent to the current Epsilon NFA + ------- + enfa : + A epsilon nondeterministic finite automaton read from the graph + + TODO + ------- + * We lose the type of the node value if going through a dot file + * Explain the format Examples -------- @@ -412,95 +295,24 @@ def to_regex(self) -> "Regex": (0, "epsilon", 2)]) >>> enfa.add_start_state(0) >>> enfa.add_final_state(1) - >>> regex = enfa.to_regex() - >>> regex.accepts(["abc"]) - True - - """ - from pyformlang.regular_expression import Regex - enfas = [self.copy() for _ in self._final_states] - final_states = list(self._final_states) - for i in range(len(self._final_states)): - for j in range(len(self._final_states)): - if i != j: - enfas[j].remove_final_state(final_states[i]) - regex_l = [] - for enfa in enfas: - # pylint: disable=protected-access - enfa._remove_all_basic_states() - # pylint: disable=protected-access - regex_sub = enfa._get_regex_simple() - if regex_sub: - regex_l.append(regex_sub) - res = "+".join(regex_l) - return Regex(res) - - def _get_regex_simple(self) -> str: - """ Get the regex of an automaton when it only composed of a start and - a final state - - CAUTION: For internal use only! 
+ >>> graph = enfa.to_networkx() + >>> enfa_from_nx = EpsilonNFA.from_networkx(graph) - Returns - ---------- - regex : str - A regex representing the automaton - """ - if not self._final_states or not self._start_state: - return "" - if len(self._final_states) != 1 or len(self._start_state) != 1: - raise ValueError("The automaton is not simple enough!") - if self._start_state == self._final_states: - # We are suppose to have only one good symbol - for symbol in self._input_symbols: - out_states = self._transition_function( - list(self._start_state)[0], symbol) - if out_states: - return "(" + str(symbol.value) + ")*" - return "epsilon" - start_to_start, start_to_end, end_to_start, end_to_end = \ - self._get_bi_transitions() - return get_regex_sub(start_to_start, - start_to_end, - end_to_start, - end_to_end) - - def _get_bi_transitions(self) -> (str, str, str, str): - """ Internal method to compute the transition in the case of a \ - simple automaton - - Returns - start_to_start : str - The transition from the start state to the start state - start_to_end : str - The transition from the start state to the end state - end_to_start : str - The transition from the end state to the start state - end_to_end : str - The transition from the end state to the end state - ---------- """ - start = list(self._start_state)[0] - end = list(self._final_states)[0] - start_to_start = "epsilon" - start_to_end = "" - end_to_end = "epsilon" - end_to_start = "" - for state in self._states: - for symbol in self._input_symbols.union({Epsilon()}): - for out_state in self._transition_function(state, symbol): - symbol_str = str(symbol.value) - if not symbol_str.isalnum(): - symbol_str = "(" + symbol_str + ")" - if state == start and out_state == start: - start_to_start = symbol_str - elif state == start and out_state == end: - start_to_end = symbol_str - elif state == end and out_state == start: - end_to_start = symbol_str - elif state == end and out_state == end: - end_to_end = symbol_str - 
return start_to_start, start_to_end, end_to_start, end_to_end + enfa = EpsilonNFA() + for s_from in graph: + for s_to in graph[s_from]: + for transition in graph[s_from][s_to].values(): + if "label" in transition: + enfa.add_transition(s_from, + transition["label"], + s_to) + for node in graph.nodes: + if graph.nodes[node].get("is_start", False): + enfa.add_start_state(node) + if graph.nodes[node].get("is_final", False): + enfa.add_final_state(node) + return enfa def get_complement(self) -> "EpsilonNFA": """ Get the complement of the current Epsilon NFA @@ -531,7 +343,7 @@ def get_complement(self) -> "EpsilonNFA": """ enfa = self.copy() - trash = State("TrashNode") + trash = self.__get_new_state("Trash") enfa.add_final_state(trash) for state in self._states: if state in self._final_states: @@ -550,7 +362,7 @@ def get_complement(self) -> "EpsilonNFA": enfa.add_transition(trash, symbol, trash) return enfa - def __neg__(self): + def __neg__(self) -> "EpsilonNFA": """ Get the complement of the current Epsilon NFA Returns @@ -603,26 +415,26 @@ def get_intersection(self, other: "EpsilonNFA") -> "EpsilonNFA": processed = set() for st0 in self.eclose_iterable(self.start_states): for st1 in other.eclose_iterable(other.start_states): - enfa.add_start_state(combine_state_pair(st0, st1)) + enfa.add_start_state(self.__combine_state_pair(st0, st1)) to_process.append((st0, st1)) processed.add((st0, st1)) for st0 in self.final_states: for st1 in other.final_states: - enfa.add_final_state(combine_state_pair(st0, st1)) + enfa.add_final_state(self.__combine_state_pair(st0, st1)) while to_process: st0, st1 = to_process.pop() - current_state = combine_state_pair(st0, st1) + current_state = self.__combine_state_pair(st0, st1) for symb in symbols: for new_s0 in self.eclose_iterable(self(st0, symb)): for new_s1 in other.eclose_iterable(other(st1, symb)): - state = combine_state_pair(new_s0, new_s1) + state = self.__combine_state_pair(new_s0, new_s1) enfa.add_transition(current_state, 
symb, state) if (new_s0, new_s1) not in processed: processed.add((new_s0, new_s1)) to_process.append((new_s0, new_s1)) return enfa - def __and__(self, other): + def __and__(self, other: "EpsilonNFA") -> "EpsilonNFA": """ Computes the intersection of two Epsilon NFAs Parameters @@ -637,9 +449,49 @@ def __and__(self, other): """ return self.get_intersection(other) - def get_difference(self, other: "EpsilonNFA") \ - -> "EpsilonNFA": - """ Compute the difference with another Epsilon NFA + def get_union(self, other: "EpsilonNFA") -> "EpsilonNFA": + """ Computes the union with given Epsilon NFA """ + union = EpsilonNFA() + self.__copy_transitions_marked(self, union, 0) + self.__copy_transitions_marked(other, union, 1) + new_start = State("Start") + union.add_start_state(new_start) + for self_start in self.start_states: + union.add_transition(new_start, Epsilon(), (0, self_start.value)) + for other_start in other.start_states: + union.add_transition(new_start, Epsilon(), (1, other_start.value)) + for self_final in self.final_states: + union.add_final_state((0, self_final.value)) + for other_final in other.final_states: + union.add_final_state((1, other_final.value)) + return union + + def __or__(self, other: "EpsilonNFA") -> "EpsilonNFA": + """ Computes the union with given Epsilon NFA """ + return self.get_union(other) + + def concatenate(self, other: "EpsilonNFA") -> "EpsilonNFA": + """ Computes the concatenation of two Epsilon NFAs """ + concatenation = EpsilonNFA() + self.__copy_transitions_marked(self, concatenation, 0) + self.__copy_transitions_marked(other, concatenation, 1) + for self_start in self.start_states: + concatenation.add_start_state((0, self_start.value)) + for other_final in other.final_states: + concatenation.add_final_state((1, other_final.value)) + for self_final in self.final_states: + for other_start in other.start_states: + concatenation.add_transition((0, self_final.value), + Epsilon(), + (1, other_start.value)) + return concatenation + + def 
__add__(self, other: "EpsilonNFA") -> "EpsilonNFA": + """ Computes the concatenation of two Epsilon NFAs """ + return self.concatenate(other) + + def get_difference(self, other: "EpsilonNFA") -> "EpsilonNFA": + """ Computes the difference with another Epsilon NFA Equivalent to: @@ -680,7 +532,7 @@ def get_difference(self, other: "EpsilonNFA") \ other.add_symbol(symbol) return self.get_intersection(other.get_complement()) - def __sub__(self, other): + def __sub__(self, other: "EpsilonNFA") -> "EpsilonNFA": """ Compute the difference with another Epsilon NFA Equivalent to: @@ -728,13 +580,13 @@ def reverse(self) -> "EpsilonNFA": enfa.add_transition(state1, symbol, state0) for state1 in self._transition_function(state0, Epsilon()): enfa.add_transition(state1, Epsilon(), state0) - for start in self._start_state: + for start in self._start_states: enfa.add_final_state(start) for final in self._final_states: enfa.add_start_state(final) return enfa - def __invert__(self): + def __invert__(self) -> "EpsilonNFA": """ Compute the reversed EpsilonNFA Returns @@ -744,6 +596,18 @@ def __invert__(self): """ return self.reverse() + def kleene_star(self) -> "EpsilonNFA": + """ Compute the kleene closure of current EpsilonNFA """ + new_start = self.__get_new_state("Start") + kleene_closure = EpsilonNFA(start_states={new_start}, + final_states={new_start}) + kleene_closure.add_transitions(iter(self)) + for old_start in self.start_states: + kleene_closure.add_transition(new_start, Epsilon(), old_start) + for final_state in self.final_states: + kleene_closure.add_transition(final_state, Epsilon(), new_start) + return kleene_closure + def is_empty(self) -> bool: """ Checks if the language represented by the FSM is empty or not @@ -766,7 +630,7 @@ def is_empty(self) -> bool: """ to_process = [] processed = set() - for start in self._start_state: + for start in self._start_states: to_process.append(start) processed.add(start) while to_process: @@ -784,194 +648,30 @@ def is_empty(self) -> 
bool: processed.add(state) return True - def _remove_all_basic_states(self): - """ Remove all states which are not the start state or a final state - - - CAREFUL: This method modifies the current automaton, for internal usage - only! - - The function _create_or_transitions is supposed to be called before - calling this function - """ - self._create_or_transitions() - states = self._states.copy() - for state in states: - if (state not in self._start_state - and state not in self._final_states): - self._remove_state(state) - - def _remove_state(self, state: State): - """ Removes a given state from the epsilon NFA - - CAREFUL: This method modifies the current automaton, for internal usage - only! - - The function _create_or_transitions is supposed to be called before - calling this function - - Parameters - ---------- - state : :class:`~pyformlang.finite_automaton.State` - The state to remove - - """ - # First compute all endings - out_transitions = {} - for symbol in self._input_symbols.union({Epsilon()}): - out_states = self._transition_function(state, symbol).copy() - for out_state in out_states: - out_transitions[out_state] = str(symbol.value) - self.remove_transition(state, symbol, out_state) - if state in out_transitions: - to_itself = "(" + out_transitions[state] + ")*" - del out_transitions[state] - for out_state in list(out_transitions.keys()): - out_transitions[out_state] = to_itself + "." + \ - out_transitions[out_state] - input_symbols = self._input_symbols.copy().union({Epsilon()}) - for in_state in self._states: - if in_state == state: - continue - for symbol in input_symbols: - out_states = self._transition_function(in_state, symbol) - if state not in out_states: - continue - symbol_str = "(" + str(symbol.value) + ")" - self.remove_transition(in_state, symbol, state) - for out_state, next_symb in out_transitions.items(): - new_symbol = Symbol(symbol_str + "." 
+ next_symb) - self.add_transition(in_state, new_symbol, out_state) - self._states.remove(state) - # We make sure the automaton has the good structure - self._create_or_transitions() - - def minimize(self) -> "DeterministicFiniteAutomaton": - """ Minimize the current epsilon NFA - - Returns - ---------- - dfa : :class:`~pyformlang.deterministic_finite_automaton\ - .DeterministicFiniteAutomaton` - The minimal DFA - - Examples - -------- - - >>> enfa = EpsilonNFA() - >>> enfa.add_transitions([(0, "abc", 1), (0, "d", 1), \ - (0, "epsilon", 2)]) - >>> enfa.add_start_state(0) - >>> enfa.add_final_state(1) - >>> dfa_minimal = enfa.minimize() - >>> dfa_minimal.is_equivalent(enfa) - True + def __bool__(self) -> bool: + return not self.is_empty() + def __get_new_state(self, prefix: str) -> State: """ - return self.to_deterministic().minimize() - - def _create_or_transitions(self): - """ Creates a OR transition instead of several connections - - CAREFUL: This method modifies the automaton and is designed for \ - internal use only! + Get a state that wasn't previously in automaton + starting with given string. 
""" - for state in self._states: - new_transitions = {} - input_symbols = self._input_symbols.copy().union({Epsilon()}) - for symbol in input_symbols: - out_states = self._transition_function(state, symbol) - out_states = out_states.copy() - symbol_str = str(symbol.value) - for out_state in out_states: - self.remove_transition(state, symbol, out_state) - base = new_transitions.setdefault(out_state, "") - if "+" in symbol_str: - symbol_str = "(" + symbol_str + ")" - if base: - new_transitions[out_state] = "((" + base + ")+(" + \ - symbol_str + "))" - else: - new_transitions[out_state] = symbol_str - for out_state, next_symb in new_transitions.items(): - self.add_transition(state, - next_symb, - out_state) - - def __bool__(self): - return not self.is_empty() - - -def get_temp(start_to_end: str, end_to_start: str, end_to_end: str) \ - -> (str, str): - """ Gets a temp values in the computation of the simple automaton regex """ - temp = "epsilon" - if (start_to_end != "epsilon" - or end_to_end != "epsilon" - or end_to_start != "epsilon"): - temp = "" - if start_to_end != "epsilon": - temp = start_to_end - if end_to_end != "epsilon": - if temp: - temp += "." + end_to_end + "*" - else: - temp = end_to_end + "*" - part1 = temp - if not part1: - part1 = "epsilon" - if end_to_start != "epsilon": - if temp: - temp += "." + end_to_start - else: - temp = end_to_start - if not end_to_start: - temp = "" - return (temp, part1) - - -def get_regex_sub(start_to_start: str, - start_to_end: str, - end_to_start: str, - end_to_end: str) -> str: - """ Combines the transitions in the regex simple function """ - if not start_to_end: - return "" - temp, part1 = get_temp(start_to_end, end_to_start, end_to_end) - part0 = "epsilon" - if start_to_start != "epsilon": - if temp: - part0 = "(" + start_to_start + "+" + temp + ")*" - else: - part0 = "(" + start_to_start + ")*" - elif temp != "epsilon" and temp: - part0 = "(" + temp + ")*" - return "(" + part0 + "." 
+ part1 + ")" - - -def to_single_state(l_states: Iterable[State]) -> State: - """ Merge a list of states - - Parameters - ---------- - l_states : list of :class:`~pyformlang.finite_automaton.State` - A list of states - - Returns - ---------- - state : :class:`~pyformlang.finite_automaton.State` - The merged state - """ - values = [] - for state in l_states: - if state is not None: - values.append(str(state.value)) - else: - values.append("TRASH") - values = sorted(values) - return State(";".join(values)) - - -def combine_state_pair(state0, state1): - """ Combine two states """ - return State(str(state0.value) + "; " + str(state1.value)) + existing_values = set(state.value for state in self.states) + while prefix in existing_values: + prefix += '`' + return State(prefix) + + @staticmethod + def __copy_transitions_marked(fa_to_add_from: FiniteAutomaton, + fa_to_add_to: FiniteAutomaton, + mark: int) -> None: + """ Copy transitions from one FA to another with each state marked """ + for s_from, symb_by, s_to in fa_to_add_from: + fa_to_add_to.add_transition((mark, s_from.value), + symb_by, + (mark, s_to.value)) + + @staticmethod + def __combine_state_pair(state0: State, state1: State) -> State: + """ Combine two states """ + return State(str(state0.value) + "; " + str(state1.value)) diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index 92bdf45..efc7392 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -1,22 +1,25 @@ """ A general finite automaton representation """ -from typing import List, Any, Union - -import networkx as nx +from typing import Dict, List, Set, Tuple, \ + Iterable, Iterator, Optional, Hashable, Any, TypeVar +from abc import abstractmethod +from collections import deque +from networkx import MultiDiGraph from networkx.drawing.nx_pydot import write_dot from pyformlang.fst import FST -# pylint: disable=cyclic-import -from 
pyformlang import finite_automaton -from .epsilon import Epsilon from .state import State from .symbol import Symbol +from .epsilon import Epsilon +from .transition_function import TransitionFunction +from .utils import to_state, to_symbol +AutomatonT = TypeVar("AutomatonT", bound="FiniteAutomaton") -class FiniteAutomaton: - """ Represents a general finite automaton +class FiniteAutomaton(Iterable[Tuple[State, Symbol, State]]): + """ Represents a general finite automaton Attributes ---------- @@ -36,15 +39,44 @@ class FiniteAutomaton: A set of final or accepting states. It is a subset of states. """ - def __init__(self): - self._states = set() - self._input_symbols = set() - self._transition_function = None - self._start_state = set() - self._final_states = set() + @abstractmethod + def __init__(self) -> None: + self._states: Set[State] + self._input_symbols: Set[Symbol] + self._transition_function: TransitionFunction + self._start_states: Set[State] + self._final_states: Set[State] + + @property + def states(self) -> Set[State]: + """ Gives the states + + Returns + ---------- + states : set of :class:`~pyformlang.finite_automaton.State` + The states + """ + return self._states + + @property + def symbols(self) -> Set[Symbol]: + """The symbols""" + return self._input_symbols + + @property + def start_states(self) -> Set[State]: + """The start states""" + return self._start_states - def add_transition(self, s_from: Any, symb_by: Any, - s_to: Any) -> int: + @property + def final_states(self) -> Set[State]: + """The final states""" + return self._final_states + + def add_transition(self, + s_from: Hashable, + symb_by: Hashable, + s_to: Hashable) -> int: """ Adds a transition to the nfa Parameters @@ -84,7 +116,8 @@ def add_transition(self, s_from: Any, symb_by: Any, self._input_symbols.add(symb_by) return temp - def add_transitions(self, transitions_list): + def add_transitions(self, transitions_list: \ + Iterable[Tuple[Hashable, Hashable, Hashable]]) -> int: """ Adds 
several transitions to the automaton @@ -118,8 +151,10 @@ def add_transitions(self, transitions_list): temp = self.add_transition(s_from, symb_by, s_to) return temp - def remove_transition(self, s_from: State, symb_by: Symbol, - s_to: State) -> int: + def remove_transition(self, + s_from: Hashable, + symb_by: Hashable, + s_to: Hashable) -> int: """ Remove a transition of the nfa Parameters @@ -152,17 +187,6 @@ def remove_transition(self, s_from: State, symb_by: Symbol, symb_by, s_to) - @property - def states(self): - """ Gives the states - - Returns - ---------- - states : set of :class:`~pyformlang.finite_automaton.State` - The states - """ - return self._states - def get_number_transitions(self) -> int: """ Gives the number of transitions @@ -183,17 +207,7 @@ def get_number_transitions(self) -> int: """ return self._transition_function.get_number_transitions() - @property - def symbols(self): - """The symbols""" - return self._input_symbols - - @property - def final_states(self): - """The final states""" - return self._final_states - - def add_start_state(self, state: Any) -> int: + def add_start_state(self, state: Hashable) -> int: """ Set an initial state Parameters @@ -216,11 +230,11 @@ def add_start_state(self, state: Any) -> int: """ state = to_state(state) - self._start_state.add(state) + self._start_states.add(state) self._states.add(state) return 1 - def remove_start_state(self, state: State) -> int: + def remove_start_state(self, state: Hashable) -> int: """ remove an initial state Parameters @@ -244,12 +258,12 @@ def remove_start_state(self, state: State) -> int: """ state = to_state(state) - if state in self._start_state: - self._start_state.remove(state) + if state in self._start_states: + self._start_states.remove(state) return 1 return 0 - def add_final_state(self, state: Any) -> int: + def add_final_state(self, state: Hashable) -> int: """ Adds a new final state Parameters @@ -277,7 +291,7 @@ def add_final_state(self, state: Any) -> int: 
self._states.add(state) return 1 - def remove_final_state(self, state: State) -> int: + def remove_final_state(self, state: Hashable) -> int: """ Remove a final state Parameters @@ -306,7 +320,7 @@ def remove_final_state(self, state: State) -> int: return 1 return 0 - def __call__(self, state: Any, symbol: Any = None) -> List[State]: + def __call__(self, s_from: Hashable, symb_by: Hashable) -> Set[State]: """ Gives the states obtained after calling a symbol on a state Calls the transition function @@ -332,13 +346,31 @@ def __call__(self, state: Any, symbol: Any = None) -> List[State]: [1] """ - # pylint: disable=not-callable - state = to_state(state) - if symbol is not None: - symbol = to_symbol(symbol) - return self._transition_function(state, symbol) + s_from = to_state(s_from) + symb_by = to_symbol(symb_by) + return self._transition_function(s_from, symb_by) + + def __contains__(self, + transition: Tuple[Hashable, Hashable, Hashable]) -> bool: + """ Whether the given transition is present in finite automaton """ + s_from, symb_by, s_to = transition + s_from = to_state(s_from) + symb_by = to_symbol(symb_by) + s_to = to_state(s_to) + return (s_from, symb_by, s_to) in self._transition_function + + def get_transitions_from(self, s_from: Hashable) \ + -> Iterable[Tuple[Symbol, State]]: + """ Gets transitions from the given state """ + s_from = to_state(s_from) + return self._transition_function.get_transitions_from(s_from) - def is_final_state(self, state: State) -> bool: + def get_next_states_from(self, s_from: Hashable) -> Set[State]: + """ Gets a set of states that are next to the given one """ + s_from = to_state(s_from) + return self._transition_function.get_next_states_from(s_from) + + def is_final_state(self, state: Hashable) -> bool: """ Checks if a state is final Parameters @@ -366,12 +398,7 @@ def is_final_state(self, state: State) -> bool: state = to_state(state) return state in self._final_states - @property - def start_states(self): - """The start 
states""" - return self._start_state - - def add_symbol(self, symbol: Symbol): + def add_symbol(self, symbol: Hashable) -> None: """ Add a symbol Parameters @@ -389,7 +416,7 @@ def add_symbol(self, symbol: Symbol): symbol = to_symbol(symbol) self._input_symbols.add(symbol) - def to_fst(self) -> "FST": + def to_fst(self) -> FST: """ Turns the finite automaton into a finite state transducer The transducers accepts only the words in the language of the \ @@ -410,11 +437,11 @@ def to_fst(self) -> "FST": """ fst = FST() - for start_state in self._start_state: + for start_state in self._start_states: fst.add_start_state(start_state.value) for final_state in self._final_states: fst.add_final_state(final_state.value) - for s_from, symb_by, s_to in self._transition_function.get_edges(): + for s_from, symb_by, s_to in self._transition_function: fst.add_transition(s_from.value, symb_by.value, s_to.value, @@ -443,7 +470,7 @@ def is_acyclic(self) -> bool: """ to_process = [] - for state in self._start_state: + for state in self._start_states: to_process.append((state, set())) while to_process: current, visited = to_process.pop() @@ -458,7 +485,7 @@ def is_acyclic(self) -> bool: to_process.append((state, visited.copy())) return True - def to_networkx(self) -> nx.MultiDiGraph: + def to_networkx(self) -> MultiDiGraph: """ Transform the current automaton into a networkx graph @@ -478,7 +505,7 @@ def to_networkx(self) -> nx.MultiDiGraph: >>> graph = enfa.to_networkx() """ - graph = nx.MultiDiGraph() + graph = MultiDiGraph() for state in self._states: graph.add_node(state.value, is_start=state in self.start_states, @@ -486,7 +513,7 @@ def to_networkx(self) -> nx.MultiDiGraph: peripheries=2 if state in self.final_states else 1, label=state.value) if state in self.start_states: - add_start_state_to_graph(graph, state) + self.__add_start_state_to_graph(graph, state) for s_from, symbol, s_to in self._transition_function.get_edges(): label_ = symbol.value if label_ == 'epsilon': @@ 
-495,55 +522,16 @@ def to_networkx(self) -> nx.MultiDiGraph: return graph @classmethod - def from_networkx(cls, graph): + @abstractmethod + def from_networkx(cls, graph: MultiDiGraph) -> "FiniteAutomaton": """ Import a networkx graph into an finite state automaton. \ The imported graph requires to have the good format, i.e. to come \ from the function to_networkx - - Parameters - ---------- - graph : - The graph representation of the automaton - - Returns - ------- - enfa : - A epsilon nondeterministic finite automaton read from the graph - - TODO - ------- - * We lose the type of the node value if going through a dot file - * Explain the format - - Examples - -------- - - >>> enfa = EpsilonNFA() - >>> enfa.add_transitions([(0, "abc", 1), (0, "d", 1), \ - (0, "epsilon", 2)]) - >>> enfa.add_start_state(0) - >>> enfa.add_final_state(1) - >>> graph = enfa.to_networkx() - >>> enfa_from_nx = EpsilonNFA.from_networkx(graph) - """ - enfa = finite_automaton.EpsilonNFA() - for s_from in graph: - for s_to in graph[s_from]: - for transition in graph[s_from][s_to].values(): - if "label" in transition: - enfa.add_transition(s_from, - transition["label"], - s_to) - for node in graph.nodes: - if graph.nodes[node].get("is_start", False): - enfa.add_start_state(node) - if graph.nodes[node].get("is_final", False): - enfa.add_final_state(node) - return enfa - - def write_as_dot(self, filename): + raise NotImplementedError + + def write_as_dot(self, filename: str) -> None: """ Write the automaton in dot format into a file @@ -564,55 +552,90 @@ def write_as_dot(self, filename): """ write_dot(self.to_networkx(), filename) - def is_equivalent_to(self, other): - """ - Checks if the current automaton is equivalent to a given one. 
- - Parameters - ---------- - other : - An other finite state automaton - - Returns - ------- - is_equivalent : bool - Whether the two automata are equivalent or not - - Examples - -------- - - >>> enfa = EpsilonNFA() - >>> enfa.add_transitions([(0, "abc", 1), (0, "d", 1), \ - (0, "epsilon", 2)]) - >>> enfa.add_start_state(0) - >>> enfa.add_final_state(1) - >>> dfa = enfa.to_deterministic() - >>> dfa.is_deterministic() - True - - """ - self_dfa = self.to_deterministic() - return self_dfa.is_equivalent_to(other) - - def to_deterministic(self): - """ Turns the automaton into a deterministic one""" - raise NotImplementedError - - def is_deterministic(self): - """ Checks if the automaton is deterministic """ + @abstractmethod + def accepts(self, word: Iterable[Hashable]) -> bool: + """ Checks whether the finite automaton accepts a given word """ raise NotImplementedError - def __eq__(self, other): - return self.is_equivalent_to(other) - - def __len__(self): + def get_accepted_words(self, max_length: Optional[int] = None) \ + -> Iterable[List[Symbol]]: + """ + Gets words accepted by the finite automaton. 
+ """ + if max_length is not None and max_length < 0: + return + states_to_visit = deque((start_state, []) + for start_state in self.start_states) + states_leading_to_final = self._get_states_leading_to_final() + words_by_state = {state: set() for state in self.states} + yielded_words = set() + while states_to_visit: + current_state, current_word = states_to_visit.popleft() + if max_length is not None and len(current_word) > max_length: + continue + word_to_add = tuple(current_word) + if not self.__try_add(words_by_state[current_state], word_to_add): + continue + transitions = self.get_transitions_from(current_state) + for symbol, next_state in transitions: + if next_state in states_leading_to_final: + temp_word = current_word.copy() + if symbol != Epsilon(): + temp_word.append(symbol) + states_to_visit.append((next_state, temp_word)) + if self.is_final_state(current_state): + if self.__try_add(yielded_words, word_to_add): + yield current_word + + def _get_states_leading_to_final(self) -> Set[State]: + """ + Gets a set of states from which one + of the final states can be reached. 
+ """ + leading_to_final = self.final_states.copy() + visited = set() + states_to_process: deque[Any] = \ + deque((None, start_state) for start_state in self.start_states) + delayed_states = deque() + while states_to_process: + previous_state, current_state = states_to_process.pop() + if previous_state and current_state in leading_to_final: + leading_to_final.add(previous_state) + continue + if current_state in visited: + delayed_states.append((previous_state, current_state)) + continue + visited.add(current_state) + next_states = self.get_next_states_from(current_state) + if next_states: + states_to_process.append((previous_state, current_state)) + for next_state in next_states: + states_to_process.append((current_state, next_state)) + for previous_state, current_state in delayed_states: + if previous_state and current_state in leading_to_final: + leading_to_final.add(previous_state) + return leading_to_final + + def _get_reachable_states(self) -> Set[State]: + """ Get all states which are reachable """ + visited = set() + states_to_process = deque(self.start_states) + while states_to_process: + current_state = states_to_process.popleft() + visited.add(current_state) + for next_state in self.get_next_states_from(current_state): + if next_state not in visited: + states_to_process.append(next_state) + return visited + + def __len__(self) -> int: """Number of transitions""" return len(self._transition_function) - def __iter__(self): - yield from self._transition_function.__iter__() + def __iter__(self) -> Iterator[Tuple[State, Symbol, State]]: + yield from self._transition_function - def to_dict(self): + def to_dict(self) -> Dict[State, Dict[Symbol, Set[State]]]: """ Get the dictionary representation of the transition function. The \ keys of the dictionary are the source nodes. 
The items are \ @@ -637,43 +660,52 @@ def to_dict(self): """ return self._transition_function.to_dict() + @abstractmethod + def copy(self: AutomatonT) -> AutomatonT: + """ Copies the current Finite Automaton instance """ + raise NotImplementedError -def to_state(given: Any) -> Union[State, None]: - """ Transforms the input into a state - - Parameters - ---------- - given : any - What we want to transform - """ - if given is None: - return None - if isinstance(given, State): - return given - return State(given) - + def __copy__(self: AutomatonT) -> AutomatonT: + return self.copy() -def to_symbol(given: Any) -> Symbol: - """ Transforms the input into a symbol + def _copy_to(self, fa_to_copy_to: AutomatonT) -> AutomatonT: + """ Copies current automaton properties to the given one """ + for start in self._start_states: + fa_to_copy_to.add_start_state(start) + for final in self._final_states: + fa_to_copy_to.add_final_state(final) + for state in self._states: + for symbol in self._input_symbols: + states = self._transition_function(state, symbol) + for state_to in states: + fa_to_copy_to.add_transition(state, symbol, state_to) + states = self._transition_function(state, Epsilon()) + for state_to in states: + fa_to_copy_to.add_transition(state, Epsilon(), state_to) + return fa_to_copy_to + + @abstractmethod + def is_deterministic(self) -> bool: + """ Checks if the automaton is deterministic """ + raise NotImplementedError - Parameters - ---------- - given : any - What we want to transform - """ - if isinstance(given, Symbol): - return given - if given in ("epsilon", "É›"): - return Epsilon() - return Symbol(given) - - -def add_start_state_to_graph(graph, state): - """ Adds a starting node to a given graph """ - graph.add_node("starting_" + str(state.value), - label="", - shape=None, - height=.0, - width=.0) - graph.add_edge("starting_" + str(state.value), - state.value) + @staticmethod + def __try_add(set_to_add_to: Set[Any], element_to_add: Any) -> bool: + """ + Tries 
to add a given element to the given set. + Returns True if element was added, otherwise False. + """ + initial_length = len(set_to_add_to) + set_to_add_to.add(element_to_add) + return len(set_to_add_to) != initial_length + + @staticmethod + def __add_start_state_to_graph(graph: MultiDiGraph, state: State) -> None: + """ Adds a starting node to a given graph """ + graph.add_node("starting_" + str(state.value), + label="", + shape=None, + height=.0, + width=.0) + graph.add_edge("starting_" + str(state.value), + state.value) diff --git a/pyformlang/finite_automaton/finite_automaton_object.py b/pyformlang/finite_automaton/finite_automaton_object.py index b182e0c..ed80609 100644 --- a/pyformlang/finite_automaton/finite_automaton_object.py +++ b/pyformlang/finite_automaton/finite_automaton_object.py @@ -2,7 +2,7 @@ Represents an object of a finite state automaton """ -from typing import Any +from typing import Hashable class FiniteAutomatonObject: # pylint: disable=too-few-public-methods @@ -14,15 +14,20 @@ class FiniteAutomatonObject: # pylint: disable=too-few-public-methods The value of the object """ - def __init__(self, value: Any): + def __init__(self, value: Hashable) -> None: self._value = value self._hash = None + def __hash__(self) -> int: + if self._hash is None: + self._hash = hash(self._value) + return self._hash + def __repr__(self) -> str: return str(self._value) @property - def value(self) -> Any: + def value(self) -> Hashable: """ Gets the value of the object Returns diff --git a/pyformlang/finite_automaton/hopcroft_processing_list.py b/pyformlang/finite_automaton/hopcroft_processing_list.py index 5b62220..2bc02f4 100644 --- a/pyformlang/finite_automaton/hopcroft_processing_list.py +++ b/pyformlang/finite_automaton/hopcroft_processing_list.py @@ -2,7 +2,10 @@ For internal usage """ -import numpy as np +from typing import Dict, List, Set, Tuple +from numpy import zeros + +from .symbol import Symbol class HopcroftProcessingList: @@ -10,29 +13,29 @@ class 
HopcroftProcessingList: For internal usage """ - def __init__(self, n_states, symbols): - self._reverse_symbols = {} + def __init__(self, n_states: int, symbols: Set[Symbol]) -> None: + self._reverse_symbols: Dict[Symbol, int] = {} for i, symbol in enumerate(symbols): self._reverse_symbols[symbol] = i - self._inclusion = np.zeros((n_states, len(symbols)), dtype=bool) - self._elements = [] + self._inclusion = zeros((n_states, len(symbols)), dtype=bool) + self._elements: List[Tuple[int, Symbol]] = [] - def is_empty(self): + def is_empty(self) -> bool: """Check if empty""" return len(self._elements) == 0 - def contains(self, class_name, symbol): + def contains(self, class_name: int, symbol: Symbol) -> bool: """ Check containment """ i_symbol = self._reverse_symbols[symbol] return self._inclusion[class_name, i_symbol] - def insert(self, class_name, symbol): + def insert(self, class_name: int, symbol: Symbol) -> None: """ Insert new element """ i_symbol = self._reverse_symbols[symbol] self._inclusion[class_name, i_symbol] = True self._elements.append((class_name, symbol)) - def pop(self): + def pop(self) -> Tuple[int, Symbol]: """ Pop an element """ res = self._elements.pop() i_symbol = self._reverse_symbols[res[1]] diff --git a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py index 210c91b..5d32776 100644 --- a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py @@ -2,13 +2,11 @@ Representation of a nondeterministic finite automaton """ -from typing import Iterable, Any +from typing import Iterable, Hashable -# pylint: disable=cyclic-import -from pyformlang.finite_automaton import epsilon +from .epsilon import Epsilon from .epsilon_nfa import EpsilonNFA -from .finite_automaton import to_symbol -from .transition_function import InvalidEpsilonTransition +from .utils import to_symbol class 
NondeterministicFiniteAutomaton(EpsilonNFA): @@ -60,7 +58,7 @@ class NondeterministicFiniteAutomaton(EpsilonNFA): """ - def accepts(self, word: Iterable[Any]) -> bool: + def accepts(self, word: Iterable[Hashable]) -> bool: """ Checks whether the nfa accepts a given word Parameters @@ -85,7 +83,7 @@ def accepts(self, word: Iterable[Any]) -> bool: """ word = [to_symbol(x) for x in word] - current_states = self._start_state + current_states = self._start_states for symbol in word: current_states = self._get_next_states_iterable(current_states, symbol) @@ -110,36 +108,53 @@ def is_deterministic(self) -> bool: False """ - return len(self._start_state) <= 1 and \ + return len(self._start_states) <= 1 and \ self._transition_function.is_deterministic() - def to_deterministic(self) -> "DeterministicFiniteAutomaton": - """ Transforms the nfa into a dfa + def add_transition(self, + s_from: Hashable, + symb_by: Hashable, + s_to: Hashable) -> int: + symb_by = to_symbol(symb_by) + if symb_by == Epsilon(): + raise InvalidEpsilonTransition + return super().add_transition(s_from, symb_by, s_to) - Returns - ---------- - dfa : :class:`~pyformlang.deterministic_finite_automaton\ - .DeterministicFiniteAutomaton` - A dfa equivalent to the current nfa + def copy(self) -> "NondeterministicFiniteAutomaton": + """ Copies the current NFA instance """ + return self._copy_to(NondeterministicFiniteAutomaton()) - Examples - -------- - - >>> nfa = NondeterministicFiniteAutomaton() - >>> nfa.add_transitions([(0, "a", 1), (0, "a", 2)]) - >>> nfa.add_start_state(0) - >>> nfa.add_final_state(1) - >>> dfa = nfa.to_deterministic() - >>> nfa.is_equivalent_to(dfa) - True + @classmethod + def from_epsilon_nfa(cls, enfa: EpsilonNFA) \ + -> "NondeterministicFiniteAutomaton": + """ Builds nfa equivalent to the given enfa + Returns + ---------- + dfa : :class:`~pyformlang.finite_automaton. 
\ + NondeterministicFiniteAutomaton` + A non-deterministic finite automaton equivalent to the current \ + nfa, with no epsilon transition """ - return self._to_deterministic_internal(False) - - def add_transition(self, - s_from: Any, - symb_by: Any, - s_to: Any) -> int: - if symb_by == epsilon.Epsilon(): - raise InvalidEpsilonTransition - return super().add_transition(s_from, symb_by, s_to) + nfa = NondeterministicFiniteAutomaton() + for state in enfa.start_states: + nfa.add_start_state(state) + for state in enfa.final_states: + nfa.add_final_state(state) + start_eclose = enfa.eclose_iterable(enfa.start_states) + for state in start_eclose: + nfa.add_start_state(state) + for state in enfa.states: + eclose = enfa.eclose(state) + for e_state in eclose: + if e_state in enfa.final_states: + nfa.add_final_state(state) + for symb in enfa.symbols: + for next_state in enfa(e_state, symb): + nfa.add_transition(state, symb, next_state) + return nfa + + +class InvalidEpsilonTransition(Exception): + """Exception raised when an epsilon transition is created in + non-epsilon NFA""" diff --git a/pyformlang/finite_automaton/nondeterministic_transition_function.py b/pyformlang/finite_automaton/nondeterministic_transition_function.py index 71a6287..030a605 100644 --- a/pyformlang/finite_automaton/nondeterministic_transition_function.py +++ b/pyformlang/finite_automaton/nondeterministic_transition_function.py @@ -1,14 +1,16 @@ """ A nondeterministic transition function """ -import copy -from typing import Set + +from typing import Dict, Set, Iterable, Tuple +from copy import deepcopy from .state import State from .symbol import Symbol +from .transition_function import TransitionFunction -class NondeterministicTransitionFunction: +class NondeterministicTransitionFunction(TransitionFunction): """ A nondeterministic transition function in a finite automaton. 
The difference with a deterministic transition is that the return value is @@ -24,10 +26,12 @@ class NondeterministicTransitionFunction: """ - def __init__(self): - self._transitions = {} + def __init__(self) -> None: + self._transitions: Dict[State, Dict[Symbol, Set[State]]] = {} - def add_transition(self, s_from: State, symb_by: Symbol, + def add_transition(self, + s_from: State, + symb_by: Symbol, s_to: State) -> int: """ Adds a new transition to the function @@ -63,9 +67,11 @@ def add_transition(self, s_from: State, symb_by: Symbol, self._transitions[s_from][symb_by] = {s_to} return 1 - def remove_transition(self, s_from: State, symb_by: Symbol, + def remove_transition(self, + s_from: State, + symb_by: Symbol, s_to: State) -> int: - """ Removes a transition to the function + """ Removes a transition from the function Parameters ---------- @@ -120,10 +126,7 @@ def get_number_transitions(self) -> int: counter += len(s_to) return counter - def __len__(self): - return self.get_number_transitions() - - def __call__(self, s_from: State, symb_by: Symbol = None) -> Set[State]: + def __call__(self, s_from: State, symb_by: Symbol) -> Set[State]: """ Calls the transition function as a real function Parameters @@ -135,42 +138,24 @@ def __call__(self, s_from: State, symb_by: Symbol = None) -> Set[State]: Returns ---------- - s_from : :class:`~pyformlang.finite_automaton.State` or None - The destination state or None if it does not exists + s_from : set :class:`~pyformlang.finite_automaton.State` + Set of destination states """ if s_from in self._transitions: - if symb_by is not None: - if symb_by in self._transitions[s_from]: - return self._transitions[s_from][symb_by] - else: - return self._transitions[s_from].items() + if symb_by in self._transitions[s_from]: + return self._transitions[s_from][symb_by] return set() - def is_deterministic(self): - """ Whether the transition function is deterministic - - Returns - ---------- - is_deterministic : bool - Whether the function 
is deterministic - - Examples - -------- - - >>> transition = NondeterministicTransitionFunction() - >>> transition.add_transition(State(0), Symbol("a"), State(1)) - >>> transition.is_deterministic() - True - - """ - for transitions in self._transitions.values(): - for s_to in transitions.values(): - if len(s_to) > 1: - return False - return True + def get_transitions_from(self, s_from: State) \ + -> Iterable[Tuple[Symbol, State]]: + """ Gets transitions from the given state """ + if s_from in self._transitions: + for symb_by, states_to in self._transitions[s_from].items(): + for state_to in states_to: + yield symb_by, state_to - def get_edges(self): + def get_edges(self) -> Iterable[Tuple[State, Symbol, State]]: """ Gets the edges Returns @@ -180,15 +165,11 @@ def get_edges(self): :class:`~pyformlang.finite_automaton.State`) A generator of edges """ - for state, transitions in self._transitions.items(): - for symbol, next_states in transitions.items(): - for next_state in next_states: - yield state, symbol, next_state - - def __iter__(self): - yield from self.get_edges() + for s_from in self._transitions: + for symb_by, s_to in self.get_transitions_from(s_from): + yield s_from, symb_by, s_to - def to_dict(self): + def to_dict(self) -> Dict[State, Dict[Symbol, Set[State]]]: """ Get the dictionary representation of the transition function. The keys of the dictionary are the source nodes. The items are dictionaries @@ -200,4 +181,27 @@ def to_dict(self): transition_dict : dict The transitions as a dictionary. 
""" - return copy.deepcopy(self._transitions) + return deepcopy(self._transitions) + + def is_deterministic(self) -> bool: + """ Whether the transition function is deterministic + + Returns + ---------- + is_deterministic : bool + Whether the function is deterministic + + Examples + -------- + + >>> transition = NondeterministicTransitionFunction() + >>> transition.add_transition(State(0), Symbol("a"), State(1)) + >>> transition.is_deterministic() + True + + """ + for transitions in self._transitions.values(): + for s_to in transitions.values(): + if len(s_to) > 1: + return False + return True diff --git a/pyformlang/finite_automaton/partition.py b/pyformlang/finite_automaton/partition.py index 3e7aa66..e292900 100644 --- a/pyformlang/finite_automaton/partition.py +++ b/pyformlang/finite_automaton/partition.py @@ -2,20 +2,26 @@ For internal usage. """ +from typing import Dict, List, Iterable + from .doubly_linked_list import DoublyLinkedList +from .doubly_linked_node import DoublyLinkedNode +from .state import State class Partition: """Class to manage partitions used in Hopcroft minimization algorithm""" - def __init__(self, n_states): - self._class_names = {} # States to class index + def __init__(self, n_states: int) -> None: + self._class_names: Dict[State, int] = {} # States to class index # Class idx to states - self.part = [DoublyLinkedList() for _ in range(n_states)] - self._place = {} # state to position in list + self.part: List[DoublyLinkedList] = \ + [DoublyLinkedList() for _ in range(n_states)] + self._place: Dict[State, DoublyLinkedNode] = {} + # state to position in list self._counter = 0 # Number of classes - def add_class(self, new_class): + def add_class(self, new_class: Iterable[State]) -> None: """Adds a new class""" index = self._counter self._counter += 1 @@ -24,14 +30,15 @@ def add_class(self, new_class): node = self.part[index].append(element) self._place[element] = node - def move_to_new_class(self, elements_to_move): + def 
move_to_new_class(self, elements_to_move: Iterable[State]) -> None: """Move elements to a new class""" for element in elements_to_move: place = self._place[element] - place.delete() + class_name = self._class_names[element] + self.part[class_name].delete(place) self.add_class(elements_to_move) - def get_valid_sets(self, inverse): + def get_valid_sets(self, inverse: Iterable[State]) -> List[int]: """Get the valid sets""" class_names = [0] * self._counter for element in inverse: @@ -39,7 +46,7 @@ def get_valid_sets(self, inverse): return [i for i, value in enumerate(class_names) if value != 0 and value != len(self.part[i])] - def split(self, to_split, splitter): + def split(self, to_split: int, splitter: Iterable[State]) -> int: """ Splits """ elements_to_move = [] for element in splitter: @@ -48,7 +55,7 @@ def split(self, to_split, splitter): self.move_to_new_class(elements_to_move) return self._counter - 1 - def get_groups(self): + def get_groups(self) -> List[List[State]]: """ Get the groups """ res = [] for i in range(self._counter): diff --git a/pyformlang/finite_automaton/regexable.py b/pyformlang/finite_automaton/regexable.py deleted file mode 100644 index 395d104..0000000 --- a/pyformlang/finite_automaton/regexable.py +++ /dev/null @@ -1,67 +0,0 @@ -""" An abstract class to represent something which are be transformed into -a regex -""" - - -class Regexable: - """ An abstract class to represent something which are be transformed into - a regex - """ - - def to_regex(self) -> "Regex": - """ Tranforms the EpsilonNFA to a regular expression - - Returns - ---------- - regex : :class:`~pyformlang.regular_expression.Regex` - A regular expression equivalent to the current Epsilon NFA - """ - raise NotImplementedError() - - def union(self, other: "Regexable") -> "EpsilonNFA": - """ Makes the union of two regexable objects - - Parameters - ---------- - other : :class:`~pyformlang.finite_automaton.Regexable` - The other regexable object - - Returns - ---------- - enfa 
: :class:`~pyformlang.finite_automaton.EpsilonNFA` - The union of the two regexable objects - """ - regex0 = self.to_regex() - regex1 = other.to_regex() - regex = regex0 | regex1 - return regex.to_epsilon_nfa() - - def concatenate(self, other: "Regexable") -> "EpsilonNFA": - """ Makes the concatenation of two regexable objects - - Parameters - ---------- - other : :class:`~pyformlang.finite_automaton.Regexable` - The other regexable object - - Returns - ---------- - enfa : :class:`~pyformlang.finite_automaton.EpsilonNFA` - The concatenation of the two regexable objects - """ - regex0 = self.to_regex() - regex1 = other.to_regex() - regex = regex0 + regex1 - return regex.to_epsilon_nfa() - - def kleene_star(self) -> "EpsilonNFA": - """ Makes the kleene star of the current regexable object - - Returns - ---------- - enfa : :class:`~pyformlang.finite_automaton.EpsilonNFA` - The kleene star of the regexable object - """ - regex0 = self.to_regex() - regex = regex0.kleene_star() - return regex.to_epsilon_nfa() diff --git a/pyformlang/finite_automaton/state.py b/pyformlang/finite_automaton/state.py index c8f9f5b..53ed6d9 100644 --- a/pyformlang/finite_automaton/state.py +++ b/pyformlang/finite_automaton/state.py @@ -2,7 +2,7 @@ Representation of a state in a finite state automaton """ -from typing import Any +from typing import Hashable, Any from .finite_automaton_object import FiniteAutomatonObject @@ -22,15 +22,13 @@ class State(FiniteAutomatonObject): # pylint: disable=too-few-public-methods """ - def __init__(self, value): + def __init__(self, value: Hashable) -> None: super().__init__(value) self.index = None self.index_cfg_converter = None def __hash__(self) -> int: - if self._hash is None: - self._hash = hash(self._value) - return self._hash + return super().__hash__() def __eq__(self, other: Any) -> bool: if isinstance(other, State): diff --git a/pyformlang/finite_automaton/symbol.py b/pyformlang/finite_automaton/symbol.py index 7be694b..8599108 100644 --- 
a/pyformlang/finite_automaton/symbol.py +++ b/pyformlang/finite_automaton/symbol.py @@ -21,12 +21,10 @@ class Symbol(FiniteAutomatonObject): # pylint: disable=too-few-public-methods A """ + def __hash__(self) -> int: + return super().__hash__() + def __eq__(self, other: Any) -> bool: if isinstance(other, Symbol): return self._value == other.value return self._value == other - - def __hash__(self) -> int: - if self._hash is None: - self._hash = hash(self._value) - return self._hash diff --git a/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py b/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py index 739028b..cf936cf 100644 --- a/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/tests/test_deterministic_finite_automaton.py @@ -1,14 +1,15 @@ """ Tests for the deterministic finite automata """ -from pyformlang.finite_automaton import DeterministicFiniteAutomaton, Epsilon -from pyformlang.finite_automaton import State -from pyformlang.finite_automaton import Symbol -from pyformlang.finite_automaton import TransitionFunction -from pyformlang.finite_automaton.transition_function import \ - InvalidEpsilonTransition + import pytest +from pyformlang.finite_automaton import EpsilonNFA +from pyformlang.finite_automaton import DeterministicFiniteAutomaton +from pyformlang.finite_automaton import State, Symbol, Epsilon +from pyformlang.finite_automaton import DeterministicTransitionFunction +from pyformlang.finite_automaton import InvalidEpsilonTransition + class TestDeterministicFiniteAutomaton: """ Tests for deterministic finite automata @@ -24,7 +25,7 @@ def test_can_create(self): symb0 = Symbol("a") states = {state0, state1} input_symbols = {symb0} - transition_function = TransitionFunction() + transition_function = DeterministicTransitionFunction() transition_function.add_transition(state0, symb0, state1) start_state = state0 final_states = {state1} @@ -41,7 +42,6 @@ def 
test_can_create(self): dfa = DeterministicFiniteAutomaton(start_state=state1, final_states={state0, state1}) assert dfa is not None - assert dfa is dfa.to_deterministic() def test_add_transition(self): """ Tests the addition of transitions @@ -77,29 +77,9 @@ def test_accepts(self): """ Tests the acceptance of dfa """ dfa = get_example0() - self._perform_tests_example0(dfa) + perform_tests_example0(dfa) dfa = get_example0_bis() - self._perform_tests_example0(dfa) - - def _perform_tests_example0(self, dfa): - """ Tests for DFA from example 0 """ - symb_a = Symbol("a") - symb_b = Symbol("b") - symb_c = Symbol("c") - symb_d = Symbol("d") - state0 = State(0) - state1 = State(1) - assert dfa.accepts([symb_a, symb_b, symb_c]) - assert dfa.accepts([symb_a, symb_b, symb_b, symb_b, symb_c]) - assert dfa.accepts([symb_a, symb_b, symb_d]) - assert dfa.accepts([symb_a, symb_d]) - assert not dfa.accepts([symb_a, symb_c, symb_d]) - assert not dfa.accepts([symb_d, symb_c, symb_d]) - assert not dfa.accepts([]) - assert dfa.remove_start_state(state1) == 0 - assert dfa.accepts([symb_a, symb_b, symb_c]) - assert dfa.remove_start_state(state0) == 1 - assert not dfa.accepts([symb_a, symb_b, symb_c]) + perform_tests_example0(dfa) dfa.add_start_state(0) assert dfa.accepts(["a", "b", "c"]) @@ -117,13 +97,7 @@ def _perform_tests_example0(self, dfa): def test_copy(self): """ Test the copy of a DFA """ dfa = get_example0().copy() - self._perform_tests_example0(dfa) - - def test_regex(self): - """ Tests the regex transformation """ - dfa = get_example0() - dfa = dfa.to_regex().to_epsilon_nfa() - self._perform_tests_example0(dfa) + perform_tests_example0(dfa) def test_complement(self): """ Tests the complement operation """ @@ -275,10 +249,27 @@ def test_equivalent(self): dfa2.add_final_state(State("D")) assert dfa2 != dfa1 - def test_regex_dfa(self): - dfa1 = get_dfa_example() - dfa_regex = dfa1.to_regex().to_epsilon_nfa() - assert dfa1 == dfa_regex + def test_word_generation(self): + dfa = 
get_dfa_example_for_word_generation() + accepted_words = list(dfa.get_accepted_words()) + assert [] in accepted_words + assert [Symbol("b"), Symbol("c")] in accepted_words + assert [Symbol("b"), Symbol("d")] in accepted_words + assert len(accepted_words) == 3 + + def test_cyclic_word_generation(self): + dfa = get_cyclic_dfa_example() + accepted_words = list(dfa.get_accepted_words(5)) + assert ["a", "f"] in accepted_words + assert ["a", "b", "e", "f"] in accepted_words + assert ["a", "b", "c", "e", "f"] in accepted_words + assert ["a", "b", "d", "a", "f"] in accepted_words + assert len(accepted_words) == 4 + + def test_dfa_generating_no_words(self): + dfa = get_dfa_example_without_accepted_words() + accepted_words = list(dfa.get_accepted_words()) + assert not accepted_words def get_example0(): @@ -326,3 +317,75 @@ def get_dfa_example(): dfa1.add_start_state(State("A")) dfa1.add_final_state(State("D")) return dfa1 + + +def get_dfa_example_for_word_generation(): + """ DFA example for the word generation test """ + dfa = DeterministicFiniteAutomaton() + states = [State(x) for x in range(4)] + symbol_a = Symbol("a") + symbol_b = Symbol("b") + symbol_c = Symbol("c") + symbol_d = Symbol("d") + dfa.add_transitions([ + (states[0], symbol_a, states[1]), + (states[0], symbol_b, states[2]), + (states[1], symbol_a, states[1]), + (states[2], symbol_c, states[3]), + (states[2], symbol_d, states[3]), + ]) + dfa.add_start_state(states[0]) + dfa.add_final_state(states[0]) + dfa.add_final_state(states[3]) + return dfa + + +def get_cyclic_dfa_example(): + """ Gets DFA example with several cycles on path to final """ + dfa = DeterministicFiniteAutomaton(start_state=0, + final_states={3}) + dfa.add_transitions([ + (0, "a", 1), + (1, "b", 2), + (2, "c", 2), + (2, "d", 0), + (2, "e", 1), + (1, "f", 3), + ]) + return dfa + + +def get_dfa_example_without_accepted_words(): + """ DFA example accepting no words """ + dfa = DeterministicFiniteAutomaton() + states = [State(x) for x in range(4)] 
+ symbol_a = Symbol("a") + symbol_b = Symbol("b") + dfa.add_transitions([ + (states[0], symbol_a, states[1]), + (states[2], symbol_b, states[3]), + ]) + dfa.add_start_state(states[0]) + dfa.add_final_state(states[3]) + return dfa + + +def perform_tests_example0(enfa: EpsilonNFA): + """ Tests for DFA from example 0 """ + symb_a = Symbol("a") + symb_b = Symbol("b") + symb_c = Symbol("c") + symb_d = Symbol("d") + state0 = State(0) + state1 = State(1) + assert enfa.accepts([symb_a, symb_b, symb_c]) + assert enfa.accepts([symb_a, symb_b, symb_b, symb_b, symb_c]) + assert enfa.accepts([symb_a, symb_b, symb_d]) + assert enfa.accepts([symb_a, symb_d]) + assert not enfa.accepts([symb_a, symb_c, symb_d]) + assert not enfa.accepts([symb_d, symb_c, symb_d]) + assert not enfa.accepts([]) + assert enfa.remove_start_state(state1) == 0 + assert enfa.accepts([symb_a, symb_b, symb_c]) + assert enfa.remove_start_state(state0) == 1 + assert not enfa.accepts([symb_a, symb_b, symb_c]) diff --git a/pyformlang/finite_automaton/tests/test_deterministic_transition_function.py b/pyformlang/finite_automaton/tests/test_deterministic_transition_function.py new file mode 100644 index 0000000..b04e631 --- /dev/null +++ b/pyformlang/finite_automaton/tests/test_deterministic_transition_function.py @@ -0,0 +1,124 @@ +""" +Test the transition functions +""" + +import pytest + +from pyformlang.finite_automaton import DeterministicTransitionFunction +from pyformlang.finite_automaton import State, Symbol, Epsilon +from pyformlang.finite_automaton import \ + DuplicateTransitionError, InvalidEpsilonTransition + + +class TestDeterministicTransitionFunction: + """ Tests the transitions functions + """ + + def test_creation(self): + """ Tests the creation of transition functions + """ + transition_function = DeterministicTransitionFunction() + assert transition_function is not None + + def test_add_transitions(self): + """ Tests the addition of transitions + """ + transition_function = 
DeterministicTransitionFunction() + s_from = State(10) + s_to = State(11) + s_to_bis = State(2) + symb_by = Symbol("abc") + transition_function.add_transition(s_from, symb_by, s_to) + transition_function.add_transition(s_from, symb_by, s_to) + with pytest.raises(DuplicateTransitionError) as dte: + transition_function.add_transition(s_from, symb_by, s_to_bis) + dte = dte.value + assert dte.s_from == s_from + assert dte.s_to == s_to_bis + assert dte.symb_by == symb_by + assert dte.s_to_old == s_to + + def test_number_transitions(self): + """ Tests the number of transitions + """ + transition_function = DeterministicTransitionFunction() + assert transition_function.get_number_transitions() == 0 + s_from = State(110) + s_to = State(12) + s_to_bis = State(2) + symb_by = Symbol("a") + transition_function.add_transition(s_from, symb_by, s_to) + assert transition_function.get_number_transitions() == 1 + transition_function.add_transition(s_from, symb_by, s_to) + assert transition_function.get_number_transitions() == 1 + symb_by2 = Symbol("bc") + transition_function.add_transition(s_from, symb_by2, s_to_bis) + assert transition_function.get_number_transitions() == 2 + transition_function.add_transition(s_to, symb_by, s_to_bis) + assert transition_function.get_number_transitions() == 3 + + def test_remove_transitions(self): + """ Tests the removal of transitions + """ + transition_function = DeterministicTransitionFunction() + s_from = State(10) + s_to = State(11) + symb_by = Symbol("abc") + transition_function.add_transition(s_from, symb_by, s_to) + assert transition_function.remove_transition(s_from, + symb_by, + s_to) == 1 + assert transition_function.get_number_transitions() == 0 + assert transition_function(s_to, symb_by) == set() + assert transition_function(s_from, symb_by) == set() + assert transition_function.remove_transition(s_from, + symb_by, + s_to) == 0 + + def test_call(self): + """ Tests the call of a transition function + """ + transition_function = 
DeterministicTransitionFunction() + s_from = State(0) + s_to = State(1) + symb_by = Symbol("a") + transition_function.add_transition(s_from, symb_by, s_to) + assert transition_function(s_from, symb_by) == {s_to} + assert transition_function(s_to, symb_by) == set() + + def test_get_next_state(self): + """ Tests the transition function call to get a single state """ + transition_function = DeterministicTransitionFunction() + s_from = State(0) + s_to = State(1) + symb_by = Symbol("a") + transition_function.add_transition(s_from, symb_by, s_to) + assert transition_function.get_next_state(s_from, symb_by) == s_to + assert transition_function.get_next_state(s_to, symb_by) is None + + def test_invalid_epsilon(self): + """ Tests invalid transition """ + transition_function = DeterministicTransitionFunction() + s_from = State(0) + s_to = State(1) + epsilon = Epsilon() + with pytest.raises(InvalidEpsilonTransition): + transition_function.add_transition(s_from, epsilon, s_to) + + def test_get_transitions_from(self): + """ Tests iteration of transitions from specified state """ + transition_function = DeterministicTransitionFunction() + states = [State(x) for x in range(0, 4)] + symbol_a = Symbol("a") + symbol_b = Symbol("b") + symbol_c = Symbol("c") + symbol_d = Symbol("d") + transition_function.add_transition(states[0], symbol_a, states[1]) + transition_function.add_transition(states[1], symbol_b, states[2]) + transition_function.add_transition(states[1], symbol_c, states[2]) + transition_function.add_transition(states[1], symbol_d, states[3]) + transitions = list(transition_function.get_transitions_from(states[1])) + assert (symbol_b, states[2]) in transitions + assert (symbol_c, states[2]) in transitions + assert (symbol_d, states[3]) in transitions + assert len(transitions) == 3 diff --git a/pyformlang/finite_automaton/tests/test_epsilon.py b/pyformlang/finite_automaton/tests/test_epsilon.py index 16b57ed..955ac6f 100644 --- 
a/pyformlang/finite_automaton/tests/test_epsilon.py +++ b/pyformlang/finite_automaton/tests/test_epsilon.py @@ -1,6 +1,7 @@ """ Tests for epsilon transitions """ + from pyformlang.finite_automaton import Epsilon from pyformlang.finite_automaton import Symbol diff --git a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py index 2f83024..b498b8c 100644 --- a/pyformlang/finite_automaton/tests/test_epsilon_nfa.py +++ b/pyformlang/finite_automaton/tests/test_epsilon_nfa.py @@ -1,19 +1,20 @@ """ Tests for epsilon NFA """ -import copy +import copy import networkx -from pyformlang.finite_automaton import EpsilonNFA, State, Symbol, Epsilon -from ..regexable import Regexable -import pytest +from pyformlang.finite_automaton import EpsilonNFA +from pyformlang.finite_automaton import NondeterministicFiniteAutomaton +from pyformlang.finite_automaton import DeterministicFiniteAutomaton +from pyformlang.finite_automaton import State, Symbol, Epsilon class TestEpsilonNFA: """ Tests epsilon NFA """ - # pylint: disable=missing-function-docstring, protected-access + # pylint: disable=missing-function-docstring # pylint: disable=too-many-statements, too-many-public-methods def test_eclose(self): @@ -34,9 +35,8 @@ def test_eclose(self): assert len(enfa.eclose(states[2])) == 3 assert len(enfa.eclose(states[5])) == 2 assert len(enfa.eclose(states[6])) == 1 - assert len(list(enfa._transition_function.get_edges())) == 7 - assert enfa.remove_transition(states[1], epsilon, states[4]) == \ - 1 + assert len(list(iter(enfa))) == 7 + assert enfa.remove_transition(states[1], epsilon, states[4]) == 1 assert not enfa.is_deterministic() def test_accept(self): @@ -47,7 +47,7 @@ def test_copy(self): """ Tests the copy of enda """ self._perform_tests_digits(True) - def _perform_tests_digits(self, should_copy=False): + def _perform_tests_digits(self, should_copy: bool = False): enfa, digits, epsilon, plus, minus, point = get_digits_enfa() if 
should_copy: enfa = copy.copy(enfa) @@ -73,7 +73,7 @@ def _perform_tests_digits(self, should_copy=False): def test_deterministic(self): """ Tests the transformation to a dfa""" enfa, digits, _, plus, minus, point = get_digits_enfa() - dfa = enfa.to_deterministic() + dfa = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa) assert dfa.is_deterministic() assert len(dfa.states) == 6 assert dfa.get_number_transitions() == 65 @@ -87,175 +87,39 @@ def test_deterministic(self): assert not dfa.accepts([point]) assert not dfa.accepts([plus]) - def test_remove_state(self): - " Tests the remove of state """ - enfa = EpsilonNFA() - state0 = State(0) - state1 = State(1) - state2 = State(2) - symb02 = Symbol("a+b") - symb01 = Symbol("c*") - symb11 = Symbol("b+(c.d)") - symb12 = Symbol("a.b.c") - enfa.add_start_state(state0) - enfa.add_final_state(state2) - enfa.add_transition(state0, symb01, state1) - enfa.add_transition(state0, symb02, state2) - enfa.add_transition(state1, symb11, state1) - enfa.add_transition(state1, symb12, state2) - enfa._remove_all_basic_states() - assert enfa.get_number_transitions() == 1 - assert len(enfa.states) == 2 - - def test_to_regex(self): - """ Tests the transformation to regex """ - enfa = EpsilonNFA() - state0 = State(0) - state1 = State(1) - state2 = State(2) - symb_e = Symbol("e") - symb_f = Symbol("f") - symb_g = Symbol("g") - enfa.add_start_state(state0) - enfa.add_final_state(state2) - enfa.add_transition(state0, symb_e, state1) - enfa.add_transition(state1, symb_f, state2) - enfa.add_transition(state0, symb_g, state2) - regex = enfa.to_regex() - enfa2 = regex.to_epsilon_nfa() - assert enfa2.accepts([symb_e, symb_f]) - assert enfa2.accepts([symb_g]) - assert not enfa2.accepts([]) - assert not enfa2.accepts([symb_e]) - assert not enfa2.accepts([symb_f]) - enfa.add_final_state(state0) - with pytest.raises(ValueError) as _: - enfa._get_regex_simple() - regex = enfa.to_regex() - enfa3 = regex.to_epsilon_nfa() - assert enfa3.accepts([symb_e, 
symb_f]) - assert enfa3.accepts([symb_g]) - assert enfa3.accepts([]) - assert not enfa3.accepts([symb_e]) - assert not enfa3.accepts([symb_f]) - enfa.remove_start_state(state0) - regex = enfa.to_regex() - enfa3 = regex.to_epsilon_nfa() - assert not enfa3.accepts([symb_e, symb_f]) - assert not enfa3.accepts([symb_g]) - assert not enfa3.accepts([]) - assert not enfa3.accepts([symb_e]) - assert not enfa3.accepts([symb_f]) - enfa.add_start_state(state0) - enfa.add_transition(state0, symb_f, state0) - regex = enfa.to_regex() - enfa3 = regex.to_epsilon_nfa() - assert enfa3.accepts([symb_e, symb_f]) - assert enfa3.accepts([symb_f, symb_e, symb_f]) - assert enfa3.accepts([symb_g]) - assert enfa3.accepts([symb_f, symb_f, symb_g]) - assert enfa3.accepts([]) - assert not enfa3.accepts([symb_e]) - assert enfa3.accepts([symb_f]) - - def test_to_regex2(self): - """ Tests the transformation to regex """ - enfa = EpsilonNFA() - state0 = State(0) - state1 = State(1) - symb_a = Symbol("0") - symb_b = Symbol("1") - enfa.add_start_state(state0) - enfa.add_final_state(state1) - enfa.add_transition(state0, symb_a, state0) - enfa.add_transition(state0, symb_a, state1) - enfa.add_transition(state1, symb_b, state0) - enfa.add_transition(state1, symb_b, state1) - regex = enfa.to_regex() - enfa2 = regex.to_epsilon_nfa() - assert enfa2.accepts([symb_a]) - assert enfa2.accepts([symb_a, symb_a]) - assert enfa2.accepts([symb_a, symb_a, symb_b]) - assert enfa2.accepts([symb_a, symb_a, symb_b, symb_b]) - assert enfa2.accepts([symb_a, symb_a, - symb_b, symb_b, symb_a]) - assert enfa2.accepts([symb_a, symb_a, symb_b, - symb_b, symb_a, symb_b]) - assert not enfa2.accepts([symb_b]) - - def test_to_regex3(self): - """ Tests the transformation to regex """ - enfa = EpsilonNFA() - state0 = State(0) - state1 = State(1) - symb_a = Symbol("0") - symb_b = Symbol("1") - enfa.add_start_state(state0) - enfa.add_final_state(state1) - enfa.add_transition(state0, symb_a, state0) - enfa.add_transition(state1, 
symb_b, state0) - enfa.add_transition(state1, symb_b, state1) - regex = enfa.to_regex() - enfa2 = regex.to_epsilon_nfa() - assert not enfa2.accepts([symb_a]) - assert not enfa2.accepts([symb_a, symb_a]) - assert not enfa2.accepts([symb_a, symb_a, symb_b]) - assert not enfa2.accepts([symb_a, symb_a, - symb_b, symb_b, symb_a]) - assert not enfa2.accepts([symb_a, symb_a, symb_b, - symb_b, symb_a, symb_b]) - assert not enfa2.accepts([symb_b]) - epsilon = Epsilon() - enfa.add_transition(state0, epsilon, state1) - regex = enfa.to_regex() - enfa2 = regex.to_epsilon_nfa() - assert enfa.accepts([]) - assert enfa.accepts([symb_a]) - assert enfa2.accepts([symb_a]) - assert enfa2.accepts([symb_a, symb_a]) - assert enfa2.accepts([symb_a, symb_a, symb_b, symb_b]) - assert enfa2.accepts([symb_a, symb_a, symb_b, symb_b, - symb_a, symb_b]) - assert enfa2.accepts([symb_b]) - assert enfa2.accepts([]) - enfa.remove_transition(state0, symb_a, state0) - regex = enfa.to_regex() - enfa2 = regex.to_epsilon_nfa() - assert not enfa2.accepts([symb_a]) - assert not enfa2.accepts([symb_a, symb_a]) - assert not enfa2.accepts([symb_a, symb_a, symb_b]) - assert not enfa2.accepts([symb_a, symb_a, symb_b, - symb_b, symb_a]) - assert not enfa2.accepts([symb_a, symb_a, symb_b, symb_b, - symb_a, symb_b]) - assert enfa2.accepts([symb_b]) - assert enfa2.accepts([]) - enfa.remove_transition(state1, symb_b, state1) - regex = enfa.to_regex() - enfa2 = regex.to_epsilon_nfa() - assert enfa2.accepts([symb_b, symb_b]) - enfa.add_transition(state0, symb_a, state0) - regex = enfa.to_regex() - enfa2 = regex.to_epsilon_nfa() - assert enfa2.accepts([symb_a, symb_b]) - - def test_union(self): + def test_union0(self): """ Tests the union of two epsilon NFA """ - with pytest.raises(NotImplementedError) as _: - Regexable().to_regex() enfa0 = get_enfa_example0() enfa1 = get_enfa_example1() symb_a = Symbol("a") symb_b = Symbol("b") symb_c = Symbol("c") - enfa = enfa0.union(enfa1) + enfa = enfa0.get_union(enfa1) assert 
enfa.accepts([symb_b]) assert enfa.accepts([symb_a, symb_b]) assert enfa.accepts([symb_c]) assert not enfa.accepts([symb_a]) assert not enfa.accepts([]) - def test_concatenate(self): + def test_union1(self): + """ + Tests the union of three ENFAs. + Union is (a*b)|(ab+)|c + """ + enfa0 = get_enfa_example0() + enfa1 = get_enfa_example1() + enfa2 = get_enfa_example2() + enfa = enfa0 | enfa2 + enfa |= enfa1 + accepted_words = list(enfa.get_accepted_words(3)) + assert ["b"] in accepted_words + assert ["a", "b"] in accepted_words + assert ["a", "a", "b"] in accepted_words + assert ["a", "b", "b"] in accepted_words + assert ["c"] in accepted_words + assert len(accepted_words) == 5 + + def test_concatenate0(self): """ Tests the concatenation of two epsilon NFA """ enfa0 = get_enfa_example0() enfa1 = get_enfa_example1() @@ -270,7 +134,23 @@ def test_concatenate(self): assert not enfa.accepts([symb_b]) assert not enfa.accepts([]) - def test_kleene(self): + def test_concatenate1(self): + """ + Tests the concatenation of three ENFAs. + Concatenation is a*bc((ab+)|c) + """ + enfa0 = get_enfa_example0() + enfa1 = get_enfa_example1() + enfa2 = get_enfa_example2() + enfa = enfa0 + enfa1 + enfa += enfa2 + accepted_words = list(enfa.get_accepted_words(4)) + assert ["b", "c", "c"] in accepted_words + assert ["a", "b", "c", "c"] in accepted_words + assert ["b", "c", "a", "b"] in accepted_words + assert len(accepted_words) == 3 + + def test_kleene0(self): """ Tests the kleene star of an epsilon NFA """ enfa0 = get_enfa_example0() symb_a = Symbol("a") @@ -284,6 +164,23 @@ def test_kleene(self): assert not enfa.accepts([symb_a]) assert not enfa.accepts([symb_a, symb_b, symb_a]) + def test_kleene1(self): + """ + Tests the kleene star of an ENFA. 
+ Expression is ((ab+)|c)* + """ + enfa = get_enfa_example2() + enfa = enfa.kleene_star() + accepted_words = list(enfa.get_accepted_words(3)) + assert [] in accepted_words + assert ["a", "b"] in accepted_words + assert ["a", "b", "b"] in accepted_words + assert ["a", "b", "c"] in accepted_words + assert ["c", "a", "b"] in accepted_words + for i in range(3): + assert ["c"] * (i + 1) in accepted_words + assert len(accepted_words) == 8 + def test_complement(self): """ Tests the complement operation """ enfa = EpsilonNFA() @@ -382,7 +279,7 @@ def test_minimization(self): enfa = get_enfa_example0_bis() symb_a = Symbol("a") symb_b = Symbol("b") - enfa = enfa.minimize() + enfa = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa).minimize() assert enfa.is_deterministic() assert len(enfa.states) == 2 assert enfa.accepts([symb_a, symb_b]) @@ -390,7 +287,7 @@ def test_minimization(self): assert enfa.accepts([symb_b]) assert not enfa.accepts([symb_a]) enfa = get_example_non_minimal() - enfa = enfa.minimize() + enfa = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa).minimize() assert enfa.is_deterministic() assert len(enfa.states) == 3 assert enfa.accepts([symb_a, symb_b]) @@ -398,7 +295,7 @@ def test_minimization(self): assert not enfa.accepts([symb_b]) assert not enfa.accepts([symb_a]) enfa = EpsilonNFA() - enfa = enfa.minimize() + enfa = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa).minimize() assert enfa.is_deterministic() assert len(enfa.states) == 1 assert not enfa.accepts([]) @@ -546,7 +443,9 @@ def test_equivalent(self): enfa1.add_final_state(state1) enfa1.add_transition(state0, symb_a, state1) enfa1.add_transition(state1, symb_a, state1) - assert enfa0.is_equivalent_to(enfa1) + dfa0 = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa0) + dfa1 = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa1) + assert dfa0.is_equivalent_to(dfa1) def test_non_equivalent(self): enfa0 = EpsilonNFA() @@ -562,7 +461,9 @@ def test_non_equivalent(self): 
enfa1.add_final_state(state1) enfa1.add_transition(state0, symb_a, state1) enfa1.add_transition(state1, symb_a, state0) - assert not enfa0.is_equivalent_to(enfa1) + dfa0 = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa0) + dfa1 = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa1) + assert not dfa0.is_equivalent_to(dfa1) def test_get_as_dict(self): enfa0 = EpsilonNFA() @@ -583,26 +484,18 @@ def test_len(self): assert len(enfa) == 1 def test_call(self): + """ Tests the call of the transition function of the ENFA """ enfa = get_enfa_example1() - assert len(enfa(2)) == 1 + assert enfa(2, "c") == {3} + assert not enfa(3, "a") + assert not enfa(2313, "qwe") - def test_example_doc(self): - enfa = EpsilonNFA() - state0 = State(0) - state1 = State(1) - symb_a = Symbol("0") - symb_b = Symbol("1") - enfa.add_start_state(state0) - enfa.add_final_state(state1) - enfa.add_transition(state0, symb_a, state0) - enfa.add_transition(state1, symb_b, state0) - enfa.add_transition(state1, symb_b, state1) - - # Turn a finite automaton into a regex... 
- regex = enfa.to_regex() - # And turn it back into an epsilon non deterministic automaton - enfa2 = regex.to_epsilon_nfa() - assert enfa == enfa2 + def test_get_transitions_from(self): + """ Tests the transition obtaining from the given state """ + enfa = get_enfa_example1() + assert list(enfa.get_transitions_from(2)) == [("c", 3)] + assert not list(enfa.get_transitions_from(3)) + assert not list(enfa.get_transitions_from(4210)) def test_remove_epsilon_transitions(self): enfa = EpsilonNFA() @@ -616,11 +509,54 @@ def test_remove_epsilon_transitions(self): enfa.add_start_state("a") enfa.add_final_state("b") assert len(enfa.start_states) == 1 - nfa = enfa.remove_epsilon_transitions() + nfa = NondeterministicFiniteAutomaton.from_epsilon_nfa(enfa) assert len(nfa.start_states) == 3 assert len(nfa.final_states) == 2 assert nfa.get_number_transitions() == 3 - assert nfa.is_equivalent_to(enfa) + dfa0 = DeterministicFiniteAutomaton.from_nfa(nfa) + dfa1 = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa) + assert dfa0.is_equivalent_to(dfa1) + + def test_word_generation(self): + enfa = get_enfa_example_for_word_generation() + accepted_words = list(enfa.get_accepted_words()) + assert [] in accepted_words + assert [Symbol("b")] in accepted_words + assert [Symbol("c")] in accepted_words + assert [Symbol("d"), Symbol("e")] in accepted_words + assert [Symbol("d"), Symbol("e"), Symbol("f")] in accepted_words + assert len(accepted_words) == 5 + + def test_cyclic_word_generation(self): + enfa = get_cyclic_enfa_example() + max_length = 10 + accepted_words = [[Symbol("a")] + + [Symbol("b")] * (i + 1) + + [Symbol("c")] + for i in range(max_length - 2)] + actual_accepted_words = list(enfa.get_accepted_words(max_length)) + assert accepted_words == actual_accepted_words + + def test_epsilon_cycle_word_generation(self): + enfa = get_epsilon_cycle_enfa_example() + max_length = 4 + accepted_words = list(enfa.get_accepted_words(max_length)) + assert [] in accepted_words + assert 
[Symbol("a"), Symbol("c")] in accepted_words + assert [Symbol("a"), Symbol("b"), Symbol("c")] in accepted_words + assert [Symbol("a"), Symbol("b"), + Symbol("b"), Symbol("c")] in accepted_words + assert len(accepted_words) == 4 + + def test_max_length_zero_accepting_empty_string(self): + enfa = get_enfa_example_for_word_generation() + accepted_words = list(enfa.get_accepted_words(0)) + assert accepted_words == [[]] + + def test_max_length_zero_not_accepting_empty_string(self): + enfa = get_cyclic_enfa_example() + accepted_words = list(enfa.get_accepted_words(0)) + assert not accepted_words def get_digits_enfa(): @@ -669,7 +605,7 @@ def get_enfa_example0(): def get_enfa_example1(): - """ Gives and example ENFA + """ Gives an example ENFA Accepts c """ enfa1 = EpsilonNFA() @@ -682,6 +618,21 @@ def get_enfa_example1(): return enfa1 +def get_enfa_example2(): + """ Gives an example ENFA + Accepts (ab+)|c + """ + enfa = EpsilonNFA(start_states={0, 3}, + final_states={2, 4}) + enfa.add_transitions([ + (0, "a", 1), + (1, "b", 2), + (2, "b", 2), + (3, "c", 4), + ]) + return enfa + + def get_enfa_example0_bis(): """ A non minimal NFA, equivalent to example0 """ enfa0 = EpsilonNFA() @@ -730,3 +681,75 @@ def get_example_non_minimal(): enfa0.add_transition(state5, symb_b, state3) enfa0.add_transition(state6, symb_b, state4) return enfa0 + + +def get_enfa_example_for_word_generation(): + """ ENFA example for the word generation test """ + enfa = EpsilonNFA() + states = [State(x) for x in range(9)] + symbol_a = Symbol("a") + symbol_b = Symbol("b") + symbol_c = Symbol("c") + symbol_d = Symbol("d") + symbol_e = Symbol("e") + symbol_f = Symbol("f") + epsilon = Epsilon() + enfa.add_transitions([ + (states[0], symbol_a, states[1]), + (states[0], epsilon, states[2]), + (states[1], symbol_a, states[1]), + (states[2], symbol_b, states[3]), + (states[2], symbol_c, states[3]), + (states[4], symbol_d, states[5]), + (states[5], symbol_e, states[6]), + (states[5], symbol_e, states[7]), + 
(states[7], symbol_f, states[8]), + ]) + enfa.add_start_state(states[0]) + enfa.add_start_state(states[4]) + enfa.add_final_state(states[3]) + enfa.add_final_state(states[4]) + enfa.add_final_state(states[6]) + enfa.add_final_state(states[8]) + return enfa + + +def get_cyclic_enfa_example(): + """ ENFA example with a cycle on the path to the final state """ + enfa = EpsilonNFA() + states = [State(x) for x in range(4)] + symbol_a = Symbol("a") + symbol_b = Symbol("b") + symbol_c = Symbol("c") + epsilon = Epsilon() + enfa.add_transitions([ + (states[0], symbol_a, states[1]), + (states[1], symbol_b, states[2]), + (states[2], epsilon, states[1]), + (states[2], symbol_c, states[3]), + ]) + enfa.add_start_state(states[0]) + enfa.add_final_state(states[3]) + return enfa + + +def get_epsilon_cycle_enfa_example(): + """ ENFA example with an epsilon cycle """ + enfa = EpsilonNFA() + states = [State(x) for x in range(4)] + symbol_a = Symbol("a") + symbol_b = Symbol("b") + symbol_c = Symbol("c") + epsilon = Epsilon() + enfa.add_transitions([ + (states[0], epsilon, states[0]), + (states[0], symbol_a, states[1]), + (states[1], symbol_b, states[1]), + (states[1], epsilon, states[2]), + (states[2], epsilon, states[1]), + (states[1], symbol_c, states[3]), + ]) + enfa.add_start_state(states[0]) + enfa.add_final_state(states[0]) + enfa.add_final_state(states[3]) + return enfa diff --git a/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py index 84d786c..9004428 100644 --- a/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/tests/test_nondeterministic_finite_automaton.py @@ -1,14 +1,14 @@ """ Tests for nondeterministic finite automata """ -from pyformlang.finite_automaton import NondeterministicFiniteAutomaton,\ - Epsilon -from pyformlang.finite_automaton import State -from pyformlang.finite_automaton import Symbol -from 
pyformlang.finite_automaton.transition_function import \ - InvalidEpsilonTransition + import pytest +from pyformlang.finite_automaton import NondeterministicFiniteAutomaton +from pyformlang.finite_automaton import DeterministicFiniteAutomaton +from pyformlang.finite_automaton import State, Symbol, Epsilon +from pyformlang.finite_automaton import InvalidEpsilonTransition + class TestNondeterministicFiniteAutomaton: """ @@ -23,7 +23,7 @@ def test_creation(self): nfa = NondeterministicFiniteAutomaton() assert nfa is not None states = [State(x) for x in range(10)] - nfa = NondeterministicFiniteAutomaton(start_state=set(states)) + nfa = NondeterministicFiniteAutomaton(start_states=set(states)) assert nfa is not None def test_remove_initial(self): @@ -79,7 +79,7 @@ def test_accepts(self): assert not nfa.is_deterministic() assert nfa.accepts([symb_c]) nfa.remove_start_state(state1) - dfa = nfa.to_deterministic() + dfa = DeterministicFiniteAutomaton.from_nfa(nfa) assert dfa.is_deterministic() assert dfa.accepts([symb_a, symb_b, symb_c]) assert dfa.accepts([symb_a, symb_b, symb_b, symb_b, symb_c]) @@ -105,7 +105,7 @@ def test_deterministic(self): nfa.add_transition(state0, symb0, state1) nfa.add_transition(state0, symb1, state0) nfa.add_transition(state1, symb1, state2) - dfa = nfa.to_deterministic() + dfa = DeterministicFiniteAutomaton.from_nfa(nfa) assert len(dfa.states) == 3 assert dfa.get_number_transitions() == 6 @@ -115,3 +115,139 @@ def test_epsilon_refused(self): state1 = State(1) with pytest.raises(InvalidEpsilonTransition): dfa.add_transition(state0, Epsilon(), state1) + + def test_copy(self): + nfa = get_nfa_example_with_duplicates().copy() + assert len(nfa.states) == 9 + assert len(nfa.symbols) == 3 + assert len(nfa.start_states) == 4 + assert len(nfa.final_states) == 3 + assert nfa.get_number_transitions() == 7 + assert nfa.accepts([Symbol("a"), Symbol("c")]) + assert nfa.accepts([Symbol("b"), Symbol("c")]) + assert not nfa.is_deterministic() + + def 
test_word_generation(self): + nfa = get_nfa_example_for_word_generation() + accepted_words = list(nfa.get_accepted_words()) + assert [] in accepted_words + assert [Symbol("a"), Symbol("b")] in accepted_words + assert [Symbol("a"), Symbol("c")] in accepted_words + assert [Symbol("d"), Symbol("e")] in accepted_words + assert [Symbol("d"), Symbol("e"), Symbol("f")] in accepted_words + assert len(accepted_words) == 5 + + def test_for_duplicate_generation(self): + nfa = get_nfa_example_with_duplicates() + accepted_words = list(nfa.get_accepted_words()) + assert [Symbol("a"), Symbol("c")] in accepted_words + assert [Symbol("b"), Symbol("c")] in accepted_words + assert len(accepted_words) == 2 + + def test_cyclic_word_generation(self): + nfa = get_cyclic_nfa_example() + accepted_words = list(nfa.get_accepted_words(5)) + assert ["a", "d", "g"] in accepted_words + assert ["a", "b", "c", "d", "g"] in accepted_words + assert ["a", "d", "e", "f", "g"] in accepted_words + assert ["b", "f", "g"] in accepted_words + assert ["b", "f", "e", "f", "g"] in accepted_words + assert len(accepted_words) == 5 + + def test_final_state_at_start_generation(self): + nfa = get_nfa_example_with_final_state_at_start() + accepted_words = list(nfa.get_accepted_words()) + assert accepted_words == [[]] + + def test_start_state_at_the_end_generation(self): + nfa = get_nfa_example_with_start_state_at_the_end() + accepted_words = list(nfa.get_accepted_words(5)) + assert [] in accepted_words + assert ["a", "b", "c"] in accepted_words + assert ["a", "b", "e", "b", "c"] in accepted_words + assert ["d", "b", "c"] in accepted_words + assert ["d", "b", "e", "b", "c"] in accepted_words + assert len(accepted_words) == 5 + + +def get_nfa_example_for_word_generation(): + """ + Gets Nondeterministic Finite Automaton \ + example for the word generation test. 
+ """ + nfa = NondeterministicFiniteAutomaton(start_states={0, 4}, + final_states={3, 4, 6, 8}) + nfa.add_transitions([ + (0, "a", 1), + (0, "a", 2), + (1, "a", 1), + (2, "b", 3), + (2, "c", 3), + (4, "d", 5), + (5, "e", 6), + (5, "e", 7), + (7, "f", 8), + ]) + return nfa + + +def get_nfa_example_with_duplicates(): + """ Gets NFA example with duplicate word chains """ + nfa = NondeterministicFiniteAutomaton(start_states={0, 1, 5, 6}, + final_states={3, 4, 8}) + nfa.add_transitions([ + (0, "a", 2), + (1, "a", 2), + (2, "c", 3), + (2, "c", 4), + (5, "a", 7), + (6, "b", 7), + (7, "c", 8), + ]) + return nfa + + +def get_cyclic_nfa_example(): + """ Gets NFA example with several cycles on path to final """ + nfa = NondeterministicFiniteAutomaton(start_states={0, 5}, + final_states={4}) + nfa.add_transitions([ + (0, "a", 1), + (1, "b", 2), + (2, "c", 1), + (1, "d", 3), + (3, "e", 6), + (6, "f", 3), + (3, "g", 4), + (5, "b", 6), + ]) + return nfa + + +def get_nfa_example_with_final_state_at_start(): + """ Gets NFA example with final state at start """ + nfa = NondeterministicFiniteAutomaton(start_states={0, 5}, + final_states={0}) + nfa.add_transitions([ + (0, "a", 1), + (1, "b", 2), + (2, "c", 3), + (2, "d", 4), + (5, "e", 1), + (5, "e", 2), + ]) + return nfa + + +def get_nfa_example_with_start_state_at_the_end(): + """ Gets NFA example with start state at the end """ + nfa = NondeterministicFiniteAutomaton(start_states={0, 3, 4}, + final_states={3}) + nfa.add_transitions([ + (0, "a", 1), + (1, "b", 2), + (2, "e", 1), + (2, "c", 3), + (4, "d", 1), + ]) + return nfa diff --git a/pyformlang/finite_automaton/tests/test_nondeterministic_transition_function.py b/pyformlang/finite_automaton/tests/test_nondeterministic_transition_function.py index 56ec2db..b74440a 100644 --- a/pyformlang/finite_automaton/tests/test_nondeterministic_transition_function.py +++ b/pyformlang/finite_automaton/tests/test_nondeterministic_transition_function.py @@ -1,8 +1,9 @@ """ Test the 
nondeterministic transition functions """ -from pyformlang.finite_automaton import State, Symbol, \ - NondeterministicTransitionFunction + +from pyformlang.finite_automaton import NondeterministicTransitionFunction +from pyformlang.finite_automaton import State, Symbol, Epsilon class TestNondeterministicTransitionFunction: @@ -59,19 +60,19 @@ def test_remove_transitions(self): symb_by = Symbol("a") transition_function.add_transition(s_from, symb_by, s_to) assert transition_function.remove_transition(s_from, - symb_by, - s_to) == 1 + symb_by, + s_to) == 1 assert len(transition_function(s_to, symb_by)) == 0 assert transition_function.get_number_transitions() == 0 assert len(transition_function(s_from, symb_by)) == 0 assert transition_function.remove_transition(s_from, - symb_by, - s_to) == 0 + symb_by, + s_to) == 0 transition_function.add_transition(s_from, symb_by, s_to) transition_function.add_transition(s_from, symb_by, s_from) assert transition_function.remove_transition(s_from, - symb_by, - s_to) == 1 + symb_by, + s_to) == 1 assert transition_function.get_number_transitions() == 1 assert len(transition_function(s_from, symb_by)) == 1 @@ -87,3 +88,23 @@ def test_call(self): assert len(transition_function(s_to, symb_by)) == 0 transition_function.add_transition(s_from, symb_by, s_from) assert transition_function(s_from, symb_by) == {s_to, s_from} + + def test_get_transitions_from(self): + """ Tests iteration of transitions from specified state """ + transition_function = NondeterministicTransitionFunction() + states = [State(x) for x in range(0, 5)] + symbol_a = Symbol("a") + symbol_b = Symbol("b") + symbol_c = Symbol("c") + epsilon = Epsilon() + transition_function.add_transition(states[0], symbol_a, states[1]) + transition_function.add_transition(states[1], symbol_b, states[2]) + transition_function.add_transition(states[1], symbol_c, states[2]) + transition_function.add_transition(states[1], symbol_c, states[3]) + transition_function.add_transition(states[1], 
epsilon, states[4]) + transitions = list(transition_function.get_transitions_from(states[1])) + assert (symbol_b, states[2]) in transitions + assert (symbol_c, states[2]) in transitions + assert (symbol_c, states[3]) in transitions + assert (epsilon, states[4]) in transitions + assert len(transitions) == 4 diff --git a/pyformlang/finite_automaton/tests/test_state.py b/pyformlang/finite_automaton/tests/test_state.py index 3083a79..0d3d150 100644 --- a/pyformlang/finite_automaton/tests/test_state.py +++ b/pyformlang/finite_automaton/tests/test_state.py @@ -1,6 +1,7 @@ """ Tests the states """ + from pyformlang.finite_automaton import State diff --git a/pyformlang/finite_automaton/tests/test_symbol.py b/pyformlang/finite_automaton/tests/test_symbol.py index 74c560f..5d7be9a 100644 --- a/pyformlang/finite_automaton/tests/test_symbol.py +++ b/pyformlang/finite_automaton/tests/test_symbol.py @@ -1,6 +1,7 @@ """ Tests for the symbols """ + from pyformlang.finite_automaton import Symbol diff --git a/pyformlang/finite_automaton/tests/test_transition_function.py b/pyformlang/finite_automaton/tests/test_transition_function.py deleted file mode 100644 index d0bcaa2..0000000 --- a/pyformlang/finite_automaton/tests/test_transition_function.py +++ /dev/null @@ -1,90 +0,0 @@ -""" -Test the transition functions -""" -from pyformlang.finite_automaton import State, Symbol, TransitionFunction, \ - DuplicateTransitionError, InvalidEpsilonTransition, Epsilon -import pytest - - -class TestTransitionFunction: - """ Tests the transitions functions - """ - - def test_creation(self): - """ Tests the creation of transition functions - """ - transition_function = TransitionFunction() - assert transition_function is not None - - # pylint: disable=protected-access - def test_add_transitions(self): - """ Tests the addition of transitions - """ - transition_function = TransitionFunction() - s_from = State(10) - s_to = State(11) - s_to_bis = State(2) - symb_by = Symbol("abc") - 
transition_function.add_transition(s_from, symb_by, s_to) - transition_function.add_transition(s_from, symb_by, s_to) - with pytest.raises(DuplicateTransitionError) as dte: - transition_function.add_transition(s_from, symb_by, s_to_bis) - dte = dte.value - assert dte.s_from == s_from - assert dte.s_to == s_to_bis - assert dte.symb_by == symb_by - assert dte.s_to_old == s_to - - def test_number_transitions(self): - """ Tests the number of transitions - """ - transition_function = TransitionFunction() - assert transition_function.get_number_transitions() == 0 - s_from = State(110) - s_to = State(12) - s_to_bis = State(2) - symb_by = Symbol("a") - transition_function.add_transition(s_from, symb_by, s_to) - assert transition_function.get_number_transitions() == 1 - transition_function.add_transition(s_from, symb_by, s_to) - assert transition_function.get_number_transitions() == 1 - symb_by2 = Symbol("bc") - transition_function.add_transition(s_from, symb_by2, s_to_bis) - assert transition_function.get_number_transitions() == 2 - transition_function.add_transition(s_to, symb_by, s_to_bis) - assert transition_function.get_number_transitions() == 3 - - def test_remove_transitions(self): - """ Tests the removal of transitions - """ - transition_function = TransitionFunction() - s_from = State(10) - s_to = State(11) - symb_by = Symbol("abc") - transition_function.add_transition(s_from, symb_by, s_to) - assert transition_function.remove_transition(s_from, - symb_by, - s_to) == 1 - assert transition_function.get_number_transitions() == 0 - assert transition_function(s_to, symb_by) == [] - assert transition_function(s_from, symb_by) == [] - assert transition_function.remove_transition(s_from, - symb_by, - s_to) == 0 - - def test_call(self): - """ Tests the call of a transition function - """ - transition_function = TransitionFunction() - s_from = State(0) - s_to = State(1) - symb_by = Symbol("a") - transition_function.add_transition(s_from, symb_by, s_to) - assert 
transition_function(s_from, symb_by) == [s_to] - assert transition_function(s_to, symb_by) == [] - - def test_invalid_epsilon(self): - """ Tests invalid transition """ - transition_function = TransitionFunction() - with pytest.raises(InvalidEpsilonTransition): - transition_function.add_transition("1", Epsilon(), "2") diff --git a/pyformlang/finite_automaton/transition_function.py b/pyformlang/finite_automaton/transition_function.py index dc2db5c..4682e89 100644 --- a/pyformlang/finite_automaton/transition_function.py +++ b/pyformlang/finite_automaton/transition_function.py @@ -1,230 +1,81 @@ """ -Representation of a transition function +General transition function representation """ -import copy -from typing import List, Any -from pyformlang.finite_automaton.epsilon import Epsilon +from typing import Dict, Set, Tuple, Iterable, Iterator +from abc import abstractmethod from .state import State from .symbol import Symbol -class InvalidEpsilonTransition(Exception): - """Exception raised when an epsilon transition is created in - deterministic automaton""" +class TransitionFunction(Iterable[Tuple[State, Symbol, State]]): + """ General transition function representation """ + @abstractmethod + def add_transition(self, + s_from: State, + symb_by: Symbol, + s_to: State) -> int: + """ Adds a new transition to the function """ + raise NotImplementedError -class TransitionFunction: - """ A transition function in a finite automaton. - - This is a deterministic transition function. - - Attributes - ---------- - _transitions : dict - A dictionary which contains the transitions of a finite automaton - - Examples - -------- - - >>> transition = TransitionFunction() - >>> transition.add_transition(State(0), Symbol("a"), State(1)) - - Creates a transition function and adds a transition. 
- - """ - - def __init__(self): - self._transitions = {} - - def add_transition(self, s_from: Any, symb_by: Any, - s_to: Any) -> int: - """ Adds a new transition to the function - - Parameters - ---------- - s_from : :class:`~pyformlang.finite_automaton.State` - The source state - symb_by : :class:`~pyformlang.finite_automaton.Symbol` - The transition symbol - s_to : :class:`~pyformlang.finite_automaton.State` - The destination state - - - Returns - -------- - done : int - Always 1 - - Raises - -------- - DuplicateTransitionError - If the transition already exists - - Examples - -------- - - >>> transition = TransitionFunction() - >>> transition.add_transition(State(0), Symbol("a"), State(1)) - - """ - if symb_by == Epsilon(): - raise InvalidEpsilonTransition() - if s_from in self._transitions: - if symb_by in self._transitions[s_from]: - if self._transitions[s_from][symb_by] != s_to: - raise DuplicateTransitionError(s_from, - symb_by, - s_to, - self._transitions[s_from][ - symb_by]) - else: - self._transitions[s_from][symb_by] = s_to - else: - self._transitions[s_from] = {} - self._transitions[s_from][symb_by] = s_to - return 1 - - # pylint: disable=duplicate-code - def remove_transition(self, s_from: State, symb_by: Symbol, + @abstractmethod + def remove_transition(self, + s_from: State, + symb_by: Symbol, s_to: State) -> int: - """ Removes a transition to the function - - Parameters - ---------- - s_from : :class:`~pyformlang.finite_automaton.State` - The source state - symb_by : :class:`~pyformlang.finite_automaton.Symbol` - The transition symbol - s_to : :class:`~pyformlang.finite_automaton.State` - The destination state - - - Returns - -------- - done : int - 1 is the transition was found, 0 otherwise - - Examples - -------- - - >>> transition = TransitionFunction() - >>> transition.add_transition(State(0), Symbol("a"), State(1)) - >>> transition.remove_transition(State(0), Symbol("a"), State(1)) - - """ - if s_from in self._transitions and \ - symb_by in 
self._transitions[s_from] and \ - s_to == self._transitions[s_from][symb_by]: - del self._transitions[s_from][symb_by] - return 1 - return 0 - - def __call__(self, s_from: State, symb_by: Symbol = None) -> List[State]: - """ Calls the transition function as a real function - - Parameters - ---------- - s_from : :class:`~pyformlang.finite_automaton.State` - The source state - symb_by : :class:`~pyformlang.finite_automaton.Symbol` - The transition symbol - - Returns - ---------- - s_from : list of :class:`~pyformlang.finite_automaton.State` - The destination state, in a list - - """ - if s_from in self._transitions: - if symb_by is not None: - if symb_by in self._transitions[s_from]: - return [self._transitions[s_from][symb_by]] - else: - return self._transitions[s_from].items() - return [] + """ Removes a transition from the function """ + raise NotImplementedError + @abstractmethod def get_number_transitions(self) -> int: - """ Gives the number of transitions describe by the deterministic \ - function - - Returns - ---------- - n_transitions : int - The number of deterministic transitions - - Examples - -------- + """ Gives the number of transitions described by the function """ + raise NotImplementedError - >>> transition = TransitionFunction() - >>> transition.add_transition(State(0), Symbol("a"), State(1)) - >>> transition.get_number_transitions() - 1 + def __len__(self) -> int: + return self.get_number_transitions() + @abstractmethod + def __call__(self, s_from: State, symb_by: Symbol) -> Set[State]: """ - return sum(len(x) for x in self._transitions.values()) - - def get_edges(self): - """ Gets the edges - - Returns - ---------- - edges : generator of (:class:`~pyformlang.finite_automaton.State`, \ - :class:`~pyformlang.finite_automaton.Symbol`,\ - :class:`~pyformlang.finite_automaton.State`) - A generator of edges + Calls the transition function + as a real function for given state and symbol. 
""" - for state, transitions in self._transitions.items(): - for symbol, next_states in transitions.items(): - yield state, symbol, next_states - - def __len__(self): - return self.get_number_transitions() - - def __iter__(self): + raise NotImplementedError + + def __contains__(self, transition: Tuple[State, Symbol, State]) -> bool: + """ Whether the given transition is present in the function """ + s_from, symb_by, s_to = transition + return s_to in self(s_from, symb_by) + + @abstractmethod + def get_transitions_from(self, s_from: State) \ + -> Iterable[Tuple[Symbol, State]]: + """ Gets transitions from the given state """ + raise NotImplementedError + + def get_next_states_from(self, s_from: State) -> Set[State]: + """ Gets a set of states that are next to the given one """ + next_states = set() + for _, next_state in self.get_transitions_from(s_from): + next_states.add(next_state) + return next_states + + @abstractmethod + def get_edges(self) -> Iterable[Tuple[State, Symbol, State]]: + """ Gets the edges """ + raise NotImplementedError + + def __iter__(self) -> Iterator[Tuple[State, Symbol, State]]: yield from self.get_edges() - def to_dict(self): - """ - Get the dictionary representation of the transition function. The \ - keys of the dictionary are the source nodes. The items are \ - dictionaries where the keys are the symbols of the transitions and \ - the items are the set of target nodes. - - Returns - ------- - transition_dict : dict - The transitions as a dictionary. 
- """ - return copy.deepcopy(self._transitions) - - -class DuplicateTransitionError(Exception): - """ Signals a duplicated transition - - Parameters - ---------- - s_from : :class:`~pyformlang.finite_automaton.State` - The source state - symb_by : :class:`~pyformlang.finite_automaton.Symbol` - The transition symbol - s_to : :class:`~pyformlang.finite_automaton.State` - The wanted new destination state - s_to_old : :class:`~pyformlang.finite_automaton.State` - The old destination state - - """ - - def __init__(self, - s_from: State, - symb_by: Symbol, - s_to: State, - s_to_old: State): - super().__init__("Transition from " + str(s_from) + - " by " + str(symb_by) + - " goes to " + str(s_to_old) + " not " + str(s_to)) - self.s_from = s_from - self.symb_by = symb_by - self.s_to = s_to - self.s_to_old = s_to_old + @abstractmethod + def to_dict(self) -> Dict[State, Dict[Symbol, Set[State]]]: + """ Gets the dictionary representation of the transition function """ + raise NotImplementedError + + @abstractmethod + def is_deterministic(self) -> bool: + """ Whether the transition function is deterministic """ + raise NotImplementedError diff --git a/pyformlang/finite_automaton/utils.py b/pyformlang/finite_automaton/utils.py new file mode 100644 index 0000000..ed7e4a2 --- /dev/null +++ b/pyformlang/finite_automaton/utils.py @@ -0,0 +1,96 @@ +""" Utility for finite automata """ + +from typing import Dict, List, AbstractSet, Iterable, Optional, Hashable +from numpy import empty + +from .state import State +from .symbol import Symbol +from .epsilon import Epsilon + + +def to_state(given: Hashable) -> State: + """ Transforms the input into a state + + Parameters + ---------- + given : any + What we want to transform + """ + if isinstance(given, State): + return given + return State(given) + + +def to_symbol(given: Hashable) -> Symbol: + """ Transforms the input into a symbol + + Parameters + ---------- + given : any + What we want to transform + """ + if isinstance(given, Symbol): 
+ return given + if given in ("epsilon", "É›"): + return Epsilon() + return Symbol(given) + + +def to_single_state(l_states: Iterable[State]) -> State: + """ Merge a list of states + + Parameters + ---------- + l_states : list of :class:`~pyformlang.finite_automaton.State` + A list of states + + Returns + ---------- + state : :class:`~pyformlang.finite_automaton.State` + The merged state + """ + values = [] + for state in l_states: + if state is not None: + values.append(str(state.value)) + else: + values.append("TRASH") + values = sorted(values) + return State(";".join(values)) + + +class PreviousTransitions: + """ + Previous transitions for deterministic automata + minimization algorithm. + """ + + def __init__(self, + states: AbstractSet[State], + symbols: AbstractSet[Symbol]) -> None: + self._to_index_state: Dict[State, int] = {} + for i, state in enumerate(states): + self._to_index_state[state] = i + 1 + self._to_index_symbol: Dict[Symbol, int] = {} + for i, symbol in enumerate(symbols): + self._to_index_symbol[symbol] = i + self._conversion = empty((len(states) + 1, len(symbols)), + dtype=State) + + def add(self, + next0: Optional[State], + symbol: Symbol, + state: State) -> None: + """ Internal """ + i_next0 = self._to_index_state[next0] if next0 else 0 + i_symbol = self._to_index_symbol[symbol] + if self._conversion[i_next0, i_symbol] is None: + self._conversion[i_next0, i_symbol] = [state] + else: + self._conversion[i_next0, i_symbol].append(state) + + def get(self, next0: Optional[State], symbol: Symbol) -> List[State]: + """ Internal """ + i_next0 = self._to_index_state[next0] if next0 else 0 + i_symbol = self._to_index_symbol[symbol] + return self._conversion[i_next0, i_symbol] or [] diff --git a/pyformlang/pda/pda.py b/pyformlang/pda/pda.py index 82c3b31..1deb75d 100644 --- a/pyformlang/pda/pda.py +++ b/pyformlang/pda/pda.py @@ -9,7 +9,7 @@ from pyformlang import cfg from pyformlang import finite_automaton -from pyformlang import regular_expression 
+from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.pda.cfg_variable_converter import CFGVariableConverter from .epsilon import Epsilon from .stack_symbol import StackSymbol @@ -17,7 +17,6 @@ from .transition_function import TransitionFunction from .utils import PDAObjectCreator from ..finite_automaton import FiniteAutomaton -from ..finite_automaton.finite_automaton import add_start_state_to_graph INPUT_SYMBOL = 1 @@ -440,7 +439,7 @@ def _initialize_production_from_start_in_to_cfg(self, start): state)])) return productions - def intersection(self, other: Any) -> "PDA": + def intersection(self, other: DeterministicFiniteAutomaton) -> "PDA": """ Gets the intersection of the language L generated by the \ current PDA when accepting by final state with something else @@ -468,21 +467,11 @@ def intersection(self, other: Any) -> "PDA": When intersecting with something else than a regex or a finite automaton """ - if isinstance(other, regular_expression.Regex): - enfa = other.to_epsilon_nfa() - other = enfa.to_deterministic() - elif isinstance(other, FiniteAutomaton): - is_deterministic = other.is_deterministic() - if not is_deterministic: - other = other.to_deterministic() - else: - raise NotImplementedError - start_state_other = other.start_states - if len(start_state_other) == 0: + start_state_other = other.start_state + if not start_state_other: return PDA() pda_state_converter = _PDAStateConverter(self._states, other.states) - start_state_other = list(start_state_other)[0] - final_state_other = other.final_states + final_states_other = other.final_states start = pda_state_converter.to_pda_combined_state(self._start_state, start_state_other) pda = PDA(start_state=start, @@ -494,43 +483,40 @@ def intersection(self, other: Any) -> "PDA": while to_process: state_in, state_dfa = to_process.pop() if (state_in in self._final_states and state_dfa in - final_state_other): + final_states_other): pda.add_final_state( 
pda_state_converter.to_pda_combined_state(state_in, state_dfa)) for symbol in symbols: if symbol == Epsilon(): symbol_dfa = finite_automaton.Epsilon() + next_state_dfa = state_dfa else: symbol_dfa = finite_automaton.Symbol(symbol.value) - if symbol == Epsilon(): - next_states_dfa = [state_dfa] - else: - next_states_dfa = other(state_dfa, symbol_dfa) - if len(next_states_dfa) == 0: + next_state_dfa = other.get_next_state(state_dfa, symbol_dfa) + if not next_state_dfa: continue for stack_symbol in self._stack_alphabet: next_states_self = self._transition_function(state_in, symbol, stack_symbol) for next_state, next_stack in next_states_self: - for next_state_dfa in next_states_dfa: - pda.add_transition( - pda_state_converter.to_pda_combined_state( - state_in, - state_dfa), - symbol, - stack_symbol, - pda_state_converter.to_pda_combined_state( - next_state, - next_state_dfa), - next_stack) - if (next_state, next_state_dfa) not in processed: - to_process.append((next_state, next_state_dfa)) - processed.add((next_state, next_state_dfa)) + pda.add_transition( + pda_state_converter.to_pda_combined_state( + state_in, + state_dfa), + symbol, + stack_symbol, + pda_state_converter.to_pda_combined_state( + next_state, + next_state_dfa), + next_stack) + if (next_state, next_state_dfa) not in processed: + to_process.append((next_state, next_state_dfa)) + processed.add((next_state, next_state_dfa)) return pda - def __and__(self, other): + def __and__(self, other: DeterministicFiniteAutomaton) -> "PDA": """ Gets the intersection of the current PDA with something else Equivalent to: @@ -582,7 +568,7 @@ def to_networkx(self) -> nx.MultiDiGraph: peripheries=2 if state in self.final_states else 1, label=state.value) if state == self._start_state: - add_start_state_to_graph(graph, state) + self.__add_start_state_to_graph(graph, state) if self._start_stack_symbol is not None: graph.add_node("INITIAL_STACK_HIDDEN", label=json.dumps(self._start_stack_symbol.value), @@ -661,6 +647,17 @@ 
def write_as_dot(self, filename): """ write_dot(self.to_networkx(), filename) + @staticmethod + def __add_start_state_to_graph(graph: nx.MultiDiGraph, state: State) -> None: + """ Adds a starting node to a given graph """ + graph.add_node("starting_" + str(state.value), + label="", + shape=None, + height=.0, + width=.0) + graph.add_edge("starting_" + str(state.value), + state.value) + def _prepend_input_symbol_to_the_bodies(bodies, transition): to_prepend = cfg.Terminal(transition[INPUT][INPUT_SYMBOL].value) diff --git a/pyformlang/pda/tests/test_pda.py b/pyformlang/pda/tests/test_pda.py index 043f63b..569415e 100644 --- a/pyformlang/pda/tests/test_pda.py +++ b/pyformlang/pda/tests/test_pda.py @@ -3,7 +3,8 @@ from pyformlang.pda import PDA, State, StackSymbol, Symbol, Epsilon from pyformlang.cfg import Terminal -from pyformlang import finite_automaton +from pyformlang.finite_automaton import DeterministicFiniteAutomaton +from pyformlang.finite_automaton import State as FAState, Symbol as FASymbol from pyformlang.pda.utils import PDAObjectCreator from pyformlang.regular_expression import Regex @@ -285,11 +286,11 @@ def test_intersection_regex(self): pda.add_transition(state_q, state_e, state_z, state_q, []) pda.add_transition(state_q, Epsilon(), state_x0, state_r, []) - state_s = finite_automaton.State("s") - state_t = finite_automaton.State("t") - i_dfa = finite_automaton.Symbol("i") - e_dfa = finite_automaton.Symbol("e") - dfa = finite_automaton.DeterministicFiniteAutomaton( + state_s = FAState("s") + state_t = FAState("t") + i_dfa = FASymbol("i") + e_dfa = FASymbol("e") + dfa = DeterministicFiniteAutomaton( states={state_s, state_t}, input_symbols={i_dfa, e_dfa}, start_state=state_s, @@ -312,16 +313,15 @@ def test_intersection_regex(self): assert cfg.contains([i_cfg, i_cfg, e_cfg, e_cfg, e_cfg]) - new_pda = pda.intersection( - finite_automaton.DeterministicFiniteAutomaton()) + new_pda = pda.intersection(DeterministicFiniteAutomaton()) assert 
new_pda.get_number_transitions() == 0 - new_pda = pda.intersection(Regex("")) + new_pda = pda.intersection(Regex("").to_minimal_dfa()) pda_es = new_pda.to_empty_stack() cfg = pda_es.to_cfg() assert not cfg - new_pda = pda & Regex("z|y").to_epsilon_nfa() + new_pda = pda & Regex("z|y").to_minimal_dfa() pda_es = new_pda.to_empty_stack() cfg = pda_es.to_cfg() assert not cfg diff --git a/pyformlang/regular_expression/python_regex.py b/pyformlang/regular_expression/python_regex.py index fd63b5c..3f6f48f 100644 --- a/pyformlang/regular_expression/python_regex.py +++ b/pyformlang/regular_expression/python_regex.py @@ -2,16 +2,16 @@ A class to read Python format regex """ -import re -import string -import unicodedata +from typing import List, Tuple, Union, Pattern +from re import compile as compile_regex +from string import printable +from unicodedata import lookup -# pylint: disable=cyclic-import -from pyformlang.regular_expression import regex, MisformedRegexError -from pyformlang.regular_expression.regex_reader import \ - WRONG_PARENTHESIS_MESSAGE +from .regex_objects import MisformedRegexError +from .regex_reader import WRONG_PARENTHESIS_MESSAGE +from .regex import Regex -PRINTABLES = list(string.printable) +PRINTABLES = list(printable) TRANSFORMATIONS = { "|": "\\|", @@ -55,7 +55,7 @@ ESCAPED_OCTAL = ["\\0", "\\1", "\\2", "\\3", "\\4", "\\5", "\\6", "\\7"] -class PythonRegex(regex.Regex): +class PythonRegex(Regex): """ Represents a regular expression as used in Python. 
It adds the following features to the basic regex: @@ -98,11 +98,11 @@ class PythonRegex(regex.Regex): """ - def __init__(self, python_regex): - if not isinstance(python_regex, str): - python_regex = python_regex.pattern + def __init__(self, python_regex: Union[str, Pattern[str]]) -> None: + if isinstance(python_regex, str): + compile_regex(python_regex) # Check if it is valid else: - re.compile(python_regex) # Check if it is valid + python_regex = python_regex.pattern self._python_regex = python_regex self._replace_shortcuts() @@ -114,8 +114,8 @@ def __init__(self, python_regex): self._python_regex = self._python_regex.lstrip('\b') super().__init__(self._python_regex) - def _separate(self): - regex_temp = [] + def _separate(self) -> None: + regex_temp: List[str] = [] for symbol in self._python_regex: if self._should_escape_next_symbol(regex_temp): regex_temp[-1] += symbol @@ -130,16 +130,19 @@ def _separate(self): regex_temp_dot.append(symbol) self._python_regex = " ".join(regex_temp_dot) - def _preprocess_brackets(self): - regex_temp = [] + def _preprocess_brackets(self) -> None: + regex_temp: List[str] = [] in_brackets = 0 - in_brackets_temp = [] + in_brackets_temp: List[List[str]] = [] for symbol in self._python_regex: - if symbol == "[" and not self._should_escape_next_symbol(regex_temp) and \ - (in_brackets == 0 or not self._should_escape_next_symbol(in_brackets_temp[-1])): + if symbol == "[" and \ + not self._should_escape_next_symbol(regex_temp) and \ + (in_brackets == 0 or \ + not self._should_escape_next_symbol(in_brackets_temp[-1])): in_brackets += 1 in_brackets_temp.append([]) - elif symbol == "]" and in_brackets >= 1 and not self._should_escape_next_symbol(in_brackets_temp[-1]): + elif symbol == "]" and in_brackets >= 1 and \ + not self._should_escape_next_symbol(in_brackets_temp[-1]): if len(in_brackets_temp) == 1: regex_temp.append("(") regex_temp += self._preprocess_brackets_content( @@ -169,11 +172,12 @@ def _preprocess_brackets(self): 
self._python_regex = "".join(regex_temp) @staticmethod - def _recombine(regex_to_recombine): - temp = [] + def _recombine(regex_to_recombine: List[str]) -> List[str]: + temp: List[str] = [] idx = 0 while idx < len(regex_to_recombine): - if regex_to_recombine[idx] == "\\x" and idx < len(regex_to_recombine) - 2 \ + if regex_to_recombine[idx] == "\\x" \ + and idx < len(regex_to_recombine) - 2 \ and regex_to_recombine[idx + 1] in HEXASTRING \ and regex_to_recombine[idx + 2] in HEXASTRING: next_str = "".join(regex_to_recombine[idx + 1:idx + 3]) @@ -193,7 +197,7 @@ def _recombine(regex_to_recombine): while regex_to_recombine[idx_end] != "}": idx_end += 1 name = "".join(regex_to_recombine[idx + 2: idx_end]) - name = unicodedata.lookup(name) + name = lookup(name) temp.append(TRANSFORMATIONS.get(name, name)) idx = idx_end + 1 elif regex_to_recombine[idx] == "\\u": @@ -217,30 +221,35 @@ def _recombine(regex_to_recombine): res.append(x) return res - def _preprocess_brackets_content(self, bracket_content): - bracket_content_temp = [] + def _preprocess_brackets_content(self, bracket_content: List[str]) \ + -> List[str]: + bracket_content_temp: List[str] = [] previous_is_valid_for_range = False for i, symbol in enumerate(bracket_content): # We have a range - if symbol == "-" and not self._should_escape_next_symbol(bracket_content_temp): - if not previous_is_valid_for_range or i == len(bracket_content) - 1: + if symbol == "-" and \ + not self._should_escape_next_symbol(bracket_content_temp): + if not previous_is_valid_for_range or \ + i == len(bracket_content) - 1: # False alarm, no range bracket_content_temp.append("-") previous_is_valid_for_range = True else: # We insert all the characters in the range - bracket_content[i - 1] = self._recombine(bracket_content[i - 1]) + recombined = self._recombine(bracket_content[i - 1].split()) + bracket_content[i - 1] = "".join(recombined) for j in range(ord(bracket_content[i - 1][-1]) + 1, ord(bracket_content[i + 1][-1])): next_char = 
chr(j) if next_char in TRANSFORMATIONS: - bracket_content_temp.append(TRANSFORMATIONS[next_char]) + bracket_content_temp.append( + TRANSFORMATIONS[next_char]) else: bracket_content_temp.append(next_char) previous_is_valid_for_range = False else: if self._should_escape_next_symbol(bracket_content_temp): - bracket_content_temp[-1] += symbol + bracket_content_temp[-1] += (symbol) else: bracket_content_temp.append(symbol) if (i != 0 and bracket_content[i - 1] == "-" @@ -254,15 +263,15 @@ def _preprocess_brackets_content(self, bracket_content): return bracket_content_temp @staticmethod - def _preprocess_negation(bracket_content): + def _preprocess_negation(bracket_content: List[str]) -> List[str]: if not bracket_content or bracket_content[0] != "^": return bracket_content # We inverse everything return [x for x in ESCAPED_PRINTABLES if x not in bracket_content] @staticmethod - def _insert_or(l_to_modify): - res = [] + def _insert_or(l_to_modify: List[str]) -> List[str]: + res: List[str] = [] for x in l_to_modify: res.append(x) res.append("|") @@ -270,7 +279,8 @@ def _insert_or(l_to_modify): return res[:-1] return res - def _find_previous_opening_parenthesis(self, split_sequence): + def _find_previous_opening_parenthesis(self, + split_sequence: List[str]) -> int: counter = 0 for i in range(len(split_sequence) - 1, -1, -1): temp = split_sequence[i] @@ -283,8 +293,8 @@ def _find_previous_opening_parenthesis(self, split_sequence): raise MisformedRegexError(WRONG_PARENTHESIS_MESSAGE, self._python_regex) - def _preprocess_positive_closure(self): - regex_temp = [] + def _preprocess_positive_closure(self) -> None: + regex_temp: List[str] = [] for symbol in self._python_regex: if symbol != "+" or (self._should_escape_next_symbol(regex_temp)): if self._should_escape_next_symbol(regex_temp): @@ -304,7 +314,8 @@ def _preprocess_positive_closure(self): self._python_regex = "".join(regex_temp) @staticmethod - def _is_repetition(regex_list, idx): + def _is_repetition(regex_list: 
List[str], idx: int) \ + -> Union[Tuple[int, int, int], Tuple[int, int], None]: if regex_list[idx] == "{": end = idx for i in range(idx + 1, len(regex_list)): @@ -314,7 +325,8 @@ def _is_repetition(regex_list, idx): inner = "".join(regex_list[idx + 1:end]) if "," in inner: split = inner.split(",") - if len(split) != 2 or not split[0].isdigit() or not split[1].isdigit(): + if len(split) != 2 or not split[0].isdigit() or \ + not split[1].isdigit(): return None return int(split[0]), int(split[1]), end if inner.isdigit(): @@ -322,10 +334,10 @@ def _is_repetition(regex_list, idx): return None @staticmethod - def _find_repeated_sequence(regex_list): + def _find_repeated_sequence(regex_list: List[str]) -> List[str]: if regex_list[-1] != ")": return [regex_list[-1]] - res = [")"] + res: List[str] = [")"] counter = -1 for i in range(len(regex_list) - 2, -1, -1): if regex_list[i] == "(": @@ -340,8 +352,8 @@ def _find_repeated_sequence(regex_list): res.append(regex_list[i]) return [] - def _add_repetition(self, regex_list): - res = [] + def _add_repetition(self, regex_list: List[str]) -> List[str]: + res: List[str] = [] idx = 0 while idx < len(regex_list): rep = self._is_repetition(regex_list, idx) @@ -349,7 +361,7 @@ def _add_repetition(self, regex_list): res.append(regex_list[idx]) idx += 1 elif len(rep) == 2: - n_rep, end = rep + n_rep, end = rep[0], rep[1] repeated = self._find_repeated_sequence(res) for _ in range(n_rep - 1): res.extend(repeated) @@ -365,8 +377,8 @@ def _add_repetition(self, regex_list): idx = end + 1 return res - def _preprocess_optional(self): - regex_temp = [] + def _preprocess_optional(self) -> None: + regex_temp: List[str] = [] for symbol in self._python_regex: if symbol == "?": if regex_temp[-1] == ")": @@ -383,11 +395,11 @@ def _preprocess_optional(self): self._python_regex = "".join(regex_temp) @staticmethod - def _should_escape_next_symbol(regex_temp): - return regex_temp and regex_temp[-1] == "\\" + def _should_escape_next_symbol(regex_temp: 
List[str]) -> bool: + return bool(regex_temp) and regex_temp[-1] == "\\" - def _escape_in_brackets(self): - regex_temp = [] + def _escape_in_brackets(self) -> None: + regex_temp: List[str] = [] in_brackets = False for symbol in self._python_regex: if (symbol == "[" @@ -406,7 +418,7 @@ def _escape_in_brackets(self): regex_temp.append(symbol) self._python_regex = "".join(regex_temp) - def _replace_shortcuts(self): + def _replace_shortcuts(self) -> None: for to_replace, replacement in SHORTCUTS.items(): self._python_regex = self._python_regex.replace(to_replace, replacement) diff --git a/pyformlang/regular_expression/regex.py b/pyformlang/regular_expression/regex.py index c8155b3..80274b0 100644 --- a/pyformlang/regular_expression/regex.py +++ b/pyformlang/regular_expression/regex.py @@ -1,16 +1,18 @@ """ Representation of a regular expression """ -from typing import Iterable -from pyformlang import finite_automaton -# pylint: disable=cyclic-import -import pyformlang.regular_expression.regex_objects -from pyformlang import cfg -from pyformlang.finite_automaton import State -# pylint: disable=cyclic-import -from pyformlang.regular_expression.regex_reader import RegexReader -from pyformlang import regular_expression +from typing import List, Iterable, Tuple, Optional + +from pyformlang.finite_automaton import FiniteAutomaton, EpsilonNFA +from pyformlang.finite_automaton import DeterministicFiniteAutomaton +from pyformlang.finite_automaton import State, Symbol, Epsilon as FAEpsilon +from pyformlang.cfg.cfg import CFG, Production +from pyformlang.cfg.utils import to_variable + +from .regex_reader import RegexReader +from .regex_objects import Epsilon as RegexEpsilon, \ + Empty, Concatenation, Union, KleeneStar class Regex(RegexReader): @@ -85,16 +87,11 @@ class Regex(RegexReader): """ - def __init__(self, regex): - self.head = None - self.sons = None + def __init__(self, regex: str) -> None: super().__init__(regex) + self.sons: List[Regex] # type: ignore self._counter = 0 
- self._initialize_enfa() - self._enfa = None - - def _initialize_enfa(self): - self._enfa = finite_automaton.EpsilonNFA() + self._enfa: Optional[EpsilonNFA] = None def get_number_symbols(self) -> int: """ Gives the number of symbols in the regex @@ -139,7 +136,13 @@ def get_number_operators(self) -> int: return 1 + sum(son.get_number_operators() for son in self.sons) return 0 - def to_epsilon_nfa(self): + def to_minimal_dfa(self) -> DeterministicFiniteAutomaton: + """ Builds minimal dfa from current regex """ + enfa = self._to_epsilon_nfa_internal() + dfa = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa) + return dfa.minimize() + + def to_epsilon_nfa(self) -> EpsilonNFA: """ Transforms the regular expression into an epsilon NFA Returns @@ -154,28 +157,31 @@ def to_epsilon_nfa(self): >>> regex.to_epsilon_nfa() """ - self._initialize_enfa() - s_initial = self._set_and_get_initial_state_in_enfa() - s_final = self._set_and_get_final_state_in_enfa() - self._process_to_enfa(s_initial, s_final) + return self._to_epsilon_nfa_internal().copy() + + def _to_epsilon_nfa_internal(self) -> EpsilonNFA: + """ Transforms the regular expression into an epsilon NFA """ + if self._enfa is None: + self._enfa = EpsilonNFA() + s_initial = self._set_and_get_initial_state_in_enfa(self._enfa) + s_final = self._set_and_get_final_state_in_enfa(self._enfa) + self._process_to_enfa(self._enfa, s_initial, s_final) return self._enfa - def _set_and_get_final_state_in_enfa(self): + def _set_and_get_final_state_in_enfa(self, enfa: EpsilonNFA) -> State: s_final = self._get_next_state_enfa() - self._enfa.add_final_state(s_final) - return s_final - - def _get_next_state_enfa(self): - s_final = finite_automaton.State(self._counter) - self._counter += 1 + enfa.add_final_state(s_final) return s_final - def _set_and_get_initial_state_in_enfa(self): + def _set_and_get_initial_state_in_enfa(self, enfa: EpsilonNFA) -> State: s_initial = self._get_next_state_enfa() - self._enfa.add_start_state(s_initial) + 
enfa.add_start_state(s_initial) return s_initial - def _process_to_enfa(self, s_from: State, s_to: State): + def _process_to_enfa(self, + enfa: EpsilonNFA, + s_from: State, + s_to: State) -> None: """ Internal function to add a regex to a given epsilon NFA Parameters @@ -186,72 +192,90 @@ def _process_to_enfa(self, s_from: State, s_to: State): The destination state """ if self.sons: - self._process_to_enfa_when_sons(s_from, s_to) + self._process_to_enfa_when_sons(enfa, s_from, s_to) else: - self._process_to_enfa_when_no_son(s_from, s_to) - - def _process_to_enfa_when_no_son(self, s_from, s_to): - if isinstance(self.head, - pyformlang.regular_expression.regex_objects.Epsilon): - self._add_epsilon_transition_in_enfa_between(s_from, s_to) - elif not isinstance(self.head, - pyformlang.regular_expression.regex_objects.Empty): - symbol = finite_automaton.Symbol(self.head.value) - self._enfa.add_transition(s_from, symbol, s_to) - - def _process_to_enfa_when_sons(self, s_from, s_to): - if isinstance( - self.head, - pyformlang.regular_expression.regex_objects.Concatenation): - self._process_to_enfa_concatenation(s_from, s_to) - elif isinstance(self.head, - pyformlang.regular_expression.regex_objects.Union): - self._process_to_enfa_union(s_from, s_to) - elif isinstance( - self.head, - pyformlang.regular_expression.regex_objects.KleeneStar): - self._process_to_enfa_kleene_star(s_from, s_to) - - def _process_to_enfa_kleene_star(self, s_from, s_to): - # pylint: disable=protected-access - state_first = self._get_next_state_enfa() - state_second = self._get_next_state_enfa() - self._add_epsilon_transition_in_enfa_between(state_second, state_first) - self._add_epsilon_transition_in_enfa_between(s_from, s_to) - self._add_epsilon_transition_in_enfa_between(s_from, state_first) - self._add_epsilon_transition_in_enfa_between(state_second, s_to) - self._process_to_enfa_son(state_first, state_second, 0) - - def _process_to_enfa_union(self, s_from, s_to): + 
self._process_to_enfa_when_no_son(enfa, s_from, s_to) + + def _process_to_enfa_when_sons(self, + enfa: EpsilonNFA, + s_from: State, + s_to: State) -> None: + if isinstance(self.head, Concatenation): + self._process_to_enfa_concatenation(enfa, s_from, s_to) + elif isinstance(self.head, Union): + self._process_to_enfa_union(enfa, s_from, s_to) + elif isinstance(self.head, KleeneStar): + self._process_to_enfa_kleene_star(enfa, s_from, s_to) + + def _process_to_enfa_when_no_son(self, + enfa: EpsilonNFA, + s_from: State, + s_to: State) -> None: + if isinstance(self.head, RegexEpsilon): + enfa.add_transition(s_from, FAEpsilon(), s_to) + elif not isinstance(self.head, Empty): + symbol = Symbol(self.head.value) + enfa.add_transition(s_from, symbol, s_to) + + def _process_to_enfa_union(self, + enfa: EpsilonNFA, + s_from: State, + s_to: State) -> None: son_number = 0 - self._create_union_branch_in_enfa(s_from, s_to, son_number) + self._create_union_branch_in_enfa(enfa, s_from, s_to, son_number) son_number = 1 - self._create_union_branch_in_enfa(s_from, s_to, son_number) + self._create_union_branch_in_enfa(enfa, s_from, s_to, son_number) - def _create_union_branch_in_enfa(self, s_from, s_to, son_number): + def _process_to_enfa_kleene_star(self, + enfa: EpsilonNFA, + s_from: State, + s_to: State) -> None: + # pylint: disable=protected-access + state_first = self._get_next_state_enfa() + state_second = self._get_next_state_enfa() + enfa.add_transition(state_second, FAEpsilon(), state_first) + enfa.add_transition(s_from, FAEpsilon(), s_to) + enfa.add_transition(s_from, FAEpsilon(), state_first) + enfa.add_transition(state_second, FAEpsilon(), s_to) + self._process_to_enfa_son(enfa, state_first, state_second, 0) + + def _create_union_branch_in_enfa(self, + enfa: EpsilonNFA, + s_from: State, + s_to: State, + son_number: int) -> None: state0 = self._get_next_state_enfa() state2 = self._get_next_state_enfa() - self._add_epsilon_transition_in_enfa_between(s_from, state0) - 
self._add_epsilon_transition_in_enfa_between(state2, s_to) - self._process_to_enfa_son(state0, state2, son_number) - - def _process_to_enfa_concatenation(self, s_from, s_to): + enfa.add_transition(s_from, FAEpsilon(), state0) + enfa.add_transition(state2, FAEpsilon(), s_to) + self._process_to_enfa_son(enfa, state0, state2, son_number) + + def _process_to_enfa_concatenation(self, + enfa: EpsilonNFA, + s_from: State, + s_to: State) -> None: state0 = self._get_next_state_enfa() state1 = self._get_next_state_enfa() - self._add_epsilon_transition_in_enfa_between(state0, state1) - self._process_to_enfa_son(s_from, state0, 0) - self._process_to_enfa_son(state1, s_to, 1) - - def _add_epsilon_transition_in_enfa_between(self, state0, state1): - self._enfa.add_transition(state0, finite_automaton.Epsilon(), state1) - - def _process_to_enfa_son(self, s_from, s_to, index_son): + enfa.add_transition(state0, FAEpsilon(), state1) + self._process_to_enfa_son(enfa, s_from, state0, 0) + self._process_to_enfa_son(enfa, state1, s_to, 1) + + def _process_to_enfa_son(self, + enfa: EpsilonNFA, + s_from: State, + s_to: State, + index_son: int) -> None: # pylint: disable=protected-access self.sons[index_son]._counter = self._counter - self.sons[index_son]._enfa = self._enfa - self.sons[index_son]._process_to_enfa(s_from, s_to) + self.sons[index_son]._enfa = enfa + self.sons[index_son]._process_to_enfa(enfa, s_from, s_to) self._counter = self.sons[index_son]._counter + def _get_next_state_enfa(self) -> State: + s_final = State(self._counter) + self._counter += 1 + return s_final + def get_tree_str(self, depth: int = 0) -> str: """ Get a string representation of the tree behind the regex @@ -280,7 +304,7 @@ def get_tree_str(self, depth: int = 0) -> str: temp += son.get_tree_str(depth + 1) return temp - def to_cfg(self, starting_symbol="S") -> "CFG": + def to_cfg(self, starting_symbol: str = "S") -> CFG: """ Turns the regex into a context-free grammar @@ -304,11 +328,12 @@ def to_cfg(self, 
starting_symbol="S") -> "CFG": """ productions, _ = self._get_production(starting_symbol) - cfg_res = cfg.CFG(start_symbol=cfg.utils.to_variable(starting_symbol), + cfg_res = CFG(start_symbol=to_variable(starting_symbol), productions=set(productions)) return cfg_res - def _get_production(self, current_symbol, count=0): + def _get_production(self, current_symbol: str, count: int = 0) \ + -> Tuple[List[Production], int]: next_symbols = [] next_productions = [] for son in self.sons: @@ -322,7 +347,7 @@ def _get_production(self, current_symbol, count=0): next_productions += new_prods return next_productions, count - def __repr__(self): + def __repr__(self) -> str: return self.head.get_str_repr([str(son) for son in self.sons]) def union(self, other: "Regex") -> "Regex": @@ -357,11 +382,11 @@ def union(self, other: "Regex") -> "Regex": """ regex = Regex("") - regex.head = pyformlang.regular_expression.regex_objects.Union() + regex.head = Union() regex.sons = [self, other] return regex - def __or__(self, other): + def __or__(self, other: "Regex") -> "Regex": """ Makes the union with another regex Parameters @@ -427,12 +452,11 @@ def concatenate(self, other: "Regex") -> "Regex": True """ regex = Regex("") - regex.head = \ - pyformlang.regular_expression.regex_objects.Concatenation() + regex.head = Concatenation() regex.sons = [self, other] return regex - def __add__(self, other): + def __add__(self, other: "Regex") -> "Regex": """ Concatenates a regular expression with an other one Parameters @@ -485,11 +509,11 @@ def kleene_star(self) -> "Regex": """ regex = Regex("") - regex.head = pyformlang.regular_expression.regex_objects.KleeneStar() + regex.head = KleeneStar() regex.sons = [self] return regex - def from_string(self, regex_str: str): + def from_string(self, regex_str: str) -> "Regex": """ Construct a regex from a string. For internal usage. 
Equivalent to the constructor of Regex @@ -540,34 +564,255 @@ def accepts(self, word: Iterable[str]) -> bool: True """ - if self._enfa is None: - self._enfa = self.to_epsilon_nfa() - return self._enfa.accepts(word) + return self._to_epsilon_nfa_internal().accepts(word) @classmethod - def from_python_regex(cls, regex): + def from_finite_automaton(cls, automaton: FiniteAutomaton) -> "Regex": + """ Creates a regular expression from given finite automaton + + Returns + ---------- + regex : :class:`~pyformlang.regular_expression.Regex` + A regular expression equivalent to the current Epsilon NFA + + Examples + -------- + + >>> enfa = EpsilonNFA() + >>> enfa.add_transitions([(0, "abc", 1), (0, "d", 1), \ + (0, "epsilon", 2)]) + >>> enfa.add_start_state(0) + >>> enfa.add_final_state(1) + >>> regex = enfa.to_regex() + >>> regex.accepts(["abc"]) + True + """ - Creates a regex from a string using the python way to write it. + copies = [automaton.copy() for _ in automaton.final_states] + final_states = list(automaton.final_states) + for i in range(len(automaton.final_states)): + for j in range(len(automaton.final_states)): + if i != j: + copies[j].remove_final_state(final_states[i]) + regex_l = [] + for copy in copies: + cls._remove_all_basic_states(copy) + regex_sub = cls._get_regex_simple(copy) + if regex_sub: + regex_l.append(regex_sub) + res = "+".join(regex_l) + return Regex(res) - Careful: - Not everything is implemented, check PythonRegex class \ - documentation for more details. + @classmethod + def _get_regex_simple(cls, automaton: FiniteAutomaton) -> str: + """ Get the regex of an automaton when it only composed of a start and + a final state - It is equivalent to calling PythonRegex constructor directly. + CAUTION: For internal use only! 
- Parameters + Returns ---------- regex : str - The regex given as a string or compile regex + A regex representing the automaton + """ + if not automaton.final_states or not automaton.start_states: + return "" + if len(automaton.final_states) != 1 or len(automaton.start_states) != 1: + raise ValueError("The automaton is not simple enough!") + if automaton.start_states == automaton.final_states: + # We are suppose to have only one good symbol + for symbol in automaton.symbols: + out_states = automaton(list(automaton.start_states)[0], symbol) + if out_states: + return "(" + str(symbol.value) + ")*" + return "epsilon" + start_to_start, start_to_end, end_to_start, end_to_end = \ + cls._get_bi_transitions(automaton) + return cls.__get_regex_sub(start_to_start, + start_to_end, + end_to_start, + end_to_end) + + @classmethod + def _get_bi_transitions(cls, automaton: FiniteAutomaton) \ + -> Tuple[str, str, str, str]: + """ Internal method to compute the transition in the case of a \ + simple automaton Returns - ------- - python_regex : :class:`~pyformlang.regular_expression.PythonRegex` - The regex + start_to_start : str + The transition from the start state to the start state + start_to_end : str + The transition from the start state to the end state + end_to_start : str + The transition from the end state to the start state + end_to_end : str + The transition from the end state to the end state + ---------- + """ + start = list(automaton.start_states)[0] + end = list(automaton.final_states)[0] + start_to_start = "epsilon" + start_to_end = "" + end_to_end = "epsilon" + end_to_start = "" + for state in automaton.states: + for symbol in automaton.symbols.union({FAEpsilon()}): + for out_state in automaton(state, symbol): + symbol_str = str(symbol.value) + if not symbol_str.isalnum(): + symbol_str = "(" + symbol_str + ")" + if state == start and out_state == start: + start_to_start = symbol_str + elif state == start and out_state == end: + start_to_end = symbol_str + elif 
state == end and out_state == start: + end_to_start = symbol_str + elif state == end and out_state == end: + end_to_end = symbol_str + return start_to_start, start_to_end, end_to_start, end_to_end - Examples - -------- - >>> Regex.from_python_regex("a+[cd]") + @classmethod + def _remove_all_basic_states(cls, automaton: FiniteAutomaton) -> None: + """ Remove all states which are not the start state or a final state + CAREFUL: This method modifies the current automaton, for internal usage + only! + + The function _create_or_transitions is supposed to be called before + calling this function + """ + cls._create_or_transitions(automaton) + states = automaton.states.copy() + for state in states: + if (state not in automaton.start_states \ + and state not in automaton.final_states): + cls._remove_state(automaton, state) + + @classmethod + def _remove_state(cls, automaton: FiniteAutomaton, state: State) -> None: + """ Removes a given state from the epsilon NFA + + CAREFUL: This method modifies the current automaton, for internal usage + only! + + The function _create_or_transitions is supposed to be called before + calling this function + + Parameters + ---------- + state : :class:`~pyformlang.finite_automaton.State` + The state to remove + + """ + # First compute all endings + out_transitions = {} + input_symbols = automaton.symbols.union({FAEpsilon()}) + for symbol in input_symbols: + out_states = automaton(state, symbol).copy() + for out_state in out_states: + out_transitions[out_state] = str(symbol.value) + automaton.remove_transition(state, symbol, out_state) + if state in out_transitions: + to_itself = "(" + out_transitions[state] + ")*" + del out_transitions[state] + for out_state in list(out_transitions.keys()): + out_transitions[out_state] = to_itself + "." 
+ \ + out_transitions[out_state] + for in_state in automaton.states: + if in_state == state: + continue + for symbol in input_symbols: + out_states = automaton(in_state, symbol) + if state not in out_states: + continue + symbol_str = "(" + str(symbol.value) + ")" + automaton.remove_transition(in_state, symbol, state) + for out_state, next_symb in out_transitions.items(): + new_symbol = Symbol(symbol_str + "." + next_symb) + automaton.add_transition(in_state, new_symbol, out_state) + automaton.states.remove(state) + # We make sure the automaton has the good structure + cls._create_or_transitions(automaton) + + @classmethod + def _create_or_transitions(cls, automaton: FiniteAutomaton) -> None: + """ Creates a OR transition instead of several connections + + CAREFUL: This method modifies the automaton and is designed for \ + internal use only! + """ + for state in automaton.states: + new_transitions = {} + input_symbols = automaton.symbols.union({FAEpsilon()}) + for symbol in input_symbols: + out_states = automaton(state, symbol) + out_states = out_states.copy() + symbol_str = str(symbol.value) + for out_state in out_states: + automaton.remove_transition(state, symbol, out_state) + base = new_transitions.setdefault(out_state, "") + if "+" in symbol_str: + symbol_str = "(" + symbol_str + ")" + if base: + new_transitions[out_state] = "((" + base + ")+(" + \ + symbol_str + "))" + else: + new_transitions[out_state] = symbol_str + for out_state, next_symb in new_transitions.items(): + automaton.add_transition(state, + next_symb, + out_state) + + @classmethod + def __get_regex_sub(cls, + start_to_start: str, + start_to_end: str, + end_to_start: str, + end_to_end: str) -> str: + """ Combines the transitions in the regex simple function """ + if not start_to_end: + return "" + temp, part1 = cls.__get_temp(start_to_end, end_to_start, end_to_end) + part0 = "epsilon" + if start_to_start != "epsilon": + if temp: + part0 = "(" + start_to_start + "+" + temp + ")*" + else: + part0 = 
"(" + start_to_start + ")*" + elif temp != "epsilon" and temp: + part0 = "(" + temp + ")*" + return "(" + part0 + "." + part1 + ")" + + @classmethod + def __get_temp(cls, + start_to_end: str, + end_to_start: str, + end_to_end: str) -> Tuple[str, str]: + """ + Gets a temp values in the computation + of the simple automaton regex. """ - return regular_expression.PythonRegex(regex) + temp = "epsilon" + if (start_to_end != "epsilon" + or end_to_end != "epsilon" + or end_to_start != "epsilon"): + temp = "" + if start_to_end != "epsilon": + temp = start_to_end + if end_to_end != "epsilon": + if temp: + temp += "." + end_to_end + "*" + else: + temp = end_to_end + "*" + part1 = temp + if not part1: + part1 = "epsilon" + if end_to_start != "epsilon": + if temp: + temp += "." + end_to_start + else: + temp = end_to_start + if not end_to_start: + temp = "" + return temp, part1 diff --git a/pyformlang/regular_expression/regex_objects.py b/pyformlang/regular_expression/regex_objects.py index 053f9b4..65fc5b1 100644 --- a/pyformlang/regular_expression/regex_objects.py +++ b/pyformlang/regular_expression/regex_objects.py @@ -1,7 +1,12 @@ """ Representation of some objects used in regex. 
""" -import pyformlang + +from typing import List, Iterable +from abc import abstractmethod + +from pyformlang.cfg import Production +from pyformlang.cfg.utils import to_variable, to_terminal class Node: # pylint: disable=too-few-public-methods @@ -13,11 +18,11 @@ class Node: # pylint: disable=too-few-public-methods The value of the node """ - def __init__(self, value): + def __init__(self, value: str) -> None: self._value = value @property - def value(self): + def value(self) -> str: """ Give the value of the node Returns @@ -27,7 +32,8 @@ def value(self): """ return self._value - def get_str_repr(self, sons_repr): + @abstractmethod + def get_str_repr(self, sons_repr: Iterable[str]) -> str: """ The string representation of the node @@ -44,7 +50,9 @@ def get_str_repr(self, sons_repr): """ raise NotImplementedError - def get_cfg_rules(self, current_symbol, sons): + @abstractmethod + def get_cfg_rules(self, current_symbol: str, sons: Iterable[str]) \ + -> List[Production]: """ Gets the rules for a context-free grammar to represent the \ operator""" raise NotImplementedError @@ -91,14 +99,15 @@ class Operator(Node): # pylint: disable=too-few-public-methods The value of the operator """ - def __repr__(self): + def __repr__(self) -> str: return "Operator(" + str(self._value) + ")" - def get_str_repr(self, sons_repr): + def get_str_repr(self, sons_repr: Iterable[str]) -> str: """ Get the string representation """ raise NotImplementedError - def get_cfg_rules(self, current_symbol, sons): + def get_cfg_rules(self, current_symbol: str, sons: Iterable[str]) \ + -> List[Production]: """ Gets the rules for a context-free grammar to represent the \ operator""" raise NotImplementedError @@ -113,17 +122,18 @@ class Symbol(Node): # pylint: disable=too-few-public-methods The value of the symbol """ - def get_str_repr(self, sons_repr): + def get_str_repr(self, sons_repr: Iterable[str]) -> str: return str(self.value) - def get_cfg_rules(self, current_symbol, sons): + def 
get_cfg_rules(self, current_symbol: str, sons: Iterable[str]) \ + -> List[Production]: """ Gets the rules for a context-free grammar to represent the \ operator""" - return [pyformlang.cfg.Production( - pyformlang.cfg.utils.to_variable(current_symbol), - [pyformlang.cfg.utils.to_terminal(self.value)])] + return [Production( + to_variable(current_symbol), + [to_terminal(self.value)])] - def __repr__(self): + def __repr__(self) -> str: return "Symbol(" + str(self._value) + ")" @@ -131,15 +141,16 @@ class Concatenation(Operator): # pylint: disable=too-few-public-methods """ Represents a concatenation """ - def get_str_repr(self, sons_repr): + def get_str_repr(self, sons_repr: Iterable[str]) -> str: return "(" + ".".join(sons_repr) + ")" - def get_cfg_rules(self, current_symbol, sons): - return [pyformlang.cfg.Production( - pyformlang.cfg.utils.to_variable(current_symbol), - [pyformlang.cfg.utils.to_variable(son) for son in sons])] + def get_cfg_rules(self, current_symbol: str, sons: Iterable[str]) \ + -> List[Production]: + return [Production( + to_variable(current_symbol), + [to_variable(son) for son in sons])] - def __init__(self): + def __init__(self) -> None: super().__init__("Concatenation") @@ -147,16 +158,16 @@ class Union(Operator): # pylint: disable=too-few-public-methods """ Represents a union """ - def get_str_repr(self, sons_repr): + def get_str_repr(self, sons_repr: Iterable[str]) -> str: return "(" + "|".join(sons_repr) + ")" - def get_cfg_rules(self, current_symbol, sons): - return [pyformlang.cfg.Production( - pyformlang.cfg.utils.to_variable(current_symbol), - [pyformlang.cfg.utils.to_variable(son)]) - for son in sons] + def get_cfg_rules(self, current_symbol: str, sons: Iterable[str]) \ + -> List[Production]: + return [Production( + to_variable(current_symbol), + [to_variable(son)]) for son in sons] - def __init__(self): + def __init__(self) -> None: super().__init__("Union") @@ -164,24 +175,23 @@ class KleeneStar(Operator): # pylint: 
disable=too-few-public-methods """ Represents an epsilon symbol """ - def get_str_repr(self, sons_repr): + def get_str_repr(self, sons_repr: Iterable[str]) -> str: return "(" + ".".join(sons_repr) + ")*" - def get_cfg_rules(self, current_symbol, sons): + def get_cfg_rules(self, current_symbol: str, sons: Iterable[str]) \ + -> List[Production]: return [ - pyformlang.cfg.Production( - pyformlang.cfg.utils.to_variable(current_symbol), - []), - pyformlang.cfg.Production( - pyformlang.cfg.utils.to_variable(current_symbol), - [pyformlang.cfg.utils.to_variable(current_symbol), - pyformlang.cfg.utils.to_variable(current_symbol)]), - pyformlang.cfg.Production( - pyformlang.cfg.utils.to_variable(current_symbol), - [pyformlang.cfg.utils.to_variable(son) for son in sons]) + Production( + to_variable(current_symbol), []), + Production( + to_variable(current_symbol), + [to_variable(current_symbol), to_variable(current_symbol)]), + Production( + to_variable(current_symbol), + [to_variable(son) for son in sons]) ] - def __init__(self): + def __init__(self) -> None: super().__init__("Kleene Star") @@ -189,15 +199,14 @@ class Epsilon(Symbol): # pylint: disable=too-few-public-methods """ Represents an epsilon symbol """ - def get_str_repr(self, sons_repr): + def get_str_repr(self, sons_repr: Iterable[str]) -> str: return "$" - def get_cfg_rules(self, current_symbol, sons): - return [pyformlang.cfg.Production( - pyformlang.cfg.utils.to_variable(current_symbol), - [])] + def get_cfg_rules(self, current_symbol: str, sons: Iterable[str]) \ + -> List[Production]: + return [Production(to_variable(current_symbol), [])] - def __init__(self): + def __init__(self) -> None: super().__init__("Epsilon") @@ -205,16 +214,17 @@ class Empty(Symbol): # pylint: disable=too-few-public-methods """ Represents an empty symbol """ - def __init__(self): + def __init__(self) -> None: super().__init__("Empty") - def get_cfg_rules(self, current_symbol, sons): + def get_cfg_rules(self, current_symbol: str, sons: 
Iterable[str]) \ + -> List[Production]: return [] class MisformedRegexError(Exception): """ Error for misformed regex """ - def __init__(self, message: str, regex: str): + def __init__(self, message: str, regex: str) -> None: super().__init__(message + " Regex: " + regex) self._regex = regex diff --git a/pyformlang/regular_expression/regex_reader.py b/pyformlang/regular_expression/regex_reader.py index 1bebd57..11d86fa 100644 --- a/pyformlang/regular_expression/regex_reader.py +++ b/pyformlang/regular_expression/regex_reader.py @@ -2,11 +2,11 @@ A class to read regex """ -import re +from typing import List, Optional +from re import sub -from pyformlang.regular_expression.regex_objects import to_node, Operator, \ - Symbol, Concatenation, Union, \ - KleeneStar, MisformedRegexError, SPECIAL_SYMBOLS +from .regex_objects import to_node, Node, Operator, Symbol, Empty, \ + Concatenation, Union, KleeneStar, MisformedRegexError, SPECIAL_SYMBOLS MISFORMED_MESSAGE = "The regex is misformed here." @@ -19,40 +19,38 @@ class RegexReader: """ # pylint: disable=too-few-public-methods - def __init__(self, regex: str): - self._current_node = None - self.head = None - self.sons = None - self._end_current_group: int = 0 + def __init__(self, regex: str) -> None: + self._current_node: Optional[Node] = None + self.head: Node = Empty() + self.sons: List[RegexReader] = [] + self._end_current_group = 0 regex = _pre_process_regex(regex) self._regex = regex self._components = _get_regex_componants(regex) self._pre_process_input_regex_componants() - self._setup_sons() self._setup_from_regex_componants() - def _remove_useless_extreme_parenthesis_from_components(self): + def _remove_useless_extreme_parenthesis_from_components(self) -> None: if self._begins_with_parenthesis_components(): self._remove_useless_extreme_parenthesis_from_componants() - def _pre_process_input_regex_componants(self): + def _pre_process_input_regex_componants(self) -> None: 
self._remove_useless_extreme_parenthesis_from_components() self._compute_precedence() self._remove_useless_extreme_parenthesis_from_components() - def _remove_useless_extreme_parenthesis_from_componants( - self): + def _remove_useless_extreme_parenthesis_from_componants(self) -> None: if self._is_surrounded_by_parenthesis(): self._components = self._components[1:-1] self._remove_useless_extreme_parenthesis_from_components() - def _is_surrounded_by_parenthesis(self): + def _is_surrounded_by_parenthesis(self) -> bool: parenthesis_depths = self._get_parenthesis_depths() first_complete_closing = _find_first_complete_closing_if_possible( parenthesis_depths) return first_complete_closing == len(self._components) - 1 - def _get_parenthesis_depths(self): + def _get_parenthesis_depths(self) -> List[int]: depths = [0] for component in self._components: depths.append(depths[-1] + _get_parenthesis_value(component)) @@ -61,7 +59,7 @@ def _get_parenthesis_depths(self): def _begins_with_parenthesis_components(self): return self._components[0] == "(" - def _setup_precedence_when_not_trivial(self): + def _setup_precedence_when_not_trivial(self) -> None: self._set_end_first_group_in_components() if self._end_current_group == len(self._components): self._current_node = None @@ -69,30 +67,30 @@ def _setup_precedence_when_not_trivial(self): self._current_node = to_node( self._components[self._end_current_group]) - def _setup_precedence(self): + def _setup_precedence(self) -> None: if len(self._components) <= 1: self._current_node = None else: self._setup_precedence_when_not_trivial() - def _found_no_union(self, next_node): + def _found_no_union(self, next_node: Optional[Node]) -> bool: return self._end_current_group < len( self._components) and not isinstance(next_node, Union) - def _add_parenthesis_around_part_of_componants(self, index_opening, - index_closing): + def _add_parenthesis_around_part_of_componants( + self, index_opening: int, index_closing: int) -> None: 
self._components.insert(index_opening, "(") # Add 1 as something was added before self._components.insert(index_closing + 1, ")") - def _compute_precedent_when_not_kleene_nor_union(self): + def _compute_precedent_when_not_kleene_nor_union(self) -> None: while self._found_no_union(self._current_node): self._set_next_end_group_and_node() if isinstance(self._current_node, Union): self._add_parenthesis_around_part_of_componants( 0, self._end_current_group) - def _compute_precedence(self): + def _compute_precedence(self) -> None: """ Add parenthesis for the first group in indicate precedence """ self._setup_precedence() if isinstance(self._current_node, KleeneStar): @@ -102,16 +100,16 @@ def _compute_precedence(self): elif not isinstance(self._current_node, Union): self._compute_precedent_when_not_kleene_nor_union() - def _set_next_end_group_and_node(self): - if isinstance(self._current_node, Operator) and not isinstance( - self._current_node, KleeneStar): + def _set_next_end_group_and_node(self) -> None: + if isinstance(self._current_node, Operator) and \ + not isinstance(self._current_node, KleeneStar): self._end_current_group += 1 self._set_end_first_group_in_components(self._end_current_group) if self._end_current_group < len(self._components): self._current_node = to_node( self._components[self._end_current_group]) - def _set_end_first_group_in_components(self, idx_from=0): + def _set_end_first_group_in_components(self, idx_from: int = 0) -> None: """ Gives the end of the first group """ if idx_from >= len(self._components): self._end_current_group = idx_from @@ -130,7 +128,7 @@ def _set_end_first_group_in_components(self, idx_from=0): else: self._end_current_group = 1 + idx_from - def _setup_non_trivial_regex(self): + def _setup_non_trivial_regex(self) -> None: self._set_end_first_group_in_components() next_node = to_node(self._components[self._end_current_group]) if isinstance(next_node, KleeneStar): @@ -149,15 +147,15 @@ def _setup_non_trivial_regex(self): 
self.sons.append(self._process_sub_regex(begin_second_group, len(self._components))) - def _setup_empty_regex(self): + def _setup_empty_regex(self) -> None: self.head = to_node("") - def _setup_one_symbol_regex(self): + def _setup_one_symbol_regex(self) -> None: first_symbol = to_node(self._components[0]) self._check_is_valid_single_first_symbol(first_symbol) self.head = first_symbol - def _setup_from_regex_componants(self): + def _setup_from_regex_componants(self) -> None: if not self._components: self._setup_empty_regex() elif len(self._components) == 1: @@ -165,18 +163,15 @@ def _setup_from_regex_componants(self): else: self._setup_non_trivial_regex() - def _process_sub_regex(self, idx_from, idx_to): + def _process_sub_regex(self, idx_from: int, idx_to: int) -> "RegexReader": sub_regex = " ".join(self._components[idx_from:idx_to]) return self.from_string(sub_regex) - def _check_is_valid_single_first_symbol(self, first_symbol): + def _check_is_valid_single_first_symbol(self, first_symbol: Node) -> None: if not isinstance(first_symbol, Symbol): raise MisformedRegexError(MISFORMED_MESSAGE, self._regex) - def _setup_sons(self): - self.sons = [] - - def from_string(self, regex_str: str): + def from_string(self, regex_str: str) -> "RegexReader": """ Read a regex from a string Parameters @@ -192,7 +187,9 @@ def from_string(self, regex_str: str): return RegexReader(regex_str) -def _find_first_complete_closing_if_possible(parenthesis_depths, index_from=0): +def _find_first_complete_closing_if_possible( + parenthesis_depths: List[int], + index_from: int = 0) -> int: try: first_complete_closing = parenthesis_depths.index(0, index_from) except ValueError: @@ -200,7 +197,7 @@ def _find_first_complete_closing_if_possible(parenthesis_depths, index_from=0): return first_complete_closing -def _get_parenthesis_value(component): +def _get_parenthesis_value(component: str) -> int: if component == "(": return 1 if component == ")": @@ -212,8 +209,8 @@ def _pre_process_regex(regex: 
str) -> str: regex = regex.strip(" ") if regex.endswith("\\") and not regex.endswith("\\\\"): regex += " " - regex = re.sub(r" +", " ", regex) - regex = re.sub(r"\\ ", "\\ ", regex) + regex = sub(r" +", " ", regex) + regex = sub(r"\\ ", "\\ ", regex) if regex.endswith(" "): regex = regex[:-1] res = [] @@ -232,10 +229,10 @@ def _pre_process_regex(regex: str) -> str: return "".join(res) -def _get_regex_componants(regex): +def _get_regex_componants(regex: str) -> List[str]: temp = regex.split(" ") - for i, sub in enumerate(temp): - if sub.endswith("\\") and not sub.endswith("\\\\"): + for i, component in enumerate(temp): + if component.endswith("\\") and not component.endswith("\\\\"): temp[i] += " " if len(temp) > 1 and not temp[-1]: del temp[-1] diff --git a/pyformlang/regular_expression/tests/test_python_regex.py b/pyformlang/regular_expression/tests/test_python_regex.py index db095ae..c0743c0 100644 --- a/pyformlang/regular_expression/tests/test_python_regex.py +++ b/pyformlang/regular_expression/tests/test_python_regex.py @@ -1,6 +1,7 @@ """ Testing python regex parsing """ + import re from pyformlang.regular_expression.python_regex import PythonRegex @@ -11,6 +12,12 @@ class TestPythonRegex: # pylint: disable=missing-function-docstring, too-many-public-methods + def test_simple(self): + regex = PythonRegex("abc") + assert regex.accepts(["a", "b", "c"]) + assert not regex.accepts(["a", "b", "b"]) + assert not regex.accepts(["a", "b"]) + def test_with_brackets(self): regex = PythonRegex("a[bc]") assert regex.accepts(["a", "b"]) diff --git a/pyformlang/regular_expression/tests/test_regex.py b/pyformlang/regular_expression/tests/test_regex.py index 6bbb821..c875544 100644 --- a/pyformlang/regular_expression/tests/test_regex.py +++ b/pyformlang/regular_expression/tests/test_regex.py @@ -1,15 +1,22 @@ """ Tests for regular expressions """ -from pyformlang.regular_expression import Regex, MisformedRegexError -from pyformlang import finite_automaton + import pytest 
+from pyformlang.regular_expression import Regex, MisformedRegexError +from pyformlang.finite_automaton import EpsilonNFA +from pyformlang.finite_automaton import DeterministicFiniteAutomaton +from pyformlang.finite_automaton import State, Symbol, Epsilon +from pyformlang.finite_automaton.tests.test_deterministic_finite_automaton \ + import get_example0, get_dfa_example, perform_tests_example0 + class TestRegex: """ Tests for regex """ # pylint: disable=missing-function-docstring,too-many-public-methods + # pylint: disable=protected-access def test_creation(self): """ Try to create regex """ @@ -65,10 +72,10 @@ def test_creation(self): def test_to_enfa0(self): """ Tests the transformation to a regex """ - symb_a = finite_automaton.Symbol("a") - symb_b = finite_automaton.Symbol("b") - symb_c = finite_automaton.Symbol("c") - epsilon = finite_automaton.Epsilon() + symb_a = Symbol("a") + symb_b = Symbol("b") + symb_c = Symbol("c") + epsilon = Epsilon() regex = Regex("a|b") enfa = regex.to_epsilon_nfa() assert enfa.accepts([symb_a]) @@ -107,9 +114,9 @@ def test_to_enfa0(self): def test_to_enfa1(self): """ Tests the transformation to a regex """ - symb_a = finite_automaton.Symbol("a") - symb_b = finite_automaton.Symbol("b") - symb_c = finite_automaton.Symbol("c") + symb_a = Symbol("a") + symb_b = Symbol("b") + symb_c = Symbol("c") regex = Regex("a**") enfa = regex.to_epsilon_nfa() assert enfa.accepts([symb_a]) @@ -163,8 +170,8 @@ def test_get_repr(self): regex0 = Regex("a*.(b|c)epsilon") regex_str = str(regex0) regex1 = Regex(regex_str) - dfa0 = regex0.to_epsilon_nfa().to_deterministic().minimize() - dfa1 = regex1.to_epsilon_nfa().to_deterministic().minimize() + dfa0 = regex0.to_minimal_dfa() + dfa1 = regex1.to_minimal_dfa() assert dfa0 == dfa1 def test_accepts(self): @@ -172,19 +179,6 @@ def test_accepts(self): assert regex.accepts(["a"]) assert not regex.accepts(["a", "b"]) - def test_from_python_simple(self): - regex = Regex.from_python_regex("abc") - assert 
regex.accepts(["a", "b", "c"]) - assert not regex.accepts(["a", "b", "b"]) - assert not regex.accepts(["a", "b"]) - - def test_from_python_brackets(self): - regex = Regex.from_python_regex("a[bc]") - assert regex.accepts(["a", "b"]) - assert regex.accepts(["a", "c"]) - assert not regex.accepts(["a", "b", "c"]) - assert not regex.accepts(["a", "a"]) - def test_space(self): regex = Regex("\\ ") assert regex.accepts([" "]) @@ -292,3 +286,193 @@ def test_backslash_b(self): def test_backslash(self): assert Regex("(\\\\|])").accepts("\\") assert Regex("(\\\\|])").accepts("]") + + def test_remove_state(self): + " Tests the remove of state """ + enfa = EpsilonNFA() + state0 = State(0) + state1 = State(1) + state2 = State(2) + symb02 = Symbol("a+b") + symb01 = Symbol("c*") + symb11 = Symbol("b+(c.d)") + symb12 = Symbol("a.b.c") + enfa.add_start_state(state0) + enfa.add_final_state(state2) + enfa.add_transition(state0, symb01, state1) + enfa.add_transition(state0, symb02, state2) + enfa.add_transition(state1, symb11, state1) + enfa.add_transition(state1, symb12, state2) + Regex._remove_all_basic_states(enfa) + assert enfa.get_number_transitions() == 1 + assert len(enfa.states) == 2 + + def test_from_enfa1(self): + """ Tests the transformation to regex """ + enfa = EpsilonNFA() + state0 = State(0) + state1 = State(1) + state2 = State(2) + symb_e = Symbol("e") + symb_f = Symbol("f") + symb_g = Symbol("g") + enfa.add_start_state(state0) + enfa.add_final_state(state2) + enfa.add_transition(state0, symb_e, state1) + enfa.add_transition(state1, symb_f, state2) + enfa.add_transition(state0, symb_g, state2) + regex = Regex.from_finite_automaton(enfa) + enfa2 = regex.to_epsilon_nfa() + assert enfa2.accepts([symb_e, symb_f]) + assert enfa2.accepts([symb_g]) + assert not enfa2.accepts([]) + assert not enfa2.accepts([symb_e]) + assert not enfa2.accepts([symb_f]) + enfa.add_final_state(state0) + with pytest.raises(ValueError) as _: + Regex._get_regex_simple(enfa) + regex = 
Regex.from_finite_automaton(enfa) + enfa3 = regex.to_epsilon_nfa() + assert enfa3.accepts([symb_e, symb_f]) + assert enfa3.accepts([symb_g]) + assert enfa3.accepts([]) + assert not enfa3.accepts([symb_e]) + assert not enfa3.accepts([symb_f]) + enfa.remove_start_state(state0) + regex = Regex.from_finite_automaton(enfa) + enfa3 = regex.to_epsilon_nfa() + assert not enfa3.accepts([symb_e, symb_f]) + assert not enfa3.accepts([symb_g]) + assert not enfa3.accepts([]) + assert not enfa3.accepts([symb_e]) + assert not enfa3.accepts([symb_f]) + enfa.add_start_state(state0) + enfa.add_transition(state0, symb_f, state0) + regex = Regex.from_finite_automaton(enfa) + enfa3 = regex.to_epsilon_nfa() + assert enfa3.accepts([symb_e, symb_f]) + assert enfa3.accepts([symb_f, symb_e, symb_f]) + assert enfa3.accepts([symb_g]) + assert enfa3.accepts([symb_f, symb_f, symb_g]) + assert enfa3.accepts([]) + assert not enfa3.accepts([symb_e]) + assert enfa3.accepts([symb_f]) + + def test_from_enfa2(self): + """ Tests the transformation to regex """ + enfa = EpsilonNFA() + state0 = State(0) + state1 = State(1) + symb_a = Symbol("0") + symb_b = Symbol("1") + enfa.add_start_state(state0) + enfa.add_final_state(state1) + enfa.add_transition(state0, symb_a, state0) + enfa.add_transition(state0, symb_a, state1) + enfa.add_transition(state1, symb_b, state0) + enfa.add_transition(state1, symb_b, state1) + regex = Regex.from_finite_automaton(enfa) + enfa2 = regex.to_epsilon_nfa() + assert enfa2.accepts([symb_a]) + assert enfa2.accepts([symb_a, symb_a]) + assert enfa2.accepts([symb_a, symb_a, symb_b]) + assert enfa2.accepts([symb_a, symb_a, symb_b, symb_b]) + assert enfa2.accepts([symb_a, symb_a, + symb_b, symb_b, symb_a]) + assert enfa2.accepts([symb_a, symb_a, symb_b, + symb_b, symb_a, symb_b]) + assert not enfa2.accepts([symb_b]) + + def test_from_enfa3(self): + """ Tests the transformation to regex """ + enfa = EpsilonNFA() + state0 = State(0) + state1 = State(1) + symb_a = Symbol("0") + symb_b = 
Symbol("1") + enfa.add_start_state(state0) + enfa.add_final_state(state1) + enfa.add_transition(state0, symb_a, state0) + enfa.add_transition(state1, symb_b, state0) + enfa.add_transition(state1, symb_b, state1) + regex = Regex.from_finite_automaton(enfa) + enfa2 = regex.to_epsilon_nfa() + assert not enfa2.accepts([symb_a]) + assert not enfa2.accepts([symb_a, symb_a]) + assert not enfa2.accepts([symb_a, symb_a, symb_b]) + assert not enfa2.accepts([symb_a, symb_a, + symb_b, symb_b, symb_a]) + assert not enfa2.accepts([symb_a, symb_a, symb_b, + symb_b, symb_a, symb_b]) + assert not enfa2.accepts([symb_b]) + epsilon = Epsilon() + enfa.add_transition(state0, epsilon, state1) + regex = Regex.from_finite_automaton(enfa) + enfa2 = regex.to_epsilon_nfa() + assert enfa.accepts([]) + assert enfa.accepts([symb_a]) + assert enfa2.accepts([symb_a]) + assert enfa2.accepts([symb_a, symb_a]) + assert enfa2.accepts([symb_a, symb_a, symb_b, symb_b]) + assert enfa2.accepts([symb_a, symb_a, symb_b, symb_b, + symb_a, symb_b]) + assert enfa2.accepts([symb_b]) + assert enfa2.accepts([]) + enfa.remove_transition(state0, symb_a, state0) + regex = Regex.from_finite_automaton(enfa) + enfa2 = regex.to_epsilon_nfa() + assert not enfa2.accepts([symb_a]) + assert not enfa2.accepts([symb_a, symb_a]) + assert not enfa2.accepts([symb_a, symb_a, symb_b]) + assert not enfa2.accepts([symb_a, symb_a, symb_b, + symb_b, symb_a]) + assert not enfa2.accepts([symb_a, symb_a, symb_b, symb_b, + symb_a, symb_b]) + assert enfa2.accepts([symb_b]) + assert enfa2.accepts([]) + enfa.remove_transition(state1, symb_b, state1) + regex = Regex.from_finite_automaton(enfa) + enfa2 = regex.to_epsilon_nfa() + assert enfa2.accepts([symb_b, symb_b]) + enfa.add_transition(state0, symb_a, state0) + regex = Regex.from_finite_automaton(enfa) + enfa2 = regex.to_epsilon_nfa() + assert enfa2.accepts([symb_a, symb_b]) + + def test_example_doc(self): + enfa0 = EpsilonNFA() + state0 = State(0) + state1 = State(1) + symb_a = 
Symbol("0") + symb_b = Symbol("1") + enfa0.add_start_state(state0) + enfa0.add_final_state(state1) + enfa0.add_transition(state0, symb_a, state0) + enfa0.add_transition(state1, symb_b, state0) + enfa0.add_transition(state1, symb_b, state1) + + # Turn a finite automaton into a regex... + regex = Regex.from_finite_automaton(enfa0) + # And turn it back into an epsilon non deterministic automaton + enfa1 = regex.to_epsilon_nfa() + dfa0 = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa0) + dfa1 = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa1) + assert dfa0.is_equivalent_to(dfa1) + + def test_from_dfa0(self): + """ Tests the regex transformation """ + dfa0 = get_example0() + enfa = Regex.from_finite_automaton(dfa0).to_epsilon_nfa() + perform_tests_example0(enfa) + + def test_from_dfa1(self): + dfa1 = get_dfa_example() + enfa = Regex.from_finite_automaton(dfa1).to_epsilon_nfa() + dfa2 = DeterministicFiniteAutomaton.from_epsilon_nfa(enfa) + assert dfa1.is_equivalent_to(dfa2) + + def test_to_minimal_dfa(self): + dfa0 = get_example0() + dfa_regex = Regex.from_finite_automaton(dfa0) + dfa1 = dfa_regex.to_minimal_dfa() + assert dfa0 == dfa1 diff --git a/pyformlang/rsa/box.py b/pyformlang/rsa/box.py index 0baafe5..4f792b9 100644 --- a/pyformlang/rsa/box.py +++ b/pyformlang/rsa/box.py @@ -1,11 +1,12 @@ """ Representation of a box for recursive automaton """ -from typing import Union -from pyformlang.finite_automaton.epsilon_nfa import EpsilonNFA -from pyformlang.finite_automaton.finite_automaton import to_symbol -from pyformlang.finite_automaton.symbol import Symbol +from typing import Set, Hashable, Any + +from pyformlang.finite_automaton import DeterministicFiniteAutomaton +from pyformlang.finite_automaton import State, Symbol +from pyformlang.finite_automaton.utils import to_symbol class Box: @@ -22,62 +23,33 @@ class Box: """ - def __init__(self, enfa: EpsilonNFA, nonterminal: Union[Symbol, str]): - self._dfa = enfa - - nonterminal = to_symbol(nonterminal) - 
self._nonterminal = nonterminal - - def to_subgraph_dot(self): - """Creates a named subgraph representing a box""" - graph = self._dfa.to_networkx() - strange_nodes = [] - nonterminal = self.nonterminal.value.replace('"', '').replace("'", "").replace(".", "") - dot_string = (f'subgraph cluster_{nonterminal}\n{{ label="{nonterminal}"\n' - f'fontname="Helvetica,Arial,sans-serif"\n' - f'node [fontname="Helvetica,Arial,sans-serif"]\n' - f'edge [fontname="Helvetica,Arial,sans-serif"]\nrankdir=LR;\n' - f'node [shape = circle style=filled fillcolor=white]') - for node, data in graph.nodes(data=True): - node = node.replace('"', '').replace("'", "") - if 'is_start' not in data.keys() or 'is_final' not in data.keys(): - strange_nodes.append(node) - continue - if data['is_start']: - dot_string += f'\n"{node}" [fillcolor = green];' - if data['is_final']: - dot_string += f'\n"{node}" [shape = doublecircle];' - for strange_node in strange_nodes: - graph.remove_node(strange_node) - for node_from, node_to, data in graph.edges(data=True): - node_from = node_from.replace('"', '').replace("'", "") - node_to = node_to.replace('"', '').replace("'", "") - label = data['label'].replace('"', '').replace("'", "") - dot_string += f'\n"{node_from}" -> "{node_to}" [label = "{label}"];' - dot_string += "\n}" - return dot_string + def __init__(self, + dfa: DeterministicFiniteAutomaton, + nonterminal: Hashable) -> None: + self._dfa = dfa + self._nonterminal = to_symbol(nonterminal) @property - def dfa(self): + def dfa(self) -> DeterministicFiniteAutomaton: """ Box's dfa """ return self._dfa @property - def nonterminal(self): + def nonterminal(self) -> Symbol: """ Box's nonterminal """ return self._nonterminal @property - def start_states(self): + def start_states(self) -> Set[State]: """ The start states """ return self._dfa.start_states @property - def final_states(self): + def final_states(self) -> Set[State]: """ The final states """ return self._dfa.final_states - def is_equivalent_to(self, 
other): + def is_equivalent_to(self, other: "Box") -> bool: """ Check whether two boxes are equivalent Parameters @@ -90,14 +62,44 @@ def is_equivalent_to(self, other): are_equivalent : bool Whether the two boxes are equivalent or not """ + return self._dfa.is_equivalent_to(other.dfa) \ + and self.nonterminal == other.nonterminal + def __eq__(self, other: Any) -> bool: if not isinstance(other, Box): return False - - return self._dfa.is_equivalent_to(other.dfa) and self.nonterminal == other.nonterminal - - def __eq__(self, other): return self.is_equivalent_to(other) - def __hash__(self): - return self._nonterminal.__hash__() + def __hash__(self) -> int: + return hash(self.nonterminal) + + def to_subgraph_dot(self) -> str: + """Creates a named subgraph representing a box""" + graph = self._dfa.to_networkx() + strange_nodes = [] + nonterminal = str(self.nonterminal) \ + .replace('"', '').replace("'", "").replace(".", "") + dot_string = \ + (f'subgraph cluster_{nonterminal}\n{{ label="{nonterminal}"\n' + f'fontname="Helvetica,Arial,sans-serif"\n' + f'node [fontname="Helvetica,Arial,sans-serif"]\n' + f'edge [fontname="Helvetica,Arial,sans-serif"]\nrankdir=LR;\n' + f'node [shape = circle style=filled fillcolor=white]') + for node, data in graph.nodes(data=True): + node = node.replace('"', '').replace("'", "") + if 'is_start' not in data.keys() or 'is_final' not in data.keys(): + strange_nodes.append(node) + continue + if data['is_start']: + dot_string += f'\n"{node}" [fillcolor = green];' + if data['is_final']: + dot_string += f'\n"{node}" [shape = doublecircle];' + for strange_node in strange_nodes: + graph.remove_node(strange_node) + for node_from, node_to, data in graph.edges(data=True): + node_from = node_from.replace('"', '').replace("'", "") + node_to = node_to.replace('"', '').replace("'", "") + label = data['label'].replace('"', '').replace("'", "") + dot_string += f'\n"{node_from}" -> "{node_to}" [label = "{label}"];' + dot_string += "\n}" + return dot_string 
diff --git a/pyformlang/rsa/recursive_automaton.py b/pyformlang/rsa/recursive_automaton.py index 1d89f36..d823dc8 100644 --- a/pyformlang/rsa/recursive_automaton.py +++ b/pyformlang/rsa/recursive_automaton.py @@ -2,14 +2,14 @@ Representation of a recursive automaton """ -from typing import AbstractSet, Union +from typing import Dict, Set, AbstractSet, Optional, Hashable, Any -from pyformlang.finite_automaton.finite_automaton import to_symbol -from pyformlang.finite_automaton.symbol import Symbol +from pyformlang.finite_automaton import Symbol +from pyformlang.finite_automaton.utils import to_symbol from pyformlang.regular_expression import Regex from pyformlang.cfg import Epsilon -from pyformlang.rsa.box import Box +from .box import Box class RecursiveAutomaton: @@ -28,15 +28,35 @@ class RecursiveAutomaton: def __init__(self, start_box: Box, - boxes: AbstractSet[Box]): - self._nonterminal_to_box = {} + boxes: AbstractSet[Box]) -> None: + self._nonterminal_to_box: Dict[Symbol, Box] = {} + self._start_nonterminal = start_box.nonterminal if start_box not in boxes: - self._nonterminal_to_box[to_symbol(start_box.nonterminal)] = start_box - self._start_nonterminal = to_symbol(start_box.nonterminal) + self._nonterminal_to_box[start_box.nonterminal] = start_box for box in boxes: - self._nonterminal_to_box[to_symbol(box.nonterminal)] = box + self._nonterminal_to_box[box.nonterminal] = box - def get_box_by_nonterminal(self, nonterminal: Union[Symbol, str]): + @property + def nonterminals(self) -> Set[Symbol]: + """ The set of nonterminals """ + return set(self._nonterminal_to_box.keys()) + + @property + def boxes(self) -> Set[Box]: + """ The set of boxes """ + return set(self._nonterminal_to_box.values()) + + @property + def start_nonterminal(self) -> Symbol: + """ The start nonterminal """ + return self._start_nonterminal + + @property + def start_box(self) -> Box: + """ The start box """ + return self._nonterminal_to_box[self.start_nonterminal] + + def 
get_box_by_nonterminal(self, nonterminal: Hashable) -> Optional[Box]: """ Box by nonterminal @@ -52,50 +72,15 @@ def get_box_by_nonterminal(self, nonterminal: Union[Symbol, str]): """ nonterminal = to_symbol(nonterminal) - if nonterminal in self._nonterminal_to_box: - return self._nonterminal_to_box[nonterminal] + return self._nonterminal_to_box.get(nonterminal, None) - return None - - def get_number_boxes(self): + def get_number_boxes(self) -> int: """ Size of set of boxes """ - return len(self._nonterminal_to_box) - def to_dot(self): - """ Create dot representation of recursive automaton """ - dot_string = 'digraph "" {' - for box in self._nonterminal_to_box.values(): - dot_string += f'\n{box.to_subgraph_dot()}' - dot_string += "\n}" - return dot_string - - @property - def nonterminals(self) -> set: - """ The set of nonterminals """ - - return set(self._nonterminal_to_box.keys()) - - @property - def boxes(self) -> dict: - """ The set of boxes """ - - return self._nonterminal_to_box - - @property - def start_nonterminal(self) -> Symbol: - """ The start nonterminal """ - - return self._start_nonterminal - - @property - def start_box(self): - """ The start box """ - - return self.boxes[self.start_nonterminal] - @classmethod - def from_regex(cls, regex: Regex, start_nonterminal: Union[Symbol, str]): + def from_regex(cls, regex: Regex, start_nonterminal: Hashable) \ + -> "RecursiveAutomaton": """ Create a recursive automaton from regular expression Parameters @@ -111,18 +96,21 @@ def from_regex(cls, regex: Regex, start_nonterminal: Union[Symbol, str]): The new recursive automaton built from regular expression """ start_nonterminal = to_symbol(start_nonterminal) - box = Box(regex.to_epsilon_nfa().minimize(), start_nonterminal) + box = Box(regex.to_minimal_dfa(), start_nonterminal) return RecursiveAutomaton(box, {box}) @classmethod - def from_ebnf(cls, text, start_nonterminal: Union[Symbol, str] = Symbol("S")): - """ Create a recursive automaton from ebnf (ebnf = 
Extended Backus-Naur Form) + def from_ebnf(cls, text: str, start_nonterminal: Hashable = Symbol("S")) \ + -> "RecursiveAutomaton": + """ Create a recursive automaton from ebnf \ + (ebnf = Extended Backus-Naur Form) Parameters ----------- text : str The text of transform - start_nonterminal : :class:`~pyformlang.finite_automaton.Symbol` | str, optional + start_nonterminal : \ + :class:`~pyformlang.finite_automaton.Symbol` | str, optional The start nonterminal, S by default Returns @@ -131,7 +119,7 @@ def from_ebnf(cls, text, start_nonterminal: Union[Symbol, str] = Symbol("S")): The new recursive automaton built from context-free grammar """ start_nonterminal = to_symbol(start_nonterminal) - productions = {} + productions: Dict[Hashable, str] = {} boxes = set() nonterminals = set() for production in text.splitlines(): @@ -153,12 +141,13 @@ def from_ebnf(cls, text, start_nonterminal: Union[Symbol, str] = Symbol("S")): productions[head] = body for head, body in productions.items(): - boxes.add(Box(Regex(body).to_epsilon_nfa().minimize(), - to_symbol(head))) - start_box = Box(Regex(productions[start_nonterminal.value]).to_epsilon_nfa().minimize(), start_nonterminal) + boxes.add(Box(Regex(body).to_minimal_dfa(), to_symbol(head))) + start_box_dfa = Regex(productions[start_nonterminal.value]) \ + .to_minimal_dfa() + start_box = Box(start_box_dfa, start_nonterminal) return RecursiveAutomaton(start_box, boxes) - def is_equals_to(self, other): + def is_equal_to(self, other: "RecursiveAutomaton") -> bool: """ Check whether two recursive automata are equals by boxes. 
Not equivalency in terms of formal languages theory, just mapping boxes @@ -173,9 +162,17 @@ def is_equals_to(self, other): are_equivalent : bool Whether the two recursive automata are equals or not """ + return self.boxes == other.boxes + + def __eq__(self, other: Any) -> bool: if not isinstance(other, RecursiveAutomaton): return False - return self.boxes == other.boxes + return self.is_equal_to(other) - def __eq__(self, other): - return self.is_equals_to(other) + def to_dot(self) -> str: + """ Create dot representation of recursive automaton """ + dot_string = 'digraph "" {' + for box in self._nonterminal_to_box.values(): + dot_string += f'\n{box.to_subgraph_dot()}' + dot_string += "\n}" + return dot_string diff --git a/pyformlang/rsa/tests/test_rsa.py b/pyformlang/rsa/tests/test_rsa.py index a24dc13..1fa3162 100644 --- a/pyformlang/rsa/tests/test_rsa.py +++ b/pyformlang/rsa/tests/test_rsa.py @@ -12,8 +12,7 @@ def test_creation(self): """ Test the creation of an RSA """ # S -> a S b | a b regex = Regex("a S b | a b") - enfa = regex.to_epsilon_nfa() - dfa = enfa.minimize() + dfa = regex.to_minimal_dfa() box = Box(dfa, "S") rsa_1 = RecursiveAutomaton(box, set()) @@ -31,8 +30,7 @@ def test_from_regex(self): # S -> a* rsa_2 = RecursiveAutomaton.from_regex(Regex("a*"), "S") - enfa = Regex("a*").to_epsilon_nfa() - dfa = enfa.minimize() + dfa = Regex("a*").to_minimal_dfa() box = Box(dfa, "S") rsa_1 = RecursiveAutomaton(box, set()) @@ -65,8 +63,8 @@ def test_from_ebnf(self): assert rsa1_g2.get_number_boxes() == 2 assert rsa1_g2.nonterminals == {Symbol("S"), Symbol("V")} - dfa_s = Regex("a V b").to_epsilon_nfa().minimize() + dfa_s = Regex("a V b").to_minimal_dfa() assert rsa1_g2.get_box_by_nonterminal("S") == Box(dfa_s, "S") - dfa_v = Regex("c S d | c d").to_epsilon_nfa().minimize() + dfa_v = Regex("c S d | c d").to_minimal_dfa() assert rsa1_g2.get_box_by_nonterminal("V") == Box(dfa_v, "V") diff --git a/pyrightconfig.json b/pyrightconfig.json new file mode 100644 index 
0000000..7fd2a49 --- /dev/null +++ b/pyrightconfig.json @@ -0,0 +1,22 @@ +{ + "include": [ + "pyformlang" + ], + + "exclude": [ + "**/node_modules", + "**/__pycache__", + "**/.*", + "**/tests" + ], + + "pythonVersion": "3.8", + "pythonPlatform": "Linux", + + "strictListInference": true, + "strictSetInference": true, + "strictDictionaryInference": true, + "strictParameterNoneValue": false, + + "reportMissingParameterType": "warning" + }