Support IELR(1) parser generation

junk0612 · junk0612 · commit 2d975c55e611 · 2024-04-23T14:27:11.000+09:00
diff --git a/lib/lrama/state.rb b/lib/lrama/state.rb
@@ -82,6 +82,11 @@ def transitions
       @transitions ||= shifts.map {|shift| [shift, @items_to_state[shift.next_items]] }
     end
 
+    def update_transition(shift, next_state)
+      set_items_to_state(shift.next_items, next_state)
+      @transitions = shifts.map {|sh| [sh, @items_to_state[sh.next_items]] }
+    end
+
     def selected_term_transitions
       term_transitions.reject do |shift, next_state|
         shift.not_selected
@@ -140,5 +145,45 @@ def rr_conflicts
         conflict.type == :reduce_reduce
       end
     end
+
+    def always_follows(shift, next_state)
+      internal_dependencies(shift, next_state).union(successor_dependencies(shift, next_state)).reduce([]) {|result, transition| result += transition[1].term_transitions.map {|shift, _| shift.next_sym } }
+    end
+
+    def internal_dependencies(shift, next_state)
+      nterm_transitions.select {|other_shift, _|
+        @items.find {|item| item.next_sym == shift.next_sym && item.lhs == other_shift.next_sym && item.symbols_after_dot.all?(&:nullable) }
+      }.reduce([[shift, next_state]]) {|result, transition|
+        result += internal_follows(*transition)
+      }
+    end
+
+    def successor_dependencies(shift, next_state)
+      next_state.nterm_transitions.select {|other_shift, _|
+        other_shift.next_sym.nullable
+      }.reduce([[shift, next_state]]) {|result, transition|
+        result += successor_dependencies(*transition)
+      }
+    end
+
+    def inadequacy_list
+      return @inadequacy_list if @inadequacy_list
+
+      list = shifts.to_h {|shift| [shift.next_sym, [[shift, nil]]] }
+      reduces.each do |reduce|
+        reduce_list = (reduce.look_ahead || []).to_h {|sym| [sym, [[reduce, reduce.item]]] }
+        list.merge!(reduce_list) {|_, list_value, reduce_value| list_value + reduce_value }
+      end
+
+      @inadequacy_list = {self => list.select {|_, actions| actions.size > 1 }}
+    end
+
+    def follow_kernel?(item)
+      item.symbols_after_dot.all?(&:nullable)
+    end
+
+    def follow_kernel_items(shift, next_state, item)
+      internal_dependencies(shift, next_state).any? {|shift, _| shift.next_sym == item.next_sym } && item.symbols_after_dot.all?(&:nullable)
+    end
   end
 end
diff --git a/lib/lrama/states.rb b/lib/lrama/states.rb
@@ -93,6 +93,10 @@ def compute
       check_conflicts
     end
 
+    def compute_ielr
+      report_duration(:split_states) { split_states }
+    end
+
     def reporter
       StatesReporter.new(self)
     end
@@ -552,5 +556,225 @@ def check_conflicts
         end
       end
     end
+
+    def split_states
+      @item_lookahead_set = {}
+      @lalr_isocores = Hash.new {|hash, key| hash[key] = key }
+      @ielr_isocores = Hash.new {|hash, key| hash[key] = [key] }
+      @lookaheads_recomputed = Hash.new {|hash, key| hash[key] = false }
+      @predecessors = {}
+      @states.each do |state|
+        state.transitions.each do |shift, next_state|
+          compute_state(state, shift, next_state)
+        end
+      end
+    end
+
+    def merge_lookaheads(state, k)
+      return if state.kernels.all? {|item| (k[item] - item_lookahead_set(state)[item]).empty? }
+
+      state.transitions.each do |shift, next_state|
+        next if @lookaheads_recomputed[next_state]
+        compute_state(state, shift, next_state)
+      end
+    end
+
+    def compute_state(state, shift, next_state)
+      k = propagate_lookaheads(state, next_state)
+      s = @ielr_isocores[next_state].find {|st| is_compatible(st, k) }
+
+      if s.nil?
+        s = @ielr_isocores[next_state].last
+        new_state = State.new(@states.count, s.accessing_symbol, s.kernels)
+        new_state.closure = s.closure
+        new_state.compute_shifts_reduces
+        s.transitions.each do |sh, next_state|
+          new_state.set_items_to_state(sh.next_items, next_state)
+        end
+        @states << new_state
+        @lalr_isocores[new_state] = s
+        @ielr_isocores[s] << new_state
+        @ielr_isocores[s].each do |st|
+          @ielr_isocores[st] = @ielr_isocores[s]
+        end
+        @lookaheads_recomputed[new_state] = true
+        @item_lookahead_set[new_state] = k
+        state.update_transition(shift, new_state)
+      elsif(!@lookaheads_recomputed[s])
+        @item_lookahead_set[s] = k
+        @lookaheads_recomputed[s] = true
+      else
+        merge_lookaheads(s, k)
+      end
+    end
+
+    def propagate_lookaheads(state, next_state)
+      next_state.kernels.to_h {|item|
+        lookahead_sets =
+          if item.position == 1
+            compute_goto_follow_set(@lalr_isocores[state], item.lhs)
+          else
+            kernel = state.kernels.find {|k| k.rule == item.rule && k.position == item.position - 1 }
+            item_lookahead_set(state)[kernel]
+          end
+
+        [item, lookahead_sets & lookahead_set_filters(next_state)[item]]
+      }
+    end
+
+    def lookahead_set_filters(state)
+      state.kernels.to_h {|kernel|
+        [kernel,
+        annotation_list(@lalr_isocores[state])[@lalr_isocores[state]].filter_map {|token, actions|
+          token if token.term? && actions.any? {|item, _| item == kernel }
+        }]
+      }
+    end
+
+    def is_compatible(state, k)
+      @lookaheads_recomputed[state] ||
+        annotation_list(@lalr_isocores[state])[@lalr_isocores[state]].all? {|token, actions|
+          a = dominant_contribution(state, token, actions, item_lookahead_set(state))
+          b = dominant_contribution(state, token, actions, k)
+          a.empty? || b.empty? || a == b
+        }
+    end
+
+    def dominant_contribution(state, token, actions, lookaheads)
+      actions.filter_map {|action, items|
+        action if items.empty? || items.any? {|item, bool| bool && lookaheads[item].include?(token) }
+      }.reject {|action|
+        if action.is_a?(State::Shift)
+          action.not_selected
+        elsif action.is_a?(State::Reduce)
+          action.not_selected_symbols.include?(token)
+        end
+      }
+    end
+
+    def compute_goto_follow_set(state, nterm_token)
+      shift, next_state = state.nterm_transitions.find {|shift, _| shift.next_sym == nterm_token }
+      return [] if shift.nil? && nterm_token.id.s_value == '$accept'
+      state.always_follows(shift, next_state).union(state.kernels.select {|item|
+        state.follow_kernel_items(shift, next_state, item)
+      }.reduce([]) {|result, item|
+        result.union(item_lookahead_set(state)[item])
+      })
+    end
+
+    def item_lookahead_set(state)
+      @item_lookahead_set[state] ||=
+        state.kernels.to_h {|item|
+          value =
+            if item.position > 1
+              prev_state, prev_item = predecessor_with_item(state, item)
+              item_lookahead_set(prev_state)[prev_item]
+            elsif item.position == 1
+              prev_state = predecessors(state).find {|p| p.shifts.any? {|shift| shift.next_sym == item.lhs } }
+              shift, next_state = prev_state.nterm_transitions.find {|shift, _| shift.next_sym == item.lhs }
+              goto_follows(prev_state, shift, next_state)
+            else
+              []
+            end
+          [item, value]
+        }
+    end
+
+    def predecessors(state)
+      @predecessors[state] ||= @states.select {|prev| prev.transitions.any? {|_, to_state| to_state == state } }
+    end
+
+    def predecessor_with_item(state, item)
+      predecessors(state).each do |state|
+        state.kernels.each do |kernel|
+          return [state, kernel] if kernel.rule == item.rule && kernel.position == item.position - 1
+        end
+      end
+    end
+
+    def goto_follows(state, shift, next_state)
+      compute_include_dependencies(state, shift, next_state).reduce([]) {|result, goto|
+        st, sh, next_st = goto
+        result.union(st.always_follows(sh, next_st))
+      }
+    end
+
+    def compute_include_dependencies(state, shift, next_state)
+      internal = state.internal_dependencies(shift, next_state).map {|sh, next_st| [state, sh, next_st] }
+
+      item = state.kernels.find {|kernel| kernel.next_sym == shift.next_sym }
+      return internal unless item.symbols_after_dot.all?(&:nullable)
+
+      s, i = state, item
+      while i.position > 0
+        s = predecessors(s).find {|p| p.kernels.find {|item| item.rule == i.rule && item.position == i.position - 1 } }
+        i = s.kernels.find {|item| item.rule == i.rule && item.position == i.position - 1 }
+      end
+
+      p_shift, p_next_state = s.transitions.find {|sh, _| sh.next_sym == item.lhs }
+      internal.union([[s, p_shift, p_next_state]])
+    end
+
+    def annotation_list(state)
+      manifestations = state.inadequacy_list.transform_values {|hash| hash.to_h {|token, actions| [token, annotate_manifestation(state, token, actions)] } }
+      state.transitions.reduce(manifestations) {|item, transition|
+        item.merge(annotate_predecessor(state, transition[1], annotation_list(transition[1])[transition[1]])) {|state, annotations, other_annotations|
+          annotations.merge(other_annotations) {|token, actions, other_actions|
+            actions.merge(other_actions) {|action, items, other_items|
+              items.merge(other_items) {|item, bool, other_bool|
+                raise if bool != other_bool
+                bool
+              }
+            }
+          }
+        }
+      }
+    end
+
+    def annotate_manifestation(state, token, actions)
+      actions.to_h {|action, item|
+        [action,
+         if action.is_a?(State::Shift)
+           {}
+         elsif action.is_a?(State::Reduce)
+           if item.empty_rule?
+             compute_lhs_contributions(state, item.lhs, token)
+           else
+             state.kernels.to_h {|kernel| [kernel, kernel.rule == item.rule && kernel.end_of_rule?] }
+           end
+         end
+        ]
+      }
+    end
+
+    def compute_lhs_contributions(state, sym, token)
+      shift, next_state = state.nterm_transitions.find {|sh, _| sh.next_sym == sym }
+      if state.always_follows(shift, next_state).include?(token)
+        {}
+      else
+        state.kernels.to_h {|kernel| [kernel, state.follow_kernel?(kernel) && item_lookahead_set(state)[kernel].include?(token)] }
+      end
+    end
+
+    def annotate_predecessor(state, next_state, annotation_list)
+      {state => annotation_list.to_h {|token, actions|
+        next [token, {}] if actions.empty? || actions.any? {|action, hash|
+          hash.keys.any? {|item| hash[item] && item.position == 1 && compute_lhs_contributions(state, item.lhs, token).empty? }
+        }
+        [token, actions.to_h {|action, hash|
+          [action, hash.to_h {|item, _|
+            kernel = state.kernels.find {|k| k.rule == item.rule && k.position == item.position - 1 }
+            [kernel,
+             hash[item] &&
+             (
+               !kernel.nil? && (item_lookahead_set(state)[kernel].include?(token)) ||
+               (item.position == 1 && compute_lhs_contributions(state, item.lhs, token)[item])
+             )
+            ]
+          }]
+        }]
+      }
+      }
+    end
   end
 end