Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dynamic RSM update #9

Open
wants to merge 16 commits into
base: cf_solver
Choose a base branch
from
Open
107 changes: 82 additions & 25 deletions src/main/kotlin/org/srcgll/grammar/combinator/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Grammar combinator
# Grammar combinator
Kotlin DSL for describing context-free grammars.


Expand All @@ -15,14 +15,14 @@ S = A*
*DSL*
```kotlin
class AStar : Grammar() {
var A = Term("a")
var S by NT()
var A = Term("a")
var S by NT()

init {
setStart(S)
S = Many(A)
}
init {
setStart(S)
S = Many(A)
}
}
```
### Non-terminals

Expand All @@ -32,13 +32,13 @@ Non-terminals must be fields of the grammar class. Be sure to declare using dele

The start non-terminal is set with the `setStart(nt)` method. It can be set only once per grammar.

### Terminals
### Terminals

`val A = Term("a")`

`val B = Term(42)`

Terminal is a generic class. Can store terminals of any type. Terminals are compared based on their content.
Terminal is a generic class. Can store terminals of any type. Terminals are compared based on their content.

They can be declared as fields of a grammar class or directly in productions.

Expand All @@ -55,19 +55,19 @@ S3 = '{' S '}' S
*DSL*
```kotlin
class DyckGrammar : Grammar() {
var S by NT()
var S1 by NT()
var S2 by NT()
var S3 by NT()

init {
setStart(S)
S = S1 or S2 or S3 or Epsilon
S1 = Term("(") * S * Term(")") * S
S2 = Term("[") * S * Term("]") * S
S3 = Term("{") * S * Term("}") * S
}
var S by NT()
var S1 by NT()
var S2 by NT()
var S3 by NT()

init {
setStart(S)
S = S1 or S2 or S3 or Epsilon
S1 = Term("(") * S * Term(")") * S
S2 = Term("[") * S * Term("]") * S
S3 = Term("{") * S * Term("}") * S
}
}
```
### Production
A → B = A = B
Expand All @@ -76,24 +76,81 @@ A → B = A = B
(.): Σ∗ × Σ∗ → Σ∗

a . b = a * b
```kotlin
class AB : Grammar() {
var S by NT()

init {
setStart(S)
S = Term("a") * Term("b")
}
}
```
### Alternative
a | b = a or b

```kotlin
class AStar : Grammar() {
var S by NT()

init {
setStart(S)
S = Term("a") or S or Epsilon
}
}
```

### Kleene Star
$a^* = \bigcup_{i=0}^{\infty} a^i$

a* = Many(a)

`todo: a+ = some(a)`
```kotlin
class AStar : Grammar() {
var S by NT()

init {
setStart(S)
S = many(Term("a"))
}
}
```

### Some
$a^+ = \bigcup_{i=1}^{\infty} a^i$

a+ = some(a)

### Optional
```kotlin
class AStar : Grammar() {
var S by NT()

init {
setStart(S)
S = some(Term("a")) or Epsilon
}
}
```

### Optional
a? -> a | Epsilon

Epsilon -- constant terminal with behavior corresponding to the $\epsilon$ terminal (empty string).

`todo: a? = opt(a)`
a? = opt(a)

```kotlin
class AStar : Grammar() {
var S by NT()

init {
setStart(S)
S = opt(Term("a")) * S
}
}
```

## RSM
## RSM
The DSL provides the `getRsm` method, which returns the RSM corresponding to the grammar.
The RSM construction algorithm is based on Brzozowski derivatives.

37 changes: 17 additions & 20 deletions src/main/kotlin/org/srcgll/grammar/combinator/regexp/Alternative.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,13 @@ package org.srcgll.grammar.combinator.regexp


data class Alternative
(
internal val left : Regexp,
internal val right : Regexp,
)
: Regexp
{
(
internal val left: Regexp,
internal val right: Regexp,
) : Regexp {
companion object {
fun makeAlternative(left : Regexp, right : Regexp) : Regexp
{
if (left is Empty) return right
fun makeAlternative(left: Regexp, right: Regexp): Regexp {
if (left is Empty) return right
if (right is Empty) return left

if (left is Alternative && (right == left.left || right == left.right)) {
Expand All @@ -22,22 +19,22 @@ data class Alternative
}
return if (left == right) left else Alternative(left, right)
}

/**
 * Builds the alternation of the terminals created from [literals], combining them
 * left to right with the two-argument `makeAlternative`.
 *
 * Returns [Empty] when [literals] has no elements and the single [Term] when it has
 * exactly one, instead of failing on an out-of-range index.
 *
 * @param literals the strings to wrap into terminals
 * @return the resulting regular expression
 */
fun makeAlternative(literals: Iterable<String>): Regexp =
    // The two-argument makeAlternative returns its right operand when the left one is Empty,
    // so seeding the fold with Empty gives the same result as seeding it with the first two terms.
    literals.fold<String, Regexp>(Empty) { acc, literal ->
        Alternative.makeAlternative(acc, Term(literal))
    }
}

override fun derive(symbol : DerivedSymbol) : Regexp
{
override fun derive(symbol: DerivedSymbol): Regexp {
return makeAlternative(left.derive(symbol), right.derive(symbol))
}

}

infix fun Regexp.or(other : Regexp) : Regexp = Alternative.makeAlternative(left = this, other)

fun makeAlternative(literals : Iterable<String>) : Regexp
{
val terms = literals.map { Term(it) }
val initial : Regexp = terms[0] or terms[1]
/**
 * Infix form of alternation: `a or b` delegates to [Alternative.makeAlternative]
 * with the receiver as the left operand and [other] as the right one.
 */
infix fun Regexp.or(other: Regexp): Regexp = Alternative.makeAlternative(left = this, other)

return terms.subList(2, terms.size)
.fold(initial) { acc : Regexp, i : Term<String> -> Alternative.makeAlternative(acc, i) }
}
/**
 * Optional expression (`a?`): the alternation of [exp] and [Epsilon].
 */
fun opt(exp: Regexp): Regexp = exp or Epsilon
Original file line number Diff line number Diff line change
@@ -1,33 +1,38 @@
package org.srcgll.grammar.combinator.regexp

data class Concat
(
internal val head : Regexp,
internal val tail : Regexp,
)
: Regexp
{
(
internal val head: Regexp,
internal val tail: Regexp,
) : Regexp {

/*
D[s](h.t) = acceptEps(h).D[s](t) | D[s](h).t
*/
override fun derive(symbol : DerivedSymbol) : Regexp
{
override fun derive(symbol: DerivedSymbol): Regexp {
val newHead = head.derive(symbol)

if (!head.acceptEpsilon()) {
return when (newHead) {
Empty -> Empty
Empty -> Empty
Epsilon -> tail
else -> Concat(newHead, tail)
else -> Concat(newHead, tail)
}
}
return when (newHead) {
Empty -> tail.derive(symbol)
Empty -> tail.derive(symbol)
Epsilon -> Alternative.makeAlternative(tail, tail.derive(symbol))
else -> Alternative.makeAlternative(Concat(newHead, tail), tail.derive(symbol))
else -> Alternative.makeAlternative(Concat(newHead, tail), tail.derive(symbol))
}
}
}

infix operator fun Regexp.times(other : Regexp) : Concat = Concat(head = this, other)
/**
 * Concatenation operator: `a * b` creates a [Concat] whose head is the receiver
 * and whose tail is [other].
 */
infix operator fun Regexp.times(other: Regexp): Concat = Concat(head = this, other)

/**
 * Concatenates the terminals created from [literals], left-associatively:
 * `makeConcat(a, b, c)` yields `Concat(Concat(Term(a), Term(b)), Term(c))`.
 *
 * Returns [Epsilon] (the empty string) when no literals are given and the single
 * [Term] when exactly one is given, instead of failing on an out-of-range index.
 *
 * @param literals the values to wrap into terminals, in concatenation order
 * @return the resulting regular expression
 */
fun <T> makeConcat(vararg literals: T): Regexp {
    val terms: List<Regexp> = literals.map { Term(it) }
    if (terms.isEmpty()) return Epsilon

    // Seed with the first term so that two or more literals produce the same tree as before.
    return terms.drop(1).fold(terms.first()) { acc, term -> Concat(acc, term) }
}
6 changes: 2 additions & 4 deletions src/main/kotlin/org/srcgll/grammar/combinator/regexp/Many.kt
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,13 @@ data class Many
{
override fun derive(symbol : DerivedSymbol) : Regexp
{
val newReg = exp.derive(symbol)

return when (newReg) {
return when (val newReg = exp.derive(symbol)) {
Epsilon -> Many(exp)
Empty -> Empty
else -> Concat(newReg, Many(exp))
}
}
}

val Regexp.many : Many
get() = Many(this)
/**
 * One-or-more repetition (`a+`): [exp] concatenated with [Many] of [exp].
 */
fun some(exp: Regexp): Concat = exp * Many(exp)
48 changes: 41 additions & 7 deletions src/main/kotlin/org/srcgll/grammar/combinator/regexp/NT.kt
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,19 @@ import java.util.*
import kotlin.reflect.KProperty

open class NT : DerivedSymbol {
private lateinit var nonTerm: Nonterminal
private lateinit var rsmDescription: Regexp
protected open lateinit var nonTerm: Nonterminal
protected lateinit var rsmDescription: Regexp

private fun getNewState(regex: Regexp): RSMState {
return RSMState(nonTerm, isStart = false, regex.acceptEpsilon())
protected fun getNewState(regex: Regexp, isStart: Boolean = false): RSMState {
return RSMState(nonTerm, isStart, regex.acceptEpsilon())
}

fun buildRsmBox(): RSMState {
open fun buildRsmBox(): RSMState = buildRsmBox(nonTerm.startState)

protected fun buildRsmBox(startState: RSMState): RSMState {
val regexpToProcess = Stack<Regexp>()
val regexpToRsmState = HashMap<Regexp, RSMState>()
regexpToRsmState[rsmDescription] = nonTerm.startState
regexpToRsmState[rsmDescription] = startState

val alphabet = rsmDescription.getAlphabet()

Expand Down Expand Up @@ -53,7 +55,7 @@ open class NT : DerivedSymbol {
}
}
}
return nonTerm.startState
return startState
}

override fun getNonterminal(): Nonterminal? {
Expand All @@ -73,4 +75,36 @@ open class NT : DerivedSymbol {
}

operator fun getValue(grammar: Grammar, property: KProperty<*>): Regexp = this

}

/**
 * [NT] variant bound to a nonterminal that already exists in a grammar.
 * Helper for building an RSM delta when rules are added to or deleted from the grammar.
 */
class StandAloneNt(nonterminal: Nonterminal) : NT() {
    init {
        nonTerm = nonterminal
    }

    /**
     * Stores [description] as the regular expression the RSM box is built from.
     * The description may be recursive.
     */
    fun setDescription(description: Regexp) {
        rsmDescription = description
    }

    /**
     * Builds the RSM box starting from a newly created start state.
     * A new state is used because building from the existing one would ruin the origin of the RSM.
     */
    override fun buildRsmBox(): RSMState {
        val freshStart = getNewState(rsmDescription, isStart = true)
        return buildRsmBox(freshStart)
    }

    /**
     * Sets [description] as the current description and builds the RSM from it.
     *
     * @return the start state of the built RSM box
     */
    fun buildRsm(description: Regexp): RSMState {
        setDescription(description)
        return buildRsmBox()
    }
}
Loading