-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbudoux.go
36 lines (29 loc) · 981 Bytes
/
budoux.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
/*
Package budoux is a Go port of [BudouX](https://github.com/google/budoux) (a machine learning powered line break organizer tool).
Note:
This project contains the deliverables of the [BudouX](https://github.com/google/budoux) project.
Note:
BudouX-Go supports plain text only; HTML input is not supported.
*/
package budoux
// DefaultThreshold is the default threshold for splitting a sentence.
// NOTE(review): nothing in this part of the file references it; presumably
// callers pass it as the threshold argument of New — confirm.
const DefaultThreshold = 1000
// Model is a trained machine learning model.
// Each key (string) is a feature of a character and each value (int) is the
// score of that feature.
type Model map[string]int
// Parser is a machine learning based sentence splitter.
// It pairs a Model with the threshold that Parse hands to ParseWithThreshold.
type Parser struct {
	model     Model // model supplied to New
	threshold int   // threshold supplied to New
}
// New returns a budoux-go Parser that uses the given model and threshold.
// The model is stored as passed in; no copy is made.
func New(model Model, threshold int) *Parser {
	p := new(Parser)
	p.model = model
	p.threshold = threshold
	return p
}
// Parse splits in into a slice of strings.
// It delegates to ParseWithThreshold, passing the model and threshold this
// Parser was created with, and returns that call's result unchanged.
func (p *Parser) Parse(in string) []string {
	model, threshold := p.model, p.threshold
	return ParseWithThreshold(model, in, threshold)
}