-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
122 lines (102 loc) · 3.31 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
package main
import (
"bufio"
"flag"
"fmt"
"os"
"runtime"
"runtime/pprof"
"github.com/jhjaggars/uniqish/pkg/compare"
"github.com/jhjaggars/uniqish/pkg/peeker"
"github.com/jhjaggars/uniqish/pkg/tokenizers"
)
// GlobalOptions holds the command-line settings registered by AddFlags.
// Every field is a pointer returned by the flag package, so all fields are
// nil until AddFlags has been called and hold their final values only after
// the flag set has been parsed.
type GlobalOptions struct {
	// Cpuprofile is the file to write a CPU profile to ("cpuprofile" flag).
	Cpuprofile *string
	// Memprofile is the file to write a memory profile to ("memprofile" flag).
	Memprofile *string
	// Bufsize is how many bytes to read ahead to guess the offset ("bufsize" flag).
	Bufsize *int
	// Similarity is the similarity percentage to consider a match ("similarity" flag).
	Similarity *int
	// Stats selects whether stats are shown after processing ("stats" flag).
	Stats *bool
}

// AddFlags registers every global option on fs and stores the resulting
// value pointers in o.
//
// When prefix is non-empty, each flag name is registered as "<prefix>.<name>".
// The prefix is applied uniformly to all five flags so that several
// GlobalOptions values can share one FlagSet without a "flag redefined"
// panic. With an empty prefix the names are the bare "cpuprofile",
// "memprofile", "bufsize", "similarity" and "stats".
func (o *GlobalOptions) AddFlags(fs *flag.FlagSet, prefix string) {
	if prefix != "" {
		prefix += "."
	}
	o.Cpuprofile = fs.String(prefix+"cpuprofile", "", "write cpu profile to file")
	o.Memprofile = fs.String(prefix+"memprofile", "", "write memory profile to file")
	o.Bufsize = fs.Int(prefix+"bufsize", 1024*2, "how many bytes to read ahead to guess offset")
	o.Similarity = fs.Int(prefix+"similarity", 80, "similarity percentage to consider a match")
	o.Stats = fs.Bool(prefix+"stats", false, "show stats after processing")
}
// options gathers one option set per concern: the global flags plus the
// look-back, algorithm and tokenizer option types from the compare and
// tokenizers packages. Each member starts as a zero-valued struct; main
// populates them by calling AddFlags on each one before parsing the
// command line.
var options = struct {
	Global    *GlobalOptions
	Lookback  *compare.LookBackOptions
	Algorithm *compare.AlgorithmOptions
	Tokenizer *tokenizers.TokenizerOptions
}{
	Global:    new(GlobalOptions),
	Lookback:  new(compare.LookBackOptions),
	Algorithm: new(compare.AlgorithmOptions),
	Tokenizer: new(tokenizers.TokenizerOptions),
}
// main runs the filter and exits with the status reported by run. The work
// lives in run so that its deferred profile cleanup executes before os.Exit.
func main() {
	os.Exit(run())
}

// run parses the command-line flags, then copies each line of standard input
// to standard output unless comparer.Compare returns true for it.
//
// The returned value is the process exit status: 0 on success, 1 when the
// CPU profile cannot be started or reading standard input fails, and 2 when
// the flags or tokenizer options are invalid. A failure to write the memory
// profile is reported on standard error but does not change the status.
func run() int {
	fs := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
	options.Global.AddFlags(fs, "")
	options.Algorithm.AddFlags(fs, "")
	options.Lookback.AddFlags(fs, "")
	options.Tokenizer.AddFlags(fs, "")
	// With flag.ExitOnError the flag package exits on a bad flag itself, so
	// an error is not expected here; it is still handled rather than dropped.
	if err := fs.Parse(os.Args[1:]); err != nil {
		fmt.Fprintln(os.Stderr, err.Error())
		return 2
	}

	if err := options.Tokenizer.Validate(); err != nil {
		fmt.Fprintln(os.Stderr, err.Error())
		return 2
	}

	r := bufio.NewReaderSize(os.Stdin, *options.Global.Bufsize)
	// The Peek error is deliberately ignored: when fewer than bufsize bytes
	// are available Peek returns the bytes it has together with an error,
	// and those bytes are all that is needed for the offset guess below.
	peeked, _ := r.Peek(*options.Global.Bufsize)
	input := bufio.NewScanner(r)

	var processed, printed int
	similarityThreshold := float64(*options.Global.Similarity) / 100.0

	if *options.Global.Cpuprofile != "" {
		f, err := os.Create(*options.Global.Cpuprofile)
		if err != nil {
			fmt.Fprintf(os.Stderr, "could not create cpu profile: %s\n", err.Error())
			return 1
		}
		// Deferred calls run last-in first-out, so the profile is stopped
		// (and flushed) before the file is closed.
		defer f.Close()
		if err := pprof.StartCPUProfile(f); err != nil {
			fmt.Fprintf(os.Stderr, "could not start cpu profile: %s\n", err.Error())
			return 1
		}
		defer pprof.StopCPUProfile()
	}

	// NOTE(review): Calcoff presumably guesses how many leading bytes of each
	// line to skip (e.g. a timestamp prefix) — confirm against pkg/peeker.
	offset := peeker.Calcoff(peeked, 64)
	compareStats := &compare.Stats{}
	comparer := compare.New(options.Algorithm, options.Lookback, options.Tokenizer, similarityThreshold, compareStats)

	for input.Scan() {
		line := input.Text()
		// Compare on the part of the line after the offset; lines shorter
		// than the offset are compared whole.
		linekey := line
		if len(line) >= offset {
			linekey = line[offset:]
		}
		if !comparer.Compare(linekey) {
			fmt.Println(line)
			printed++
		}
		processed++
	}

	status := 0
	// Scan returns false both at end of input and on failure (including a
	// line longer than the Scanner's token limit), so Err must be consulted
	// to avoid silently truncating the output.
	if err := input.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "could not read input: %s\n", err.Error())
		status = 1
	}

	if *options.Global.Memprofile != "" {
		if err := writeHeapProfile(*options.Global.Memprofile); err != nil {
			fmt.Fprintln(os.Stderr, err.Error())
		}
	}

	if *options.Global.Stats {
		loopsPerLine := safeRatio(float64(compareStats.Loops), float64(processed))
		fmt.Fprintf(os.Stderr, "Offset: %d\n", offset)
		fmt.Fprintf(os.Stderr, "Total lines: %d\n", processed)
		fmt.Fprintf(os.Stderr, "Total loops: %d\n", compareStats.Loops)
		fmt.Fprintf(os.Stderr, "Total compares: %d\n", compareStats.Compares)
		fmt.Fprintf(os.Stderr, "loops/line: %.2f\n", loopsPerLine)
		fmt.Fprintf(os.Stderr, "average cache search: %.2f\n", safeRatio(loopsPerLine, float64(*options.Lookback.Lookback)))
		fmt.Fprintf(os.Stderr, "Printed: %d %.2f%%\n", printed, 100.0*safeRatio(float64(printed), float64(processed)))
	}

	return status
}

// writeHeapProfile forces a garbage collection and writes a heap profile to
// the file at path. It returns the first error from creating, writing or
// closing the file, and never touches the file after a failed create.
func writeHeapProfile(path string) error {
	f, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("could not create memory profile: %v", err)
	}
	runtime.GC()
	if err := pprof.WriteHeapProfile(f); err != nil {
		f.Close() // best effort; the write error is the one worth reporting
		return fmt.Errorf("could not write memory profile: %v", err)
	}
	if err := f.Close(); err != nil {
		return fmt.Errorf("could not close memory profile file: %v", err)
	}
	return nil
}

// safeRatio returns num/den, or 0 when den is 0, so that stats for empty
// input print 0.00 instead of NaN or Inf.
func safeRatio(num, den float64) float64 {
	if den == 0 {
		return 0
	}
	return num / den
}